From a731ab3a2aad56e6ceb8b4e2444a61353246295c Mon Sep 17 00:00:00 2001 From: gdkchan Date: Thu, 8 Aug 2019 15:56:22 -0300 Subject: [PATCH] Add a new JIT compiler for CPU code (#693) * Start of the ARMeilleure project * Refactoring around the old IRAdapter, now renamed to PreAllocator * Optimize the LowestBitSet method * Add CLZ support and fix CLS implementation * Add missing Equals and GetHashCode overrides on some structs, misc small tweaks * Implement the ByteSwap IR instruction, and some refactoring on the assembler * Implement the DivideUI IR instruction and fix 64-bits IDIV * Correct constant operand type on CSINC * Move division instructions implementation to InstEmitDiv * Fix destination type for the ConditionalSelect IR instruction * Implement UMULH and SMULH, with new IR instructions * Fix some issues with shift instructions * Fix constant types for BFM instructions * Fix up new tests using the new V128 struct * Update tests * Move DIV tests to a separate file * Add support for calls, and some instructions that depends on them * Start adding support for SIMD & FP types, along with some of the related ARM instructions * Fix some typos and the divide instruction with FP operands * Fix wrong method call on Clz_V * Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes * Implement SIMD logical instructions and more misc. fixes * Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations * Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. 
fixes * Implement SIMD shift instruction and fix Dup_V * Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table * Fix check with tolerance on tester * Implement FP & SIMD comparison instructions, and some fixes * Update FCVT (Scalar) encoding on the table to support the Half-float variants * Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes * Use old memory access methods, made a start on SIMD memory insts support, some fixes * Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes * Fix arguments count with struct return values, other fixes * More instructions * Misc. fixes and integrate LDj3SNuD fixes * Update tests * Add a faster linear scan allocator, unwinding support on windows, and other changes * Update Ryujinx.HLE * Update Ryujinx.Graphics * Fix V128 return pointer passing, RCX is clobbered * Update Ryujinx.Tests * Update ITimeZoneService * Stop using GetFunctionPointer as that can't be called from native code, misc. 
fixes and tweaks * Use generic GetFunctionPointerForDelegate method and other tweaks * Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics * Remove some unused code on the assembler * Fix REX.W prefix regression on float conversion instructions, add some sort of profiler * Add hardware capability detection * Fix regression on Sha1h and revert Fcm** changes * Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator * Fix silly mistake introduced on last commit on CpuId * Generate inline stack probes when the stack allocation is too large * Initial support for the System-V ABI * Support multiple destination operands * Fix SSE2 VectorInsert8 path, and other fixes * Change placement of XMM callee save and restore code to match other compilers * Rename Dest to Destination and Inst to Instruction * Fix a regression related to calls and the V128 type * Add an extra space on comments to match code style * Some refactoring * Fix vector insert FP32 SSE2 path * Port over the ARM32 instructions * Avoid memory protection races on JIT Cache * Another fix on VectorInsert FP32 (thanks to LDj3SNuD * Float operands don't need to use the same register when VEX is supported * Add a new register allocator, higher quality code for hot code (tier up), and other tweaks * Some nits, small improvements on the pre allocator * CpuThreadState is gone * Allow changing CPU emulators with a config entry * Add runtime identifiers on the ARMeilleure project * Allow switching between CPUs through a config entry (pt. 
2) * Change win10-x64 to win-x64 on projects * Update the Ryujinx project to use ARMeilleure * Ensure that the selected register is valid on the hybrid allocator * Allow exiting on returns to 0 (should fix test regression) * Remove register assignments for most used variables on the hybrid allocator * Do not use fixed registers as spill temp * Add missing namespace and remove unneeded using * Address PR feedback * Fix types, etc * Enable AssumeStrictAbiCompliance by default * Ensure that Spill and Fill don't load or store any more than necessary --- ARMeilleure/ARMeilleure.csproj | 20 + ARMeilleure/CodeGen/CompiledFunction.cs | 17 + .../CodeGen/Optimizations/ConstantFolding.cs | 258 ++ .../CodeGen/Optimizations/Optimizer.cs | 126 + .../CodeGen/Optimizations/Simplification.cs | 157 + .../RegisterAllocators/AllocationResult.cs | 19 + .../RegisterAllocators/CopyResolver.cs | 246 ++ .../RegisterAllocators/HybridAllocator.cs | 382 ++ .../RegisterAllocators/IRegisterAllocator.cs | 12 + .../RegisterAllocators/LinearScanAllocator.cs | 1019 ++++++ .../RegisterAllocators/LiveInterval.cs | 390 ++ .../CodeGen/RegisterAllocators/LiveRange.cs | 31 + .../RegisterAllocators/RegisterMasks.cs | 47 + .../RegisterAllocators/StackAllocator.cs | 27 + ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs | 18 + .../CodeGen/Unwinding/UnwindPushEntry.cs | 20 + ARMeilleure/CodeGen/X86/Assembler.cs | 1358 +++++++ ARMeilleure/CodeGen/X86/CallConvName.cs | 8 + ARMeilleure/CodeGen/X86/CallingConvention.cs | 159 + ARMeilleure/CodeGen/X86/CodeGenContext.cs | 305 ++ ARMeilleure/CodeGen/X86/CodeGenerator.cs | 1661 +++++++++ .../CodeGen/X86/HardwareCapabilities.cs | 52 + ARMeilleure/CodeGen/X86/IntrinsicInfo.cs | 14 + ARMeilleure/CodeGen/X86/IntrinsicTable.cs | 160 + ARMeilleure/CodeGen/X86/IntrinsicType.cs | 14 + ARMeilleure/CodeGen/X86/PreAllocator.cs | 1280 +++++++ ARMeilleure/CodeGen/X86/X86Condition.cs | 22 + ARMeilleure/CodeGen/X86/X86Instruction.cs | 190 + ARMeilleure/CodeGen/X86/X86Register.cs | 41 + 
ARMeilleure/Common/BitMap.cs | 138 + ARMeilleure/Common/BitUtils.cs | 109 + ARMeilleure/Common/EnumUtils.cs | 12 + ARMeilleure/Decoders/Block.cs | 99 + ARMeilleure/Decoders/Condition.cs | 32 + ARMeilleure/Decoders/DataOp.cs | 10 + ARMeilleure/Decoders/Decoder.cs | 351 ++ ARMeilleure/Decoders/DecoderHelper.cs | 113 + ARMeilleure/Decoders/IOpCode.cs | 17 + ARMeilleure/Decoders/IOpCode32.cs | 9 + ARMeilleure/Decoders/IOpCode32Alu.cs | 10 + ARMeilleure/Decoders/IOpCode32BImm.cs | 4 + ARMeilleure/Decoders/IOpCode32BReg.cs | 7 + ARMeilleure/Decoders/IOpCode32Mem.cs | 12 + ARMeilleure/Decoders/IOpCode32MemMult.cs | 13 + ARMeilleure/Decoders/IOpCodeAlu.cs | 10 + ARMeilleure/Decoders/IOpCodeAluImm.cs | 7 + ARMeilleure/Decoders/IOpCodeAluRs.cs | 10 + ARMeilleure/Decoders/IOpCodeAluRx.cs | 10 + ARMeilleure/Decoders/IOpCodeBImm.cs | 7 + ARMeilleure/Decoders/IOpCodeCond.cs | 7 + ARMeilleure/Decoders/IOpCodeLit.cs | 11 + ARMeilleure/Decoders/IOpCodeSimd.cs | 7 + ARMeilleure/Decoders/InstDescriptor.cs | 18 + ARMeilleure/Decoders/InstEmitter.cs | 6 + ARMeilleure/Decoders/IntType.cs | 14 + ARMeilleure/Decoders/OpCode.cs | 48 + ARMeilleure/Decoders/OpCode32.cs | 21 + ARMeilleure/Decoders/OpCode32Alu.cs | 18 + ARMeilleure/Decoders/OpCode32AluImm.cs | 21 + ARMeilleure/Decoders/OpCode32AluRsImm.cs | 18 + ARMeilleure/Decoders/OpCode32BImm.cs | 27 + ARMeilleure/Decoders/OpCode32BReg.cs | 12 + ARMeilleure/Decoders/OpCode32Mem.cs | 37 + ARMeilleure/Decoders/OpCode32MemImm.cs | 10 + ARMeilleure/Decoders/OpCode32MemImm8.cs | 13 + ARMeilleure/Decoders/OpCode32MemMult.cs | 55 + ARMeilleure/Decoders/OpCodeAdr.cs | 17 + ARMeilleure/Decoders/OpCodeAlu.cs | 21 + ARMeilleure/Decoders/OpCodeAluBinary.cs | 12 + ARMeilleure/Decoders/OpCodeAluImm.cs | 38 + ARMeilleure/Decoders/OpCodeAluRs.cs | 27 + ARMeilleure/Decoders/OpCodeAluRx.cs | 17 + ARMeilleure/Decoders/OpCodeBImm.cs | 9 + ARMeilleure/Decoders/OpCodeBImmAl.cs | 10 + ARMeilleure/Decoders/OpCodeBImmCmp.cs | 18 + 
ARMeilleure/Decoders/OpCodeBImmCond.cs | 23 + ARMeilleure/Decoders/OpCodeBImmTest.cs | 18 + ARMeilleure/Decoders/OpCodeBReg.cs | 22 + ARMeilleure/Decoders/OpCodeBfm.cs | 27 + ARMeilleure/Decoders/OpCodeCcmp.cs | 30 + ARMeilleure/Decoders/OpCodeCcmpImm.cs | 9 + ARMeilleure/Decoders/OpCodeCcmpReg.cs | 13 + ARMeilleure/Decoders/OpCodeCsel.cs | 15 + ARMeilleure/Decoders/OpCodeException.cs | 12 + ARMeilleure/Decoders/OpCodeMem.cs | 17 + ARMeilleure/Decoders/OpCodeMemEx.cs | 14 + ARMeilleure/Decoders/OpCodeMemImm.cs | 51 + ARMeilleure/Decoders/OpCodeMemLit.cs | 26 + ARMeilleure/Decoders/OpCodeMemPair.cs | 23 + ARMeilleure/Decoders/OpCodeMemReg.cs | 18 + ARMeilleure/Decoders/OpCodeMov.cs | 36 + ARMeilleure/Decoders/OpCodeMul.cs | 14 + ARMeilleure/Decoders/OpCodeSimd.cs | 22 + ARMeilleure/Decoders/OpCodeSimdCvt.cs | 19 + ARMeilleure/Decoders/OpCodeSimdExt.cs | 12 + ARMeilleure/Decoders/OpCodeSimdFcond.cs | 15 + ARMeilleure/Decoders/OpCodeSimdFmov.cs | 31 + ARMeilleure/Decoders/OpCodeSimdImm.cs | 98 + ARMeilleure/Decoders/OpCodeSimdIns.cs | 34 + ARMeilleure/Decoders/OpCodeSimdMemImm.cs | 17 + ARMeilleure/Decoders/OpCodeSimdMemLit.cs | 29 + ARMeilleure/Decoders/OpCodeSimdMemMs.cs | 46 + ARMeilleure/Decoders/OpCodeSimdMemPair.cs | 14 + ARMeilleure/Decoders/OpCodeSimdMemReg.cs | 12 + ARMeilleure/Decoders/OpCodeSimdMemSs.cs | 95 + ARMeilleure/Decoders/OpCodeSimdReg.cs | 16 + ARMeilleure/Decoders/OpCodeSimdRegElem.cs | 29 + ARMeilleure/Decoders/OpCodeSimdRegElemF.cs | 31 + ARMeilleure/Decoders/OpCodeSimdShImm.cs | 16 + ARMeilleure/Decoders/OpCodeSimdTbl.cs | 10 + ARMeilleure/Decoders/OpCodeSystem.cs | 22 + ARMeilleure/Decoders/OpCodeT16.cs | 12 + ARMeilleure/Decoders/OpCodeT16AluImm8.cs | 20 + ARMeilleure/Decoders/OpCodeT16BReg.cs | 12 + ARMeilleure/Decoders/OpCodeTable.cs | 787 ++++ ARMeilleure/Decoders/RegisterSize.cs | 10 + ARMeilleure/Decoders/ShiftType.cs | 10 + ARMeilleure/Diagnostics/IRDumper.cs | 168 + ARMeilleure/Diagnostics/Logger.cs | 59 + 
ARMeilleure/Diagnostics/PassName.cs | 17 + ARMeilleure/Instructions/CryptoHelper.cs | 279 ++ ARMeilleure/Instructions/DelegateTypes.cs | 78 + ARMeilleure/Instructions/InstEmitAlu.cs | 369 ++ ARMeilleure/Instructions/InstEmitAlu32.cs | 129 + ARMeilleure/Instructions/InstEmitAluHelper.cs | 351 ++ ARMeilleure/Instructions/InstEmitBfm.cs | 196 + ARMeilleure/Instructions/InstEmitCcmp.cs | 61 + ARMeilleure/Instructions/InstEmitCsel.cs | 53 + ARMeilleure/Instructions/InstEmitDiv.cs | 67 + ARMeilleure/Instructions/InstEmitException.cs | 55 + ARMeilleure/Instructions/InstEmitFlow.cs | 159 + ARMeilleure/Instructions/InstEmitFlow32.cs | 71 + .../Instructions/InstEmitFlowHelper.cs | 192 + ARMeilleure/Instructions/InstEmitHash.cs | 64 + ARMeilleure/Instructions/InstEmitHelper.cs | 218 ++ ARMeilleure/Instructions/InstEmitMemory.cs | 177 + ARMeilleure/Instructions/InstEmitMemory32.cs | 256 ++ ARMeilleure/Instructions/InstEmitMemoryEx.cs | 261 ++ .../Instructions/InstEmitMemoryHelper.cs | 512 +++ ARMeilleure/Instructions/InstEmitMove.cs | 41 + ARMeilleure/Instructions/InstEmitMul.cs | 100 + .../Instructions/InstEmitSimdArithmetic.cs | 3159 +++++++++++++++++ ARMeilleure/Instructions/InstEmitSimdCmp.cs | 712 ++++ .../Instructions/InstEmitSimdCrypto.cs | 49 + ARMeilleure/Instructions/InstEmitSimdCvt.cs | 1166 ++++++ ARMeilleure/Instructions/InstEmitSimdHash.cs | 147 + .../Instructions/InstEmitSimdHelper.cs | 1477 ++++++++ .../Instructions/InstEmitSimdLogical.cs | 456 +++ .../Instructions/InstEmitSimdMemory.cs | 160 + ARMeilleure/Instructions/InstEmitSimdMove.cs | 794 +++++ ARMeilleure/Instructions/InstEmitSimdShift.cs | 1057 ++++++ ARMeilleure/Instructions/InstEmitSystem.cs | 114 + ARMeilleure/Instructions/InstName.cs | 459 +++ ARMeilleure/Instructions/NativeInterface.cs | 367 ++ ARMeilleure/Instructions/SoftFallback.cs | 1307 +++++++ ARMeilleure/Instructions/SoftFloat.cs | 2757 ++++++++++++++ .../IntermediateRepresentation/BasicBlock.cs | 83 + 
.../IntermediateRepresentation/Instruction.cs | 79 + .../IntermediateRepresentation/Intrinsic.cs | 138 + .../IntrinsicOperation.cs | 12 + .../MemoryOperand.cs | 25 + .../IntermediateRepresentation/Multiplier.cs | 10 + .../IntermediateRepresentation/Node.cs | 163 + .../IntermediateRepresentation/Operand.cs | 124 + .../OperandHelper.cs | 68 + .../IntermediateRepresentation/OperandKind.cs | 12 + .../IntermediateRepresentation/OperandType.cs | 51 + .../IntermediateRepresentation/Operation.cs | 40 + .../IntermediateRepresentation/PhiNode.cs | 22 + .../IntermediateRepresentation/Register.cs | 43 + .../RegisterType.cs | 9 + ARMeilleure/Memory/IMemory.cs | 37 + ARMeilleure/Memory/IMemoryManager.cs | 40 + ARMeilleure/Memory/MemoryHelper.cs | 71 + ARMeilleure/Memory/MemoryManagement.cs | 114 + ARMeilleure/Memory/MemoryManagementUnix.cs | 71 + ARMeilleure/Memory/MemoryManagementWindows.cs | 156 + ARMeilleure/Memory/MemoryManager.cs | 835 +++++ ARMeilleure/Memory/MemoryManagerPal.cs | 77 + ARMeilleure/Memory/MemoryProtection.cs | 17 + .../Memory/MemoryProtectionException.cs | 9 + ARMeilleure/Optimizations.cs | 33 + ARMeilleure/State/Aarch32Mode.cs | 15 + ARMeilleure/State/ExecutionContext.cs | 130 + ARMeilleure/State/ExecutionMode.cs | 9 + ARMeilleure/State/FPCR.cs | 23 + ARMeilleure/State/FPException.cs | 12 + ARMeilleure/State/FPRoundingMode.cs | 10 + ARMeilleure/State/FPSR.cs | 11 + ARMeilleure/State/FPType.cs | 11 + ARMeilleure/State/IExecutionContext.cs | 37 + ARMeilleure/State/InstExceptionEventArgs.cs | 16 + ARMeilleure/State/InstUndefinedEventArgs.cs | 16 + ARMeilleure/State/NativeContext.cs | 157 + ARMeilleure/State/PState.cs | 16 + ARMeilleure/State/RegisterAlias.cs | 41 + ARMeilleure/State/RegisterConsts.cs | 13 + ARMeilleure/State/V128.cs | 214 ++ ARMeilleure/Statistics.cs | 92 + ARMeilleure/Translation/ArmEmitterContext.cs | 153 + ARMeilleure/Translation/Compiler.cs | 47 + ARMeilleure/Translation/CompilerContext.cs | 26 + ARMeilleure/Translation/CompilerOptions.cs 
| 16 + ARMeilleure/Translation/ControlFlowGraph.cs | 158 + ARMeilleure/Translation/DelegateCache.cs | 26 + ARMeilleure/Translation/Dominance.cs | 95 + ARMeilleure/Translation/EmitterContext.cs | 562 +++ ARMeilleure/Translation/GuestFunction.cs | 6 + ARMeilleure/Translation/ITranslator.cs | 9 + ARMeilleure/Translation/JitCache.cs | 135 + ARMeilleure/Translation/JitCacheEntry.cs | 19 + ARMeilleure/Translation/JitUnwindWindows.cs | 164 + ARMeilleure/Translation/PriorityQueue.cs | 39 + ARMeilleure/Translation/RegisterToLocal.cs | 52 + ARMeilleure/Translation/RegisterUsage.cs | 413 +++ ARMeilleure/Translation/SsaConstruction.cs | 293 ++ ARMeilleure/Translation/SsaDeconstruction.cs | 46 + ARMeilleure/Translation/TranslatedFunction.cs | 30 + ARMeilleure/Translation/Translator.cs | 253 ++ ChocolArm64/ChocolArm64.csproj | 3 +- ChocolArm64/CpuThread.cs | 66 - .../Instructions/InstEmitMemoryHelper.cs | 10 +- ChocolArm64/Instructions/InstEmitSystem.cs | 8 +- ChocolArm64/Instructions/SoftFloat.cs | 112 +- ChocolArm64/Memory/MemoryManager.cs | 10 +- ChocolArm64/Optimizations.cs | 35 +- ChocolArm64/State/CpuThreadState.cs | 187 +- ChocolArm64/Translation/Translator.cs | 23 +- Ryujinx.Audio/Ryujinx.Audio.csproj | 2 +- Ryujinx.Common/Ryujinx.Common.csproj | 4 +- .../Graphics3d/Texture/ImageUtils.cs | 7 +- .../Graphics3d/Texture/TextureHelper.cs | 6 +- Ryujinx.Graphics/Memory/NvGpuVmm.cs | 6 +- Ryujinx.Graphics/Memory/NvGpuVmmCache.cs | 6 +- Ryujinx.Graphics/Ryujinx.Graphics.csproj | 5 +- Ryujinx.Graphics/VDec/VideoDecoder.cs | 2 +- Ryujinx.HLE/DeviceMemory.cs | 2 +- .../UndefinedInstructionException.cs | 2 +- Ryujinx.HLE/HOS/Homebrew.cs | 8 +- Ryujinx.HLE/HOS/Horizon.cs | 2 + Ryujinx.HLE/HOS/Ipc/IpcHandler.cs | 4 +- .../HOS/Kernel/Common/KernelTransfer.cs | 2 +- .../HOS/Kernel/Memory/KMemoryManager.cs | 6 +- .../HOS/Kernel/Process/HleProcessDebugger.cs | 12 +- Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs | 48 +- .../HOS/Kernel/SupervisorCall/SvcHandler.cs | 15 +- 
.../HOS/Kernel/SupervisorCall/SvcIpc.cs | 2 +- .../HOS/Kernel/SupervisorCall/SvcSystem.cs | 4 +- .../HOS/Kernel/SupervisorCall/SvcTable.cs | 59 +- .../HOS/Kernel/SupervisorCall/SvcThread.cs | 149 +- .../HOS/Kernel/Threading/HleScheduler.cs | 18 +- .../HOS/Kernel/Threading/KCoreContext.cs | 6 +- .../HOS/Kernel/Threading/KCriticalSection.cs | 10 +- .../HOS/Kernel/Threading/KScheduler.cs | 2 +- Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs | 72 +- Ryujinx.HLE/HOS/ProgramLoader.cs | 2 +- Ryujinx.HLE/HOS/ServiceCtx.cs | 6 +- Ryujinx.HLE/HOS/Services/Acc/IProfile.cs | 2 +- .../HOS/Services/Aud/AudioOut/IAudioOut.cs | 2 +- .../Aud/AudioRenderer/IAudioRenderer.cs | 6 +- .../Aud/AudioRenderer/VoiceContext.cs | 6 +- .../HOS/Services/Aud/IAudioOutManager.cs | 2 +- Ryujinx.HLE/HOS/Services/Ldr/IRoInterface.cs | 2 +- Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs | 2 +- .../HOS/Services/Nv/NvGpuAS/NvGpuASIoctl.cs | 2 +- .../HOS/Services/Nv/NvGpuGpu/NvGpuGpuIoctl.cs | 2 +- .../Nv/NvHostChannel/NvHostChannelIoctl.cs | 2 +- .../Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs | 2 +- .../HOS/Services/Nv/NvMap/NvMapIoctl.cs | 2 +- .../Time/Clock/StandardSteadyClockCore.cs | 2 +- .../Time/Clock/TickBasedSteadyClockCore.cs | 2 +- .../HOS/Services/Time/IStaticService.cs | 2 +- .../HOS/Services/Time/ITimeZoneService.cs | 4 +- .../Services/Vi/IApplicationDisplayService.cs | 2 +- Ryujinx.HLE/Ryujinx.HLE.csproj | 6 +- Ryujinx.HLE/Utilities/StructReader.cs | 6 +- Ryujinx.HLE/Utilities/StructWriter.cs | 6 +- Ryujinx.LLE/Luea.csproj | 2 +- Ryujinx.Profiler/Ryujinx.Profiler.csproj | 2 +- .../Ryujinx.ShaderTools.csproj | 2 +- Ryujinx.Tests.Unicorn/IndexedProperty.cs | 14 +- Ryujinx.Tests.Unicorn/Native/Interface.cs | 4 +- Ryujinx.Tests.Unicorn/Native/UnicornArch.cs | 2 +- Ryujinx.Tests.Unicorn/Native/UnicornMode.cs | 2 +- .../Ryujinx.Tests.Unicorn.csproj | 3 +- Ryujinx.Tests.Unicorn/SimdValue.cs | 112 + Ryujinx.Tests.Unicorn/UnicornAArch64.cs | 204 +- Ryujinx.Tests/Cpu/CpuTest.cs | 478 ++- 
Ryujinx.Tests/Cpu/CpuTestAluBinary.cs | 238 ++ Ryujinx.Tests/Cpu/CpuTestAluRs.cs | 224 -- Ryujinx.Tests/Cpu/CpuTestMisc.cs | 82 +- Ryujinx.Tests/Cpu/CpuTestSimd.cs | 491 +-- Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs | 64 +- Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs | 35 +- Ryujinx.Tests/Cpu/CpuTestSimdExt.cs | 16 +- Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs | 23 +- Ryujinx.Tests/Cpu/CpuTestSimdFmov.cs | 8 +- Ryujinx.Tests/Cpu/CpuTestSimdImm.cs | 19 +- Ryujinx.Tests/Cpu/CpuTestSimdIns.cs | 82 +- Ryujinx.Tests/Cpu/CpuTestSimdReg.cs | 933 ++--- Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs | 28 +- Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs | 51 +- Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs | 107 +- Ryujinx.Tests/Cpu/CpuTestSimdTbl.cs | 69 +- Ryujinx.Tests/Ryujinx.Tests.csproj | 5 +- Ryujinx.sln | 6 +- Ryujinx/Config.jsonc | 6 +- Ryujinx/Configuration.cs | 10 +- Ryujinx/Ryujinx.csproj | 4 +- Ryujinx/_schema.json | 14 +- 310 files changed, 37389 insertions(+), 2086 deletions(-) create mode 100644 ARMeilleure/ARMeilleure.csproj create mode 100644 ARMeilleure/CodeGen/CompiledFunction.cs create mode 100644 ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs create mode 100644 ARMeilleure/CodeGen/Optimizations/Optimizer.cs create mode 100644 ARMeilleure/CodeGen/Optimizations/Simplification.cs create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs create mode 100644 
ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs create mode 100644 ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs create mode 100644 ARMeilleure/CodeGen/X86/Assembler.cs create mode 100644 ARMeilleure/CodeGen/X86/CallConvName.cs create mode 100644 ARMeilleure/CodeGen/X86/CallingConvention.cs create mode 100644 ARMeilleure/CodeGen/X86/CodeGenContext.cs create mode 100644 ARMeilleure/CodeGen/X86/CodeGenerator.cs create mode 100644 ARMeilleure/CodeGen/X86/HardwareCapabilities.cs create mode 100644 ARMeilleure/CodeGen/X86/IntrinsicInfo.cs create mode 100644 ARMeilleure/CodeGen/X86/IntrinsicTable.cs create mode 100644 ARMeilleure/CodeGen/X86/IntrinsicType.cs create mode 100644 ARMeilleure/CodeGen/X86/PreAllocator.cs create mode 100644 ARMeilleure/CodeGen/X86/X86Condition.cs create mode 100644 ARMeilleure/CodeGen/X86/X86Instruction.cs create mode 100644 ARMeilleure/CodeGen/X86/X86Register.cs create mode 100644 ARMeilleure/Common/BitMap.cs create mode 100644 ARMeilleure/Common/BitUtils.cs create mode 100644 ARMeilleure/Common/EnumUtils.cs create mode 100644 ARMeilleure/Decoders/Block.cs create mode 100644 ARMeilleure/Decoders/Condition.cs create mode 100644 ARMeilleure/Decoders/DataOp.cs create mode 100644 ARMeilleure/Decoders/Decoder.cs create mode 100644 ARMeilleure/Decoders/DecoderHelper.cs create mode 100644 ARMeilleure/Decoders/IOpCode.cs create mode 100644 ARMeilleure/Decoders/IOpCode32.cs create mode 100644 ARMeilleure/Decoders/IOpCode32Alu.cs create mode 100644 ARMeilleure/Decoders/IOpCode32BImm.cs create mode 100644 ARMeilleure/Decoders/IOpCode32BReg.cs create mode 100644 ARMeilleure/Decoders/IOpCode32Mem.cs create mode 100644 ARMeilleure/Decoders/IOpCode32MemMult.cs create mode 100644 ARMeilleure/Decoders/IOpCodeAlu.cs create mode 100644 ARMeilleure/Decoders/IOpCodeAluImm.cs create mode 100644 ARMeilleure/Decoders/IOpCodeAluRs.cs create mode 100644 ARMeilleure/Decoders/IOpCodeAluRx.cs create mode 100644 ARMeilleure/Decoders/IOpCodeBImm.cs create mode 100644 
ARMeilleure/Decoders/IOpCodeCond.cs create mode 100644 ARMeilleure/Decoders/IOpCodeLit.cs create mode 100644 ARMeilleure/Decoders/IOpCodeSimd.cs create mode 100644 ARMeilleure/Decoders/InstDescriptor.cs create mode 100644 ARMeilleure/Decoders/InstEmitter.cs create mode 100644 ARMeilleure/Decoders/IntType.cs create mode 100644 ARMeilleure/Decoders/OpCode.cs create mode 100644 ARMeilleure/Decoders/OpCode32.cs create mode 100644 ARMeilleure/Decoders/OpCode32Alu.cs create mode 100644 ARMeilleure/Decoders/OpCode32AluImm.cs create mode 100644 ARMeilleure/Decoders/OpCode32AluRsImm.cs create mode 100644 ARMeilleure/Decoders/OpCode32BImm.cs create mode 100644 ARMeilleure/Decoders/OpCode32BReg.cs create mode 100644 ARMeilleure/Decoders/OpCode32Mem.cs create mode 100644 ARMeilleure/Decoders/OpCode32MemImm.cs create mode 100644 ARMeilleure/Decoders/OpCode32MemImm8.cs create mode 100644 ARMeilleure/Decoders/OpCode32MemMult.cs create mode 100644 ARMeilleure/Decoders/OpCodeAdr.cs create mode 100644 ARMeilleure/Decoders/OpCodeAlu.cs create mode 100644 ARMeilleure/Decoders/OpCodeAluBinary.cs create mode 100644 ARMeilleure/Decoders/OpCodeAluImm.cs create mode 100644 ARMeilleure/Decoders/OpCodeAluRs.cs create mode 100644 ARMeilleure/Decoders/OpCodeAluRx.cs create mode 100644 ARMeilleure/Decoders/OpCodeBImm.cs create mode 100644 ARMeilleure/Decoders/OpCodeBImmAl.cs create mode 100644 ARMeilleure/Decoders/OpCodeBImmCmp.cs create mode 100644 ARMeilleure/Decoders/OpCodeBImmCond.cs create mode 100644 ARMeilleure/Decoders/OpCodeBImmTest.cs create mode 100644 ARMeilleure/Decoders/OpCodeBReg.cs create mode 100644 ARMeilleure/Decoders/OpCodeBfm.cs create mode 100644 ARMeilleure/Decoders/OpCodeCcmp.cs create mode 100644 ARMeilleure/Decoders/OpCodeCcmpImm.cs create mode 100644 ARMeilleure/Decoders/OpCodeCcmpReg.cs create mode 100644 ARMeilleure/Decoders/OpCodeCsel.cs create mode 100644 ARMeilleure/Decoders/OpCodeException.cs create mode 100644 ARMeilleure/Decoders/OpCodeMem.cs create mode 
100644 ARMeilleure/Decoders/OpCodeMemEx.cs create mode 100644 ARMeilleure/Decoders/OpCodeMemImm.cs create mode 100644 ARMeilleure/Decoders/OpCodeMemLit.cs create mode 100644 ARMeilleure/Decoders/OpCodeMemPair.cs create mode 100644 ARMeilleure/Decoders/OpCodeMemReg.cs create mode 100644 ARMeilleure/Decoders/OpCodeMov.cs create mode 100644 ARMeilleure/Decoders/OpCodeMul.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimd.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimdCvt.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimdExt.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimdFcond.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimdFmov.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimdImm.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimdIns.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimdMemImm.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimdMemLit.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimdMemMs.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimdMemPair.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimdMemReg.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimdMemSs.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimdReg.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimdRegElem.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimdRegElemF.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimdShImm.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimdTbl.cs create mode 100644 ARMeilleure/Decoders/OpCodeSystem.cs create mode 100644 ARMeilleure/Decoders/OpCodeT16.cs create mode 100644 ARMeilleure/Decoders/OpCodeT16AluImm8.cs create mode 100644 ARMeilleure/Decoders/OpCodeT16BReg.cs create mode 100644 ARMeilleure/Decoders/OpCodeTable.cs create mode 100644 ARMeilleure/Decoders/RegisterSize.cs create mode 100644 ARMeilleure/Decoders/ShiftType.cs create mode 100644 ARMeilleure/Diagnostics/IRDumper.cs create mode 100644 ARMeilleure/Diagnostics/Logger.cs create mode 100644 ARMeilleure/Diagnostics/PassName.cs create mode 100644 
ARMeilleure/Instructions/CryptoHelper.cs create mode 100644 ARMeilleure/Instructions/DelegateTypes.cs create mode 100644 ARMeilleure/Instructions/InstEmitAlu.cs create mode 100644 ARMeilleure/Instructions/InstEmitAlu32.cs create mode 100644 ARMeilleure/Instructions/InstEmitAluHelper.cs create mode 100644 ARMeilleure/Instructions/InstEmitBfm.cs create mode 100644 ARMeilleure/Instructions/InstEmitCcmp.cs create mode 100644 ARMeilleure/Instructions/InstEmitCsel.cs create mode 100644 ARMeilleure/Instructions/InstEmitDiv.cs create mode 100644 ARMeilleure/Instructions/InstEmitException.cs create mode 100644 ARMeilleure/Instructions/InstEmitFlow.cs create mode 100644 ARMeilleure/Instructions/InstEmitFlow32.cs create mode 100644 ARMeilleure/Instructions/InstEmitFlowHelper.cs create mode 100644 ARMeilleure/Instructions/InstEmitHash.cs create mode 100644 ARMeilleure/Instructions/InstEmitHelper.cs create mode 100644 ARMeilleure/Instructions/InstEmitMemory.cs create mode 100644 ARMeilleure/Instructions/InstEmitMemory32.cs create mode 100644 ARMeilleure/Instructions/InstEmitMemoryEx.cs create mode 100644 ARMeilleure/Instructions/InstEmitMemoryHelper.cs create mode 100644 ARMeilleure/Instructions/InstEmitMove.cs create mode 100644 ARMeilleure/Instructions/InstEmitMul.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdArithmetic.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdCmp.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdCrypto.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdCvt.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdHash.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdHelper.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdLogical.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdMemory.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdMove.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdShift.cs create mode 100644 ARMeilleure/Instructions/InstEmitSystem.cs create 
mode 100644 ARMeilleure/Instructions/InstName.cs create mode 100644 ARMeilleure/Instructions/NativeInterface.cs create mode 100644 ARMeilleure/Instructions/SoftFallback.cs create mode 100644 ARMeilleure/Instructions/SoftFloat.cs create mode 100644 ARMeilleure/IntermediateRepresentation/BasicBlock.cs create mode 100644 ARMeilleure/IntermediateRepresentation/Instruction.cs create mode 100644 ARMeilleure/IntermediateRepresentation/Intrinsic.cs create mode 100644 ARMeilleure/IntermediateRepresentation/IntrinsicOperation.cs create mode 100644 ARMeilleure/IntermediateRepresentation/MemoryOperand.cs create mode 100644 ARMeilleure/IntermediateRepresentation/Multiplier.cs create mode 100644 ARMeilleure/IntermediateRepresentation/Node.cs create mode 100644 ARMeilleure/IntermediateRepresentation/Operand.cs create mode 100644 ARMeilleure/IntermediateRepresentation/OperandHelper.cs create mode 100644 ARMeilleure/IntermediateRepresentation/OperandKind.cs create mode 100644 ARMeilleure/IntermediateRepresentation/OperandType.cs create mode 100644 ARMeilleure/IntermediateRepresentation/Operation.cs create mode 100644 ARMeilleure/IntermediateRepresentation/PhiNode.cs create mode 100644 ARMeilleure/IntermediateRepresentation/Register.cs create mode 100644 ARMeilleure/IntermediateRepresentation/RegisterType.cs create mode 100644 ARMeilleure/Memory/IMemory.cs create mode 100644 ARMeilleure/Memory/IMemoryManager.cs create mode 100644 ARMeilleure/Memory/MemoryHelper.cs create mode 100644 ARMeilleure/Memory/MemoryManagement.cs create mode 100644 ARMeilleure/Memory/MemoryManagementUnix.cs create mode 100644 ARMeilleure/Memory/MemoryManagementWindows.cs create mode 100644 ARMeilleure/Memory/MemoryManager.cs create mode 100644 ARMeilleure/Memory/MemoryManagerPal.cs create mode 100644 ARMeilleure/Memory/MemoryProtection.cs create mode 100644 ARMeilleure/Memory/MemoryProtectionException.cs create mode 100644 ARMeilleure/Optimizations.cs create mode 100644 ARMeilleure/State/Aarch32Mode.cs 
create mode 100644 ARMeilleure/State/ExecutionContext.cs create mode 100644 ARMeilleure/State/ExecutionMode.cs create mode 100644 ARMeilleure/State/FPCR.cs create mode 100644 ARMeilleure/State/FPException.cs create mode 100644 ARMeilleure/State/FPRoundingMode.cs create mode 100644 ARMeilleure/State/FPSR.cs create mode 100644 ARMeilleure/State/FPType.cs create mode 100644 ARMeilleure/State/IExecutionContext.cs create mode 100644 ARMeilleure/State/InstExceptionEventArgs.cs create mode 100644 ARMeilleure/State/InstUndefinedEventArgs.cs create mode 100644 ARMeilleure/State/NativeContext.cs create mode 100644 ARMeilleure/State/PState.cs create mode 100644 ARMeilleure/State/RegisterAlias.cs create mode 100644 ARMeilleure/State/RegisterConsts.cs create mode 100644 ARMeilleure/State/V128.cs create mode 100644 ARMeilleure/Statistics.cs create mode 100644 ARMeilleure/Translation/ArmEmitterContext.cs create mode 100644 ARMeilleure/Translation/Compiler.cs create mode 100644 ARMeilleure/Translation/CompilerContext.cs create mode 100644 ARMeilleure/Translation/CompilerOptions.cs create mode 100644 ARMeilleure/Translation/ControlFlowGraph.cs create mode 100644 ARMeilleure/Translation/DelegateCache.cs create mode 100644 ARMeilleure/Translation/Dominance.cs create mode 100644 ARMeilleure/Translation/EmitterContext.cs create mode 100644 ARMeilleure/Translation/GuestFunction.cs create mode 100644 ARMeilleure/Translation/ITranslator.cs create mode 100644 ARMeilleure/Translation/JitCache.cs create mode 100644 ARMeilleure/Translation/JitCacheEntry.cs create mode 100644 ARMeilleure/Translation/JitUnwindWindows.cs create mode 100644 ARMeilleure/Translation/PriorityQueue.cs create mode 100644 ARMeilleure/Translation/RegisterToLocal.cs create mode 100644 ARMeilleure/Translation/RegisterUsage.cs create mode 100644 ARMeilleure/Translation/SsaConstruction.cs create mode 100644 ARMeilleure/Translation/SsaDeconstruction.cs create mode 100644 ARMeilleure/Translation/TranslatedFunction.cs create 
mode 100644 ARMeilleure/Translation/Translator.cs delete mode 100644 ChocolArm64/CpuThread.cs create mode 100644 Ryujinx.Tests.Unicorn/SimdValue.cs create mode 100644 Ryujinx.Tests/Cpu/CpuTestAluBinary.cs diff --git a/ARMeilleure/ARMeilleure.csproj b/ARMeilleure/ARMeilleure.csproj new file mode 100644 index 000000000..9268dcbee --- /dev/null +++ b/ARMeilleure/ARMeilleure.csproj @@ -0,0 +1,20 @@ + + + + netcoreapp2.1 + win-x64;osx-x64;linux-x64 + + + + true + + + + true + + + + + + + diff --git a/ARMeilleure/CodeGen/CompiledFunction.cs b/ARMeilleure/CodeGen/CompiledFunction.cs new file mode 100644 index 000000000..61e89c240 --- /dev/null +++ b/ARMeilleure/CodeGen/CompiledFunction.cs @@ -0,0 +1,17 @@ +using ARMeilleure.CodeGen.Unwinding; + +namespace ARMeilleure.CodeGen +{ + struct CompiledFunction + { + public byte[] Code { get; } + + public UnwindInfo UnwindInfo { get; } + + public CompiledFunction(byte[] code, UnwindInfo unwindInfo) + { + Code = code; + UnwindInfo = unwindInfo; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs b/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs new file mode 100644 index 000000000..84eedee0e --- /dev/null +++ b/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs @@ -0,0 +1,258 @@ +using ARMeilleure.IntermediateRepresentation; +using System; + +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.CodeGen.Optimizations +{ + static class ConstantFolding + { + public static void RunPass(Operation operation) + { + if (operation.Destination == null || operation.SourcesCount == 0) + { + return; + } + + if (!AreAllSourcesConstant(operation)) + { + return; + } + + OperandType type = operation.Destination.Type; + + switch (operation.Instruction) + { + case Instruction.Add: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x + y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x + 
y);
+                    }
+                    break;
+
+                case Instruction.BitwiseAnd:
+                    if (type == OperandType.I32)
+                    {
+                        EvaluateBinaryI32(operation, (x, y) => x & y);
+                    }
+                    else if (type == OperandType.I64)
+                    {
+                        EvaluateBinaryI64(operation, (x, y) => x & y);
+                    }
+                    break;
+
+                case Instruction.BitwiseExclusiveOr:
+                    if (type == OperandType.I32)
+                    {
+                        EvaluateBinaryI32(operation, (x, y) => x ^ y);
+                    }
+                    else if (type == OperandType.I64)
+                    {
+                        EvaluateBinaryI64(operation, (x, y) => x ^ y);
+                    }
+                    break;
+
+                case Instruction.BitwiseNot:
+                    if (type == OperandType.I32)
+                    {
+                        EvaluateUnaryI32(operation, (x) => ~x);
+                    }
+                    else if (type == OperandType.I64)
+                    {
+                        EvaluateUnaryI64(operation, (x) => ~x);
+                    }
+                    break;
+
+                case Instruction.BitwiseOr:
+                    if (type == OperandType.I32)
+                    {
+                        EvaluateBinaryI32(operation, (x, y) => x | y);
+                    }
+                    else if (type == OperandType.I64)
+                    {
+                        EvaluateBinaryI64(operation, (x, y) => x | y);
+                    }
+                    break;
+
+                case Instruction.Copy:
+                    if (type == OperandType.I32)
+                    {
+                        EvaluateUnaryI32(operation, (x) => x);
+                    }
+                    else if (type == OperandType.I64)
+                    {
+                        EvaluateUnaryI64(operation, (x) => x);
+                    }
+                    break;
+
+                case Instruction.Divide: // Signed divide: y == 0 folds to 0; y == -1 folds to a wrapping negate, because MinValue / -1 throws OverflowException on the host.
+                    if (type == OperandType.I32)
+                    {
+                        EvaluateBinaryI32(operation, (x, y) => y == 0 ? 0 : (y == -1 ? unchecked(-x) : x / y));
+                    }
+                    else if (type == OperandType.I64)
+                    {
+                        EvaluateBinaryI64(operation, (x, y) => y == 0 ? 0 : (y == -1 ? unchecked(-x) : x / y));
+                    }
+                    break;
+
+                case Instruction.DivideUI: // Unsigned divide: operands are reinterpreted as unsigned; y == 0 folds to 0.
+                    if (type == OperandType.I32)
+                    {
+                        EvaluateBinaryI32(operation, (x, y) => y != 0 ? (int)((uint)x / (uint)y) : 0);
+                    }
+                    else if (type == OperandType.I64)
+                    {
+                        EvaluateBinaryI64(operation, (x, y) => y != 0 ?
(long)((ulong)x / (ulong)y) : 0); + } + break; + + case Instruction.Multiply: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x * y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x * y); + } + break; + + case Instruction.Negate: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => -x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => -x); + } + break; + + case Instruction.ShiftLeft: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x << y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x << (int)y); + } + break; + + case Instruction.ShiftRightSI: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x >> y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x >> (int)y); + } + break; + + case Instruction.ShiftRightUI: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => (int)((uint)x >> y)); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => (long)((ulong)x >> (int)y)); + } + break; + + case Instruction.SignExtend16: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => (short)x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => (short)x); + } + break; + + case Instruction.SignExtend32: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => (int)x); + } + break; + + case Instruction.SignExtend8: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => (sbyte)x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => (sbyte)x); + } + break; + + case Instruction.Subtract: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x - y); + } + else if (type == 
OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x - y); + } + break; + } + } + + private static bool AreAllSourcesConstant(Operation operation) + { + for (int index = 0; index < operation.SourcesCount; index++) + { + if (operation.GetSource(index).Kind != OperandKind.Constant) + { + return false; + } + } + + return true; + } + + private static void EvaluateUnaryI32(Operation operation, Func op) + { + int x = operation.GetSource(0).AsInt32(); + + operation.TurnIntoCopy(Const(op(x))); + } + + private static void EvaluateUnaryI64(Operation operation, Func op) + { + long x = operation.GetSource(0).AsInt64(); + + operation.TurnIntoCopy(Const(op(x))); + } + + private static void EvaluateBinaryI32(Operation operation, Func op) + { + int x = operation.GetSource(0).AsInt32(); + int y = operation.GetSource(1).AsInt32(); + + operation.TurnIntoCopy(Const(op(x, y))); + } + + private static void EvaluateBinaryI64(Operation operation, Func op) + { + long x = operation.GetSource(0).AsInt64(); + long y = operation.GetSource(1).AsInt64(); + + operation.TurnIntoCopy(Const(op(x, y))); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/Optimizations/Optimizer.cs b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs new file mode 100644 index 000000000..c01a8f1e7 --- /dev/null +++ b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs @@ -0,0 +1,126 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; + +namespace ARMeilleure.CodeGen.Optimizations +{ + static class Optimizer + { + public static void RunPass(ControlFlowGraph cfg) + { + bool modified; + + do + { + modified = false; + + foreach (BasicBlock block in cfg.Blocks) + { + LinkedListNode node = block.Operations.First; + + while (node != null) + { + LinkedListNode nextNode = node.Next; + + bool isUnused = IsUnused(node.Value); + + if (!(node.Value is Operation operation) || isUnused) + { + if 
(isUnused) + { + RemoveNode(block, node); + + modified = true; + } + + node = nextNode; + + continue; + } + + ConstantFolding.RunPass(operation); + + Simplification.RunPass(operation); + + if (DestIsLocalVar(operation) && IsPropagableCopy(operation)) + { + PropagateCopy(operation); + + RemoveNode(block, node); + + modified = true; + } + + node = nextNode; + } + } + } + while (modified); + } + + private static void PropagateCopy(Operation copyOp) + { + // Propagate copy source operand to all uses of the destination operand. + Operand dest = copyOp.Destination; + Operand source = copyOp.GetSource(0); + + Node[] uses = dest.Uses.ToArray(); + + foreach (Node use in uses) + { + for (int index = 0; index < use.SourcesCount; index++) + { + if (use.GetSource(index) == dest) + { + use.SetSource(index, source); + } + } + } + } + + private static void RemoveNode(BasicBlock block, LinkedListNode llNode) + { + // Remove a node from the nodes list, and also remove itself + // from all the use lists on the operands that this node uses. 
+ block.Operations.Remove(llNode); + + Node node = llNode.Value; + + for (int index = 0; index < node.SourcesCount; index++) + { + node.SetSource(index, null); + } + + Debug.Assert(node.Destination == null || node.Destination.Uses.Count == 0); + + node.Destination = null; + } + + private static bool IsUnused(Node node) + { + return DestIsLocalVar(node) && node.Destination.Uses.Count == 0 && !HasSideEffects(node); + } + + private static bool DestIsLocalVar(Node node) + { + return node.Destination != null && node.Destination.Kind == OperandKind.LocalVariable; + } + + private static bool HasSideEffects(Node node) + { + return (node is Operation operation) && operation.Instruction == Instruction.Call; + } + + private static bool IsPropagableCopy(Operation operation) + { + if (operation.Instruction != Instruction.Copy) + { + return false; + } + + return operation.Destination.Type == operation.GetSource(0).Type; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/Optimizations/Simplification.cs b/ARMeilleure/CodeGen/Optimizations/Simplification.cs new file mode 100644 index 000000000..cafc025ca --- /dev/null +++ b/ARMeilleure/CodeGen/Optimizations/Simplification.cs @@ -0,0 +1,157 @@ +using ARMeilleure.IntermediateRepresentation; +using System; + +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.CodeGen.Optimizations +{ + static class Simplification + { + public static void RunPass(Operation operation) + { + switch (operation.Instruction) + { + case Instruction.Add: + case Instruction.BitwiseExclusiveOr: + TryEliminateBinaryOpComutative(operation, 0); + break; + + case Instruction.BitwiseAnd: + TryEliminateBitwiseAnd(operation); + break; + + case Instruction.BitwiseOr: + TryEliminateBitwiseOr(operation); + break; + + case Instruction.ConditionalSelect: + TryEliminateConditionalSelect(operation); + break; + + case Instruction.Divide: + TryEliminateBinaryOpY(operation, 1); + break; + + case Instruction.Multiply: 
+                    TryEliminateBinaryOpComutative(operation, 1);
+                    break;
+
+                case Instruction.ShiftLeft:
+                case Instruction.ShiftRightSI:
+                case Instruction.ShiftRightUI:
+                case Instruction.Subtract:
+                    TryEliminateBinaryOpY(operation, 0);
+                    break;
+            }
+        }
+
+        private static void TryEliminateBitwiseAnd(Operation operation)
+        {
+            // Recognize, in order: x & 0xFFFFFFFF == x, 0xFFFFFFFF & y == y,
+            // x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000.
+            // The folded zero takes the operand type so the resulting copy stays type-consistent.
+            Operand x = operation.GetSource(0);
+            Operand y = operation.GetSource(1);
+
+            if (IsConstEqual(x, AllOnes(x.Type)))
+            {
+                operation.TurnIntoCopy(y);
+            }
+            else if (IsConstEqual(y, AllOnes(y.Type)))
+            {
+                operation.TurnIntoCopy(x);
+            }
+            else if (IsConstEqual(x, 0) || IsConstEqual(y, 0))
+            {
+                operation.TurnIntoCopy(x.Type == OperandType.I32 ? Const(0) : Const(0L)); // AllOnes(x.Type) above already rejected any type other than I32/I64.
+            }
+        }
+
+        private static void TryEliminateBitwiseOr(Operation operation)
+        {
+            // Recognize, in order: x | 0x00000000 == x, 0x00000000 | y == y,
+            // x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF.
+            // The folded all-ones constant takes the operand type so the resulting copy stays type-consistent.
+            Operand x = operation.GetSource(0);
+            Operand y = operation.GetSource(1);
+
+            if (IsConstEqual(x, 0))
+            {
+                operation.TurnIntoCopy(y);
+            }
+            else if (IsConstEqual(y, 0))
+            {
+                operation.TurnIntoCopy(x);
+            }
+            else if (IsConstEqual(x, AllOnes(x.Type)) || IsConstEqual(y, AllOnes(y.Type)))
+            {
+                operation.TurnIntoCopy(x.Type == OperandType.I32 ? Const(-1) : Const(-1L)); // AllOnes(x.Type) in the condition already rejected any type other than I32/I64.
+            }
+        }
+
+        private static void TryEliminateBinaryOpY(Operation operation, ulong comparand)
+        {
+            // x OP comparand == x: only the right-hand operand is checked (non-commutative operations).
+            Operand x = operation.GetSource(0);
+            Operand y = operation.GetSource(1);
+
+            if (IsConstEqual(y, comparand))
+            {
+                operation.TurnIntoCopy(x);
+            }
+        }
+
+        private static void TryEliminateBinaryOpComutative(Operation operation, ulong comparand)
+        {
+            // comparand OP y == y and x OP comparand == x: either operand may be the identity constant.
+            Operand x = operation.GetSource(0);
+            Operand y = operation.GetSource(1);
+
+            if (IsConstEqual(x, comparand))
+            {
+                operation.TurnIntoCopy(y);
+            }
+            else if (IsConstEqual(y, comparand))
+            {
+                operation.TurnIntoCopy(x);
+            }
+        }
+
+
private static void TryEliminateConditionalSelect(Operation operation) + { + Operand cond = operation.GetSource(0); + + if (cond.Kind != OperandKind.Constant) + { + return; + } + + // The condition is constant, we can turn it into a copy, and select + // the source based on the condition value. + int srcIndex = cond.Value != 0 ? 1 : 2; + + Operand source = operation.GetSource(srcIndex); + + operation.TurnIntoCopy(source); + } + + private static bool IsConstEqual(Operand operand, ulong comparand) + { + if (operand.Kind != OperandKind.Constant || !operand.Type.IsInteger()) + { + return false; + } + + return operand.Value == comparand; + } + + private static ulong AllOnes(OperandType type) + { + switch (type) + { + case OperandType.I32: return ~0U; + case OperandType.I64: return ~0UL; + } + + throw new ArgumentException("Invalid operand type \"" + type + "\"."); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs b/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs new file mode 100644 index 000000000..94ac6991b --- /dev/null +++ b/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs @@ -0,0 +1,19 @@ +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + struct AllocationResult + { + public int IntUsedRegisters { get; } + public int VecUsedRegisters { get; } + public int SpillRegionSize { get; } + + public AllocationResult( + int intUsedRegisters, + int vecUsedRegisters, + int spillRegionSize) + { + IntUsedRegisters = intUsedRegisters; + VecUsedRegisters = vecUsedRegisters; + SpillRegionSize = spillRegionSize; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs b/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs new file mode 100644 index 000000000..65901e80c --- /dev/null +++ b/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs @@ -0,0 +1,246 @@ +using ARMeilleure.IntermediateRepresentation; +using System; +using 
System.Collections.Generic; + +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + class CopyResolver + { + private class ParallelCopy + { + private struct Copy + { + public Register Dest { get; } + public Register Source { get; } + + public OperandType Type { get; } + + public Copy(Register dest, Register source, OperandType type) + { + Dest = dest; + Source = source; + Type = type; + } + } + + private List _copies; + + public int Count => _copies.Count; + + public ParallelCopy() + { + _copies = new List(); + } + + public void AddCopy(Register dest, Register source, OperandType type) + { + _copies.Add(new Copy(dest, source, type)); + } + + public void Sequence(List sequence) + { + Dictionary locations = new Dictionary(); + Dictionary sources = new Dictionary(); + + Dictionary types = new Dictionary(); + + Queue pendingQueue = new Queue(); + Queue readyQueue = new Queue(); + + foreach (Copy copy in _copies) + { + locations[copy.Source] = copy.Source; + sources[copy.Dest] = copy.Source; + types[copy.Dest] = copy.Type; + + pendingQueue.Enqueue(copy.Dest); + } + + foreach (Copy copy in _copies) + { + // If the destination is not used anywhere, we can assign it immediately. 
+ if (!locations.ContainsKey(copy.Dest)) + { + readyQueue.Enqueue(copy.Dest); + } + } + + while (pendingQueue.TryDequeue(out Register current)) + { + Register copyDest; + Register origSource; + Register copySource; + + while (readyQueue.TryDequeue(out copyDest)) + { + origSource = sources[copyDest]; + copySource = locations[origSource]; + + OperandType type = types[copyDest]; + + EmitCopy(sequence, GetRegister(copyDest, type), GetRegister(copySource, type)); + + locations[origSource] = copyDest; + + if (origSource == copySource && sources.ContainsKey(origSource)) + { + readyQueue.Enqueue(origSource); + } + } + + copyDest = current; + origSource = sources[copyDest]; + copySource = locations[origSource]; + + if (copyDest != copySource) + { + OperandType type = types[copyDest]; + + type = type.IsInteger() ? OperandType.I64 : OperandType.V128; + + EmitXorSwap(sequence, GetRegister(copyDest, type), GetRegister(copySource, type)); + + locations[origSource] = copyDest; + + Register swapOther = copySource; + + if (copyDest != locations[sources[copySource]]) + { + // Find the other swap destination register. + // To do that, we search all the pending registers, and pick + // the one where the copy source register is equal to the + // current destination register being processed (copyDest). + foreach (Register pending in pendingQueue) + { + // Is this a copy of pending <- copyDest? + if (copyDest == locations[sources[pending]]) + { + swapOther = pending; + + break; + } + } + } + + // The value that was previously at "copyDest" now lives on + // "copySource" thanks to the swap, now we need to update the + // location for the next copy that is supposed to copy the value + // that used to live on "copyDest". 
+ locations[sources[swapOther]] = copySource; + } + } + } + + private static void EmitCopy(List sequence, Operand x, Operand y) + { + sequence.Add(new Operation(Instruction.Copy, x, y)); + } + + private static void EmitXorSwap(List sequence, Operand x, Operand y) + { + sequence.Add(new Operation(Instruction.BitwiseExclusiveOr, x, x, y)); + sequence.Add(new Operation(Instruction.BitwiseExclusiveOr, y, y, x)); + sequence.Add(new Operation(Instruction.BitwiseExclusiveOr, x, x, y)); + } + } + + private Queue _fillQueue = new Queue(); + private Queue _spillQueue = new Queue(); + + private ParallelCopy _parallelCopy; + + public bool HasCopy { get; private set; } + + public CopyResolver() + { + _fillQueue = new Queue(); + _spillQueue = new Queue(); + + _parallelCopy = new ParallelCopy(); + } + + public void AddSplit(LiveInterval left, LiveInterval right) + { + if (left.Local != right.Local) + { + throw new ArgumentException("Intervals of different variables are not allowed."); + } + + OperandType type = left.Local.Type; + + if (left.IsSpilled && !right.IsSpilled) + { + // Move from the stack to a register. + AddSplitFill(left, right, type); + } + else if (!left.IsSpilled && right.IsSpilled) + { + // Move from a register to the stack. + AddSplitSpill(left, right, type); + } + else if (!left.IsSpilled && !right.IsSpilled && left.Register != right.Register) + { + // Move from one register to another. + AddSplitCopy(left, right, type); + } + else if (left.SpillOffset != right.SpillOffset) + { + // This would be the stack-to-stack move case, but this is not supported. 
+ throw new ArgumentException("Both intervals were spilled."); + } + } + + private void AddSplitFill(LiveInterval left, LiveInterval right, OperandType type) + { + Operand register = GetRegister(right.Register, type); + + Operand offset = new Operand(left.SpillOffset); + + _fillQueue.Enqueue(new Operation(Instruction.Fill, register, offset)); + + HasCopy = true; + } + + private void AddSplitSpill(LiveInterval left, LiveInterval right, OperandType type) + { + Operand offset = new Operand(right.SpillOffset); + + Operand register = GetRegister(left.Register, type); + + _spillQueue.Enqueue(new Operation(Instruction.Spill, null, offset, register)); + + HasCopy = true; + } + + private void AddSplitCopy(LiveInterval left, LiveInterval right, OperandType type) + { + _parallelCopy.AddCopy(right.Register, left.Register, type); + + HasCopy = true; + } + + public Operation[] Sequence() + { + List sequence = new List(); + + while (_spillQueue.TryDequeue(out Operation spillOp)) + { + sequence.Add(spillOp); + } + + _parallelCopy.Sequence(sequence); + + while (_fillQueue.TryDequeue(out Operation fillOp)) + { + sequence.Add(fillOp); + } + + return sequence.ToArray(); + } + + private static Operand GetRegister(Register reg, OperandType type) + { + return new Operand(reg.Index, reg.Type, type); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs new file mode 100644 index 000000000..9a827420b --- /dev/null +++ b/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs @@ -0,0 +1,382 @@ +using ARMeilleure.Common; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System.Collections.Generic; +using System.Diagnostics; + +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + class HybridAllocator : IRegisterAllocator + { + private const int RegistersCount = 16; + 
private const int MaxIROperands = 4; + + private struct BlockInfo + { + public bool HasCall { get; } + + public int IntFixedRegisters { get; } + public int VecFixedRegisters { get; } + + public BlockInfo(bool hasCall, int intFixedRegisters, int vecFixedRegisters) + { + HasCall = hasCall; + IntFixedRegisters = intFixedRegisters; + VecFixedRegisters = vecFixedRegisters; + } + } + + private class LocalInfo + { + public int Uses { get; set; } + public int UseCount { get; set; } + + public bool PreAllocated { get; set; } + public int Register { get; set; } + public int SpillOffset { get; set; } + + public int Sequence { get; set; } + + public Operand Temp { get; set; } + + public OperandType Type { get; } + + private int _first; + private int _last; + + public bool IsBlockLocal => _first == _last; + + public LocalInfo(OperandType type, int uses) + { + Uses = uses; + Type = type; + + _first = -1; + _last = -1; + } + + public void SetBlockIndex(int blkIndex) + { + if (_first == -1 || blkIndex < _first) + { + _first = blkIndex; + } + + if (_last == -1 || blkIndex > _last) + { + _last = blkIndex; + } + } + } + + public AllocationResult RunPass( + ControlFlowGraph cfg, + StackAllocator stackAlloc, + RegisterMasks regMasks) + { + int intUsedRegisters = 0; + int vecUsedRegisters = 0; + + int intFreeRegisters = regMasks.IntAvailableRegisters; + int vecFreeRegisters = regMasks.VecAvailableRegisters; + + BlockInfo[] blockInfo = new BlockInfo[cfg.Blocks.Count]; + + List locInfo = new List(); + + for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--) + { + BasicBlock block = cfg.PostOrderBlocks[index]; + + int intFixedRegisters = 0; + int vecFixedRegisters = 0; + + bool hasCall = false; + + foreach (Node node in block.Operations) + { + if (node is Operation operation && operation.Instruction == Instruction.Call) + { + hasCall = true; + } + + for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++) + { + Operand source = node.GetSource(srcIndex); + + if 
(source.Kind == OperandKind.LocalVariable) + { + locInfo[source.AsInt32() - 1].SetBlockIndex(block.Index); + } + } + + for (int dstIndex = 0; dstIndex < node.DestinationsCount; dstIndex++) + { + Operand dest = node.GetDestination(dstIndex); + + if (dest.Kind == OperandKind.LocalVariable) + { + LocalInfo info; + + if (dest.Value != 0) + { + info = locInfo[dest.AsInt32() - 1]; + } + else + { + dest.NumberLocal(locInfo.Count + 1); + + info = new LocalInfo(dest.Type, UsesCount(dest)); + + locInfo.Add(info); + } + + info.SetBlockIndex(block.Index); + } + else if (dest.Kind == OperandKind.Register) + { + if (dest.Type.IsInteger()) + { + intFixedRegisters |= 1 << dest.GetRegister().Index; + } + else + { + vecFixedRegisters |= 1 << dest.GetRegister().Index; + } + } + } + } + + blockInfo[block.Index] = new BlockInfo(hasCall, intFixedRegisters, vecFixedRegisters); + } + + int sequence = 0; + + for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--) + { + BasicBlock block = cfg.PostOrderBlocks[index]; + + BlockInfo blkInfo = blockInfo[block.Index]; + + int intLocalFreeRegisters = intFreeRegisters & ~blkInfo.IntFixedRegisters; + int vecLocalFreeRegisters = vecFreeRegisters & ~blkInfo.VecFixedRegisters; + + int intCallerSavedRegisters = blkInfo.HasCall ? regMasks.IntCallerSavedRegisters : 0; + int vecCallerSavedRegisters = blkInfo.HasCall ? 
regMasks.VecCallerSavedRegisters : 0; + + int intSpillTempRegisters = SelectSpillTemps( + intCallerSavedRegisters & ~blkInfo.IntFixedRegisters, + intLocalFreeRegisters); + int vecSpillTempRegisters = SelectSpillTemps( + vecCallerSavedRegisters & ~blkInfo.VecFixedRegisters, + vecLocalFreeRegisters); + + intLocalFreeRegisters &= ~(intSpillTempRegisters | intCallerSavedRegisters); + vecLocalFreeRegisters &= ~(vecSpillTempRegisters | vecCallerSavedRegisters); + + for (LinkedListNode llNode = block.Operations.First; llNode != null; llNode = llNode.Next) + { + Node node = llNode.Value; + + int intLocalUse = 0; + int vecLocalUse = 0; + + for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++) + { + Operand source = node.GetSource(srcIndex); + + if (source.Kind != OperandKind.LocalVariable) + { + continue; + } + + LocalInfo info = locInfo[source.AsInt32() - 1]; + + info.UseCount++; + + Debug.Assert(info.UseCount <= info.Uses); + + if (info.Register != -1) + { + node.SetSource(srcIndex, Register(info.Register, source.Type.ToRegisterType(), source.Type)); + + if (info.UseCount == info.Uses && !info.PreAllocated) + { + if (source.Type.IsInteger()) + { + intLocalFreeRegisters |= 1 << info.Register; + } + else + { + vecLocalFreeRegisters |= 1 << info.Register; + } + } + } + else + { + Operand temp = info.Temp; + + if (temp == null || info.Sequence != sequence) + { + temp = source.Type.IsInteger() + ? 
GetSpillTemp(source, intSpillTempRegisters, ref intLocalUse) + : GetSpillTemp(source, vecSpillTempRegisters, ref vecLocalUse); + + info.Sequence = sequence; + info.Temp = temp; + } + + node.SetSource(srcIndex, temp); + + Operation fillOp = new Operation(Instruction.Fill, temp, Const(info.SpillOffset)); + + block.Operations.AddBefore(llNode, fillOp); + } + } + + int intLocalAsg = 0; + int vecLocalAsg = 0; + + for (int dstIndex = 0; dstIndex < node.DestinationsCount; dstIndex++) + { + Operand dest = node.GetDestination(dstIndex); + + if (dest.Kind != OperandKind.LocalVariable) + { + continue; + } + + LocalInfo info = locInfo[dest.AsInt32() - 1]; + + if (info.UseCount == 0 && !info.PreAllocated) + { + int mask = dest.Type.IsInteger() + ? intLocalFreeRegisters + : vecLocalFreeRegisters; + + if (info.IsBlockLocal && mask != 0) + { + int selectedReg = BitUtils.LowestBitSet(mask); + + info.Register = selectedReg; + + if (dest.Type.IsInteger()) + { + intLocalFreeRegisters &= ~(1 << selectedReg); + intUsedRegisters |= 1 << selectedReg; + } + else + { + vecLocalFreeRegisters &= ~(1 << selectedReg); + vecUsedRegisters |= 1 << selectedReg; + } + } + else + { + info.Register = -1; + info.SpillOffset = stackAlloc.Allocate(dest.Type.GetSizeInBytes()); + } + } + + info.UseCount++; + + Debug.Assert(info.UseCount <= info.Uses); + + if (info.Register != -1) + { + node.SetDestination(dstIndex, Register(info.Register, dest.Type.ToRegisterType(), dest.Type)); + } + else + { + Operand temp = info.Temp; + + if (temp == null || info.Sequence != sequence) + { + temp = dest.Type.IsInteger() + ? 
GetSpillTemp(dest, intSpillTempRegisters, ref intLocalAsg) + : GetSpillTemp(dest, vecSpillTempRegisters, ref vecLocalAsg); + + info.Sequence = sequence; + info.Temp = temp; + } + + node.SetDestination(dstIndex, temp); + + Operation spillOp = new Operation(Instruction.Spill, null, Const(info.SpillOffset), temp); + + llNode = block.Operations.AddAfter(llNode, spillOp); + } + } + + sequence++; + + intUsedRegisters |= intLocalAsg | intLocalUse; + vecUsedRegisters |= vecLocalAsg | vecLocalUse; + } + } + + return new AllocationResult(intUsedRegisters, vecUsedRegisters, stackAlloc.TotalSize); + } + + private static int SelectSpillTemps(int mask0, int mask1) + { + int selection = 0; + int count = 0; + + while (count < MaxIROperands && mask0 != 0) + { + int mask = mask0 & -mask0; + + selection |= mask; + + mask0 &= ~mask; + + count++; + } + + while (count < MaxIROperands && mask1 != 0) + { + int mask = mask1 & -mask1; + + selection |= mask; + + mask1 &= ~mask; + + count++; + } + + Debug.Assert(count == MaxIROperands, "No enough registers for spill temps."); + + return selection; + } + + private static Operand GetSpillTemp(Operand local, int freeMask, ref int useMask) + { + int selectedReg = BitUtils.LowestBitSet(freeMask & ~useMask); + + useMask |= 1 << selectedReg; + + return Register(selectedReg, local.Type.ToRegisterType(), local.Type); + } + + private static int UsesCount(Operand local) + { + return local.Assignments.Count + local.Uses.Count; + } + + private static IEnumerable Successors(BasicBlock block) + { + if (block.Next != null) + { + yield return block.Next; + } + + if (block.Branch != null) + { + yield return block.Branch; + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs new file mode 100644 index 000000000..8f236c253 --- /dev/null +++ b/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs @@ -0,0 +1,12 @@ +using 
ARMeilleure.Translation; + +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + interface IRegisterAllocator + { + AllocationResult RunPass( + ControlFlowGraph cfg, + StackAllocator stackAlloc, + RegisterMasks regMasks); + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs new file mode 100644 index 000000000..6d5ecc141 --- /dev/null +++ b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs @@ -0,0 +1,1019 @@ +using ARMeilleure.Common; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; + +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + // Based on: + // "Linear Scan Register Allocation for the Java(tm) HotSpot Client Compiler". + // http://www.christianwimmer.at/Publications/Wimmer04a/Wimmer04a.pdf + class LinearScanAllocator : IRegisterAllocator + { + private const int InstructionGap = 2; + private const int InstructionGapMask = InstructionGap - 1; + + private const int RegistersCount = 16; + + private HashSet _blockEdges; + + private LiveRange[] _blockRanges; + + private BitMap[] _blockLiveIn; + + private List _intervals; + + private LiveInterval[] _parentIntervals; + + private List> _operationNodes; + + private int _operationsCount; + + private class AllocationContext + { + public RegisterMasks Masks { get; } + + public StackAllocator StackAlloc { get; } + + public BitMap Active { get; } + public BitMap Inactive { get; } + + public int IntUsedRegisters { get; set; } + public int VecUsedRegisters { get; set; } + + public AllocationContext(StackAllocator stackAlloc, RegisterMasks masks, int intervalsCount) + { + StackAlloc = stackAlloc; + Masks = masks; + + Active = new BitMap(intervalsCount); + Inactive = new BitMap(intervalsCount); + } + + public void MoveActiveToInactive(int bit) + { + Move(Active, 
Inactive, bit); + } + + public void MoveInactiveToActive(int bit) + { + Move(Inactive, Active, bit); + } + + private static void Move(BitMap source, BitMap dest, int bit) + { + source.Clear(bit); + + dest.Set(bit); + } + } + + public AllocationResult RunPass( + ControlFlowGraph cfg, + StackAllocator stackAlloc, + RegisterMasks regMasks) + { + NumberLocals(cfg); + + AllocationContext context = new AllocationContext(stackAlloc, regMasks, _intervals.Count); + + BuildIntervals(cfg, context); + + for (int index = 0; index < _intervals.Count; index++) + { + LiveInterval current = _intervals[index]; + + if (current.IsEmpty) + { + continue; + } + + if (current.IsFixed) + { + context.Active.Set(index); + + if (current.Register.Type == RegisterType.Integer) + { + context.IntUsedRegisters |= 1 << current.Register.Index; + } + else /* if (interval.Register.Type == RegisterType.Vector) */ + { + context.VecUsedRegisters |= 1 << current.Register.Index; + } + + continue; + } + + AllocateInterval(context, current, index); + } + + for (int index = RegistersCount * 2; index < _intervals.Count; index++) + { + if (!_intervals[index].IsSpilled) + { + ReplaceLocalWithRegister(_intervals[index]); + } + } + + InsertSplitCopies(); + InsertSplitCopiesAtEdges(cfg); + + return new AllocationResult( + context.IntUsedRegisters, + context.VecUsedRegisters, + context.StackAlloc.TotalSize); + } + + private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex) + { + // Check active intervals that already ended. + foreach (int iIndex in context.Active) + { + LiveInterval interval = _intervals[iIndex]; + + if (interval.GetEnd() < current.GetStart()) + { + context.Active.Clear(iIndex); + } + else if (!interval.Overlaps(current.GetStart())) + { + context.MoveActiveToInactive(iIndex); + } + } + + // Check inactive intervals that already ended or were reactivated. 
+ foreach (int iIndex in context.Inactive) + { + LiveInterval interval = _intervals[iIndex]; + + if (interval.GetEnd() < current.GetStart()) + { + context.Inactive.Clear(iIndex); + } + else if (interval.Overlaps(current.GetStart())) + { + context.MoveInactiveToActive(iIndex); + } + } + + if (!TryAllocateRegWithoutSpill(context, current, cIndex)) + { + AllocateRegWithSpill(context, current, cIndex); + } + } + + private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex) + { + RegisterType regType = current.Local.Type.ToRegisterType(); + + int availableRegisters = context.Masks.GetAvailableRegisters(regType); + + int[] freePositions = new int[RegistersCount]; + + for (int index = 0; index < RegistersCount; index++) + { + if ((availableRegisters & (1 << index)) != 0) + { + freePositions[index] = int.MaxValue; + } + } + + foreach (int iIndex in context.Active) + { + LiveInterval interval = _intervals[iIndex]; + + if (interval.Register.Type == regType) + { + freePositions[interval.Register.Index] = 0; + } + } + + foreach (int iIndex in context.Inactive) + { + LiveInterval interval = _intervals[iIndex]; + + if (interval.Register.Type == regType) + { + int overlapPosition = interval.GetOverlapPosition(current); + + if (overlapPosition != LiveInterval.NotFound && freePositions[interval.Register.Index] > overlapPosition) + { + freePositions[interval.Register.Index] = overlapPosition; + } + } + } + + int selectedReg = GetHighestValueIndex(freePositions); + + int selectedNextUse = freePositions[selectedReg]; + + // Intervals starts and ends at odd positions, unless they span an entire + // block, in this case they will have ranges at a even position. + // When a interval is loaded from the stack to a register, we can only + // do the split at a odd position, because otherwise the split interval + // that is inserted on the list to be processed may clobber a register + // used by the instruction at the same position as the split. 
+ // The problem only happens when a interval ends exactly at this instruction, + // because otherwise they would interfere, and the register wouldn't be selected. + // When the interval is aligned and the above happens, there's no problem as + // the instruction that is actually with the last use is the one + // before that position. + selectedNextUse &= ~InstructionGapMask; + + if (selectedNextUse <= current.GetStart()) + { + return false; + } + else if (selectedNextUse < current.GetEnd()) + { + Debug.Assert(selectedNextUse > current.GetStart(), "Trying to split interval at the start."); + + LiveInterval splitChild = current.Split(selectedNextUse); + + if (splitChild.UsesCount != 0) + { + Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position."); + + InsertInterval(splitChild); + } + else + { + Spill(context, splitChild); + } + } + + current.Register = new Register(selectedReg, regType); + + if (regType == RegisterType.Integer) + { + context.IntUsedRegisters |= 1 << selectedReg; + } + else /* if (regType == RegisterType.Vector) */ + { + context.VecUsedRegisters |= 1 << selectedReg; + } + + context.Active.Set(cIndex); + + return true; + } + + private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex) + { + RegisterType regType = current.Local.Type.ToRegisterType(); + + int availableRegisters = context.Masks.GetAvailableRegisters(regType); + + int[] usePositions = new int[RegistersCount]; + int[] blockedPositions = new int[RegistersCount]; + + for (int index = 0; index < RegistersCount; index++) + { + if ((availableRegisters & (1 << index)) != 0) + { + usePositions[index] = int.MaxValue; + + blockedPositions[index] = int.MaxValue; + } + } + + void SetUsePosition(int index, int position) + { + usePositions[index] = Math.Min(usePositions[index], position); + } + + void SetBlockedPosition(int index, int position) + { + blockedPositions[index] = Math.Min(blockedPositions[index], 
position); + + SetUsePosition(index, position); + } + + foreach (int iIndex in context.Active) + { + LiveInterval interval = _intervals[iIndex]; + + if (!interval.IsFixed && interval.Register.Type == regType) + { + int nextUse = interval.NextUseAfter(current.GetStart()); + + if (nextUse != -1) + { + SetUsePosition(interval.Register.Index, nextUse); + } + } + } + + foreach (int iIndex in context.Inactive) + { + LiveInterval interval = _intervals[iIndex]; + + if (!interval.IsFixed && interval.Register.Type == regType && interval.Overlaps(current)) + { + int nextUse = interval.NextUseAfter(current.GetStart()); + + if (nextUse != -1) + { + SetUsePosition(interval.Register.Index, nextUse); + } + } + } + + foreach (int iIndex in context.Active) + { + LiveInterval interval = _intervals[iIndex]; + + if (interval.IsFixed && interval.Register.Type == regType) + { + SetBlockedPosition(interval.Register.Index, 0); + } + } + + foreach (int iIndex in context.Inactive) + { + LiveInterval interval = _intervals[iIndex]; + + if (interval.IsFixed && interval.Register.Type == regType) + { + int overlapPosition = interval.GetOverlapPosition(current); + + if (overlapPosition != LiveInterval.NotFound) + { + SetBlockedPosition(interval.Register.Index, overlapPosition); + } + } + } + + int selectedReg = GetHighestValueIndex(usePositions); + + int currentFirstUse = current.FirstUse(); + + Debug.Assert(currentFirstUse >= 0, "Current interval has no uses."); + + if (usePositions[selectedReg] < currentFirstUse) + { + // All intervals on inactive and active are being used before current, + // so spill the current interval. 
/// <summary>
/// Returns the index of the first occurrence of the largest value on <paramref name="array"/>.
/// </summary>
/// <param name="array">Non-empty array of positions, one entry per register.</param>
/// <returns>Index of the highest value; ties resolve to the lowest index.</returns>
private static int GetHighestValueIndex(int[] array)
{
    int bestIndex = 0;
    int bestValue = array[bestIndex];

    // Nothing can be higher than int.MaxValue, so the scan stops as soon as
    // that value has been found.
    for (int candidate = 1; bestValue != int.MaxValue && candidate < array.Length; candidate++)
    {
        int value = array[candidate];

        if (value > bestValue)
        {
            bestValue = value;
            bestIndex = candidate;
        }
    }

    return bestIndex;
}
/// <summary>
/// Evicts <paramref name="interval"/> from the register that was just given to
/// <paramref name="current"/>. The part of the interval that overlaps the current one
/// is spilled; if it is used again later, the part starting at that use is put back on
/// the pending list so that it gets a register of its own.
/// </summary>
private void SplitAndSpillOverlappingInterval(
    AllocationContext context,
    LiveInterval current,
    LiveInterval interval)
{
    int currentStart = current.GetStart();

    // This has to be queried before any split is done, since splitting
    // moves the use positions at or after the split point to the new child.
    int nextUse = interval.NextUseAfter(currentStart);

    // Only the portion that is live from the start of the current interval
    // onwards needs to leave the register.
    LiveInterval evicted = interval.GetStart() < currentStart
        ? interval.Split(currentStart)
        : interval;

    if (nextUse == LiveInterval.NotFound)
    {
        // Never used again, it can stay on the stack for the rest of its lifetime.
        Spill(context, evicted);

        return;
    }

    Debug.Assert(nextUse > currentStart, "Trying to spill a interval currently being used.");

    if (nextUse > evicted.GetStart())
    {
        // Split a second time at the next use: the gap before the use lives
        // on the stack, the remainder is queued for allocation.
        LiveInterval reloaded = evicted.Split(nextUse);

        Spill(context, evicted);

        evicted = reloaded;
    }

    InsertInterval(evicted);
}
/// <summary>
/// Inserts the copies that move a value between the locations of two consecutive
/// pieces of a split interval, for splits that happen in the middle of a block.
/// </summary>
private void InsertSplitCopies()
{
    // All the copies needed at a given position are gathered on the same
    // resolver, so that they can be emitted as a single sequence.
    Dictionary<int, CopyResolver> copyResolvers = new Dictionary<int, CopyResolver>();

    CopyResolver GetCopyResolver(int position)
    {
        // Look up first, so that a resolver is only created for positions
        // that do not have one yet.
        if (!copyResolvers.TryGetValue(position, out CopyResolver copyResolver))
        {
            copyResolver = new CopyResolver();

            copyResolvers.Add(position, copyResolver);
        }

        return copyResolver;
    }

    foreach (LiveInterval interval in _intervals.Where(x => x.IsSplit))
    {
        LiveInterval previous = interval;

        foreach (LiveInterval splitChild in interval.SplitChilds())
        {
            int splitPosition = splitChild.GetStart();

            // A copy is only added here when the split is not on a block start
            // position and the two pieces are contiguous (no lifetime hole
            // between them). RunPass calls InsertSplitCopiesAtEdges afterwards
            // for the copies needed across block boundaries.
            if (!_blockEdges.Contains(splitPosition) && previous.GetEnd() == splitPosition)
            {
                GetCopyResolver(splitPosition).AddSplit(previous, splitChild);
            }

            previous = splitChild;
        }
    }

    foreach (KeyValuePair<int, CopyResolver> kv in copyResolvers)
    {
        CopyResolver copyResolver = kv.Value;

        if (!copyResolver.HasCopy)
        {
            continue;
        }

        int splitPosition = kv.Key;

        LinkedListNode<Node> node = GetOperationNode(splitPosition);

        // NOTE(review): assumes Sequence() returns at least one operation
        // whenever HasCopy is true - confirm against CopyResolver.
        Operation[] sequence = copyResolver.Sequence();

        // The whole sequence goes right before the operation at the split
        // position, keeping the order returned by the resolver.
        node = node.List.AddBefore(node, sequence[0]);

        for (int index = 1; index < sequence.Length; index++)
        {
            node = node.List.AddAfter(node, sequence[index]);
        }
    }
}
+ if (IsSplitEdgeBlock(successor)) + { + succIndex = Successors(successor).First().Index; + } + + CopyResolver copyResolver = new CopyResolver(); + + foreach (int iIndex in _blockLiveIn[succIndex]) + { + LiveInterval interval = _parentIntervals[iIndex]; + + if (!interval.IsSplit) + { + continue; + } + + int lEnd = _blockRanges[block.Index].End - 1; + int rStart = _blockRanges[succIndex].Start; + + LiveInterval left = interval.GetSplitChild(lEnd); + LiveInterval right = interval.GetSplitChild(rStart); + + if (left != null && right != null && left != right) + { + copyResolver.AddSplit(left, right); + } + } + + if (!copyResolver.HasCopy) + { + continue; + } + + Operation[] sequence = copyResolver.Sequence(); + + if (hasSingleOrNoSuccessor) + { + foreach (Operation operation in sequence) + { + block.Append(operation); + } + } + else if (successor.Predecessors.Count == 1) + { + LinkedListNode prependNode = successor.Operations.AddFirst(sequence[0]); + + for (int index = 1; index < sequence.Length; index++) + { + Operation operation = sequence[index]; + + prependNode = successor.Operations.AddAfter(prependNode, operation); + } + } + else + { + // Split the critical edge. 
/// <summary>
/// Rewrites every operation that uses the local variable of <paramref name="current"/>,
/// at the use positions recorded on the interval, so that it references the register
/// assigned to the interval instead.
/// </summary>
/// <param name="current">Interval that was allocated on a register (must not be spilled).</param>
private void ReplaceLocalWithRegister(LiveInterval current)
{
    Operand physical = GetRegister(current);
    Operand local    = current.Local;

    foreach (int usePosition in current.UsePositions())
    {
        Node user = GetOperationNode(usePosition).Value;

        for (int slot = 0; slot < user.SourcesCount; slot++)
        {
            if (user.GetSource(slot) == local)
            {
                user.SetSource(slot, physical);
            }
        }

        for (int slot = 0; slot < user.DestinationsCount; slot++)
        {
            if (user.GetDestination(slot) == local)
            {
                user.SetDestination(slot, physical);
            }
        }
    }
}
dest.NumberLocal(_intervals.Count); + + _intervals.Add(new LiveInterval(dest)); + } + } + } + + _operationsCount += block.Operations.Count * InstructionGap; + + if (block.Operations.Count == 0) + { + // Pretend we have a dummy instruction on the empty block. + _operationNodes.Add(null); + + _operationsCount += InstructionGap; + } + } + + _parentIntervals = _intervals.ToArray(); + } + + private void BuildIntervals(ControlFlowGraph cfg, AllocationContext context) + { + _blockRanges = new LiveRange[cfg.Blocks.Count]; + + int mapSize = _intervals.Count; + + BitMap[] blkLiveGen = new BitMap[cfg.Blocks.Count]; + BitMap[] blkLiveKill = new BitMap[cfg.Blocks.Count]; + + // Compute local live sets. + foreach (BasicBlock block in cfg.Blocks) + { + BitMap liveGen = new BitMap(mapSize); + BitMap liveKill = new BitMap(mapSize); + + foreach (Node node in block.Operations) + { + foreach (Operand source in Sources(node)) + { + int id = GetOperandId(source); + + if (!liveKill.IsSet(id)) + { + liveGen.Set(id); + } + } + + foreach (Operand dest in Destinations(node)) + { + liveKill.Set(GetOperandId(dest)); + } + } + + blkLiveGen [block.Index] = liveGen; + blkLiveKill[block.Index] = liveKill; + } + + // Compute global live sets. 
+ BitMap[] blkLiveIn = new BitMap[cfg.Blocks.Count]; + BitMap[] blkLiveOut = new BitMap[cfg.Blocks.Count]; + + for (int index = 0; index < cfg.Blocks.Count; index++) + { + blkLiveIn [index] = new BitMap(mapSize); + blkLiveOut[index] = new BitMap(mapSize); + } + + bool modified; + + do + { + modified = false; + + for (int index = 0; index < cfg.PostOrderBlocks.Length; index++) + { + BasicBlock block = cfg.PostOrderBlocks[index]; + + BitMap liveOut = blkLiveOut[block.Index]; + + foreach (BasicBlock successor in Successors(block)) + { + if (liveOut.Set(blkLiveIn[successor.Index])) + { + modified = true; + } + } + + BitMap liveIn = blkLiveIn[block.Index]; + + liveIn.Set (liveOut); + liveIn.Clear(blkLiveKill[block.Index]); + liveIn.Set (blkLiveGen [block.Index]); + } + } + while (modified); + + _blockLiveIn = blkLiveIn; + + _blockEdges = new HashSet(); + + // Compute lifetime intervals. + int operationPos = _operationsCount; + + for (int index = 0; index < cfg.PostOrderBlocks.Length; index++) + { + BasicBlock block = cfg.PostOrderBlocks[index]; + + // We handle empty blocks by pretending they have a dummy instruction, + // because otherwise the block would have the same start and end position, + // and this is not valid. 
/// <summary>
/// Adds the range [operationPos + 1, operationPos + InstructionGap[ to the fixed
/// interval of every register whose bit is set on <paramref name="mask"/>.
/// </summary>
/// <param name="mask">Bit mask of register indices, bit N selects register N.</param>
/// <param name="operationPos">Position of the operation the registers are reserved for.</param>
/// <param name="regType">Type of the registers on the mask.</param>
private void AddIntervalCallerSavedReg(int mask, int operationPos, RegisterType regType)
{
    int rangeStart = operationPos + 1;
    int rangeEnd   = operationPos + InstructionGap;

    int pending = mask;

    while (pending != 0)
    {
        int regIndex = BitUtils.LowestBitSet(pending);

        // Consume the bit that was just picked.
        pending &= ~(1 << regIndex);

        int intervalId = GetRegisterId(new Register(regIndex, regType));

        _intervals[intervalId].AddRange(rangeStart, rangeEnd);
    }
}
/// <summary>
/// Enumerates the operations of <paramref name="block"/> from the last one towards
/// the first, stopping (without returning it) at the first phi node that is found.
/// </summary>
/// <param name="block">Block whose operations are enumerated.</param>
/// <returns>The operations of the block in reverse order, phi nodes excluded.</returns>
private static IEnumerable<Node> BottomOperations(BasicBlock block)
{
    for (LinkedListNode<Node> node = block.Operations.Last; node != null; node = node.Previous)
    {
        if (node.Value is PhiNode)
        {
            yield break;
        }

        yield return node.Value;
    }
}
/// <summary>
/// Adds the range [start, end[ to the interval, merging it with every existing
/// range that it overlaps or touches so that the list stays sorted and disjoint.
/// </summary>
/// <param name="start">Inclusive start position of the range.</param>
/// <param name="end">Exclusive end position of the range, must be greater than start.</param>
/// <exception cref="ArgumentException">The range is empty or inverted.</exception>
public void AddRange(int start, int end)
{
    if (start >= end)
    {
        throw new ArgumentException("Invalid range start position " + start + ", " + end);
    }

    // LiveRange compares equal to any range it overlaps, so a non-negative
    // result is the index of one (not necessarily the first) overlapping range.
    int hit = _ranges.BinarySearch(new LiveRange(start, end));

    if (hit < 0)
    {
        // No overlap, the complement is the sorted insertion point.
        InsertRange(~hit, start, end);

        return;
    }

    // Widen the window to cover all the ranges that intersect (or touch)
    // the new one, on both sides of the hit.
    int first = hit;

    for (; first > 0 && _ranges[first - 1].End >= start; first--) { }

    int last = hit;

    for (; last + 1 < _ranges.Count && _ranges[last + 1].Start <= end; last++) { }

    // The merged range spans from the lowest start to the highest end.
    int mergedStart = Math.Min(start, _ranges[first].Start);
    int mergedEnd   = Math.Max(end,   _ranges[last].End);

    _ranges.RemoveRange(first, (last - first) + 1);

    InsertRange(first, mergedStart, mergedEnd);
}
extIndex = null; + + if (index > 0 && _ranges[index - 1].End == start) + { + start = _ranges[index - 1].Start; + + extIndex = index - 1; + } + + if (index < _ranges.Count && _ranges[index].Start == end) + { + end = _ranges[index].End; + + if (extIndex.HasValue) + { + _ranges.RemoveAt(index); + } + else + { + extIndex = index; + } + } + + if (extIndex.HasValue) + { + _ranges[extIndex.Value] = new LiveRange(start, end); + } + else + { + _ranges.Insert(index, new LiveRange(start, end)); + } + } + + public void AddUsePosition(int position) + { + _usePositions.Add(position); + } + + public bool Overlaps(int position) + { + return _ranges.BinarySearch(new LiveRange(position, position + 1)) >= 0; + } + + public bool Overlaps(LiveInterval other) + { + foreach (LiveRange range in other._ranges) + { + if (_ranges.BinarySearch(range) >= 0) + { + return true; + } + } + + return false; + } + + public int GetOverlapPosition(LiveInterval other) + { + foreach (LiveRange range in other._ranges) + { + int overlapIndex = _ranges.BinarySearch(range); + + if (overlapIndex >= 0) + { + // It's possible that we have multiple overlaps within a single interval, + // in this case, we pick the one with the lowest start position, since + // we return the first overlap position. 
/// <summary>
/// Splits this interval in two at <paramref name="position"/>. This interval keeps
/// the ranges and use positions before the position, while the returned interval
/// receives the ones at or after it and is registered as a split child of the parent.
/// </summary>
/// <param name="position">
/// Split position. It must be after the start of the interval and before its end,
/// otherwise one of the two sides would be left empty.
/// </param>
/// <returns>The new interval covering the lifetime from the split position onwards.</returns>
public LiveInterval Split(int position)
{
    LiveInterval right = new LiveInterval(Local, _parent);

    int splitIndex = 0;

    for (; splitIndex < _ranges.Count; splitIndex++)
    {
        LiveRange range = _ranges[splitIndex];

        // The end of a range is exclusive, so a position equal to the end is not
        // inside of the range. Such a range stays whole on the left side and the
        // right side starts at the next range; cutting it would give the right
        // side a zero-length [End, End[ range.
        if (position > range.Start && position < range.End)
        {
            right._ranges.Add(new LiveRange(position, range.End));

            range = new LiveRange(range.Start, position);

            _ranges[splitIndex++] = range;

            break;
        }

        if (range.Start >= position)
        {
            break;
        }
    }

    // Everything from the split index onwards belongs to the right side.
    if (splitIndex < _ranges.Count)
    {
        int count = _ranges.Count - splitIndex;

        right._ranges.AddRange(_ranges.GetRange(splitIndex, count));

        _ranges.RemoveRange(splitIndex, count);
    }

    // Use positions follow the ranges they fall on.
    foreach (int usePosition in _usePositions.Where(x => x >= position))
    {
        right._usePositions.Add(usePosition);
    }

    _usePositions.RemoveWhere(x => x >= position);

    Debug.Assert(_ranges.Count != 0, "Left interval is empty after split.");

    Debug.Assert(right._ranges.Count != 0, "Right interval is empty after split.");

    AddSplitChild(right);

    return right;
}
/// <summary>
/// Half-open range of positions [Start, End[ where a value is live.
/// </summary>
struct LiveRange : IComparable<LiveRange>
{
    /// <summary>Inclusive start position.</summary>
    public int Start { get; }

    /// <summary>Exclusive end position.</summary>
    public int End { get; }

    public LiveRange(int start, int end)
    {
        Start = start;
        End   = end;
    }

    /// <summary>
    /// Compares two ranges, treating ranges that overlap as equal.
    /// </summary>
    /// <remarks>
    /// This is deliberately not a regular ordering: it lets a binary search over a
    /// sorted list of disjoint ranges find a range that intersects the searched one.
    /// Ranges that only touch (one ends where the other starts) do not overlap.
    /// </remarks>
    /// <param name="other">Range to compare against.</param>
    /// <returns>
    /// Zero if the ranges overlap, otherwise the result of comparing the start positions.
    /// </returns>
    public int CompareTo(LiveRange other)
    {
        if (Start < other.End && other.Start < End)
        {
            return 0;
        }

        return Start.CompareTo(other.Start);
    }

    public override string ToString()
    {
        return $"[{Start}, {End}[";
    }
}
/// <summary>
/// Hands out consecutive offsets of a stack area that only ever grows.
/// </summary>
class StackAllocator
{
    // Offset that the next allocation will receive, which is also the
    // total amount of bytes allocated so far.
    private int _offset;

    /// <summary>Total size in bytes of all the allocations made so far.</summary>
    public int TotalSize
    {
        get { return _offset; }
    }

    /// <summary>
    /// Reserves enough space for a value of the given type.
    /// </summary>
    /// <param name="type">Type of the value that will be stored.</param>
    /// <returns>Offset of the reserved space.</returns>
    public int Allocate(OperandType type) => Allocate(type.GetSizeInBytes());

    /// <summary>
    /// Reserves <paramref name="sizeInBytes"/> bytes right after the previous allocation.
    /// </summary>
    /// <param name="sizeInBytes">Amount of bytes to reserve.</param>
    /// <returns>Offset of the reserved space.</returns>
    public int Allocate(int sizeInBytes)
    {
        int allocatedAt = _offset;

        _offset = allocatedAt + sizeInBytes;

        return allocatedAt;
    }
}
b/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs
new file mode 100644
index 000000000..4955f1b4a
--- /dev/null
+++ b/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.CodeGen.Unwinding
+{
+    /// <summary>
+    /// Bundle of the push entries, prologue size and fixed allocation size
+    /// given to the constructor; every property is get-only.
+    /// </summary>
+    struct UnwindInfo
+    {
+        public UnwindPushEntry[] PushEntries { get; }
+
+        public int PrologueSize { get; }
+
+        public int FixedAllocSize { get; }
+
+        public UnwindInfo(UnwindPushEntry[] pushEntries, int prologueSize, int fixedAllocSize)
+        {
+            this.PushEntries = pushEntries;
+            this.PrologueSize = prologueSize;
+            this.FixedAllocSize = fixedAllocSize;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs b/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs
new file mode 100644
index 000000000..6597e2b4b
--- /dev/null
+++ b/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs
@@ -0,0 +1,24 @@
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.Unwinding
+{
+    /// <summary>
+    /// One push record: a register index, its register type and a stream end
+    /// offset, stored exactly as passed to the constructor.
+    /// </summary>
+    struct UnwindPushEntry
+    {
+        public int Index { get; }
+
+        public RegisterType Type { get; }
+
+        public int StreamEndOffset { get; }
+
+        public UnwindPushEntry(int index, RegisterType type, int streamEndOffset)
+        {
+            this.Index = index;
+            this.Type = type;
+            this.StreamEndOffset = streamEndOffset;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs
new file mode 100644
index 000000000..c64838945
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/Assembler.cs
@@ -0,0 +1,1358 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Diagnostics;
+using System.IO;
+
+namespace ARMeilleure.CodeGen.X86
+{
+    class Assembler
+    {
+        private const int BadOp = 0;
+        private const int OpModRMBits = 24;
+
+        private const byte RexPrefix = 0x40;
+        private const byte RexWPrefix = 0x48;
+        private const byte LockPrefix = 0xf0;
+
+        [Flags]
+        private enum InstructionFlags
+        {
+            None = 0,
+            RegOnly = 1 << 0,
+            Reg8Src = 1 << 1,
+            Reg8Dest = 1 << 2,
+            RexW = 1 << 3,
+            Vex = 1 << 4,
+
+
PrefixBit = 16, + PrefixMask = 3 << PrefixBit, + Prefix66 = 1 << PrefixBit, + PrefixF3 = 2 << PrefixBit, + PrefixF2 = 3 << PrefixBit + } + + private struct InstructionInfo + { + public int OpRMR { get; } + public int OpRMImm8 { get; } + public int OpRMImm32 { get; } + public int OpRImm64 { get; } + public int OpRRM { get; } + + public InstructionFlags Flags { get; } + + public InstructionInfo( + int opRMR, + int opRMImm8, + int opRMImm32, + int opRImm64, + int opRRM, + InstructionFlags flags) + { + OpRMR = opRMR; + OpRMImm8 = opRMImm8; + OpRMImm32 = opRMImm32; + OpRImm64 = opRImm64; + OpRRM = opRRM; + Flags = flags; + } + } + + private static InstructionInfo[] _instTable; + + private Stream _stream; + + static Assembler() + { + _instTable = new InstructionInfo[(int)X86Instruction.Count]; + + // Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags + Add(X86Instruction.Add, new InstructionInfo(0x00000001, 0x00000083, 0x00000081, BadOp, 0x00000003, InstructionFlags.None)); + Add(X86Instruction.Addpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Addps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex)); + Add(X86Instruction.Addsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Addss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.And, new InstructionInfo(0x00000021, 0x04000083, 0x04000081, BadOp, 0x00000023, InstructionFlags.None)); + Add(X86Instruction.Andnpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Andnps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex)); + Add(X86Instruction.Bsr, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbd, InstructionFlags.None)); + 
Add(X86Instruction.Bswap, new InstructionInfo(0x00000fc8, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RegOnly)); + Add(X86Instruction.Call, new InstructionInfo(0x020000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Cmovcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f40, InstructionFlags.None)); + Add(X86Instruction.Cmp, new InstructionInfo(0x00000039, 0x07000083, 0x07000081, BadOp, 0x0000003b, InstructionFlags.None)); + Add(X86Instruction.Cmppd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Cmpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex)); + Add(X86Instruction.Cmpsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cmpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Cmpxchg16b, new InstructionInfo(0x01000fc7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RexW)); + Add(X86Instruction.Comisd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Comiss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex)); + Add(X86Instruction.Cpuid, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fa2, InstructionFlags.RegOnly)); + Add(X86Instruction.Cvtdq2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Cvtdq2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex)); + Add(X86Instruction.Cvtpd2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cvtpd2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex 
| InstructionFlags.Prefix66)); + Add(X86Instruction.Cvtps2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Cvtps2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex)); + Add(X86Instruction.Cvtsd2si, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2c, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cvtsd2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cvtsi2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cvtsi2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Cvtss2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Div, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x060000f7, InstructionFlags.None)); + Add(X86Instruction.Divpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Divps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex)); + Add(X86Instruction.Divsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Divss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Haddpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Haddps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Idiv, new InstructionInfo(BadOp, BadOp, BadOp, 
BadOp, 0x070000f7, InstructionFlags.None)); + Add(X86Instruction.Imul, new InstructionInfo(BadOp, 0x0000006b, 0x00000069, BadOp, 0x00000faf, InstructionFlags.None)); + Add(X86Instruction.Imul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstructionFlags.None)); + Add(X86Instruction.Insertps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Lea, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x0000008d, InstructionFlags.None)); + Add(X86Instruction.Maxpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Maxps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex)); + Add(X86Instruction.Maxsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Maxss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Minpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Minps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex)); + Add(X86Instruction.Minsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Minss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Mov, new InstructionInfo(0x00000089, BadOp, 0x000000c7, 0x000000b8, 0x0000008b, InstructionFlags.None)); + Add(X86Instruction.Mov16, new InstructionInfo(0x00000089, BadOp, 0x000000c7, BadOp, 0x0000008b, InstructionFlags.Prefix66)); + Add(X86Instruction.Mov8, new InstructionInfo(0x00000088, 0x000000c6, BadOp, BadOp, 0x0000008a, InstructionFlags.Reg8Src | 
InstructionFlags.Reg8Dest)); + Add(X86Instruction.Movd, new InstructionInfo(0x00000f7e, BadOp, BadOp, BadOp, 0x00000f6e, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Movdqu, new InstructionInfo(0x00000f7f, BadOp, BadOp, BadOp, 0x00000f6f, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Movhlps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f12, InstructionFlags.Vex)); + Add(X86Instruction.Movlhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f16, InstructionFlags.Vex)); + Add(X86Instruction.Movq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7e, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Movsd, new InstructionInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Movss, new InstructionInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Movsx16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbf, InstructionFlags.None)); + Add(X86Instruction.Movsx32, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000063, InstructionFlags.None)); + Add(X86Instruction.Movsx8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbe, InstructionFlags.Reg8Src)); + Add(X86Instruction.Movzx16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb7, InstructionFlags.None)); + Add(X86Instruction.Movzx8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb6, InstructionFlags.Reg8Src)); + Add(X86Instruction.Mul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x040000f7, InstructionFlags.None)); + Add(X86Instruction.Mulpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Mulps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex)); + Add(X86Instruction.Mulsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 
0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Mulss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Neg, new InstructionInfo(0x030000f7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Not, new InstructionInfo(0x020000f7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Or, new InstructionInfo(0x00000009, 0x01000083, 0x01000081, BadOp, 0x0000000b, InstructionFlags.None)); + Add(X86Instruction.Paddb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffc, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Paddd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Paddq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Paddw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffd, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pand, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdb, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pandn, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pavgb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pavgw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe3, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3810, InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpeqb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f74, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpeqd, new InstructionInfo(BadOp, BadOp, 
BadOp, BadOp, 0x00000f76, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpeqq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3829, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpeqw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f75, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpgtb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f64, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpgtd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f66, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpgtq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3837, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpgtw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f65, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pextrb, new InstructionInfo(0x000f3a14, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pextrd, new InstructionInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pextrq, new InstructionInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66)); + Add(X86Instruction.Pextrw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc5, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pinsrb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a20, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pinsrd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pinsrq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66)); + Add(X86Instruction.Pinsrw, new 
InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc4, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxsb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383d, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxsw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fee, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxub, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fde, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxud, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383f, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxuw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383e, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminsb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3838, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3839, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminsw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fea, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminub, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fda, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminud, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383b, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminuw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383a, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovsxbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3820, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovsxdq, new 
InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3825, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovsxwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3823, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovzxbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3830, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovzxdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3835, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovzxwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3833, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmulld, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3840, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmullw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd5, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pop, new InstructionInfo(0x0000008f, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Popcnt, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb8, InstructionFlags.PrefixF3)); + Add(X86Instruction.Por, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000feb, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pshufb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3800, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pshufd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f70, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pslld, new InstructionInfo(BadOp, 0x06000f72, BadOp, BadOp, 0x00000ff2, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pslldq, new InstructionInfo(BadOp, 0x07000f73, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psllq, new InstructionInfo(BadOp, 0x06000f73, BadOp, BadOp, 0x00000ff3, 
InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psllw, new InstructionInfo(BadOp, 0x06000f71, BadOp, BadOp, 0x00000ff1, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrad, new InstructionInfo(BadOp, 0x04000f72, BadOp, BadOp, 0x00000fe2, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psraw, new InstructionInfo(BadOp, 0x04000f71, BadOp, BadOp, 0x00000fe1, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrld, new InstructionInfo(BadOp, 0x02000f72, BadOp, BadOp, 0x00000fd2, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrlq, new InstructionInfo(BadOp, 0x02000f73, BadOp, BadOp, 0x00000fd3, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrldq, new InstructionInfo(BadOp, 0x03000f73, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrlw, new InstructionInfo(BadOp, 0x02000f71, BadOp, BadOp, 0x00000fd1, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psubb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff8, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psubd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffa, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psubq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffb, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psubw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff9, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckhbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f68, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckhdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6a, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckhqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 
0x00000f6d, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckhwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f69, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpcklbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f60, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckldq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f62, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpcklqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpcklwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f61, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Push, new InstructionInfo(BadOp, 0x0000006a, 0x00000068, BadOp, 0x060000ff, InstructionFlags.None)); + Add(X86Instruction.Pxor, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fef, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Rcpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstructionFlags.Vex)); + Add(X86Instruction.Rcpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Ror, new InstructionInfo(0x010000d3, 0x010000c1, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Roundpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a09, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Roundps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a08, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Roundsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0b, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Roundss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0a, InstructionFlags.Vex | InstructionFlags.Prefix66)); + 
Add(X86Instruction.Rsqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex)); + Add(X86Instruction.Rsqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Sar, new InstructionInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Setcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstructionFlags.Reg8Dest)); + Add(X86Instruction.Shl, new InstructionInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Shr, new InstructionInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Shufpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Shufps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex)); + Add(X86Instruction.Sqrtpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Sqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex)); + Add(X86Instruction.Sqrtsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Sqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Sub, new InstructionInfo(0x00000029, 0x05000083, 0x05000081, BadOp, 0x0000002b, InstructionFlags.None)); + Add(X86Instruction.Subpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Subps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex)); + Add(X86Instruction.Subsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | 
InstructionFlags.PrefixF2)); + Add(X86Instruction.Subss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Test, new InstructionInfo(0x00000085, BadOp, 0x000000f7, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Unpckhpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Unpckhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex)); + Add(X86Instruction.Unpcklpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Unpcklps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex)); + Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None)); + Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex)); + } + + private static void Add(X86Instruction inst, InstructionInfo info) + { + _instTable[(int)inst] = info; + } + + public Assembler(Stream stream) + { + _stream = stream; + } + + public void Add(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Add); + } + + public void Addsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Addsd); + } + + public void Addss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Addss); + } + + public void And(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, 
source, type, X86Instruction.And); + } + + public void Bsr(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Bsr); + } + + public void Bswap(Operand dest) + { + WriteInstruction(dest, null, dest.Type, X86Instruction.Bswap); + } + + public void Call(Operand dest) + { + WriteInstruction(dest, null, OperandType.None, X86Instruction.Call); + } + + public void Cdq() + { + WriteByte(0x99); + } + + public void Cmovcc(Operand dest, Operand source, OperandType type, X86Condition condition) + { + InstructionInfo info = _instTable[(int)X86Instruction.Cmovcc]; + + WriteOpCode(dest, null, source, type, info.Flags, info.OpRRM | (int)condition, rrm: true); + } + + public void Cmp(Operand src1, Operand src2, OperandType type) + { + WriteInstruction(src1, src2, type, X86Instruction.Cmp); + } + + public void Cqo() + { + WriteByte(0x48); + WriteByte(0x99); + } + + public void Cmpxchg16b(MemoryOperand memOp) + { + WriteByte(LockPrefix); + + WriteInstruction(memOp, null, OperandType.None, X86Instruction.Cmpxchg16b); + } + + public void Comisd(Operand src1, Operand src2) + { + WriteInstruction(src1, null, src2, X86Instruction.Comisd); + } + + public void Comiss(Operand src1, Operand src2) + { + WriteInstruction(src1, null, src2, X86Instruction.Comiss); + } + + public void Cpuid() + { + WriteInstruction(null, null, OperandType.None, X86Instruction.Cpuid); + } + + public void Cvtsd2ss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Cvtsd2ss); + } + + public void Cvtsi2sd(Operand dest, Operand src1, Operand src2, OperandType type) + { + WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2sd, type); + } + + public void Cvtsi2ss(Operand dest, Operand src1, Operand src2, OperandType type) + { + WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2ss, type); + } + + public void Cvtss2sd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, 
X86Instruction.Cvtss2sd); + } + + public void Div(Operand source) + { + WriteInstruction(null, source, source.Type, X86Instruction.Div); + } + + public void Divsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Divsd); + } + + public void Divss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Divss); + } + + public void Idiv(Operand source) + { + WriteInstruction(null, source, source.Type, X86Instruction.Idiv); + } + + public void Imul(Operand source) + { + WriteInstruction(null, source, source.Type, X86Instruction.Imul128); + } + + public void Imul(Operand dest, Operand source, OperandType type) + { + if (source.Kind != OperandKind.Register) + { + throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\"."); + } + + WriteInstruction(dest, source, type, X86Instruction.Imul); + } + + public void Imul(Operand dest, Operand src1, Operand src2, OperandType type) + { + InstructionInfo info = _instTable[(int)X86Instruction.Imul]; + + if (src2.Kind != OperandKind.Constant) + { + throw new ArgumentException($"Invalid source 2 operand kind \"{src2.Kind}\"."); + } + + if (IsImm8(src2.Value, src2.Type) && info.OpRMImm8 != BadOp) + { + WriteOpCode(dest, null, src1, type, info.Flags, info.OpRMImm8, rrm: true); + + WriteByte(src2.AsByte()); + } + else if (IsImm32(src2.Value, src2.Type) && info.OpRMImm32 != BadOp) + { + WriteOpCode(dest, null, src1, type, info.Flags, info.OpRMImm32, rrm: true); + + WriteInt32(src2.AsInt32()); + } + else + { + throw new ArgumentException($"Failed to encode constant 0x{src2.Value:X}."); + } + } + + public void Insertps(Operand dest, Operand src1, Operand src2, byte imm) + { + WriteInstruction(dest, src1, src2, X86Instruction.Insertps); + + WriteByte(imm); + } + + public void Jcc(X86Condition condition, long offset) + { + if (ConstFitsOnS8(offset)) + { + WriteByte((byte)(0x70 | (int)condition)); + + WriteByte((byte)offset); + } + else 
if (ConstFitsOnS32(offset)) + { + WriteByte(0x0f); + WriteByte((byte)(0x80 | (int)condition)); + + WriteInt32((int)offset); + } + else + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } + } + + public void Jmp(long offset) + { + if (ConstFitsOnS8(offset)) + { + WriteByte(0xeb); + + WriteByte((byte)offset); + } + else if (ConstFitsOnS32(offset)) + { + WriteByte(0xe9); + + WriteInt32((int)offset); + } + else + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } + } + + public void Lea(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Lea); + } + + public void Mov(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Mov); + } + + public void Mov16(Operand dest, Operand source) + { + WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov16); + } + + public void Mov8(Operand dest, Operand source) + { + WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov8); + } + + public void Movd(Operand dest, Operand source) + { + InstructionInfo info = _instTable[(int)X86Instruction.Movd]; + + if (source.Type.IsInteger() || source.Kind == OperandKind.Memory) + { + WriteOpCode(dest, null, source, OperandType.None, info.Flags, info.OpRRM, rrm: true); + } + else + { + WriteOpCode(dest, null, source, OperandType.None, info.Flags, info.OpRMR); + } + } + + public void Movdqu(Operand dest, Operand source) + { + WriteInstruction(dest, null, source, X86Instruction.Movdqu); + } + + public void Movhlps(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Movhlps); + } + + public void Movlhps(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Movlhps); + } + + public void Movq(Operand dest, Operand source) + { + InstructionInfo info = _instTable[(int)X86Instruction.Movd]; + + InstructionFlags flags = info.Flags | InstructionFlags.RexW; + + if 
(source.Type.IsInteger() || source.Kind == OperandKind.Memory) + { + WriteOpCode(dest, null, source, OperandType.None, flags, info.OpRRM, rrm: true); + } + else if (dest.Type.IsInteger() || dest.Kind == OperandKind.Memory) + { + WriteOpCode(dest, null, source, OperandType.None, flags, info.OpRMR); + } + else + { + WriteInstruction(dest, source, OperandType.None, X86Instruction.Movq); + } + } + + public void Movsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Movsd); + } + + public void Movss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Movss); + } + + public void Movsx16(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movsx16); + } + + public void Movsx32(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movsx32); + } + + public void Movsx8(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movsx8); + } + + public void Movzx16(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movzx16); + } + + public void Movzx8(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movzx8); + } + + public void Mul(Operand source) + { + WriteInstruction(null, source, source.Type, X86Instruction.Mul128); + } + + public void Mulsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Mulsd); + } + + public void Mulss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Mulss); + } + + public void Neg(Operand dest) + { + WriteInstruction(dest, null, dest.Type, X86Instruction.Neg); + } + + public void Not(Operand dest) + { + WriteInstruction(dest, null, dest.Type, X86Instruction.Not); + } + + public void Or(Operand dest, 
Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Or); + } + + public void Pcmpeqw(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Pcmpeqw); + } + + public void Pextrb(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, null, source, X86Instruction.Pextrb); + + WriteByte(imm); + } + + public void Pextrd(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, null, source, X86Instruction.Pextrd); + + WriteByte(imm); + } + + public void Pextrq(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, null, source, X86Instruction.Pextrq); + + WriteByte(imm); + } + + public void Pextrw(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, null, source, X86Instruction.Pextrw); + + WriteByte(imm); + } + + public void Pinsrb(Operand dest, Operand src1, Operand src2, byte imm) + { + WriteInstruction(dest, src1, src2, X86Instruction.Pinsrb); + + WriteByte(imm); + } + + public void Pinsrd(Operand dest, Operand src1, Operand src2, byte imm) + { + WriteInstruction(dest, src1, src2, X86Instruction.Pinsrd); + + WriteByte(imm); + } + + public void Pinsrq(Operand dest, Operand src1, Operand src2, byte imm) + { + WriteInstruction(dest, src1, src2, X86Instruction.Pinsrq); + + WriteByte(imm); + } + + public void Pinsrw(Operand dest, Operand src1, Operand src2, byte imm) + { + WriteInstruction(dest, src1, src2, X86Instruction.Pinsrw); + + WriteByte(imm); + } + + public void Pop(Operand dest) + { + if (dest.Kind == OperandKind.Register) + { + WriteCompactInst(dest, 0x58); + } + else + { + WriteInstruction(dest, null, dest.Type, X86Instruction.Pop); + } + } + + public void Popcnt(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Popcnt); + } + + public void Pshufd(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, null, source, X86Instruction.Pshufd); + + 
WriteByte(imm); + } + + public void Push(Operand source) + { + if (source.Kind == OperandKind.Register) + { + WriteCompactInst(source, 0x50); + } + else + { + WriteInstruction(null, source, source.Type, X86Instruction.Push); + } + } + + public void Return() + { + WriteByte(0xc3); + } + + public void Ror(Operand dest, Operand source, OperandType type) + { + WriteShiftInst(dest, source, type, X86Instruction.Ror); + } + + public void Sar(Operand dest, Operand source, OperandType type) + { + WriteShiftInst(dest, source, type, X86Instruction.Sar); + } + + public void Shl(Operand dest, Operand source, OperandType type) + { + WriteShiftInst(dest, source, type, X86Instruction.Shl); + } + + public void Shr(Operand dest, Operand source, OperandType type) + { + WriteShiftInst(dest, source, type, X86Instruction.Shr); + } + + public void Setcc(Operand dest, X86Condition condition) + { + InstructionInfo info = _instTable[(int)X86Instruction.Setcc]; + + WriteOpCode(dest, null, null, OperandType.None, info.Flags, info.OpRRM | (int)condition); + } + + public void Sub(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Sub); + } + + public void Subsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Subsd); + } + + public void Subss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Subss); + } + + public void Test(Operand src1, Operand src2, OperandType type) + { + WriteInstruction(src1, src2, type, X86Instruction.Test); + } + + public void Xor(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Xor); + } + + public void Xorps(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Xorps); + } + + public void WriteInstruction( + X86Instruction inst, + Operand dest, + Operand source, + OperandType type = OperandType.None) + { + 
WriteInstruction(dest, null, source, inst, type); + } + + public void WriteInstruction(X86Instruction inst, Operand dest, Operand src1, Operand src2) + { + if (src2.Kind == OperandKind.Constant) + { + WriteInstruction(src1, dest, src2, inst); + } + else + { + WriteInstruction(dest, src1, src2, inst); + } + } + + public void WriteInstruction(X86Instruction inst, Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, null, source, inst); + + WriteByte(imm); + } + + public void WriteInstruction( + X86Instruction inst, + Operand dest, + Operand src1, + Operand src2, + Operand src3) + { + // 3+ operands can only be encoded with the VEX encoding scheme. + Debug.Assert(HardwareCapabilities.SupportsVexEncoding); + + WriteInstruction(dest, src1, src2, inst); + + WriteByte((byte)(src3.AsByte() << 4)); + } + + public void WriteInstruction( + X86Instruction inst, + Operand dest, + Operand src1, + Operand src2, + byte imm) + { + WriteInstruction(dest, src1, src2, inst); + + WriteByte(imm); + } + + private void WriteShiftInst(Operand dest, Operand source, OperandType type, X86Instruction inst) + { + if (source.Kind == OperandKind.Register) + { + X86Register shiftReg = (X86Register)source.GetRegister().Index; + + if (shiftReg != X86Register.Rcx) + { + throw new ArgumentException($"Invalid shift register \"{shiftReg}\"."); + } + + source = null; + } + + WriteInstruction(dest, source, type, inst); + } + + private void WriteInstruction(Operand dest, Operand source, OperandType type, X86Instruction inst) + { + InstructionInfo info = _instTable[(int)inst]; + + if (source != null) + { + if (source.Kind == OperandKind.Constant) + { + ulong imm = source.Value; + + if (inst == X86Instruction.Mov8) + { + WriteOpCode(dest, null, null, type, info.Flags, info.OpRMImm8); + + WriteByte((byte)imm); + } + else if (inst == X86Instruction.Mov16) + { + WriteOpCode(dest, null, null, type, info.Flags, info.OpRMImm32); + + WriteInt16((short)imm); + } + else if (IsImm8(imm, type) && 
info.OpRMImm8 != BadOp) + { + WriteOpCode(dest, null, null, type, info.Flags, info.OpRMImm8); + + WriteByte((byte)imm); + } + else if (IsImm32(imm, type) && info.OpRMImm32 != BadOp) + { + WriteOpCode(dest, null, null, type, info.Flags, info.OpRMImm32); + + WriteInt32((int)imm); + } + else if (dest != null && dest.Kind == OperandKind.Register && info.OpRImm64 != BadOp) + { + int rexPrefix = GetRexPrefix(dest, source, type, rrm: false); + + if (rexPrefix != 0) + { + WriteByte((byte)rexPrefix); + } + + WriteByte((byte)(info.OpRImm64 + (dest.GetRegister().Index & 0b111))); + + WriteUInt64(imm); + } + else + { + throw new ArgumentException($"Failed to encode constant 0x{imm:X}."); + } + } + else if (source.Kind == OperandKind.Register && info.OpRMR != BadOp) + { + WriteOpCode(dest, null, source, type, info.Flags, info.OpRMR); + } + else if (info.OpRRM != BadOp) + { + WriteOpCode(dest, null, source, type, info.Flags, info.OpRRM, rrm: true); + } + else + { + throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\"."); + } + } + else if (info.OpRRM != BadOp) + { + WriteOpCode(dest, null, source, type, info.Flags, info.OpRRM, rrm: true); + } + else if (info.OpRMR != BadOp) + { + WriteOpCode(dest, null, source, type, info.Flags, info.OpRMR); + } + else + { + throw new ArgumentNullException(nameof(source)); + } + } + + private void WriteInstruction( + Operand dest, + Operand src1, + Operand src2, + X86Instruction inst, + OperandType type = OperandType.None) + { + InstructionInfo info = _instTable[(int)inst]; + + if (src2 != null) + { + if (src2.Kind == OperandKind.Constant) + { + ulong imm = src2.Value; + + if ((byte)imm == imm && info.OpRMImm8 != BadOp) + { + WriteOpCode(dest, src1, null, type, info.Flags, info.OpRMImm8); + + WriteByte((byte)imm); + } + else + { + throw new ArgumentException($"Failed to encode constant 0x{imm:X}."); + } + } + else if (src2.Kind == OperandKind.Register && info.OpRMR != BadOp) + { + WriteOpCode(dest, src1, src2, type, 
info.Flags, info.OpRMR); + } + else if (info.OpRRM != BadOp) + { + WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true); + } + else + { + throw new ArgumentException($"Invalid source operand kind \"{src2.Kind}\"."); + } + } + else if (info.OpRRM != BadOp) + { + WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true); + } + else if (info.OpRMR != BadOp) + { + WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR); + } + else + { + throw new ArgumentNullException(nameof(src2)); + } + } + + private void WriteOpCode( + Operand dest, + Operand src1, + Operand src2, + OperandType type, + InstructionFlags flags, + int opCode, + bool rrm = false) + { + int rexPrefix = GetRexPrefix(dest, src2, type, rrm); + + if ((flags & InstructionFlags.RexW) != 0) + { + rexPrefix |= RexWPrefix; + } + + int modRM = (opCode >> OpModRMBits) << 3; + + MemoryOperand memOp = null; + + if (dest != null) + { + if (dest.Kind == OperandKind.Register) + { + int regIndex = dest.GetRegister().Index; + + modRM |= (regIndex & 0b111) << (rrm ? 3 : 0); + + if ((flags & InstructionFlags.Reg8Dest) != 0 && regIndex >= 4) + { + rexPrefix |= RexPrefix; + } + } + else if (dest.Kind == OperandKind.Memory) + { + memOp = dest as MemoryOperand; + } + else + { + throw new ArgumentException("Invalid destination operand kind \"" + dest.Kind + "\"."); + } + } + + if (src2 != null) + { + if (src2.Kind == OperandKind.Register) + { + int regIndex = src2.GetRegister().Index; + + modRM |= (regIndex & 0b111) << (rrm ? 
0 : 3); + + if ((flags & InstructionFlags.Reg8Src) != 0 && regIndex >= 4) + { + rexPrefix |= RexPrefix; + } + } + else if (src2.Kind == OperandKind.Memory && memOp == null) + { + memOp = src2 as MemoryOperand; + } + else + { + throw new ArgumentException("Invalid source operand kind \"" + src2.Kind + "\"."); + } + } + + bool needsSibByte = false; + bool needsDisplacement = false; + + int sib = 0; + + if (memOp != null) + { + // Either source or destination is a memory operand. + Register baseReg = memOp.BaseAddress.GetRegister(); + + X86Register baseRegLow = (X86Register)(baseReg.Index & 0b111); + + needsSibByte = memOp.Index != null || baseRegLow == X86Register.Rsp; + needsDisplacement = memOp.Displacement != 0 || baseRegLow == X86Register.Rbp; + + if (needsDisplacement) + { + if (ConstFitsOnS8(memOp.Displacement)) + { + modRM |= 0x40; + } + else /* if (ConstFitsOnS32(memOp.Displacement)) */ + { + modRM |= 0x80; + } + } + + if (baseReg.Index >= 8) + { + rexPrefix |= RexPrefix | (baseReg.Index >> 3); + } + + if (needsSibByte) + { + sib = (int)baseRegLow; + + if (memOp.Index != null) + { + int indexReg = memOp.Index.GetRegister().Index; + + if (indexReg == (int)X86Register.Rsp) + { + throw new ArgumentException("Using RSP as index register on the memory operand is not allowed."); + } + + if (indexReg >= 8) + { + rexPrefix |= RexPrefix | (indexReg >> 3) << 1; + } + + sib |= (indexReg & 0b111) << 3; + } + else + { + sib |= 0b100 << 3; + } + + sib |= (int)memOp.Scale << 6; + + modRM |= 0b100; + } + else + { + modRM |= (int)baseRegLow; + } + } + else + { + // Source and destination are registers. 
+ modRM |= 0xc0; + } + + Debug.Assert(opCode != BadOp, "Invalid opcode value."); + + if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding) + { + int vexByte2 = (int)(flags & InstructionFlags.PrefixMask) >> (int)InstructionFlags.PrefixBit; + + if (src1 != null) + { + vexByte2 |= (src1.GetRegister().Index ^ 0xf) << 3; + } + else + { + vexByte2 |= 0b1111 << 3; + } + + ushort opCodeHigh = (ushort)(opCode >> 8); + + if ((rexPrefix & 0b1011) == 0 && opCodeHigh == 0xf) + { + // Two-byte form. + WriteByte(0xc5); + + vexByte2 |= (~rexPrefix & 4) << 5; + + WriteByte((byte)vexByte2); + } + else + { + // Three-byte form. + WriteByte(0xc4); + + int vexByte1 = (~rexPrefix & 7) << 5; + + switch (opCodeHigh) + { + case 0xf: vexByte1 |= 1; break; + case 0xf38: vexByte1 |= 2; break; + case 0xf3a: vexByte1 |= 3; break; + + default: Debug.Assert(false, $"Failed to VEX encode opcode 0x{opCode:X}."); break; + } + + vexByte2 |= (rexPrefix & 8) << 4; + + WriteByte((byte)vexByte1); + WriteByte((byte)vexByte2); + } + + opCode &= 0xff; + } + else + { + switch (flags & InstructionFlags.PrefixMask) + { + case InstructionFlags.Prefix66: WriteByte(0x66); break; + case InstructionFlags.PrefixF2: WriteByte(0xf2); break; + case InstructionFlags.PrefixF3: WriteByte(0xf3); break; + } + + if (rexPrefix != 0) + { + WriteByte((byte)rexPrefix); + } + } + + if (dest != null && (flags & InstructionFlags.RegOnly) != 0) + { + opCode += dest.GetRegister().Index & 7; + } + + if ((opCode & 0xff0000) != 0) + { + WriteByte((byte)(opCode >> 16)); + } + + if ((opCode & 0xff00) != 0) + { + WriteByte((byte)(opCode >> 8)); + } + + WriteByte((byte)opCode); + + if ((flags & InstructionFlags.RegOnly) == 0) + { + WriteByte((byte)modRM); + + if (needsSibByte) + { + WriteByte((byte)sib); + } + + if (needsDisplacement) + { + if (ConstFitsOnS8(memOp.Displacement)) + { + WriteByte((byte)memOp.Displacement); + } + else /* if (ConstFitsOnS32(memOp.Displacement)) */ + { + 
WriteInt32(memOp.Displacement); + } + } + } + } + + private void WriteCompactInst(Operand operand, int opCode) + { + int regIndex = operand.GetRegister().Index; + + if (regIndex >= 8) + { + WriteByte(0x41); + } + + WriteByte((byte)(opCode + (regIndex & 0b111))); + } + + private static int GetRexPrefix(Operand dest, Operand source, OperandType type, bool rrm) + { + int rexPrefix = 0; + + if (Is64Bits(type)) + { + rexPrefix = RexWPrefix; + } + + void SetRegisterHighBit(Register reg, int bit) + { + if (reg.Index >= 8) + { + rexPrefix |= RexPrefix | (reg.Index >> 3) << bit; + } + } + + if (dest != null && dest.Kind == OperandKind.Register) + { + SetRegisterHighBit(dest.GetRegister(), rrm ? 2 : 0); + } + + if (source != null && source.Kind == OperandKind.Register) + { + SetRegisterHighBit(source.GetRegister(), rrm ? 0 : 2); + } + + return rexPrefix; + } + + private static bool Is64Bits(OperandType type) + { + return type == OperandType.I64 || type == OperandType.FP64; + } + + private static bool IsImm8(ulong immediate, OperandType type) + { + long value = type == OperandType.I32 ? (int)immediate : (long)immediate; + + return ConstFitsOnS8(value); + } + + private static bool IsImm32(ulong immediate, OperandType type) + { + long value = type == OperandType.I32 ? (int)immediate : (long)immediate; + + return ConstFitsOnS32(value); + } + + public static int GetJccLength(long offset) + { + if (ConstFitsOnS8(offset < 0 ? offset - 2 : offset)) + { + return 2; + } + else if (ConstFitsOnS32(offset < 0 ? offset - 6 : offset)) + { + return 6; + } + else + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } + } + + public static int GetJmpLength(long offset) + { + if (ConstFitsOnS8(offset < 0 ? offset - 2 : offset)) + { + return 2; + } + else if (ConstFitsOnS32(offset < 0 ? 
offset - 5 : offset)) + { + return 5; + } + else + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } + } + + private static bool ConstFitsOnS8(long value) + { + return value == (sbyte)value; + } + + private static bool ConstFitsOnS32(long value) + { + return value == (int)value; + } + + private void WriteInt16(short value) + { + WriteUInt16((ushort)value); + } + + private void WriteInt32(int value) + { + WriteUInt32((uint)value); + } + + private void WriteByte(byte value) + { + _stream.WriteByte(value); + } + + private void WriteUInt16(ushort value) + { + _stream.WriteByte((byte)(value >> 0)); + _stream.WriteByte((byte)(value >> 8)); + } + + private void WriteUInt32(uint value) + { + _stream.WriteByte((byte)(value >> 0)); + _stream.WriteByte((byte)(value >> 8)); + _stream.WriteByte((byte)(value >> 16)); + _stream.WriteByte((byte)(value >> 24)); + } + + private void WriteUInt64(ulong value) + { + _stream.WriteByte((byte)(value >> 0)); + _stream.WriteByte((byte)(value >> 8)); + _stream.WriteByte((byte)(value >> 16)); + _stream.WriteByte((byte)(value >> 24)); + _stream.WriteByte((byte)(value >> 32)); + _stream.WriteByte((byte)(value >> 40)); + _stream.WriteByte((byte)(value >> 48)); + _stream.WriteByte((byte)(value >> 56)); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/CallConvName.cs b/ARMeilleure/CodeGen/X86/CallConvName.cs new file mode 100644 index 000000000..be3676282 --- /dev/null +++ b/ARMeilleure/CodeGen/X86/CallConvName.cs @@ -0,0 +1,8 @@ +namespace ARMeilleure.CodeGen.X86 +{ + enum CallConvName + { + SystemV, + Windows + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/CallingConvention.cs b/ARMeilleure/CodeGen/X86/CallingConvention.cs new file mode 100644 index 000000000..2769fd93e --- /dev/null +++ b/ARMeilleure/CodeGen/X86/CallingConvention.cs @@ -0,0 +1,159 @@ +using System; +using System.Runtime.InteropServices; + +namespace ARMeilleure.CodeGen.X86 +{ + static class CallingConvention + { + 
private const int RegistersMask = 0xffff; + + public static int GetIntAvailableRegisters() + { + return RegistersMask & ~(1 << (int)X86Register.Rsp); + } + + public static int GetVecAvailableRegisters() + { + return RegistersMask; + } + + public static int GetIntCallerSavedRegisters() + { + if (GetCurrentCallConv() == CallConvName.Windows) + { + return (1 << (int)X86Register.Rax) | + (1 << (int)X86Register.Rcx) | + (1 << (int)X86Register.Rdx) | + (1 << (int)X86Register.R8) | + (1 << (int)X86Register.R9) | + (1 << (int)X86Register.R10) | + (1 << (int)X86Register.R11); + } + else /* if (GetCurrentCallConv() == CallConvName.SystemV) */ + { + return (1 << (int)X86Register.Rax) | + (1 << (int)X86Register.Rcx) | + (1 << (int)X86Register.Rdx) | + (1 << (int)X86Register.Rsi) | + (1 << (int)X86Register.Rdi) | + (1 << (int)X86Register.R8) | + (1 << (int)X86Register.R9) | + (1 << (int)X86Register.R10) | + (1 << (int)X86Register.R11); + } + } + + public static int GetVecCallerSavedRegisters() + { + if (GetCurrentCallConv() == CallConvName.Windows) + { + return (1 << (int)X86Register.Xmm0) | + (1 << (int)X86Register.Xmm1) | + (1 << (int)X86Register.Xmm2) | + (1 << (int)X86Register.Xmm3) | + (1 << (int)X86Register.Xmm4) | + (1 << (int)X86Register.Xmm5); + } + else /* if (GetCurrentCallConv() == CallConvName.SystemV) */ + { + return RegistersMask; + } + } + + public static int GetIntCalleeSavedRegisters() + { + return GetIntCallerSavedRegisters() ^ RegistersMask; + } + + public static int GetVecCalleeSavedRegisters() + { + return GetVecCallerSavedRegisters() ^ RegistersMask; + } + + public static int GetArgumentsOnRegsCount() + { + return 4; + } + + public static int GetIntArgumentsOnRegsCount() + { + return 6; + } + + public static int GetVecArgumentsOnRegsCount() + { + return 8; + } + + public static X86Register GetIntArgumentRegister(int index) + { + if (GetCurrentCallConv() == CallConvName.Windows) + { + switch (index) + { + case 0: return X86Register.Rcx; + case 1: return 
X86Register.Rdx; + case 2: return X86Register.R8; + case 3: return X86Register.R9; + } + } + else /* if (GetCurrentCallConv() == CallConvName.SystemV) */ + { + switch (index) + { + case 0: return X86Register.Rdi; + case 1: return X86Register.Rsi; + case 2: return X86Register.Rdx; + case 3: return X86Register.Rcx; + case 4: return X86Register.R8; + case 5: return X86Register.R9; + } + } + + throw new ArgumentOutOfRangeException(nameof(index)); + } + + public static X86Register GetVecArgumentRegister(int index) + { + int count; + + if (GetCurrentCallConv() == CallConvName.Windows) + { + count = 4; + } + else /* if (GetCurrentCallConv() == CallConvName.SystemV) */ + { + count = 8; + } + + if ((uint)index < count) + { + return X86Register.Xmm0 + index; + } + + throw new ArgumentOutOfRangeException(nameof(index)); + } + + public static X86Register GetIntReturnRegister() + { + return X86Register.Rax; + } + + public static X86Register GetIntReturnRegisterHigh() + { + return X86Register.Rdx; + } + + public static X86Register GetVecReturnRegister() + { + return X86Register.Xmm0; + } + + public static CallConvName GetCurrentCallConv() + { + return RuntimeInformation.IsOSPlatform(OSPlatform.Windows) + ? 
CallConvName.Windows + : CallConvName.SystemV; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/CodeGenContext.cs b/ARMeilleure/CodeGen/X86/CodeGenContext.cs new file mode 100644 index 000000000..d719b5164 --- /dev/null +++ b/ARMeilleure/CodeGen/X86/CodeGenContext.cs @@ -0,0 +1,305 @@ +using ARMeilleure.CodeGen.RegisterAllocators; +using ARMeilleure.Common; +using ARMeilleure.IntermediateRepresentation; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; + +namespace ARMeilleure.CodeGen.X86 +{ + class CodeGenContext + { + private const int ReservedBytesForJump = 1; + + private Stream _stream; + + public int StreamOffset => (int)_stream.Length; + + public AllocationResult AllocResult { get; } + + public Assembler Assembler { get; } + + public BasicBlock CurrBlock { get; private set; } + + public int CallArgsRegionSize { get; } + public int XmmSaveRegionSize { get; } + + private long[] _blockOffsets; + + private struct Jump + { + public bool IsConditional { get; } + + public X86Condition Condition { get; } + + public BasicBlock Target { get; } + + public long JumpPosition { get; } + + public long RelativeOffset { get; set; } + + public int InstSize { get; set; } + + public Jump(BasicBlock target, long jumpPosition) + { + IsConditional = false; + Condition = 0; + Target = target; + JumpPosition = jumpPosition; + + RelativeOffset = 0; + + InstSize = 0; + } + + public Jump(X86Condition condition, BasicBlock target, long jumpPosition) + { + IsConditional = true; + Condition = condition; + Target = target; + JumpPosition = jumpPosition; + + RelativeOffset = 0; + + InstSize = 0; + } + } + + private List _jumps; + + private X86Condition _jNearCondition; + + private long _jNearPosition; + private int _jNearLength; + + public CodeGenContext(Stream stream, AllocationResult allocResult, int maxCallArgs, int blocksCount) + { + _stream = stream; + + AllocResult = allocResult; + + Assembler = new Assembler(stream); + + 
CallArgsRegionSize = GetCallArgsRegionSize(allocResult, maxCallArgs, out int xmmSaveRegionSize); + XmmSaveRegionSize = xmmSaveRegionSize; + + _blockOffsets = new long[blocksCount]; + + _jumps = new List(); + } + + private int GetCallArgsRegionSize(AllocationResult allocResult, int maxCallArgs, out int xmmSaveRegionSize) + { + // We need to add 8 bytes to the total size, as the call to this + // function already pushed 8 bytes (the return address). + int intMask = CallingConvention.GetIntCalleeSavedRegisters() & allocResult.IntUsedRegisters; + int vecMask = CallingConvention.GetVecCalleeSavedRegisters() & allocResult.VecUsedRegisters; + + xmmSaveRegionSize = BitUtils.CountBits(vecMask) * 16; + + int calleeSaveRegionSize = BitUtils.CountBits(intMask) * 8 + xmmSaveRegionSize + 8; + + int argsCount = maxCallArgs; + + if (argsCount < 0) + { + // When the function has no calls, argsCount is -1. + // In this case, we don't need to allocate the shadow space. + argsCount = 0; + } + else if (argsCount < 4) + { + // The ABI mandates that the space for at least 4 arguments + // is reserved on the stack (this is called shadow space). + argsCount = 4; + } + + int frameSize = calleeSaveRegionSize + allocResult.SpillRegionSize; + + // TODO: Instead of always multiplying by 16 (the largest possible size of a variable, + // since a V128 has 16 bytes), we should calculate the exact size consumed by the + // arguments passed to the called functions on the stack. + int callArgsAndFrameSize = frameSize + argsCount * 16; + + // Ensure that the Stack Pointer will be aligned to 16 bytes. 
+ callArgsAndFrameSize = (callArgsAndFrameSize + 0xf) & ~0xf; + + return callArgsAndFrameSize - frameSize; + } + + public void EnterBlock(BasicBlock block) + { + _blockOffsets[block.Index] = _stream.Position; + + CurrBlock = block; + } + + public void JumpTo(BasicBlock target) + { + _jumps.Add(new Jump(target, _stream.Position)); + + WritePadding(ReservedBytesForJump); + } + + public void JumpTo(X86Condition condition, BasicBlock target) + { + _jumps.Add(new Jump(condition, target, _stream.Position)); + + WritePadding(ReservedBytesForJump); + } + + public void JumpToNear(X86Condition condition) + { + _jNearCondition = condition; + _jNearPosition = _stream.Position; + _jNearLength = Assembler.GetJccLength(0); + + _stream.Seek(_jNearLength, SeekOrigin.Current); + } + + public void JumpHere() + { + long currentPosition = _stream.Position; + + _stream.Seek(_jNearPosition, SeekOrigin.Begin); + + long offset = currentPosition - (_jNearPosition + _jNearLength); + + Debug.Assert(_jNearLength == Assembler.GetJccLength(offset), "Relative offset doesn't fit on near jump."); + + Assembler.Jcc(_jNearCondition, offset); + + _stream.Seek(currentPosition, SeekOrigin.Begin); + } + + private void WritePadding(int size) + { + while (size-- > 0) + { + _stream.WriteByte(0); + } + } + + public byte[] GetCode() + { + // Write jump relative offsets. 
+ bool modified; + + do + { + modified = false; + + for (int index = 0; index < _jumps.Count; index++) + { + Jump jump = _jumps[index]; + + long jumpTarget = _blockOffsets[jump.Target.Index]; + + long offset = jumpTarget - jump.JumpPosition; + + if (offset < 0) + { + for (int index2 = index - 1; index2 >= 0; index2--) + { + Jump jump2 = _jumps[index2]; + + if (jump2.JumpPosition < jumpTarget) + { + break; + } + + offset -= jump2.InstSize - ReservedBytesForJump; + } + } + else + { + for (int index2 = index + 1; index2 < _jumps.Count; index2++) + { + Jump jump2 = _jumps[index2]; + + if (jump2.JumpPosition >= jumpTarget) + { + break; + } + + offset += jump2.InstSize - ReservedBytesForJump; + } + + offset -= ReservedBytesForJump; + } + + if (jump.IsConditional) + { + jump.InstSize = Assembler.GetJccLength(offset); + } + else + { + jump.InstSize = Assembler.GetJmpLength(offset); + } + + // The jump is relative to the next instruction, not the current one. + // Since we didn't know the next instruction address when calculating + // the offset (as the size of the current jump instruction was not known), + // we now need to compensate the offset with the jump instruction size. + // It's also worth noting that: + // - This is only needed for backward jumps. + // - GetJmpLength and GetJccLength also compensate for the offset + // internally when computing the jump instruction size. + if (offset < 0) + { + offset -= jump.InstSize; + } + + if (jump.RelativeOffset != offset) + { + modified = true; + } + + jump.RelativeOffset = offset; + + _jumps[index] = jump; + } + } + while (modified); + + // Write the code, ignoring the dummy bytes after jumps, into a new stream. 
+ _stream.Seek(0, SeekOrigin.Begin); + + using (MemoryStream codeStream = new MemoryStream()) + { + Assembler assembler = new Assembler(codeStream); + + byte[] buffer; + + for (int index = 0; index < _jumps.Count; index++) + { + Jump jump = _jumps[index]; + + buffer = new byte[jump.JumpPosition - _stream.Position]; + + _stream.Read(buffer, 0, buffer.Length); + _stream.Seek(ReservedBytesForJump, SeekOrigin.Current); + + codeStream.Write(buffer); + + if (jump.IsConditional) + { + assembler.Jcc(jump.Condition, jump.RelativeOffset); + } + else + { + assembler.Jmp(jump.RelativeOffset); + } + } + + buffer = new byte[_stream.Length - _stream.Position]; + + _stream.Read(buffer, 0, buffer.Length); + + codeStream.Write(buffer); + + return codeStream.ToArray(); + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs new file mode 100644 index 000000000..ae24b5631 --- /dev/null +++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -0,0 +1,1661 @@ +using ARMeilleure.CodeGen.Optimizations; +using ARMeilleure.CodeGen.RegisterAllocators; +using ARMeilleure.CodeGen.Unwinding; +using ARMeilleure.Common; +using ARMeilleure.Diagnostics; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; + +namespace ARMeilleure.CodeGen.X86 +{ + static class CodeGenerator + { + private const int PageSize = 0x1000; + private const int StackGuardSize = 0x2000; + + private static Action[] _instTable; + + static CodeGenerator() + { + _instTable = new Action[EnumUtils.GetCount(typeof(Instruction))]; + + Add(Instruction.Add, GenerateAdd); + Add(Instruction.BitwiseAnd, GenerateBitwiseAnd); + Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr); + Add(Instruction.BitwiseNot, GenerateBitwiseNot); + Add(Instruction.BitwiseOr, GenerateBitwiseOr); + Add(Instruction.Branch, GenerateBranch); + 
Add(Instruction.BranchIfFalse, GenerateBranchIfFalse); + Add(Instruction.BranchIfTrue, GenerateBranchIfTrue); + Add(Instruction.ByteSwap, GenerateByteSwap); + Add(Instruction.Call, GenerateCall); + Add(Instruction.Clobber, GenerateClobber); + Add(Instruction.CompareAndSwap128, GenerateCompareAndSwap128); + Add(Instruction.CompareEqual, GenerateCompareEqual); + Add(Instruction.CompareGreater, GenerateCompareGreater); + Add(Instruction.CompareGreaterOrEqual, GenerateCompareGreaterOrEqual); + Add(Instruction.CompareGreaterOrEqualUI, GenerateCompareGreaterOrEqualUI); + Add(Instruction.CompareGreaterUI, GenerateCompareGreaterUI); + Add(Instruction.CompareLess, GenerateCompareLess); + Add(Instruction.CompareLessOrEqual, GenerateCompareLessOrEqual); + Add(Instruction.CompareLessOrEqualUI, GenerateCompareLessOrEqualUI); + Add(Instruction.CompareLessUI, GenerateCompareLessUI); + Add(Instruction.CompareNotEqual, GenerateCompareNotEqual); + Add(Instruction.ConditionalSelect, GenerateConditionalSelect); + Add(Instruction.ConvertI64ToI32, GenerateConvertI64ToI32); + Add(Instruction.ConvertToFP, GenerateConvertToFP); + Add(Instruction.Copy, GenerateCopy); + Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros); + Add(Instruction.CpuId, GenerateCpuId); + Add(Instruction.Divide, GenerateDivide); + Add(Instruction.DivideUI, GenerateDivideUI); + Add(Instruction.Fill, GenerateFill); + Add(Instruction.Load, GenerateLoad); + Add(Instruction.Load16, GenerateLoad16); + Add(Instruction.Load8, GenerateLoad8); + Add(Instruction.Multiply, GenerateMultiply); + Add(Instruction.Multiply64HighSI, GenerateMultiply64HighSI); + Add(Instruction.Multiply64HighUI, GenerateMultiply64HighUI); + Add(Instruction.Negate, GenerateNegate); + Add(Instruction.Return, GenerateReturn); + Add(Instruction.RotateRight, GenerateRotateRight); + Add(Instruction.ShiftLeft, GenerateShiftLeft); + Add(Instruction.ShiftRightSI, GenerateShiftRightSI); + Add(Instruction.ShiftRightUI, GenerateShiftRightUI); + 
Add(Instruction.SignExtend16, GenerateSignExtend16); + Add(Instruction.SignExtend32, GenerateSignExtend32); + Add(Instruction.SignExtend8, GenerateSignExtend8); + Add(Instruction.Spill, GenerateSpill); + Add(Instruction.SpillArg, GenerateSpillArg); + Add(Instruction.StackAlloc, GenerateStackAlloc); + Add(Instruction.Store, GenerateStore); + Add(Instruction.Store16, GenerateStore16); + Add(Instruction.Store8, GenerateStore8); + Add(Instruction.Subtract, GenerateSubtract); + Add(Instruction.VectorCreateScalar, GenerateVectorCreateScalar); + Add(Instruction.VectorExtract, GenerateVectorExtract); + Add(Instruction.VectorExtract16, GenerateVectorExtract16); + Add(Instruction.VectorExtract8, GenerateVectorExtract8); + Add(Instruction.VectorInsert, GenerateVectorInsert); + Add(Instruction.VectorInsert16, GenerateVectorInsert16); + Add(Instruction.VectorInsert8, GenerateVectorInsert8); + Add(Instruction.VectorOne, GenerateVectorOne); + Add(Instruction.VectorZero, GenerateVectorZero); + Add(Instruction.VectorZeroUpper64, GenerateVectorZeroUpper64); + Add(Instruction.VectorZeroUpper96, GenerateVectorZeroUpper96); + Add(Instruction.ZeroExtend16, GenerateZeroExtend16); + Add(Instruction.ZeroExtend32, GenerateZeroExtend32); + Add(Instruction.ZeroExtend8, GenerateZeroExtend8); + } + + private static void Add(Instruction inst, Action func) + { + _instTable[(int)inst] = func; + } + + public static CompiledFunction Generate(CompilerContext cctx) + { + ControlFlowGraph cfg = cctx.Cfg; + + Logger.StartPass(PassName.Optimization); + + if ((cctx.Options & CompilerOptions.SsaForm) != 0 && + (cctx.Options & CompilerOptions.Optimize) != 0) + { + Optimizer.RunPass(cfg); + } + + Logger.EndPass(PassName.Optimization, cfg); + + Logger.StartPass(PassName.PreAllocation); + + StackAllocator stackAlloc = new StackAllocator(); + + PreAllocator.RunPass(cctx, stackAlloc, out int maxCallArgs); + + Logger.EndPass(PassName.PreAllocation, cfg); + + Logger.StartPass(PassName.RegisterAllocation); + + if 
((cctx.Options & CompilerOptions.SsaForm) != 0) + { + Ssa.Deconstruct(cfg); + } + + IRegisterAllocator regAlloc; + + if ((cctx.Options & CompilerOptions.Lsra) != 0) + { + regAlloc = new LinearScanAllocator(); + } + else + { + regAlloc = new HybridAllocator(); + } + + RegisterMasks regMasks = new RegisterMasks( + CallingConvention.GetIntAvailableRegisters(), + CallingConvention.GetVecAvailableRegisters(), + CallingConvention.GetIntCallerSavedRegisters(), + CallingConvention.GetVecCallerSavedRegisters(), + CallingConvention.GetIntCalleeSavedRegisters(), + CallingConvention.GetVecCalleeSavedRegisters()); + + AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks); + + Logger.EndPass(PassName.RegisterAllocation, cfg); + + Logger.StartPass(PassName.CodeGeneration); + + using (MemoryStream stream = new MemoryStream()) + { + CodeGenContext context = new CodeGenContext(stream, allocResult, maxCallArgs, cfg.Blocks.Count); + + UnwindInfo unwindInfo = WritePrologue(context); + + foreach (BasicBlock block in cfg.Blocks) + { + context.EnterBlock(block); + + foreach (Node node in block.Operations) + { + if (node is Operation operation) + { + GenerateOperation(context, operation); + } + } + } + + Logger.EndPass(PassName.CodeGeneration); + + return new CompiledFunction(context.GetCode(), unwindInfo); + } + } + + private static void GenerateOperation(CodeGenContext context, Operation operation) + { + if (operation.Instruction == Instruction.Extended) + { + IntrinsicOperation intrinOp = (IntrinsicOperation)operation; + + IntrinsicInfo info = IntrinsicTable.GetInfo(intrinOp.Intrinsic); + + switch (info.Type) + { + case IntrinsicType.Comis_: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + switch (intrinOp.Intrinsic) + { + case Intrinsic.X86Comisdeq: + context.Assembler.Comisd(src1, src2); + context.Assembler.Setcc(dest, X86Condition.Equal); + break; + + case Intrinsic.X86Comisdge: + 
context.Assembler.Comisd(src1, src2); + context.Assembler.Setcc(dest, X86Condition.AboveOrEqual); + break; + + case Intrinsic.X86Comisdlt: + context.Assembler.Comisd(src1, src2); + context.Assembler.Setcc(dest, X86Condition.Below); + break; + + case Intrinsic.X86Comisseq: + context.Assembler.Comiss(src1, src2); + context.Assembler.Setcc(dest, X86Condition.Equal); + break; + + case Intrinsic.X86Comissge: + context.Assembler.Comiss(src1, src2); + context.Assembler.Setcc(dest, X86Condition.AboveOrEqual); + break; + + case Intrinsic.X86Comisslt: + context.Assembler.Comiss(src1, src2); + context.Assembler.Setcc(dest, X86Condition.Below); + break; + } + + context.Assembler.Movzx8(dest, dest, OperandType.I32); + + break; + } + + case IntrinsicType.PopCount: + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Popcnt(dest, source, dest.Type); + + break; + } + + case IntrinsicType.Unary: + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(!dest.Type.IsInteger()); + + context.Assembler.WriteInstruction(info.Inst, dest, source); + + break; + } + + case IntrinsicType.UnaryToGpr: + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && !source.Type.IsInteger()); + + context.Assembler.WriteInstruction(info.Inst, dest, source, dest.Type); + + break; + } + + case IntrinsicType.Binary: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1); + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(!dest.Type.IsInteger()); + Debug.Assert(!src2.Type.IsInteger() || src2.Kind == OperandKind.Constant); + + context.Assembler.WriteInstruction(info.Inst, dest, 
src1, src2); + + break; + } + + case IntrinsicType.BinaryImm: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1); + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(!dest.Type.IsInteger() && src2.Kind == OperandKind.Constant); + + context.Assembler.WriteInstruction(info.Inst, dest, src1, src2.AsByte()); + + break; + } + + case IntrinsicType.Ternary: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameType(dest, src1, src2, src3); + + Debug.Assert(!dest.Type.IsInteger()); + + if (info.Inst == X86Instruction.Pblendvb && HardwareCapabilities.SupportsVexEncoding) + { + context.Assembler.WriteInstruction(X86Instruction.Vpblendvb, dest, src1, src2, src3); + } + else + { + EnsureSameReg(dest, src1); + + Debug.Assert(src3.GetRegister().Index == 0); + + context.Assembler.WriteInstruction(info.Inst, dest, src1, src2); + } + + break; + } + + case IntrinsicType.TernaryImm: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameType(dest, src1, src2); + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(!dest.Type.IsInteger() && src3.Kind == OperandKind.Constant); + + context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src3.AsByte()); + + break; + } + } + } + else + { + Action func = _instTable[(int)operation.Instruction]; + + if (func != null) + { + func(context, operation); + } + else + { + throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\"."); + } + } + } + + private static void GenerateAdd(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; 
+ Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Add(dest, src2, dest.Type); + } + else if (dest.Type == OperandType.FP32) + { + context.Assembler.Addss(dest, src1, src2); + } + else /* if (dest.Type == OperandType.FP64) */ + { + context.Assembler.Addsd(dest, src1, src2); + } + } + + private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.And(dest, src2, dest.Type); + } + + private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Xor(dest, src2, dest.Type); + } + else + { + context.Assembler.Xorps(dest, src1, src2); + } + } + + private static void GenerateBitwiseNot(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + ValidateUnOp(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Not(dest); + } + + private static void GenerateBitwiseOr(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Or(dest, src2, dest.Type); + } + + private static void GenerateBranch(CodeGenContext context, Operation operation) + { + context.JumpTo(context.CurrBlock.Branch); + } + + private static void GenerateBranchIfFalse(CodeGenContext context, 
Operation operation) + { + Operand source = operation.GetSource(0); + + context.Assembler.Test(source, source, source.Type); + + context.JumpTo(X86Condition.Equal, context.CurrBlock.Branch); + } + + private static void GenerateBranchIfTrue(CodeGenContext context, Operation operation) + { + Operand source = operation.GetSource(0); + + context.Assembler.Test(source, source, source.Type); + + context.JumpTo(X86Condition.NotEqual, context.CurrBlock.Branch); + } + + private static void GenerateByteSwap(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + ValidateUnOp(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Bswap(dest); + } + + private static void GenerateCall(CodeGenContext context, Operation operation) + { + context.Assembler.Call(operation.GetSource(0)); + } + + private static void GenerateClobber(CodeGenContext context, Operation operation) + { + // This only tells the register allocator that a register is clobbered, + // so there is no code to produce here. 
+ } + + private static void GenerateCompareAndSwap128(CodeGenContext context, Operation operation) + { + Operand source = operation.GetSource(0); + + MemoryOperand memOp = new MemoryOperand(OperandType.I64, source); + + context.Assembler.Cmpxchg16b(memOp); + } + + private static void GenerateCompareEqual(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.Equal); + } + + private static void GenerateCompareGreater(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.Greater); + } + + private static void GenerateCompareGreaterOrEqual(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.GreaterOrEqual); + } + + private static void GenerateCompareGreaterOrEqualUI(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.AboveOrEqual); + } + + private static void GenerateCompareGreaterUI(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.Above); + } + + private static void GenerateCompareLess(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.Less); + } + + private static void GenerateCompareLessOrEqual(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.LessOrEqual); + } + + private static void GenerateCompareLessOrEqualUI(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.BelowOrEqual); + } + + private static void GenerateCompareLessUI(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.Below); + } + + private static void GenerateCompareNotEqual(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.NotEqual); + } + + private static void GenerateCompare(CodeGenContext context, Operation operation, 
X86Condition condition) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(src1, src2); + + Debug.Assert(dest.Type == OperandType.I32); + + context.Assembler.Cmp(src1, src2, src1.Type); + context.Assembler.Setcc(dest, condition); + context.Assembler.Movzx8(dest, dest, OperandType.I32); + } + + private static void GenerateConditionalSelect(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameReg (dest, src3); + EnsureSameType(dest, src2, src3); + + Debug.Assert(dest.Type.IsInteger()); + Debug.Assert(src1.Type == OperandType.I32); + + context.Assembler.Test (src1, src1, src1.Type); + context.Assembler.Cmovcc(dest, src2, dest.Type, X86Condition.NotEqual); + } + + private static void GenerateConvertI64ToI32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.I32 && source.Type == OperandType.I64); + + context.Assembler.Mov(dest, source, OperandType.I32); + } + + private static void GenerateConvertToFP(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64); + + if (dest.Type == OperandType.FP32) + { + Debug.Assert(source.Type.IsInteger() || source.Type == OperandType.FP64); + + if (source.Type.IsInteger()) + { + context.Assembler.Xorps (dest, dest, dest); + context.Assembler.Cvtsi2ss(dest, dest, source, source.Type); + } + else /* if (source.Type == OperandType.FP64) */ + { + context.Assembler.Cvtsd2ss(dest, dest, source); + + GenerateZeroUpper96(context, dest, dest); + } + } + else /* if (dest.Type == OperandType.FP64) */ + { + 
Debug.Assert(source.Type.IsInteger() || source.Type == OperandType.FP32); + + if (source.Type.IsInteger()) + { + context.Assembler.Xorps (dest, dest, dest); + context.Assembler.Cvtsi2sd(dest, dest, source, source.Type); + } + else /* if (source.Type == OperandType.FP32) */ + { + context.Assembler.Cvtss2sd(dest, dest, source); + + GenerateZeroUpper64(context, dest, dest); + } + } + } + + private static void GenerateCopy(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(dest.Type.IsInteger() || source.Kind != OperandKind.Constant); + + // Moves to the same register are useless. + if (dest.Kind == source.Kind && dest.Value == source.Value) + { + return; + } + + if (dest.Kind == OperandKind.Register && + source.Kind == OperandKind.Constant && source.Value == 0) + { + // Assemble "mov reg, 0" as "xor reg, reg" as the latter is more efficient. + context.Assembler.Xor(dest, dest, OperandType.I32); + } + else if (dest.Type.IsInteger()) + { + context.Assembler.Mov(dest, source, dest.Type); + } + else + { + context.Assembler.Movdqu(dest, source); + } + } + + private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Bsr(dest, source, dest.Type); + + int operandSize = dest.Type == OperandType.I32 ? 32 : 64; + int operandMask = operandSize - 1; + + // When the input operand is 0, the result is undefined, however the + // ZF flag is set. We are supposed to return the operand size in that + // case. So, add an additional jump to handle that case, by moving + // (operandSize | operandMask) to the destination; the XOR below maps it to operandSize. 
+ context.JumpToNear(X86Condition.NotEqual); + + context.Assembler.Mov(dest, new Operand(operandSize | operandMask), OperandType.I32); + + context.JumpHere(); + + // BSR returns the zero-based index of the highest bit set on the operand, + // counting from the least significant bit. However, we are supposed to + // return the number of 0 bits on the high end. So, we invert the result + // of the BSR using XOR with (operandSize - 1) to get the correct value. + context.Assembler.Xor(dest, new Operand(operandMask), OperandType.I32); + } + + private static void GenerateCpuId(CodeGenContext context, Operation operation) + { + context.Assembler.Cpuid(); + } + + private static void GenerateDivide(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand dividend = operation.GetSource(0); + Operand divisor = operation.GetSource(1); + + if (!dest.Type.IsInteger()) + { + ValidateBinOp(dest, dividend, divisor); + } + + if (dest.Type.IsInteger()) + { + divisor = operation.GetSource(2); + + EnsureSameType(dest, divisor); + + if (divisor.Type == OperandType.I32) + { + context.Assembler.Cdq(); + } + else + { + context.Assembler.Cqo(); + } + + context.Assembler.Idiv(divisor); + } + else if (dest.Type == OperandType.FP32) + { + context.Assembler.Divss(dest, dividend, divisor); + } + else /* if (dest.Type == OperandType.FP64) */ + { + context.Assembler.Divsd(dest, dividend, divisor); + } + } + + private static void GenerateDivideUI(CodeGenContext context, Operation operation) + { + Operand divisor = operation.GetSource(2); + + Operand rdx = Register(X86Register.Rdx); + + Debug.Assert(divisor.Type.IsInteger()); + + context.Assembler.Xor(rdx, rdx, OperandType.I32); + context.Assembler.Div(divisor); + } + + private static void GenerateFill(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand offset = operation.GetSource(0); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + 
context.CallArgsRegionSize; + + Operand rsp = Register(X86Register.Rsp); + + MemoryOperand memOp = new MemoryOperand(dest.Type, rsp, null, Multiplier.x1, offs); + + GenerateLoad(context, memOp, dest); + } + + private static void GenerateLoad(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand address = Memory(operation.GetSource(0), value.Type); + + GenerateLoad(context, address, value); + } + + private static void GenerateLoad16(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand address = Memory(operation.GetSource(0), value.Type); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.Movzx16(value, address, value.Type); + } + + private static void GenerateLoad8(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand address = Memory(operation.GetSource(0), value.Type); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.Movzx8(value, address, value.Type); + } + + private static void GenerateMultiply(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + if (src2.Kind != OperandKind.Constant) + { + EnsureSameReg(dest, src1); + } + + EnsureSameType(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + if (src2.Kind == OperandKind.Constant) + { + context.Assembler.Imul(dest, src1, src2, dest.Type); + } + else + { + context.Assembler.Imul(dest, src2, dest.Type); + } + } + else if (dest.Type == OperandType.FP32) + { + context.Assembler.Mulss(dest, src1, src2); + } + else /* if (dest.Type == OperandType.FP64) */ + { + context.Assembler.Mulsd(dest, src1, src2); + } + } + + private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation) + { + Operand source = operation.GetSource(1); + + Debug.Assert(source.Type == OperandType.I64); + + 
context.Assembler.Imul(source); + } + + private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation) + { + Operand source = operation.GetSource(1); + + Debug.Assert(source.Type == OperandType.I64); + + context.Assembler.Mul(source); + } + + private static void GenerateNegate(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + ValidateUnOp(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Neg(dest); + } + + private static void GenerateReturn(CodeGenContext context, Operation operation) + { + WriteEpilogue(context); + + context.Assembler.Return(); + } + + private static void GenerateRotateRight(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Ror(dest, src2, dest.Type); + } + + private static void GenerateShiftLeft(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Shl(dest, src2, dest.Type); + } + + private static void GenerateShiftRightSI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Sar(dest, src2, dest.Type); + } + + private static void GenerateShiftRightUI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Shr(dest, src2, dest.Type); + } + + private static void GenerateSignExtend16(CodeGenContext context, Operation 
operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Movsx16(dest, source, dest.Type); + } + + private static void GenerateSignExtend32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Movsx32(dest, source, dest.Type); + } + + private static void GenerateSignExtend8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Movsx8(dest, source, dest.Type); + } + + private static void GenerateSpill(CodeGenContext context, Operation operation) + { + GenerateSpill(context, operation, context.CallArgsRegionSize); + } + + private static void GenerateSpillArg(CodeGenContext context, Operation operation) + { + GenerateSpill(context, operation, 0); + } + + private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset) + { + Operand offset = operation.GetSource(0); + Operand source = operation.GetSource(1); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + baseOffset; + + Operand rsp = Register(X86Register.Rsp); + + MemoryOperand memOp = new MemoryOperand(source.Type, rsp, null, Multiplier.x1, offs); + + GenerateStore(context, memOp, source); + } + + private static void GenerateStackAlloc(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand offset = operation.GetSource(0); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + context.CallArgsRegionSize; + + Operand rsp = Register(X86Register.Rsp); + + MemoryOperand memOp = new MemoryOperand(OperandType.I64, rsp, 
null, Multiplier.x1, offs); + + context.Assembler.Lea(dest, memOp, OperandType.I64); + } + + private static void GenerateStore(CodeGenContext context, Operation operation) + { + Operand value = operation.GetSource(1); + Operand address = Memory(operation.GetSource(0), value.Type); + + GenerateStore(context, address, value); + } + + private static void GenerateStore16(CodeGenContext context, Operation operation) + { + Operand value = operation.GetSource(1); + Operand address = Memory(operation.GetSource(0), value.Type); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.Mov16(address, value); + } + + private static void GenerateStore8(CodeGenContext context, Operation operation) + { + Operand value = operation.GetSource(1); + Operand address = Memory(operation.GetSource(0), value.Type); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.Mov8(address, value); + } + + private static void GenerateSubtract(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Sub(dest, src2, dest.Type); + } + else if (dest.Type == OperandType.FP32) + { + context.Assembler.Subss(dest, src1, src2); + } + else /* if (dest.Type == OperandType.FP64) */ + { + context.Assembler.Subsd(dest, src1, src2); + } + } + + private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(!dest.Type.IsInteger() && source.Type.IsInteger()); + + if (source.Type == OperandType.I32) + { + context.Assembler.Movd(dest, source); + } + else /* if (source.Type == OperandType.I64) */ + { + context.Assembler.Movq(dest, source); + } + } + + private static void GenerateVectorExtract(CodeGenContext context, Operation operation) + { + Operand dest = 
operation.Destination; //Value + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + if (dest.Type == OperandType.I32) + { + Debug.Assert(index < 4); + + if (HardwareCapabilities.SupportsSse41) + { + context.Assembler.Pextrd(dest, src1, index); + } + else + { + if (index != 0) + { + int mask0 = 0b11_10_01_00; + int mask1 = 0b11_10_01_00; + + mask0 = BitUtils.RotateRight(mask0, index * 2, 8); + mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8); + + context.Assembler.Pshufd(src1, src1, (byte)mask0); + context.Assembler.Movd (dest, src1); + context.Assembler.Pshufd(src1, src1, (byte)mask1); + } + else + { + context.Assembler.Movd(dest, src1); + } + } + } + else if (dest.Type == OperandType.I64) + { + Debug.Assert(index < 2); + + if (HardwareCapabilities.SupportsSse41) + { + context.Assembler.Pextrq(dest, src1, index); + } + else + { + if (index != 0) + { + const byte mask = 0b01_00_11_10; + + context.Assembler.Pshufd(src1, src1, mask); + context.Assembler.Movq (dest, src1); + context.Assembler.Pshufd(src1, src1, mask); + } + else + { + context.Assembler.Movq(dest, src1); + } + } + } + else + { + Debug.Assert(index < (dest.Type == OperandType.FP32 ? 4 : 2)); + + // Floating-point types. 
+ if ((index >= 2 && dest.Type == OperandType.FP32) || + (index == 1 && dest.Type == OperandType.FP64)) + { + context.Assembler.Movhlps(dest, dest, src1); + context.Assembler.Movq (dest, dest); + } + else + { + context.Assembler.Movq(dest, src1); + } + + if (dest.Type == OperandType.FP32) + { + context.Assembler.Pshufd(dest, dest, (byte)(0xfc | (index & 1))); + } + } + } + + private static void GenerateVectorExtract16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; //Value + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < 8); + + context.Assembler.Pextrw(dest, src1, index); + } + + private static void GenerateVectorExtract8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; //Value + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < 16); + + if (HardwareCapabilities.SupportsSse41) + { + context.Assembler.Pextrb(dest, src1, index); + } + else + { + context.Assembler.Pextrw(dest, src1, (byte)(index >> 1)); + + if ((index & 1) != 0) + { + context.Assembler.Shr(dest, new Operand(8), OperandType.I32); + } + else + { + context.Assembler.Movzx8(dest, dest, OperandType.I32); + } + } + } + + private static void GenerateVectorInsert(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Value + Operand src3 = operation.GetSource(2); //Index + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(src1.Type == 
OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + void InsertIntSse2(int words) + { + if (dest.GetRegister() != src1.GetRegister()) + { + context.Assembler.Movdqu(dest, src1); + } + + for (int word = 0; word < words; word++) + { + // Insert lower 16-bits. + context.Assembler.Pinsrw(dest, dest, src2, (byte)(index * words + word)); + + // Move next word down. + context.Assembler.Ror(src2, new Operand(16), src2.Type); + } + } + + if (src2.Type == OperandType.I32) + { + Debug.Assert(index < 4); + + if (HardwareCapabilities.SupportsSse41) + { + context.Assembler.Pinsrd(dest, src1, src2, index); + } + else + { + InsertIntSse2(2); + } + } + else if (src2.Type == OperandType.I64) + { + Debug.Assert(index < 2); + + if (HardwareCapabilities.SupportsSse41) + { + context.Assembler.Pinsrq(dest, src1, src2, index); + } + else + { + InsertIntSse2(4); + } + } + else if (src2.Type == OperandType.FP32) + { + Debug.Assert(index < 4); + + if (index != 0) + { + if (HardwareCapabilities.SupportsSse41) + { + context.Assembler.Insertps(dest, src1, src2, (byte)(index << 4)); + } + else + { + if (src1.GetRegister() == src2.GetRegister()) + { + int mask = 0b11_10_01_00; + + mask &= ~(0b11 << index * 2); + + context.Assembler.Pshufd(dest, src1, (byte)mask); + } + else + { + int mask0 = 0b11_10_01_00; + int mask1 = 0b11_10_01_00; + + mask0 = BitUtils.RotateRight(mask0, index * 2, 8); + mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8); + + context.Assembler.Pshufd(src1, src1, (byte)mask0); // Lane to be inserted in position 0. + context.Assembler.Movss (dest, src1, src2); // dest[127:0] = src1[127:32] | src2[31:0] + context.Assembler.Pshufd(dest, dest, (byte)mask1); // Inserted lane in original position. + + if (dest.GetRegister() != src1.GetRegister()) + { + context.Assembler.Pshufd(src1, src1, (byte)mask1); // Restore src1. 
+ } + } + } + } + else + { + context.Assembler.Movss(dest, src1, src2); + } + } + else /* if (src2.Type == OperandType.FP64) */ + { + Debug.Assert(index < 2); + + if (index != 0) + { + context.Assembler.Movlhps(dest, src1, src2); + } + else + { + context.Assembler.Movsd(dest, src1, src2); + } + } + } + + private static void GenerateVectorInsert16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Value + Operand src3 = operation.GetSource(2); //Index + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + context.Assembler.Pinsrw(dest, src1, src2, index); + } + + private static void GenerateVectorInsert8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Value + Operand src3 = operation.GetSource(2); //Index + + // It's not possible to emulate this instruction without + // SSE 4.1 support without the use of a temporary register, + // so we instead handle that case on the pre-allocator when + // SSE 4.1 is not supported on the CPU. 
+ Debug.Assert(HardwareCapabilities.SupportsSse41); + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + context.Assembler.Pinsrb(dest, src1, src2, index); + } + + private static void GenerateVectorOne(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + + Debug.Assert(!dest.Type.IsInteger()); + + context.Assembler.Pcmpeqw(dest, dest, dest); + } + + private static void GenerateVectorZero(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + + Debug.Assert(!dest.Type.IsInteger()); + + context.Assembler.Xorps(dest, dest, dest); + } + + private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128); + + GenerateZeroUpper64(context, dest, source); + } + + private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128); + + GenerateZeroUpper96(context, dest, source); + } + + private static void GenerateZeroExtend16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Movzx16(dest, source, OperandType.I32); + } + + private static void GenerateZeroExtend32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + 
context.Assembler.Mov(dest, source, OperandType.I32); + } + + private static void GenerateZeroExtend8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Movzx8(dest, source, OperandType.I32); + } + + private static void GenerateLoad(CodeGenContext context, Operand address, Operand value) + { + switch (value.Type) + { + case OperandType.I32: context.Assembler.Mov (value, address, OperandType.I32); break; + case OperandType.I64: context.Assembler.Mov (value, address, OperandType.I64); break; + case OperandType.FP32: context.Assembler.Movd (value, address); break; + case OperandType.FP64: context.Assembler.Movq (value, address); break; + case OperandType.V128: context.Assembler.Movdqu(value, address); break; + + default: Debug.Assert(false); break; + } + } + + private static void GenerateStore(CodeGenContext context, Operand address, Operand value) + { + switch (value.Type) + { + case OperandType.I32: context.Assembler.Mov (address, value, OperandType.I32); break; + case OperandType.I64: context.Assembler.Mov (address, value, OperandType.I64); break; + case OperandType.FP32: context.Assembler.Movd (address, value); break; + case OperandType.FP64: context.Assembler.Movq (address, value); break; + case OperandType.V128: context.Assembler.Movdqu(address, value); break; + + default: Debug.Assert(false); break; + } + } + + private static void GenerateZeroUpper64(CodeGenContext context, Operand dest, Operand source) + { + context.Assembler.Movq(dest, source); + } + + private static void GenerateZeroUpper96(CodeGenContext context, Operand dest, Operand source) + { + context.Assembler.Movq(dest, source); + context.Assembler.Pshufd(dest, dest, 0xfc); + } + + private static void ValidateUnOp(Operand dest, Operand source) + { +#if DEBUG + EnsureSameReg (dest, source); + EnsureSameType(dest, source); +#endif + } 
+
+        private static void ValidateBinOp(Operand dest, Operand src1, Operand src2)
+        {
+#if DEBUG
+            EnsureSameReg (dest, src1);       // dest must match src1 (see EnsureSameReg).
+            EnsureSameType(dest, src1, src2); // All three operands share one type.
+#endif
+        }
+
+        private static void ValidateShift(Operand dest, Operand src1, Operand src2)
+        {
+#if DEBUG
+            EnsureSameReg (dest, src1); // dest must match src1 (see EnsureSameReg).
+            EnsureSameType(dest, src1);
+
+            Debug.Assert(dest.Type.IsInteger() && src2.Type == OperandType.I32); // Integer value, I32 shift count.
+#endif
+        }
+
+        private static void EnsureSameReg(Operand op1, Operand op2)
+        {
+            if (!op1.Type.IsInteger() && HardwareCapabilities.SupportsVexEncoding)
+            {
+                return; // Non-integer operands are not checked when VEX encoding is in use.
+            }
+
+            Debug.Assert(op1.Kind == OperandKind.Register || op1.Kind == OperandKind.Memory);
+            Debug.Assert(op1.Kind == op2.Kind);
+            Debug.Assert(op1.Value == op2.Value);
+        }
+
+        private static void EnsureSameType(Operand op1, Operand op2)
+        {
+            Debug.Assert(op1.Type == op2.Type);
+        }
+
+        private static void EnsureSameType(Operand op1, Operand op2, Operand op3)
+        {
+            Debug.Assert(op1.Type == op2.Type);
+            Debug.Assert(op1.Type == op3.Type);
+        }
+
+        private static void EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4)
+        {
+            Debug.Assert(op1.Type == op2.Type);
+            Debug.Assert(op1.Type == op3.Type);
+            Debug.Assert(op1.Type == op4.Type);
+        }
+
+        private static UnwindInfo WritePrologue(CodeGenContext context)
+        {
+            List<UnwindPushEntry> pushEntries = new List<UnwindPushEntry>(); // One entry per saved register.
+
+            Operand rsp = Register(X86Register.Rsp);
+
+            int mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+
+            while (mask != 0)
+            {
+                int bit = BitUtils.LowestBitSet(mask); // Push order: lowest register index first.
+
+                context.Assembler.Push(Register((X86Register)bit));
+
+                pushEntries.Add(new UnwindPushEntry(bit, RegisterType.Integer, context.StreamOffset));
+
+                mask &= ~(1 << bit); // Done with this register.
+            }
+
+            int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize;
+
+            reservedStackSize += context.XmmSaveRegionSize;
+
+            if (reservedStackSize >= StackGuardSize)
+            {
+                GenerateInlineStackProbe(context, reservedStackSize); // Touch the pages before moving RSP.
+            }
+
+            if (reservedStackSize != 0)
+            {
+                context.Assembler.Sub(rsp, new Operand(reservedStackSize), OperandType.I64);
+            }
+
+            int offset = reservedStackSize;
+
+            mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+            while (mask != 0)
+            {
+                int bit = BitUtils.LowestBitSet(mask);
+
+                offset -= 16; // 16-byte slots, handed out downwards from the top of the reserved area.
+
+                MemoryOperand memOp = new MemoryOperand(OperandType.V128, rsp, null, Multiplier.x1, offset);
+
+                context.Assembler.Movdqu(memOp, Xmm((X86Register)bit));
+
+                pushEntries.Add(new UnwindPushEntry(bit, RegisterType.Vector, context.StreamOffset));
+
+                mask &= ~(1 << bit);
+            }
+
+            return new UnwindInfo(pushEntries.ToArray(), context.StreamOffset, reservedStackSize);
+        }
+
+        private static void WriteEpilogue(CodeGenContext context)
+        {
+            Operand rsp = Register(X86Register.Rsp);
+
+            int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize;
+
+            reservedStackSize += context.XmmSaveRegionSize; // Must equal the size computed by WritePrologue.
+
+            int offset = reservedStackSize;
+
+            int mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+            while (mask != 0)
+            {
+                int bit = BitUtils.LowestBitSet(mask); // Same iteration order as the prologue, so slots line up.
+
+                offset -= 16;
+
+                MemoryOperand memOp = new MemoryOperand(OperandType.V128, rsp, null, Multiplier.x1, offset);
+
+                context.Assembler.Movdqu(Xmm((X86Register)bit), memOp);
+
+                mask &= ~(1 << bit);
+            }
+
+            if (reservedStackSize != 0)
+            {
+                context.Assembler.Add(rsp, new Operand(reservedStackSize), OperandType.I64);
+            }
+
+            mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+
+            while (mask != 0)
+            {
+                int bit = BitUtils.HighestBitSet(mask); // Pop highest first: reverse of the prologue's push order.
+
+                context.Assembler.Pop(Register((X86Register)bit));
+
+                mask &= ~(1 << bit);
+            }
+        }
+
+        private static void GenerateInlineStackProbe(CodeGenContext context, int size)
+        {
+            // Windows allocates stack pages lazily, and there are just 2
+            // guard pages at the end of the stack. So, if the allocation
+            // we make is larger than this guard region, we must ensure
+            // that the OS maps all the pages that we will use. We do that by
+            // doing a dummy read on those pages, forcing a page fault so that
+            // the OS maps them. If they are already mapped, nothing happens.
+            const int pageMask = PageSize - 1;
+
+            size = (size + pageMask) & ~pageMask; // Round up to a whole number of pages.
+
+            Operand rsp  = Register(X86Register.Rsp);
+            Operand temp = Register(CallingConvention.GetIntReturnRegister()); // Receives the dummy reads.
+
+            for (int offset = PageSize; offset < size; offset += PageSize)
+            {
+                Operand memOp = new MemoryOperand(OperandType.I32, rsp, null, Multiplier.x1, -offset);
+
+                context.Assembler.Mov(temp, memOp, OperandType.I32); // The loaded value is never used.
+            }
+        }
+
+        private static MemoryOperand Memory(Operand operand, OperandType type)
+        {
+            if (operand.Kind == OperandKind.Memory)
+            {
+                return operand as MemoryOperand; // Already a memory operand; "type" is ignored.
+            }
+
+            return new MemoryOperand(type, operand);
+        }
+
+        private static Operand Register(X86Register register, OperandType type = OperandType.I64)
+        {
+            return new Operand((int)register, RegisterType.Integer, type);
+        }
+
+        private static Operand Xmm(X86Register register)
+        {
+            return new Operand((int)register, RegisterType.Vector, OperandType.V128);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
new file mode 100644
index 000000000..7f930d6b9
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
@@ -0,0 +1,52 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+namespace ARMeilleure.CodeGen.X86
+{
+    static class HardwareCapabilities
+    {
+        private delegate ulong GetFeatureInfo();
+
+        private static readonly ulong _featureInfo; // Value returned by the compiled CpuId function below.
+
+        public static bool SupportsSse3      => (_featureInfo & (1UL << 0))  != 0;
+        public static bool SupportsPclmulqdq => (_featureInfo & (1UL << 1))  != 0;
+        public static bool SupportsSsse3     => (_featureInfo & (1UL << 9))  != 0;
+        public static bool SupportsFma       => (_featureInfo & (1UL << 12)) != 0;
+        public static bool SupportsCx16      => (_featureInfo & (1UL << 13)) != 0;
+        public static bool SupportsSse41     => (_featureInfo & (1UL << 19)) != 0;
+        public static bool SupportsSse42     => (_featureInfo & (1UL << 20)) != 0;
+        public static bool SupportsPopcnt    => (_featureInfo & (1UL << 23)) != 0;
+        public static bool SupportsAesni     => (_featureInfo & (1UL << 25)) != 0;
+        public static bool SupportsAvx       => (_featureInfo & (1UL << 28)) != 0;
+        public static bool SupportsF16c      => (_featureInfo & (1UL << 29)) != 0;
+
+        public static bool SupportsSse  => (_featureInfo & (1UL << 32 + 25)) != 0; // "+" binds tighter than "<<": bit 57.
+        public static bool SupportsSse2 => (_featureInfo & (1UL << 32 + 26)) != 0; // Bit 58.
+
+        public static bool ForceLegacySse { get; set; }
+
+        public static bool SupportsVexEncoding => !ForceLegacySse && SupportsAvx;
+
+        static HardwareCapabilities()
+        {
+            EmitterContext context = new EmitterContext();
+
+            Operand featureInfo = context.CpuId();
+
+            context.Return(featureInfo);
+
+            ControlFlowGraph cfg = context.GetControlFlowGraph();
+
+            OperandType[] argTypes = new OperandType[0];
+
+            GetFeatureInfo getFeatureInfo = Compiler.Compile<GetFeatureInfo>(
+                cfg,
+                argTypes,
+                OperandType.I64,
+                CompilerOptions.HighCq);
+
+            _featureInfo = getFeatureInfo();
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs b/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs
new file mode 100644
index 000000000..b1af352bc
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.X86
+{
+    struct IntrinsicInfo
+    {
+        public X86Instruction Inst { get; }
+        public IntrinsicType  Type { get; }
+
+        public IntrinsicInfo(X86Instruction inst, IntrinsicType type)
+        {
+            Inst = inst;
+            Type = type;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
new file mode 100644
index 000000000..e225f2542
--- /dev/null
+++
b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs @@ -0,0 +1,160 @@ +using ARMeilleure.Common; +using ARMeilleure.IntermediateRepresentation; + +namespace ARMeilleure.CodeGen.X86 +{ + static class IntrinsicTable + { + private const int BadOp = 0; + + private static IntrinsicInfo[] _intrinTable; + + static IntrinsicTable() + { + _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))]; + + Add(Intrinsic.X86Addpd, new IntrinsicInfo(X86Instruction.Addpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Addps, new IntrinsicInfo(X86Instruction.Addps, IntrinsicType.Binary)); + Add(Intrinsic.X86Addsd, new IntrinsicInfo(X86Instruction.Addsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Addss, new IntrinsicInfo(X86Instruction.Addss, IntrinsicType.Binary)); + Add(Intrinsic.X86Andnpd, new IntrinsicInfo(X86Instruction.Andnpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Andnps, new IntrinsicInfo(X86Instruction.Andnps, IntrinsicType.Binary)); + Add(Intrinsic.X86Cmppd, new IntrinsicInfo(X86Instruction.Cmppd, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Cmpps, new IntrinsicInfo(X86Instruction.Cmpps, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Cmpsd, new IntrinsicInfo(X86Instruction.Cmpsd, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Cmpss, new IntrinsicInfo(X86Instruction.Cmpss, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Comisdeq, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_)); + Add(Intrinsic.X86Comisdge, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_)); + Add(Intrinsic.X86Comisdlt, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_)); + Add(Intrinsic.X86Comisseq, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_)); + Add(Intrinsic.X86Comissge, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_)); + Add(Intrinsic.X86Comisslt, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_)); + Add(Intrinsic.X86Cvtdq2pd, new IntrinsicInfo(X86Instruction.Cvtdq2pd, IntrinsicType.Unary)); + 
Add(Intrinsic.X86Cvtdq2ps, new IntrinsicInfo(X86Instruction.Cvtdq2ps, IntrinsicType.Unary)); + Add(Intrinsic.X86Cvtpd2dq, new IntrinsicInfo(X86Instruction.Cvtpd2dq, IntrinsicType.Unary)); + Add(Intrinsic.X86Cvtpd2ps, new IntrinsicInfo(X86Instruction.Cvtpd2ps, IntrinsicType.Unary)); + Add(Intrinsic.X86Cvtps2dq, new IntrinsicInfo(X86Instruction.Cvtps2dq, IntrinsicType.Unary)); + Add(Intrinsic.X86Cvtps2pd, new IntrinsicInfo(X86Instruction.Cvtps2pd, IntrinsicType.Unary)); + Add(Intrinsic.X86Cvtsd2si, new IntrinsicInfo(X86Instruction.Cvtsd2si, IntrinsicType.UnaryToGpr)); + Add(Intrinsic.X86Cvtsd2ss, new IntrinsicInfo(X86Instruction.Cvtsd2ss, IntrinsicType.Binary)); + Add(Intrinsic.X86Cvtss2sd, new IntrinsicInfo(X86Instruction.Cvtss2sd, IntrinsicType.Binary)); + Add(Intrinsic.X86Divpd, new IntrinsicInfo(X86Instruction.Divpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Divps, new IntrinsicInfo(X86Instruction.Divps, IntrinsicType.Binary)); + Add(Intrinsic.X86Divsd, new IntrinsicInfo(X86Instruction.Divsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Divss, new IntrinsicInfo(X86Instruction.Divss, IntrinsicType.Binary)); + Add(Intrinsic.X86Haddpd, new IntrinsicInfo(X86Instruction.Haddpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Haddps, new IntrinsicInfo(X86Instruction.Haddps, IntrinsicType.Binary)); + Add(Intrinsic.X86Maxpd, new IntrinsicInfo(X86Instruction.Maxpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Maxps, new IntrinsicInfo(X86Instruction.Maxps, IntrinsicType.Binary)); + Add(Intrinsic.X86Maxsd, new IntrinsicInfo(X86Instruction.Maxsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Maxss, new IntrinsicInfo(X86Instruction.Maxss, IntrinsicType.Binary)); + Add(Intrinsic.X86Minpd, new IntrinsicInfo(X86Instruction.Minpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Minps, new IntrinsicInfo(X86Instruction.Minps, IntrinsicType.Binary)); + Add(Intrinsic.X86Minsd, new IntrinsicInfo(X86Instruction.Minsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Minss, new 
IntrinsicInfo(X86Instruction.Minss, IntrinsicType.Binary)); + Add(Intrinsic.X86Movhlps, new IntrinsicInfo(X86Instruction.Movhlps, IntrinsicType.Binary)); + Add(Intrinsic.X86Movlhps, new IntrinsicInfo(X86Instruction.Movlhps, IntrinsicType.Binary)); + Add(Intrinsic.X86Mulpd, new IntrinsicInfo(X86Instruction.Mulpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Mulps, new IntrinsicInfo(X86Instruction.Mulps, IntrinsicType.Binary)); + Add(Intrinsic.X86Mulsd, new IntrinsicInfo(X86Instruction.Mulsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Mulss, new IntrinsicInfo(X86Instruction.Mulss, IntrinsicType.Binary)); + Add(Intrinsic.X86Paddb, new IntrinsicInfo(X86Instruction.Paddb, IntrinsicType.Binary)); + Add(Intrinsic.X86Paddd, new IntrinsicInfo(X86Instruction.Paddd, IntrinsicType.Binary)); + Add(Intrinsic.X86Paddq, new IntrinsicInfo(X86Instruction.Paddq, IntrinsicType.Binary)); + Add(Intrinsic.X86Paddw, new IntrinsicInfo(X86Instruction.Paddw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pand, new IntrinsicInfo(X86Instruction.Pand, IntrinsicType.Binary)); + Add(Intrinsic.X86Pandn, new IntrinsicInfo(X86Instruction.Pandn, IntrinsicType.Binary)); + Add(Intrinsic.X86Pavgb, new IntrinsicInfo(X86Instruction.Pavgb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pavgw, new IntrinsicInfo(X86Instruction.Pavgw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pblendvb, new IntrinsicInfo(X86Instruction.Pblendvb, IntrinsicType.Ternary)); + Add(Intrinsic.X86Pcmpeqb, new IntrinsicInfo(X86Instruction.Pcmpeqb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpeqd, new IntrinsicInfo(X86Instruction.Pcmpeqd, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpeqq, new IntrinsicInfo(X86Instruction.Pcmpeqq, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpeqw, new IntrinsicInfo(X86Instruction.Pcmpeqw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpgtb, new IntrinsicInfo(X86Instruction.Pcmpgtb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpgtd, new IntrinsicInfo(X86Instruction.Pcmpgtd, IntrinsicType.Binary)); + 
Add(Intrinsic.X86Pcmpgtq, new IntrinsicInfo(X86Instruction.Pcmpgtq, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpgtw, new IntrinsicInfo(X86Instruction.Pcmpgtw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmaxsb, new IntrinsicInfo(X86Instruction.Pmaxsb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmaxsd, new IntrinsicInfo(X86Instruction.Pmaxsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmaxsw, new IntrinsicInfo(X86Instruction.Pmaxsw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmaxub, new IntrinsicInfo(X86Instruction.Pmaxub, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmaxud, new IntrinsicInfo(X86Instruction.Pmaxud, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmaxuw, new IntrinsicInfo(X86Instruction.Pmaxuw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pminsb, new IntrinsicInfo(X86Instruction.Pminsb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pminsd, new IntrinsicInfo(X86Instruction.Pminsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Pminsw, new IntrinsicInfo(X86Instruction.Pminsw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pminub, new IntrinsicInfo(X86Instruction.Pminub, IntrinsicType.Binary)); + Add(Intrinsic.X86Pminud, new IntrinsicInfo(X86Instruction.Pminud, IntrinsicType.Binary)); + Add(Intrinsic.X86Pminuw, new IntrinsicInfo(X86Instruction.Pminuw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmovsxbw, new IntrinsicInfo(X86Instruction.Pmovsxbw, IntrinsicType.Unary)); + Add(Intrinsic.X86Pmovsxdq, new IntrinsicInfo(X86Instruction.Pmovsxdq, IntrinsicType.Unary)); + Add(Intrinsic.X86Pmovsxwd, new IntrinsicInfo(X86Instruction.Pmovsxwd, IntrinsicType.Unary)); + Add(Intrinsic.X86Pmovzxbw, new IntrinsicInfo(X86Instruction.Pmovzxbw, IntrinsicType.Unary)); + Add(Intrinsic.X86Pmovzxdq, new IntrinsicInfo(X86Instruction.Pmovzxdq, IntrinsicType.Unary)); + Add(Intrinsic.X86Pmovzxwd, new IntrinsicInfo(X86Instruction.Pmovzxwd, IntrinsicType.Unary)); + Add(Intrinsic.X86Pmulld, new IntrinsicInfo(X86Instruction.Pmulld, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmullw, new 
IntrinsicInfo(X86Instruction.Pmullw, IntrinsicType.Binary)); + Add(Intrinsic.X86Popcnt, new IntrinsicInfo(X86Instruction.Popcnt, IntrinsicType.PopCount)); + Add(Intrinsic.X86Por, new IntrinsicInfo(X86Instruction.Por, IntrinsicType.Binary)); + Add(Intrinsic.X86Pshufb, new IntrinsicInfo(X86Instruction.Pshufb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pslld, new IntrinsicInfo(X86Instruction.Pslld, IntrinsicType.Binary)); + Add(Intrinsic.X86Pslldq, new IntrinsicInfo(X86Instruction.Pslldq, IntrinsicType.Binary)); + Add(Intrinsic.X86Psllq, new IntrinsicInfo(X86Instruction.Psllq, IntrinsicType.Binary)); + Add(Intrinsic.X86Psllw, new IntrinsicInfo(X86Instruction.Psllw, IntrinsicType.Binary)); + Add(Intrinsic.X86Psrad, new IntrinsicInfo(X86Instruction.Psrad, IntrinsicType.Binary)); + Add(Intrinsic.X86Psraw, new IntrinsicInfo(X86Instruction.Psraw, IntrinsicType.Binary)); + Add(Intrinsic.X86Psrld, new IntrinsicInfo(X86Instruction.Psrld, IntrinsicType.Binary)); + Add(Intrinsic.X86Psrlq, new IntrinsicInfo(X86Instruction.Psrlq, IntrinsicType.Binary)); + Add(Intrinsic.X86Psrldq, new IntrinsicInfo(X86Instruction.Psrldq, IntrinsicType.Binary)); + Add(Intrinsic.X86Psrlw, new IntrinsicInfo(X86Instruction.Psrlw, IntrinsicType.Binary)); + Add(Intrinsic.X86Psubb, new IntrinsicInfo(X86Instruction.Psubb, IntrinsicType.Binary)); + Add(Intrinsic.X86Psubd, new IntrinsicInfo(X86Instruction.Psubd, IntrinsicType.Binary)); + Add(Intrinsic.X86Psubq, new IntrinsicInfo(X86Instruction.Psubq, IntrinsicType.Binary)); + Add(Intrinsic.X86Psubw, new IntrinsicInfo(X86Instruction.Psubw, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpckhbw, new IntrinsicInfo(X86Instruction.Punpckhbw, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpckhdq, new IntrinsicInfo(X86Instruction.Punpckhdq, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpckhqdq, new IntrinsicInfo(X86Instruction.Punpckhqdq, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpckhwd, new IntrinsicInfo(X86Instruction.Punpckhwd, IntrinsicType.Binary)); + 
Add(Intrinsic.X86Punpcklbw, new IntrinsicInfo(X86Instruction.Punpcklbw, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpckldq, new IntrinsicInfo(X86Instruction.Punpckldq, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpcklqdq, new IntrinsicInfo(X86Instruction.Punpcklqdq, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpcklwd, new IntrinsicInfo(X86Instruction.Punpcklwd, IntrinsicType.Binary)); + Add(Intrinsic.X86Pxor, new IntrinsicInfo(X86Instruction.Pxor, IntrinsicType.Binary)); + Add(Intrinsic.X86Rcpps, new IntrinsicInfo(X86Instruction.Rcpps, IntrinsicType.Unary)); + Add(Intrinsic.X86Rcpss, new IntrinsicInfo(X86Instruction.Rcpss, IntrinsicType.Unary)); + Add(Intrinsic.X86Roundpd, new IntrinsicInfo(X86Instruction.Roundpd, IntrinsicType.BinaryImm)); + Add(Intrinsic.X86Roundps, new IntrinsicInfo(X86Instruction.Roundps, IntrinsicType.BinaryImm)); + Add(Intrinsic.X86Roundsd, new IntrinsicInfo(X86Instruction.Roundsd, IntrinsicType.BinaryImm)); + Add(Intrinsic.X86Roundss, new IntrinsicInfo(X86Instruction.Roundss, IntrinsicType.BinaryImm)); + Add(Intrinsic.X86Rsqrtps, new IntrinsicInfo(X86Instruction.Rsqrtps, IntrinsicType.Unary)); + Add(Intrinsic.X86Rsqrtss, new IntrinsicInfo(X86Instruction.Rsqrtss, IntrinsicType.Unary)); + Add(Intrinsic.X86Shufpd, new IntrinsicInfo(X86Instruction.Shufpd, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Shufps, new IntrinsicInfo(X86Instruction.Shufps, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Sqrtpd, new IntrinsicInfo(X86Instruction.Sqrtpd, IntrinsicType.Unary)); + Add(Intrinsic.X86Sqrtps, new IntrinsicInfo(X86Instruction.Sqrtps, IntrinsicType.Unary)); + Add(Intrinsic.X86Sqrtsd, new IntrinsicInfo(X86Instruction.Sqrtsd, IntrinsicType.Unary)); + Add(Intrinsic.X86Sqrtss, new IntrinsicInfo(X86Instruction.Sqrtss, IntrinsicType.Unary)); + Add(Intrinsic.X86Subpd, new IntrinsicInfo(X86Instruction.Subpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Subps, new IntrinsicInfo(X86Instruction.Subps, IntrinsicType.Binary)); + Add(Intrinsic.X86Subsd, new 
IntrinsicInfo(X86Instruction.Subsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Subss, new IntrinsicInfo(X86Instruction.Subss, IntrinsicType.Binary)); + Add(Intrinsic.X86Unpckhpd, new IntrinsicInfo(X86Instruction.Unpckhpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Unpckhps, new IntrinsicInfo(X86Instruction.Unpckhps, IntrinsicType.Binary)); + Add(Intrinsic.X86Unpcklpd, new IntrinsicInfo(X86Instruction.Unpcklpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Unpcklps, new IntrinsicInfo(X86Instruction.Unpcklps, IntrinsicType.Binary)); + Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary)); + } + + private static void Add(Intrinsic intrin, IntrinsicInfo info) + { + _intrinTable[(int)intrin] = info; + } + + public static IntrinsicInfo GetInfo(Intrinsic intrin) + { + return _intrinTable[(int)intrin]; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/IntrinsicType.cs b/ARMeilleure/CodeGen/X86/IntrinsicType.cs new file mode 100644 index 000000000..4e9b33e1e --- /dev/null +++ b/ARMeilleure/CodeGen/X86/IntrinsicType.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.CodeGen.X86 +{ + enum IntrinsicType + { + Comis_, + PopCount, + Unary, + UnaryToGpr, + Binary, + BinaryImm, + Ternary, + TernaryImm + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs new file mode 100644 index 000000000..a14901311 --- /dev/null +++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs @@ -0,0 +1,1280 @@ +using ARMeilleure.CodeGen.RegisterAllocators; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System.Collections.Generic; +using System.Diagnostics; + +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.CodeGen.X86 +{ + using LLNode = LinkedListNode; + + static class PreAllocator + { + public static void 
RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs) + { + maxCallArgs = -1; + + CallConvName callConv = CallingConvention.GetCurrentCallConv(); + + Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()]; + + foreach (BasicBlock block in cctx.Cfg.Blocks) + { + LLNode nextNode; + + for (LLNode node = block.Operations.First; node != null; node = nextNode) + { + nextNode = node.Next; + + if (!(node.Value is Operation operation)) + { + continue; + } + + HandleConstantCopy(node, operation); + + HandleSameDestSrc1Copy(node, operation); + + HandleFixedRegisterCopy(node, operation); + + switch (operation.Instruction) + { + case Instruction.Call: + // Get the maximum number of arguments used on a call. + // On windows, when a struct is returned from the call, + // we also need to pass the pointer where the struct + // should be written on the first argument. + int argsCount = operation.SourcesCount - 1; + + if (operation.Destination != null && operation.Destination.Type == OperandType.V128) + { + argsCount++; + } + + if (maxCallArgs < argsCount) + { + maxCallArgs = argsCount; + } + + // Copy values to registers expected by the function + // being called, as mandated by the ABI. 
+ if (callConv == CallConvName.Windows) + { + node = HandleCallWindowsAbi(stackAlloc, node, operation); + } + else /* if (callConv == CallConvName.SystemV) */ + { + node = HandleCallSystemVAbi(node, operation); + } + break; + + case Instruction.ConvertToFPUI: + HandleConvertToFPUI(node, operation); + break; + + case Instruction.LoadArgument: + if (callConv == CallConvName.Windows) + { + HandleLoadArgumentWindowsAbi(cctx, node, preservedArgs, operation); + } + else /* if (callConv == CallConvName.SystemV) */ + { + HandleLoadArgumentSystemVAbi(cctx, node, preservedArgs, operation); + } + break; + + case Instruction.Negate: + if (!operation.GetSource(0).Type.IsInteger()) + { + node = HandleNegate(node, operation); + } + break; + + case Instruction.Return: + if (callConv == CallConvName.Windows) + { + HandleReturnWindowsAbi(cctx, node, preservedArgs, operation); + } + else /* if (callConv == CallConvName.SystemV) */ + { + HandleReturnSystemVAbi(node, operation); + } + break; + + case Instruction.VectorInsert8: + if (!HardwareCapabilities.SupportsSse41) + { + node = HandleVectorInsert8(node, operation); + } + break; + } + } + } + } + + private static void HandleConstantCopy(LLNode node, Operation operation) + { + if (operation.SourcesCount == 0 || IsIntrinsic(operation.Instruction)) + { + return; + } + + Instruction inst = operation.Instruction; + + Operand src1 = operation.GetSource(0); + Operand src2; + + if (src1.Kind == OperandKind.Constant) + { + if (!src1.Type.IsInteger()) + { + // Handle non-integer types (FP32, FP64 and V128). + // For instructions without an immediate operand, we do the following: + // - Insert a copy with the constant value (as integer) to a GPR. + // - Insert a copy from the GPR to a XMM register. + // - Replace the constant use with the XMM register. + src1 = AddXmmCopy(node, src1); + + operation.SetSource(0, src1); + } + else if (!HasConstSrc1(inst)) + { + // Handle integer types. 
+ // Most ALU instructions accepts a 32-bits immediate on the second operand. + // We need to ensure the following: + // - If the constant is on operand 1, we need to move it. + // -- But first, we try to swap operand 1 and 2 if the instruction is commutative. + // -- Doing so may allow us to encode the constant as operand 2 and avoid a copy. + // - If the constant is on operand 2, we check if the instruction supports it, + // if not, we also add a copy. 64-bits constants are usually not supported. + if (IsCommutative(inst)) + { + src2 = operation.GetSource(1); + + Operand temp = src1; + + src1 = src2; + src2 = temp; + + operation.SetSource(0, src1); + operation.SetSource(1, src2); + } + + if (src1.Kind == OperandKind.Constant) + { + src1 = AddCopy(node, src1); + + operation.SetSource(0, src1); + } + } + } + + if (operation.SourcesCount < 2) + { + return; + } + + src2 = operation.GetSource(1); + + if (src2.Kind == OperandKind.Constant) + { + if (!src2.Type.IsInteger()) + { + src2 = AddXmmCopy(node, src2); + + operation.SetSource(1, src2); + } + else if (!HasConstSrc2(inst) || IsLongConst(src2)) + { + src2 = AddCopy(node, src2); + + operation.SetSource(1, src2); + } + } + } + + private static LLNode HandleFixedRegisterCopy(LLNode node, Operation operation) + { + Operand dest = operation.Destination; + + LinkedList nodes = node.List; + + switch (operation.Instruction) + { + case Instruction.CompareAndSwap128: + { + // Handle the many restrictions of the compare and exchange (16 bytes) instruction: + // - The expected value should be in RDX:RAX. + // - The new value to be written should be in RCX:RBX. + // - The value at the memory location is loaded to RDX:RAX. 
+ void SplitOperand(Operand source, Operand lr, Operand hr) + { + nodes.AddBefore(node, new Operation(Instruction.VectorExtract, lr, source, Const(0))); + nodes.AddBefore(node, new Operation(Instruction.VectorExtract, hr, source, Const(1))); + } + + Operand rax = Gpr(X86Register.Rax, OperandType.I64); + Operand rbx = Gpr(X86Register.Rbx, OperandType.I64); + Operand rcx = Gpr(X86Register.Rcx, OperandType.I64); + Operand rdx = Gpr(X86Register.Rdx, OperandType.I64); + + SplitOperand(operation.GetSource(1), rax, rdx); + SplitOperand(operation.GetSource(2), rbx, rcx); + + node = nodes.AddAfter(node, new Operation(Instruction.VectorCreateScalar, dest, rax)); + node = nodes.AddAfter(node, new Operation(Instruction.VectorInsert, dest, dest, rdx, Const(1))); + + operation.SetDestinations(new Operand[] { rdx, rax }); + + operation.SetSources(new Operand[] { operation.GetSource(0), rdx, rax, rcx, rbx }); + + break; + } + + case Instruction.CpuId: + { + // Handle the many restrictions of the CPU Id instruction: + // - EAX controls the information returned by this instruction. + // - When EAX is 1, feature information is returned. + // - The information is written to registers EAX, EBX, ECX and EDX. + Debug.Assert(dest.Type == OperandType.I64); + + Operand eax = Gpr(X86Register.Rax, OperandType.I32); + Operand ebx = Gpr(X86Register.Rbx, OperandType.I32); + Operand ecx = Gpr(X86Register.Rcx, OperandType.I32); + Operand edx = Gpr(X86Register.Rdx, OperandType.I32); + + // Value 0x01 = Version, family and feature information. + nodes.AddBefore(node, new Operation(Instruction.Copy, eax, Const(1))); + + // Copy results to the destination register. + // The values are split into 2 32-bits registers, we merge them + // into a single 64-bits register. 
+ Operand rcx = Gpr(X86Register.Rcx, OperandType.I64); + + node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend32, dest, edx)); + node = nodes.AddAfter(node, new Operation(Instruction.ShiftLeft, dest, dest, Const(32))); + node = nodes.AddAfter(node, new Operation(Instruction.BitwiseOr, dest, dest, rcx)); + + operation.SetDestinations(new Operand[] { eax, ebx, ecx, edx }); + + operation.SetSources(new Operand[] { eax }); + + break; + } + + case Instruction.Divide: + case Instruction.DivideUI: + { + // Handle the many restrictions of the division instructions: + // - The dividend is always in RDX:RAX. + // - The result is always in RAX. + // - Additionally it also writes the remainder in RDX. + if (dest.Type.IsInteger()) + { + Operand src1 = operation.GetSource(0); + + Operand rax = Gpr(X86Register.Rax, src1.Type); + Operand rdx = Gpr(X86Register.Rdx, src1.Type); + + nodes.AddBefore(node, new Operation(Instruction.Copy, rax, src1)); + nodes.AddBefore(node, new Operation(Instruction.Clobber, rdx)); + + node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, rax)); + + operation.SetDestinations(new Operand[] { rdx, rax }); + + operation.SetSources(new Operand[] { rdx, rax, operation.GetSource(1) }); + + operation.Destination = rax; + } + + break; + } + + case Instruction.Extended: + { + IntrinsicOperation intrinOp = (IntrinsicOperation)operation; + + // PBLENDVB last operand is always implied to be XMM0 when VEX is not supported. + if (intrinOp.Intrinsic == Intrinsic.X86Pblendvb && !HardwareCapabilities.SupportsVexEncoding) + { + Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128); + + nodes.AddBefore(node, new Operation(Instruction.Copy, xmm0, operation.GetSource(2))); + + operation.SetSource(2, xmm0); + } + + break; + } + + case Instruction.Multiply64HighSI: + case Instruction.Multiply64HighUI: + { + // Handle the many restrictions of the i64 * i64 = i128 multiply instructions: + // - The multiplicand is always in RAX. 
// Inserts the copies needed by instructions whose destination must be the same
// operand as one of their sources (source 0 for the instructions reported by
// IsSameOperandDestSrc1, source 2 for ConditionalSelect).
// Returns the last node inserted after "node", or "node" itself when nothing
// was inserted after it.
private static LLNode HandleSameDestSrc1Copy(LLNode node, Operation operation)
{
    Operand dest = operation.Destination;

    if (dest == null || operation.SourcesCount == 0)
    {
        return node;
    }

    var nodes = node.List;

    // Makes the source at "srcIndex" and the destination the same operand.
    void TieSourceToDest(int srcIndex, bool destIsAlsoSource)
    {
        Operand tied = operation.GetSource(srcIndex);

        if (destIsAlsoSource)
        {
            // Dest is being used as some other source already, so writing to it
            // before the operation would overwrite that value. Work on a new
            // local instead, and copy the result to dest afterwards.
            Operand temp = Local(dest.Type);

            nodes.AddBefore(node, new Operation(Instruction.Copy, temp, tied));

            operation.SetSource(srcIndex, temp);

            node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, temp));

            operation.Destination = temp;
        }
        else
        {
            nodes.AddBefore(node, new Operation(Instruction.Copy, dest, tied));

            operation.SetSource(srcIndex, dest);
        }
    }

    Instruction inst = operation.Instruction;

    Operand src1 = operation.GetSource(0);

    // The multiply instruction (that maps to IMUL) is somewhat special, it has
    // a three operand form where the second source is an immediate value.
    bool threeOperandForm = inst == Instruction.Multiply && operation.GetSource(1).Kind == OperandKind.Constant;

    if (IsSameOperandDestSrc1(operation) && src1.Kind == OperandKind.LocalVariable && !threeOperandForm)
    {
        bool destIsAlsoSource = false;

        for (int srcIndex = 1; srcIndex < operation.SourcesCount && !destIsAlsoSource; srcIndex++)
        {
            destIsAlsoSource = operation.GetSource(srcIndex) == dest;
        }

        TieSourceToDest(0, destIsAlsoSource);
    }
    else if (inst == Instruction.ConditionalSelect)
    {
        Operand src2 = operation.GetSource(1);

        // For the conditional select, it is the third source that is tied to the destination.
        TieSourceToDest(2, src1 == dest || src2 == dest);
    }

    return node;
}
+ Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(source.Type.IsInteger(), $"Invalid source type \"{source.Type}\"."); + + LinkedList nodes = node.List; + + LLNode currentNode = node; + + if (source.Type == OperandType.I32) + { + // For 32-bits integers, we can just zero-extend to 64-bits, + // and then use the 64-bits signed conversion instructions. + Operand zex = Local(OperandType.I64); + + node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend32, zex, source)); + node = nodes.AddAfter(node, new Operation(Instruction.ConvertToFP, dest, zex)); + } + else /* if (source.Type == OperandType.I64) */ + { + // For 64-bits integers, we need to do the following: + // - Ensure that the integer has the most significant bit clear. + // -- This can be done by shifting the value right by 1, that is, dividing by 2. + // -- The least significant bit is lost in this case though. + // - We can then convert the shifted value with a signed integer instruction. + // - The result still needs to be corrected after that. + // -- First, we need to multiply the result by 2, as we divided it by 2 before. + // --- This can be done efficiently by adding the result to itself. + // -- Then, we need to add the least significant bit that was shifted out. + // --- We can convert the least significant bit to float, and add it to the result. 
// Replaces a FP negate operation with an equivalent instruction sequence, and
// returns the last node of that sequence.
private static LLNode HandleNegate(LLNode node, Operation operation)
{
    // There's no SSE FP negate instruction, so the negation is turned into
    // a XOR of the value to be negated with a mask with the highest bit set.
    // This also produces -0 for a negation of the value 0.
    Operand dest = operation.Destination;
    Operand source = operation.GetSource(0);

    Debug.Assert(dest.Type == OperandType.FP32 ||
                 dest.Type == OperandType.FP64, $"Invalid destination type \"{dest.Type}\".");

    var nodes = node.List;

    LLNode negateNode = node;
    LLNode last = node;

    // The element size selects the shift that builds the mask:
    // 32-bits lanes shifted left by 31 for FP32, 64-bits lanes shifted left by 63 for FP64.
    bool isFP32 = dest.Type == OperandType.FP32;

    Intrinsic shiftIntrinsic = isFP32 ? Intrinsic.X86Pslld : Intrinsic.X86Psllq;

    int shiftAmount = isFP32 ? 31 : 63;

    Operand mask = Local(dest.Type);

    last = nodes.AddAfter(last, new Operation(Instruction.VectorOne, mask));
    last = nodes.AddAfter(last, new IntrinsicOperation(shiftIntrinsic, mask, mask, Const(shiftAmount)));
    last = nodes.AddAfter(last, new IntrinsicOperation(Intrinsic.X86Xorps, mask, mask, source));
    last = nodes.AddAfter(last, new Operation(Instruction.Copy, dest, mask));

    // The original negate operation is no longer needed.
    Delete(negateNode, operation);

    return last;
}
+ Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Value + Operand src3 = operation.GetSource(2); // Index + + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + Debug.Assert(index < 16); + + LinkedList nodes = node.List; + + LLNode currentNode = node; + + Operand temp1 = Local(OperandType.I32); + Operand temp2 = Local(OperandType.I32); + + node = nodes.AddAfter(node, new Operation(Instruction.Copy, temp2, src2)); + + Operation vextOp = new Operation(Instruction.VectorExtract16, temp1, src1, Const(index >> 1)); + + node = nodes.AddAfter(node, vextOp); + + if ((index & 1) != 0) + { + node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend8, temp1, temp1)); + node = nodes.AddAfter(node, new Operation(Instruction.ShiftLeft, temp2, temp2, Const(8))); + node = nodes.AddAfter(node, new Operation(Instruction.BitwiseOr, temp1, temp1, temp2)); + } + else + { + node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend8, temp2, temp2)); + node = nodes.AddAfter(node, new Operation(Instruction.BitwiseAnd, temp1, temp1, Const(0xff00))); + node = nodes.AddAfter(node, new Operation(Instruction.BitwiseOr, temp1, temp1, temp2)); + } + + Operation vinsOp = new Operation(Instruction.VectorInsert16, dest, src1, temp1, Const(index >> 1)); + + node = nodes.AddAfter(node, vinsOp); + + Delete(currentNode, operation); + + return node; + } + + private static LLNode HandleCallWindowsAbi(StackAllocator stackAlloc, LLNode node, Operation operation) + { + Operand dest = operation.Destination; + + LinkedList nodes = node.List; + + // Handle struct arguments. + int retArgs = 0; + + int stackAllocOffset = 0; + + int AllocateOnStack(int size) + { + // We assume that the stack allocator is initially empty (TotalSize = 0). + // Taking that into account, we can reuse the space allocated for other + // calls by keeping track of our own allocated size (stackAllocOffset). 
+ // If the space allocated is not big enough, then we just expand it. + int offset = stackAllocOffset; + + if (stackAllocOffset + size > stackAlloc.TotalSize) + { + stackAlloc.Allocate((stackAllocOffset + size) - stackAlloc.TotalSize); + } + + stackAllocOffset += size; + + return offset; + } + + Operand arg0Reg = null; + + if (dest != null && dest.Type == OperandType.V128) + { + int stackOffset = AllocateOnStack(dest.Type.GetSizeInBytes()); + + arg0Reg = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64); + + Operation allocOp = new Operation(Instruction.StackAlloc, arg0Reg, Const(stackOffset)); + + nodes.AddBefore(node, allocOp); + + retArgs = 1; + } + + int argsCount = operation.SourcesCount - 1; + + int maxArgs = CallingConvention.GetArgumentsOnRegsCount() - retArgs; + + if (argsCount > maxArgs) + { + argsCount = maxArgs; + } + + Operand[] sources = new Operand[1 + retArgs + argsCount]; + + sources[0] = operation.GetSource(0); + + if (arg0Reg != null) + { + sources[1] = arg0Reg; + } + + for (int index = 1; index < operation.SourcesCount; index++) + { + Operand source = operation.GetSource(index); + + if (source.Type == OperandType.V128) + { + Operand stackAddr = Local(OperandType.I64); + + int stackOffset = AllocateOnStack(source.Type.GetSizeInBytes()); + + nodes.AddBefore(node, new Operation(Instruction.StackAlloc, stackAddr, Const(stackOffset))); + + Operation storeOp = new Operation(Instruction.Store, null, stackAddr, source); + + HandleConstantCopy(nodes.AddBefore(node, storeOp), storeOp); + + operation.SetSource(index, stackAddr); + } + } + + // Handle arguments passed on registers. 
+ for (int index = 0; index < argsCount; index++) + { + Operand source = operation.GetSource(index + 1); + + Operand argReg; + + int argIndex = index + retArgs; + + if (source.Type.IsInteger()) + { + argReg = Gpr(CallingConvention.GetIntArgumentRegister(argIndex), source.Type); + } + else + { + argReg = Xmm(CallingConvention.GetVecArgumentRegister(argIndex), source.Type); + } + + Operation copyOp = new Operation(Instruction.Copy, argReg, source); + + HandleConstantCopy(nodes.AddBefore(node, copyOp), copyOp); + + sources[1 + retArgs + index] = argReg; + } + + // The remaining arguments (those that are not passed on registers) + // should be passed on the stack, we write them to the stack with "SpillArg". + for (int index = argsCount; index < operation.SourcesCount - 1; index++) + { + Operand source = operation.GetSource(index + 1); + + Operand offset = new Operand((index + retArgs) * 8); + + Operation spillOp = new Operation(Instruction.SpillArg, null, offset, source); + + HandleConstantCopy(nodes.AddBefore(node, spillOp), spillOp); + } + + if (dest != null) + { + if (dest.Type == OperandType.V128) + { + Operand retValueAddr = Local(OperandType.I64); + + nodes.AddBefore(node, new Operation(Instruction.Copy, retValueAddr, arg0Reg)); + + Operation loadOp = new Operation(Instruction.Load, dest, retValueAddr); + + node = nodes.AddAfter(node, loadOp); + + operation.Destination = null; + } + else + { + Operand retReg = dest.Type.IsInteger() + ? 
// Lowers a call operation following the System V ABI rules implemented here:
// - Integer arguments go on GPRs, FP arguments on XMM registers, while there are registers left.
// - V128 arguments are treated as a struct and passed on two consecutive GPRs when both are available.
// - Everything else is written to the stack with "SpillArg".
// The call sources are replaced with the call target followed by the argument registers,
// and the return value is copied from the return register(s) after the call.
// Returns the last node inserted after the call, or the call node itself.
private static LLNode HandleCallSystemVAbi(LLNode node, Operation operation)
{
    Operand dest = operation.Destination;

    var nodes = node.List;

    List<Operand> sources = new List<Operand>();

    // Source 0 is kept as the first source of the call; the arguments start at source 1.
    sources.Add(operation.GetSource(0));

    int argsCount = operation.SourcesCount - 1;

    int intMax = CallingConvention.GetIntArgumentsOnRegsCount();
    int vecMax = CallingConvention.GetVecArgumentsOnRegsCount();

    int intCount = 0;
    int vecCount = 0;

    int stackOffset = 0;

    for (int index = 0; index < argsCount; index++)
    {
        Operand source = operation.GetSource(index + 1);

        bool passOnReg;

        if (source.Type.IsInteger())
        {
            passOnReg = intCount < intMax;
        }
        else if (source.Type == OperandType.V128)
        {
            // Needs two GPRs, one for each half.
            passOnReg = intCount + 1 < intMax;
        }
        else
        {
            passOnReg = vecCount < vecMax;
        }

        if (source.Type == OperandType.V128 && passOnReg)
        {
            // V128 is a struct, we pass each half on a GPR if possible.
            Operand argReg  = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
            Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);

            nodes.AddBefore(node, new Operation(Instruction.VectorExtract, argReg,  source, Const(0)));
            nodes.AddBefore(node, new Operation(Instruction.VectorExtract, argReg2, source, Const(1)));

            // Every other register argument is recorded as a source of the call;
            // do the same for both halves so that the call is seen as using them.
            // NOTE(review): presumably the register allocator relies on the source
            // list to keep argument registers live up to the call — confirm.
            sources.Add(argReg);
            sources.Add(argReg2);

            continue;
        }

        if (passOnReg)
        {
            Operand argReg = source.Type.IsInteger()
                ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
                : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);

            Operation copyOp = new Operation(Instruction.Copy, argReg, source);

            // The copy itself may have a constant source that needs to be moved to a local.
            HandleConstantCopy(nodes.AddBefore(node, copyOp), copyOp);

            sources.Add(argReg);
        }
        else
        {
            Operand offset = new Operand(stackOffset);

            Operation spillOp = new Operation(Instruction.SpillArg, null, offset, source);

            HandleConstantCopy(nodes.AddBefore(node, spillOp), spillOp);

            stackOffset += source.Type.GetSizeInBytes();
        }
    }

    if (dest != null)
    {
        if (dest.Type == OperandType.V128)
        {
            // The V128 return value is rebuilt from the low and high integer return registers.
            Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(),     OperandType.I64);
            Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);

            node = nodes.AddAfter(node, new Operation(Instruction.VectorCreateScalar, dest, retLReg));
            node = nodes.AddAfter(node, new Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1)));

            operation.Destination = null;
        }
        else
        {
            Operand retReg = dest.Type.IsInteger()
                ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
                : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);

            Operation copyOp = new Operation(Instruction.Copy, dest, retReg);

            node = nodes.AddAfter(node, copyOp);

            // The call now writes the return register; the copy above moves it to dest.
            operation.Destination = retReg;
        }
    }

    operation.SetSources(sources.ToArray());

    return node;
}
1 : 0; + + int index = source.AsInt32() + retArgs; + + if (index < CallingConvention.GetArgumentsOnRegsCount()) + { + Operand dest = operation.Destination; + + if (preservedArgs[index] == null) + { + Operand argReg, pArg; + + if (dest.Type.IsInteger()) + { + argReg = Gpr(CallingConvention.GetIntArgumentRegister(index), dest.Type); + + pArg = Local(dest.Type); + } + else if (dest.Type == OperandType.V128) + { + argReg = Gpr(CallingConvention.GetIntArgumentRegister(index), OperandType.I64); + + pArg = Local(OperandType.I64); + } + else + { + argReg = Xmm(CallingConvention.GetVecArgumentRegister(index), dest.Type); + + pArg = Local(dest.Type); + } + + Operation copyOp = new Operation(Instruction.Copy, pArg, argReg); + + cctx.Cfg.Entry.Operations.AddFirst(copyOp); + + preservedArgs[index] = pArg; + } + + Operation argCopyOp = new Operation(dest.Type == OperandType.V128 + ? Instruction.Load + : Instruction.Copy, dest, preservedArgs[index]); + + node.List.AddBefore(node, argCopyOp); + + Delete(node, operation); + } + else + { + // TODO: Pass on stack. 
// Lowers a LoadArgument operation following the System V ABI rules implemented here.
// The argument register is found by counting the registers taken by the preceding
// arguments (cctx.FuncArgTypes). The first time an argument is loaded, its register(s)
// are copied to a local at the very start of the entry block, and that local is cached
// on "preservedArgs"; the LoadArgument itself is replaced with a copy from that local.
// Arguments that are not passed on registers are not handled yet.
private static void HandleLoadArgumentSystemVAbi(
    CompilerContext cctx,
    LLNode node,
    Operand[] preservedArgs,
    Operation operation)
{
    Operand source = operation.GetSource(0);

    Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");

    int index = source.AsInt32();

    Operand dest = operation.Destination;

    int intCount = 0;
    int vecCount = 0;

    // Count how many integer and vector registers the preceding arguments use.
    for (int cIndex = 0; cIndex < index; cIndex++)
    {
        OperandType argType = cctx.FuncArgTypes[cIndex];

        if (argType.IsInteger())
        {
            intCount++;
        }
        else if (argType == OperandType.V128)
        {
            // Each half of the vector takes one GPR.
            intCount += 2;
        }
        else
        {
            vecCount++;
        }
    }

    // The classification must look at the type of the argument being loaded (the
    // destination type). The source is only the constant argument index, so its
    // type says nothing about the argument.
    bool passOnReg;

    if (dest.Type.IsInteger())
    {
        passOnReg = intCount < CallingConvention.GetIntArgumentsOnRegsCount();
    }
    else if (dest.Type == OperandType.V128)
    {
        passOnReg = intCount + 1 < CallingConvention.GetIntArgumentsOnRegsCount();
    }
    else
    {
        passOnReg = vecCount < CallingConvention.GetVecArgumentsOnRegsCount();
    }

    if (passOnReg)
    {
        if (preservedArgs[index] == null)
        {
            if (dest.Type == OperandType.V128)
            {
                // V128 is a struct, we pass each half on a GPR if possible.
                Operand pArg = Local(OperandType.V128);

                Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount),     OperandType.I64);
                Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64);

                Operation copyL = new Operation(Instruction.VectorCreateScalar, pArg, argLReg);
                Operation copyH = new Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1));

                // Both are added at the front, so the high half goes in first
                // to end up after the low half.
                cctx.Cfg.Entry.Operations.AddFirst(copyH);
                cctx.Cfg.Entry.Operations.AddFirst(copyL);

                preservedArgs[index] = pArg;
            }
            else
            {
                Operand pArg = Local(dest.Type);

                Operand argReg = dest.Type.IsInteger()
                    ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), dest.Type)
                    : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), dest.Type);

                Operation copyOp = new Operation(Instruction.Copy, pArg, argReg);

                cctx.Cfg.Entry.Operations.AddFirst(copyOp);

                preservedArgs[index] = pArg;
            }
        }

        Operation argCopyOp = new Operation(Instruction.Copy, dest, preservedArgs[index]);

        node.List.AddBefore(node, argCopyOp);

        Delete(node, operation);
    }
    else
    {
        // TODO: Pass on stack.
    }
}
// Materializes a non-integer constant on a new local: the integer form of the
// constant is first copied to a local, which is then used to create the vector.
// Both operations are inserted before "node". Returns the new local.
private static Operand AddXmmCopy(LLNode node, Operand source)
{
    Operand vecLocal = Local(source.Type);

    Operand intLocal = AddCopy(node, GetIntConst(source));

    node.List.AddBefore(node, new Operation(Instruction.VectorCreateScalar, vecLocal, intLocal));

    return vecLocal;
}

// Inserts a copy of "source" to a new local of the same type before "node",
// and returns that local.
private static Operand AddCopy(LLNode node, Operand source)
{
    Operand local = Local(source.Type);

    node.List.AddBefore(node, new Operation(Instruction.Copy, local, source));

    return local;
}

// Returns an integer constant built from the AsInt32/AsInt64 value of a FP32/FP64
// constant. Operands of any other type are returned unchanged.
private static Operand GetIntConst(Operand value)
{
    switch (value.Type)
    {
        case OperandType.FP32: return Const(value.AsInt32());
        case OperandType.FP64: return Const(value.AsInt64());

        default: return value;
    }
}

// Checks if the constant can NOT be represented as a sign-extended 32-bits value.
private static bool IsLongConst(Operand operand)
{
    long value;

    if (operand.Type == OperandType.I32)
    {
        value = operand.AsInt32();
    }
    else
    {
        value = operand.AsInt64();
    }

    return !ConstFitsOnS32(value);
}

// Checks if the value is inside the range of a signed 32-bits integer.
private static bool ConstFitsOnS32(long value)
{
    return value >= int.MinValue && value <= int.MaxValue;
}

// Clears the destination and all sources of the operation,
// and then removes its node from the list.
private static void Delete(LLNode node, Operation operation)
{
    operation.Destination = null;

    int srcIndex = 0;

    while (srcIndex < operation.SourcesCount)
    {
        operation.SetSource(srcIndex++, null);
    }

    node.List.Remove(node);
}

// Creates an integer register operand for the given X86 register.
private static Operand Gpr(X86Register register, OperandType type)
    => Register((int)register, RegisterType.Integer, type);

// Creates a vector register operand for the given X86 register.
private static Operand Xmm(X86Register register, OperandType type)
    => Register((int)register, RegisterType.Vector, type);

// Checks if the destination of the operation must be the same operand as its first source.
private static bool IsSameOperandDestSrc1(Operation operation)
{
    switch (operation.Instruction)
    {
        // Always tied.
        case Instruction.BitwiseAnd:
        case Instruction.BitwiseExclusiveOr:
        case Instruction.BitwiseNot:
        case Instruction.BitwiseOr:
        case Instruction.ByteSwap:
        case Instruction.Negate:
        case Instruction.RotateRight:
        case Instruction.ShiftLeft:
        case Instruction.ShiftRightSI:
        case Instruction.ShiftRightUI:
            return true;

        // Tied for integer destinations, and for all types without VEX.
        case Instruction.Add:
        case Instruction.Multiply:
        case Instruction.Subtract:
            return !HardwareCapabilities.SupportsVexEncoding || operation.Destination.Type.IsInteger();

        // Tied only for non-integer destinations without VEX
        // (integer division is handled with fixed registers instead).
        case Instruction.Divide:
            return !HardwareCapabilities.SupportsVexEncoding && !operation.Destination.Type.IsInteger();

        // Tied only without VEX.
        case Instruction.VectorInsert:
        case Instruction.VectorInsert16:
        case Instruction.VectorInsert8:
            return !HardwareCapabilities.SupportsVexEncoding;

        default:
            return IsVexSameOperandDestSrc1(operation);
    }
}

// Checks if an intrinsic operation must have the destination equal to the first source.
// That is the case when VEX is not supported, for intrinsics with 2 or more sources
// and a V128 destination.
private static bool IsVexSameOperandDestSrc1(Operation operation)
{
    if (!IsIntrinsic(operation.Instruction) || HardwareCapabilities.SupportsVexEncoding)
    {
        return false;
    }

    if (operation.SourcesCount < 2)
    {
        return false;
    }

    Operand dest = operation.Destination;

    return dest != null && dest.Type == OperandType.V128;
}

// Checks if the instruction accepts a constant as its first source.
private static bool HasConstSrc1(Instruction inst)
{
    switch (inst)
    {
        case Instruction.Copy:
        case Instruction.LoadArgument:
        case Instruction.Spill:
        case Instruction.SpillArg:
            return true;

        default:
            return false;
    }
}

// Checks if the instruction accepts a constant as its second source.
private static bool HasConstSrc2(Instruction inst)
{
    switch (inst)
    {
        case Instruction.Add:
        case Instruction.BitwiseAnd:
        case Instruction.BitwiseExclusiveOr:
        case Instruction.BitwiseOr:
        case Instruction.CompareEqual:
        case Instruction.CompareGreater:
        case Instruction.CompareGreaterOrEqual:
        case Instruction.CompareGreaterOrEqualUI:
        case Instruction.CompareGreaterUI:
        case Instruction.CompareLess:
        case Instruction.CompareLessOrEqual:
        case Instruction.CompareLessOrEqualUI:
        case Instruction.CompareLessUI:
        case Instruction.CompareNotEqual:
        case Instruction.Multiply:
        case Instruction.RotateRight:
        case Instruction.ShiftLeft:
        case Instruction.ShiftRightSI:
        case Instruction.ShiftRightUI:
        case Instruction.Subtract:
        case Instruction.VectorExtract:
        case Instruction.VectorExtract16:
        case Instruction.VectorExtract8:
            return true;

        default:
            return false;
    }
}

// Checks if the first two sources of the instruction can be swapped
// without changing the result.
private static bool IsCommutative(Instruction inst)
{
    switch (inst)
    {
        case Instruction.Add:
        case Instruction.BitwiseAnd:
        case Instruction.BitwiseExclusiveOr:
        case Instruction.BitwiseOr:
        case Instruction.CompareEqual:
        case Instruction.CompareNotEqual:
        case Instruction.Multiply:
            return true;

        default:
            return false;
    }
}

// Intrinsic operations all use the "Extended" instruction.
private static bool IsIntrinsic(Instruction inst) => inst == Instruction.Extended;
Equal = 0x4, + NotEqual = 0x5, + BelowOrEqual = 0x6, + Above = 0x7, + Sign = 0x8, + NotSign = 0x9, + ParityEven = 0xa, + ParityOdd = 0xb, + Less = 0xc, + GreaterOrEqual = 0xd, + LessOrEqual = 0xe, + Greater = 0xf + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs new file mode 100644 index 000000000..10ba891aa --- /dev/null +++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs @@ -0,0 +1,190 @@ +namespace ARMeilleure.CodeGen.X86 +{ + enum X86Instruction + { + Add, + Addpd, + Addps, + Addsd, + Addss, + And, + Andnpd, + Andnps, + Bsr, + Bswap, + Call, + Cmovcc, + Cmp, + Cmppd, + Cmpps, + Cmpsd, + Cmpss, + Cmpxchg16b, + Comisd, + Comiss, + Cpuid, + Cvtdq2pd, + Cvtdq2ps, + Cvtpd2dq, + Cvtpd2ps, + Cvtps2dq, + Cvtps2pd, + Cvtsd2si, + Cvtsd2ss, + Cvtsi2sd, + Cvtsi2ss, + Cvtss2sd, + Div, + Divpd, + Divps, + Divsd, + Divss, + Haddpd, + Haddps, + Idiv, + Imul, + Imul128, + Insertps, + Lea, + Maxpd, + Maxps, + Maxsd, + Maxss, + Minpd, + Minps, + Minsd, + Minss, + Mov, + Mov16, + Mov8, + Movd, + Movdqu, + Movhlps, + Movlhps, + Movq, + Movsd, + Movss, + Movsx16, + Movsx32, + Movsx8, + Movzx16, + Movzx8, + Mul128, + Mulpd, + Mulps, + Mulsd, + Mulss, + Neg, + Not, + Or, + Paddb, + Paddd, + Paddq, + Paddw, + Pand, + Pandn, + Pavgb, + Pavgw, + Pblendvb, + Pcmpeqb, + Pcmpeqd, + Pcmpeqq, + Pcmpeqw, + Pcmpgtb, + Pcmpgtd, + Pcmpgtq, + Pcmpgtw, + Pextrb, + Pextrd, + Pextrq, + Pextrw, + Pinsrb, + Pinsrd, + Pinsrq, + Pinsrw, + Pmaxsb, + Pmaxsd, + Pmaxsw, + Pmaxub, + Pmaxud, + Pmaxuw, + Pminsb, + Pminsd, + Pminsw, + Pminub, + Pminud, + Pminuw, + Pmovsxbw, + Pmovsxdq, + Pmovsxwd, + Pmovzxbw, + Pmovzxdq, + Pmovzxwd, + Pmulld, + Pmullw, + Pop, + Popcnt, + Por, + Pshufb, + Pshufd, + Pslld, + Pslldq, + Psllq, + Psllw, + Psrad, + Psraw, + Psrld, + Psrlq, + Psrldq, + Psrlw, + Psubb, + Psubd, + Psubq, + Psubw, + Punpckhbw, + Punpckhdq, + Punpckhqdq, + Punpckhwd, + Punpcklbw, + Punpckldq, + Punpcklqdq, + Punpcklwd, + Push, 
+ Pxor, + Rcpps, + Rcpss, + Ror, + Roundpd, + Roundps, + Roundsd, + Roundss, + Rsqrtps, + Rsqrtss, + Sar, + Setcc, + Shl, + Shr, + Shufpd, + Shufps, + Sqrtpd, + Sqrtps, + Sqrtsd, + Sqrtss, + Sub, + Subpd, + Subps, + Subsd, + Subss, + Test, + Unpckhpd, + Unpckhps, + Unpcklpd, + Unpcklps, + Vpblendvb, + Xor, + Xorpd, + Xorps, + + Count + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/X86Register.cs b/ARMeilleure/CodeGen/X86/X86Register.cs new file mode 100644 index 000000000..01f63e311 --- /dev/null +++ b/ARMeilleure/CodeGen/X86/X86Register.cs @@ -0,0 +1,41 @@ +namespace ARMeilleure.CodeGen.X86 +{ + enum X86Register + { + Invalid = -1, + + Rax = 0, + Rcx = 1, + Rdx = 2, + Rbx = 3, + Rsp = 4, + Rbp = 5, + Rsi = 6, + Rdi = 7, + R8 = 8, + R9 = 9, + R10 = 10, + R11 = 11, + R12 = 12, + R13 = 13, + R14 = 14, + R15 = 15, + + Xmm0 = 0, + Xmm1 = 1, + Xmm2 = 2, + Xmm3 = 3, + Xmm4 = 4, + Xmm5 = 5, + Xmm6 = 6, + Xmm7 = 7, + Xmm8 = 8, + Xmm9 = 9, + Xmm10 = 10, + Xmm11 = 11, + Xmm12 = 12, + Xmm13 = 13, + Xmm14 = 14, + Xmm15 = 15 + } +} \ No newline at end of file diff --git a/ARMeilleure/Common/BitMap.cs b/ARMeilleure/Common/BitMap.cs new file mode 100644 index 000000000..9dff271b4 --- /dev/null +++ b/ARMeilleure/Common/BitMap.cs @@ -0,0 +1,138 @@ +using System.Collections; +using System.Collections.Generic; + +namespace ARMeilleure.Common +{ + class BitMap : IEnumerable + { + private const int IntSize = 32; + private const int IntMask = IntSize - 1; + + private List _masks; + + public BitMap(int initialCapacity) + { + int count = (initialCapacity + IntMask) / IntSize; + + _masks = new List(count); + + while (count-- > 0) + { + _masks.Add(0); + } + } + + public bool Set(int bit) + { + EnsureCapacity(bit + 1); + + int wordIndex = bit / IntSize; + int wordBit = bit & IntMask; + + int wordMask = 1 << wordBit; + + if ((_masks[wordIndex] & wordMask) != 0) + { + return false; + } + + _masks[wordIndex] |= wordMask; + + return true; + } + + public void Clear(int 
bit) + { + EnsureCapacity(bit + 1); + + int wordIndex = bit / IntSize; + int wordBit = bit & IntMask; + + int wordMask = 1 << wordBit; + + _masks[wordIndex] &= ~wordMask; + } + + public bool IsSet(int bit) + { + EnsureCapacity(bit + 1); + + int wordIndex = bit / IntSize; + int wordBit = bit & IntMask; + + return (_masks[wordIndex] & (1 << wordBit)) != 0; + } + + public bool Set(BitMap map) + { + EnsureCapacity(map._masks.Count * IntSize); + + bool modified = false; + + for (int index = 0; index < map._masks.Count; index++) // Bounded by map's word count: this bitmap may hold more words than map, and those cannot change. + { + int newValue = _masks[index] | map._masks[index]; + + if (_masks[index] != newValue) + { + _masks[index] = newValue; + + modified = true; + } + } + + return modified; + } + + public bool Clear(BitMap map) + { + EnsureCapacity(map._masks.Count * IntSize); + + bool modified = false; + + for (int index = 0; index < map._masks.Count; index++) // Bounded by map's word count: words beyond map's length have nothing to clear. + { + int newValue = _masks[index] & ~map._masks[index]; + + if (_masks[index] != newValue) + { + _masks[index] = newValue; + + modified = true; + } + } + + return modified; + } + + private void EnsureCapacity(int size) + { + while (_masks.Count * IntSize < size) + { + _masks.Add(0); + } + } + + public IEnumerator GetEnumerator() + { + for (int index = 0; index < _masks.Count; index++) + { + int mask = _masks[index]; + + while (mask != 0) + { + int bit = BitUtils.LowestBitSet(mask); + + mask &= ~(1 << bit); + + yield return index * IntSize + bit; + } + } + } + + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Common/BitUtils.cs b/ARMeilleure/Common/BitUtils.cs new file mode 100644 index 000000000..55344608c --- /dev/null +++ b/ARMeilleure/Common/BitUtils.cs @@ -0,0 +1,109 @@ +using System.Runtime.CompilerServices; + +namespace ARMeilleure.Common +{ + static class BitUtils + { + private const int DeBrujinSequence = 0x77cb531; + + private static int[] DeBrujinLbsLut; + + static BitUtils() + { + DeBrujinLbsLut = new int[32]; + + for 
(int index = 0; index < DeBrujinLbsLut.Length; index++) + { + uint lutIndex = (uint)(DeBrujinSequence * (1 << index)) >> 27; + + DeBrujinLbsLut[lutIndex] = index; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int LowestBitSet(int value) + { + if (value == 0) + { + return -1; + } + + int lsb = value & -value; + + return DeBrujinLbsLut[(uint)(DeBrujinSequence * lsb) >> 27]; + } + + public static int HighestBitSet(int value) + { + if (value == 0) + { + return -1; + } + + for (int bit = 31; bit >= 0; bit--) + { + if (((value >> bit) & 1) != 0) + { + return bit; + } + } + + return -1; + } + + private static readonly sbyte[] HbsNibbleLut = { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 }; + + public static int HighestBitSetNibble(int value) => HbsNibbleLut[value & 0b1111]; + + public static long Replicate(long bits, int size) + { + long output = 0; + + for (int bit = 0; bit < 64; bit += size) + { + output |= bits << bit; + } + + return output; + } + + public static int CountBits(int value) + { + int count = 0; + + while (value != 0) + { + value &= ~(value & -value); + + count++; + } + + return count; + } + + public static long FillWithOnes(int bits) + { + return bits == 64 ? 
-1L : (1L << bits) - 1; + } + + public static int RotateRight(int bits, int shift, int size) + { + return (int)RotateRight((uint)bits, shift, size); + } + + public static uint RotateRight(uint bits, int shift, int size) + { + return (bits >> shift) | (bits << (size - shift)); + } + + public static long RotateRight(long bits, int shift, int size) + { + return (long)RotateRight((ulong)bits, shift, size); + } + + public static ulong RotateRight(ulong bits, int shift, int size) + { + return (bits >> shift) | (bits << (size - shift)); + } + } +} diff --git a/ARMeilleure/Common/EnumUtils.cs b/ARMeilleure/Common/EnumUtils.cs new file mode 100644 index 000000000..2a4aa645b --- /dev/null +++ b/ARMeilleure/Common/EnumUtils.cs @@ -0,0 +1,12 @@ +using System; + +namespace ARMeilleure.Common +{ + static class EnumUtils + { + public static int GetCount(Type enumType) + { + return Enum.GetNames(enumType).Length; + } + } +} diff --git a/ARMeilleure/Decoders/Block.cs b/ARMeilleure/Decoders/Block.cs new file mode 100644 index 000000000..3d13c2d5e --- /dev/null +++ b/ARMeilleure/Decoders/Block.cs @@ -0,0 +1,99 @@ +using System; +using System.Collections.Generic; + +namespace ARMeilleure.Decoders +{ + class Block + { + public ulong Address { get; set; } + public ulong EndAddress { get; set; } + + public Block Next { get; set; } + public Block Branch { get; set; } + + public List OpCodes { get; private set; } + + public Block() + { + OpCodes = new List(); + } + + public Block(ulong address) : this() + { + Address = address; + } + + public void Split(Block rightBlock) + { + int splitIndex = BinarySearch(OpCodes, rightBlock.Address); + + if ((ulong)OpCodes[splitIndex].Address < rightBlock.Address) + { + splitIndex++; + } + + int splitCount = OpCodes.Count - splitIndex; + + if (splitCount <= 0) + { + throw new ArgumentException("Can't split at right block address."); + } + + rightBlock.EndAddress = EndAddress; + + rightBlock.Next = Next; + rightBlock.Branch = Branch; + + 
rightBlock.OpCodes.AddRange(OpCodes.GetRange(splitIndex, splitCount)); + + EndAddress = rightBlock.Address; + + Next = rightBlock; + Branch = null; + + OpCodes.RemoveRange(splitIndex, splitCount); + } + + private static int BinarySearch(List opCodes, ulong address) + { + int left = 0; + int middle = 0; + int right = opCodes.Count - 1; + + while (left <= right) + { + int size = right - left; + + middle = left + (size >> 1); + + OpCode opCode = opCodes[middle]; + + if (address == (ulong)opCode.Address) + { + break; + } + + if (address < (ulong)opCode.Address) + { + right = middle - 1; + } + else + { + left = middle + 1; + } + } + + return middle; + } + + public OpCode GetLastOp() + { + if (OpCodes.Count > 0) + { + return OpCodes[OpCodes.Count - 1]; + } + + return null; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/Condition.cs b/ARMeilleure/Decoders/Condition.cs new file mode 100644 index 000000000..727f897da --- /dev/null +++ b/ARMeilleure/Decoders/Condition.cs @@ -0,0 +1,32 @@ +namespace ARMeilleure.Decoders +{ + enum Condition + { + Eq = 0, + Ne = 1, + GeUn = 2, + LtUn = 3, + Mi = 4, + Pl = 5, + Vs = 6, + Vc = 7, + GtUn = 8, + LeUn = 9, + Ge = 10, + Lt = 11, + Gt = 12, + Le = 13, + Al = 14, + Nv = 15 + } + + static class ConditionExtensions + { + public static Condition Invert(this Condition cond) + { + // Bit 0 of all conditions is basically a negation bit, so + // inverting this bit has the effect of inverting the condition. 
+ return (Condition)((int)cond ^ 1); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/DataOp.cs b/ARMeilleure/Decoders/DataOp.cs new file mode 100644 index 000000000..464d00898 --- /dev/null +++ b/ARMeilleure/Decoders/DataOp.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.Decoders +{ + enum DataOp + { + Adr = 0, + Arithmetic = 1, + Logical = 2, + BitField = 3 + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/Decoder.cs b/ARMeilleure/Decoders/Decoder.cs new file mode 100644 index 000000000..2311e9e96 --- /dev/null +++ b/ARMeilleure/Decoders/Decoder.cs @@ -0,0 +1,351 @@ +using ARMeilleure.Instructions; +using ARMeilleure.Memory; +using ARMeilleure.State; +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Reflection.Emit; + +namespace ARMeilleure.Decoders +{ + static class Decoder + { + private delegate object MakeOp(InstDescriptor inst, ulong address, int opCode); + + private static ConcurrentDictionary _opActivators; + + static Decoder() + { + _opActivators = new ConcurrentDictionary(); + } + + public static Block[] DecodeBasicBlock(MemoryManager memory, ulong address, ExecutionMode mode) + { + Block block = new Block(address); + + FillBlock(memory, mode, block, ulong.MaxValue); + + return new Block[] { block }; + } + + public static Block[] DecodeFunction(MemoryManager memory, ulong address, ExecutionMode mode) + { + List blocks = new List(); + + Queue workQueue = new Queue(); + + Dictionary visited = new Dictionary(); + + Block GetBlock(ulong blkAddress) + { + if (!visited.TryGetValue(blkAddress, out Block block)) + { + block = new Block(blkAddress); + + workQueue.Enqueue(block); + + visited.Add(blkAddress, block); + } + + return block; + } + + GetBlock(address); + + while (workQueue.TryDequeue(out Block currBlock)) + { + // Check if the current block is inside another block. 
+ if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex)) + { + Block nBlock = blocks[nBlkIndex]; + + if (nBlock.Address == currBlock.Address) + { + throw new InvalidOperationException("Found duplicate block address on the list."); + } + + nBlock.Split(currBlock); + + blocks.Insert(nBlkIndex + 1, currBlock); + + continue; + } + + // If we have a block after the current one, set the limit address. + ulong limitAddress = ulong.MaxValue; + + if (nBlkIndex != blocks.Count) + { + Block nBlock = blocks[nBlkIndex]; + + int nextIndex = nBlkIndex + 1; + + if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count) + { + limitAddress = blocks[nextIndex].Address; + } + else if (nBlock.Address > currBlock.Address) + { + limitAddress = blocks[nBlkIndex].Address; + } + } + + FillBlock(memory, mode, currBlock, limitAddress); + + if (currBlock.OpCodes.Count != 0) + { + // Set child blocks. "Branch" is the block the branch instruction + // points to (when taken), "Next" is the block at the next address, + // executed when the branch is not taken. For Unconditional Branches + // (except BL/BLR that are sub calls) or end of executable, Next is null. + OpCode lastOp = currBlock.GetLastOp(); + + bool isCall = IsCall(lastOp); + + if (lastOp is IOpCodeBImm op && !isCall) + { + currBlock.Branch = GetBlock((ulong)op.Immediate); + } + + if (!IsUnconditionalBranch(lastOp) /*|| isCall*/) + { + currBlock.Next = GetBlock(currBlock.EndAddress); + } + } + + // Insert the new block on the list (sorted by address). + if (blocks.Count != 0) + { + Block nBlock = blocks[nBlkIndex]; + + blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 
1 : 0), currBlock); + } + else + { + blocks.Add(currBlock); + } + } + + return blocks.ToArray(); + } + + private static bool BinarySearch(List blocks, ulong address, out int index) + { + index = 0; + + int left = 0; + int right = blocks.Count - 1; + + while (left <= right) + { + int size = right - left; + + int middle = left + (size >> 1); + + Block block = blocks[middle]; + + index = middle; + + if (address >= block.Address && address < block.EndAddress) + { + return true; + } + + if (address < block.Address) + { + right = middle - 1; + } + else + { + left = middle + 1; + } + } + + return false; + } + + private static void FillBlock( + MemoryManager memory, + ExecutionMode mode, + Block block, + ulong limitAddress) + { + ulong address = block.Address; + + OpCode opCode; + + do + { + if (address >= limitAddress) + { + break; + } + + opCode = DecodeOpCode(memory, address, mode); + + block.OpCodes.Add(opCode); + + address += (ulong)opCode.OpCodeSizeInBytes; + } + while (!(IsBranch(opCode) || IsException(opCode))); + + block.EndAddress = address; + } + + private static bool IsBranch(OpCode opCode) + { + return opCode is OpCodeBImm || + opCode is OpCodeBReg || IsAarch32Branch(opCode); + } + + private static bool IsUnconditionalBranch(OpCode opCode) + { + return opCode is OpCodeBImmAl || + opCode is OpCodeBReg || IsAarch32UnconditionalBranch(opCode); + } + + private static bool IsAarch32UnconditionalBranch(OpCode opCode) + { + if (!(opCode is OpCode32 op)) + { + return false; + } + + // Note: On ARM32, most instructions have conditional execution, + // so there's no "Always" (unconditional) branch like on ARM64. + // We need to check if the condition is "Always" instead. + return IsAarch32Branch(op) && op.Cond >= Condition.Al; + } + + private static bool IsAarch32Branch(OpCode opCode) + { + // Note: On ARM32, most ALU operations can write to R15 (PC), + // so we must consider such operations as potential branches as well. 
+ if (opCode is IOpCode32Alu opAlu && opAlu.Rd == RegisterAlias.Aarch32Pc) + { + return true; + } + + // Same thing for memory operations. We have the cases where PC is a target + // register (Rt == 15 or (mask & (1 << 15)) != 0), and cases where there is + // a write back to PC (wback == true && Rn == 15), however the latter may + // be "undefined" depending on the CPU, so compilers should not produce that. + if (opCode is IOpCode32Mem || opCode is IOpCode32MemMult) + { + int rt, rn; + + bool wBack, isLoad; + + if (opCode is IOpCode32Mem opMem) + { + rt = opMem.Rt; + rn = opMem.Rn; + wBack = opMem.WBack; + isLoad = opMem.IsLoad; + + // For the dual load, we also need to take into account the + // case where Rt2 == 15 (PC). + if (rt == 14 && opMem.Instruction.Name == InstName.Ldrd) + { + rt = RegisterAlias.Aarch32Pc; + } + } + else if (opCode is IOpCode32MemMult opMemMult) + { + const int pcMask = 1 << RegisterAlias.Aarch32Pc; + + rt = (opMemMult.RegisterMask & pcMask) != 0 ? RegisterAlias.Aarch32Pc : 0; + rn = opMemMult.Rn; + wBack = opMemMult.PostOffset != 0; + isLoad = opMemMult.IsLoad; + } + else + { + throw new NotImplementedException($"The type \"{opCode.GetType().Name}\" is not implemented on the decoder."); + } + + if ((rt == RegisterAlias.Aarch32Pc && isLoad) || + (rn == RegisterAlias.Aarch32Pc && wBack)) + { + return true; + } + } + + // Explicit branch instructions. + return opCode is IOpCode32BImm || + opCode is IOpCode32BReg; + } + + private static bool IsCall(OpCode opCode) + { + // TODO (CQ): ARM32 support. 
+ return opCode.Instruction.Name == InstName.Bl || + opCode.Instruction.Name == InstName.Blr; + } + + private static bool IsException(OpCode opCode) + { + return opCode.Instruction.Name == InstName.Brk || + opCode.Instruction.Name == InstName.Svc || + opCode.Instruction.Name == InstName.Und; + } + + public static OpCode DecodeOpCode(MemoryManager memory, ulong address, ExecutionMode mode) + { + int opCode = memory.ReadInt32((long)address); + + InstDescriptor inst; + + Type type; + + if (mode == ExecutionMode.Aarch64) + { + (inst, type) = OpCodeTable.GetInstA64(opCode); + } + else + { + if (mode == ExecutionMode.Aarch32Arm) + { + (inst, type) = OpCodeTable.GetInstA32(opCode); + } + else /* if (mode == ExecutionMode.Aarch32Thumb) */ + { + (inst, type) = OpCodeTable.GetInstT32(opCode); + } + } + + if (type != null) + { + return MakeOpCode(inst, type, address, opCode); + } + else + { + return new OpCode(inst, address, opCode); + } + } + + private static OpCode MakeOpCode(InstDescriptor inst, Type type, ulong address, int opCode) + { + MakeOp createInstance = _opActivators.GetOrAdd(type, CacheOpActivator); + + return (OpCode)createInstance(inst, address, opCode); + } + + private static MakeOp CacheOpActivator(Type type) + { + Type[] argTypes = new Type[] { typeof(InstDescriptor), typeof(ulong), typeof(int) }; + + DynamicMethod mthd = new DynamicMethod($"Make{type.Name}", type, argTypes); + + ILGenerator generator = mthd.GetILGenerator(); + + generator.Emit(OpCodes.Ldarg_0); + generator.Emit(OpCodes.Ldarg_1); + generator.Emit(OpCodes.Ldarg_2); + generator.Emit(OpCodes.Newobj, type.GetConstructor(argTypes)); + generator.Emit(OpCodes.Ret); + + return (MakeOp)mthd.CreateDelegate(typeof(MakeOp)); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/DecoderHelper.cs b/ARMeilleure/Decoders/DecoderHelper.cs new file mode 100644 index 000000000..3cbd49123 --- /dev/null +++ b/ARMeilleure/Decoders/DecoderHelper.cs @@ -0,0 +1,113 @@ +using ARMeilleure.Common; 
+using System; + +namespace ARMeilleure.Decoders +{ + static class DecoderHelper + { + public struct BitMask + { + public long WMask; + public long TMask; + public int Pos; + public int Shift; + public bool IsUndefined; + + public static BitMask Invalid => new BitMask { IsUndefined = true }; + } + + public static BitMask DecodeBitMask(int opCode, bool immediate) + { + int immS = (opCode >> 10) & 0x3f; + int immR = (opCode >> 16) & 0x3f; + + int n = (opCode >> 22) & 1; + int sf = (opCode >> 31) & 1; + + int length = BitUtils.HighestBitSet((~immS & 0x3f) | (n << 6)); + + if (length < 1 || (sf == 0 && n != 0)) + { + return BitMask.Invalid; + } + + int size = 1 << length; + + int levels = size - 1; + + int s = immS & levels; + int r = immR & levels; + + if (immediate && s == levels) + { + return BitMask.Invalid; + } + + long wMask = BitUtils.FillWithOnes(s + 1); + long tMask = BitUtils.FillWithOnes(((s - r) & levels) + 1); + + if (r > 0) + { + wMask = BitUtils.RotateRight(wMask, r, size); + wMask &= BitUtils.FillWithOnes(size); + } + + return new BitMask() + { + WMask = BitUtils.Replicate(wMask, size), + TMask = BitUtils.Replicate(tMask, size), + + Pos = immS, + Shift = immR + }; + } + + public static long DecodeImm8Float(long imm, int size) + { + int e = 0, f = 0; + + switch (size) + { + case 0: e = 8; f = 23; break; + case 1: e = 11; f = 52; break; + + default: throw new ArgumentOutOfRangeException(nameof(size)); + } + + long value = (imm & 0x3f) << f - 4; + + long eBit = (imm >> 6) & 1; + long sBit = (imm >> 7) & 1; + + if (eBit != 0) + { + value |= (1L << e - 3) - 1 << f + 2; + } + + value |= (eBit ^ 1) << f + e - 1; + value |= sBit << f + e; + + return value; + } + + public static long DecodeImm24_2(int opCode) + { + return ((long)opCode << 40) >> 38; + } + + public static long DecodeImm26_2(int opCode) + { + return ((long)opCode << 38) >> 36; + } + + public static long DecodeImmS19_2(int opCode) + { + return (((long)opCode << 40) >> 43) & ~3; + } + + public 
static long DecodeImmS14_2(int opCode) + { + return (((long)opCode << 45) >> 48) & ~3; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/IOpCode.cs b/ARMeilleure/Decoders/IOpCode.cs new file mode 100644 index 000000000..37ba7a4c6 --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode.cs @@ -0,0 +1,17 @@ +using ARMeilleure.IntermediateRepresentation; + +namespace ARMeilleure.Decoders +{ + interface IOpCode + { + ulong Address { get; } + + InstDescriptor Instruction { get; } + + RegisterSize RegisterSize { get; } + + int GetBitsCount(); + + OperandType GetOperandType(); + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/IOpCode32.cs b/ARMeilleure/Decoders/IOpCode32.cs new file mode 100644 index 000000000..126c10690 --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode32.cs @@ -0,0 +1,9 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32 : IOpCode + { + Condition Cond { get; } + + uint GetPc(); + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/IOpCode32Alu.cs b/ARMeilleure/Decoders/IOpCode32Alu.cs new file mode 100644 index 000000000..72aea30ef --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode32Alu.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32Alu : IOpCode32 + { + int Rd { get; } + int Rn { get; } + + bool SetFlags { get; } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/IOpCode32BImm.cs b/ARMeilleure/Decoders/IOpCode32BImm.cs new file mode 100644 index 000000000..ec7db2c26 --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode32BImm.cs @@ -0,0 +1,4 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32BImm : IOpCode32, IOpCodeBImm { } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/IOpCode32BReg.cs b/ARMeilleure/Decoders/IOpCode32BReg.cs new file mode 100644 index 000000000..097ab4275 --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode32BReg.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32BReg : IOpCode32 + { + int Rm { 
get; } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/IOpCode32Mem.cs b/ARMeilleure/Decoders/IOpCode32Mem.cs new file mode 100644 index 000000000..0585ab53a --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode32Mem.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32Mem : IOpCode32 + { + int Rt { get; } + int Rn { get; } + + bool WBack { get; } + + bool IsLoad { get; } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/IOpCode32MemMult.cs b/ARMeilleure/Decoders/IOpCode32MemMult.cs new file mode 100644 index 000000000..18fd3f6bf --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode32MemMult.cs @@ -0,0 +1,13 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32MemMult : IOpCode32 + { + int Rn { get; } + + int RegisterMask { get; } + + int PostOffset { get; } + + bool IsLoad { get; } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/IOpCodeAlu.cs b/ARMeilleure/Decoders/IOpCodeAlu.cs new file mode 100644 index 000000000..b8c28513d --- /dev/null +++ b/ARMeilleure/Decoders/IOpCodeAlu.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCodeAlu : IOpCode + { + int Rd { get; } + int Rn { get; } + + DataOp DataOp { get; } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/IOpCodeAluImm.cs b/ARMeilleure/Decoders/IOpCodeAluImm.cs new file mode 100644 index 000000000..02f4c997b --- /dev/null +++ b/ARMeilleure/Decoders/IOpCodeAluImm.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCodeAluImm : IOpCodeAlu + { + long Immediate { get; } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/IOpCodeAluRs.cs b/ARMeilleure/Decoders/IOpCodeAluRs.cs new file mode 100644 index 000000000..22540b11a --- /dev/null +++ b/ARMeilleure/Decoders/IOpCodeAluRs.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCodeAluRs : IOpCodeAlu + { + int Shift { get; } + int Rm { get; } + + ShiftType ShiftType { get; } + } +} \ No newline at 
end of file diff --git a/ARMeilleure/Decoders/IOpCodeAluRx.cs b/ARMeilleure/Decoders/IOpCodeAluRx.cs new file mode 100644 index 000000000..9d16be787 --- /dev/null +++ b/ARMeilleure/Decoders/IOpCodeAluRx.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCodeAluRx : IOpCodeAlu + { + int Shift { get; } + int Rm { get; } + + IntType IntType { get; } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/IOpCodeBImm.cs b/ARMeilleure/Decoders/IOpCodeBImm.cs new file mode 100644 index 000000000..958bff28d --- /dev/null +++ b/ARMeilleure/Decoders/IOpCodeBImm.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCodeBImm : IOpCode + { + long Immediate { get; } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/IOpCodeCond.cs b/ARMeilleure/Decoders/IOpCodeCond.cs new file mode 100644 index 000000000..9808f7c08 --- /dev/null +++ b/ARMeilleure/Decoders/IOpCodeCond.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCodeCond : IOpCode + { + Condition Cond { get; } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/IOpCodeLit.cs b/ARMeilleure/Decoders/IOpCodeLit.cs new file mode 100644 index 000000000..74084a457 --- /dev/null +++ b/ARMeilleure/Decoders/IOpCodeLit.cs @@ -0,0 +1,11 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCodeLit : IOpCode + { + int Rt { get; } + long Immediate { get; } + int Size { get; } + bool Signed { get; } + bool Prefetch { get; } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/IOpCodeSimd.cs b/ARMeilleure/Decoders/IOpCodeSimd.cs new file mode 100644 index 000000000..056ef045c --- /dev/null +++ b/ARMeilleure/Decoders/IOpCodeSimd.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCodeSimd : IOpCode + { + int Size { get; } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/InstDescriptor.cs b/ARMeilleure/Decoders/InstDescriptor.cs new file mode 100644 index 000000000..ee2b1c2e4 --- /dev/null 
+++ b/ARMeilleure/Decoders/InstDescriptor.cs @@ -0,0 +1,18 @@ +using ARMeilleure.Instructions; + +namespace ARMeilleure.Decoders +{ + struct InstDescriptor + { + public static InstDescriptor Undefined => new InstDescriptor(InstName.Und, null); + + public InstName Name { get; } + public InstEmitter Emitter { get; } + + public InstDescriptor(InstName name, InstEmitter emitter) + { + Name = name; + Emitter = emitter; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/InstEmitter.cs b/ARMeilleure/Decoders/InstEmitter.cs new file mode 100644 index 000000000..a8b526569 --- /dev/null +++ b/ARMeilleure/Decoders/InstEmitter.cs @@ -0,0 +1,6 @@ +using ARMeilleure.Translation; + +namespace ARMeilleure.Decoders +{ + delegate void InstEmitter(ArmEmitterContext context); +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/IntType.cs b/ARMeilleure/Decoders/IntType.cs new file mode 100644 index 000000000..244e96805 --- /dev/null +++ b/ARMeilleure/Decoders/IntType.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + enum IntType + { + UInt8 = 0, + UInt16 = 1, + UInt32 = 2, + UInt64 = 3, + Int8 = 4, + Int16 = 5, + Int32 = 6, + Int64 = 7 + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCode.cs b/ARMeilleure/Decoders/OpCode.cs new file mode 100644 index 000000000..0bfc2456b --- /dev/null +++ b/ARMeilleure/Decoders/OpCode.cs @@ -0,0 +1,48 @@ +using ARMeilleure.IntermediateRepresentation; +using System; + +namespace ARMeilleure.Decoders +{ + class OpCode : IOpCode + { + public ulong Address { get; private set; } + public int RawOpCode { get; private set; } + + public int OpCodeSizeInBytes { get; protected set; } = 4; + + public InstDescriptor Instruction { get; protected set; } + + public RegisterSize RegisterSize { get; protected set; } + + public OpCode(InstDescriptor inst, ulong address, int opCode) + { + Address = address; + RawOpCode = opCode; + + Instruction = inst; + + RegisterSize = RegisterSize.Int64; + } + + public 
int GetPairsCount() => GetBitsCount() / 16; + public int GetBytesCount() => GetBitsCount() / 8; + + public int GetBitsCount() + { + switch (RegisterSize) + { + case RegisterSize.Int32: return 32; + case RegisterSize.Int64: return 64; + case RegisterSize.Simd64: return 64; + case RegisterSize.Simd128: return 128; + } + + throw new InvalidOperationException(); + } + + public OperandType GetOperandType() + { + return RegisterSize == RegisterSize.Int32 ? OperandType.I32 : OperandType.I64; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCode32.cs b/ARMeilleure/Decoders/OpCode32.cs new file mode 100644 index 000000000..20927d5e4 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32.cs @@ -0,0 +1,21 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32 : OpCode + { + public Condition Cond { get; protected set; } + + public OpCode32(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + RegisterSize = RegisterSize.Int32; + + Cond = (Condition)((uint)opCode >> 28); + } + + public uint GetPc() + { + // Due to backwards compatibility and legacy behavior of ARMv4 CPUs pipeline, + // the PC actually points 2 instructions ahead. 
+ return (uint)Address + (uint)OpCodeSizeInBytes * 2; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCode32Alu.cs b/ARMeilleure/Decoders/OpCode32Alu.cs new file mode 100644 index 000000000..8d03baddb --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32Alu.cs @@ -0,0 +1,18 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32Alu : OpCode32, IOpCode32Alu + { + public int Rd { get; private set; } + public int Rn { get; private set; } + + public bool SetFlags { get; private set; } + + public OpCode32Alu(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 12) & 0xf; + Rn = (opCode >> 16) & 0xf; + + SetFlags = ((opCode >> 20) & 1) != 0; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCode32AluImm.cs b/ARMeilleure/Decoders/OpCode32AluImm.cs new file mode 100644 index 000000000..bba03e4d8 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32AluImm.cs @@ -0,0 +1,21 @@ +using ARMeilleure.Common; + +namespace ARMeilleure.Decoders +{ + class OpCode32AluImm : OpCode32Alu + { + public int Immediate { get; private set; } + + public bool IsRotated { get; private set; } + + public OpCode32AluImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int value = (opCode >> 0) & 0xff; + int shift = (opCode >> 8) & 0xf; + + Immediate = BitUtils.RotateRight(value, shift * 2, 32); + + IsRotated = shift != 0; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCode32AluRsImm.cs b/ARMeilleure/Decoders/OpCode32AluRsImm.cs new file mode 100644 index 000000000..779d6cecf --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32AluRsImm.cs @@ -0,0 +1,18 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32AluRsImm : OpCode32Alu + { + public int Rm { get; private set; } + public int Imm { get; private set; } + + public ShiftType ShiftType { get; private set; } + + public OpCode32AluRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, 
address, opCode) + { + Rm = (opCode >> 0) & 0xf; + Imm = (opCode >> 7) & 0x1f; + + ShiftType = (ShiftType)((opCode >> 5) & 3); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCode32BImm.cs b/ARMeilleure/Decoders/OpCode32BImm.cs new file mode 100644 index 000000000..ea6443bc8 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32BImm.cs @@ -0,0 +1,27 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32BImm : OpCode32, IOpCode32BImm + { + public long Immediate { get; private set; } + + public OpCode32BImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + uint pc = GetPc(); + + // When the condition is never, the instruction is BLX to Thumb mode. + if (Cond != Condition.Nv) + { + pc &= ~3u; + } + + Immediate = pc + DecoderHelper.DecodeImm24_2(opCode); + + if (Cond == Condition.Nv) + { + long H = (opCode >> 23) & 2; + + Immediate |= H; + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCode32BReg.cs b/ARMeilleure/Decoders/OpCode32BReg.cs new file mode 100644 index 000000000..ffb487070 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32BReg.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32BReg : OpCode32, IOpCode32BReg + { + public int Rm { get; private set; } + + public OpCode32BReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = opCode & 0xf; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCode32Mem.cs b/ARMeilleure/Decoders/OpCode32Mem.cs new file mode 100644 index 000000000..f4e88d592 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32Mem.cs @@ -0,0 +1,37 @@ +using ARMeilleure.Instructions; + +namespace ARMeilleure.Decoders +{ + class OpCode32Mem : OpCode32, IOpCode32Mem + { + public int Rt { get; private set; } + public int Rn { get; private set; } + + public int Immediate { get; protected set; } + + public bool Index { get; private set; } + public bool Add { get; private set; } + public bool 
WBack { get; private set; } + public bool Unprivileged { get; private set; } + + public bool IsLoad { get; private set; } + + public OpCode32Mem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt = (opCode >> 12) & 0xf; + Rn = (opCode >> 16) & 0xf; + + bool isLoad = (opCode & (1 << 20)) != 0; + bool w = (opCode & (1 << 21)) != 0; + bool u = (opCode & (1 << 23)) != 0; + bool p = (opCode & (1 << 24)) != 0; + + Index = p; + Add = u; + WBack = !p || w; + Unprivileged = !p && w; + + IsLoad = isLoad || inst.Name == InstName.Ldrd; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCode32MemImm.cs b/ARMeilleure/Decoders/OpCode32MemImm.cs new file mode 100644 index 000000000..f79c63197 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32MemImm.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32MemImm : OpCode32Mem + { + public OpCode32MemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Immediate = opCode & 0xfff; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCode32MemImm8.cs b/ARMeilleure/Decoders/OpCode32MemImm8.cs new file mode 100644 index 000000000..08027fb75 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32MemImm8.cs @@ -0,0 +1,13 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32MemImm8 : OpCode32Mem + { + public OpCode32MemImm8(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int imm4L = (opCode >> 0) & 0xf; + int imm4H = (opCode >> 8) & 0xf; + + Immediate = imm4L | (imm4H << 4); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCode32MemMult.cs b/ARMeilleure/Decoders/OpCode32MemMult.cs new file mode 100644 index 000000000..b61b50ea8 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32MemMult.cs @@ -0,0 +1,55 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32MemMult : OpCode32, IOpCode32MemMult + { + public int Rn { get; private set; } + + public int 
RegisterMask { get; private set; } + public int Offset { get; private set; } + public int PostOffset { get; private set; } + + public bool IsLoad { get; private set; } + + public OpCode32MemMult(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rn = (opCode >> 16) & 0xf; + + bool isLoad = (opCode & (1 << 20)) != 0; + bool w = (opCode & (1 << 21)) != 0; + bool u = (opCode & (1 << 23)) != 0; + bool p = (opCode & (1 << 24)) != 0; + + RegisterMask = opCode & 0xffff; + + int regsSize = 0; + + for (int index = 0; index < 16; index++) + { + regsSize += (RegisterMask >> index) & 1; + } + + regsSize *= 4; + + if (!u) + { + Offset -= regsSize; + } + + if (u == p) + { + Offset += 4; + } + + if (w) + { + PostOffset = u ? regsSize : -regsSize; + } + else + { + PostOffset = 0; + } + + IsLoad = isLoad; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeAdr.cs b/ARMeilleure/Decoders/OpCodeAdr.cs new file mode 100644 index 000000000..fc8219f6c --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeAdr.cs @@ -0,0 +1,17 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeAdr : OpCode + { + public int Rd { get; private set; } + + public long Immediate { get; private set; } + + public OpCodeAdr(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = opCode & 0x1f; + + Immediate = DecoderHelper.DecodeImmS19_2(opCode); + Immediate |= ((long)opCode >> 29) & 3; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeAlu.cs b/ARMeilleure/Decoders/OpCodeAlu.cs new file mode 100644 index 000000000..171662a06 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeAlu.cs @@ -0,0 +1,21 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeAlu : OpCode, IOpCodeAlu + { + public int Rd { get; protected set; } + public int Rn { get; private set; } + + public DataOp DataOp { get; private set; } + + public OpCodeAlu(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = 
(opCode >> 0) & 0x1f; + Rn = (opCode >> 5) & 0x1f; + DataOp = (DataOp)((opCode >> 24) & 0x3); + + RegisterSize = (opCode >> 31) != 0 + ? RegisterSize.Int64 + : RegisterSize.Int32; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeAluBinary.cs b/ARMeilleure/Decoders/OpCodeAluBinary.cs new file mode 100644 index 000000000..2bdf1d798 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeAluBinary.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeAluBinary : OpCodeAlu /* ALU opcode with a second source register. */ + { + public int Rm { get; private set; } + + public OpCodeAluBinary(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 16) & 0x1f; /* bits 20:16 */ + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeAluImm.cs b/ARMeilleure/Decoders/OpCodeAluImm.cs new file mode 100644 index 000000000..35c83fcc3 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeAluImm.cs @@ -0,0 +1,38 @@ +using System; + +namespace ARMeilleure.Decoders +{ + class OpCodeAluImm : OpCodeAlu, IOpCodeAluImm /* ALU opcode with an immediate operand; how Immediate is decoded depends on the DataOp read by the base constructor. */ + { + public long Immediate { get; private set; } + + public OpCodeAluImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) /* Throws ArgumentException when DataOp is neither Arithmetic nor Logical. */ + { + if (DataOp == DataOp.Arithmetic) + { + Immediate = (opCode >> 10) & 0xfff; /* 12-bit field at bits 21:10 */ + + int shift = (opCode >> 22) & 3; /* 2-bit field at bits 23:22 */ + + Immediate <<= shift * 12; /* each step of the shift field moves the immediate left by 12 bits */ + } + else if (DataOp == DataOp.Logical) + { + var bm = DecoderHelper.DecodeBitMask(opCode, true); + + if (bm.IsUndefined) /* an undefined bit mask marks the whole instruction as undefined; Immediate keeps its default */ + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Immediate = bm.WMask; + } + else + { + throw new ArgumentException($"Unsupported data operation {DataOp} for an immediate ALU opcode.", nameof(opCode)); /* message first, parameter name second, so ParamName is populated */ + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeAluRs.cs b/ARMeilleure/Decoders/OpCodeAluRs.cs new file mode 100644 index 000000000..84fb6ac6d --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeAluRs.cs @@ -0,0 +1,27 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeAluRs : OpCodeAlu, IOpCodeAluRs + { + public int Shift { get; private set; } + public 
int Rm { get; private set; } + + public ShiftType ShiftType { get; private set; } + + public OpCodeAluRs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int shift = (opCode >> 10) & 0x3f; + + if (shift >= GetBitsCount()) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Shift = shift; + + Rm = (opCode >> 16) & 0x1f; + ShiftType = (ShiftType)((opCode >> 22) & 0x3); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeAluRx.cs b/ARMeilleure/Decoders/OpCodeAluRx.cs new file mode 100644 index 000000000..5c8d427e8 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeAluRx.cs @@ -0,0 +1,17 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeAluRx : OpCodeAlu, IOpCodeAluRx + { + public int Shift { get; private set; } + public int Rm { get; private set; } + + public IntType IntType { get; private set; } + + public OpCodeAluRx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Shift = (opCode >> 10) & 0x7; + IntType = (IntType)((opCode >> 13) & 0x7); + Rm = (opCode >> 16) & 0x1f; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeBImm.cs b/ARMeilleure/Decoders/OpCodeBImm.cs new file mode 100644 index 000000000..2821a6246 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeBImm.cs @@ -0,0 +1,9 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeBImm : OpCode, IOpCodeBImm + { + public long Immediate { get; protected set; } + + public OpCodeBImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeBImmAl.cs b/ARMeilleure/Decoders/OpCodeBImmAl.cs new file mode 100644 index 000000000..94bcea884 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeBImmAl.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeBImmAl : OpCodeBImm + { + public OpCodeBImmAl(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + 
Immediate = (long)address + DecoderHelper.DecodeImm26_2(opCode); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeBImmCmp.cs b/ARMeilleure/Decoders/OpCodeBImmCmp.cs new file mode 100644 index 000000000..2b7c28341 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeBImmCmp.cs @@ -0,0 +1,18 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeBImmCmp : OpCodeBImm + { + public int Rt { get; private set; } + + public OpCodeBImmCmp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt = opCode & 0x1f; + + Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode); + + RegisterSize = (opCode >> 31) != 0 + ? RegisterSize.Int64 + : RegisterSize.Int32; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeBImmCond.cs b/ARMeilleure/Decoders/OpCodeBImmCond.cs new file mode 100644 index 000000000..f898821ac --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeBImmCond.cs @@ -0,0 +1,23 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeBImmCond : OpCodeBImm, IOpCodeCond + { + public Condition Cond { get; private set; } + + public OpCodeBImmCond(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int o0 = (opCode >> 4) & 1; + + if (o0 != 0) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Cond = (Condition)(opCode & 0xf); + + Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeBImmTest.cs b/ARMeilleure/Decoders/OpCodeBImmTest.cs new file mode 100644 index 000000000..6687c2e7a --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeBImmTest.cs @@ -0,0 +1,18 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeBImmTest : OpCodeBImm + { + public int Rt { get; private set; } + public int Bit { get; private set; } + + public OpCodeBImmTest(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt = opCode & 0x1f; + + Immediate = (long)address + 
DecoderHelper.DecodeImmS14_2(opCode); + + Bit = (opCode >> 19) & 0x1f; + Bit |= (opCode >> 26) & 0x20; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeBReg.cs b/ARMeilleure/Decoders/OpCodeBReg.cs new file mode 100644 index 000000000..00c51ec71 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeBReg.cs @@ -0,0 +1,22 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeBReg : OpCode + { + public int Rn { get; private set; } + + public OpCodeBReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int op4 = (opCode >> 0) & 0x1f; + int op2 = (opCode >> 16) & 0x1f; + + if (op2 != 0b11111 || op4 != 0b00000) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Rn = (opCode >> 5) & 0x1f; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeBfm.cs b/ARMeilleure/Decoders/OpCodeBfm.cs new file mode 100644 index 000000000..2ae8edf56 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeBfm.cs @@ -0,0 +1,27 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeBfm : OpCodeAlu + { + public long WMask { get; private set; } + public long TMask { get; private set; } + public int Pos { get; private set; } + public int Shift { get; private set; } + + public OpCodeBfm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + var bm = DecoderHelper.DecodeBitMask(opCode, false); + + if (bm.IsUndefined) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + WMask = bm.WMask; + TMask = bm.TMask; + Pos = bm.Pos; + Shift = bm.Shift; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeCcmp.cs b/ARMeilleure/Decoders/OpCodeCcmp.cs new file mode 100644 index 000000000..c302f6a32 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeCcmp.cs @@ -0,0 +1,30 @@ +using ARMeilleure.State; + +namespace ARMeilleure.Decoders +{ + class OpCodeCcmp : OpCodeAlu, IOpCodeCond + { + public int Nzcv { get; private set; } + protected int RmImm; + + public Condition Cond 
{ get; private set; } + + public OpCodeCcmp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int o3 = (opCode >> 4) & 1; + + if (o3 != 0) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Nzcv = (opCode >> 0) & 0xf; + Cond = (Condition)((opCode >> 12) & 0xf); + RmImm = (opCode >> 16) & 0x1f; + + Rd = RegisterAlias.Zr; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeCcmpImm.cs b/ARMeilleure/Decoders/OpCodeCcmpImm.cs new file mode 100644 index 000000000..4a2d01f46 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeCcmpImm.cs @@ -0,0 +1,9 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeCcmpImm : OpCodeCcmp, IOpCodeAluImm + { + public long Immediate => RmImm; + + public OpCodeCcmpImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeCcmpReg.cs b/ARMeilleure/Decoders/OpCodeCcmpReg.cs new file mode 100644 index 000000000..0e2b922cf --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeCcmpReg.cs @@ -0,0 +1,13 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeCcmpReg : OpCodeCcmp, IOpCodeAluRs + { + public int Rm => RmImm; + + public int Shift => 0; + + public ShiftType ShiftType => ShiftType.Lsl; + + public OpCodeCcmpReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeCsel.cs b/ARMeilleure/Decoders/OpCodeCsel.cs new file mode 100644 index 000000000..fd07e6fd4 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeCsel.cs @@ -0,0 +1,15 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeCsel : OpCodeAlu, IOpCodeCond + { + public int Rm { get; private set; } + + public Condition Cond { get; private set; } + + public OpCodeCsel(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 16) & 0x1f; + Cond = (Condition)((opCode >> 12) & 0xf); + } + } +} \ 
No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeException.cs b/ARMeilleure/Decoders/OpCodeException.cs new file mode 100644 index 000000000..9781c543b --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeException.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeException : OpCode /* Opcode that carries a 16-bit Id. */ + { + public int Id { get; private set; } + + public OpCodeException(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Id = (opCode >> 5) & 0xffff; /* bits 20:5 */ + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeMem.cs b/ARMeilleure/Decoders/OpCodeMem.cs new file mode 100644 index 000000000..5a7ab482a --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeMem.cs @@ -0,0 +1,17 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeMem : OpCode /* Base class for memory opcodes: decodes Rt, Rn and Size; Extend64 is not assigned here and is set by derived classes. */ + { + public int Rt { get; protected set; } + public int Rn { get; protected set; } + public int Size { get; protected set; } + public bool Extend64 { get; protected set; } + + public OpCodeMem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt = (opCode >> 0) & 0x1f; /* bits 4:0 */ + Rn = (opCode >> 5) & 0x1f; /* bits 9:5 */ + Size = (opCode >> 30) & 0x3; /* bits 31:30 */ + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeMemEx.cs b/ARMeilleure/Decoders/OpCodeMemEx.cs new file mode 100644 index 000000000..5956f3672 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeMemEx.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeMemEx : OpCodeMem /* Memory opcode with two extra register fields, Rt2 and Rs. */ + { + public int Rt2 { get; private set; } + public int Rs { get; private set; } + + public OpCodeMemEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt2 = (opCode >> 10) & 0x1f; /* bits 14:10 */ + Rs = (opCode >> 16) & 0x1f; /* bits 20:16 */ + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeMemImm.cs b/ARMeilleure/Decoders/OpCodeMemImm.cs new file mode 100644 index 000000000..517434f29 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeMemImm.cs @@ -0,0 +1,51 @@ +namespace 
ARMeilleure.Decoders +{ + class OpCodeMemImm : OpCodeMem /* Memory opcode with an immediate offset: either a 9-bit signed or a 12-bit unsigned, size-scaled immediate. */ + { + public long Immediate { get; protected set; } + public bool WBack { get; protected set; } + public bool PostIdx { get; protected set; } + protected bool Unscaled { get; private set; } + + private enum MemOp /* values 0-3 mirror the 2-bit field at bits 11:10 */ + { + Unscaled = 0, + PostIndexed = 1, + Unprivileged = 2, + PreIndexed = 3, + Unsigned /* never read from the opcode; chosen when bit 24 is set */ + } + + public OpCodeMemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Extend64 = ((opCode >> 22) & 3) == 2; + WBack = ((opCode >> 24) & 1) == 0; /* provisional: cleared again below for Unscaled/Unprivileged */ + + // The type is not valid for the Unsigned Immediate 12-bits encoding, + // because the bits 11:10 are used for the larger Immediate offset. + MemOp type = WBack ? (MemOp)((opCode >> 10) & 3) : MemOp.Unsigned; + + PostIdx = type == MemOp.PostIndexed; + Unscaled = type == MemOp.Unscaled || + type == MemOp.Unprivileged; + + // Unscaled and Unprivileged don't write back, + // but they do use the 9-bit Signed Immediate. + if (Unscaled) + { + WBack = false; + } + + if (WBack || Unscaled) + { + // 9-bits Signed Immediate. + Immediate = (opCode << 11) >> 23; /* the arithmetic right shift sign-extends bits 20:12 */ + } + else + { + // 12-bits Unsigned Immediate. 
+ Immediate = ((opCode >> 10) & 0xfff) << Size; /* bits 21:10, multiplied by 2^Size */ + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeMemLit.cs b/ARMeilleure/Decoders/OpCodeMemLit.cs new file mode 100644 index 000000000..b80585cb4 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeMemLit.cs @@ -0,0 +1,26 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeMemLit : OpCode, IOpCodeLit /* Memory opcode whose Immediate is the instruction address plus the value returned by DecodeImmS19_2. */ + { + public int Rt { get; private set; } + public long Immediate { get; private set; } + public int Size { get; private set; } + public bool Signed { get; private set; } + public bool Prefetch { get; private set; } + + public OpCodeMemLit(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt = opCode & 0x1f; /* bits 4:0 */ + + Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode); + + switch ((opCode >> 30) & 3) /* bits 31:30 select Size, Signed and Prefetch together */ + { + case 0: Size = 2; Signed = false; Prefetch = false; break; + case 1: Size = 3; Signed = false; Prefetch = false; break; + case 2: Size = 2; Signed = true; Prefetch = false; break; + case 3: Size = 0; Signed = false; Prefetch = true; break; + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeMemPair.cs b/ARMeilleure/Decoders/OpCodeMemPair.cs new file mode 100644 index 000000000..ea329a1db --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeMemPair.cs @@ -0,0 +1,23 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeMemPair : OpCodeMemImm /* Register-pair memory opcode; replaces the WBack, PostIdx, Extend64, Size and Immediate values computed by the base constructors. */ + { + public int Rt2 { get; private set; } + + public OpCodeMemPair(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt2 = (opCode >> 10) & 0x1f; /* bits 14:10 */ + WBack = ((opCode >> 23) & 0x1) != 0; + PostIdx = ((opCode >> 23) & 0x3) == 1; + Extend64 = ((opCode >> 30) & 0x3) == 1; + Size = ((opCode >> 31) & 0x1) | 2; /* always 2 or 3 */ + + DecodeImm(opCode); /* must run after Size is assigned, because the scaling depends on it */ + } + + protected void DecodeImm(int opCode) /* Recomputes Immediate from the opcode using the current Size. */ + { + Immediate = ((long)(opCode >> 15) << 57) >> (57 - Size); /* keeps the 7 bits at 21:15, sign-extended and multiplied by 2^Size */ + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeMemReg.cs b/ARMeilleure/Decoders/OpCodeMemReg.cs 
new file mode 100644 index 000000000..f5c2f9911 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeMemReg.cs @@ -0,0 +1,18 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeMemReg : OpCodeMem + { + public bool Shift { get; private set; } + public int Rm { get; private set; } + + public IntType IntType { get; private set; } + + public OpCodeMemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Shift = ((opCode >> 12) & 0x1) != 0; + IntType = (IntType)((opCode >> 13) & 0x7); + Rm = (opCode >> 16) & 0x1f; + Extend64 = ((opCode >> 22) & 0x3) == 2; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeMov.cs b/ARMeilleure/Decoders/OpCodeMov.cs new file mode 100644 index 000000000..b65178cff --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeMov.cs @@ -0,0 +1,36 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeMov : OpCode + { + public int Rd { get; private set; } + + public long Immediate { get; private set; } + + public int Bit { get; private set; } + + public OpCodeMov(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int p1 = (opCode >> 22) & 1; + int sf = (opCode >> 31) & 1; + + if (sf == 0 && p1 != 0) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Rd = (opCode >> 0) & 0x1f; + Immediate = (opCode >> 5) & 0xffff; + Bit = (opCode >> 21) & 0x3; + + Bit <<= 4; + + Immediate <<= Bit; + + RegisterSize = (opCode >> 31) != 0 + ? 
RegisterSize.Int64 + : RegisterSize.Int32; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeMul.cs b/ARMeilleure/Decoders/OpCodeMul.cs new file mode 100644 index 000000000..3eb4dc97c --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeMul.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeMul : OpCodeAlu + { + public int Rm { get; private set; } + public int Ra { get; private set; } + + public OpCodeMul(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Ra = (opCode >> 10) & 0x1f; + Rm = (opCode >> 16) & 0x1f; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeSimd.cs b/ARMeilleure/Decoders/OpCodeSimd.cs new file mode 100644 index 000000000..a258446c1 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeSimd.cs @@ -0,0 +1,22 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimd : OpCode, IOpCodeSimd + { + public int Rd { get; private set; } + public int Rn { get; private set; } + public int Opc { get; private set; } + public int Size { get; protected set; } + + public OpCodeSimd(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 0) & 0x1f; + Rn = (opCode >> 5) & 0x1f; + Opc = (opCode >> 15) & 0x3; + Size = (opCode >> 22) & 0x3; + + RegisterSize = ((opCode >> 30) & 1) != 0 + ? RegisterSize.Simd128 + : RegisterSize.Simd64; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeSimdCvt.cs b/ARMeilleure/Decoders/OpCodeSimdCvt.cs new file mode 100644 index 000000000..15658bb89 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeSimdCvt.cs @@ -0,0 +1,19 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdCvt : OpCodeSimd + { + public int FBits { get; private set; } + + public OpCodeSimdCvt(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int scale = (opCode >> 10) & 0x3f; + int sf = (opCode >> 31) & 0x1; + + FBits = 64 - scale; + + RegisterSize = sf != 0 + ? 
RegisterSize.Int64 + : RegisterSize.Int32; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeSimdExt.cs b/ARMeilleure/Decoders/OpCodeSimdExt.cs new file mode 100644 index 000000000..d585449c1 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeSimdExt.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdExt : OpCodeSimdReg + { + public int Imm4 { get; private set; } + + public OpCodeSimdExt(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Imm4 = (opCode >> 11) & 0xf; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeSimdFcond.cs b/ARMeilleure/Decoders/OpCodeSimdFcond.cs new file mode 100644 index 000000000..9e7a5f3bf --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeSimdFcond.cs @@ -0,0 +1,15 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdFcond : OpCodeSimdReg, IOpCodeCond + { + public int Nzcv { get; private set; } + + public Condition Cond { get; private set; } + + public OpCodeSimdFcond(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Nzcv = (opCode >> 0) & 0xf; + Cond = (Condition)((opCode >> 12) & 0xf); + } + } +} diff --git a/ARMeilleure/Decoders/OpCodeSimdFmov.cs b/ARMeilleure/Decoders/OpCodeSimdFmov.cs new file mode 100644 index 000000000..61a3f077d --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeSimdFmov.cs @@ -0,0 +1,31 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdFmov : OpCode, IOpCodeSimd + { + public int Rd { get; private set; } + public long Immediate { get; private set; } + public int Size { get; private set; } + + public OpCodeSimdFmov(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int imm5 = (opCode >> 5) & 0x1f; + int type = (opCode >> 22) & 0x3; + + if (imm5 != 0b00000 || type > 1) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Size = type; + + long imm; + + Rd = (opCode >> 0) & 0x1f; + imm = (opCode >> 13) & 0xff; + + Immediate = 
DecoderHelper.DecodeImm8Float(imm, type); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeSimdImm.cs b/ARMeilleure/Decoders/OpCodeSimdImm.cs new file mode 100644 index 000000000..ecad906d9 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeSimdImm.cs @@ -0,0 +1,98 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdImm : OpCode, IOpCodeSimd + { + public int Rd { get; private set; } + public long Immediate { get; private set; } + public int Size { get; private set; } + + public OpCodeSimdImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = opCode & 0x1f; + + int cMode = (opCode >> 12) & 0xf; + int op = (opCode >> 29) & 0x1; + + int modeLow = cMode & 1; + int modeHigh = cMode >> 1; + + long imm; + + imm = ((uint)opCode >> 5) & 0x1f; + imm |= ((uint)opCode >> 11) & 0xe0; + + if (modeHigh == 0b111) + { + Size = modeLow != 0 ? op : 3; + + switch (op | (modeLow << 1)) + { + case 0: + // 64-bits Immediate. + // Transform abcd efgh into abcd efgh abcd efgh ... + imm = (long)((ulong)imm * 0x0101010101010101); + break; + + case 1: + // 64-bits Immediate. + // Transform abcd efgh into aaaa aaaa bbbb bbbb ... + imm = (imm & 0xf0) >> 4 | (imm & 0x0f) << 4; + imm = (imm & 0xcc) >> 2 | (imm & 0x33) << 2; + imm = (imm & 0xaa) >> 1 | (imm & 0x55) << 1; + + imm = (long)((ulong)imm * 0x8040201008040201); + imm = (long)((ulong)imm & 0x8080808080808080); + + imm |= imm >> 4; + imm |= imm >> 2; + imm |= imm >> 1; + break; + + case 2: + case 3: + // Floating point Immediate. + imm = DecoderHelper.DecodeImm8Float(imm, Size); + break; + } + } + else if ((modeHigh & 0b110) == 0b100) + { + // 16-bits shifted Immediate. + Size = 1; imm <<= (modeHigh & 1) << 3; + } + else if ((modeHigh & 0b100) == 0b000) + { + // 32-bits shifted Immediate. + Size = 2; imm <<= modeHigh << 3; + } + else if ((modeHigh & 0b111) == 0b110) + { + // 32-bits shifted Immediate (fill with ones). 
+ Size = 2; imm = ShlOnes(imm, 8 << modeLow); + } + else + { + // 8 bits without shift. + Size = 0; + } + + Immediate = imm; + + RegisterSize = ((opCode >> 30) & 1) != 0 + ? RegisterSize.Simd128 + : RegisterSize.Simd64; + } + + private static long ShlOnes(long value, int shift) + { + if (shift != 0) + { + return value << shift | (long)(ulong.MaxValue >> (64 - shift)); + } + else + { + return value; + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeSimdIns.cs b/ARMeilleure/Decoders/OpCodeSimdIns.cs new file mode 100644 index 000000000..78328adb5 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeSimdIns.cs @@ -0,0 +1,34 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdIns : OpCodeSimd + { + public int SrcIndex { get; private set; } + public int DstIndex { get; private set; } + + public OpCodeSimdIns(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int imm4 = (opCode >> 11) & 0xf; + int imm5 = (opCode >> 16) & 0x1f; + + if (imm5 == 0b10000) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Size = imm5 & -imm5; + + switch (Size) + { + case 1: Size = 0; break; + case 2: Size = 1; break; + case 4: Size = 2; break; + case 8: Size = 3; break; + } + + SrcIndex = imm4 >> Size; + DstIndex = imm5 >> (Size + 1); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeSimdMemImm.cs b/ARMeilleure/Decoders/OpCodeSimdMemImm.cs new file mode 100644 index 000000000..6b9e66d93 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeSimdMemImm.cs @@ -0,0 +1,17 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdMemImm : OpCodeMemImm, IOpCodeSimd + { + public OpCodeSimdMemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size |= (opCode >> 21) & 4; + + if (!WBack && !Unscaled && Size >= 4) + { + Immediate <<= 4; + } + + Extend64 = false; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeSimdMemLit.cs 
b/ARMeilleure/Decoders/OpCodeSimdMemLit.cs new file mode 100644 index 000000000..607df1392 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeSimdMemLit.cs @@ -0,0 +1,29 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdMemLit : OpCode, IOpCodeSimd, IOpCodeLit + { + public int Rt { get; private set; } + public long Immediate { get; private set; } + public int Size { get; private set; } + public bool Signed => false; + public bool Prefetch => false; + + public OpCodeSimdMemLit(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int opc = (opCode >> 30) & 3; + + if (opc == 3) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Rt = opCode & 0x1f; + + Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode); + + Size = opc + 2; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeSimdMemMs.cs b/ARMeilleure/Decoders/OpCodeSimdMemMs.cs new file mode 100644 index 000000000..9fa5ff42c --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeSimdMemMs.cs @@ -0,0 +1,46 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdMemMs : OpCodeMemReg, IOpCodeSimd + { + public int Reps { get; private set; } + public int SElems { get; private set; } + public int Elems { get; private set; } + public bool WBack { get; private set; } + + public OpCodeSimdMemMs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + switch ((opCode >> 12) & 0xf) + { + case 0b0000: Reps = 1; SElems = 4; break; + case 0b0010: Reps = 4; SElems = 1; break; + case 0b0100: Reps = 1; SElems = 3; break; + case 0b0110: Reps = 3; SElems = 1; break; + case 0b0111: Reps = 1; SElems = 1; break; + case 0b1000: Reps = 1; SElems = 2; break; + case 0b1010: Reps = 2; SElems = 1; break; + + default: Instruction = InstDescriptor.Undefined; return; + } + + Size = (opCode >> 10) & 3; + WBack = ((opCode >> 23) & 1) != 0; + + bool q = ((opCode >> 30) & 1) != 0; + + if (!q && Size == 3 && SElems != 1) + { + Instruction = 
InstDescriptor.Undefined; + + return; + } + + Extend64 = false; + + RegisterSize = q + ? RegisterSize.Simd128 + : RegisterSize.Simd64; + + Elems = (GetBitsCount() >> 3) >> Size; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeSimdMemPair.cs b/ARMeilleure/Decoders/OpCodeSimdMemPair.cs new file mode 100644 index 000000000..a4af49d02 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeSimdMemPair.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdMemPair : OpCodeMemPair, IOpCodeSimd + { + public OpCodeSimdMemPair(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size = ((opCode >> 30) & 3) + 2; + + Extend64 = false; + + DecodeImm(opCode); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeSimdMemReg.cs b/ARMeilleure/Decoders/OpCodeSimdMemReg.cs new file mode 100644 index 000000000..7b783d63d --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeSimdMemReg.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdMemReg : OpCodeMemReg, IOpCodeSimd + { + public OpCodeSimdMemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size |= (opCode >> 21) & 4; + + Extend64 = false; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeSimdMemSs.cs b/ARMeilleure/Decoders/OpCodeSimdMemSs.cs new file mode 100644 index 000000000..302decbcc --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeSimdMemSs.cs @@ -0,0 +1,95 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdMemSs : OpCodeMemReg, IOpCodeSimd + { + public int SElems { get; private set; } + public int Index { get; private set; } + public bool Replicate { get; private set; } + public bool WBack { get; private set; } + + public OpCodeSimdMemSs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int size = (opCode >> 10) & 3; + int s = (opCode >> 12) & 1; + int sElems = (opCode >> 12) & 2; + int scale = (opCode >> 14) & 
3; + int l = (opCode >> 22) & 1; + int q = (opCode >> 30) & 1; + + sElems |= (opCode >> 21) & 1; + + sElems++; + + int index = (q << 3) | (s << 2) | size; + + switch (scale) + { + case 1: + { + if ((size & 1) != 0) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + index >>= 1; + + break; + } + + case 2: + { + if ((size & 2) != 0 || + ((size & 1) != 0 && s != 0)) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + if ((size & 1) != 0) + { + index >>= 3; + + scale = 3; + } + else + { + index >>= 2; + } + + break; + } + + case 3: + { + if (l == 0 || s != 0) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + scale = size; + + Replicate = true; + + break; + } + } + + Index = index; + SElems = sElems; + Size = scale; + + Extend64 = false; + + WBack = ((opCode >> 23) & 1) != 0; + + RegisterSize = q != 0 + ? RegisterSize.Simd128 + : RegisterSize.Simd64; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeSimdReg.cs b/ARMeilleure/Decoders/OpCodeSimdReg.cs new file mode 100644 index 000000000..d076806a6 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeSimdReg.cs @@ -0,0 +1,16 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdReg : OpCodeSimd + { + public bool Bit3 { get; private set; } + public int Ra { get; private set; } + public int Rm { get; protected set; } + + public OpCodeSimdReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Bit3 = ((opCode >> 3) & 0x1) != 0; + Ra = (opCode >> 10) & 0x1f; + Rm = (opCode >> 16) & 0x1f; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeSimdRegElem.cs b/ARMeilleure/Decoders/OpCodeSimdRegElem.cs new file mode 100644 index 000000000..d2f1583d2 --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeSimdRegElem.cs @@ -0,0 +1,29 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdRegElem : OpCodeSimdReg + { + public int Index { get; private set; } + + public OpCodeSimdRegElem(InstDescriptor inst, ulong 
address, int opCode) : base(inst, address, opCode)
+        {
+            switch (Size)
+            {
+                case 1:
+                    Index = (opCode >> 20) & 3 |
+                            (opCode >> 9) & 4;
+
+                    Rm &= 0xf;
+
+                    break;
+
+                case 2:
+                    Index = (opCode >> 21) & 1 |
+                            (opCode >> 10) & 2;
+
+                    break;
+
+                default: Instruction = InstDescriptor.Undefined; break;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs b/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs
new file mode 100644
index 000000000..365b77172
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs
@@ -0,0 +1,28 @@
+namespace ARMeilleure.Decoders
+{
+    // OpCodeSimdReg variant that also decodes an element Index from the sz:L and H bits.
+    class OpCodeSimdRegElemF : OpCodeSimdReg
+    {
+        public int Index { get; private set; }
+
+        public OpCodeSimdRegElemF(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int szL = (opCode >> 21) & 3; // sz:L (opcode bits 22:21)
+            int h = (opCode >> 11) & 1; // H (opcode bit 11)
+
+            if (szL == 3)
+            {
+                // sz:L == 3 has no valid index; Index keeps its default value.
+                Instruction = InstDescriptor.Undefined;
+            }
+            else if (szL == 2)
+            {
+                Index = h; // H -> 0, 1
+            }
+            else
+            {
+                Index = (h << 1) | szL; // H:L -> 0, 1, 2, 3
+            }
+        }
+    }
+}
diff --git a/ARMeilleure/Decoders/OpCodeSimdShImm.cs b/ARMeilleure/Decoders/OpCodeSimdShImm.cs
new file mode 100644
index 000000000..d260c4b3e
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdShImm.cs
@@ -0,0 +1,20 @@
+using ARMeilleure.Common;
+
+namespace ARMeilleure.Decoders
+{
+    // SIMD opcode carrying a 7-bit immediate taken from opcode bits 22:16.
+    class OpCodeSimdShImm : OpCodeSimd
+    {
+        public int Imm { get; private set; }
+
+        public OpCodeSimdShImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int imm7 = (opCode >> 16) & 0x7f;
+
+            Imm = imm7;
+
+            // Size is derived from the upper four immediate bits (presumably the index of the highest set bit; confirm in BitUtils).
+            Size = BitUtils.HighestBitSetNibble(imm7 >> 3);
+        }
+    }
+}
diff --git a/ARMeilleure/Decoders/OpCodeSimdTbl.cs b/ARMeilleure/Decoders/OpCodeSimdTbl.cs
new file mode 100644
index 000000000..14fdd6489
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdTbl.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeSimdTbl : OpCodeSimdReg
+    {
+        public 
OpCodeSimdTbl(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Size = ((opCode >> 13) & 3) + 1;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSystem.cs b/ARMeilleure/Decoders/OpCodeSystem.cs
new file mode 100644
index 000000000..cf7c5cc15
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSystem.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+    // Opcode with the Rt, Op2, CRm, CRn, Op1 and Op0 bit fields extracted.
+    class OpCodeSystem : OpCode
+    {
+        public int Rt { get; private set; }
+        public int Op2 { get; private set; }
+        public int CRm { get; private set; }
+        public int CRn { get; private set; }
+        public int Op1 { get; private set; }
+        public int Op0 { get; private set; }
+
+        public OpCodeSystem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // Only opcode bit 19 feeds Op0; bit 1 is always set, so Op0 is 2 or 3.
+            Op0 = 2 | ((opCode >> 19) & 1);
+            Op1 = (opCode >> 16) & 7;
+            CRn = (opCode >> 12) & 15;
+            CRm = (opCode >> 8) & 15;
+            Op2 = (opCode >> 5) & 7;
+            Rt = opCode & 31;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeT16.cs b/ARMeilleure/Decoders/OpCodeT16.cs
new file mode 100644
index 000000000..e7b7aff53
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeT16.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+    // Common base of the T16 opcode classes: condition Al, two bytes per opcode.
+    class OpCodeT16 : OpCode32
+    {
+        public OpCodeT16(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // Runs after the base constructor, so these values override anything it assigned.
+            Cond = Condition.Al;
+
+            OpCodeSizeInBytes = 2;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeT16AluImm8.cs b/ARMeilleure/Decoders/OpCodeT16AluImm8.cs
new file mode 100644
index 000000000..197d3b091
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeT16AluImm8.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeT16AluImm8 : OpCodeT16, IOpCode32Alu
+    {
+        private int _rdn;
+
+        public int Rd => _rdn;
+        public int Rn => _rdn;
+
+        public bool SetFlags => false;
+
+        public int Immediate { get; private set; }
+
+        public OpCodeT16AluImm8(InstDescriptor inst, ulong 
address, int opCode) : base(inst, address, opCode)
+        {
+            Immediate = (opCode >> 0) & 0xff;
+            _rdn = (opCode >> 8) & 0x7;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeT16BReg.cs b/ARMeilleure/Decoders/OpCodeT16BReg.cs
new file mode 100644
index 000000000..1fb397591
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeT16BReg.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeT16BReg : OpCodeT16, IOpCode32BReg // T16 opcode that carries a single register field, Rm.
+    {
+        public int Rm { get; private set; }
+
+        public OpCodeT16BReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rm = (opCode >> 3) & 0xf; // Opcode bits 6:3.
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
new file mode 100644
index 000000000..22c762d62
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -0,0 +1,787 @@
+using ARMeilleure.Instructions;
+using ARMeilleure.State;
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.Decoders
+{
+    static class OpCodeTable
+    {
+        private const int FastLookupSize = 0x1000; // Length of each fast-lookup array below.
+
+        private struct InstInfo // One table entry: mask/value pair plus the descriptor and opcode class type.
+        {
+            public int Mask { get; }
+            public int Value { get; }
+
+            public InstDescriptor Inst { get; }
+
+            public Type Type { get; }
+
+            public InstInfo(int mask, int value, InstDescriptor inst, Type type)
+            {
+                Mask = mask;
+                Value = value;
+                Inst = inst;
+                Type = type;
+            }
+        }
+
+        private static List<InstInfo> _allInstA32 = new List<InstInfo>(); // Element type restored; the bare "List" did not compile.
+        private static List<InstInfo> _allInstT32 = new List<InstInfo>();
+        private static List<InstInfo> _allInstA64 = new List<InstInfo>();
+
+        private static InstInfo[][] _instA32FastLookup = new InstInfo[FastLookupSize][];
+        private static InstInfo[][] _instT32FastLookup = new InstInfo[FastLookupSize][];
+        private static InstInfo[][] _instA64FastLookup = new InstInfo[FastLookupSize][];
+
+        static OpCodeTable()
+        {
+#region "OpCode Table (AArch64)"
+            // Base
+            SetA64("x0011010000xxxxx000000xxxxxxxxxx", InstName.Adc, InstEmit.Adc, typeof(OpCodeAluRs));
+            
SetA64("x0111010000xxxxx000000xxxxxxxxxx", InstName.Adcs, InstEmit.Adcs, typeof(OpCodeAluRs)); + SetA64("x00100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit.Add, typeof(OpCodeAluImm)); + SetA64("00001011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Add, InstEmit.Add, typeof(OpCodeAluRs)); + SetA64("10001011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit.Add, typeof(OpCodeAluRs)); + SetA64("x0001011001xxxxxxxx0xxxxxxxxxxxx", InstName.Add, InstEmit.Add, typeof(OpCodeAluRx)); + SetA64("x0001011001xxxxxxxx100xxxxxxxxxx", InstName.Add, InstEmit.Add, typeof(OpCodeAluRx)); + SetA64("x01100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, typeof(OpCodeAluImm)); + SetA64("00101011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, typeof(OpCodeAluRs)); + SetA64("10101011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, typeof(OpCodeAluRs)); + SetA64("x0101011001xxxxxxxx0xxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, typeof(OpCodeAluRx)); + SetA64("x0101011001xxxxxxxx100xxxxxxxxxx", InstName.Adds, InstEmit.Adds, typeof(OpCodeAluRx)); + SetA64("0xx10000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Adr, InstEmit.Adr, typeof(OpCodeAdr)); + SetA64("1xx10000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Adrp, InstEmit.Adrp, typeof(OpCodeAdr)); + SetA64("0001001000xxxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit.And, typeof(OpCodeAluImm)); + SetA64("100100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit.And, typeof(OpCodeAluImm)); + SetA64("00001010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.And, InstEmit.And, typeof(OpCodeAluRs)); + SetA64("10001010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit.And, typeof(OpCodeAluRs)); + SetA64("0111001000xxxxxxxxxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, typeof(OpCodeAluImm)); + SetA64("111100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, typeof(OpCodeAluImm)); + SetA64("01101010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, typeof(OpCodeAluRs)); + SetA64("11101010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, 
typeof(OpCodeAluRs)); + SetA64("x0011010110xxxxx001010xxxxxxxxxx", InstName.Asrv, InstEmit.Asrv, typeof(OpCodeAluRs)); + SetA64("000101xxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.B, InstEmit.B, typeof(OpCodeBImmAl)); + SetA64("01010100xxxxxxxxxxxxxxxxxxx0xxxx", InstName.B_Cond, InstEmit.B_Cond, typeof(OpCodeBImmCond)); + SetA64("00110011000xxxxx0xxxxxxxxxxxxxxx", InstName.Bfm, InstEmit.Bfm, typeof(OpCodeBfm)); + SetA64("1011001101xxxxxxxxxxxxxxxxxxxxxx", InstName.Bfm, InstEmit.Bfm, typeof(OpCodeBfm)); + SetA64("00001010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Bic, InstEmit.Bic, typeof(OpCodeAluRs)); + SetA64("10001010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Bic, InstEmit.Bic, typeof(OpCodeAluRs)); + SetA64("01101010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Bics, InstEmit.Bics, typeof(OpCodeAluRs)); + SetA64("11101010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Bics, InstEmit.Bics, typeof(OpCodeAluRs)); + SetA64("100101xxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bl, InstEmit.Bl, typeof(OpCodeBImmAl)); + SetA64("1101011000111111000000xxxxx00000", InstName.Blr, InstEmit.Blr, typeof(OpCodeBReg)); + SetA64("1101011000011111000000xxxxx00000", InstName.Br, InstEmit.Br, typeof(OpCodeBReg)); + SetA64("11010100001xxxxxxxxxxxxxxxx00000", InstName.Brk, InstEmit.Brk, typeof(OpCodeException)); + SetA64("x0110101xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cbnz, InstEmit.Cbnz, typeof(OpCodeBImmCmp)); + SetA64("x0110100xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cbz, InstEmit.Cbz, typeof(OpCodeBImmCmp)); + SetA64("x0111010010xxxxxxxxx10xxxxx0xxxx", InstName.Ccmn, InstEmit.Ccmn, typeof(OpCodeCcmpImm)); + SetA64("x0111010010xxxxxxxxx00xxxxx0xxxx", InstName.Ccmn, InstEmit.Ccmn, typeof(OpCodeCcmpReg)); + SetA64("x1111010010xxxxxxxxx10xxxxx0xxxx", InstName.Ccmp, InstEmit.Ccmp, typeof(OpCodeCcmpImm)); + SetA64("x1111010010xxxxxxxxx00xxxxx0xxxx", InstName.Ccmp, InstEmit.Ccmp, typeof(OpCodeCcmpReg)); + SetA64("11010101000000110011xxxx01011111", InstName.Clrex, InstEmit.Clrex, typeof(OpCodeSystem)); + 
SetA64("x101101011000000000101xxxxxxxxxx", InstName.Cls, InstEmit.Cls, typeof(OpCodeAlu)); + SetA64("x101101011000000000100xxxxxxxxxx", InstName.Clz, InstEmit.Clz, typeof(OpCodeAlu)); + SetA64("00011010110xxxxx010000xxxxxxxxxx", InstName.Crc32b, InstEmit.Crc32b, typeof(OpCodeAluBinary)); + SetA64("00011010110xxxxx010001xxxxxxxxxx", InstName.Crc32h, InstEmit.Crc32h, typeof(OpCodeAluBinary)); + SetA64("00011010110xxxxx010010xxxxxxxxxx", InstName.Crc32w, InstEmit.Crc32w, typeof(OpCodeAluBinary)); + SetA64("10011010110xxxxx010011xxxxxxxxxx", InstName.Crc32x, InstEmit.Crc32x, typeof(OpCodeAluBinary)); + SetA64("00011010110xxxxx010100xxxxxxxxxx", InstName.Crc32cb, InstEmit.Crc32cb, typeof(OpCodeAluBinary)); + SetA64("00011010110xxxxx010101xxxxxxxxxx", InstName.Crc32ch, InstEmit.Crc32ch, typeof(OpCodeAluBinary)); + SetA64("00011010110xxxxx010110xxxxxxxxxx", InstName.Crc32cw, InstEmit.Crc32cw, typeof(OpCodeAluBinary)); + SetA64("10011010110xxxxx010111xxxxxxxxxx", InstName.Crc32cx, InstEmit.Crc32cx, typeof(OpCodeAluBinary)); + SetA64("x0011010100xxxxxxxxx00xxxxxxxxxx", InstName.Csel, InstEmit.Csel, typeof(OpCodeCsel)); + SetA64("x0011010100xxxxxxxxx01xxxxxxxxxx", InstName.Csinc, InstEmit.Csinc, typeof(OpCodeCsel)); + SetA64("x1011010100xxxxxxxxx00xxxxxxxxxx", InstName.Csinv, InstEmit.Csinv, typeof(OpCodeCsel)); + SetA64("x1011010100xxxxxxxxx01xxxxxxxxxx", InstName.Csneg, InstEmit.Csneg, typeof(OpCodeCsel)); + SetA64("11010101000000110011xxxx10111111", InstName.Dmb, InstEmit.Dmb, typeof(OpCodeSystem)); + SetA64("11010101000000110011xxxx10011111", InstName.Dsb, InstEmit.Dsb, typeof(OpCodeSystem)); + SetA64("01001010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Eon, InstEmit.Eon, typeof(OpCodeAluRs)); + SetA64("11001010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Eon, InstEmit.Eon, typeof(OpCodeAluRs)); + SetA64("0101001000xxxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, typeof(OpCodeAluImm)); + SetA64("110100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, typeof(OpCodeAluImm)); + 
SetA64("01001010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, typeof(OpCodeAluRs)); + SetA64("11001010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, typeof(OpCodeAluRs)); + SetA64("00010011100xxxxx0xxxxxxxxxxxxxxx", InstName.Extr, InstEmit.Extr, typeof(OpCodeAluRs)); + SetA64("10010011110xxxxxxxxxxxxxxxxxxxxx", InstName.Extr, InstEmit.Extr, typeof(OpCodeAluRs)); + SetA64("11010101000000110010xxxxxxx11111", InstName.Hint, InstEmit.Hint, typeof(OpCodeSystem)); + SetA64("11010101000000110011xxxx11011111", InstName.Isb, InstEmit.Isb, typeof(OpCodeSystem)); + SetA64("xx001000110xxxxx1xxxxxxxxxxxxxxx", InstName.Ldar, InstEmit.Ldar, typeof(OpCodeMemEx)); + SetA64("1x001000011xxxxx1xxxxxxxxxxxxxxx", InstName.Ldaxp, InstEmit.Ldaxp, typeof(OpCodeMemEx)); + SetA64("xx001000010xxxxx1xxxxxxxxxxxxxxx", InstName.Ldaxr, InstEmit.Ldaxr, typeof(OpCodeMemEx)); + SetA64("<<10100xx1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldp, InstEmit.Ldp, typeof(OpCodeMemPair)); + SetA64("xx111000010xxxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, typeof(OpCodeMemImm)); + SetA64("xx11100101xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, typeof(OpCodeMemImm)); + SetA64("xx111000011xxxxxxxxx10xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, typeof(OpCodeMemReg)); + SetA64("xx011000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr_Literal, InstEmit.Ldr_Literal, typeof(OpCodeMemLit)); + SetA64("0x1110001x0xxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, typeof(OpCodeMemImm)); + SetA64("0x1110011xxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, typeof(OpCodeMemImm)); + SetA64("10111000100xxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, typeof(OpCodeMemImm)); + SetA64("1011100110xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, typeof(OpCodeMemImm)); + SetA64("0x1110001x1xxxxxxxxx10xxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, typeof(OpCodeMemReg)); + SetA64("10111000101xxxxxxxxx10xxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, typeof(OpCodeMemReg)); + 
SetA64("xx001000010xxxxx0xxxxxxxxxxxxxxx", InstName.Ldxr, InstEmit.Ldxr, typeof(OpCodeMemEx)); + SetA64("1x001000011xxxxx0xxxxxxxxxxxxxxx", InstName.Ldxp, InstEmit.Ldxp, typeof(OpCodeMemEx)); + SetA64("x0011010110xxxxx001000xxxxxxxxxx", InstName.Lslv, InstEmit.Lslv, typeof(OpCodeAluRs)); + SetA64("x0011010110xxxxx001001xxxxxxxxxx", InstName.Lsrv, InstEmit.Lsrv, typeof(OpCodeAluRs)); + SetA64("x0011011000xxxxx0xxxxxxxxxxxxxxx", InstName.Madd, InstEmit.Madd, typeof(OpCodeMul)); + SetA64("0111001010xxxxxxxxxxxxxxxxxxxxxx", InstName.Movk, InstEmit.Movk, typeof(OpCodeMov)); + SetA64("111100101xxxxxxxxxxxxxxxxxxxxxxx", InstName.Movk, InstEmit.Movk, typeof(OpCodeMov)); + SetA64("0001001010xxxxxxxxxxxxxxxxxxxxxx", InstName.Movn, InstEmit.Movn, typeof(OpCodeMov)); + SetA64("100100101xxxxxxxxxxxxxxxxxxxxxxx", InstName.Movn, InstEmit.Movn, typeof(OpCodeMov)); + SetA64("0101001010xxxxxxxxxxxxxxxxxxxxxx", InstName.Movz, InstEmit.Movz, typeof(OpCodeMov)); + SetA64("110100101xxxxxxxxxxxxxxxxxxxxxxx", InstName.Movz, InstEmit.Movz, typeof(OpCodeMov)); + SetA64("110101010011xxxxxxxxxxxxxxxxxxxx", InstName.Mrs, InstEmit.Mrs, typeof(OpCodeSystem)); + SetA64("110101010001xxxxxxxxxxxxxxxxxxxx", InstName.Msr, InstEmit.Msr, typeof(OpCodeSystem)); + SetA64("x0011011000xxxxx1xxxxxxxxxxxxxxx", InstName.Msub, InstEmit.Msub, typeof(OpCodeMul)); + SetA64("11010101000000110010000000011111", InstName.Nop, InstEmit.Nop, typeof(OpCodeSystem)); + SetA64("00101010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Orn, InstEmit.Orn, typeof(OpCodeAluRs)); + SetA64("10101010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Orn, InstEmit.Orn, typeof(OpCodeAluRs)); + SetA64("0011001000xxxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, typeof(OpCodeAluImm)); + SetA64("101100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, typeof(OpCodeAluImm)); + SetA64("00101010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, typeof(OpCodeAluRs)); + SetA64("10101010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, 
typeof(OpCodeAluRs)); + SetA64("1111100110xxxxxxxxxxxxxxxxxxxxxx", InstName.Pfrm, InstEmit.Pfrm, typeof(OpCodeMemImm)); + SetA64("11111000100xxxxxxxxx00xxxxxxxxxx", InstName.Pfrm, InstEmit.Pfrm, typeof(OpCodeMemImm)); + SetA64("11011000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pfrm, InstEmit.Pfrm, typeof(OpCodeMemLit)); + SetA64("x101101011000000000000xxxxxxxxxx", InstName.Rbit, InstEmit.Rbit, typeof(OpCodeAlu)); + SetA64("1101011001011111000000xxxxx00000", InstName.Ret, InstEmit.Ret, typeof(OpCodeBReg)); + SetA64("x101101011000000000001xxxxxxxxxx", InstName.Rev16, InstEmit.Rev16, typeof(OpCodeAlu)); + SetA64("x101101011000000000010xxxxxxxxxx", InstName.Rev32, InstEmit.Rev32, typeof(OpCodeAlu)); + SetA64("1101101011000000000011xxxxxxxxxx", InstName.Rev64, InstEmit.Rev64, typeof(OpCodeAlu)); + SetA64("x0011010110xxxxx001011xxxxxxxxxx", InstName.Rorv, InstEmit.Rorv, typeof(OpCodeAluRs)); + SetA64("x1011010000xxxxx000000xxxxxxxxxx", InstName.Sbc, InstEmit.Sbc, typeof(OpCodeAluRs)); + SetA64("x1111010000xxxxx000000xxxxxxxxxx", InstName.Sbcs, InstEmit.Sbcs, typeof(OpCodeAluRs)); + SetA64("00010011000xxxxx0xxxxxxxxxxxxxxx", InstName.Sbfm, InstEmit.Sbfm, typeof(OpCodeBfm)); + SetA64("1001001101xxxxxxxxxxxxxxxxxxxxxx", InstName.Sbfm, InstEmit.Sbfm, typeof(OpCodeBfm)); + SetA64("x0011010110xxxxx000011xxxxxxxxxx", InstName.Sdiv, InstEmit.Sdiv, typeof(OpCodeAluBinary)); + SetA64("10011011001xxxxx0xxxxxxxxxxxxxxx", InstName.Smaddl, InstEmit.Smaddl, typeof(OpCodeMul)); + SetA64("10011011001xxxxx1xxxxxxxxxxxxxxx", InstName.Smsubl, InstEmit.Smsubl, typeof(OpCodeMul)); + SetA64("10011011010xxxxx0xxxxxxxxxxxxxxx", InstName.Smulh, InstEmit.Smulh, typeof(OpCodeMul)); + SetA64("xx001000100xxxxx1xxxxxxxxxxxxxxx", InstName.Stlr, InstEmit.Stlr, typeof(OpCodeMemEx)); + SetA64("1x001000001xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxp, InstEmit.Stlxp, typeof(OpCodeMemEx)); + SetA64("xx001000000xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxr, InstEmit.Stlxr, typeof(OpCodeMemEx)); + 
SetA64("x010100xx0xxxxxxxxxxxxxxxxxxxxxx", InstName.Stp, InstEmit.Stp, typeof(OpCodeMemPair)); + SetA64("xx111000000xxxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit.Str, typeof(OpCodeMemImm)); + SetA64("xx11100100xxxxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit.Str, typeof(OpCodeMemImm)); + SetA64("xx111000001xxxxxxxxx10xxxxxxxxxx", InstName.Str, InstEmit.Str, typeof(OpCodeMemReg)); + SetA64("1x001000001xxxxx0xxxxxxxxxxxxxxx", InstName.Stxp, InstEmit.Stxp, typeof(OpCodeMemEx)); + SetA64("xx001000000xxxxx0xxxxxxxxxxxxxxx", InstName.Stxr, InstEmit.Stxr, typeof(OpCodeMemEx)); + SetA64("x10100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, typeof(OpCodeAluImm)); + SetA64("01001011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, typeof(OpCodeAluRs)); + SetA64("11001011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, typeof(OpCodeAluRs)); + SetA64("x1001011001xxxxxxxx0xxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, typeof(OpCodeAluRx)); + SetA64("x1001011001xxxxxxxx100xxxxxxxxxx", InstName.Sub, InstEmit.Sub, typeof(OpCodeAluRx)); + SetA64("x11100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, typeof(OpCodeAluImm)); + SetA64("01101011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, typeof(OpCodeAluRs)); + SetA64("11101011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, typeof(OpCodeAluRs)); + SetA64("x1101011001xxxxxxxx0xxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, typeof(OpCodeAluRx)); + SetA64("x1101011001xxxxxxxx100xxxxxxxxxx", InstName.Subs, InstEmit.Subs, typeof(OpCodeAluRx)); + SetA64("11010100000xxxxxxxxxxxxxxxx00001", InstName.Svc, InstEmit.Svc, typeof(OpCodeException)); + SetA64("1101010100001xxxxxxxxxxxxxxxxxxx", InstName.Sys, InstEmit.Sys, typeof(OpCodeSystem)); + SetA64("x0110111xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tbnz, InstEmit.Tbnz, typeof(OpCodeBImmTest)); + SetA64("x0110110xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tbz, InstEmit.Tbz, typeof(OpCodeBImmTest)); + SetA64("01010011000xxxxx0xxxxxxxxxxxxxxx", InstName.Ubfm, 
InstEmit.Ubfm, typeof(OpCodeBfm)); + SetA64("1101001101xxxxxxxxxxxxxxxxxxxxxx", InstName.Ubfm, InstEmit.Ubfm, typeof(OpCodeBfm)); + SetA64("x0011010110xxxxx000010xxxxxxxxxx", InstName.Udiv, InstEmit.Udiv, typeof(OpCodeAluBinary)); + SetA64("10011011101xxxxx0xxxxxxxxxxxxxxx", InstName.Umaddl, InstEmit.Umaddl, typeof(OpCodeMul)); + SetA64("10011011101xxxxx1xxxxxxxxxxxxxxx", InstName.Umsubl, InstEmit.Umsubl, typeof(OpCodeMul)); + SetA64("10011011110xxxxx0xxxxxxxxxxxxxxx", InstName.Umulh, InstEmit.Umulh, typeof(OpCodeMul)); + + // FP & SIMD + SetA64("0101111011100000101110xxxxxxxxxx", InstName.Abs_S, InstEmit.Abs_S, typeof(OpCodeSimd)); + SetA64("0>001110<<100000101110xxxxxxxxxx", InstName.Abs_V, InstEmit.Abs_V, typeof(OpCodeSimd)); + SetA64("01011110111xxxxx100001xxxxxxxxxx", InstName.Add_S, InstEmit.Add_S, typeof(OpCodeSimdReg)); + SetA64("0>001110<<1xxxxx100001xxxxxxxxxx", InstName.Add_V, InstEmit.Add_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx010000xxxxxxxxxx", InstName.Addhn_V, InstEmit.Addhn_V, typeof(OpCodeSimdReg)); + SetA64("0101111011110001101110xxxxxxxxxx", InstName.Addp_S, InstEmit.Addp_S, typeof(OpCodeSimd)); + SetA64("0>001110<<1xxxxx101111xxxxxxxxxx", InstName.Addp_V, InstEmit.Addp_V, typeof(OpCodeSimdReg)); + SetA64("000011100x110001101110xxxxxxxxxx", InstName.Addv_V, InstEmit.Addv_V, typeof(OpCodeSimd)); + SetA64("01001110<<110001101110xxxxxxxxxx", InstName.Addv_V, InstEmit.Addv_V, typeof(OpCodeSimd)); + SetA64("0100111000101000010110xxxxxxxxxx", InstName.Aesd_V, InstEmit.Aesd_V, typeof(OpCodeSimd)); + SetA64("0100111000101000010010xxxxxxxxxx", InstName.Aese_V, InstEmit.Aese_V, typeof(OpCodeSimd)); + SetA64("0100111000101000011110xxxxxxxxxx", InstName.Aesimc_V, InstEmit.Aesimc_V, typeof(OpCodeSimd)); + SetA64("0100111000101000011010xxxxxxxxxx", InstName.Aesmc_V, InstEmit.Aesmc_V, typeof(OpCodeSimd)); + SetA64("0x001110001xxxxx000111xxxxxxxxxx", InstName.And_V, InstEmit.And_V, typeof(OpCodeSimdReg)); + 
SetA64("0x001110011xxxxx000111xxxxxxxxxx", InstName.Bic_V, InstEmit.Bic_V, typeof(OpCodeSimdReg)); + SetA64("0x10111100000xxx0xx101xxxxxxxxxx", InstName.Bic_Vi, InstEmit.Bic_Vi, typeof(OpCodeSimdImm)); + SetA64("0x10111100000xxx10x101xxxxxxxxxx", InstName.Bic_Vi, InstEmit.Bic_Vi, typeof(OpCodeSimdImm)); + SetA64("0x101110111xxxxx000111xxxxxxxxxx", InstName.Bif_V, InstEmit.Bif_V, typeof(OpCodeSimdReg)); + SetA64("0x101110101xxxxx000111xxxxxxxxxx", InstName.Bit_V, InstEmit.Bit_V, typeof(OpCodeSimdReg)); + SetA64("0x101110011xxxxx000111xxxxxxxxxx", InstName.Bsl_V, InstEmit.Bsl_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<100000010010xxxxxxxxxx", InstName.Cls_V, InstEmit.Cls_V, typeof(OpCodeSimd)); + SetA64("0x101110<<100000010010xxxxxxxxxx", InstName.Clz_V, InstEmit.Clz_V, typeof(OpCodeSimd)); + SetA64("01111110111xxxxx100011xxxxxxxxxx", InstName.Cmeq_S, InstEmit.Cmeq_S, typeof(OpCodeSimdReg)); + SetA64("0101111011100000100110xxxxxxxxxx", InstName.Cmeq_S, InstEmit.Cmeq_S, typeof(OpCodeSimd)); + SetA64("0>101110<<1xxxxx100011xxxxxxxxxx", InstName.Cmeq_V, InstEmit.Cmeq_V, typeof(OpCodeSimdReg)); + SetA64("0>001110<<100000100110xxxxxxxxxx", InstName.Cmeq_V, InstEmit.Cmeq_V, typeof(OpCodeSimd)); + SetA64("01011110111xxxxx001111xxxxxxxxxx", InstName.Cmge_S, InstEmit.Cmge_S, typeof(OpCodeSimdReg)); + SetA64("0111111011100000100010xxxxxxxxxx", InstName.Cmge_S, InstEmit.Cmge_S, typeof(OpCodeSimd)); + SetA64("0>001110<<1xxxxx001111xxxxxxxxxx", InstName.Cmge_V, InstEmit.Cmge_V, typeof(OpCodeSimdReg)); + SetA64("0>101110<<100000100010xxxxxxxxxx", InstName.Cmge_V, InstEmit.Cmge_V, typeof(OpCodeSimd)); + SetA64("01011110111xxxxx001101xxxxxxxxxx", InstName.Cmgt_S, InstEmit.Cmgt_S, typeof(OpCodeSimdReg)); + SetA64("0101111011100000100010xxxxxxxxxx", InstName.Cmgt_S, InstEmit.Cmgt_S, typeof(OpCodeSimd)); + SetA64("0>001110<<1xxxxx001101xxxxxxxxxx", InstName.Cmgt_V, InstEmit.Cmgt_V, typeof(OpCodeSimdReg)); + SetA64("0>001110<<100000100010xxxxxxxxxx", InstName.Cmgt_V, 
InstEmit.Cmgt_V, typeof(OpCodeSimd)); + SetA64("01111110111xxxxx001101xxxxxxxxxx", InstName.Cmhi_S, InstEmit.Cmhi_S, typeof(OpCodeSimdReg)); + SetA64("0>101110<<1xxxxx001101xxxxxxxxxx", InstName.Cmhi_V, InstEmit.Cmhi_V, typeof(OpCodeSimdReg)); + SetA64("01111110111xxxxx001111xxxxxxxxxx", InstName.Cmhs_S, InstEmit.Cmhs_S, typeof(OpCodeSimdReg)); + SetA64("0>101110<<1xxxxx001111xxxxxxxxxx", InstName.Cmhs_V, InstEmit.Cmhs_V, typeof(OpCodeSimdReg)); + SetA64("0111111011100000100110xxxxxxxxxx", InstName.Cmle_S, InstEmit.Cmle_S, typeof(OpCodeSimd)); + SetA64("0>101110<<100000100110xxxxxxxxxx", InstName.Cmle_V, InstEmit.Cmle_V, typeof(OpCodeSimd)); + SetA64("0101111011100000101010xxxxxxxxxx", InstName.Cmlt_S, InstEmit.Cmlt_S, typeof(OpCodeSimd)); + SetA64("0>001110<<100000101010xxxxxxxxxx", InstName.Cmlt_V, InstEmit.Cmlt_V, typeof(OpCodeSimd)); + SetA64("01011110111xxxxx100011xxxxxxxxxx", InstName.Cmtst_S, InstEmit.Cmtst_S, typeof(OpCodeSimdReg)); + SetA64("0>001110<<1xxxxx100011xxxxxxxxxx", InstName.Cmtst_V, InstEmit.Cmtst_V, typeof(OpCodeSimdReg)); + SetA64("0x00111000100000010110xxxxxxxxxx", InstName.Cnt_V, InstEmit.Cnt_V, typeof(OpCodeSimd)); + SetA64("0>001110000x<>>>000011xxxxxxxxxx", InstName.Dup_Gp, InstEmit.Dup_Gp, typeof(OpCodeSimdIns)); + SetA64("01011110000xxxxx000001xxxxxxxxxx", InstName.Dup_S, InstEmit.Dup_S, typeof(OpCodeSimdIns)); + SetA64("0>001110000x<>>>000001xxxxxxxxxx", InstName.Dup_V, InstEmit.Dup_V, typeof(OpCodeSimdIns)); + SetA64("0x101110001xxxxx000111xxxxxxxxxx", InstName.Eor_V, InstEmit.Eor_V, typeof(OpCodeSimdReg)); + SetA64("0>101110000xxxxx01011101<1xxxxx110101xxxxxxxxxx", InstName.Fabd_V, InstEmit.Fabd_V, typeof(OpCodeSimdReg)); + SetA64("000111100x100000110000xxxxxxxxxx", InstName.Fabs_S, InstEmit.Fabs_S, typeof(OpCodeSimd)); + SetA64("0>0011101<100000111110xxxxxxxxxx", InstName.Fabs_V, InstEmit.Fabs_V, typeof(OpCodeSimd)); + SetA64("000111100x1xxxxx001010xxxxxxxxxx", InstName.Fadd_S, InstEmit.Fadd_S, typeof(OpCodeSimdReg)); + 
SetA64("0>0011100<1xxxxx110101xxxxxxxxxx", InstName.Fadd_V, InstEmit.Fadd_V, typeof(OpCodeSimdReg)); + SetA64("011111100x110000110110xxxxxxxxxx", InstName.Faddp_S, InstEmit.Faddp_S, typeof(OpCodeSimd)); + SetA64("0>1011100<1xxxxx110101xxxxxxxxxx", InstName.Faddp_V, InstEmit.Faddp_V, typeof(OpCodeSimdReg)); + SetA64("000111100x1xxxxxxxxx01xxxxx0xxxx", InstName.Fccmp_S, InstEmit.Fccmp_S, typeof(OpCodeSimdFcond)); + SetA64("000111100x1xxxxxxxxx01xxxxx1xxxx", InstName.Fccmpe_S, InstEmit.Fccmpe_S, typeof(OpCodeSimdFcond)); + SetA64("010111100x1xxxxx111001xxxxxxxxxx", InstName.Fcmeq_S, InstEmit.Fcmeq_S, typeof(OpCodeSimdReg)); + SetA64("010111101x100000110110xxxxxxxxxx", InstName.Fcmeq_S, InstEmit.Fcmeq_S, typeof(OpCodeSimd)); + SetA64("0>0011100<1xxxxx111001xxxxxxxxxx", InstName.Fcmeq_V, InstEmit.Fcmeq_V, typeof(OpCodeSimdReg)); + SetA64("0>0011101<100000110110xxxxxxxxxx", InstName.Fcmeq_V, InstEmit.Fcmeq_V, typeof(OpCodeSimd)); + SetA64("011111100x1xxxxx111001xxxxxxxxxx", InstName.Fcmge_S, InstEmit.Fcmge_S, typeof(OpCodeSimdReg)); + SetA64("011111101x100000110010xxxxxxxxxx", InstName.Fcmge_S, InstEmit.Fcmge_S, typeof(OpCodeSimd)); + SetA64("0>1011100<1xxxxx111001xxxxxxxxxx", InstName.Fcmge_V, InstEmit.Fcmge_V, typeof(OpCodeSimdReg)); + SetA64("0>1011101<100000110010xxxxxxxxxx", InstName.Fcmge_V, InstEmit.Fcmge_V, typeof(OpCodeSimd)); + SetA64("011111101x1xxxxx111001xxxxxxxxxx", InstName.Fcmgt_S, InstEmit.Fcmgt_S, typeof(OpCodeSimdReg)); + SetA64("010111101x100000110010xxxxxxxxxx", InstName.Fcmgt_S, InstEmit.Fcmgt_S, typeof(OpCodeSimd)); + SetA64("0>1011101<1xxxxx111001xxxxxxxxxx", InstName.Fcmgt_V, InstEmit.Fcmgt_V, typeof(OpCodeSimdReg)); + SetA64("0>0011101<100000110010xxxxxxxxxx", InstName.Fcmgt_V, InstEmit.Fcmgt_V, typeof(OpCodeSimd)); + SetA64("011111101x100000110110xxxxxxxxxx", InstName.Fcmle_S, InstEmit.Fcmle_S, typeof(OpCodeSimd)); + SetA64("0>1011101<100000110110xxxxxxxxxx", InstName.Fcmle_V, InstEmit.Fcmle_V, typeof(OpCodeSimd)); + 
SetA64("010111101x100000111010xxxxxxxxxx", InstName.Fcmlt_S, InstEmit.Fcmlt_S, typeof(OpCodeSimd)); + SetA64("0>0011101<100000111010xxxxxxxxxx", InstName.Fcmlt_V, InstEmit.Fcmlt_V, typeof(OpCodeSimd)); + SetA64("000111100x1xxxxx001000xxxxx0x000", InstName.Fcmp_S, InstEmit.Fcmp_S, typeof(OpCodeSimdReg)); + SetA64("000111100x1xxxxx001000xxxxx1x000", InstName.Fcmpe_S, InstEmit.Fcmpe_S, typeof(OpCodeSimdReg)); + SetA64("000111100x1xxxxxxxxx11xxxxxxxxxx", InstName.Fcsel_S, InstEmit.Fcsel_S, typeof(OpCodeSimdFcond)); + SetA64("00011110xx10001xx10000xxxxxxxxxx", InstName.Fcvt_S, InstEmit.Fcvt_S, typeof(OpCodeSimd)); + SetA64("x00111100x100100000000xxxxxxxxxx", InstName.Fcvtas_Gp, InstEmit.Fcvtas_Gp, typeof(OpCodeSimdCvt)); + SetA64("x00111100x100101000000xxxxxxxxxx", InstName.Fcvtau_Gp, InstEmit.Fcvtau_Gp, typeof(OpCodeSimdCvt)); + SetA64("0x0011100x100001011110xxxxxxxxxx", InstName.Fcvtl_V, InstEmit.Fcvtl_V, typeof(OpCodeSimd)); + SetA64("x00111100x110000000000xxxxxxxxxx", InstName.Fcvtms_Gp, InstEmit.Fcvtms_Gp, typeof(OpCodeSimdCvt)); + SetA64("x00111100x110001000000xxxxxxxxxx", InstName.Fcvtmu_Gp, InstEmit.Fcvtmu_Gp, typeof(OpCodeSimdCvt)); + SetA64("0x0011100x100001011010xxxxxxxxxx", InstName.Fcvtn_V, InstEmit.Fcvtn_V, typeof(OpCodeSimd)); + SetA64("010111100x100001101010xxxxxxxxxx", InstName.Fcvtns_S, InstEmit.Fcvtns_S, typeof(OpCodeSimd)); + SetA64("0>0011100<100001101010xxxxxxxxxx", InstName.Fcvtns_V, InstEmit.Fcvtns_V, typeof(OpCodeSimd)); + SetA64("011111100x100001101010xxxxxxxxxx", InstName.Fcvtnu_S, InstEmit.Fcvtnu_S, typeof(OpCodeSimd)); + SetA64("0>1011100<100001101010xxxxxxxxxx", InstName.Fcvtnu_V, InstEmit.Fcvtnu_V, typeof(OpCodeSimd)); + SetA64("x00111100x101000000000xxxxxxxxxx", InstName.Fcvtps_Gp, InstEmit.Fcvtps_Gp, typeof(OpCodeSimdCvt)); + SetA64("x00111100x101001000000xxxxxxxxxx", InstName.Fcvtpu_Gp, InstEmit.Fcvtpu_Gp, typeof(OpCodeSimdCvt)); + SetA64("x00111100x111000000000xxxxxxxxxx", InstName.Fcvtzs_Gp, InstEmit.Fcvtzs_Gp, typeof(OpCodeSimdCvt)); 
+ SetA64(">00111100x011000>xxxxxxxxxxxxxxx", InstName.Fcvtzs_Gp_Fixed, InstEmit.Fcvtzs_Gp_Fixed, typeof(OpCodeSimdCvt)); + SetA64("010111101x100001101110xxxxxxxxxx", InstName.Fcvtzs_S, InstEmit.Fcvtzs_S, typeof(OpCodeSimd)); + SetA64("0>0011101<100001101110xxxxxxxxxx", InstName.Fcvtzs_V, InstEmit.Fcvtzs_V, typeof(OpCodeSimd)); + SetA64("0x001111001xxxxx111111xxxxxxxxxx", InstName.Fcvtzs_V_Fixed, InstEmit.Fcvtzs_V_Fixed, typeof(OpCodeSimdShImm)); + SetA64("0100111101xxxxxx111111xxxxxxxxxx", InstName.Fcvtzs_V_Fixed, InstEmit.Fcvtzs_V_Fixed, typeof(OpCodeSimdShImm)); + SetA64("x00111100x111001000000xxxxxxxxxx", InstName.Fcvtzu_Gp, InstEmit.Fcvtzu_Gp, typeof(OpCodeSimdCvt)); + SetA64(">00111100x011001>xxxxxxxxxxxxxxx", InstName.Fcvtzu_Gp_Fixed, InstEmit.Fcvtzu_Gp_Fixed, typeof(OpCodeSimdCvt)); + SetA64("011111101x100001101110xxxxxxxxxx", InstName.Fcvtzu_S, InstEmit.Fcvtzu_S, typeof(OpCodeSimd)); + SetA64("0>1011101<100001101110xxxxxxxxxx", InstName.Fcvtzu_V, InstEmit.Fcvtzu_V, typeof(OpCodeSimd)); + SetA64("0x101111001xxxxx111111xxxxxxxxxx", InstName.Fcvtzu_V_Fixed, InstEmit.Fcvtzu_V_Fixed, typeof(OpCodeSimdShImm)); + SetA64("0110111101xxxxxx111111xxxxxxxxxx", InstName.Fcvtzu_V_Fixed, InstEmit.Fcvtzu_V_Fixed, typeof(OpCodeSimdShImm)); + SetA64("000111100x1xxxxx000110xxxxxxxxxx", InstName.Fdiv_S, InstEmit.Fdiv_S, typeof(OpCodeSimdReg)); + SetA64("0>1011100<1xxxxx111111xxxxxxxxxx", InstName.Fdiv_V, InstEmit.Fdiv_V, typeof(OpCodeSimdReg)); + SetA64("000111110x0xxxxx0xxxxxxxxxxxxxxx", InstName.Fmadd_S, InstEmit.Fmadd_S, typeof(OpCodeSimdReg)); + SetA64("000111100x1xxxxx010010xxxxxxxxxx", InstName.Fmax_S, InstEmit.Fmax_S, typeof(OpCodeSimdReg)); + SetA64("0>0011100<1xxxxx111101xxxxxxxxxx", InstName.Fmax_V, InstEmit.Fmax_V, typeof(OpCodeSimdReg)); + SetA64("000111100x1xxxxx011010xxxxxxxxxx", InstName.Fmaxnm_S, InstEmit.Fmaxnm_S, typeof(OpCodeSimdReg)); + SetA64("0>0011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnm_V, InstEmit.Fmaxnm_V, typeof(OpCodeSimdReg)); + 
SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V, InstEmit.Fmaxp_V, typeof(OpCodeSimdReg)); + SetA64("000111100x1xxxxx010110xxxxxxxxxx", InstName.Fmin_S, InstEmit.Fmin_S, typeof(OpCodeSimdReg)); + SetA64("0>0011101<1xxxxx111101xxxxxxxxxx", InstName.Fmin_V, InstEmit.Fmin_V, typeof(OpCodeSimdReg)); + SetA64("000111100x1xxxxx011110xxxxxxxxxx", InstName.Fminnm_S, InstEmit.Fminnm_S, typeof(OpCodeSimdReg)); + SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnm_V, InstEmit.Fminnm_V, typeof(OpCodeSimdReg)); + SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V, InstEmit.Fminp_V, typeof(OpCodeSimdReg)); + SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Se, InstEmit.Fmla_Se, typeof(OpCodeSimdRegElemF)); + SetA64("0>0011100<1xxxxx110011xxxxxxxxxx", InstName.Fmla_V, InstEmit.Fmla_V, typeof(OpCodeSimdReg)); + SetA64("0>00111110011101<1xxxxx110011xxxxxxxxxx", InstName.Fmls_V, InstEmit.Fmls_V, typeof(OpCodeSimdReg)); + SetA64("0>00111111011100<1xxxxx110111xxxxxxxxxx", InstName.Fmul_V, InstEmit.Fmul_V, typeof(OpCodeSimdReg)); + SetA64("0>00111110011100<1xxxxx110111xxxxxxxxxx", InstName.Fmulx_V, InstEmit.Fmulx_V, typeof(OpCodeSimdReg)); + SetA64("0>10111111011101<100000111110xxxxxxxxxx", InstName.Fneg_V, InstEmit.Fneg_V, typeof(OpCodeSimd)); + SetA64("000111110x1xxxxx0xxxxxxxxxxxxxxx", InstName.Fnmadd_S, InstEmit.Fnmadd_S, typeof(OpCodeSimdReg)); + SetA64("000111110x1xxxxx1xxxxxxxxxxxxxxx", InstName.Fnmsub_S, InstEmit.Fnmsub_S, typeof(OpCodeSimdReg)); + SetA64("000111100x1xxxxx100010xxxxxxxxxx", InstName.Fnmul_S, InstEmit.Fnmul_S, typeof(OpCodeSimdReg)); + SetA64("010111101x100001110110xxxxxxxxxx", InstName.Frecpe_S, InstEmit.Frecpe_S, typeof(OpCodeSimd)); + SetA64("0>0011101<100001110110xxxxxxxxxx", InstName.Frecpe_V, InstEmit.Frecpe_V, typeof(OpCodeSimd)); + SetA64("010111100x1xxxxx111111xxxxxxxxxx", InstName.Frecps_S, InstEmit.Frecps_S, typeof(OpCodeSimdReg)); + SetA64("0>0011100<1xxxxx111111xxxxxxxxxx", InstName.Frecps_V, InstEmit.Frecps_V, 
typeof(OpCodeSimdReg)); + SetA64("010111101x100001111110xxxxxxxxxx", InstName.Frecpx_S, InstEmit.Frecpx_S, typeof(OpCodeSimd)); + SetA64("000111100x100110010000xxxxxxxxxx", InstName.Frinta_S, InstEmit.Frinta_S, typeof(OpCodeSimd)); + SetA64("0>1011100<100001100010xxxxxxxxxx", InstName.Frinta_V, InstEmit.Frinta_V, typeof(OpCodeSimd)); + SetA64("000111100x100111110000xxxxxxxxxx", InstName.Frinti_S, InstEmit.Frinti_S, typeof(OpCodeSimd)); + SetA64("0>1011101<100001100110xxxxxxxxxx", InstName.Frinti_V, InstEmit.Frinti_V, typeof(OpCodeSimd)); + SetA64("000111100x100101010000xxxxxxxxxx", InstName.Frintm_S, InstEmit.Frintm_S, typeof(OpCodeSimd)); + SetA64("0>0011100<100001100110xxxxxxxxxx", InstName.Frintm_V, InstEmit.Frintm_V, typeof(OpCodeSimd)); + SetA64("000111100x100100010000xxxxxxxxxx", InstName.Frintn_S, InstEmit.Frintn_S, typeof(OpCodeSimd)); + SetA64("0>0011100<100001100010xxxxxxxxxx", InstName.Frintn_V, InstEmit.Frintn_V, typeof(OpCodeSimd)); + SetA64("000111100x100100110000xxxxxxxxxx", InstName.Frintp_S, InstEmit.Frintp_S, typeof(OpCodeSimd)); + SetA64("0>0011101<100001100010xxxxxxxxxx", InstName.Frintp_V, InstEmit.Frintp_V, typeof(OpCodeSimd)); + SetA64("000111100x100111010000xxxxxxxxxx", InstName.Frintx_S, InstEmit.Frintx_S, typeof(OpCodeSimd)); + SetA64("0>1011100<100001100110xxxxxxxxxx", InstName.Frintx_V, InstEmit.Frintx_V, typeof(OpCodeSimd)); + SetA64("000111100x100101110000xxxxxxxxxx", InstName.Frintz_S, InstEmit.Frintz_S, typeof(OpCodeSimd)); + SetA64("0>0011101<100001100110xxxxxxxxxx", InstName.Frintz_V, InstEmit.Frintz_V, typeof(OpCodeSimd)); + SetA64("011111101x100001110110xxxxxxxxxx", InstName.Frsqrte_S, InstEmit.Frsqrte_S, typeof(OpCodeSimd)); + SetA64("0>1011101<100001110110xxxxxxxxxx", InstName.Frsqrte_V, InstEmit.Frsqrte_V, typeof(OpCodeSimd)); + SetA64("010111101x1xxxxx111111xxxxxxxxxx", InstName.Frsqrts_S, InstEmit.Frsqrts_S, typeof(OpCodeSimdReg)); + SetA64("0>0011101<1xxxxx111111xxxxxxxxxx", InstName.Frsqrts_V, InstEmit.Frsqrts_V, 
typeof(OpCodeSimdReg)); + SetA64("000111100x100001110000xxxxxxxxxx", InstName.Fsqrt_S, InstEmit.Fsqrt_S, typeof(OpCodeSimd)); + SetA64("0>1011101<100001111110xxxxxxxxxx", InstName.Fsqrt_V, InstEmit.Fsqrt_V, typeof(OpCodeSimd)); + SetA64("000111100x1xxxxx001110xxxxxxxxxx", InstName.Fsub_S, InstEmit.Fsub_S, typeof(OpCodeSimdReg)); + SetA64("0>0011101<1xxxxx110101xxxxxxxxxx", InstName.Fsub_V, InstEmit.Fsub_V, typeof(OpCodeSimdReg)); + SetA64("01001110000xxxxx000111xxxxxxxxxx", InstName.Ins_Gp, InstEmit.Ins_Gp, typeof(OpCodeSimdIns)); + SetA64("01101110000xxxxx0xxxx1xxxxxxxxxx", InstName.Ins_V, InstEmit.Ins_V, typeof(OpCodeSimdIns)); + SetA64("0x00110001000000xxxxxxxxxxxxxxxx", InstName.Ld__Vms, InstEmit.Ld__Vms, typeof(OpCodeSimdMemMs)); + SetA64("0x001100110xxxxxxxxxxxxxxxxxxxxx", InstName.Ld__Vms, InstEmit.Ld__Vms, typeof(OpCodeSimdMemMs)); + SetA64("0x00110101x00000xxxxxxxxxxxxxxxx", InstName.Ld__Vss, InstEmit.Ld__Vss, typeof(OpCodeSimdMemSs)); + SetA64("0x00110111xxxxxxxxxxxxxxxxxxxxxx", InstName.Ld__Vss, InstEmit.Ld__Vss, typeof(OpCodeSimdMemSs)); + SetA64("xx10110xx1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldp, InstEmit.Ldp, typeof(OpCodeSimdMemPair)); + SetA64("xx111100x10xxxxxxxxx00xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, typeof(OpCodeSimdMemImm)); + SetA64("xx111100x10xxxxxxxxx01xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, typeof(OpCodeSimdMemImm)); + SetA64("xx111100x10xxxxxxxxx11xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, typeof(OpCodeSimdMemImm)); + SetA64("xx111101x1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, typeof(OpCodeSimdMemImm)); + SetA64("xx111100x11xxxxxxxxx10xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, typeof(OpCodeSimdMemReg)); + SetA64("xx011100xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr_Literal, InstEmit.Ldr_Literal, typeof(OpCodeSimdMemLit)); + SetA64("0x001110<<1xxxxx100101xxxxxxxxxx", InstName.Mla_V, InstEmit.Mla_V, typeof(OpCodeSimdReg)); + SetA64("0x101111xxxxxxxx0000x0xxxxxxxxxx", InstName.Mla_Ve, InstEmit.Mla_Ve, typeof(OpCodeSimdRegElem)); + 
SetA64("0x101110<<1xxxxx100101xxxxxxxxxx", InstName.Mls_V, InstEmit.Mls_V, typeof(OpCodeSimdReg)); + SetA64("0x101111xxxxxxxx0100x0xxxxxxxxxx", InstName.Mls_Ve, InstEmit.Mls_Ve, typeof(OpCodeSimdRegElem)); + SetA64("0x00111100000xxx0xx001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, typeof(OpCodeSimdImm)); + SetA64("0x00111100000xxx10x001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, typeof(OpCodeSimdImm)); + SetA64("0x00111100000xxx110x01xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, typeof(OpCodeSimdImm)); + SetA64("0xx0111100000xxx111001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, typeof(OpCodeSimdImm)); + SetA64("0x001110<<1xxxxx100111xxxxxxxxxx", InstName.Mul_V, InstEmit.Mul_V, typeof(OpCodeSimdReg)); + SetA64("0x001111xxxxxxxx1000x0xxxxxxxxxx", InstName.Mul_Ve, InstEmit.Mul_Ve, typeof(OpCodeSimdRegElem)); + SetA64("0x10111100000xxx0xx001xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, typeof(OpCodeSimdImm)); + SetA64("0x10111100000xxx10x001xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, typeof(OpCodeSimdImm)); + SetA64("0x10111100000xxx110x01xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, typeof(OpCodeSimdImm)); + SetA64("0111111011100000101110xxxxxxxxxx", InstName.Neg_S, InstEmit.Neg_S, typeof(OpCodeSimd)); + SetA64("0>101110<<100000101110xxxxxxxxxx", InstName.Neg_V, InstEmit.Neg_V, typeof(OpCodeSimd)); + SetA64("0x10111000100000010110xxxxxxxxxx", InstName.Not_V, InstEmit.Not_V, typeof(OpCodeSimd)); + SetA64("0x001110111xxxxx000111xxxxxxxxxx", InstName.Orn_V, InstEmit.Orn_V, typeof(OpCodeSimdReg)); + SetA64("0x001110101xxxxx000111xxxxxxxxxx", InstName.Orr_V, InstEmit.Orr_V, typeof(OpCodeSimdReg)); + SetA64("0x00111100000xxx0xx101xxxxxxxxxx", InstName.Orr_Vi, InstEmit.Orr_Vi, typeof(OpCodeSimdImm)); + SetA64("0x00111100000xxx10x101xxxxxxxxxx", InstName.Orr_Vi, InstEmit.Orr_Vi, typeof(OpCodeSimdImm)); + SetA64("0x101110<<1xxxxx010000xxxxxxxxxx", InstName.Raddhn_V, InstEmit.Raddhn_V, typeof(OpCodeSimdReg)); + SetA64("0x10111001100000010110xxxxxxxxxx", 
InstName.Rbit_V, InstEmit.Rbit_V, typeof(OpCodeSimd)); + SetA64("0x00111000100000000110xxxxxxxxxx", InstName.Rev16_V, InstEmit.Rev16_V, typeof(OpCodeSimd)); + SetA64("0x1011100x100000000010xxxxxxxxxx", InstName.Rev32_V, InstEmit.Rev32_V, typeof(OpCodeSimd)); + SetA64("0x001110<<100000000010xxxxxxxxxx", InstName.Rev64_V, InstEmit.Rev64_V, typeof(OpCodeSimd)); + SetA64("0x00111100>>>xxx100011xxxxxxxxxx", InstName.Rshrn_V, InstEmit.Rshrn_V, typeof(OpCodeSimdShImm)); + SetA64("0x101110<<1xxxxx011000xxxxxxxxxx", InstName.Rsubhn_V, InstEmit.Rsubhn_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx011111xxxxxxxxxx", InstName.Saba_V, InstEmit.Saba_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx010100xxxxxxxxxx", InstName.Sabal_V, InstEmit.Sabal_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx011101xxxxxxxxxx", InstName.Sabd_V, InstEmit.Sabd_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx011100xxxxxxxxxx", InstName.Sabdl_V, InstEmit.Sabdl_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<100000011010xxxxxxxxxx", InstName.Sadalp_V, InstEmit.Sadalp_V, typeof(OpCodeSimd)); + SetA64("0x001110<<1xxxxx000000xxxxxxxxxx", InstName.Saddl_V, InstEmit.Saddl_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<100000001010xxxxxxxxxx", InstName.Saddlp_V, InstEmit.Saddlp_V, typeof(OpCodeSimd)); + SetA64("000011100x110000001110xxxxxxxxxx", InstName.Saddlv_V, InstEmit.Saddlv_V, typeof(OpCodeSimd)); + SetA64("01001110<<110000001110xxxxxxxxxx", InstName.Saddlv_V, InstEmit.Saddlv_V, typeof(OpCodeSimd)); + SetA64("0x001110<<1xxxxx000100xxxxxxxxxx", InstName.Saddw_V, InstEmit.Saddw_V, typeof(OpCodeSimdReg)); + SetA64("x00111100x100010000000xxxxxxxxxx", InstName.Scvtf_Gp, InstEmit.Scvtf_Gp, typeof(OpCodeSimdCvt)); + SetA64(">00111100x000010>xxxxxxxxxxxxxxx", InstName.Scvtf_Gp_Fixed, InstEmit.Scvtf_Gp_Fixed, typeof(OpCodeSimdCvt)); + SetA64("010111100x100001110110xxxxxxxxxx", InstName.Scvtf_S, InstEmit.Scvtf_S, typeof(OpCodeSimd)); + SetA64("0>0011100<100001110110xxxxxxxxxx", 
InstName.Scvtf_V, InstEmit.Scvtf_V, typeof(OpCodeSimd)); + SetA64("0x001111001xxxxx111001xxxxxxxxxx", InstName.Scvtf_V_Fixed, InstEmit.Scvtf_V_Fixed, typeof(OpCodeSimdShImm)); + SetA64("0100111101xxxxxx111001xxxxxxxxxx", InstName.Scvtf_V_Fixed, InstEmit.Scvtf_V_Fixed, typeof(OpCodeSimdShImm)); + SetA64("01011110000xxxxx000000xxxxxxxxxx", InstName.Sha1c_V, InstEmit.Sha1c_V, typeof(OpCodeSimdReg)); + SetA64("0101111000101000000010xxxxxxxxxx", InstName.Sha1h_V, InstEmit.Sha1h_V, typeof(OpCodeSimd)); + SetA64("01011110000xxxxx001000xxxxxxxxxx", InstName.Sha1m_V, InstEmit.Sha1m_V, typeof(OpCodeSimdReg)); + SetA64("01011110000xxxxx000100xxxxxxxxxx", InstName.Sha1p_V, InstEmit.Sha1p_V, typeof(OpCodeSimdReg)); + SetA64("01011110000xxxxx001100xxxxxxxxxx", InstName.Sha1su0_V, InstEmit.Sha1su0_V, typeof(OpCodeSimdReg)); + SetA64("0101111000101000000110xxxxxxxxxx", InstName.Sha1su1_V, InstEmit.Sha1su1_V, typeof(OpCodeSimd)); + SetA64("01011110000xxxxx010000xxxxxxxxxx", InstName.Sha256h_V, InstEmit.Sha256h_V, typeof(OpCodeSimdReg)); + SetA64("01011110000xxxxx010100xxxxxxxxxx", InstName.Sha256h2_V, InstEmit.Sha256h2_V, typeof(OpCodeSimdReg)); + SetA64("0101111000101000001010xxxxxxxxxx", InstName.Sha256su0_V, InstEmit.Sha256su0_V, typeof(OpCodeSimd)); + SetA64("01011110000xxxxx011000xxxxxxxxxx", InstName.Sha256su1_V, InstEmit.Sha256su1_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx000001xxxxxxxxxx", InstName.Shadd_V, InstEmit.Shadd_V, typeof(OpCodeSimdReg)); + SetA64("0101111101xxxxxx010101xxxxxxxxxx", InstName.Shl_S, InstEmit.Shl_S, typeof(OpCodeSimdShImm)); + SetA64("0x00111100>>>xxx010101xxxxxxxxxx", InstName.Shl_V, InstEmit.Shl_V, typeof(OpCodeSimdShImm)); + SetA64("0100111101xxxxxx010101xxxxxxxxxx", InstName.Shl_V, InstEmit.Shl_V, typeof(OpCodeSimdShImm)); + SetA64("0x101110<<100001001110xxxxxxxxxx", InstName.Shll_V, InstEmit.Shll_V, typeof(OpCodeSimd)); + SetA64("0x00111100>>>xxx100001xxxxxxxxxx", InstName.Shrn_V, InstEmit.Shrn_V, typeof(OpCodeSimdShImm)); + 
SetA64("0x001110<<1xxxxx001001xxxxxxxxxx", InstName.Shsub_V, InstEmit.Shsub_V, typeof(OpCodeSimdReg)); + SetA64("0x10111100>>>xxx010101xxxxxxxxxx", InstName.Sli_V, InstEmit.Sli_V, typeof(OpCodeSimdShImm)); + SetA64("0110111101xxxxxx010101xxxxxxxxxx", InstName.Sli_V, InstEmit.Sli_V, typeof(OpCodeSimdShImm)); + SetA64("0x001110<<1xxxxx011001xxxxxxxxxx", InstName.Smax_V, InstEmit.Smax_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx101001xxxxxxxxxx", InstName.Smaxp_V, InstEmit.Smaxp_V, typeof(OpCodeSimdReg)); + SetA64("000011100x110000101010xxxxxxxxxx", InstName.Smaxv_V, InstEmit.Smaxv_V, typeof(OpCodeSimd)); + SetA64("01001110<<110000101010xxxxxxxxxx", InstName.Smaxv_V, InstEmit.Smaxv_V, typeof(OpCodeSimd)); + SetA64("0x001110<<1xxxxx011011xxxxxxxxxx", InstName.Smin_V, InstEmit.Smin_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", InstName.Sminp_V, InstEmit.Sminp_V, typeof(OpCodeSimdReg)); + SetA64("000011100x110001101010xxxxxxxxxx", InstName.Sminv_V, InstEmit.Sminv_V, typeof(OpCodeSimd)); + SetA64("01001110<<110001101010xxxxxxxxxx", InstName.Sminv_V, InstEmit.Sminv_V, typeof(OpCodeSimd)); + SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", InstName.Smlal_V, InstEmit.Smlal_V, typeof(OpCodeSimdReg)); + SetA64("0x001111xxxxxxxx0010x0xxxxxxxxxx", InstName.Smlal_Ve, InstEmit.Smlal_Ve, typeof(OpCodeSimdRegElem)); + SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", InstName.Smlsl_V, InstEmit.Smlsl_V, typeof(OpCodeSimdReg)); + SetA64("0x001111xxxxxxxx0110x0xxxxxxxxxx", InstName.Smlsl_Ve, InstEmit.Smlsl_Ve, typeof(OpCodeSimdRegElem)); + SetA64("0x001110000xxxxx001011xxxxxxxxxx", InstName.Smov_S, InstEmit.Smov_S, typeof(OpCodeSimdIns)); + SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", InstName.Smull_V, InstEmit.Smull_V, typeof(OpCodeSimdReg)); + SetA64("0x001111xxxxxxxx1010x0xxxxxxxxxx", InstName.Smull_Ve, InstEmit.Smull_Ve, typeof(OpCodeSimdRegElem)); + SetA64("01011110xx100000011110xxxxxxxxxx", InstName.Sqabs_S, InstEmit.Sqabs_S, typeof(OpCodeSimd)); + 
SetA64("0>001110<<100000011110xxxxxxxxxx", InstName.Sqabs_V, InstEmit.Sqabs_V, typeof(OpCodeSimd)); + SetA64("01011110xx1xxxxx000011xxxxxxxxxx", InstName.Sqadd_S, InstEmit.Sqadd_S, typeof(OpCodeSimdReg)); + SetA64("0>001110<<1xxxxx000011xxxxxxxxxx", InstName.Sqadd_V, InstEmit.Sqadd_V, typeof(OpCodeSimdReg)); + SetA64("01011110011xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_S, InstEmit.Sqdmulh_S, typeof(OpCodeSimdReg)); + SetA64("01011110101xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_S, InstEmit.Sqdmulh_S, typeof(OpCodeSimdReg)); + SetA64("0x001110011xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_V, InstEmit.Sqdmulh_V, typeof(OpCodeSimdReg)); + SetA64("0x001110101xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_V, InstEmit.Sqdmulh_V, typeof(OpCodeSimdReg)); + SetA64("01111110xx100000011110xxxxxxxxxx", InstName.Sqneg_S, InstEmit.Sqneg_S, typeof(OpCodeSimd)); + SetA64("0>101110<<100000011110xxxxxxxxxx", InstName.Sqneg_V, InstEmit.Sqneg_V, typeof(OpCodeSimd)); + SetA64("01111110011xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_S, InstEmit.Sqrdmulh_S, typeof(OpCodeSimdReg)); + SetA64("01111110101xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_S, InstEmit.Sqrdmulh_S, typeof(OpCodeSimdReg)); + SetA64("0x101110011xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_V, InstEmit.Sqrdmulh_V, typeof(OpCodeSimdReg)); + SetA64("0x101110101xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_V, InstEmit.Sqrdmulh_V, typeof(OpCodeSimdReg)); + SetA64("0>001110<<1xxxxx010111xxxxxxxxxx", InstName.Sqrshl_V, InstEmit.Sqrshl_V, typeof(OpCodeSimdReg)); + SetA64("0101111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_S, InstEmit.Sqrshrn_S, typeof(OpCodeSimdShImm)); + SetA64("0x00111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_V, InstEmit.Sqrshrn_V, typeof(OpCodeSimdShImm)); + SetA64("0111111100>>>xxx100011xxxxxxxxxx", InstName.Sqrshrun_S, InstEmit.Sqrshrun_S, typeof(OpCodeSimdShImm)); + SetA64("0x10111100>>>xxx100011xxxxxxxxxx", InstName.Sqrshrun_V, InstEmit.Sqrshrun_V, typeof(OpCodeSimdShImm)); + SetA64("0>001110<<1xxxxx010011xxxxxxxxxx", 
InstName.Sqshl_V, InstEmit.Sqshl_V, typeof(OpCodeSimdReg)); + SetA64("0101111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_S, InstEmit.Sqshrn_S, typeof(OpCodeSimdShImm)); + SetA64("0x00111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_V, InstEmit.Sqshrn_V, typeof(OpCodeSimdShImm)); + SetA64("0111111100>>>xxx100001xxxxxxxxxx", InstName.Sqshrun_S, InstEmit.Sqshrun_S, typeof(OpCodeSimdShImm)); + SetA64("0x10111100>>>xxx100001xxxxxxxxxx", InstName.Sqshrun_V, InstEmit.Sqshrun_V, typeof(OpCodeSimdShImm)); + SetA64("01011110xx1xxxxx001011xxxxxxxxxx", InstName.Sqsub_S, InstEmit.Sqsub_S, typeof(OpCodeSimdReg)); + SetA64("0>001110<<1xxxxx001011xxxxxxxxxx", InstName.Sqsub_V, InstEmit.Sqsub_V, typeof(OpCodeSimdReg)); + SetA64("01011110<<100001010010xxxxxxxxxx", InstName.Sqxtn_S, InstEmit.Sqxtn_S, typeof(OpCodeSimd)); + SetA64("0x001110<<100001010010xxxxxxxxxx", InstName.Sqxtn_V, InstEmit.Sqxtn_V, typeof(OpCodeSimd)); + SetA64("01111110<<100001001010xxxxxxxxxx", InstName.Sqxtun_S, InstEmit.Sqxtun_S, typeof(OpCodeSimd)); + SetA64("0x101110<<100001001010xxxxxxxxxx", InstName.Sqxtun_V, InstEmit.Sqxtun_V, typeof(OpCodeSimd)); + SetA64("0x001110<<1xxxxx000101xxxxxxxxxx", InstName.Srhadd_V, InstEmit.Srhadd_V, typeof(OpCodeSimdReg)); + SetA64("0>001110<<1xxxxx010101xxxxxxxxxx", InstName.Srshl_V, InstEmit.Srshl_V, typeof(OpCodeSimdReg)); + SetA64("0101111101xxxxxx001001xxxxxxxxxx", InstName.Srshr_S, InstEmit.Srshr_S, typeof(OpCodeSimdShImm)); + SetA64("0x00111100>>>xxx001001xxxxxxxxxx", InstName.Srshr_V, InstEmit.Srshr_V, typeof(OpCodeSimdShImm)); + SetA64("0100111101xxxxxx001001xxxxxxxxxx", InstName.Srshr_V, InstEmit.Srshr_V, typeof(OpCodeSimdShImm)); + SetA64("0101111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_S, InstEmit.Srsra_S, typeof(OpCodeSimdShImm)); + SetA64("0x00111100>>>xxx001101xxxxxxxxxx", InstName.Srsra_V, InstEmit.Srsra_V, typeof(OpCodeSimdShImm)); + SetA64("0100111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_V, InstEmit.Srsra_V, typeof(OpCodeSimdShImm)); + 
SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", InstName.Sshl_V, InstEmit.Sshl_V, typeof(OpCodeSimdReg)); + SetA64("0x00111100>>>xxx101001xxxxxxxxxx", InstName.Sshll_V, InstEmit.Sshll_V, typeof(OpCodeSimdShImm)); + SetA64("0101111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_S, InstEmit.Sshr_S, typeof(OpCodeSimdShImm)); + SetA64("0x00111100>>>xxx000001xxxxxxxxxx", InstName.Sshr_V, InstEmit.Sshr_V, typeof(OpCodeSimdShImm)); + SetA64("0100111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_V, InstEmit.Sshr_V, typeof(OpCodeSimdShImm)); + SetA64("0101111101xxxxxx000101xxxxxxxxxx", InstName.Ssra_S, InstEmit.Ssra_S, typeof(OpCodeSimdShImm)); + SetA64("0x00111100>>>xxx000101xxxxxxxxxx", InstName.Ssra_V, InstEmit.Ssra_V, typeof(OpCodeSimdShImm)); + SetA64("0100111101xxxxxx000101xxxxxxxxxx", InstName.Ssra_V, InstEmit.Ssra_V, typeof(OpCodeSimdShImm)); + SetA64("0x001110<<1xxxxx001000xxxxxxxxxx", InstName.Ssubl_V, InstEmit.Ssubl_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx001100xxxxxxxxxx", InstName.Ssubw_V, InstEmit.Ssubw_V, typeof(OpCodeSimdReg)); + SetA64("0x00110000000000xxxxxxxxxxxxxxxx", InstName.St__Vms, InstEmit.St__Vms, typeof(OpCodeSimdMemMs)); + SetA64("0x001100100xxxxxxxxxxxxxxxxxxxxx", InstName.St__Vms, InstEmit.St__Vms, typeof(OpCodeSimdMemMs)); + SetA64("0x00110100x00000xxxxxxxxxxxxxxxx", InstName.St__Vss, InstEmit.St__Vss, typeof(OpCodeSimdMemSs)); + SetA64("0x00110110xxxxxxxxxxxxxxxxxxxxxx", InstName.St__Vss, InstEmit.St__Vss, typeof(OpCodeSimdMemSs)); + SetA64("xx10110xx0xxxxxxxxxxxxxxxxxxxxxx", InstName.Stp, InstEmit.Stp, typeof(OpCodeSimdMemPair)); + SetA64("xx111100x00xxxxxxxxx00xxxxxxxxxx", InstName.Str, InstEmit.Str, typeof(OpCodeSimdMemImm)); + SetA64("xx111100x00xxxxxxxxx01xxxxxxxxxx", InstName.Str, InstEmit.Str, typeof(OpCodeSimdMemImm)); + SetA64("xx111100x00xxxxxxxxx11xxxxxxxxxx", InstName.Str, InstEmit.Str, typeof(OpCodeSimdMemImm)); + SetA64("xx111101x0xxxxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit.Str, typeof(OpCodeSimdMemImm)); + 
SetA64("xx111100x01xxxxxxxxx10xxxxxxxxxx", InstName.Str, InstEmit.Str, typeof(OpCodeSimdMemReg)); + SetA64("01111110111xxxxx100001xxxxxxxxxx", InstName.Sub_S, InstEmit.Sub_S, typeof(OpCodeSimdReg)); + SetA64("0>101110<<1xxxxx100001xxxxxxxxxx", InstName.Sub_V, InstEmit.Sub_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<1xxxxx011000xxxxxxxxxx", InstName.Subhn_V, InstEmit.Subhn_V, typeof(OpCodeSimdReg)); + SetA64("01011110xx100000001110xxxxxxxxxx", InstName.Suqadd_S, InstEmit.Suqadd_S, typeof(OpCodeSimd)); + SetA64("0>001110<<100000001110xxxxxxxxxx", InstName.Suqadd_V, InstEmit.Suqadd_V, typeof(OpCodeSimd)); + SetA64("0x001110000xxxxx0xx000xxxxxxxxxx", InstName.Tbl_V, InstEmit.Tbl_V, typeof(OpCodeSimdTbl)); + SetA64("0>001110<<0xxxxx001010xxxxxxxxxx", InstName.Trn1_V, InstEmit.Trn1_V, typeof(OpCodeSimdReg)); + SetA64("0>001110<<0xxxxx011010xxxxxxxxxx", InstName.Trn2_V, InstEmit.Trn2_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx011111xxxxxxxxxx", InstName.Uaba_V, InstEmit.Uaba_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx010100xxxxxxxxxx", InstName.Uabal_V, InstEmit.Uabal_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx011101xxxxxxxxxx", InstName.Uabd_V, InstEmit.Uabd_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx011100xxxxxxxxxx", InstName.Uabdl_V, InstEmit.Uabdl_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<100000011010xxxxxxxxxx", InstName.Uadalp_V, InstEmit.Uadalp_V, typeof(OpCodeSimd)); + SetA64("0x101110<<1xxxxx000000xxxxxxxxxx", InstName.Uaddl_V, InstEmit.Uaddl_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<100000001010xxxxxxxxxx", InstName.Uaddlp_V, InstEmit.Uaddlp_V, typeof(OpCodeSimd)); + SetA64("001011100x110000001110xxxxxxxxxx", InstName.Uaddlv_V, InstEmit.Uaddlv_V, typeof(OpCodeSimd)); + SetA64("01101110<<110000001110xxxxxxxxxx", InstName.Uaddlv_V, InstEmit.Uaddlv_V, typeof(OpCodeSimd)); + SetA64("0x101110<<1xxxxx000100xxxxxxxxxx", InstName.Uaddw_V, InstEmit.Uaddw_V, typeof(OpCodeSimdReg)); + 
SetA64("x00111100x100011000000xxxxxxxxxx", InstName.Ucvtf_Gp, InstEmit.Ucvtf_Gp, typeof(OpCodeSimdCvt)); + SetA64(">00111100x000011>xxxxxxxxxxxxxxx", InstName.Ucvtf_Gp_Fixed, InstEmit.Ucvtf_Gp_Fixed, typeof(OpCodeSimdCvt)); + SetA64("011111100x100001110110xxxxxxxxxx", InstName.Ucvtf_S, InstEmit.Ucvtf_S, typeof(OpCodeSimd)); + SetA64("0>1011100<100001110110xxxxxxxxxx", InstName.Ucvtf_V, InstEmit.Ucvtf_V, typeof(OpCodeSimd)); + SetA64("0x101111001xxxxx111001xxxxxxxxxx", InstName.Ucvtf_V_Fixed, InstEmit.Ucvtf_V_Fixed, typeof(OpCodeSimdShImm)); + SetA64("0110111101xxxxxx111001xxxxxxxxxx", InstName.Ucvtf_V_Fixed, InstEmit.Ucvtf_V_Fixed, typeof(OpCodeSimdShImm)); + SetA64("0x101110<<1xxxxx000001xxxxxxxxxx", InstName.Uhadd_V, InstEmit.Uhadd_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx001001xxxxxxxxxx", InstName.Uhsub_V, InstEmit.Uhsub_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx011001xxxxxxxxxx", InstName.Umax_V, InstEmit.Umax_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx101001xxxxxxxxxx", InstName.Umaxp_V, InstEmit.Umaxp_V, typeof(OpCodeSimdReg)); + SetA64("001011100x110000101010xxxxxxxxxx", InstName.Umaxv_V, InstEmit.Umaxv_V, typeof(OpCodeSimd)); + SetA64("01101110<<110000101010xxxxxxxxxx", InstName.Umaxv_V, InstEmit.Umaxv_V, typeof(OpCodeSimd)); + SetA64("0x101110<<1xxxxx011011xxxxxxxxxx", InstName.Umin_V, InstEmit.Umin_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx101011xxxxxxxxxx", InstName.Uminp_V, InstEmit.Uminp_V, typeof(OpCodeSimdReg)); + SetA64("001011100x110001101010xxxxxxxxxx", InstName.Uminv_V, InstEmit.Uminv_V, typeof(OpCodeSimd)); + SetA64("01101110<<110001101010xxxxxxxxxx", InstName.Uminv_V, InstEmit.Uminv_V, typeof(OpCodeSimd)); + SetA64("0x101110<<1xxxxx100000xxxxxxxxxx", InstName.Umlal_V, InstEmit.Umlal_V, typeof(OpCodeSimdReg)); + SetA64("0x101111xxxxxxxx0010x0xxxxxxxxxx", InstName.Umlal_Ve, InstEmit.Umlal_Ve, typeof(OpCodeSimdRegElem)); + SetA64("0x101110<<1xxxxx101000xxxxxxxxxx", InstName.Umlsl_V, InstEmit.Umlsl_V, 
typeof(OpCodeSimdReg)); + SetA64("0x101111xxxxxxxx0110x0xxxxxxxxxx", InstName.Umlsl_Ve, InstEmit.Umlsl_Ve, typeof(OpCodeSimdRegElem)); + SetA64("0x001110000xxxxx001111xxxxxxxxxx", InstName.Umov_S, InstEmit.Umov_S, typeof(OpCodeSimdIns)); + SetA64("0x101110<<1xxxxx110000xxxxxxxxxx", InstName.Umull_V, InstEmit.Umull_V, typeof(OpCodeSimdReg)); + SetA64("0x101111xxxxxxxx1010x0xxxxxxxxxx", InstName.Umull_Ve, InstEmit.Umull_Ve, typeof(OpCodeSimdRegElem)); + SetA64("01111110xx1xxxxx000011xxxxxxxxxx", InstName.Uqadd_S, InstEmit.Uqadd_S, typeof(OpCodeSimdReg)); + SetA64("0>101110<<1xxxxx000011xxxxxxxxxx", InstName.Uqadd_V, InstEmit.Uqadd_V, typeof(OpCodeSimdReg)); + SetA64("0>101110<<1xxxxx010111xxxxxxxxxx", InstName.Uqrshl_V, InstEmit.Uqrshl_V, typeof(OpCodeSimdReg)); + SetA64("0111111100>>>xxx100111xxxxxxxxxx", InstName.Uqrshrn_S, InstEmit.Uqrshrn_S, typeof(OpCodeSimdShImm)); + SetA64("0x10111100>>>xxx100111xxxxxxxxxx", InstName.Uqrshrn_V, InstEmit.Uqrshrn_V, typeof(OpCodeSimdShImm)); + SetA64("0>101110<<1xxxxx010011xxxxxxxxxx", InstName.Uqshl_V, InstEmit.Uqshl_V, typeof(OpCodeSimdReg)); + SetA64("0111111100>>>xxx100101xxxxxxxxxx", InstName.Uqshrn_S, InstEmit.Uqshrn_S, typeof(OpCodeSimdShImm)); + SetA64("0x10111100>>>xxx100101xxxxxxxxxx", InstName.Uqshrn_V, InstEmit.Uqshrn_V, typeof(OpCodeSimdShImm)); + SetA64("01111110xx1xxxxx001011xxxxxxxxxx", InstName.Uqsub_S, InstEmit.Uqsub_S, typeof(OpCodeSimdReg)); + SetA64("0>101110<<1xxxxx001011xxxxxxxxxx", InstName.Uqsub_V, InstEmit.Uqsub_V, typeof(OpCodeSimdReg)); + SetA64("01111110<<100001010010xxxxxxxxxx", InstName.Uqxtn_S, InstEmit.Uqxtn_S, typeof(OpCodeSimd)); + SetA64("0x101110<<100001010010xxxxxxxxxx", InstName.Uqxtn_V, InstEmit.Uqxtn_V, typeof(OpCodeSimd)); + SetA64("0x101110<<1xxxxx000101xxxxxxxxxx", InstName.Urhadd_V, InstEmit.Urhadd_V, typeof(OpCodeSimdReg)); + SetA64("0>101110<<1xxxxx010101xxxxxxxxxx", InstName.Urshl_V, InstEmit.Urshl_V, typeof(OpCodeSimdReg)); + SetA64("0111111101xxxxxx001001xxxxxxxxxx", 
InstName.Urshr_S, InstEmit.Urshr_S, typeof(OpCodeSimdShImm)); + SetA64("0x10111100>>>xxx001001xxxxxxxxxx", InstName.Urshr_V, InstEmit.Urshr_V, typeof(OpCodeSimdShImm)); + SetA64("0110111101xxxxxx001001xxxxxxxxxx", InstName.Urshr_V, InstEmit.Urshr_V, typeof(OpCodeSimdShImm)); + SetA64("0111111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_S, InstEmit.Ursra_S, typeof(OpCodeSimdShImm)); + SetA64("0x10111100>>>xxx001101xxxxxxxxxx", InstName.Ursra_V, InstEmit.Ursra_V, typeof(OpCodeSimdShImm)); + SetA64("0110111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_V, InstEmit.Ursra_V, typeof(OpCodeSimdShImm)); + SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", InstName.Ushl_V, InstEmit.Ushl_V, typeof(OpCodeSimdReg)); + SetA64("0x10111100>>>xxx101001xxxxxxxxxx", InstName.Ushll_V, InstEmit.Ushll_V, typeof(OpCodeSimdShImm)); + SetA64("0111111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_S, InstEmit.Ushr_S, typeof(OpCodeSimdShImm)); + SetA64("0x10111100>>>xxx000001xxxxxxxxxx", InstName.Ushr_V, InstEmit.Ushr_V, typeof(OpCodeSimdShImm)); + SetA64("0110111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_V, InstEmit.Ushr_V, typeof(OpCodeSimdShImm)); + SetA64("01111110xx100000001110xxxxxxxxxx", InstName.Usqadd_S, InstEmit.Usqadd_S, typeof(OpCodeSimd)); + SetA64("0>101110<<100000001110xxxxxxxxxx", InstName.Usqadd_V, InstEmit.Usqadd_V, typeof(OpCodeSimd)); + SetA64("0111111101xxxxxx000101xxxxxxxxxx", InstName.Usra_S, InstEmit.Usra_S, typeof(OpCodeSimdShImm)); + SetA64("0x10111100>>>xxx000101xxxxxxxxxx", InstName.Usra_V, InstEmit.Usra_V, typeof(OpCodeSimdShImm)); + SetA64("0110111101xxxxxx000101xxxxxxxxxx", InstName.Usra_V, InstEmit.Usra_V, typeof(OpCodeSimdShImm)); + SetA64("0x101110<<1xxxxx001000xxxxxxxxxx", InstName.Usubl_V, InstEmit.Usubl_V, typeof(OpCodeSimdReg)); + SetA64("0x101110<<1xxxxx001100xxxxxxxxxx", InstName.Usubw_V, InstEmit.Usubw_V, typeof(OpCodeSimdReg)); + SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", InstName.Uzp1_V, InstEmit.Uzp1_V, typeof(OpCodeSimdReg)); + 
SetA64("0>001110<<0xxxxx010110xxxxxxxxxx", InstName.Uzp2_V, InstEmit.Uzp2_V, typeof(OpCodeSimdReg)); + SetA64("0x001110<<100001001010xxxxxxxxxx", InstName.Xtn_V, InstEmit.Xtn_V, typeof(OpCodeSimd)); + SetA64("0>001110<<0xxxxx001110xxxxxxxxxx", InstName.Zip1_V, InstEmit.Zip1_V, typeof(OpCodeSimdReg)); + SetA64("0>001110<<0xxxxx011110xxxxxxxxxx", InstName.Zip2_V, InstEmit.Zip2_V, typeof(OpCodeSimdReg)); +#endregion + +#region "OpCode Table (AArch32)" + // Base + SetA32("<<<<0010100xxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit32.Add, typeof(OpCode32AluImm)); + SetA32("<<<<0000100xxxxxxxxxxxxxxxx0xxxx", InstName.Add, InstEmit32.Add, typeof(OpCode32AluRsImm)); + SetA32("<<<<1010xxxxxxxxxxxxxxxxxxxxxxxx", InstName.B, InstEmit32.B, typeof(OpCode32BImm)); + SetA32("<<<<1011xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bl, InstEmit32.Bl, typeof(OpCode32BImm)); + SetA32("1111101xxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Blx, InstEmit32.Blx, typeof(OpCode32BImm)); + SetA32("<<<<000100101111111111110001xxxx", InstName.Bx, InstEmit32.Bx, typeof(OpCode32BReg)); + SetT32("xxxxxxxxxxxxxxxx010001110xxxx000", InstName.Bx, InstEmit32.Bx, typeof(OpCodeT16BReg)); + SetA32("<<<<00110101xxxx0000xxxxxxxxxxxx", InstName.Cmp, InstEmit32.Cmp, typeof(OpCode32AluImm)); + SetA32("<<<<00010101xxxx0000xxxxxxx0xxxx", InstName.Cmp, InstEmit32.Cmp, typeof(OpCode32AluRsImm)); + SetA32("<<<<100xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldm, InstEmit32.Ldm, typeof(OpCode32MemMult)); + SetA32("<<<<010xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, typeof(OpCode32MemImm)); + SetA32("<<<<010xx1x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, typeof(OpCode32MemImm)); + SetA32("<<<<000xx1x0xxxxxxxxxxxx1101xxxx", InstName.Ldrd, InstEmit32.Ldrd, typeof(OpCode32MemImm8)); + SetA32("<<<<000xx1x1xxxxxxxxxxxx1011xxxx", InstName.Ldrh, InstEmit32.Ldrh, typeof(OpCode32MemImm8)); + SetA32("<<<<000xx1x1xxxxxxxxxxxx1101xxxx", InstName.Ldrsb, InstEmit32.Ldrsb, typeof(OpCode32MemImm8)); + 
SetA32("<<<<000xx1x1xxxxxxxxxxxx1111xxxx", InstName.Ldrsh, InstEmit32.Ldrsh, typeof(OpCode32MemImm8));
+            SetA32("<<<<0011101x0000xxxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCode32AluImm));
+            SetA32("<<<<0001101x0000xxxxxxxxxxx0xxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCode32AluRsImm));
+            SetT32("xxxxxxxxxxxxxxxx00100xxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCodeT16AluImm8));
+            SetA32("<<<<100xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Stm, InstEmit32.Stm, typeof(OpCode32MemMult));
+            SetA32("<<<<010xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit32.Str, typeof(OpCode32MemImm));
+            SetA32("<<<<010xx1x0xxxxxxxxxxxxxxxxxxxx", InstName.Strb, InstEmit32.Strb, typeof(OpCode32MemImm));
+            SetA32("<<<<000xx1x0xxxxxxxxxxxx1111xxxx", InstName.Strd, InstEmit32.Strd, typeof(OpCode32MemImm8));
+            SetA32("<<<<000xx1x0xxxxxxxxxxxx1011xxxx", InstName.Strh, InstEmit32.Strh, typeof(OpCode32MemImm8));
+            SetA32("<<<<0010010xxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit32.Sub, typeof(OpCode32AluImm));
+            SetA32("<<<<0000010xxxxxxxxxxxxxxxx0xxxx", InstName.Sub, InstEmit32.Sub, typeof(OpCode32AluRsImm));
+#endregion
+
+            FillFastLookupTable(_instA32FastLookup, _allInstA32);
+            FillFastLookupTable(_instT32FastLookup, _allInstT32);
+            FillFastLookupTable(_instA64FastLookup, _allInstA64);
+        }
+
+        // Populates one fast lookup table from the full list of registered instructions.
+        //
+        // Every instruction is added to each bucket whose index agrees with the
+        // instruction's fixed bits (as seen through ToFastLookupIndex), so a bucket
+        // holds every instruction that could match an opcode hashing to that index.
+        // Instructions keep the order they have in allInsts; since GetInstFromList
+        // returns the first match, earlier registrations take priority.
+        //
+        // NOTE(review): FastLookupSize is declared outside this view; ToFastLookupIndex
+        // produces 12-bit indices (0..0xFFF), so it is presumably 0x1000 - confirm.
+        private static void FillFastLookupTable(InstInfo[][] table, List<InstInfo> allInsts)
+        {
+            List<InstInfo>[] temp = new List<InstInfo>[FastLookupSize];
+
+            for (int index = 0; index < FastLookupSize; index++)
+            {
+                temp[index] = new List<InstInfo>();
+            }
+
+            foreach (InstInfo inst in allInsts)
+            {
+                // Reduce the 32-bit mask/value pair to the bits that form the table index.
+                int mask  = ToFastLookupIndex(inst.Mask);
+                int value = ToFastLookupIndex(inst.Value);
+
+                for (int index = 0; index < FastLookupSize; index++)
+                {
+                    if ((index & mask) == value)
+                    {
+                        temp[index].Add(inst);
+                    }
+                }
+            }
+
+            // Freeze each bucket into an array for the lookup path.
+            for (int index = 0; index < FastLookupSize; index++)
+            {
+                table[index] = temp[index].ToArray();
+            }
+        }
+
+        // Registers an encoding for the ExecutionMode.Aarch32Arm instruction list.
+        private static void SetA32(string encoding, InstName name, InstEmitter emitter, Type type)
+        {
+            Set(encoding, ExecutionMode.Aarch32Arm, new InstDescriptor(name, emitter), type);
+        }
+
+        // Registers an encoding for the ExecutionMode.Aarch32Thumb instruction list.
+        private static void SetT32(string encoding, InstName name, InstEmitter emitter, Type type)
+        {
+            Set(encoding, ExecutionMode.Aarch32Thumb, new InstDescriptor(name, emitter), type);
+        }
+
+        // Registers an encoding for the ExecutionMode.Aarch64 instruction list.
+        private static void SetA64(string encoding, InstName name, InstEmitter emitter, Type type)
+        {
+            Set(encoding, ExecutionMode.Aarch64, new InstDescriptor(name, emitter), type);
+        }
+
+        // Parses a textual bit pattern and registers the resulting mask/value pair(s).
+        //
+        // The leftmost character of the encoding is the most significant bit. Allowed
+        // characters are:
+        //   '0' / '1' - bit must have exactly this value.
+        //   'x'       - bit is ignored when matching.
+        //   '<' / '>' - variable bit with one forbidden combination (see below).
+        //
+        // Encodings containing '<' or '>' are expanded into one InstInfo per allowed
+        // combination of those bits. The single combination that is NOT registered is
+        // the one where every '<' bit is 1 and every '>' bit is 0. So "<<" allows
+        // 00, 01 and 10 but not 11, while ">>" allows 01, 10 and 11 but not 00.
+        //
+        // Throws ArgumentNullException when encoding is null, and ArgumentException
+        // when it is longer than 32 characters or contains an unknown character.
+        private static void Set(string encoding, ExecutionMode mode, InstDescriptor inst, Type type)
+        {
+            if (encoding == null)
+            {
+                throw new ArgumentNullException(nameof(encoding));
+            }
+
+            // Masks and values are 32-bit integers. A longer pattern would make the
+            // shifts below wrap around (the shift count of an int is taken modulo 32)
+            // and silently corrupt the low bits, so reject it up front.
+            if (encoding.Length > 32)
+            {
+                throw new ArgumentException($"Encoding \"{encoding}\" is longer than 32 bits.", nameof(encoding));
+            }
+
+            int bit   = encoding.Length - 1;
+            int value = 0;
+            int xMask = 0;
+            int xBits = 0;
+
+            // Bit positions of the '<' and '>' characters, in the order they appear.
+            int[] xPos = new int[encoding.Length];
+
+            // Bits contributed by '<' characters; together with all '>' bits being
+            // zero, this is the one combination that must not be registered.
+            int blacklisted = 0;
+
+            for (int index = 0; index < encoding.Length; index++, bit--)
+            {
+                char chr = encoding[index];
+
+                if (chr == '1')
+                {
+                    value |= 1 << bit;
+                }
+                else if (chr == 'x')
+                {
+                    xMask |= 1 << bit;
+                }
+                else if (chr == '>')
+                {
+                    xPos[xBits++] = bit;
+                }
+                else if (chr == '<')
+                {
+                    xPos[xBits++] = bit;
+
+                    blacklisted |= 1 << bit;
+                }
+                else if (chr != '0')
+                {
+                    throw new ArgumentException($"Invalid character '{chr}' in encoding \"{encoding}\".", nameof(encoding));
+                }
+            }
+
+            // Up to here xMask marks the ignored ('x') bits; invert it so that it
+            // selects the bits that take part in the comparison instead.
+            xMask = ~xMask;
+
+            if (xBits == 0)
+            {
+                InsertInst(new InstInfo(xMask, value, inst, type), mode);
+
+                return;
+            }
+
+            // Enumerate every assignment of the '<' / '>' bits.
+            for (int index = 0; index < (1 << xBits); index++)
+            {
+                int mask = 0;
+
+                // Scatter the bits of the counter to their positions in the opcode.
+                for (int x = 0; x < xBits; x++)
+                {
+                    mask |= ((index >> x) & 1) << xPos[x];
+                }
+
+                if (mask != blacklisted)
+                {
+                    InsertInst(new InstInfo(xMask, value | mask, inst, type), mode);
+                }
+            }
+        }
+
+        // Appends the instruction to the list that belongs to the given execution
+        // mode. Modes other than the three listed below are ignored.
+        private static void InsertInst(InstInfo info, ExecutionMode mode)
+        {
+            switch (mode)
+            {
+                case ExecutionMode.Aarch32Arm:   _allInstA32.Add(info); break;
+                case ExecutionMode.Aarch32Thumb: _allInstT32.Add(info); break;
+                case ExecutionMode.Aarch64:      _allInstA64.Add(info); break;
+            }
+        }
+
+        // Looks up the descriptor and decoder type for an opcode in the A32 table.
+        public static (InstDescriptor inst, Type type) GetInstA32(int opCode)
+        {
+            return GetInstFromList(_instA32FastLookup[ToFastLookupIndex(opCode)], opCode);
+        }
+
+        // Looks up the descriptor and decoder type for an opcode in the T32 table.
+        public static (InstDescriptor inst, Type type) GetInstT32(int opCode)
+        {
+            return GetInstFromList(_instT32FastLookup[ToFastLookupIndex(opCode)], opCode);
+        }
+
+        // Looks up the descriptor and decoder type for an opcode in the A64 table.
+        public static (InstDescriptor inst, Type type) GetInstA64(int opCode)
+        {
+            return GetInstFromList(_instA64FastLookup[ToFastLookupIndex(opCode)], opCode);
+        }
+
+        // Returns the first entry of the bucket whose mask/value pair matches the
+        // opcode. When nothing matches, an InstName.Und descriptor paired with
+        // typeof(OpCode) is returned instead.
+        private static (InstDescriptor inst, Type type) GetInstFromList(InstInfo[] insts, int opCode)
+        {
+            foreach (InstInfo info in insts)
+            {
+                if ((opCode & info.Mask) == info.Value)
+                {
+                    return (info.Inst, info.Type);
+                }
+            }
+
+            return (new InstDescriptor(InstName.Und, InstEmit.Und), typeof(OpCode));
+        }
+
+        // Builds the 12-bit fast lookup index: bits 10..13 of the value become
+        // index bits 0..3, and bits 22..29 become index bits 4..11.
+        private static int ToFastLookupIndex(int value)
+        {
+            return ((value >> 10) & 0x00F) | ((value >> 18) & 0xFF0);
+        }
+    }
+}
diff --git
a/ARMeilleure/Decoders/RegisterSize.cs b/ARMeilleure/Decoders/RegisterSize.cs
new file mode 100644
index 000000000..c9cea03ed
--- /dev/null
+++ b/ARMeilleure/Decoders/RegisterSize.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+    // Register width/class tags; the emitters compare an opcode's RegisterSize against these.
+    enum RegisterSize
+    {
+        Int32,
+        Int64,
+        Simd64,
+        Simd128
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/ShiftType.cs b/ARMeilleure/Decoders/ShiftType.cs
new file mode 100644
index 000000000..8583f16ad
--- /dev/null
+++ b/ARMeilleure/Decoders/ShiftType.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+    // Shift kinds with explicit numeric values.
+    // NOTE(review): presumably these match the shift-type field of the instruction encodings - confirm.
+    enum ShiftType
+    {
+        Lsl = 0,
+        Lsr = 1,
+        Asr = 2,
+        Ror = 3
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Diagnostics/IRDumper.cs b/ARMeilleure/Diagnostics/IRDumper.cs
new file mode 100644
index 000000000..55d5b493e
--- /dev/null
+++ b/ARMeilleure/Diagnostics/IRDumper.cs
@@ -0,0 +1,168 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace ARMeilleure.Diagnostics
+{
+    /// <summary>
+    /// Renders a <see cref="ControlFlowGraph"/> as indented text for debugging.
+    /// </summary>
+    static class IRDumper
+    {
+        private const string Indentation = " ";
+
+        /// <summary>
+        /// Builds a textual listing of every block in <paramref name="cfg"/>: a header line
+        /// per block (with its next/branch targets when present) followed by one line per node.
+        /// </summary>
+        /// <param name="cfg">Graph whose blocks and operations are listed.</param>
+        /// <returns>The complete dump.</returns>
+        public static string GetDump(ControlFlowGraph cfg)
+        {
+            StringBuilder sb = new StringBuilder();
+
+            // Maps each local-variable operand to the "%N" name it was given on first sight.
+            Dictionary<Operand, string> localNames = new Dictionary<Operand, string>();
+
+            string indentation = string.Empty;
+
+            void IncreaseIndentation()
+            {
+                indentation += Indentation;
+            }
+
+            void DecreaseIndentation()
+            {
+                indentation = indentation.Substring(0, indentation.Length - Indentation.Length);
+            }
+
+            void AppendLine(string text)
+            {
+                sb.AppendLine(indentation + text);
+            }
+
+            IncreaseIndentation();
+
+            foreach (BasicBlock block in cfg.Blocks)
+            {
+                string blockName = GetBlockName(block);
+
+                if (block.Next != null)
+                {
+                    blockName += $" (next {GetBlockName(block.Next)})";
+                }
+
+                if (block.Branch != null)
+                {
+                    blockName += $" (branch {GetBlockName(block.Branch)})";
+                }
+
+                blockName += ":";
+
+                AppendLine(blockName);
+
+                IncreaseIndentation();
+
+                foreach (Node node in block.Operations)
+                {
+                    string[] sources = new string[node.SourcesCount];
+
+                    string instName = string.Empty;
+
+                    if (node is PhiNode phi)
+                    {
+                        // Phi sources are printed together with the block they come from.
+                        for (int index = 0; index < sources.Length; index++)
+                        {
+                            string phiBlockName = GetBlockName(phi.GetBlock(index));
+
+                            string operName = GetOperandName(phi.GetSource(index), localNames);
+
+                            sources[index] = $"({phiBlockName}: {operName})";
+                        }
+
+                        instName = "Phi";
+                    }
+                    else if (node is Operation operation)
+                    {
+                        for (int index = 0; index < sources.Length; index++)
+                        {
+                            sources[index] = GetOperandName(operation.GetSource(index), localNames);
+                        }
+
+                        instName = operation.Instruction.ToString();
+                    }
+
+                    string allSources = string.Join(", ", sources);
+
+                    string line = instName + " " + allSources;
+
+                    if (node.Destination != null)
+                    {
+                        line = GetOperandName(node.Destination, localNames) + " = " + line;
+                    }
+
+                    AppendLine(line);
+                }
+
+                DecreaseIndentation();
+            }
+
+            return sb.ToString();
+        }
+
+        /// <summary>Returns the label used for <paramref name="block"/>, based on its index.</summary>
+        private static string GetBlockName(BasicBlock block)
+        {
+            return $"block{block.Index}";
+        }
+
+        /// <summary>
+        /// Formats an operand as "type name". Local variables get a stable "%N" name that is
+        /// recorded in <paramref name="localNames"/>; registers use a b/r/v prefix plus index;
+        /// constants are printed in hexadecimal.
+        /// </summary>
+        private static string GetOperandName(Operand operand, Dictionary<Operand, string> localNames)
+        {
+            if (operand == null)
+            {
+                // NOTE(review): angle-bracketed text was stripped elsewhere in this extract;
+                // confirm this literal is really empty upstream.
+                return "";
+            }
+
+            string name = string.Empty;
+
+            if (operand.Kind == OperandKind.LocalVariable)
+            {
+                if (!localNames.TryGetValue(operand, out string localName))
+                {
+                    localName = "%" + localNames.Count;
+
+                    localNames.Add(operand, localName);
+                }
+
+                name = localName;
+            }
+            else if (operand.Kind == OperandKind.Register)
+            {
+                Register reg = operand.GetRegister();
+
+                switch (reg.Type)
+                {
+                    case RegisterType.Flag: name = "b" + reg.Index; break;
+                    case RegisterType.Integer: name = "r" + reg.Index; break;
+                    case RegisterType.Vector: name = "v" + reg.Index; break;
+                }
+            }
+            else if (operand.Kind == OperandKind.Constant)
+            {
+                name = "0x" + operand.Value.ToString("X");
+            }
+            else
+            {
+                // Enum names are identifiers, so lower-case them independently of the current culture.
+                name = operand.Kind.ToString().ToLowerInvariant();
+            }
+
+            return GetTypeName(operand.Type) + " " + name;
+        }
+
+        /// <summary>Maps an operand type to its short textual name.</summary>
+        /// <exception cref="ArgumentException">The type is not one of the listed values.</exception>
+        private static string GetTypeName(OperandType type)
+        {
+            switch (type)
+            {
+                case OperandType.FP32: return "f32";
+                case OperandType.FP64: return "f64";
+                case OperandType.I32: return "i32";
+                case OperandType.I64: return "i64";
+                case OperandType.None: return "none";
+                case OperandType.V128: return "v128";
+            }
+
+            throw new ArgumentException($"Invalid operand type \"{type}\".");
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Diagnostics/Logger.cs b/ARMeilleure/Diagnostics/Logger.cs
new file mode 100644
index 000000000..29d9c79b9
--- /dev/null
+++ b/ARMeilleure/Diagnostics/Logger.cs
@@ -0,0 +1,59 @@
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+namespace ARMeilleure.Diagnostics
+{
+    /// <summary>
+    /// Console logger for pass timing. The public method bodies are compiled only when
+    /// M_DEBUG is defined; otherwise they do nothing.
+    /// </summary>
+    static class Logger
+    {
+        // Timestamp taken by the most recent StartPass call.
+        private static long _startTime;
+
+        // Stopwatch ticks accumulated per pass, indexed by (int)PassName.
+        private static readonly long[] _accumulatedTime;
+
+        static Logger()
+        {
+            _accumulatedTime = new long[(int)PassName.Count];
+        }
+
+        /// <summary>Logs the start of a pass and records the current timestamp.</summary>
+        public static void StartPass(PassName name)
+        {
+#if M_DEBUG
+            WriteOutput(name + " pass started...");
+
+            _startTime = Stopwatch.GetTimestamp();
+#endif
+        }
+
+        /// <summary>Ends the pass, then writes the IR dump of <paramref name="cfg"/>.</summary>
+        public static void EndPass(PassName name, ControlFlowGraph cfg)
+        {
+#if M_DEBUG
+            EndPass(name);
+
+            WriteOutput("IR after " + name + " pass:");
+
+            WriteOutput(IRDumper.GetDump(cfg));
+#endif
+        }
+
+        /// <summary>
+        /// Adds the time elapsed since the last <see cref="StartPass"/> to the pass total and
+        /// logs that accumulated total (not just this run) in milliseconds.
+        /// </summary>
+        public static void EndPass(PassName name)
+        {
+#if M_DEBUG
+            long elapsedTime = Stopwatch.GetTimestamp() - _startTime;
+
+            _accumulatedTime[(int)name] += elapsedTime;
+
+            WriteOutput($"{name} pass ended after {GetMilliseconds(_accumulatedTime[(int)name])} ms...");
+#endif
+        }
+
+        // Converts Stopwatch ticks to whole milliseconds (truncating).
+        private static long GetMilliseconds(long ticks)
+        {
+            return (long)(((double)ticks / Stopwatch.Frequency) * 1000);
+        }
+
+        private static void WriteOutput(string text)
+        {
+            Console.WriteLine(text);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Diagnostics/PassName.cs b/ARMeilleure/Diagnostics/PassName.cs
new file mode 100644
index 000000000..e37439855
--- /dev/null
+++ b/ARMeilleure/Diagnostics/PassName.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.Diagnostics
+{
+    // Identifies a pass for the diagnostics Logger, which uses the value as an array index.
+    enum PassName
+    {
+        Decoding,
+        Translation,
+        RegisterUsage,
+        Dominance,
+        SsaConstruction,
+        Optimization,
+        PreAllocation,
+        RegisterAllocation,
+        CodeGeneration,
+
+        // Not a pass: the number of entries above. Logger sizes its per-pass array with it.
+        Count
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/CryptoHelper.cs b/ARMeilleure/Instructions/CryptoHelper.cs
new file mode 100644
index 000000000..b6b4a62d3
--- /dev/null
+++ b/ARMeilleure/Instructions/CryptoHelper.cs
@@ -0,0 +1,279 @@
+// https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf
+
+using ARMeilleure.State;
+
+namespace ARMeilleure.Instructions
+{
+    // Software helpers for the AES steps, driven by 256-entry byte lookup tables.
+    static class CryptoHelper
+    {
+#region "LookUp Tables"
+        // Byte substitution table indexed by the input byte (read by AesSubBytes).
+        private static readonly byte[] _sBox = new byte[]
+        {
+            0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+            0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+            0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+            0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+            0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+            0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+            0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+            0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+            0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+            0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+            0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+            0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+            0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 + }; + + private static readonly byte[] _invSBox = new byte[] + { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + }; + + private static 
readonly byte[] _gfMul02 = new byte[] + { + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, + 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, + 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, + 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, + 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, + 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, + 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, + 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, + 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05, + 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25, + 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45, + 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65, + 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85, + 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, + 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5, + 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5 + }; + + private static readonly byte[] _gfMul03 = new byte[] + { + 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11, + 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, + 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, + 0x50, 0x53, 0x56, 0x55, 
0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, + 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, + 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, + 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, + 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, + 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a, + 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba, + 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea, + 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda, + 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a, + 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a, + 0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a, + 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a + }; + + private static readonly byte[] _gfMul09 = new byte[] + { + 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, + 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, + 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, + 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc, + 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, + 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91, + 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a, 
+ 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, + 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b, + 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b, + 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, + 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, + 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed, + 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, + 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6, + 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46 + }; + + private static readonly byte[] _gfMul0B = new byte[] + { + 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69, + 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, + 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12, + 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2, + 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, + 0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f, + 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4, + 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, + 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e, + 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e, + 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 
0xc9, 0xf8, 0xf3, 0xee, 0xe5, + 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55, + 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, + 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, + 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13, + 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3 + }; + + private static readonly byte[] _gfMul0D = new byte[] + { + 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b, + 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, + 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0, + 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20, + 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, + 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, + 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d, + 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, + 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, + 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, + 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a, + 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa, + 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, + 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, + 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 
0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47, + 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97 + }; + + private static readonly byte[] _gfMul0E = new byte[] + { + 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, + 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, + 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81, + 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61, + 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, + 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17, + 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c, + 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, + 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b, + 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb, + 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, + 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, + 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6, + 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, + 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, + 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d + }; + + private static readonly byte[] _srPerm = new byte[] + { + 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3 + }; + + private static readonly byte[] _isrPerm = new byte[] + { + 0, 5, 10, 15, 4, 9, 
14, 3, 8, 13, 2, 7, 12, 1, 6, 11
+        };
+#endregion
+
+        /// <summary>
+        /// Transforms each group of four consecutive bytes by XOR-combining their
+        /// <c>_gfMul0E</c>, <c>_gfMul0B</c>, <c>_gfMul0D</c> and <c>_gfMul09</c> table values.
+        /// </summary>
+        public static V128 AesInvMixColumns(V128 op)
+        {
+            byte[] src = op.ToArray();
+            byte[] dst = new byte[16];
+
+            // One iteration per 4-byte column.
+            for (int offset = 0; offset < 16; offset += 4)
+            {
+                byte b0 = src[offset];
+                byte b1 = src[offset + 1];
+                byte b2 = src[offset + 2];
+                byte b3 = src[offset + 3];
+
+                dst[offset] = (byte)(_gfMul0E[b0] ^ _gfMul0B[b1] ^ _gfMul0D[b2] ^ _gfMul09[b3]);
+                dst[offset + 1] = (byte)(_gfMul09[b0] ^ _gfMul0E[b1] ^ _gfMul0B[b2] ^ _gfMul0D[b3]);
+                dst[offset + 2] = (byte)(_gfMul0D[b0] ^ _gfMul09[b1] ^ _gfMul0E[b2] ^ _gfMul0B[b3]);
+                dst[offset + 3] = (byte)(_gfMul0B[b0] ^ _gfMul0D[b1] ^ _gfMul09[b2] ^ _gfMul0E[b3]);
+            }
+
+            return new V128(dst);
+        }
+
+        /// <summary>Moves input byte <c>i</c> to position <c>_isrPerm[i]</c>.</summary>
+        public static V128 AesInvShiftRows(V128 op) => ScatterBytes(op, _isrPerm);
+
+        /// <summary>Replaces every byte with its <c>_invSBox</c> entry.</summary>
+        public static V128 AesInvSubBytes(V128 op) => SubstituteBytes(op, _invSBox);
+
+        /// <summary>
+        /// Transforms each group of four consecutive bytes by XOR-combining the bytes with
+        /// their <c>_gfMul02</c> and <c>_gfMul03</c> table values.
+        /// </summary>
+        public static V128 AesMixColumns(V128 op)
+        {
+            byte[] src = op.ToArray();
+            byte[] dst = new byte[16];
+
+            // One iteration per 4-byte column.
+            for (int offset = 0; offset < 16; offset += 4)
+            {
+                byte b0 = src[offset];
+                byte b1 = src[offset + 1];
+                byte b2 = src[offset + 2];
+                byte b3 = src[offset + 3];
+
+                dst[offset] = (byte)(_gfMul02[b0] ^ _gfMul03[b1] ^ b2 ^ b3);
+                dst[offset + 1] = (byte)(b0 ^ _gfMul02[b1] ^ _gfMul03[b2] ^ b3);
+                dst[offset + 2] = (byte)(b0 ^ b1 ^ _gfMul02[b2] ^ _gfMul03[b3]);
+                dst[offset + 3] = (byte)(_gfMul03[b0] ^ b1 ^ b2 ^ _gfMul02[b3]);
+            }
+
+            return new V128(dst);
+        }
+
+        /// <summary>Moves input byte <c>i</c> to position <c>_srPerm[i]</c>.</summary>
+        public static V128 AesShiftRows(V128 op) => ScatterBytes(op, _srPerm);
+
+        /// <summary>Replaces every byte with its <c>_sBox</c> entry.</summary>
+        public static V128 AesSubBytes(V128 op) => SubstituteBytes(op, _sBox);
+
+        // Writes input byte i to output position perm[i] for the 16 bytes of the vector.
+        private static V128 ScatterBytes(V128 op, byte[] perm)
+        {
+            byte[] src = op.ToArray();
+            byte[] dst = new byte[16];
+
+            for (int i = 0; i < dst.Length; i++)
+            {
+                dst[perm[i]] = src[i];
+            }
+
+            return new V128(dst);
+        }
+
+        // Maps each of the 16 bytes of the vector through a 256-entry table.
+        private static V128 SubstituteBytes(V128 op, byte[] table)
+        {
+            byte[] src = op.ToArray();
+            byte[] dst = new byte[16];
+
+            for (int i = 0; i < dst.Length; i++)
+            {
+                dst[i] = table[src[i]];
+            }
+
+            return new V128(dst);
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/DelegateTypes.cs b/ARMeilleure/Instructions/DelegateTypes.cs
new file mode 100644
index 000000000..e90e4d77a
--- /dev/null
+++ b/ARMeilleure/Instructions/DelegateTypes.cs
@@ -0,0 +1,78 @@
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.Instructions
+{
+    // Delegate names spell out the signature: _<return>_<arg1>_<arg2>...
+    delegate double _F64_F64(double a1);
+    delegate double _F64_F64_F64(double a1, double a2);
+    delegate double _F64_F64_F64_F64(double a1, double a2, double a3);
+    delegate double _F64_F64_MidpointRounding(double a1, MidpointRounding a2);
+
+    delegate float _F32_F32(float a1);
+    delegate float _F32_F32_F32(float a1, float a2);
+    delegate float _F32_F32_F32_F32(float a1, float a2, float a3);
+    delegate float _F32_F32_MidpointRounding(float a1, MidpointRounding a2);
+    delegate float _F32_U16(ushort a1);
+
+    delegate int _S32_F32(float a1);
+    delegate int _S32_F32_F32_Bool(float a1, float a2, bool a3);
+    delegate int _S32_F64(double a1);
+    delegate int _S32_F64_F64_Bool(double a1, double a2, bool a3);
+    delegate int _S32_U64_U16(ulong a1, ushort a2);
+    delegate int _S32_U64_U32(ulong a1, uint a2);
+    delegate int _S32_U64_U64(ulong a1, ulong a2);
+    delegate int _S32_U64_U8(ulong a1, byte a2);
+    delegate int _S32_U64_V128(ulong a1, V128 a2);
+
+    delegate long _S64_F32(float a1);
+    delegate long _S64_F64(double a1);
+    delegate long _S64_S64(long a1);
+    delegate long _S64_S64_S32(long a1, int a2);
+    delegate long _S64_S64_S64(long a1, long a2);
+    delegate long _S64_S64_S64_Bool_S32(long a1, long a2, bool a3, int a4);
+    delegate long _S64_S64_S64_S32(long a1, long a2, int a3);
+    delegate long _S64_U64_S32(ulong a1, int a2);
+    delegate long _S64_U64_S64(ulong a1, long a2);
+
+    delegate ushort _U16_F32(float a1);
+    delegate ushort _U16_U64(ulong a1);
+
+    delegate uint _U32_F32(float a1);
+    delegate uint _U32_F64(double a1);
+    delegate uint _U32_U32(uint a1);
+    delegate uint _U32_U32_U16(uint a1, ushort a2);
+    delegate uint _U32_U32_U32(uint a1, uint a2);
+    delegate uint _U32_U32_U64(uint a1, ulong a2);
+    delegate uint _U32_U32_U8(uint a1, byte a2);
+    delegate uint _U32_U64(ulong a1);
+
+    delegate ulong _U64();
+    delegate ulong _U64_F32(float a1);
+    delegate ulong _U64_F64(double a1);
+    delegate ulong _U64_S64_S32(long a1, int a2);
+    delegate ulong _U64_S64_U64(long a1, ulong a2);
+    delegate ulong _U64_U64(ulong a1);
+    delegate ulong _U64_U64_S32(ulong a1, int a2);
+    delegate ulong _U64_U64_S64_S32(ulong a1, long a2, int a3);
+    delegate ulong _U64_U64_U64(ulong a1, ulong a2);
+    delegate ulong _U64_U64_U64_Bool_S32(ulong a1, ulong a2, bool a3, int a4);
+
+    delegate byte _U8_U64(ulong a1);
+
+    delegate V128 _V128_U64(ulong a1);
+    delegate V128 _V128_V128(V128 a1);
+    delegate V128 _V128_V128_U32_V128(V128 a1, uint a2, V128 a3);
+    delegate V128 _V128_V128_V128(V128 a1, V128 a2);
+    delegate V128 _V128_V128_V128_V128(V128 a1, V128 a2, V128 a3);
+    delegate V128 _V128_V128_V128_V128_V128(V128 a1, V128 a2, V128 a3, V128 a4);
+    delegate V128 _V128_V128_V128_V128_V128_V128(V128 a1, V128 a2, V128 a3, V128 a4, V128 a5);
+
+    delegate void _Void();
+    delegate void _Void_U64(ulong a1);
+    delegate void _Void_U64_S32(ulong a1, int a2);
+    delegate void _Void_U64_U16(ulong a1, ushort a2);
+
delegate void _Void_U64_U32(ulong a1, uint a2);
+    delegate void _Void_U64_U64(ulong a1, ulong a2);
+    delegate void _Void_U64_U8(ulong a1, byte a2);
+    delegate void _Void_U64_V128(ulong a1, V128 a2);
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitAlu.cs b/ARMeilleure/Instructions/InstEmitAlu.cs
new file mode 100644
index 000000000..947c9f70b
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitAlu.cs
@@ -0,0 +1,369 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    // ALU instruction emitters. Each method reads its operands from context.CurrOp and
+    // emits the corresponding IR through the context, so statement order is significant.
+    static partial class InstEmit
+    {
+        public static void Adc(ArmEmitterContext context) => EmitAdc(context, setFlags: false);
+        public static void Adcs(ArmEmitterContext context) => EmitAdc(context, setFlags: true);
+
+        // Emits d = n + m + C, optionally updating the N, Z, C and V flags.
+        private static void EmitAdc(ArmEmitterContext context, bool setFlags)
+        {
+            Operand n = GetAluN(context);
+            Operand m = GetAluM(context);
+
+            Operand d = context.Add(n, m);
+
+            Operand carry = GetFlag(PState.CFlag);
+
+            // Widen the carry flag so it can be added to a 64-bit value.
+            if (context.CurrOp.RegisterSize == RegisterSize.Int64)
+            {
+                carry = context.ZeroExtend32(OperandType.I64, carry);
+            }
+
+            d = context.Add(d, carry);
+
+            if (setFlags)
+            {
+                EmitNZFlagsCheck(context, d);
+
+                EmitAdcsCCheck(context, n, d);
+                EmitAddsVCheck(context, n, m, d);
+            }
+
+            SetAluDOrZR(context, d);
+        }
+
+        // Emits d = n + m without touching the flags.
+        public static void Add(ArmEmitterContext context)
+        {
+            SetAluD(context, context.Add(GetAluN(context), GetAluM(context)));
+        }
+
+        // Emits d = n + m and updates the N, Z, C and V flags.
+        public static void Adds(ArmEmitterContext context)
+        {
+            Operand n = GetAluN(context);
+            Operand m = GetAluM(context);
+
+            // NOTE(review): presumably records the operands for a later compare/branch optimization - confirm.
+            context.MarkComparison(n, m);
+
+            Operand d = context.Add(n, m);
+
+            EmitNZFlagsCheck(context, d);
+
+            EmitAddsCCheck(context, n, d);
+            EmitAddsVCheck(context, n, m, d);
+
+            SetAluDOrZR(context, d);
+        }
+
+        // Emits d = n & m without touching the flags.
+        public static void And(ArmEmitterContext context)
+        {
+            SetAluD(context, context.BitwiseAnd(GetAluN(context), GetAluM(context)));
+        }
+
+        // Emits d = n & m, updates N and Z, and clears C and V.
+        public static void Ands(ArmEmitterContext context)
+        {
+            Operand n = GetAluN(context);
+            Operand m = GetAluM(context);
+
+            Operand d = context.BitwiseAnd(n, m);
+
+            EmitNZFlagsCheck(context, d);
+            EmitCVFlagsClear(context);
+
+            SetAluDOrZR(context, d);
+        }
+
+        // Emits a signed right shift of n by the masked value of Rm.
+        public static void Asrv(ArmEmitterContext context)
+        {
+            SetAluDOrZR(context, context.ShiftRightSI(GetAluN(context), GetAluMShift(context)));
+        }
+
+        public static void Bic(ArmEmitterContext context) => EmitBic(context, setFlags: false);
+        public static void Bics(ArmEmitterContext context) => EmitBic(context, setFlags: true);
+
+        // Emits d = n & ~m; with setFlags it also updates N and Z and clears C and V.
+        private static void EmitBic(ArmEmitterContext context, bool setFlags)
+        {
+            Operand n = GetAluN(context);
+            Operand m = GetAluM(context);
+
+            Operand d = context.BitwiseAnd(n, context.BitwiseNot(m));
+
+            if (setFlags)
+            {
+                EmitNZFlagsCheck(context, d);
+                EmitCVFlagsClear(context);
+            }
+
+            // setFlags doubles as the x31IsZR argument here.
+            SetAluD(context, d, setFlags);
+        }
+
+        // Emits clz((n >> 1) ^ (n & MaxValue)) - 1 for the operand width.
+        public static void Cls(ArmEmitterContext context)
+        {
+            OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+            Operand n = GetIntOrZR(context, op.Rn);
+
+            Operand nHigh = context.ShiftRightUI(n, Const(1));
+
+            bool is32Bits = op.RegisterSize == RegisterSize.Int32;
+
+            // Mask that clears only the top (sign) bit.
+            Operand mask = is32Bits ? Const(int.MaxValue) : Const(long.MaxValue);
+
+            Operand nLow = context.BitwiseAnd(n, mask);
+
+            Operand res = context.CountLeadingZeros(context.BitwiseExclusiveOr(nHigh, nLow));
+
+            res = context.Subtract(res, Const(res.Type, 1));
+
+            SetAluDOrZR(context, res);
+        }
+
+        // Emits a count-leading-zeros of Rn.
+        public static void Clz(ArmEmitterContext context)
+        {
+            OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+            Operand n = GetIntOrZR(context, op.Rn);
+
+            Operand d = context.CountLeadingZeros(n);
+
+            SetAluDOrZR(context, d);
+        }
+
+        // Emits d = n ^ ~m.
+        public static void Eon(ArmEmitterContext context)
+        {
+            Operand n = GetAluN(context);
+            Operand m = GetAluM(context);
+
+            Operand d = context.BitwiseExclusiveOr(n, context.BitwiseNot(m));
+
+            SetAluD(context, d);
+        }
+
+        // Emits d = n ^ m.
+        public static void Eor(ArmEmitterContext context)
+        {
+            SetAluD(context, context.BitwiseExclusiveOr(GetAluN(context), GetAluM(context)));
+        }
+
+        // Emits (Rm >> shift) | (Rn << (bits - shift)); a zero shift yields Rm unchanged.
+        public static void Extr(ArmEmitterContext context)
+        {
+            OpCodeAluRs op = (OpCodeAluRs)context.CurrOp;
+
+            Operand res = GetIntOrZR(context, op.Rm);
+
+            if (op.Shift != 0)
+            {
+                if (op.Rn == op.Rm)
+                {
+                    // Both halves come from the same register, so this is a plain rotate.
+                    res = context.RotateRight(res, Const(op.Shift));
+                }
+                else
+                {
+                    res = context.ShiftRightUI(res, Const(op.Shift));
+
+                    Operand n = GetIntOrZR(context, op.Rn);
+
+                    int invShift = op.GetBitsCount() - op.Shift;
+
+                    res = context.BitwiseOr(res, context.ShiftLeft(n, Const(invShift)));
+                }
+            }
+
+            SetAluDOrZR(context, res);
+        }
+
+        // Emits a left shift of n by the masked value of Rm.
+        public static void Lslv(ArmEmitterContext context)
+        {
+            SetAluDOrZR(context, context.ShiftLeft(GetAluN(context), GetAluMShift(context)));
+        }
+
+        // Emits an unsigned right shift of n by the masked value of Rm.
+        public static void Lsrv(ArmEmitterContext context)
+        {
+            SetAluDOrZR(context, context.ShiftRightUI(GetAluN(context), GetAluMShift(context)));
+        }
+
+        public static void Sbc(ArmEmitterContext context) => EmitSbc(context, setFlags: false);
+        public static void Sbcs(ArmEmitterContext context) => EmitSbc(context, setFlags: true);
+
+        // Emits d = n - m - (C ^ 1), optionally updating the N, Z, C and V flags.
+        private static void EmitSbc(ArmEmitterContext context, bool setFlags)
+        {
+            Operand n = GetAluN(context);
+            Operand m = GetAluM(context);
+
+            Operand d = context.Subtract(n, m);
+
+            // The borrow is the inverse of the carry flag.
+            Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1));
+
+            if (context.CurrOp.RegisterSize == RegisterSize.Int64)
+            {
+                borrow = context.ZeroExtend32(OperandType.I64, borrow);
+            }
+
+            d = context.Subtract(d, borrow);
+
+            if (setFlags)
+            {
+                EmitNZFlagsCheck(context, d);
+
+                EmitSbcsCCheck(context, n, m);
+                EmitSubsVCheck(context, n, m, d);
+            }
+
+            SetAluDOrZR(context, d);
+        }
+
+        // Emits d = n - m without touching the flags.
+        public static void Sub(ArmEmitterContext context)
+        {
+            SetAluD(context, context.Subtract(GetAluN(context), GetAluM(context)));
+        }
+
+        // Emits d = n - m and updates the N, Z, C and V flags.
+        public static void Subs(ArmEmitterContext context)
+        {
+            Operand n = GetAluN(context);
+            Operand m = GetAluM(context);
+
+            // NOTE(review): presumably records the operands for a later compare/branch optimization - confirm.
+            context.MarkComparison(n, m);
+
+            Operand d = context.Subtract(n, m);
+
+            EmitNZFlagsCheck(context, d);
+
+            EmitSubsCCheck(context, n, m);
+            EmitSubsVCheck(context, n, m, d);
+
+            SetAluDOrZR(context, d);
+        }
+
+        // Emits d = n | ~m.
+        public static void Orn(ArmEmitterContext context)
+        {
+            Operand n = GetAluN(context);
+            Operand m = GetAluM(context);
+
+            Operand d = context.BitwiseOr(n, context.BitwiseNot(m));
+
+            SetAluD(context, d);
+        }
+
+        // Emits d = n | m.
+        public static void Orr(ArmEmitterContext context)
+        {
+            SetAluD(context, context.BitwiseOr(GetAluN(context), GetAluM(context)));
+        }
+
+        // Emits a call to the SoftFallback bit-reversal routine matching the register size.
+        public static void Rbit(ArmEmitterContext context)
+        {
+            OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+            Operand n = GetIntOrZR(context, op.Rn);
+            Operand d;
+
+            if (op.RegisterSize == RegisterSize.Int32)
+            {
+                d = context.Call(new _U32_U32(SoftFallback.ReverseBits32), n);
+            }
+            else
+            {
+                d = context.Call(new _U64_U64(SoftFallback.ReverseBits64), n);
+            }
+
+            SetAluDOrZR(context, d);
+        }
+
+        // Emits a call to the SoftFallback ReverseBytes16 routine matching the register size.
+        public static void Rev16(ArmEmitterContext context)
+        {
+            OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+            Operand n = GetIntOrZR(context, op.Rn);
+            Operand d;
+
+            if (op.RegisterSize == RegisterSize.Int32)
+            {
+                d = context.Call(new _U32_U32(SoftFallback.ReverseBytes16_32), n);
+            }
+            else
+            {
+                d = context.Call(new _U64_U64(SoftFallback.ReverseBytes16_64), n);
+            }
+
+            SetAluDOrZR(context, d);
+        }
+
+        // 32-bit form: a plain byte swap. 64-bit form: calls SoftFallback.ReverseBytes32_64.
+        public static void Rev32(ArmEmitterContext context)
+        {
+            OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+            Operand n = GetIntOrZR(context, op.Rn);
+
+            if (op.RegisterSize == RegisterSize.Int32)
+            {
+                SetAluDOrZR(context, context.ByteSwap(n));
+            }
+            else
+            {
+                Operand d = context.Call(new _U64_U64(SoftFallback.ReverseBytes32_64), n);
+
+                SetAluDOrZR(context, d);
+            }
+        }
+
+        // Emits a byte swap of Rn.
+        public static void Rev64(ArmEmitterContext context)
+        {
+            OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+            SetAluDOrZR(context, context.ByteSwap(GetIntOrZR(context, op.Rn)));
+        }
+
+        // Emits a rotate right of n by the masked value of Rm.
+        public static void Rorv(ArmEmitterContext context)
+        {
+            SetAluDOrZR(context, context.RotateRight(GetAluN(context), GetAluMShift(context)));
+        }
+
+        // Reads Rm as a shift amount: narrowed to 32 bits for 64-bit ops, then masked
+        // with (operand bit count - 1).
+        private static Operand GetAluMShift(ArmEmitterContext context)
+        {
+            IOpCodeAluRs op = (IOpCodeAluRs)context.CurrOp;
+
+            Operand m = GetIntOrZR(context, op.Rm);
+
+            if (op.RegisterSize == RegisterSize.Int64)
+            {
+                m = context.ConvertI64ToI32(m);
+            }
+
+            return context.BitwiseAnd(m, Const(context.CurrOp.GetBitsCount() - 1));
+        }
+
+        // Sets the C and V flags to zero.
+        private static void EmitCVFlagsClear(ArmEmitterContext context)
+        {
+            SetFlag(context, PState.CFlag, Const(0));
+            SetFlag(context, PState.VFlag, Const(0));
+        }
+
+        // Writes d to Rd without treating register 31 as the zero register
+        // (register-shifted opcodes are still treated that way, see the overload below).
+        public static void SetAluD(ArmEmitterContext context, Operand d)
+        {
+            SetAluD(context, d, x31IsZR: false);
+        }
+
+        // Writes d to Rd, discarding the write when Rd is the zero register index.
+        public static void SetAluDOrZR(ArmEmitterContext context, Operand d)
+        {
+            SetAluD(context, d, x31IsZR: true);
+        }
+
+        // Writes d to Rd through SetIntOrSP. The write is dropped when Rd equals
+        // RegisterConsts.ZeroIndex and either x31IsZR is set or the opcode is an IOpCodeAluRs.
+        public static void SetAluD(ArmEmitterContext context, Operand d, bool x31IsZR)
+        {
+            IOpCodeAlu op = (IOpCodeAlu)context.CurrOp;
+
+            if ((x31IsZR || op is IOpCodeAluRs) && op.Rd == RegisterConsts.ZeroIndex)
+            {
+                return;
+            }
+
+            SetIntOrSP(context, op.Rd, d);
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstEmitAlu32.cs b/ARMeilleure/Instructions/InstEmitAlu32.cs
new file mode 100644
index 000000000..79b0abbc3
--- /dev/null
+++
b/ARMeilleure/Instructions/InstEmitAlu32.cs @@ -0,0 +1,129 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitAluHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Add(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Add(n, m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + + EmitAddsCCheck(context, n, res); + EmitAddsVCheck(context, n, m, res); + } + + EmitAluStore(context, res); + } + + public static void Cmp(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Subtract(n, m); + + EmitNZFlagsCheck(context, res); + + EmitSubsCCheck(context, n, res); + EmitSubsVCheck(context, n, m, res); + } + + public static void Mov(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand m = GetAluM(context); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, m); + } + + EmitAluStore(context, m); + } + + public static void Sub(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Subtract(n, m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + + EmitSubsCCheck(context, n, res); + EmitSubsVCheck(context, n, m, res); + } + + EmitAluStore(context, res); + } + + private static void EmitAluStore(ArmEmitterContext context, Operand value) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + if (op.Rd == 
RegisterAlias.Aarch32Pc) + { + if (op.SetFlags) + { + // TODO: Load SPSR etc. + Operand isThumb = GetFlag(PState.TFlag); + + Operand lblThumb = Label(); + + context.BranchIfTrue(lblThumb, isThumb); + + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~3)))); + + context.MarkLabel(lblThumb); + + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~1)))); + } + else + { + EmitAluWritePc(context, value); + } + } + else + { + SetIntA32(context, op.Rd, value); + } + } + + private static void EmitAluWritePc(ArmEmitterContext context, Operand value) + { + context.StoreToContext(); + + if (IsThumb(context.CurrOp)) + { + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~1)))); + } + else + { + EmitBxWritePc(context, value); + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitAluHelper.cs b/ARMeilleure/Instructions/InstEmitAluHelper.cs new file mode 100644 index 000000000..81d5c9eb3 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitAluHelper.cs @@ -0,0 +1,351 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static class InstEmitAluHelper + { + public static void EmitNZFlagsCheck(ArmEmitterContext context, Operand d) + { + SetFlag(context, PState.NFlag, context.ICompareLess (d, Const(d.Type, 0))); + SetFlag(context, PState.ZFlag, context.ICompareEqual(d, Const(d.Type, 0))); + } + + public static void EmitAdcsCCheck(ArmEmitterContext context, Operand n, Operand d) + { + // C = (Rd == Rn && CIn) || Rd < Rn + Operand cIn = GetFlag(PState.CFlag); + + Operand cOut = context.BitwiseAnd(context.ICompareEqual(d, n), cIn); + + cOut = context.BitwiseOr(cOut, 
context.ICompareLessUI(d, n)); + + SetFlag(context, PState.CFlag, cOut); + } + + public static void EmitAddsCCheck(ArmEmitterContext context, Operand n, Operand d) + { + // C = Rd < Rn + SetFlag(context, PState.CFlag, context.ICompareLessUI(d, n)); + } + + public static void EmitAddsVCheck(ArmEmitterContext context, Operand n, Operand m, Operand d) + { + // V = (Rd ^ Rn) & ~(Rn ^ Rm) < 0 + Operand vOut = context.BitwiseExclusiveOr(d, n); + + vOut = context.BitwiseAnd(vOut, context.BitwiseNot(context.BitwiseExclusiveOr(n, m))); + + vOut = context.ICompareLess(vOut, Const(vOut.Type, 0)); + + SetFlag(context, PState.VFlag, vOut); + } + + public static void EmitSbcsCCheck(ArmEmitterContext context, Operand n, Operand m) + { + // C = (Rn == Rm && CIn) || Rn > Rm + Operand cIn = GetFlag(PState.CFlag); + + Operand cOut = context.BitwiseAnd(context.ICompareEqual(n, m), cIn); + + cOut = context.BitwiseOr(cOut, context.ICompareGreaterUI(n, m)); + + SetFlag(context, PState.CFlag, cOut); + } + + public static void EmitSubsCCheck(ArmEmitterContext context, Operand n, Operand m) + { + // C = Rn >= Rm + SetFlag(context, PState.CFlag, context.ICompareGreaterOrEqualUI(n, m)); + } + + public static void EmitSubsVCheck(ArmEmitterContext context, Operand n, Operand m, Operand d) + { + // V = (Rd ^ Rn) & (Rn ^ Rm) < 0 + Operand vOut = context.BitwiseExclusiveOr(d, n); + + vOut = context.BitwiseAnd(vOut, context.BitwiseExclusiveOr(n, m)); + + vOut = context.ICompareLess(vOut, Const(vOut.Type, 0)); + + SetFlag(context, PState.VFlag, vOut); + } + + + public static Operand GetAluN(ArmEmitterContext context) + { + if (context.CurrOp is IOpCodeAlu op) + { + if (op.DataOp == DataOp.Logical || op is IOpCodeAluRs) + { + return GetIntOrZR(context, op.Rn); + } + else + { + return GetIntOrSP(context, op.Rn); + } + } + else if (context.CurrOp is IOpCode32Alu op32) + { + return GetIntA32(context, op32.Rn); + } + else + { + throw InvalidOpCodeType(context.CurrOp); + } + } + + public static Operand 
GetAluM(ArmEmitterContext context, bool setCarry = true) + { + switch (context.CurrOp) + { + // ARM32. + case OpCode32AluImm op: + { + if (op.SetFlags && op.IsRotated) + { + SetFlag(context, PState.CFlag, Const((uint)op.Immediate >> 31)); + } + + return Const(op.Immediate); + } + + case OpCode32AluRsImm op: return GetMShiftedByImmediate(context, op, setCarry); + + case OpCodeT16AluImm8 op: return Const(op.Immediate); + + // ARM64. + case IOpCodeAluImm op: + { + if (op.GetOperandType() == OperandType.I32) + { + return Const((int)op.Immediate); + } + else + { + return Const(op.Immediate); + } + } + + case IOpCodeAluRs op: + { + Operand value = GetIntOrZR(context, op.Rm); + + switch (op.ShiftType) + { + case ShiftType.Lsl: value = context.ShiftLeft (value, Const(op.Shift)); break; + case ShiftType.Lsr: value = context.ShiftRightUI(value, Const(op.Shift)); break; + case ShiftType.Asr: value = context.ShiftRightSI(value, Const(op.Shift)); break; + case ShiftType.Ror: value = context.RotateRight (value, Const(op.Shift)); break; + } + + return value; + } + + case IOpCodeAluRx op: + { + Operand value = GetExtendedM(context, op.Rm, op.IntType); + + value = context.ShiftLeft(value, Const(op.Shift)); + + return value; + } + + default: throw InvalidOpCodeType(context.CurrOp); + } + } + + private static Exception InvalidOpCodeType(OpCode opCode) + { + return new InvalidOperationException($"Invalid OpCode type \"{opCode?.GetType().Name ?? "null"}\"."); + } + + // ARM32 helpers. 
+ private static Operand GetMShiftedByImmediate(ArmEmitterContext context, OpCode32AluRsImm op, bool setCarry) + { + Operand m = GetIntA32(context, op.Rm); + + int shift = op.Imm; + + if (shift == 0) + { + switch (op.ShiftType) + { + case ShiftType.Lsr: shift = 32; break; + case ShiftType.Asr: shift = 32; break; + case ShiftType.Ror: shift = 1; break; + } + } + + if (shift != 0) + { + setCarry &= op.SetFlags; + + switch (op.ShiftType) + { + case ShiftType.Lsl: m = GetLslC(context, m, setCarry, shift); break; + case ShiftType.Lsr: m = GetLsrC(context, m, setCarry, shift); break; + case ShiftType.Asr: m = GetAsrC(context, m, setCarry, shift); break; + case ShiftType.Ror: + if (op.Imm != 0) + { + m = GetRorC(context, m, setCarry, shift); + } + else + { + m = GetRrxC(context, m, setCarry); + } + break; + } + } + + return m; + } + + private static Operand GetLslC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + if ((uint)shift > 32) + { + return GetShiftByMoreThan32(context, setCarry); + } + else if (shift == 32) + { + if (setCarry) + { + SetCarryMLsb(context, m); + } + + return Const(0); + } + else + { + if (setCarry) + { + Operand cOut = context.ShiftRightUI(m, Const(32 - shift)); + + cOut = context.BitwiseAnd(cOut, Const(1)); + + SetFlag(context, PState.CFlag, cOut); + } + + return context.ShiftLeft(m, Const(shift)); + } + } + + private static Operand GetLsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + if ((uint)shift > 32) + { + return GetShiftByMoreThan32(context, setCarry); + } + else if (shift == 32) + { + if (setCarry) + { + SetCarryMMsb(context, m); + } + + return Const(0); + } + else + { + if (setCarry) + { + SetCarryMShrOut(context, m, shift); + } + + return context.ShiftRightUI(m, Const(shift)); + } + } + + private static Operand GetShiftByMoreThan32(ArmEmitterContext context, bool setCarry) + { + if (setCarry) + { + SetFlag(context, PState.CFlag, Const(0));; + } + + return Const(0); + } + + private static 
Operand GetAsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + if ((uint)shift >= 32) + { + m = context.ShiftRightSI(m, Const(31)); + + if (setCarry) + { + SetCarryMLsb(context, m); + } + + return m; + } + else + { + if (setCarry) + { + SetCarryMShrOut(context, m, shift); + } + + return context.ShiftRightSI(m, Const(shift)); + } + } + + private static Operand GetRorC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + shift &= 0x1f; + + m = context.RotateRight(m, Const(shift)); + + if (setCarry) + { + SetCarryMMsb(context, m); + } + + return m; + } + + private static Operand GetRrxC(ArmEmitterContext context, Operand m, bool setCarry) + { + // Rotate right by 1 with carry. + Operand cIn = context.Copy(GetFlag(PState.CFlag)); + + if (setCarry) + { + SetCarryMLsb(context, m); + } + + m = context.ShiftRightUI(m, Const(1)); + + m = context.BitwiseOr(m, context.ShiftLeft(cIn, Const(31))); + + return m; + } + + private static void SetCarryMLsb(ArmEmitterContext context, Operand m) + { + SetFlag(context, PState.CFlag, context.BitwiseAnd(m, Const(1))); + } + + private static void SetCarryMMsb(ArmEmitterContext context, Operand m) + { + SetFlag(context, PState.CFlag, context.ShiftRightUI(m, Const(31))); + } + + private static void SetCarryMShrOut(ArmEmitterContext context, Operand m, int shift) + { + Operand cOut = context.ShiftRightUI(m, Const(shift - 1)); + + cOut = context.BitwiseAnd(cOut, Const(1)); + + SetFlag(context, PState.CFlag, cOut); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitBfm.cs b/ARMeilleure/Instructions/InstEmitBfm.cs new file mode 100644 index 000000000..8fdbf6cfd --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitBfm.cs @@ -0,0 +1,196 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace 
ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Bfm(ArmEmitterContext context) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + Operand d = GetIntOrZR(context, op.Rd); + Operand n = GetIntOrZR(context, op.Rn); + + Operand res; + + if (op.Pos < op.Shift) + { + // BFI. + int shift = op.GetBitsCount() - op.Shift; + + int width = op.Pos + 1; + + long mask = (long)(ulong.MaxValue >> (64 - width)); + + res = context.ShiftLeft(context.BitwiseAnd(n, Const(n.Type, mask)), Const(shift)); + + res = context.BitwiseOr(res, context.BitwiseAnd(d, Const(d.Type, ~(mask << shift)))); + } + else + { + // BFXIL. + int shift = op.Shift; + + int width = op.Pos - shift + 1; + + long mask = (long)(ulong.MaxValue >> (64 - width)); + + res = context.BitwiseAnd(context.ShiftRightUI(n, Const(shift)), Const(n.Type, mask)); + + res = context.BitwiseOr(res, context.BitwiseAnd(d, Const(d.Type, ~mask))); + } + + SetIntOrZR(context, op.Rd, res); + } + + public static void Sbfm(ArmEmitterContext context) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + int bitsCount = op.GetBitsCount(); + + if (op.Pos + 1 == bitsCount) + { + EmitSbfmShift(context); + } + else if (op.Pos < op.Shift) + { + EmitSbfiz(context); + } + else if (op.Pos == 7 && op.Shift == 0) + { + Operand n = GetIntOrZR(context, op.Rn); + + SetIntOrZR(context, op.Rd, context.SignExtend8(n.Type, n)); + } + else if (op.Pos == 15 && op.Shift == 0) + { + Operand n = GetIntOrZR(context, op.Rn); + + SetIntOrZR(context, op.Rd, context.SignExtend16(n.Type, n)); + } + else if (op.Pos == 31 && op.Shift == 0) + { + Operand n = GetIntOrZR(context, op.Rn); + + SetIntOrZR(context, op.Rd, context.SignExtend32(n.Type, n)); + } + else + { + Operand res = GetIntOrZR(context, op.Rn); + + res = context.ShiftLeft (res, Const(bitsCount - 1 - op.Pos)); + res = context.ShiftRightSI(res, Const(bitsCount - 1)); + res = context.BitwiseAnd (res, Const(res.Type, ~op.TMask)); + + Operand n2 = GetBfmN(context); + + 
SetIntOrZR(context, op.Rd, context.BitwiseOr(res, n2)); + } + } + + public static void Ubfm(ArmEmitterContext context) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + if (op.Pos + 1 == op.GetBitsCount()) + { + EmitUbfmShift(context); + } + else if (op.Pos < op.Shift) + { + EmitUbfiz(context); + } + else if (op.Pos + 1 == op.Shift) + { + EmitBfmLsl(context); + } + else if (op.Pos == 7 && op.Shift == 0) + { + Operand n = GetIntOrZR(context, op.Rn); + + SetIntOrZR(context, op.Rd, context.BitwiseAnd(n, Const(n.Type, 0xff))); + } + else if (op.Pos == 15 && op.Shift == 0) + { + Operand n = GetIntOrZR(context, op.Rn); + + SetIntOrZR(context, op.Rd, context.BitwiseAnd(n, Const(n.Type, 0xffff))); + } + else + { + SetIntOrZR(context, op.Rd, GetBfmN(context)); + } + } + + private static void EmitSbfiz(ArmEmitterContext context) => EmitBfiz(context, signed: true); + private static void EmitUbfiz(ArmEmitterContext context) => EmitBfiz(context, signed: false); + + private static void EmitBfiz(ArmEmitterContext context, bool signed) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + int width = op.Pos + 1; + + Operand res = GetIntOrZR(context, op.Rn); + + res = context.ShiftLeft(res, Const(op.GetBitsCount() - width)); + + res = signed + ? context.ShiftRightSI(res, Const(op.Shift - width)) + : context.ShiftRightUI(res, Const(op.Shift - width)); + + SetIntOrZR(context, op.Rd, res); + } + + private static void EmitSbfmShift(ArmEmitterContext context) + { + EmitBfmShift(context, signed: true); + } + + private static void EmitUbfmShift(ArmEmitterContext context) + { + EmitBfmShift(context, signed: false); + } + + private static void EmitBfmShift(ArmEmitterContext context, bool signed) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + res = signed + ? 
context.ShiftRightSI(res, Const(op.Shift)) + : context.ShiftRightUI(res, Const(op.Shift)); + + SetIntOrZR(context, op.Rd, res); + } + + private static void EmitBfmLsl(ArmEmitterContext context) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + int shift = op.GetBitsCount() - op.Shift; + + SetIntOrZR(context, op.Rd, context.ShiftLeft(res, Const(shift))); + } + + private static Operand GetBfmN(ArmEmitterContext context) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + long mask = op.WMask & op.TMask; + + return context.BitwiseAnd(context.RotateRight(res, Const(op.Shift)), Const(res.Type, mask)); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitCcmp.cs b/ARMeilleure/Instructions/InstEmitCcmp.cs new file mode 100644 index 000000000..b1b0a2a1c --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitCcmp.cs @@ -0,0 +1,61 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitAluHelper; +using static ARMeilleure.Instructions.InstEmitFlowHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Ccmn(ArmEmitterContext context) => EmitCcmp(context, isNegated: true); + public static void Ccmp(ArmEmitterContext context) => EmitCcmp(context, isNegated: false); + + private static void EmitCcmp(ArmEmitterContext context, bool isNegated) + { + OpCodeCcmp op = (OpCodeCcmp)context.CurrOp; + + Operand lblTrue = Label(); + Operand lblEnd = Label(); + + EmitCondBranch(context, lblTrue, op.Cond); + + SetFlag(context, PState.VFlag, Const((op.Nzcv >> 0) & 1)); + SetFlag(context, PState.CFlag, Const((op.Nzcv >> 1) & 1)); + SetFlag(context, PState.ZFlag, 
Const((op.Nzcv >> 2) & 1)); + SetFlag(context, PState.NFlag, Const((op.Nzcv >> 3) & 1)); + + context.Branch(lblEnd); + + context.MarkLabel(lblTrue); + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + if (isNegated) + { + Operand d = context.Add(n, m); + + EmitNZFlagsCheck(context, d); + + EmitAddsCCheck(context, n, d); + EmitAddsVCheck(context, n, m, d); + } + else + { + Operand d = context.Subtract(n, m); + + EmitNZFlagsCheck(context, d); + + EmitSubsCCheck(context, n, m); + EmitSubsVCheck(context, n, m, d); + } + + context.MarkLabel(lblEnd); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitCsel.cs b/ARMeilleure/Instructions/InstEmitCsel.cs new file mode 100644 index 000000000..60baf0bc2 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitCsel.cs @@ -0,0 +1,53 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitFlowHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + private enum CselOperation + { + None, + Increment, + Invert, + Negate + } + + public static void Csel(ArmEmitterContext context) => EmitCsel(context, CselOperation.None); + public static void Csinc(ArmEmitterContext context) => EmitCsel(context, CselOperation.Increment); + public static void Csinv(ArmEmitterContext context) => EmitCsel(context, CselOperation.Invert); + public static void Csneg(ArmEmitterContext context) => EmitCsel(context, CselOperation.Negate); + + private static void EmitCsel(ArmEmitterContext context, CselOperation cselOp) + { + OpCodeCsel op = (OpCodeCsel)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + Operand m = GetIntOrZR(context, op.Rm); + + if (cselOp == CselOperation.Increment) + { + m = context.Add(m, Const(m.Type, 1)); + } + else if (cselOp 
== CselOperation.Invert) + { + m = context.BitwiseNot(m); + } + else if (cselOp == CselOperation.Negate) + { + m = context.Negate(m); + } + + Operand condTrue = GetCondTrue(context, op.Cond); + + Operand d = context.ConditionalSelect(condTrue, n, m); + + SetIntOrZR(context, op.Rd, d); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitDiv.cs b/ARMeilleure/Instructions/InstEmitDiv.cs new file mode 100644 index 000000000..0c21dd1ba --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitDiv.cs @@ -0,0 +1,67 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Sdiv(ArmEmitterContext context) => EmitDiv(context, unsigned: false); + public static void Udiv(ArmEmitterContext context) => EmitDiv(context, unsigned: true); + + private static void EmitDiv(ArmEmitterContext context, bool unsigned) + { + OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp; + + // If Rm == 0, Rd = 0 (division by zero). + Operand n = GetIntOrZR(context, op.Rn); + Operand m = GetIntOrZR(context, op.Rm); + + Operand divisorIsZero = context.ICompareEqual(m, Const(m.Type, 0)); + + Operand lblBadDiv = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBadDiv, divisorIsZero); + + if (!unsigned) + { + // If Rn == INT_MIN && Rm == -1, Rd = INT_MIN (overflow). + bool is32Bits = op.RegisterSize == RegisterSize.Int32; + + Operand intMin = is32Bits ? Const(int.MinValue) : Const(long.MinValue); + Operand minus1 = is32Bits ? 
Const(-1) : Const(-1L); + + Operand nIsIntMin = context.ICompareEqual(n, intMin); + Operand mIsMinus1 = context.ICompareEqual(m, minus1); + + Operand lblGoodDiv = Label(); + + context.BranchIfFalse(lblGoodDiv, context.BitwiseAnd(nIsIntMin, mIsMinus1)); + + SetAluDOrZR(context, intMin); + + context.Branch(lblEnd); + + context.MarkLabel(lblGoodDiv); + } + + Operand d = unsigned + ? context.DivideUI(n, m) + : context.Divide (n, m); + + SetAluDOrZR(context, d); + + context.Branch(lblEnd); + + context.MarkLabel(lblBadDiv); + + SetAluDOrZR(context, Const(op.GetOperandType(), 0)); + + context.MarkLabel(lblEnd); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitException.cs b/ARMeilleure/Instructions/InstEmitException.cs new file mode 100644 index 000000000..6f7b6fd51 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitException.cs @@ -0,0 +1,55 @@ +using ARMeilleure.Decoders; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Brk(ArmEmitterContext context) + { + EmitExceptionCall(context, NativeInterface.Break); + } + + public static void Svc(ArmEmitterContext context) + { + EmitExceptionCall(context, NativeInterface.SupervisorCall); + } + + private static void EmitExceptionCall(ArmEmitterContext context, _Void_U64_S32 func) + { + OpCodeException op = (OpCodeException)context.CurrOp; + + context.StoreToContext(); + + context.Call(func, Const(op.Address), Const(op.Id)); + + context.LoadFromContext(); + + if (context.CurrBlock.Next == null) + { + context.Return(Const(op.Address + 4)); + } + } + + public static void Und(ArmEmitterContext context) + { + OpCode op = context.CurrOp; + + Delegate dlg = new _Void_U64_S32(NativeInterface.Undefined); + + context.StoreToContext(); + + context.Call(dlg, Const(op.Address), Const(op.RawOpCode)); + + context.LoadFromContext(); + + if (context.CurrBlock.Next == null) + 
{ + context.Return(Const(op.Address + 4)); + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitFlow.cs b/ARMeilleure/Instructions/InstEmitFlow.cs new file mode 100644 index 000000000..93d36e1b9 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitFlow.cs @@ -0,0 +1,159 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitFlowHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void B(ArmEmitterContext context) + { + OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp; + + if (context.CurrBlock.Branch != null) + { + context.Branch(context.GetLabel((ulong)op.Immediate)); + } + else + { + context.Return(Const(op.Immediate)); + } + } + + public static void B_Cond(ArmEmitterContext context) + { + OpCodeBImmCond op = (OpCodeBImmCond)context.CurrOp; + + EmitBranch(context, op.Cond); + } + + public static void Bl(ArmEmitterContext context) + { + OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp; + + context.Copy(GetIntOrZR(context, RegisterAlias.Lr), Const(op.Address + 4)); + + EmitCall(context, (ulong)op.Immediate); + } + + public static void Blr(ArmEmitterContext context) + { + OpCodeBReg op = (OpCodeBReg)context.CurrOp; + + Operand n = context.Copy(GetIntOrZR(context, op.Rn)); + + context.Copy(GetIntOrZR(context, RegisterAlias.Lr), Const(op.Address + 4)); + + EmitVirtualCall(context, n); + } + + public static void Br(ArmEmitterContext context) + { + OpCodeBReg op = (OpCodeBReg)context.CurrOp; + + EmitVirtualJump(context, GetIntOrZR(context, op.Rn)); + } + + public static void Cbnz(ArmEmitterContext context) => EmitCb(context, onNotZero: true); + public static void Cbz(ArmEmitterContext context) => EmitCb(context, onNotZero: false); + + private 
static void EmitCb(ArmEmitterContext context, bool onNotZero) + { + OpCodeBImmCmp op = (OpCodeBImmCmp)context.CurrOp; + + EmitBranch(context, GetIntOrZR(context, op.Rt), onNotZero); + } + + public static void Ret(ArmEmitterContext context) + { + context.Return(context.BitwiseOr(GetIntOrZR(context, RegisterAlias.Lr), Const(CallFlag))); + } + + public static void Tbnz(ArmEmitterContext context) => EmitTb(context, onNotZero: true); + public static void Tbz(ArmEmitterContext context) => EmitTb(context, onNotZero: false); + + private static void EmitTb(ArmEmitterContext context, bool onNotZero) + { + OpCodeBImmTest op = (OpCodeBImmTest)context.CurrOp; + + Operand value = context.BitwiseAnd(GetIntOrZR(context, op.Rt), Const(1L << op.Bit)); + + EmitBranch(context, value, onNotZero); + } + + private static void EmitBranch(ArmEmitterContext context, Condition cond) + { + OpCodeBImm op = (OpCodeBImm)context.CurrOp; + + if (context.CurrBlock.Branch != null) + { + EmitCondBranch(context, context.GetLabel((ulong)op.Immediate), cond); + + if (context.CurrBlock.Next == null) + { + context.Return(Const(op.Address + 4)); + } + } + else + { + Operand lblTaken = Label(); + + EmitCondBranch(context, lblTaken, cond); + + context.Return(Const(op.Address + 4)); + + context.MarkLabel(lblTaken); + + context.Return(Const(op.Immediate)); + } + } + + private static void EmitBranch(ArmEmitterContext context, Operand value, bool onNotZero) + { + OpCodeBImm op = (OpCodeBImm)context.CurrOp; + + if (context.CurrBlock.Branch != null) + { + Operand lblTarget = context.GetLabel((ulong)op.Immediate); + + if (onNotZero) + { + context.BranchIfTrue(lblTarget, value); + } + else + { + context.BranchIfFalse(lblTarget, value); + } + + if (context.CurrBlock.Next == null) + { + context.Return(Const(op.Address + 4)); + } + } + else + { + Operand lblTaken = Label(); + + if (onNotZero) + { + context.BranchIfTrue(lblTaken, value); + } + else + { + context.BranchIfFalse(lblTaken, value); + } + + 
context.Return(Const(op.Address + 4)); + + context.MarkLabel(lblTaken); + + context.Return(Const(op.Immediate)); + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitFlow32.cs b/ARMeilleure/Instructions/InstEmitFlow32.cs new file mode 100644 index 000000000..27addc78e --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitFlow32.cs @@ -0,0 +1,71 @@ +using ARMeilleure.Decoders; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void B(ArmEmitterContext context) + { + IOpCode32BImm op = (IOpCode32BImm)context.CurrOp; + + if (context.CurrBlock.Branch != null) + { + context.Branch(context.GetLabel((ulong)op.Immediate)); + } + else + { + context.StoreToContext(); + + context.Return(Const(op.Immediate)); + } + } + + public static void Bl(ArmEmitterContext context) + { + Blx(context, x: false); + } + + public static void Blx(ArmEmitterContext context) + { + Blx(context, x: true); + } + + public static void Bx(ArmEmitterContext context) + { + IOpCode32BReg op = (IOpCode32BReg)context.CurrOp; + + context.StoreToContext(); + + EmitBxWritePc(context, GetIntA32(context, op.Rm)); + } + + private static void Blx(ArmEmitterContext context, bool x) + { + IOpCode32BImm op = (IOpCode32BImm)context.CurrOp; + + uint pc = op.GetPc(); + + bool isThumb = IsThumb(context.CurrOp); + + uint currentPc = isThumb + ? op.GetPc() | 1 + : op.GetPc() - 4; + + SetIntOrSP(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(currentPc)); + + // If x is true, then this is a branch with link and exchange. + // In this case we need to swap the mode between Arm <-> Thumb. + if (x) + { + SetFlag(context, PState.TFlag, Const(isThumb ? 
0 : 1)); + } + + InstEmitFlowHelper.EmitCall(context, (ulong)op.Immediate); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitFlowHelper.cs b/ARMeilleure/Instructions/InstEmitFlowHelper.cs new file mode 100644 index 000000000..a8eb21d33 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitFlowHelper.cs @@ -0,0 +1,192 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static class InstEmitFlowHelper + { + public const ulong CallFlag = 1; + + public static void EmitCondBranch(ArmEmitterContext context, Operand target, Condition cond) + { + if (cond != Condition.Al) + { + context.BranchIfTrue(target, GetCondTrue(context, cond)); + } + else + { + context.Branch(target); + } + } + + public static Operand GetCondTrue(ArmEmitterContext context, Condition condition) + { + Operand cmpResult = context.TryGetComparisonResult(condition); + + if (cmpResult != null) + { + return cmpResult; + } + + Operand value = Const(1); + + Operand Inverse(Operand val) + { + return context.BitwiseExclusiveOr(val, Const(1)); + } + + switch (condition) + { + case Condition.Eq: + value = GetFlag(PState.ZFlag); + break; + + case Condition.Ne: + value = Inverse(GetFlag(PState.ZFlag)); + break; + + case Condition.GeUn: + value = GetFlag(PState.CFlag); + break; + + case Condition.LtUn: + value = Inverse(GetFlag(PState.CFlag)); + break; + + case Condition.Mi: + value = GetFlag(PState.NFlag); + break; + + case Condition.Pl: + value = Inverse(GetFlag(PState.NFlag)); + break; + + case Condition.Vs: + value = GetFlag(PState.VFlag); + break; + + case Condition.Vc: + value = Inverse(GetFlag(PState.VFlag)); + break; + + case Condition.GtUn: + { + Operand c = GetFlag(PState.CFlag); + Operand z = GetFlag(PState.ZFlag); + + 
value = context.BitwiseAnd(c, Inverse(z)); + + break; + } + + case Condition.LeUn: + { + Operand c = GetFlag(PState.CFlag); + Operand z = GetFlag(PState.ZFlag); + + value = context.BitwiseOr(Inverse(c), z); + + break; + } + + case Condition.Ge: + { + Operand n = GetFlag(PState.NFlag); + Operand v = GetFlag(PState.VFlag); + + value = context.ICompareEqual(n, v); + + break; + } + + case Condition.Lt: + { + Operand n = GetFlag(PState.NFlag); + Operand v = GetFlag(PState.VFlag); + + value = context.ICompareNotEqual(n, v); + + break; + } + + case Condition.Gt: + { + Operand n = GetFlag(PState.NFlag); + Operand z = GetFlag(PState.ZFlag); + Operand v = GetFlag(PState.VFlag); + + value = context.BitwiseAnd(Inverse(z), context.ICompareEqual(n, v)); + + break; + } + + case Condition.Le: + { + Operand n = GetFlag(PState.NFlag); + Operand z = GetFlag(PState.ZFlag); + Operand v = GetFlag(PState.VFlag); + + value = context.BitwiseOr(z, context.ICompareNotEqual(n, v)); + + break; + } + } + + return value; + } + + public static void EmitCall(ArmEmitterContext context, ulong immediate) + { + context.Return(Const(immediate | CallFlag)); + } + + public static void EmitVirtualCall(ArmEmitterContext context, Operand target) + { + EmitVirtualCallOrJump(context, target, isJump: false); + } + + public static void EmitVirtualJump(ArmEmitterContext context, Operand target) + { + EmitVirtualCallOrJump(context, target, isJump: true); + } + + private static void EmitVirtualCallOrJump(ArmEmitterContext context, Operand target, bool isJump) + { + context.Return(context.BitwiseOr(target, Const(target.Type, (long)CallFlag))); + } + + private static void EmitContinueOrReturnCheck(ArmEmitterContext context, Operand retVal) + { + // Note: The return value of the called method will be placed + // at the Stack, the return value is always a Int64 with the + // return address of the function. We check if the address is + // correct, if it isn't we keep returning until we reach the dispatcher. 
+ ulong nextAddr = GetNextOpAddress(context.CurrOp); + + if (context.CurrBlock.Next != null) + { + Operand lblContinue = Label(); + + context.BranchIfTrue(lblContinue, context.ICompareEqual(retVal, Const(nextAddr))); + + context.Return(Const(nextAddr)); + + context.MarkLabel(lblContinue); + } + else + { + context.Return(Const(nextAddr)); + } + } + + private static ulong GetNextOpAddress(OpCode op) + { + return op.Address + (ulong)op.OpCodeSizeInBytes; + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitHash.cs b/ARMeilleure/Instructions/InstEmitHash.cs new file mode 100644 index 000000000..0be8458e2 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitHash.cs @@ -0,0 +1,64 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Crc32b(ArmEmitterContext context) + { + EmitCrc32Call(context, new _U32_U32_U8(SoftFallback.Crc32b)); + } + + public static void Crc32h(ArmEmitterContext context) + { + EmitCrc32Call(context, new _U32_U32_U16(SoftFallback.Crc32h)); + } + + public static void Crc32w(ArmEmitterContext context) + { + EmitCrc32Call(context, new _U32_U32_U32(SoftFallback.Crc32w)); + } + + public static void Crc32x(ArmEmitterContext context) + { + EmitCrc32Call(context, new _U32_U32_U64(SoftFallback.Crc32x)); + } + + public static void Crc32cb(ArmEmitterContext context) + { + EmitCrc32Call(context, new _U32_U32_U8(SoftFallback.Crc32cb)); + } + + public static void Crc32ch(ArmEmitterContext context) + { + EmitCrc32Call(context, new _U32_U32_U16(SoftFallback.Crc32ch)); + } + + public static void Crc32cw(ArmEmitterContext context) + { + EmitCrc32Call(context, new _U32_U32_U32(SoftFallback.Crc32cw)); + } + + public static void Crc32cx(ArmEmitterContext context) + { + EmitCrc32Call(context, new _U32_U32_U64(SoftFallback.Crc32cx)); + } + + 
private static void EmitCrc32Call(ArmEmitterContext context, Delegate dlg) + { + OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + Operand m = GetIntOrZR(context, op.Rm); + + Operand d = context.Call(dlg, n, m); + + SetIntOrZR(context, op.Rd, d); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitHelper.cs b/ARMeilleure/Instructions/InstEmitHelper.cs new file mode 100644 index 000000000..02e104a4f --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitHelper.cs @@ -0,0 +1,218 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static class InstEmitHelper + { + public static bool IsThumb(OpCode op) + { + return op is OpCodeT16; + } + + public static Operand GetExtendedM(ArmEmitterContext context, int rm, IntType type) + { + Operand value = GetIntOrZR(context, rm); + + switch (type) + { + case IntType.UInt8: value = context.ZeroExtend8 (value.Type, value); break; + case IntType.UInt16: value = context.ZeroExtend16(value.Type, value); break; + case IntType.UInt32: value = context.ZeroExtend32(value.Type, value); break; + + case IntType.Int8: value = context.SignExtend8 (value.Type, value); break; + case IntType.Int16: value = context.SignExtend16(value.Type, value); break; + case IntType.Int32: value = context.SignExtend32(value.Type, value); break; + } + + return value; + } + + public static Operand GetIntA32(ArmEmitterContext context, int regIndex) + { + if (regIndex == RegisterAlias.Aarch32Pc) + { + OpCode32 op = (OpCode32)context.CurrOp; + + return Const((int)op.GetPc()); + } + else + { + return GetIntOrSP(context, GetRegisterAlias(context.Mode, regIndex)); + } + } + + public static void SetIntA32(ArmEmitterContext context, int regIndex, Operand value) + { + if (regIndex == RegisterAlias.Aarch32Pc) + { 
+ context.StoreToContext(); + + EmitBxWritePc(context, value); + } + else + { + SetIntOrSP(context, GetRegisterAlias(context.Mode, regIndex), value); + } + } + + public static int GetRegisterAlias(Aarch32Mode mode, int regIndex) + { + // Only registers >= 8 are banked, + // with registers in the range [8, 12] being + // banked for the FIQ mode, and registers + // 13 and 14 being banked for all modes. + if ((uint)regIndex < 8) + { + return regIndex; + } + + return GetBankedRegisterAlias(mode, regIndex); + } + + public static int GetBankedRegisterAlias(Aarch32Mode mode, int regIndex) + { + switch (regIndex) + { + case 8: return mode == Aarch32Mode.Fiq + ? RegisterAlias.R8Fiq + : RegisterAlias.R8Usr; + + case 9: return mode == Aarch32Mode.Fiq + ? RegisterAlias.R9Fiq + : RegisterAlias.R9Usr; + + case 10: return mode == Aarch32Mode.Fiq + ? RegisterAlias.R10Fiq + : RegisterAlias.R10Usr; + + case 11: return mode == Aarch32Mode.Fiq + ? RegisterAlias.R11Fiq + : RegisterAlias.R11Usr; + + case 12: return mode == Aarch32Mode.Fiq + ? 
RegisterAlias.R12Fiq + : RegisterAlias.R12Usr; + + case 13: + switch (mode) + { + case Aarch32Mode.User: + case Aarch32Mode.System: return RegisterAlias.SpUsr; + case Aarch32Mode.Fiq: return RegisterAlias.SpFiq; + case Aarch32Mode.Irq: return RegisterAlias.SpIrq; + case Aarch32Mode.Supervisor: return RegisterAlias.SpSvc; + case Aarch32Mode.Abort: return RegisterAlias.SpAbt; + case Aarch32Mode.Hypervisor: return RegisterAlias.SpHyp; + case Aarch32Mode.Undefined: return RegisterAlias.SpUnd; + + default: throw new ArgumentException(nameof(mode)); + } + + case 14: + switch (mode) + { + case Aarch32Mode.User: + case Aarch32Mode.Hypervisor: + case Aarch32Mode.System: return RegisterAlias.LrUsr; + case Aarch32Mode.Fiq: return RegisterAlias.LrFiq; + case Aarch32Mode.Irq: return RegisterAlias.LrIrq; + case Aarch32Mode.Supervisor: return RegisterAlias.LrSvc; + case Aarch32Mode.Abort: return RegisterAlias.LrAbt; + case Aarch32Mode.Undefined: return RegisterAlias.LrUnd; + + default: throw new ArgumentException(nameof(mode)); + } + + default: throw new ArgumentOutOfRangeException(nameof(regIndex)); + } + } + + public static void EmitBxWritePc(ArmEmitterContext context, Operand pc) + { + Operand mode = context.BitwiseAnd(pc, Const(1)); // Bit 0 of the target becomes the new T flag (1 = Thumb, 0 = Arm). + + SetFlag(context, PState.TFlag, mode); + + Operand lblThumbMode = Label(); + + context.BranchIfTrue(lblThumbMode, mode); // Taken when bit 0 of pc is set. + + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(pc, Const(~3)))); // Arm: clear the two low bits of the target. + + context.MarkLabel(lblThumbMode); + + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(pc, Const(~1)))); // Thumb: clear only the interworking bit. + } + + public static Operand GetIntOrZR(ArmEmitterContext context, int regIndex) + { + if (regIndex == RegisterConsts.ZeroIndex) + { + OperandType type = context.CurrOp.GetOperandType(); + + return type == OperandType.I32 ? 
Const(0) : Const(0L); + } + else + { + return GetIntOrSP(context, regIndex); + } + } + + public static void SetIntOrZR(ArmEmitterContext context, int regIndex, Operand value) + { + if (regIndex == RegisterConsts.ZeroIndex) + { + return; + } + + SetIntOrSP(context, regIndex, value); + } + + public static Operand GetIntOrSP(ArmEmitterContext context, int regIndex) + { + Operand value = Register(regIndex, RegisterType.Integer, OperandType.I64); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + value = context.ConvertI64ToI32(value); + } + + return value; + } + + public static void SetIntOrSP(ArmEmitterContext context, int regIndex, Operand value) + { + Operand reg = Register(regIndex, RegisterType.Integer, OperandType.I64); + + if (value.Type == OperandType.I32) + { + value = context.ZeroExtend32(OperandType.I64, value); + } + + context.Copy(reg, value); + } + + public static Operand GetVec(int regIndex) + { + return Register(regIndex, RegisterType.Vector, OperandType.V128); + } + + public static Operand GetFlag(PState stateFlag) + { + return Register((int)stateFlag, RegisterType.Flag, OperandType.I32); + } + + public static void SetFlag(ArmEmitterContext context, PState stateFlag, Operand value) + { + context.Copy(GetFlag(stateFlag), value); + + context.MarkFlagSet(stateFlag); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitMemory.cs b/ARMeilleure/Instructions/InstEmitMemory.cs new file mode 100644 index 000000000..1d5953fb2 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitMemory.cs @@ -0,0 +1,177 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Adr(ArmEmitterContext context) + { + OpCodeAdr op = 
(OpCodeAdr)context.CurrOp; + + SetIntOrZR(context, op.Rd, Const(op.Address + (ulong)op.Immediate)); + } + + public static void Adrp(ArmEmitterContext context) + { + OpCodeAdr op = (OpCodeAdr)context.CurrOp; + + ulong address = (op.Address & ~0xfffUL) + ((ulong)op.Immediate << 12); + + SetIntOrZR(context, op.Rd, Const(address)); + } + + public static void Ldr(ArmEmitterContext context) => EmitLdr(context, signed: false); + public static void Ldrs(ArmEmitterContext context) => EmitLdr(context, signed: true); + + private static void EmitLdr(ArmEmitterContext context, bool signed) + { + OpCodeMem op = (OpCodeMem)context.CurrOp; + + Operand address = GetAddress(context); + + if (signed && op.Extend64) + { + EmitLoadSx64(context, address, op.Rt, op.Size); + } + else if (signed) + { + EmitLoadSx32(context, address, op.Rt, op.Size); + } + else + { + EmitLoadZx(context, address, op.Rt, op.Size); + } + + EmitWBackIfNeeded(context, address); + } + + public static void Ldr_Literal(ArmEmitterContext context) + { + IOpCodeLit op = (IOpCodeLit)context.CurrOp; + + if (op.Prefetch) + { + return; + } + + if (op.Signed) + { + EmitLoadSx64(context, Const(op.Immediate), op.Rt, op.Size); + } + else + { + EmitLoadZx(context, Const(op.Immediate), op.Rt, op.Size); + } + } + + public static void Ldp(ArmEmitterContext context) + { + OpCodeMemPair op = (OpCodeMemPair)context.CurrOp; + + void EmitLoad(int rt, Operand ldAddr) + { + if (op.Extend64) + { + EmitLoadSx64(context, ldAddr, rt, op.Size); + } + else + { + EmitLoadZx(context, ldAddr, rt, op.Size); + } + } + + Operand address = GetAddress(context); + + Operand address2 = context.Add(address, Const(1L << op.Size)); + + EmitLoad(op.Rt, address); + EmitLoad(op.Rt2, address2); + + EmitWBackIfNeeded(context, address); + } + + public static void Str(ArmEmitterContext context) + { + OpCodeMem op = (OpCodeMem)context.CurrOp; + + Operand address = GetAddress(context); + + InstEmitMemoryHelper.EmitStore(context, address, op.Rt, op.Size); + + 
EmitWBackIfNeeded(context, address); + } + + public static void Stp(ArmEmitterContext context) + { + OpCodeMemPair op = (OpCodeMemPair)context.CurrOp; + + Operand address = GetAddress(context); + + Operand address2 = context.Add(address, Const(1L << op.Size)); // Second element lives at address + (1 << op.Size). + + InstEmitMemoryHelper.EmitStore(context, address, op.Rt, op.Size); + InstEmitMemoryHelper.EmitStore(context, address2, op.Rt2, op.Size); + + EmitWBackIfNeeded(context, address); + } + + private static Operand GetAddress(ArmEmitterContext context) + { + Operand address = null; + + switch (context.CurrOp) + { + case OpCodeMemImm op: + { + address = context.Copy(GetIntOrSP(context, op.Rn)); + + // Pre-indexing. + if (!op.PostIdx) + { + address = context.Add(address, Const(op.Immediate)); + } + + break; + } + + case OpCodeMemReg op: + { + Operand n = GetIntOrSP(context, op.Rn); + + Operand m = GetExtendedM(context, op.Rm, op.IntType); + + if (op.Shift) + { + m = context.ShiftLeft(m, Const(op.Size)); + } + + address = context.Add(n, m); + + break; + } + } + + return address; // Still null when CurrOp is neither OpCodeMemImm nor OpCodeMemReg. + } + + private static void EmitWBackIfNeeded(ArmEmitterContext context, Operand address) + { + // Check whether the current OpCode has write back (pre- or post-indexed), if so write it. 
+ if (context.CurrOp is OpCodeMemImm op && op.WBack) + { + if (op.PostIdx) + { + address = context.Add(address, Const(op.Immediate)); + } + + SetIntOrSP(context, op.Rn, address); + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitMemory32.cs b/ARMeilleure/Instructions/InstEmitMemory32.cs new file mode 100644 index 000000000..002d2c5c6 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitMemory32.cs @@ -0,0 +1,256 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + private const int ByteSizeLog2 = 0; + private const int HWordSizeLog2 = 1; + private const int WordSizeLog2 = 2; + private const int DWordSizeLog2 = 3; + + [Flags] + enum AccessType + { + Store = 0, + Signed = 1, + Load = 2, + + LoadZx = Load, + LoadSx = Load | Signed, + } + + public static void Ldm(ArmEmitterContext context) + { + OpCode32MemMult op = (OpCode32MemMult)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + + Operand baseAddress = context.Add(n, Const(op.Offset)); + + bool writesToPc = (op.RegisterMask & (1 << RegisterAlias.Aarch32Pc)) != 0; + + bool writeBack = op.PostOffset != 0 && (op.Rn != RegisterAlias.Aarch32Pc || !writesToPc); + + if (writeBack) + { + SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset))); + } + + int mask = op.RegisterMask; + int offset = 0; + + for (int register = 0; mask != 0; mask >>= 1, register++) + { + if ((mask & 1) != 0) + { + Operand address = context.Add(baseAddress, Const(offset)); + + EmitLoadZx(context, address, register, WordSizeLog2); + + offset += 4; + } + } + } + + public static void Ldr(ArmEmitterContext context) + { + 
EmitLoadOrStore(context, WordSizeLog2, AccessType.LoadZx); + } + + public static void Ldrb(ArmEmitterContext context) + { + EmitLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx); + } + + public static void Ldrd(ArmEmitterContext context) + { + EmitLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx); + } + + public static void Ldrh(ArmEmitterContext context) + { + EmitLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx); + } + + public static void Ldrsb(ArmEmitterContext context) + { + EmitLoadOrStore(context, ByteSizeLog2, AccessType.LoadSx); + } + + public static void Ldrsh(ArmEmitterContext context) + { + EmitLoadOrStore(context, HWordSizeLog2, AccessType.LoadSx); + } + + public static void Stm(ArmEmitterContext context) + { + OpCode32MemMult op = (OpCode32MemMult)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + + Operand baseAddress = context.Add(n, Const(op.Offset)); + + int mask = op.RegisterMask; + int offset = 0; + + for (int register = 0; mask != 0; mask >>= 1, register++) + { + if ((mask & 1) != 0) + { + Operand address = context.Add(baseAddress, Const(offset)); + + EmitStore(context, address, register, WordSizeLog2); + + // Note: If Rn is also specified on the register list, + // and Rn is the first register on this list, then the + // value that is written to memory is the unmodified value, + // before the write back. If it is on the list, but it's + // not the first one, then the value written to memory + // varies between CPUs. + if (offset == 0 && op.PostOffset != 0) + { + // Emit write back after the first write. 
+ SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset))); + } + + offset += 4; + } + } + } + + public static void Str(ArmEmitterContext context) + { + EmitLoadOrStore(context, WordSizeLog2, AccessType.Store); + } + + public static void Strb(ArmEmitterContext context) + { + EmitLoadOrStore(context, ByteSizeLog2, AccessType.Store); + } + + public static void Strd(ArmEmitterContext context) + { + EmitLoadOrStore(context, DWordSizeLog2, AccessType.Store); + } + + public static void Strh(ArmEmitterContext context) + { + EmitLoadOrStore(context, HWordSizeLog2, AccessType.Store); + } + + private static void EmitLoadOrStore(ArmEmitterContext context, int size, AccessType accType) + { + OpCode32Mem op = (OpCode32Mem)context.CurrOp; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + + Operand temp = null; + + if (op.Index || op.WBack) + { + temp = op.Add + ? context.Add (n, Const(op.Immediate)) + : context.Subtract(n, Const(op.Immediate)); + } + + if (op.WBack) + { + SetIntA32(context, op.Rn, temp); + } + + Operand address; + + if (op.Index) + { + address = temp; + } + else + { + address = n; + } + + if ((accType & AccessType.Load) != 0) + { + void Load(int rt, int offs, int loadSize) + { + Operand addr = context.Add(address, Const(offs)); + + if ((accType & AccessType.Signed) != 0) + { + EmitLoadSx32(context, addr, rt, loadSize); + } + else + { + EmitLoadZx(context, addr, rt, loadSize); + } + } + + if (size == DWordSizeLog2) + { + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + Load(op.Rt, 0, WordSizeLog2); + Load(op.Rt | 1, 4, WordSizeLog2); + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + Load(op.Rt | 1, 0, WordSizeLog2); + Load(op.Rt, 4, WordSizeLog2); + + context.MarkLabel(lblEnd); + } + else + { + Load(op.Rt, 0, size); + } + } + else + { + void Store(int rt, int offs, int storeSize) + { + Operand addr = context.Add(address, Const(offs)); + + 
EmitStore(context, addr, rt, storeSize); + } + + if (size == DWordSizeLog2) + { + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + Store(op.Rt, 0, WordSizeLog2); + Store(op.Rt | 1, 4, WordSizeLog2); + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + Store(op.Rt | 1, 0, WordSizeLog2); + Store(op.Rt, 4, WordSizeLog2); + + context.MarkLabel(lblEnd); + } + else + { + Store(op.Rt, 0, size); + } + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitMemoryEx.cs b/ARMeilleure/Instructions/InstEmitMemoryEx.cs new file mode 100644 index 000000000..bcca7619d --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitMemoryEx.cs @@ -0,0 +1,261 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + [Flags] + private enum AccessType + { + None = 0, + Ordered = 1, + Exclusive = 2, + OrderedEx = Ordered | Exclusive + } + + public static void Clrex(ArmEmitterContext context) + { + context.Call(new _Void(NativeInterface.ClearExclusive)); + } + + public static void Dmb(ArmEmitterContext context) => EmitBarrier(context); + public static void Dsb(ArmEmitterContext context) => EmitBarrier(context); + + public static void Ldar(ArmEmitterContext context) => EmitLdr(context, AccessType.Ordered); + public static void Ldaxr(ArmEmitterContext context) => EmitLdr(context, AccessType.OrderedEx); + public static void Ldxr(ArmEmitterContext context) => EmitLdr(context, AccessType.Exclusive); + public static void Ldxp(ArmEmitterContext context) => EmitLdp(context, AccessType.Exclusive); + public static void Ldaxp(ArmEmitterContext context) => EmitLdp(context, 
AccessType.OrderedEx); + + private static void EmitLdr(ArmEmitterContext context, AccessType accType) + { + EmitLoadEx(context, accType, pair: false); + } + + private static void EmitLdp(ArmEmitterContext context, AccessType accType) + { + EmitLoadEx(context, accType, pair: true); + } + + private static void EmitLoadEx(ArmEmitterContext context, AccessType accType, bool pair) + { + OpCodeMemEx op = (OpCodeMemEx)context.CurrOp; + + bool ordered = (accType & AccessType.Ordered) != 0; + bool exclusive = (accType & AccessType.Exclusive) != 0; + + if (ordered) + { + EmitBarrier(context); + } + + Operand address = context.Copy(GetIntOrSP(context, op.Rn)); + + if (pair) + { + // Exclusive loads should be atomic. For pairwise loads, we need to + // read all the data at once. For a 32-bits pairwise load, we do a + // simple 64-bits load, for a 128-bits load, we need to call a special + // method to read 128-bits atomically. + if (op.Size == 2) + { + Operand value = EmitLoad(context, address, exclusive, 3); + + Operand valueLow = context.ConvertI64ToI32(value); + + valueLow = context.ZeroExtend32(OperandType.I64, valueLow); + + Operand valueHigh = context.ShiftRightUI(value, Const(32)); + + SetIntOrZR(context, op.Rt, valueLow); + SetIntOrZR(context, op.Rt2, valueHigh); + } + else if (op.Size == 3) + { + Operand value = EmitLoad(context, address, exclusive, 4); + + Operand valueLow = context.VectorExtract(OperandType.I64, value, 0); + Operand valueHigh = context.VectorExtract(OperandType.I64, value, 1); + + SetIntOrZR(context, op.Rt, valueLow); + SetIntOrZR(context, op.Rt2, valueHigh); + } + else + { + throw new InvalidOperationException($"Invalid load size of {1 << op.Size} bytes."); + } + } + else + { + // 8, 16, 32 or 64-bits (non-pairwise) load. 
+ Operand value = EmitLoad(context, address, exclusive, op.Size); + + SetIntOrZR(context, op.Rt, value); + } + } + + private static Operand EmitLoad( + ArmEmitterContext context, + Operand address, + bool exclusive, + int size) + { + Delegate fallbackMethodDlg = null; + + if (exclusive) + { + switch (size) + { + case 0: fallbackMethodDlg = new _U8_U64 (NativeInterface.ReadByteExclusive); break; + case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16Exclusive); break; + case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32Exclusive); break; + case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64Exclusive); break; + case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128Exclusive); break; + } + } + else + { + switch (size) + { + case 0: fallbackMethodDlg = new _U8_U64 (NativeInterface.ReadByte); break; + case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16); break; + case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32); break; + case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64); break; + case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128); break; + } + } + + return context.Call(fallbackMethodDlg, address); + } + + public static void Pfrm(ArmEmitterContext context) + { + // Memory Prefetch, execute as no-op. 
+ } + + public static void Stlr(ArmEmitterContext context) => EmitStr(context, AccessType.Ordered); + public static void Stlxr(ArmEmitterContext context) => EmitStr(context, AccessType.OrderedEx); + public static void Stxr(ArmEmitterContext context) => EmitStr(context, AccessType.Exclusive); + public static void Stxp(ArmEmitterContext context) => EmitStp(context, AccessType.Exclusive); + public static void Stlxp(ArmEmitterContext context) => EmitStp(context, AccessType.OrderedEx); + + private static void EmitStr(ArmEmitterContext context, AccessType accType) + { + EmitStoreEx(context, accType, pair: false); + } + + private static void EmitStp(ArmEmitterContext context, AccessType accType) + { + EmitStoreEx(context, accType, pair: true); + } + + private static void EmitStoreEx(ArmEmitterContext context, AccessType accType, bool pair) + { + OpCodeMemEx op = (OpCodeMemEx)context.CurrOp; + + bool ordered = (accType & AccessType.Ordered) != 0; + bool exclusive = (accType & AccessType.Exclusive) != 0; + + if (ordered) + { + EmitBarrier(context); + } + + Operand address = context.Copy(GetIntOrSP(context, op.Rn)); + + Operand t = GetIntOrZR(context, op.Rt); + + Operand s = null; + + if (pair) + { + Debug.Assert(op.Size == 2 || op.Size == 3, "Invalid size for pairwise store."); + + Operand t2 = GetIntOrZR(context, op.Rt2); + + Operand value; + + if (op.Size == 2) + { + value = context.BitwiseOr(t, context.ShiftLeft(t2, Const(32))); + } + else /* if (op.Size == 3) */ + { + value = context.VectorInsert(context.VectorZero(), t, 0); + value = context.VectorInsert(value, t2, 1); + } + + s = EmitStore(context, address, value, exclusive, op.Size + 1); + } + else + { + s = EmitStore(context, address, t, exclusive, op.Size); + } + + if (s != null) + { + // This is only needed for exclusive stores. The function returns 0 + // when the store is successful, and 1 otherwise. 
+ SetIntOrZR(context, op.Rs, s); + } + } + + private static Operand EmitStore( + ArmEmitterContext context, + Operand address, + Operand value, + bool exclusive, + int size) + { + if (size < 3) + { + value = context.ConvertI64ToI32(value); + } + + Delegate fallbackMethodDlg = null; + + if (exclusive) + { + switch (size) + { + case 0: fallbackMethodDlg = new _S32_U64_U8 (NativeInterface.WriteByteExclusive); break; + case 1: fallbackMethodDlg = new _S32_U64_U16 (NativeInterface.WriteUInt16Exclusive); break; + case 2: fallbackMethodDlg = new _S32_U64_U32 (NativeInterface.WriteUInt32Exclusive); break; + case 3: fallbackMethodDlg = new _S32_U64_U64 (NativeInterface.WriteUInt64Exclusive); break; + case 4: fallbackMethodDlg = new _S32_U64_V128(NativeInterface.WriteVector128Exclusive); break; + } + + return context.Call(fallbackMethodDlg, address, value); + } + else + { + switch (size) + { + case 0: fallbackMethodDlg = new _Void_U64_U8 (NativeInterface.WriteByte); break; + case 1: fallbackMethodDlg = new _Void_U64_U16 (NativeInterface.WriteUInt16); break; + case 2: fallbackMethodDlg = new _Void_U64_U32 (NativeInterface.WriteUInt32); break; + case 3: fallbackMethodDlg = new _Void_U64_U64 (NativeInterface.WriteUInt64); break; + case 4: fallbackMethodDlg = new _Void_U64_V128(NativeInterface.WriteVector128); break; + } + + context.Call(fallbackMethodDlg, address, value); + + return null; + } + } + + private static void EmitBarrier(ArmEmitterContext context) + { + // Note: This barrier is most likely not necessary, and probably + // doesn't make any difference since we need to do a ton of stuff + // (software MMU emulation) to read or write anything anyway. 
+ } + } +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs new file mode 100644 index 000000000..0ae5e3f26 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs @@ -0,0 +1,512 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Memory; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static class InstEmitMemoryHelper + { + private enum Extension + { + Zx, + Sx32, + Sx64 + } + + public static void EmitLoadZx(ArmEmitterContext context, Operand address, int rt, int size) + { + EmitLoad(context, address, Extension.Zx, rt, size); + } + + public static void EmitLoadSx32(ArmEmitterContext context, Operand address, int rt, int size) + { + EmitLoad(context, address, Extension.Sx32, rt, size); + } + + public static void EmitLoadSx64(ArmEmitterContext context, Operand address, int rt, int size) + { + EmitLoad(context, address, Extension.Sx64, rt, size); + } + + private static void EmitLoad(ArmEmitterContext context, Operand address, Extension ext, int rt, int size) + { + bool isSimd = IsSimd(context); + + if ((uint)size > (isSimd ? 4 : 3)) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + if (isSimd) + { + EmitReadVector(context, address, context.VectorZero(), rt, 0, size); + } + else + { + EmitReadInt(context, address, rt, size); + } + + if (!isSimd) + { + Operand value = GetIntOrZR(context, rt); + + if (ext == Extension.Sx32 || ext == Extension.Sx64) + { + OperandType destType = ext == Extension.Sx64 ? 
OperandType.I64 : OperandType.I32; + + switch (size) + { + case 0: value = context.SignExtend8 (destType, value); break; + case 1: value = context.SignExtend16(destType, value); break; + case 2: value = context.SignExtend32(destType, value); break; + } + } + + SetIntOrZR(context, rt, value); + } + } + + public static void EmitLoadSimd( + ArmEmitterContext context, + Operand address, + Operand vector, + int rt, + int elem, + int size) + { + EmitReadVector(context, address, vector, rt, elem, size); + } + + public static void EmitStore(ArmEmitterContext context, Operand address, int rt, int size) + { + bool isSimd = IsSimd(context); + + if ((uint)size > (isSimd ? 4 : 3)) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + if (isSimd) + { + EmitWriteVector(context, address, rt, 0, size); + } + else + { + EmitWriteInt(context, address, rt, size); + } + } + + public static void EmitStoreSimd( + ArmEmitterContext context, + Operand address, + int rt, + int elem, + int size) + { + EmitWriteVector(context, address, rt, elem, size); + } + + private static bool IsSimd(ArmEmitterContext context) + { + return context.CurrOp is IOpCodeSimd && + !(context.CurrOp is OpCodeSimdMemMs || + context.CurrOp is OpCodeSimdMemSs); + } + + private static void EmitReadInt(ArmEmitterContext context, Operand address, int rt, int size) + { + Operand isUnalignedAddr = EmitAddressCheck(context, address, size); + + Operand lblFastPath = Label(); + Operand lblSlowPath = Label(); + Operand lblEnd = Label(); + + context.BranchIfFalse(lblFastPath, isUnalignedAddr); + + context.MarkLabel(lblSlowPath); + + EmitReadIntFallback(context, address, rt, size); + + context.Branch(lblEnd); + + context.MarkLabel(lblFastPath); + + Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath); + + Operand value = null; + + switch (size) + { + case 0: + value = context.Load8(physAddr); + break; + + case 1: + value = context.Load16(physAddr); + break; + + case 2: + value = 
// Emits a guest-memory read into vector register rt.
//
// size selects the access width: 0-3 load an 8/16/32/64-bit value and insert it into
// element `elem` of `vector`; 4 loads a full 128-bit value that replaces the vector.
// Any other size throws ArgumentOutOfRangeException before any IR is emitted.
//
// Two paths are emitted:
//  - slow path: taken when the address check is non-zero, or when the page table walk
//    branches back to lblSlowPath; it goes through EmitReadVectorFallback.
//  - fast path: walks the page table and loads directly from the resulting pointer.
private static void EmitReadVector(
    ArmEmitterContext context,
    Operand address,
    Operand vector,
    int rt,
    int elem,
    int size)
{
    // This method is reachable through the public EmitLoadSimd, which does not validate
    // size. Without this check an unsupported size would leave `value` null below and
    // select no fallback delegate.
    if ((uint)size > 4)
    {
        throw new ArgumentOutOfRangeException(nameof(size));
    }

    Operand isUnalignedAddr = EmitAddressCheck(context, address, size);

    Operand lblFastPath = Label();
    Operand lblSlowPath = Label();
    Operand lblEnd      = Label();

    context.BranchIfFalse(lblFastPath, isUnalignedAddr);

    // The slow path is emitted first so the page table walk below can branch back to it.
    context.MarkLabel(lblSlowPath);

    EmitReadVectorFallback(context, address, vector, rt, elem, size);

    context.Branch(lblEnd);

    context.MarkLabel(lblFastPath);

    Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath);

    Operand value = null;

    switch (size)
    {
        case 0:
            value = context.VectorInsert8(vector, context.Load8(physAddr), elem);
            break;

        case 1:
            value = context.VectorInsert16(vector, context.Load16(physAddr), elem);
            break;

        case 2:
            value = context.VectorInsert(vector, context.Load(OperandType.I32, physAddr), elem);
            break;

        case 3:
            value = context.VectorInsert(vector, context.Load(OperandType.I64, physAddr), elem);
            break;

        case 4:
            // Whole-vector load: `vector` and `elem` are not used.
            value = context.Load(OperandType.V128, physAddr);
            break;
    }

    context.Copy(GetVec(rt), value);

    context.MarkLabel(lblEnd);
}
// Emits a guest-memory write of data taken from vector register rt.
//
// size selects the access width: 0-3 store the 8/16/32/64-bit element `elem` of the
// register; 4 stores the full 128-bit register. Any other size throws
// ArgumentOutOfRangeException before any IR is emitted.
//
// Two paths are emitted:
//  - slow path: taken when the address check is non-zero, or when the page table walk
//    branches back to lblSlowPath; it goes through EmitWriteVectorFallback.
//  - fast path: walks the page table and stores directly through the resulting pointer.
private static void EmitWriteVector(
    ArmEmitterContext context,
    Operand address,
    int rt,
    int elem,
    int size)
{
    // This method is reachable through the public EmitStoreSimd, which does not validate
    // size. Without this check an unsupported size would make the fast path emit no store
    // at all and select no fallback delegate.
    if ((uint)size > 4)
    {
        throw new ArgumentOutOfRangeException(nameof(size));
    }

    Operand isUnalignedAddr = EmitAddressCheck(context, address, size);

    Operand lblFastPath = Label();
    Operand lblSlowPath = Label();
    Operand lblEnd      = Label();

    context.BranchIfFalse(lblFastPath, isUnalignedAddr);

    // The slow path is emitted first so the page table walk below can branch back to it.
    context.MarkLabel(lblSlowPath);

    EmitWriteVectorFallback(context, address, rt, elem, size);

    context.Branch(lblEnd);

    context.MarkLabel(lblFastPath);

    Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath);

    Operand value = GetVec(rt);

    switch (size)
    {
        case 0:
            context.Store8(physAddr, context.VectorExtract8(value, elem));
            break;

        case 1:
            context.Store16(physAddr, context.VectorExtract16(value, elem));
            break;

        case 2:
            // The fast path extracts the element as FP32 (the fallback path uses I32).
            context.Store(physAddr, context.VectorExtract(OperandType.FP32, value, elem));
            break;

        case 3:
            // The fast path extracts the element as FP64 (the fallback path uses I64).
            context.Store(physAddr, context.VectorExtract(OperandType.FP64, value, elem));
            break;

        case 4:
            // Whole-vector store: `elem` is not used.
            context.Store(physAddr, value);
            break;
    }

    context.MarkLabel(lblEnd);
}
// Slow-path integer read: emits a call to the NativeInterface reader matching the
// access width (size 0/1/2/3 -> ReadByte/ReadUInt16/ReadUInt32/ReadUInt64) and stores
// the call result into register rt through SetInt.
private static void EmitReadIntFallback(ArmEmitterContext context, Operand address, int rt, int size)
{
    Delegate reader;

    if (size == 0)
    {
        reader = new _U8_U64(NativeInterface.ReadByte);
    }
    else if (size == 1)
    {
        reader = new _U16_U64(NativeInterface.ReadUInt16);
    }
    else if (size == 2)
    {
        reader = new _U32_U64(NativeInterface.ReadUInt32);
    }
    else if (size == 3)
    {
        reader = new _U64_U64(NativeInterface.ReadUInt64);
    }
    else
    {
        // No reader exists for other sizes; callers are expected to have validated size.
        reader = null;
    }

    Operand readResult = context.Call(reader, address);

    SetInt(context, rt, readResult);
}
// Slow-path integer write: emits a call to the NativeInterface writer matching the
// access width (size 0/1/2/3 -> WriteByte/WriteUInt16/WriteUInt32/WriteUInt64), passing
// the guest address and the value of register rt.
private static void EmitWriteIntFallback(ArmEmitterContext context, Operand address, int rt, int size)
{
    Delegate writer;

    if (size == 0)
    {
        writer = new _Void_U64_U8(NativeInterface.WriteByte);
    }
    else if (size == 1)
    {
        writer = new _Void_U64_U16(NativeInterface.WriteUInt16);
    }
    else if (size == 2)
    {
        writer = new _Void_U64_U32(NativeInterface.WriteUInt32);
    }
    else if (size == 3)
    {
        writer = new _Void_U64_U64(NativeInterface.WriteUInt64);
    }
    else
    {
        // No writer exists for other sizes; callers are expected to have validated size.
        writer = null;
    }

    Operand source = GetInt(context, rt);

    // Sub-64-bit writers take a narrower argument, so a 64-bit register is narrowed first.
    bool needsNarrowing = size < 3 && source.Type == OperandType.I64;

    if (needsNarrowing)
    {
        source = context.ConvertI64ToI32(source);
    }

    context.Call(writer, address, source);
}
// Emitters for the move-wide-immediate instructions (MOVK, MOVN, MOVZ).
static partial class InstEmit
{
    // MOVK: clears the 16-bit field at bit position op.Bit in Rd, then ORs in op.Immediate.
    // NOTE(review): the immediate is ORed in without shifting, so it is presumably
    // pre-shifted to op.Bit by the decoder - confirm against OpCodeMov.
    public static void Movk(ArmEmitterContext context)
    {
        OpCodeMov mov = (OpCodeMov)context.CurrOp;

        OperandType operandType = mov.GetOperandType();

        Operand current = GetIntOrZR(context, mov.Rd);

        Operand cleared = context.BitwiseAnd(current, Const(operandType, ~(0xffffL << mov.Bit)));

        Operand merged = context.BitwiseOr(cleared, Const(operandType, mov.Immediate));

        SetIntOrZR(context, mov.Rd, merged);
    }

    // MOVN: writes the bitwise complement of op.Immediate to Rd as a constant.
    public static void Movn(ArmEmitterContext context)
    {
        OpCodeMov mov = (OpCodeMov)context.CurrOp;

        Operand inverted = Const(mov.GetOperandType(), ~mov.Immediate);

        SetIntOrZR(context, mov.Rd, inverted);
    }

    // MOVZ: writes op.Immediate to Rd as a constant.
    public static void Movz(ArmEmitterContext context)
    {
        OpCodeMov mov = (OpCodeMov)context.CurrOp;

        Operand immediate = Const(mov.GetOperandType(), mov.Immediate);

        SetIntOrZR(context, mov.Rd, immediate);
    }
}
// Shared emitter for SMADDL / SMSUBL / UMADDL / UMSUBL.
//
// Rn and Rm are first extended from 32 bits (sign-extended when MullFlags.Signed is set,
// zero-extended otherwise), multiplied, and the product is then added to Ra
// (MullFlags.Add set) or subtracted from Ra (MullFlags.Add clear). The result goes to Rd.
private static void EmitMull(ArmEmitterContext context, MullFlags flags)
{
    OpCodeMul mulOp = (OpCodeMul)context.CurrOp;

    bool isSigned   = (flags & MullFlags.Signed) != 0;
    bool accumulate = (flags & MullFlags.Add)    != 0;

    Operand addend = GetIntOrZR(context, mulOp.Ra);

    Operand lhs = Extend32(mulOp.Rn);
    Operand rhs = Extend32(mulOp.Rm);

    Operand product = context.Multiply(lhs, rhs);

    Operand result;

    if (accumulate)
    {
        result = context.Add(addend, product);
    }
    else
    {
        result = context.Subtract(addend, product);
    }

    SetIntOrZR(context, mulOp.Rd, result);

    // Reads a register and extends its low 32 bits, keeping the register's operand type.
    Operand Extend32(int regIndex)
    {
        Operand reg = GetIntOrZR(context, regIndex);

        return isSigned
            ? context.SignExtend32(reg.Type, reg)
            : context.ZeroExtend32(reg.Type, reg);
    }
}
// CLS (vector): for every element of Rn, calls SoftFallback.CountLeadingSigns with the
// zero-extended element and the element width in bits, and inserts the result into the
// same element of a vector that starts out zeroed. The finished vector is copied to Rd.
public static void Cls_V(ArmEmitterContext context)
{
    OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;

    int elementCount = simdOp.GetBytesCount() >> simdOp.Size;
    int elementBits  = 8 << simdOp.Size;

    Operand result = context.VectorZero();

    int lane = 0;

    while (lane < elementCount)
    {
        Operand element = EmitVectorExtractZx(context, simdOp.Rn, lane, simdOp.Size);

        Operand signCount = context.Call(
            new _U64_U64_S32(SoftFallback.CountLeadingSigns),
            element,
            Const(elementBits));

        result = EmitVectorInsert(context, result, signCount, lane, simdOp.Size);

        lane++;
    }

    context.Copy(GetVec(simdOp.Rd), result);
}
// CNT (vector): counts the set bits of every byte of Rn (16 bytes for a 128-bit register,
// 8 otherwise) and writes each count into the matching byte of a vector that starts out
// zeroed. Uses the x86 POPCNT intrinsic when Optimizations.UsePopCnt is set, and a call to
// SoftFallback.CountSetBits8 otherwise.
public static void Cnt_V(ArmEmitterContext context)
{
    OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;

    Operand result = context.VectorZero();

    int byteCount = 8;

    if (simdOp.RegisterSize == RegisterSize.Simd128)
    {
        byteCount = 16;
    }

    for (int lane = 0; lane < byteCount; lane++)
    {
        Operand sourceByte = EmitVectorExtractZx(context, simdOp.Rn, lane, 0);

        Operand bitCount;

        if (!Optimizations.UsePopCnt)
        {
            bitCount = context.Call(new _U64_U64(SoftFallback.CountSetBits8), sourceByte);
        }
        else
        {
            bitCount = context.AddIntrinsicLong(Intrinsic.X86Popcnt, sourceByte);
        }

        result = EmitVectorInsert(context, result, bitCount, lane, 0);
    }

    context.Copy(GetVec(simdOp.Rd), result);
}
// FABS (scalar).
//
// With SSE2: ANDN of a -0 scalar mask with Rn clears the sign bit; the upper 96 bits
// (single precision, op.Size == 0) or upper 64 bits (otherwise) of the result are then
// zeroed before it is copied to Rd.
// Without SSE2: falls back to a MathF.Abs / Math.Abs call through EmitUnaryMathCall.
public static void Fabs_S(ArmEmitterContext context)
{
    if (!Optimizations.UseSse2)
    {
        EmitScalarUnaryOpF(context, (operand) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, operand));

        return;
    }

    OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;

    bool isSingle = simdOp.Size == 0;

    Operand signMask = isSingle
        ? X86GetScalar(context, -0f)
        : X86GetScalar(context, -0d);

    Intrinsic andNot = isSingle ? Intrinsic.X86Andnps : Intrinsic.X86Andnpd;

    Operand magnitude = context.AddIntrinsic(andNot, signMask, GetVec(simdOp.Rn));

    context.Copy(
        GetVec(simdOp.Rd),
        isSingle ? context.VectorZeroUpper96(magnitude) : context.VectorZeroUpper64(magnitude));
}
// FADD (scalar). Three strategies, chosen from the Optimizations flags:
//  - FastFP off:           soft-float call (SoftFloat32.FPAdd / SoftFloat64.FPAdd).
//  - FastFP on, SSE2 on:   ADDSS / ADDSD intrinsics.
//  - FastFP on, SSE2 off:  generic IR Add.
public static void Fadd_S(ArmEmitterContext context)
{
    if (!Optimizations.FastFP)
    {
        EmitScalarBinaryOpF(context, (lhs, rhs) =>
            EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, lhs, rhs));

        return;
    }

    if (Optimizations.UseSse2)
    {
        EmitScalarBinaryOpF(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
    }
    else
    {
        EmitScalarBinaryOpF(context, (lhs, rhs) => context.Add(lhs, rhs));
    }
}
// FADDP (vector): pairwise floating-point add.
// Uses the ADDPS / ADDPD intrinsics when both FastFP and SSE2 are enabled, and a
// soft-float call (SoftFloat32.FPAdd / SoftFloat64.FPAdd) per pair otherwise.
public static void Faddp_V(ArmEmitterContext context)
{
    bool useIntrinsics = Optimizations.FastFP && Optimizations.UseSse2;

    if (!useIntrinsics)
    {
        EmitVectorPairwiseOpF(context, (lhs, rhs) =>
            EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, lhs, rhs));

        return;
    }

    EmitVectorPairwiseOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
}
// FMAX (scalar).
// Uses the MAXSS / MAXSD intrinsics when both FastFP and SSE2 are enabled, and a
// soft-float call (SoftFloat32.FPMax / SoftFloat64.FPMax) otherwise.
public static void Fmax_S(ArmEmitterContext context)
{
    bool useIntrinsics = Optimizations.FastFP && Optimizations.UseSse2;

    if (!useIntrinsics)
    {
        EmitScalarBinaryOpF(context, (lhs, rhs) =>
            EmitSoftFloatCall(context, SoftFloat32.FPMax, SoftFloat64.FPMax, lhs, rhs));

        return;
    }

    EmitScalarBinaryOpF(context, Intrinsic.X86Maxss, Intrinsic.X86Maxsd);
}
// FMINP (vector): pairwise floating-point minimum.
// Uses the MINPS / MINPD intrinsics when both FastFP and SSE2 are enabled, and a
// soft-float call (SoftFloat32.FPMin / SoftFloat64.FPMin) per pair otherwise.
public static void Fminp_V(ArmEmitterContext context)
{
    bool useIntrinsics = Optimizations.FastFP && Optimizations.UseSse2;

    if (!useIntrinsics)
    {
        EmitVectorPairwiseOpF(context, (lhs, rhs) =>
            EmitSoftFloatCall(context, SoftFloat32.FPMin, SoftFloat64.FPMin, lhs, rhs));

        return;
    }

    EmitVectorPairwiseOpF(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
}
// FMLA (vector): Rd = Rd + Rn * Rm, element-wise.
//
// Fast path (FastFP and SSE2 both enabled): a packed multiply followed by a packed add,
// single precision when bit 0 of op.Size is clear and double precision otherwise. For the
// single-precision form on a 64-bit register the upper 64 bits of the result are zeroed.
// NOTE(review): the fast path is a separate multiply and add, i.e. not a fused operation.
// Otherwise: soft-float call (SoftFloat32.FPMulAdd / SoftFloat64.FPMulAdd) per element.
public static void Fmla_V(ArmEmitterContext context) // Fused.
{
    bool useIntrinsics = Optimizations.FastFP && Optimizations.UseSse2;

    if (!useIntrinsics)
    {
        EmitVectorTernaryOpF(context, (acc, lhs, rhs) =>
            EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, acc, lhs, rhs));

        return;
    }

    OpCodeSimdReg regOp = (OpCodeSimdReg)context.CurrOp;

    Operand accumulator  = GetVec(regOp.Rd);
    Operand multiplicand = GetVec(regOp.Rn);
    Operand multiplier   = GetVec(regOp.Rm);

    bool isSingle = (regOp.Size & 1) == 0;

    Intrinsic mulInst = isSingle ? Intrinsic.X86Mulps : Intrinsic.X86Mulpd;
    Intrinsic addInst = isSingle ? Intrinsic.X86Addps : Intrinsic.X86Addpd;

    Operand product = context.AddIntrinsic(mulInst, multiplicand, multiplier);

    Operand sum = context.AddIntrinsic(addInst, accumulator, product);

    // Only the single-precision form can target a 64-bit register here.
    if (isSingle && regOp.RegisterSize == RegisterSize.Simd64)
    {
        sum = context.VectorZeroUpper64(sum);
    }

    context.Copy(GetVec(regOp.Rd), sum);
}
// FMLS (scalar, by element): result = op1 - (op2 * op3), with the operands supplied by
// EmitScalarTernaryOpByElemF.
// NOTE(review): the instruction is labelled fused, but this emits a separate IR Multiply
// followed by an IR Subtract.
public static void Fmls_Se(ArmEmitterContext context) // Fused.
{
    EmitScalarTernaryOpByElemF(context, (accumulator, lhs, rhs) =>
    {
        Operand product = context.Multiply(lhs, rhs);

        return context.Subtract(accumulator, product);
    });
}
// FMLS (vector, by element): Rd = Rd - Rn * Rm[Index], element-wise.
//
// Fast path (FastFP and SSE2 both enabled): the selected element of Rm is broadcast to
// every lane with SHUFPS / SHUFPD, then a packed multiply and a packed subtract follow.
// Single precision is used when bit 0 of op.Size is clear, double precision otherwise.
// For the single-precision form on a 64-bit register the upper 64 bits of the result are
// zeroed.
// NOTE(review): the fast path is a separate multiply and subtract, i.e. not fused.
// Otherwise: soft-float call (SoftFloat32.FPMulSub / SoftFloat64.FPMulSub) per element.
public static void Fmls_Ve(ArmEmitterContext context) // Fused.
{
    bool useIntrinsics = Optimizations.FastFP && Optimizations.UseSse2;

    if (!useIntrinsics)
    {
        EmitVectorTernaryOpByElemF(context, (acc, lhs, rhs) =>
            EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, acc, lhs, rhs));

        return;
    }

    OpCodeSimdRegElemF elemOp = (OpCodeSimdRegElemF)context.CurrOp;

    Operand accumulator  = GetVec(elemOp.Rd);
    Operand multiplicand = GetVec(elemOp.Rn);
    Operand elementSrc   = GetVec(elemOp.Rm);

    bool isSingle = (elemOp.Size & 1) == 0;

    int       broadcastMask;
    Intrinsic shuffleInst;
    Intrinsic mulInst;
    Intrinsic subInst;

    if (isSingle)
    {
        // Four 2-bit selectors, all pointing at the same source lane.
        broadcastMask = elemOp.Index | (elemOp.Index << 2) | (elemOp.Index << 4) | (elemOp.Index << 6);

        shuffleInst = Intrinsic.X86Shufps;
        mulInst     = Intrinsic.X86Mulps;
        subInst     = Intrinsic.X86Subps;
    }
    else
    {
        // Two 1-bit selectors, both pointing at the same source lane.
        broadcastMask = elemOp.Index | (elemOp.Index << 1);

        shuffleInst = Intrinsic.X86Shufpd;
        mulInst     = Intrinsic.X86Mulpd;
        subInst     = Intrinsic.X86Subpd;
    }

    Operand broadcast = context.AddIntrinsic(shuffleInst, elementSrc, elementSrc, Const(broadcastMask));

    Operand product = context.AddIntrinsic(mulInst, multiplicand, broadcast);

    Operand difference = context.AddIntrinsic(subInst, accumulator, product);

    // Only the single-precision form can target a 64-bit register here.
    if (isSingle && elemOp.RegisterSize == RegisterSize.Simd64)
    {
        difference = context.VectorZeroUpper64(difference);
    }

    context.Copy(GetVec(elemOp.Rd), difference);
}
{ + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2); + }); + } + } + + public static void Fmul_Ve(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6; + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + int shuffleMask = op.Index | op.Index << 1; + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else if (Optimizations.FastFP) + { + EmitVectorBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2)); + } + else + { + EmitVectorBinaryOpByElemF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2); + }); + } + } + + public static void Fmulx_S(ArmEmitterContext context) + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, op1, op2); + }); + } + + public static void Fmulx_Se(ArmEmitterContext context) + { + EmitScalarBinaryOpByElemF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, op1, op2); + }); + } + + public static void Fmulx_V(ArmEmitterContext context) + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, op1, op2); + }); + } + + 
public static void Fmulx_Ve(ArmEmitterContext context) + { + EmitVectorBinaryOpByElemF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, op1, op2); + }); + } + + public static void Fneg_S(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + if (op.Size == 0) + { + Operand mask = X86GetScalar(context, -0f); + + Operand res = context.AddIntrinsic(Intrinsic.X86Xorps, mask, GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (op.Size == 1) */ + { + Operand mask = X86GetScalar(context, -0d); + + Operand res = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarUnaryOpF(context, (op1) => context.Negate(op1)); + } + } + + public static void Fneg_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand mask = X86GetAllElements(context, -0f); + + Operand res = context.AddIntrinsic(Intrinsic.X86Xorps, mask, GetVec(op.Rn)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand mask = X86GetAllElements(context, -0d); + + Operand res = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorUnaryOpF(context, (op1) => context.Negate(op1)); + } + } + + public static void Fnmadd_S(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? 
OperandType.FP64 + : OperandType.FP32; + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand me = context.VectorExtract(type, GetVec(op.Rm), 0); + Operand ae = context.VectorExtract(type, GetVec(op.Ra), 0); + + Operand res = context.Subtract(context.Multiply(context.Negate(ne), me), ae); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + + public static void Fnmsub_S(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 + : OperandType.FP32; + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand me = context.VectorExtract(type, GetVec(op.Rm), 0); + Operand ae = context.VectorExtract(type, GetVec(op.Ra), 0); + + Operand res = context.Subtract(context.Multiply(ne, me), ae); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + + public static void Fnmul_S(ArmEmitterContext context) + { + EmitScalarBinaryOpF(context, (op1, op2) => context.Negate(context.Multiply(op1, op2))); + } + + public static void Frecpe_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) + { + EmitScalarUnaryOpF(context, Intrinsic.X86Rcpss, 0); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRecipEstimate, SoftFloat64.FPRecipEstimate, op1); + }); + } + } + + public static void Frecpe_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) + { + EmitVectorUnaryOpF(context, Intrinsic.X86Rcpps, 0); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRecipEstimate, SoftFloat64.FPRecipEstimate, op1); + }); + } + } + + public 
static void Frecps_S(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand mask = X86GetScalar(context, 2f); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Intrinsic.X86Subss, mask, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (sizeF == 1) */ + { + Operand mask = X86GetScalar(context, 2d); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Intrinsic.X86Subsd, mask, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRecipStepFused, SoftFloat64.FPRecipStepFused, op1, op2); + }); + } + } + + public static void Frecps_V(ArmEmitterContext context) // Fused. 
+ { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand mask = X86GetAllElements(context, 2f); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Intrinsic.X86Subps, mask, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand mask = X86GetAllElements(context, 2d); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Intrinsic.X86Subpd, mask, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRecipStepFused, SoftFloat64.FPRecipStepFused, op1, op2); + }); + } + } + + public static void Frecpx_S(ArmEmitterContext context) + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRecpX, SoftFloat64.FPRecpX, op1); + }); + } + + public static void Frinta_S(ArmEmitterContext context) + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1); + }); + } + + public static void Frinta_V(ArmEmitterContext context) + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1); + }); + } + + public static void Frinti_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + EmitScalarUnaryOpF(context, (op1) => + { + if (op.Size == 0) + { + return context.Call(new _F32_F32(SoftFallback.RoundF), op1); + } + else /* if (op.Size == 1) */ + { + return context.Call(new _F64_F64(SoftFallback.Round), op1); + } + }); + } + + public static void Frinti_V(ArmEmitterContext context) + { + OpCodeSimd op = 
(OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + EmitVectorUnaryOpF(context, (op1) => + { + if (sizeF == 0) + { + return context.Call(new _F32_F32(SoftFallback.RoundF), op1); + } + else /* if (sizeF == 1) */ + { + return context.Call(new _F64_F64(SoftFallback.Round), op1); + } + }); + } + + public static void Frintm_S(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, MathF.Floor, Math.Floor, op1); + }); + } + } + + public static void Frintm_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitVectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, MathF.Floor, Math.Floor, op1); + }); + } + } + + public static void Frintn_S(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitScalarRoundOpF(context, FPRoundingMode.ToNearest); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitRoundMathCall(context, MidpointRounding.ToEven, op1); + }); + } + } + + public static void Frintn_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitVectorRoundOpF(context, FPRoundingMode.ToNearest); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitRoundMathCall(context, MidpointRounding.ToEven, op1); + }); + } + } + + public static void Frintp_S(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, op1); + }); + } + } + + public static void Frintp_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitVectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity); + } + else + { + 
EmitVectorUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, op1); + }); + } + } + + public static void Frintx_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + EmitScalarUnaryOpF(context, (op1) => + { + if (op.Size == 0) + { + return context.Call(new _F32_F32(SoftFallback.RoundF), op1); + } + else /* if (op.Size == 1) */ + { + return context.Call(new _F64_F64(SoftFallback.Round), op1); + } + }); + } + + public static void Frintx_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + EmitVectorUnaryOpF(context, (op1) => + { + if (sizeF == 0) + { + return context.Call(new _F32_F32(SoftFallback.RoundF), op1); + } + else /* if (sizeF == 1) */ + { + return context.Call(new _F64_F64(SoftFallback.Round), op1); + } + }); + } + + public static void Frintz_S(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitScalarRoundOpF(context, FPRoundingMode.TowardsZero); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, op1); + }); + } + } + + public static void Frintz_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitVectorRoundOpF(context, FPRoundingMode.TowardsZero); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, op1); + }); + } + } + + public static void Frsqrte_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) + { + EmitScalarUnaryOpF(context, Intrinsic.X86Rsqrtss, 0); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRSqrtEstimate, SoftFloat64.FPRSqrtEstimate, op1); + }); + } + } + + public static void Frsqrte_V(ArmEmitterContext context) + { + OpCodeSimd op = 
(OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0) + { + EmitVectorUnaryOpF(context, Intrinsic.X86Rsqrtps, 0); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRSqrtEstimate, SoftFloat64.FPRSqrtEstimate, op1); + }); + } + } + + public static void Frsqrts_S(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand maskHalf = X86GetScalar(context, 0.5f); + Operand maskThree = X86GetScalar(context, 3f); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Intrinsic.X86Subss, maskThree, res); + res = context.AddIntrinsic(Intrinsic.X86Mulss, maskHalf, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (sizeF == 1) */ + { + Operand maskHalf = X86GetScalar(context, 0.5d); + Operand maskThree = X86GetScalar(context, 3d); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Intrinsic.X86Subsd, maskThree, res); + res = context.AddIntrinsic(Intrinsic.X86Mulsd, maskHalf, res); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRSqrtStepFused, SoftFloat64.FPRSqrtStepFused, op1, op2); + }); + } + } + + public static void Frsqrts_V(ArmEmitterContext context) // Fused. 
+ { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand maskHalf = X86GetAllElements(context, 0.5f); + Operand maskThree = X86GetAllElements(context, 3f); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res); + res = context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand maskHalf = X86GetAllElements(context, 0.5d); + Operand maskThree = X86GetAllElements(context, 3d); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, GetVec(op.Rn), GetVec(op.Rm)); + + res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res); + res = context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRSqrtStepFused, SoftFloat64.FPRSqrtStepFused, op1, op2); + }); + } + } + + public static void Fsqrt_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarUnaryOpF(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPSqrt, SoftFloat64.FPSqrt, op1); + }); + } + } + + public static void Fsqrt_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorUnaryOpF(context, Intrinsic.X86Sqrtps, Intrinsic.X86Sqrtpd); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPSqrt, SoftFloat64.FPSqrt, op1); + }); + } + } + + public static void Fsub_S(ArmEmitterContext context) + { + if 
(Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF(context, Intrinsic.X86Subss, Intrinsic.X86Subsd); + } + else if (Optimizations.FastFP) + { + EmitScalarBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2)); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, op1, op2); + }); + } + } + + public static void Fsub_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF(context, Intrinsic.X86Subps, Intrinsic.X86Subpd); + } + else if (Optimizations.FastFP) + { + EmitVectorBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2)); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, op1, op2); + }); + } + } + + public static void Mla_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Mul_AddSub(context, AddSub.Add); + } + else + { + EmitVectorTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Mla_Ve(ArmEmitterContext context) + { + EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + + public static void Mls_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Mul_AddSub(context, AddSub.Subtract); + } + else + { + EmitVectorTernaryOpZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Mls_Ve(ArmEmitterContext context) + { + EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + + public static void Mul_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Mul_AddSub(context, AddSub.None); + } + else + { + 
EmitVectorBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2)); + } + } + + public static void Mul_Ve(ArmEmitterContext context) + { + EmitVectorBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Neg_S(ArmEmitterContext context) + { + EmitScalarUnaryOpSx(context, (op1) => context.Negate(op1)); + } + + public static void Neg_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Intrinsic subInst = X86PsubInstruction[op.Size]; + + Operand res = context.AddIntrinsic(subInst, context.VectorZero(), GetVec(op.Rn)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorUnaryOpSx(context, (op1) => context.Negate(op1)); + } + } + + public static void Raddhn_V(ArmEmitterContext context) + { + EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: true); + } + + public static void Rsubhn_V(ArmEmitterContext context) + { + EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: true); + } + + public static void Saba_V(ArmEmitterContext context) + { + EmitVectorTernaryOpSx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } + + public static void Sabal_V(ArmEmitterContext context) + { + EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } + + public static void Sabd_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + EmitSse41Sabd(context, op, n, m, isLong: false); + } + else + { + EmitVectorBinaryOpSx(context, (op1, op2) => + { + return EmitAbs(context, context.Subtract(op1, op2)); + }); + } + } + + public static void Sabdl_V(ArmEmitterContext 
context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = op.Size == 0 + ? Intrinsic.X86Pmovsxbw + : Intrinsic.X86Pmovsxwd; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + EmitSse41Sabd(context, op, n, m, isLong: true); + } + else + { + EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => + { + return EmitAbs(context, context.Subtract(op1, op2)); + }); + } + } + + public static void Sadalp_V(ArmEmitterContext context) + { + EmitAddLongPairwise(context, signed: true, accumulate: true); + } + + public static void Saddl_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovsxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); + } + else + { + EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Saddlp_V(ArmEmitterContext context) + { + EmitAddLongPairwise(context, signed: true, accumulate: false); + } + + public static void Saddlv_V(ArmEmitterContext context) + { + EmitVectorLongAcrossVectorOpSx(context, (op1, op2) => context.Add(op1, op2)); + } + + public static void Saddw_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg 
op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovsxInstruction[op.Size]; + + m = context.AddIntrinsic(movInst, m); + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); + } + else + { + EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Shadd_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m); + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m); + + Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psraw : Intrinsic.X86Psrad; + + res2 = context.AddIntrinsic(shiftInst, res2, Const(1)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, res2); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpSx(context, (op1, op2) => + { + return context.ShiftRightSI(context.Add(op1, op2), Const(1)); + }); + } + } + + public static void Shsub_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand mask = X86GetAllElements(context, (int)(op.Size == 0 ? 0x80808080u : 0x80008000u)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + Operand nPlusMask = context.AddIntrinsic(addInst, n, mask); + Operand mPlusMask = context.AddIntrinsic(addInst, m, mask); + + Intrinsic avgInst = op.Size == 0 ? 
Intrinsic.X86Pavgb : Intrinsic.X86Pavgw; + + Operand res = context.AddIntrinsic(avgInst, nPlusMask, mPlusMask); + + Intrinsic subInst = X86PsubInstruction[op.Size]; + + res = context.AddIntrinsic(subInst, nPlusMask, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpSx(context, (op1, op2) => + { + return context.ShiftRightSI(context.Subtract(op1, op2), Const(1)); + }); + } + } + + public static void Smax_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic maxInst = X86PmaxsInstruction[op.Size]; + + Operand res = context.AddIntrinsic(maxInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Delegate dlg = new _S64_S64_S64(Math.Max); + + EmitVectorBinaryOpSx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + } + + public static void Smaxp_V(ArmEmitterContext context) + { + Delegate dlg = new _S64_S64_S64(Math.Max); + + EmitVectorPairwiseOpSx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + + public static void Smaxv_V(ArmEmitterContext context) + { + Delegate dlg = new _S64_S64_S64(Math.Max); + + EmitVectorAcrossVectorOpSx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + + public static void Smin_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic minInst = X86PminsInstruction[op.Size]; + + Operand res = context.AddIntrinsic(minInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Delegate dlg = new _S64_S64_S64(Math.Min); + + 
EmitVectorBinaryOpSx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + } + + public static void Sminp_V(ArmEmitterContext context) + { + Delegate dlg = new _S64_S64_S64(Math.Min); + + EmitVectorPairwiseOpSx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + + public static void Sminv_V(ArmEmitterContext context) + { + Delegate dlg = new _S64_S64_S64(Math.Min); + + EmitVectorAcrossVectorOpSx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + + public static void Smlal_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovsxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic mullInst = op.Size == 0 ? 
Intrinsic.X86Pmullw : Intrinsic.X86Pmulld; + + Operand res = context.AddIntrinsic(mullInst, n, m); + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(d, context.AddIntrinsic(addInst, d, res)); + } + else + { + EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Smlal_Ve(ArmEmitterContext context) + { + EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + + public static void Smlsl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = op.Size == 0 + ? Intrinsic.X86Pmovsxbw + : Intrinsic.X86Pmovsxwd; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic mullInst = op.Size == 0 ? 
Intrinsic.X86Pmullw : Intrinsic.X86Pmulld; + + Operand res = context.AddIntrinsic(mullInst, n, m); + + Intrinsic subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(d, context.AddIntrinsic(subInst, d, res)); + } + else + { + EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Smlsl_Ve(ArmEmitterContext context) + { + EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + + public static void Smull_V(ArmEmitterContext context) + { + EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Smull_Ve(ArmEmitterContext context) + { + EmitVectorWidenBinaryOpByElemSx(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Sqabs_S(ArmEmitterContext context) + { + EmitScalarSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } + + public static void Sqabs_V(ArmEmitterContext context) + { + EmitVectorSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } + + public static void Sqadd_S(ArmEmitterContext context) + { + EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Add); + } + + public static void Sqadd_V(ArmEmitterContext context) + { + EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Add); + } + + public static void Sqdmulh_S(ArmEmitterContext context) + { + EmitSaturatingBinaryOp(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false), SaturatingFlags.ScalarSx); + } + + public static void Sqdmulh_V(ArmEmitterContext context) + { + EmitSaturatingBinaryOp(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false), SaturatingFlags.VectorSx); + } + + public static void Sqneg_S(ArmEmitterContext context) + { + EmitScalarSaturatingUnaryOpSx(context, (op1) => context.Negate(op1)); + } + + public static void 
Sqneg_V(ArmEmitterContext context) =>
+            // Vector form: context.Negate is the per-element operation handed to the signed saturating unary helper.
+            EmitVectorSaturatingUnaryOpSx(context, value => context.Negate(value));
+
+        // Scalar form: EmitDoublingMultiplyHighHalf with rounding enabled, under ScalarSx flags.
+        public static void Sqrdmulh_S(ArmEmitterContext context) =>
+            EmitSaturatingBinaryOp(
+                context,
+                (lhs, rhs) => EmitDoublingMultiplyHighHalf(context, lhs, rhs, round: true),
+                SaturatingFlags.ScalarSx);
+
+        // Vector form: EmitDoublingMultiplyHighHalf with rounding enabled, under VectorSx flags.
+        public static void Sqrdmulh_V(ArmEmitterContext context) =>
+            EmitSaturatingBinaryOp(
+                context,
+                (lhs, rhs) => EmitDoublingMultiplyHighHalf(context, lhs, rhs, round: true),
+                SaturatingFlags.VectorSx);
+
+        // Scalar form: forwards SaturatingFlags.Sub to the signed scalar saturating binary helper.
+        public static void Sqsub_S(ArmEmitterContext context) =>
+            EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Sub);
+
+        // Vector form: forwards SaturatingFlags.Sub to the signed vector saturating binary helper.
+        public static void Sqsub_V(ArmEmitterContext context) =>
+            EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Sub);
+
+        // Scalar saturating narrow using the ScalarSxSx flag set.
+        public static void Sqxtn_S(ArmEmitterContext context) =>
+            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx);
+
+        // Vector saturating narrow using the VectorSxSx flag set.
+        public static void Sqxtn_V(ArmEmitterContext context) =>
+            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx);
+
+        // Scalar saturating narrow using the ScalarSxZx flag set.
+        public static void Sqxtun_S(ArmEmitterContext context) =>
+            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx);
+
+        // Vector saturating narrow using the VectorSxZx flag set.
+        public static void Sqxtun_V(ArmEmitterContext context) =>
+            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx);
+
+        // Rounding halving add on signed elements.
+        // Generic path: (a + b + 1) >> 1 with an arithmetic shift.
+        // SSE2 path (sizes 0 and 1 only): subtract the per-lane sign bit from both inputs,
+        // average them with PAVGB/PAVGW, then add the sign bit back.
+        public static void Srhadd_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (!Optimizations.UseSse2 || op.Size >= 2)
+            {
+                EmitVectorBinaryOpSx(context, (lhs, rhs) =>
+                {
+                    Operand roundedSum = context.Add(context.Add(lhs, rhs), Const(1L));
+
+                    return context.ShiftRightSI(roundedSum, Const(1));
+                });
+
+                return;
+            }
+
+            Operand lhsVec = GetVec(op.Rn);
+            Operand rhsVec = GetVec(op.Rm);
+
+            bool byteLanes = op.Size == 0;
+
+            // 0x80 in every byte lane, or 0x8000 in every halfword lane.
+            uint signBits = byteLanes ? 0x80808080u : 0x80008000u;
+
+            Operand bias = X86GetAllElements(context, (int)signBits);
+
+            Intrinsic laneSub = X86PsubInstruction[op.Size];
+
+            Operand lhsBiased = context.AddIntrinsic(laneSub, lhsVec, bias);
+            Operand rhsBiased = context.AddIntrinsic(laneSub, rhsVec, bias);
+
+            Intrinsic laneAvg;
+
+            if (byteLanes)
+            {
+                laneAvg = Intrinsic.X86Pavgb;
+            }
+            else
+            {
+                laneAvg = Intrinsic.X86Pavgw;
+            }
+
+            Operand averaged = context.AddIntrinsic(laneAvg, lhsBiased, rhsBiased);
+
+            Intrinsic laneAdd = X86PaddInstruction[op.Size];
+
+            Operand result = context.AddIntrinsic(laneAdd, bias, averaged);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                // 64-bit register size: clear the upper half of the result.
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Widening subtract where both sources are sign-extended before subtracting.
+        // The SSE4.1 path uses PMOVSX* for the extension; without SSE4.1 the generic widen helper is used.
+        public static void Ssubl_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorWidenRnRmBinaryOpSx(context, (lhs, rhs) => context.Subtract(lhs, rhs));
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand lhsVec = GetVec(op.Rn);
+            Operand rhsVec = GetVec(op.Rm);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                // 128-bit register size: move the upper 8 bytes down so they feed the extension.
+                lhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, lhsVec, Const(8));
+                rhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, rhsVec, Const(8));
+            }
+
+            Intrinsic signExtend = X86PmovsxInstruction[op.Size];
+
+            lhsVec = context.AddIntrinsic(signExtend, lhsVec);
+            rhsVec = context.AddIntrinsic(signExtend, rhsVec);
+
+            // The subtract is taken from the table entry one size above the source elements.
+            Intrinsic wideSub = X86PsubInstruction[op.Size + 1];
+
+            Operand dest = GetVec(op.Rd);
+            Operand difference = context.AddIntrinsic(wideSub, lhsVec, rhsVec);
+
+            context.Copy(dest, difference);
+        }
+
+        // Widening subtract where only the second source (Rm) is sign-extended; Rn is used as-is.
+        // The SSE4.1 path uses PMOVSX* for the extension; without SSE4.1 the generic widen helper is used.
+        public static void Ssubw_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorWidenRmBinaryOpSx(context, (lhs, rhs) => context.Subtract(lhs, rhs));
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand wideVec = GetVec(op.Rn);
+            Operand narrowVec = GetVec(op.Rm);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                // 128-bit register size: move the upper 8 bytes of Rm down so they feed the extension.
+                narrowVec = context.AddIntrinsic(Intrinsic.X86Psrldq, narrowVec, Const(8));
+            }
+
+            Intrinsic signExtend = X86PmovsxInstruction[op.Size];
+
+            narrowVec = context.AddIntrinsic(signExtend, narrowVec);
+
+            // The subtract is taken from the table entry one size above the source elements.
+            Intrinsic wideSub = X86PsubInstruction[op.Size + 1];
+
+            Operand dest = GetVec(op.Rd);
+            Operand difference = context.AddIntrinsic(wideSub, wideVec, narrowVec);
+
+            context.Copy(dest, difference);
+        }
+
+        public static void 
Sub_S(ArmEmitterContext context) + { + EmitScalarBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + } + + public static void Sub_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic subInst = X86PsubInstruction[op.Size]; + + Operand res = context.AddIntrinsic(subInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + public static void Subhn_V(ArmEmitterContext context) + { + EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: false); + } + + public static void Suqadd_S(ArmEmitterContext context) + { + EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Accumulate); + } + + public static void Suqadd_V(ArmEmitterContext context) + { + EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Accumulate); + } + + public static void Uaba_V(ArmEmitterContext context) + { + EmitVectorTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } + + public static void Uabal_V(ArmEmitterContext context) + { + EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } + + public static void Uabd_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + EmitSse41Uabd(context, op, n, m, isLong: false); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return EmitAbs(context, context.Subtract(op1, op2)); + }); + } + } + + public static void Uabdl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = 
(OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = op.Size == 0 + ? Intrinsic.X86Pmovzxbw + : Intrinsic.X86Pmovzxwd; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + EmitSse41Uabd(context, op, n, m, isLong: true); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => + { + return EmitAbs(context, context.Subtract(op1, op2)); + }); + } + } + + public static void Uadalp_V(ArmEmitterContext context) + { + EmitAddLongPairwise(context, signed: false, accumulate: true); + } + + public static void Uaddl_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovzxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Uaddlp_V(ArmEmitterContext context) + { + EmitAddLongPairwise(context, signed: false, accumulate: false); + } + + public static void Uaddlv_V(ArmEmitterContext context) + { + EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + + public static void Uaddw_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = 
(OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovzxInstruction[op.Size]; + + m = context.AddIntrinsic(movInst, m); + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); + } + else + { + EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Uhadd_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m); + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m); + + Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psrlw : Intrinsic.X86Psrld; + + res2 = context.AddIntrinsic(shiftInst, res2, Const(1)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, res2); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return context.ShiftRightUI(context.Add(op1, op2), Const(1)); + }); + } + } + + public static void Uhsub_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic avgInst = op.Size == 0 ? 
Intrinsic.X86Pavgb : Intrinsic.X86Pavgw; + + Operand res = context.AddIntrinsic(avgInst, n, m); + + Intrinsic subInst = X86PsubInstruction[op.Size]; + + res = context.AddIntrinsic(subInst, n, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return context.ShiftRightUI(context.Subtract(op1, op2), Const(1)); + }); + } + } + + public static void Umax_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic maxInst = X86PmaxuInstruction[op.Size]; + + Operand res = context.AddIntrinsic(maxInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Delegate dlg = new _U64_U64_U64(Math.Max); + + EmitVectorBinaryOpZx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + } + + public static void Umaxp_V(ArmEmitterContext context) + { + Delegate dlg = new _U64_U64_U64(Math.Max); + + EmitVectorPairwiseOpZx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + + public static void Umaxv_V(ArmEmitterContext context) + { + Delegate dlg = new _U64_U64_U64(Math.Max); + + EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + + public static void Umin_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic minInst = X86PminuInstruction[op.Size]; + + Operand res = context.AddIntrinsic(minInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Delegate dlg = new _U64_U64_U64(Math.Min); + + 
EmitVectorBinaryOpZx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + } + + public static void Uminp_V(ArmEmitterContext context) + { + Delegate dlg = new _U64_U64_U64(Math.Min); + + EmitVectorPairwiseOpZx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + + public static void Uminv_V(ArmEmitterContext context) + { + Delegate dlg = new _U64_U64_U64(Math.Min); + + EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Call(dlg, op1, op2)); + } + + public static void Umlal_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovzxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic mullInst = op.Size == 0 ? 
Intrinsic.X86Pmullw : Intrinsic.X86Pmulld; + + Operand res = context.AddIntrinsic(mullInst, n, m); + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(d, context.AddIntrinsic(addInst, d, res)); + } + else + { + EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Umlal_Ve(ArmEmitterContext context) + { + EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + + public static void Umlsl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = op.Size == 0 + ? Intrinsic.X86Pmovzxbw + : Intrinsic.X86Pmovzxwd; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic mullInst = op.Size == 0 ? 
Intrinsic.X86Pmullw : Intrinsic.X86Pmulld; + + Operand res = context.AddIntrinsic(mullInst, n, m); + + Intrinsic subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(d, context.AddIntrinsic(subInst, d, res)); + } + else + { + EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Umlsl_Ve(ArmEmitterContext context) + { + EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + + public static void Umull_V(ArmEmitterContext context) + { + EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Umull_Ve(ArmEmitterContext context) + { + EmitVectorWidenBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Uqadd_S(ArmEmitterContext context) + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add); + } + + public static void Uqadd_V(ArmEmitterContext context) + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add); + } + + public static void Uqsub_S(ArmEmitterContext context) + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + } + + public static void Uqsub_V(ArmEmitterContext context) + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + } + + public static void Uqxtn_S(ArmEmitterContext context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx); + } + + public static void Uqxtn_V(ArmEmitterContext context) + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx); + } + + public static void Urhadd_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic avgInst = op.Size == 0 ? 
Intrinsic.X86Pavgb : Intrinsic.X86Pavgw; + + Operand res = context.AddIntrinsic(avgInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + Operand res = context.Add(op1, op2); + + res = context.Add(res, Const(1L)); + + return context.ShiftRightUI(res, Const(1)); + }); + } + } + + public static void Usqadd_S(ArmEmitterContext context) + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + } + + public static void Usqadd_V(ArmEmitterContext context) + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + } + + public static void Usubl_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovzxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + public static void Usubw_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovzxInstruction[op.Size]; + + m = context.AddIntrinsic(movInst, m); + + Intrinsic subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), 
context.AddIntrinsic(subInst, n, m)); + } + else + { + EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + private static Operand EmitAbs(ArmEmitterContext context, Operand value) + { + Operand isPositive = context.ICompareGreaterOrEqual(value, Const(value.Type, 0)); + + return context.ConditionalSelect(isPositive, value, context.Negate(value)); + } + + private static void EmitAddLongPairwise(ArmEmitterContext context, bool signed, bool accumulate) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int pairs = op.GetPairsCount() >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand ne0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed); + Operand ne1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed); + + Operand e = context.Add(ne0, ne1); + + if (accumulate) + { + Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); + + e = context.Add(e, de); + } + + res = EmitVectorInsert(context, res, e, index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static Operand EmitDoublingMultiplyHighHalf( + ArmEmitterContext context, + Operand n, + Operand m, + bool round) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int eSize = 8 << op.Size; + + Operand res = context.Multiply(n, m); + + if (!round) + { + res = context.ShiftRightSI(res, Const(eSize - 1)); + } + else + { + long roundConst = 1L << (eSize - 1); + + res = context.ShiftLeft(res, Const(1)); + + res = context.Add(res, Const(roundConst)); + + res = context.ShiftRightSI(res, Const(eSize)); + + Operand isIntMin = context.ICompareEqual(res, Const((long)int.MinValue)); + + res = context.ConditionalSelect(isIntMin, context.Negate(res), res); + } + + return res; + } + + private static void EmitHighNarrow(ArmEmitterContext context, Func2I emit, bool round) + { + OpCodeSimdReg op = 
(OpCodeSimdReg)context.CurrOp; + + int elems = 8 >> op.Size; + int eSize = 8 << op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd)); + + long roundConst = 1L << (eSize - 1); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size + 1); + + Operand de = emit(ne, me); + + if (round) + { + de = context.Add(de, Const(roundConst)); + } + + de = context.ShiftRightUI(de, Const(eSize)); + + res = EmitVectorInsert(context, res, de, part + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundsd : Intrinsic.X86Roundss; + + Operand res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode))); + + if ((op.Size & 1) != 0) + { + res = context.VectorZeroUpper64(res); + } + else + { + res = context.VectorZeroUpper96(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Intrinsic inst = (op.Size & 1) != 0 ? 
Intrinsic.X86Roundpd : Intrinsic.X86Roundps; + + Operand res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode))); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + private enum AddSub + { + None, + Add, + Subtract + } + + private static void EmitSse41Mul_AddSub(ArmEmitterContext context, AddSub addSub) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = null; + + if (op.Size == 0) + { + Operand ns8 = context.AddIntrinsic(Intrinsic.X86Psrlw, n, Const(8)); + Operand ms8 = context.AddIntrinsic(Intrinsic.X86Psrlw, m, Const(8)); + + res = context.AddIntrinsic(Intrinsic.X86Pmullw, ns8, ms8); + + res = context.AddIntrinsic(Intrinsic.X86Psllw, res, Const(8)); + + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m); + + Operand mask = X86GetAllElements(context, 0x00FF00FF); + + res = context.AddIntrinsic(Intrinsic.X86Pblendvb, res, res2, mask); + } + else if (op.Size == 1) + { + res = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Pmulld, n, m); + } + + Operand d = GetVec(op.Rd); + + if (addSub == AddSub.Add) + { + switch (op.Size) + { + case 0: res = context.AddIntrinsic(Intrinsic.X86Paddb, d, res); break; + case 1: res = context.AddIntrinsic(Intrinsic.X86Paddw, d, res); break; + case 2: res = context.AddIntrinsic(Intrinsic.X86Paddd, d, res); break; + case 3: res = context.AddIntrinsic(Intrinsic.X86Paddq, d, res); break; + } + } + else if (addSub == AddSub.Subtract) + { + switch (op.Size) + { + case 0: res = context.AddIntrinsic(Intrinsic.X86Psubb, d, res); break; + case 1: res = context.AddIntrinsic(Intrinsic.X86Psubw, d, res); break; + case 2: res = context.AddIntrinsic(Intrinsic.X86Psubd, d, res); break; + case 3: res = context.AddIntrinsic(Intrinsic.X86Psubq, d, res); break; + } + } + + if (op.RegisterSize == 
RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + + private static void EmitSse41Sabd( + ArmEmitterContext context, + OpCodeSimdReg op, + Operand n, + Operand m, + bool isLong) + { + int size = isLong ? op.Size + 1 : op.Size; + + Intrinsic cmpgtInst = X86PcmpgtInstruction[size]; + + Operand cmpMask = context.AddIntrinsic(cmpgtInst, n, m); + + Intrinsic subInst = X86PsubInstruction[size]; + + Operand res = context.AddIntrinsic(subInst, n, m); + + res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res); + + Operand res2 = context.AddIntrinsic(subInst, m, n); + + res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2); + + res = context.AddIntrinsic(Intrinsic.X86Por, res, res2); + + if (!isLong && op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitSse41Uabd( + ArmEmitterContext context, + OpCodeSimdReg op, + Operand n, + Operand m, + bool isLong) + { + int size = isLong ? 
op.Size + 1 : op.Size; + + Intrinsic maxInst = X86PmaxuInstruction[size]; + + Operand max = context.AddIntrinsic(maxInst, m, n); + + Intrinsic cmpeqInst = X86PcmpeqInstruction[size]; + + Operand cmpMask = context.AddIntrinsic(cmpeqInst, max, m); + + Operand onesMask = X86GetAllElements(context, -1L); + + cmpMask = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, onesMask); + + Intrinsic subInst = X86PsubInstruction[size]; + + Operand res = context.AddIntrinsic(subInst, n, m); + Operand res2 = context.AddIntrinsic(subInst, m, n); + + res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res); + res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2); + + res = context.AddIntrinsic(Intrinsic.X86Por, res, res2); + + if (!isLong && op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp.cs b/ARMeilleure/Instructions/InstEmitSimdCmp.cs new file mode 100644 index 000000000..f27121bb3 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdCmp.cs @@ -0,0 +1,712 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + using Func2I = Func<Operand, Operand, Operand>; /* Two-operand emitter callback: takes two Operands, returns the resulting Operand. */ + + static partial class InstEmit + { + public static void Cmeq_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareEqual(op1, op2), scalar: true); + } + + public static void Cmeq_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m; + + if (op is OpCodeSimdReg binOp) + { + m = GetVec(binOp.Rm); + } + else + { + m = context.VectorZero(); + } + + 
Intrinsic cmpInst = X86PcmpeqInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareEqual(op1, op2), scalar: false); + } + } + + public static void Cmge_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqual(op1, op2), scalar: true); + } + + public static void Cmge_V(ArmEmitterContext context) + { + if (Optimizations.UseSse42) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m; + + if (op is OpCodeSimdReg binOp) + { + m = GetVec(binOp.Rm); + } + else + { + m = context.VectorZero(); + } + + Intrinsic cmpInst = X86PcmpgtInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, m, n); + + Operand mask = X86GetAllElements(context, -1L); + + res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqual(op1, op2), scalar: false); + } + } + + public static void Cmgt_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreater(op1, op2), scalar: true); + } + + public static void Cmgt_V(ArmEmitterContext context) + { + if (Optimizations.UseSse42) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m; + + if (op is OpCodeSimdReg binOp) + { + m = GetVec(binOp.Rm); + } + else + { + m = context.VectorZero(); + } + + Intrinsic cmpInst = X86PcmpgtInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, 
(op1, op2) => context.ICompareGreater(op1, op2), scalar: false); + } + } + + public static void Cmhi_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterUI(op1, op2), scalar: true); + } + + public static void Cmhi_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 3) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic maxInst = X86PmaxuInstruction[op.Size]; + + Operand res = context.AddIntrinsic(maxInst, m, n); + + Intrinsic cmpInst = X86PcmpeqInstruction[op.Size]; + + res = context.AddIntrinsic(cmpInst, res, m); + + Operand mask = X86GetAllElements(context, -1L); + + res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterUI(op1, op2), scalar: false); + } + } + + public static void Cmhs_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqualUI(op1, op2), scalar: true); + } + + public static void Cmhs_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 3) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic maxInst = X86PmaxuInstruction[op.Size]; + + Operand res = context.AddIntrinsic(maxInst, n, m); + + Intrinsic cmpInst = X86PcmpeqInstruction[op.Size]; + + res = context.AddIntrinsic(cmpInst, res, n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqualUI(op1, op2), scalar: false); + } + } + + public static void Cmle_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareLessOrEqual(op1, 
op2), scalar: true); + } + + public static void Cmle_V(ArmEmitterContext context) + { + if (Optimizations.UseSse42) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Intrinsic cmpInst = X86PcmpgtInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, n, context.VectorZero()); + + Operand mask = X86GetAllElements(context, -1L); + + res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareLessOrEqual(op1, op2), scalar: false); + } + } + + public static void Cmlt_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareLess(op1, op2), scalar: true); + } + + public static void Cmlt_V(ArmEmitterContext context) + { + if (Optimizations.UseSse42) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Intrinsic cmpInst = X86PcmpgtInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, context.VectorZero(), n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareLess(op1, op2), scalar: false); + } + } + + public static void Cmtst_S(ArmEmitterContext context) + { + EmitCmtstOp(context, scalar: true); + } + + public static void Cmtst_V(ArmEmitterContext context) + { + EmitCmtstOp(context, scalar: false); + } + + public static void Fccmp_S(ArmEmitterContext context) + { + EmitFccmpOrFccmpe(context, signalNaNs: false); + } + + public static void Fccmpe_S(ArmEmitterContext context) + { + EmitFccmpOrFccmpe(context, signalNaNs: true); + } + + public static void Fcmeq_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.Equal, scalar: true); 
+ } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareEQ, SoftFloat64.FPCompareEQ, scalar: true); + } + } + + public static void Fcmeq_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.Equal, scalar: false); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareEQ, SoftFloat64.FPCompareEQ, scalar: false); + } + } + + public static void Fcmge_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareGE, SoftFloat64.FPCompareGE, scalar: true); + } + } + + public static void Fcmge_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareGE, SoftFloat64.FPCompareGE, scalar: false); + } + } + + public static void Fcmgt_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareGT, SoftFloat64.FPCompareGT, scalar: true); + } + } + + public static void Fcmgt_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareGT, SoftFloat64.FPCompareGT, scalar: false); + } + } + + public static void Fcmle_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true, isLeOrLt: true); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareLE, SoftFloat64.FPCompareLE, scalar: true); + } + } + + public static void 
Fcmle_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false, isLeOrLt: true); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareLE, SoftFloat64.FPCompareLE, scalar: false); + } + } + + public static void Fcmlt_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true, isLeOrLt: true); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareLT, SoftFloat64.FPCompareLT, scalar: true); + } + } + + public static void Fcmlt_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false, isLeOrLt: true); + } + else + { + EmitCmpOpF(context, SoftFloat32.FPCompareLT, SoftFloat64.FPCompareLT, scalar: false); + } + } + + public static void Fcmp_S(ArmEmitterContext context) + { + EmitFcmpOrFcmpe(context, signalNaNs: false); + } + + public static void Fcmpe_S(ArmEmitterContext context) + { + EmitFcmpOrFcmpe(context, signalNaNs: true); + } + + public static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs) + { + OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp; + + Operand lblTrue = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblTrue, InstEmitFlowHelper.GetCondTrue(context, op.Cond)); + + EmitSetNzcv(context, Const(op.Nzcv)); + + context.Branch(lblEnd); + + context.MarkLabel(lblTrue); + + EmitFcmpOrFcmpe(context, signalNaNs); + + context.MarkLabel(lblEnd); + } + + private static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + const int cmpOrdered = 7; + + bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false; + + if (Optimizations.FastFP && Optimizations.UseSse2) + { + Operand n = GetVec(op.Rn); + Operand m = cmpWithZero ? 
context.VectorZero() : GetVec(op.Rm); + + Operand lblNaN = Label(); + Operand lblEnd = Label(); + + if (op.Size == 0) + { + Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const(cmpOrdered)); + + Operand isOrdered = context.VectorExtract16(ordMask, 0); + + context.BranchIfFalse(lblNaN, isOrdered); + + Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, n, m); + Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, n, m); + Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, n, m); + + SetFlag(context, PState.VFlag, Const(0)); + SetFlag(context, PState.CFlag, cf); + SetFlag(context, PState.ZFlag, zf); + SetFlag(context, PState.NFlag, nf); + } + else /* if (op.Size == 1) */ + { + Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const(cmpOrdered)); + + Operand isOrdered = context.VectorExtract16(ordMask, 0); + + context.BranchIfFalse(lblNaN, isOrdered); + + Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, n, m); + Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, n, m); + Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, n, m); + + SetFlag(context, PState.VFlag, Const(0)); + SetFlag(context, PState.CFlag, cf); + SetFlag(context, PState.ZFlag, zf); + SetFlag(context, PState.NFlag, nf); + } + + context.Branch(lblEnd); + + context.MarkLabel(lblNaN); + + SetFlag(context, PState.VFlag, Const(1)); + SetFlag(context, PState.CFlag, Const(1)); + SetFlag(context, PState.ZFlag, Const(0)); + SetFlag(context, PState.NFlag, Const(0)); + + context.MarkLabel(lblEnd); + } + else + { + OperandType type = op.Size != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand me; + + if (cmpWithZero) + { + me = op.Size == 0 ? ConstF(0f) : ConstF(0d); + } + else + { + me = context.VectorExtract(type, GetVec(op.Rm), 0); + } + + Delegate dlg = op.Size != 0 + ? 
(Delegate)new _S32_F64_F64_Bool(SoftFloat64.FPCompare) + : (Delegate)new _S32_F32_F32_Bool(SoftFloat32.FPCompare); + + Operand nzcv = context.Call(dlg, ne, me, Const(signalNaNs)); + + EmitSetNzcv(context, nzcv); + } + } + + private static void EmitSetNzcv(ArmEmitterContext context, Operand nzcv) + { + Operand Extract(Operand value, int bit) + { + if (bit != 0) + { + value = context.ShiftRightUI(value, Const(bit)); + } + + value = context.BitwiseAnd(value, Const(1)); + + return value; + } + + SetFlag(context, PState.VFlag, Extract(nzcv, 0)); + SetFlag(context, PState.CFlag, Extract(nzcv, 1)); + SetFlag(context, PState.ZFlag, Extract(nzcv, 2)); + SetFlag(context, PState.NFlag, Extract(nzcv, 3)); + } + + private static void EmitCmpOp(ArmEmitterContext context, Func2I emitCmp, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size)); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me; + + if (op is OpCodeSimdReg binOp) + { + me = EmitVectorExtractSx(context, binOp.Rm, index, op.Size); + } + else + { + me = Const(0L); + } + + Operand isTrue = emitCmp(ne, me); + + Operand mask = context.ConditionalSelect(isTrue, Const(szMask), Const(0L)); + + res = EmitVectorInsert(context, res, mask, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitCmtstOp(ArmEmitterContext context, bool scalar) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = !scalar ? 
op.GetBytesCount() >> op.Size : 1; + + ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size)); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + Operand test = context.BitwiseAnd(ne, me); + + Operand isTrue = context.ICompareNotEqual(test, Const(0L)); + + Operand mask = context.ConditionalSelect(isTrue, Const(szMask), Const(0L)); + + res = EmitVectorInsert(context, res, mask, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitCmpOpF( + ArmEmitterContext context, + _F32_F32_F32 f32, + _F64_F64_F64 f64, + bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = !scalar ? op.GetBytesCount() >> sizeF + 2 : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me; + + if (op is OpCodeSimdReg binOp) + { + me = context.VectorExtract(type, GetVec(binOp.Rm), index); + } + else + { + me = sizeF == 0 ? ConstF(0f) : ConstF(0d); + } + + Operand e = EmitSoftFloatCall(context, f32, f64, ne, me); + + res = context.VectorInsert(res, e, index); + } + + context.Copy(GetVec(op.Rd), res); + } + + private enum CmpCondition + { + Equal = 0, + GreaterThanOrEqual = 5, + GreaterThan = 6 + } + + private static void EmitCmpSseOrSse2OpF( + ArmEmitterContext context, + CmpCondition cond, + bool scalar, + bool isLeOrLt = false) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = op is OpCodeSimdReg binOp ? GetVec(binOp.Rm) : context.VectorZero(); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Intrinsic inst = scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps; + + Operand res = isLeOrLt + ? 
context.AddIntrinsic(inst, m, n, Const((int)cond)) + : context.AddIntrinsic(inst, n, m, Const((int)cond)); + + if (scalar) + { + res = context.VectorZeroUpper96(res); + } + else if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Intrinsic inst = scalar ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd; + + Operand res = isLeOrLt + ? context.AddIntrinsic(inst, m, n, Const((int)cond)) + : context.AddIntrinsic(inst, n, m, Const((int)cond)); + + if (scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitSimdCrypto.cs b/ARMeilleure/Instructions/InstEmitSimdCrypto.cs new file mode 100644 index 000000000..2b61fadac --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdCrypto.cs @@ -0,0 +1,49 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Aesd_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Decrypt), d, n)); + } + + public static void Aese_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Encrypt), d, n)); + } + + public static void Aesimc_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + context.Copy(GetVec(op.Rd), context.Call(new _V128_V128(SoftFallback.InverseMixColumns), n)); + } + + public static void Aesmc_V(ArmEmitterContext context) + { + 
OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + context.Copy(GetVec(op.Rd), context.Call(new _V128_V128(SoftFallback.MixColumns), n)); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt.cs b/ARMeilleure/Instructions/InstEmitSimdCvt.cs new file mode 100644 index 000000000..012bfcce2 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdCvt.cs @@ -0,0 +1,1166 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + using Func1I = Func; + + static partial class InstEmit + { + public static void Fcvt_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + if (op.Size == 0 && op.Opc == 1) // Single -> Double. + { + if (Optimizations.UseSse2) + { + Operand n = GetVec(op.Rn); + + Operand res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), n); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0); + + Operand res = context.ConvertToFP(OperandType.FP64, ne); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + } + else if (op.Size == 1 && op.Opc == 0) // Double -> Single. 
+ { + if (Optimizations.UseSse2) + { + Operand n = GetVec(op.Rn); + + Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0); + + Operand res = context.ConvertToFP(OperandType.FP32, ne); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + } + else if (op.Size == 0 && op.Opc == 3) // Single -> Half. + { + Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0); + + Delegate dlg = new _U16_F32(SoftFloat32_16.FPConvert); + + Operand res = context.Call(dlg, ne); + + res = context.ZeroExtend16(OperandType.I64, res); + + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1)); + } + else if (op.Size == 3 && op.Opc == 0) // Half -> Single. + { + Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1); + + Delegate dlg = new _F32_U16(SoftFloat16_32.FPConvert); + + Operand res = context.Call(dlg, ne); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + else if (op.Size == 1 && op.Opc == 3) // Double -> Half. + { + throw new NotImplementedException("Double-precision to half-precision."); + } + else if (op.Size == 3 && op.Opc == 1) // Half -> Double. + { + throw new NotImplementedException("Half-precision to double-precision."); + } + else // Invalid encoding. 
+ { + Debug.Assert(false, $"type == {op.Size} && opc == {op.Opc}"); + } + } + + public static void Fcvtas_Gp(ArmEmitterContext context) + { + EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1)); + } + + public static void Fcvtau_Gp(ArmEmitterContext context) + { + EmitFcvt_u_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1)); + } + + public static void Fcvtl_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseSse2 && sizeF == 1) + { + Operand n = GetVec(op.Rn); + Operand res; + + if (op.RegisterSize == RegisterSize.Simd128) + { + res = context.AddIntrinsic(Intrinsic.X86Movhlps, n, n); + } + else + { + res = n; + } + + res = context.AddIntrinsic(Intrinsic.X86Cvtps2pd, res); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand res = context.VectorZero(); + + int elems = 4 >> sizeF; + + int part = op.RegisterSize == RegisterSize.Simd128 ? 
elems : 0; + + for (int index = 0; index < elems; index++) + { + if (sizeF == 0) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, part + index, 1); + + Delegate dlg = new _F32_U16(SoftFloat16_32.FPConvert); + + Operand e = context.Call(dlg, ne); + + res = context.VectorInsert(res, e, index); + } + else /* if (sizeF == 1) */ + { + Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), part + index); + + Operand e = context.ConvertToFP(OperandType.FP64, ne); + + res = context.VectorInsert(res, e, index); + } + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Fcvtms_Gp(ArmEmitterContext context) + { + EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, MathF.Floor, Math.Floor, op1)); + } + + public static void Fcvtmu_Gp(ArmEmitterContext context) + { + EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, MathF.Floor, Math.Floor, op1)); + } + + public static void Fcvtn_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseSse2 && sizeF == 1) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Operand res = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero()); + + Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtpd2ps, n); + + nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt); + + Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 + ? Intrinsic.X86Movlhps + : Intrinsic.X86Movhlps; + + res = context.AddIntrinsic(movInst, res, nInt); + + context.Copy(GetVec(op.Rd), res); + } + else + { + OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64; + + int elems = 4 >> sizeF; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + Operand res = part == 0 ? 
context.VectorZero() : context.Copy(GetVec(op.Rd)); + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); /* Lane 'index' of Rn feeds lane 'part + index' of Rd. */ + + if (sizeF == 0) + { + Delegate dlg = new _U16_F32(SoftFloat32_16.FPConvert); + + Operand e = context.Call(dlg, ne); + + e = context.ZeroExtend16(OperandType.I64, e); + + res = EmitVectorInsert(context, res, e, part + index, 1); + } + else /* if (sizeF == 1) */ + { + Operand e = context.ConvertToFP(OperandType.FP32, ne); + + res = context.VectorInsert(res, e, part + index); + } + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Fcvtns_S(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvts(context, FPRoundingMode.ToNearest, scalar: true); + } + else + { + EmitFcvtn(context, signed: true, scalar: true); + } + } + + public static void Fcvtns_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvts(context, FPRoundingMode.ToNearest, scalar: false); + } + else + { + EmitFcvtn(context, signed: true, scalar: false); + } + } + + public static void Fcvtnu_S(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvtu(context, FPRoundingMode.ToNearest, scalar: true); + } + else + { + EmitFcvtn(context, signed: false, scalar: true); + } + } + + public static void Fcvtnu_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvtu(context, FPRoundingMode.ToNearest, scalar: false); + } + else + { + EmitFcvtn(context, signed: false, scalar: false); + } + } + + public static void Fcvtps_Gp(ArmEmitterContext context) + { + EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, op1)); + } + + public static void Fcvtpu_Gp(ArmEmitterContext context) + { + EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, op1)); + } + + public static void Fcvtzs_Gp(ArmEmitterContext context) + { + EmitFcvt_s_Gp(context, (op1) => op1); + } + + 
public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context) + { + EmitFcvtzs_Gp_Fixed(context); + } + + public static void Fcvtzs_S(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvts(context, FPRoundingMode.TowardsZero, scalar: true); + } + else + { + EmitFcvtz(context, signed: true, scalar: true); + } + } + + public static void Fcvtzs_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvts(context, FPRoundingMode.TowardsZero, scalar: false); + } + else + { + EmitFcvtz(context, signed: true, scalar: false); + } + } + + public static void Fcvtzs_V_Fixed(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvts(context, FPRoundingMode.TowardsZero, scalar: false); + } + else + { + EmitFcvtz(context, signed: true, scalar: false); + } + } + + public static void Fcvtzu_Gp(ArmEmitterContext context) + { + EmitFcvt_u_Gp(context, (op1) => op1); + } + + public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context) + { + EmitFcvtzu_Gp_Fixed(context); + } + + public static void Fcvtzu_S(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvtu(context, FPRoundingMode.TowardsZero, scalar: true); + } + else + { + EmitFcvtz(context, signed: false, scalar: true); + } + } + + public static void Fcvtzu_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvtu(context, FPRoundingMode.TowardsZero, scalar: false); + } + else + { + EmitFcvtz(context, signed: false, scalar: false); + } + } + + public static void Fcvtzu_V_Fixed(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41Fcvtu(context, FPRoundingMode.TowardsZero, scalar: false); + } + else + { + EmitFcvtz(context, signed: false, scalar: false); + } + } + + public static void Scvtf_Gp(ArmEmitterContext context) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + if (op.RegisterSize == RegisterSize.Int32) + { + res = 
context.SignExtend32(OperandType.I64, res); + } + + res = EmitFPConvert(context, res, op.Size, signed: true); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + + public static void Scvtf_Gp_Fixed(ArmEmitterContext context) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + if (op.RegisterSize == RegisterSize.Int32) + { + res = context.SignExtend32(OperandType.I64, res); + } + + res = EmitFPConvert(context, res, op.Size, signed: true); + + res = EmitI2fFBitsMul(context, res, op.FBits); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + + public static void Scvtf_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseSse2 && sizeF == 0) + { + EmitSse2Scvtf(context, scalar: true); + } + else + { + Operand res = EmitVectorLongExtract(context, op.Rn, 0, sizeF + 2); + + res = EmitFPConvert(context, res, op.Size, signed: true); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + } + + public static void Scvtf_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseSse2 && sizeF == 0) + { + EmitSse2Scvtf(context, scalar: false); + } + else + { + EmitVectorCvtf(context, signed: true); + } + } + + public static void Scvtf_V_Fixed(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + // sizeF == ((OpCodeSimdShImm64)op).Size - 2 + int sizeF = op.Size & 1; + + if (Optimizations.UseSse2 && sizeF == 0) + { + EmitSse2Scvtf(context, scalar: false); + } + else + { + EmitVectorCvtf(context, signed: true); + } + } + + public static void Ucvtf_Gp(ArmEmitterContext context) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + res = EmitFPConvert(context, res, op.Size, signed: false); 
+ + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + + public static void Ucvtf_Gp_Fixed(ArmEmitterContext context) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + res = EmitFPConvert(context, res, op.Size, signed: false); + + res = EmitI2fFBitsMul(context, res, op.FBits); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + + public static void Ucvtf_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseSse2 && sizeF == 0) + { + EmitSse2Ucvtf(context, scalar: true); + } + else + { + Operand ne = EmitVectorLongExtract(context, op.Rn, 0, sizeF + 2); + + Operand res = EmitFPConvert(context, ne, sizeF, signed: false); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + } + + public static void Ucvtf_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseSse2 && sizeF == 0) + { + EmitSse2Ucvtf(context, scalar: false); + } + else + { + EmitVectorCvtf(context, signed: false); + } + } + + public static void Ucvtf_V_Fixed(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + // sizeF == ((OpCodeSimdShImm)op).Size - 2 + int sizeF = op.Size & 1; + + if (Optimizations.UseSse2 && sizeF == 0) + { + EmitSse2Ucvtf(context, scalar: false); + } + else + { + EmitVectorCvtf(context, signed: false); + } + } + + private static void EmitFcvtn(ArmEmitterContext context, bool signed, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand n = GetVec(op.Rn); + + int sizeF = op.Size & 1; + int sizeI = sizeF + 2; + + OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64; + + int elems = !scalar ? 
op.GetBytesCount() >> sizeI : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, n, index); + + Operand e = EmitRoundMathCall(context, MidpointRounding.ToEven, ne); + + if (sizeF == 0) + { + Delegate dlg = signed + ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32) + : (Delegate)new _U32_F32(SoftFallback.SatF32ToU32); + + e = context.Call(dlg, e); + + e = context.ZeroExtend32(OperandType.I64, e); + } + else /* if (sizeF == 1) */ + { + Delegate dlg = signed + ? (Delegate)new _S64_F64(SoftFallback.SatF64ToS64) + : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64); + + e = context.Call(dlg, e); + } + + res = EmitVectorInsert(context, res, e, index, sizeI); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitFcvtz(ArmEmitterContext context, bool signed, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand n = GetVec(op.Rn); + + int sizeF = op.Size & 1; + int sizeI = sizeF + 2; + + OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64; + + int fBits = GetFBits(context); + + int elems = !scalar ? op.GetBytesCount() >> sizeI : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, n, index); + + Operand e = EmitF2iFBitsMul(context, ne, fBits); + + if (sizeF == 0) + { + Delegate dlg = signed + ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32) + : (Delegate)new _U32_F32(SoftFallback.SatF32ToU32); + + e = context.Call(dlg, e); + + e = context.ZeroExtend32(OperandType.I64, e); + } + else /* if (sizeF == 1) */ + { + Delegate dlg = signed + ? 
(Delegate)new _S64_F64(SoftFallback.SatF64ToS64) + : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64); + + e = context.Call(dlg, e); + } + + res = EmitVectorInsert(context, res, e, index, sizeI); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitFcvt_s_Gp(ArmEmitterContext context, Func1I emit) + { + EmitFcvt___Gp(context, emit, signed: true); + } + + private static void EmitFcvt_u_Gp(ArmEmitterContext context, Func1I emit) + { + EmitFcvt___Gp(context, emit, signed: false); + } + + private static void EmitFcvt___Gp(ArmEmitterContext context, Func1I emit, bool signed) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64; + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + + Operand res = signed + ? EmitScalarFcvts(context, emit(ne), 0) + : EmitScalarFcvtu(context, emit(ne), 0); + + SetIntOrZR(context, op.Rd, res); + } + + private static void EmitFcvtzs_Gp_Fixed(ArmEmitterContext context) + { + EmitFcvtz__Gp_Fixed(context, signed: true); + } + + private static void EmitFcvtzu_Gp_Fixed(ArmEmitterContext context) + { + EmitFcvtz__Gp_Fixed(context, signed: false); + } + + private static void EmitFcvtz__Gp_Fixed(ArmEmitterContext context, bool signed) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64; + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + + Operand res = signed + ? 
EmitScalarFcvts(context, ne, op.FBits) + : EmitScalarFcvtu(context, ne, op.FBits); + + SetIntOrZR(context, op.Rd, res); + } + + private static void EmitVectorCvtf(ArmEmitterContext context, bool signed) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + int sizeI = sizeF + 2; + + int fBits = GetFBits(context); + + int elems = op.GetBytesCount() >> sizeI; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorLongExtract(context, op.Rn, index, sizeI); + + Operand e = EmitFPConvert(context, ne, sizeF, signed); + + e = EmitI2fFBitsMul(context, e, fBits); + + res = context.VectorInsert(res, e, index); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static int GetFBits(ArmEmitterContext context) + { + if (context.CurrOp is OpCodeSimdShImm op) + { + return GetImmShr(op); + } + + return 0; + } + + private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, int size, bool signed) + { + Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64); + Debug.Assert((uint)size < 2); + + OperandType type = size == 0 ? OperandType.FP32 + : OperandType.FP64; + + if (signed) + { + return context.ConvertToFP(type, value); + } + else + { + return context.ConvertToFPUI(type, value); + } + } + + private static Operand EmitScalarFcvts(ArmEmitterContext context, Operand value, int fBits) + { + Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64); + + value = EmitF2iFBitsMul(context, value, fBits); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + Delegate dlg = value.Type == OperandType.FP32 + ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32) + : (Delegate)new _S32_F64(SoftFallback.SatF64ToS32); + + return context.Call(dlg, value); + } + else + { + Delegate dlg = value.Type == OperandType.FP32 + ? 
(Delegate)new _S64_F32(SoftFallback.SatF32ToS64) + : (Delegate)new _S64_F64(SoftFallback.SatF64ToS64); + + return context.Call(dlg, value); + } + } + + private static Operand EmitScalarFcvtu(ArmEmitterContext context, Operand value, int fBits) + { + Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64); + + value = EmitF2iFBitsMul(context, value, fBits); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + Delegate dlg = value.Type == OperandType.FP32 + ? (Delegate)new _U32_F32(SoftFallback.SatF32ToU32) + : (Delegate)new _U32_F64(SoftFallback.SatF64ToU32); + + return context.Call(dlg, value); + } + else + { + Delegate dlg = value.Type == OperandType.FP32 + ? (Delegate)new _U64_F32(SoftFallback.SatF32ToU64) + : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64); + + return context.Call(dlg, value); + } + } + + private static Operand EmitF2iFBitsMul(ArmEmitterContext context, Operand value, int fBits) + { + Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64); + + if (fBits == 0) + { + return value; + } + + if (value.Type == OperandType.FP32) + { + return context.Multiply(value, ConstF(MathF.Pow(2f, fBits))); + } + else /* if (value.Type == OperandType.FP64) */ + { + return context.Multiply(value, ConstF(Math.Pow(2d, fBits))); + } + } + + private static Operand EmitI2fFBitsMul(ArmEmitterContext context, Operand value, int fBits) + { + Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64); + + if (fBits == 0) + { + return value; + } + + if (value.Type == OperandType.FP32) + { + return context.Multiply(value, ConstF(1f / MathF.Pow(2f, fBits))); + } + else /* if (value.Type == OperandType.FP64) */ + { + return context.Multiply(value, ConstF(1d / Math.Pow(2d, fBits))); + } + } + + private static void EmitSse41Fcvts(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + const int 
cmpGreaterThanOrEqual = 5; + const int cmpOrdered = 7; + + // sizeF == ((OpCodeSimdShImm)op).Size - 2 + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const(cmpOrdered)); + + Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits) + int fpScaled = 0x3F800000 + fBits * 0x800000; + + Operand scale = X86GetAllElements(context, fpScaled); + + nScaled = context.AddIntrinsic(Intrinsic.X86Mulps, nScaled, scale); + } + + Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundps, nScaled, Const(X86GetRoundControl(roundMode))); + + Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRnd); + + Operand mask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648) + + Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, nRnd, mask, Const(cmpGreaterThanOrEqual)); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, mask2); + + if (scalar) + { + res = context.VectorZeroUpper96(res); + } + else if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const(cmpOrdered)); + + Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits) + long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L; + + Operand scale = X86GetAllElements(context, fpScaled); + + nScaled = context.AddIntrinsic(Intrinsic.X86Mulpd, nScaled, scale); + } + + Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundpd, nScaled, Const(X86GetRoundControl(roundMode))); + + Operand high; + + if (!scalar) + { + high = 
context.AddIntrinsic(Intrinsic.X86Unpckhpd, nRnd, nRnd); + high = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, high); + } + else + { + high = Const(0L); + } + + Operand low = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRnd); + + Operand nInt = EmitVectorLongCreate(context, low, high); + + Operand mask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808) + + Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, nRnd, mask, Const(cmpGreaterThanOrEqual)); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, mask2); + + if (scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + private static void EmitSse41Fcvtu(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + const int cmpGreaterThanOrEqual = 5; + const int cmpGreaterThan = 6; + const int cmpOrdered = 7; + + // sizeF == ((OpCodeSimdShImm)op).Size - 2 + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const(cmpOrdered)); + + Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits) + int fpScaled = 0x3F800000 + fBits * 0x800000; + + Operand scale = X86GetAllElements(context, fpScaled); + + nScaled = context.AddIntrinsic(Intrinsic.X86Mulps, nScaled, scale); + } + + Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundps, nScaled, Const(X86GetRoundControl(roundMode))); + + Operand nRndMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nRnd, context.VectorZero(), Const(cmpGreaterThan)); + + Operand nRndMasked = context.AddIntrinsic(Intrinsic.X86Pand, nRnd, nRndMask); + + Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRndMasked); + + Operand mask = X86GetAllElements(context, 
0x4F000000); // 2.14748365E9f (2147483648) + + Operand res = context.AddIntrinsic(Intrinsic.X86Subps, nRndMasked, mask); + + Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, res, context.VectorZero(), Const(cmpGreaterThan)); + + Operand resMasked = context.AddIntrinsic(Intrinsic.X86Pand, res, mask2); + + res = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, resMasked); + + Operand mask3 = context.AddIntrinsic(Intrinsic.X86Cmpps, resMasked, mask, Const(cmpGreaterThanOrEqual)); + + res = context.AddIntrinsic(Intrinsic.X86Pxor, res, mask3); + res = context.AddIntrinsic(Intrinsic.X86Paddd, res, nInt); + + if (scalar) + { + res = context.VectorZeroUpper96(res); + } + else if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const(cmpOrdered)); + + Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits) + long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L; + + Operand scale = X86GetAllElements(context, fpScaled); + + nScaled = context.AddIntrinsic(Intrinsic.X86Mulpd, nScaled, scale); + } + + Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundpd, nScaled, Const(X86GetRoundControl(roundMode))); + + Operand nRndMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nRnd, context.VectorZero(), Const(cmpGreaterThan)); + + Operand nRndMasked = context.AddIntrinsic(Intrinsic.X86Pand, nRnd, nRndMask); + + Operand high; + + if (!scalar) + { + high = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nRndMasked, nRndMasked); + high = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, high); + } + else + { + high = Const(0L); + } + + Operand low = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRndMasked); + + Operand nInt = 
EmitVectorLongCreate(context, low, high); + + Operand mask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808) + + Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, nRndMasked, mask); + + Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, res, context.VectorZero(), Const(cmpGreaterThan)); + + Operand resMasked = context.AddIntrinsic(Intrinsic.X86Pand, res, mask2); + + if (!scalar) + { + high = context.AddIntrinsic(Intrinsic.X86Unpckhpd, resMasked, resMasked); + high = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, high); + } + + low = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, resMasked); + + res = EmitVectorLongCreate(context, low, high); + + Operand mask3 = context.AddIntrinsic(Intrinsic.X86Cmppd, resMasked, mask, Const(cmpGreaterThanOrEqual)); + + res = context.AddIntrinsic(Intrinsic.X86Pxor, res, mask3); + res = context.AddIntrinsic(Intrinsic.X86Paddq, res, nInt); + + if (scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + private static void EmitSse2Scvtf(ArmEmitterContext context, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits) + int fpScaled = 0x3F800000 - fBits * 0x800000; + + Operand scale = X86GetAllElements(context, fpScaled); + + res = context.AddIntrinsic(Intrinsic.X86Mulps, res, scale); + } + + if (scalar) + { + res = context.VectorZeroUpper96(res); + } + else if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitSse2Ucvtf(ArmEmitterContext context, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand res = 
context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16)); + + res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res); + + Operand mask = X86GetAllElements(context, 0x47800000); // 65536.0f (1 << 16) + + res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask); + + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16)); + + res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16)); + res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2); + + res = context.AddIntrinsic(Intrinsic.X86Addps, res, res2); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits) + int fpScaled = 0x3F800000 - fBits * 0x800000; + + Operand scale = X86GetAllElements(context, fpScaled); + + res = context.AddIntrinsic(Intrinsic.X86Mulps, res, scale); + } + + if (scalar) + { + res = context.VectorZeroUpper96(res); + } + else if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static Operand EmitVectorLongExtract(ArmEmitterContext context, int reg, int index, int size) + { + OperandType type = size == 3 ? 
OperandType.I64 : OperandType.I32; + + return context.VectorExtract(type, GetVec(reg), index); + } + + private static Operand EmitVectorLongCreate(ArmEmitterContext context, Operand low, Operand high) + { + Operand vector = context.VectorCreateScalar(low); + + vector = context.VectorInsert(vector, high, 1); + + return vector; + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdHash.cs b/ARMeilleure/Instructions/InstEmitSimdHash.cs new file mode 100644 index 000000000..4ed960612 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdHash.cs @@ -0,0 +1,147 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { +#region "Sha1" + public static void Sha1c_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + + Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0); + + Operand m = GetVec(op.Rm); + + Operand res = context.Call(new _V128_V128_U32_V128(SoftFallback.HashChoose), d, ne, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha1h_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0); + + Operand res = context.Call(new _U32_U32(SoftFallback.FixedRotate), ne); + + context.Copy(GetVec(op.Rd), context.VectorCreateScalar(res)); + } + + public static void Sha1m_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + + Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0); + + Operand m = GetVec(op.Rm); + + Operand res = context.Call(new _V128_V128_U32_V128(SoftFallback.HashMajority), d, ne, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha1p_V(ArmEmitterContext context) + { + 
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + + Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0); + + Operand m = GetVec(op.Rm); + + Operand res = context.Call(new _V128_V128_U32_V128(SoftFallback.HashParity), d, ne, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha1su0_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.Call(new _V128_V128_V128_V128(SoftFallback.Sha1SchedulePart1), d, n, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha1su1_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Operand res = context.Call(new _V128_V128_V128(SoftFallback.Sha1SchedulePart2), d, n); + + context.Copy(GetVec(op.Rd), res); + } +#endregion + +#region "Sha256" + public static void Sha256h_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.Call(new _V128_V128_V128_V128(SoftFallback.HashLower), d, n, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha256h2_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.Call(new _V128_V128_V128_V128(SoftFallback.HashUpper), d, n, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha256su0_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Operand res = context.Call(new _V128_V128_V128(SoftFallback.Sha256SchedulePart1), d, n); + + context.Copy(GetVec(op.Rd), res); + } + + public 
static void Sha256su1_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.Call(new _V128_V128_V128_V128(SoftFallback.Sha256SchedulePart2), d, n, m); + + context.Copy(GetVec(op.Rd), res); + } +#endregion + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs new file mode 100644 index 000000000..a3da80fb0 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs @@ -0,0 +1,1477 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + using Func1I = Func; + using Func2I = Func; + using Func3I = Func; + + static class InstEmitSimdHelper + { +#region "X86 SSE Intrinsics" + public static readonly Intrinsic[] X86PaddInstruction = new Intrinsic[] + { + Intrinsic.X86Paddb, + Intrinsic.X86Paddw, + Intrinsic.X86Paddd, + Intrinsic.X86Paddq + }; + + public static readonly Intrinsic[] X86PcmpeqInstruction = new Intrinsic[] + { + Intrinsic.X86Pcmpeqb, + Intrinsic.X86Pcmpeqw, + Intrinsic.X86Pcmpeqd, + Intrinsic.X86Pcmpeqq + }; + + public static readonly Intrinsic[] X86PcmpgtInstruction = new Intrinsic[] + { + Intrinsic.X86Pcmpgtb, + Intrinsic.X86Pcmpgtw, + Intrinsic.X86Pcmpgtd, + Intrinsic.X86Pcmpgtq + }; + + public static readonly Intrinsic[] X86PmaxsInstruction = new Intrinsic[] + { + Intrinsic.X86Pmaxsb, + Intrinsic.X86Pmaxsw, + Intrinsic.X86Pmaxsd + }; + + public static readonly Intrinsic[] X86PmaxuInstruction = new Intrinsic[] + { + Intrinsic.X86Pmaxub, + Intrinsic.X86Pmaxuw, + Intrinsic.X86Pmaxud + }; + + public static readonly Intrinsic[] X86PminsInstruction = new 
Intrinsic[] + { + Intrinsic.X86Pminsb, + Intrinsic.X86Pminsw, + Intrinsic.X86Pminsd + }; + + public static readonly Intrinsic[] X86PminuInstruction = new Intrinsic[] + { + Intrinsic.X86Pminub, + Intrinsic.X86Pminuw, + Intrinsic.X86Pminud + }; + + public static readonly Intrinsic[] X86PmovsxInstruction = new Intrinsic[] + { + Intrinsic.X86Pmovsxbw, + Intrinsic.X86Pmovsxwd, + Intrinsic.X86Pmovsxdq + }; + + public static readonly Intrinsic[] X86PmovzxInstruction = new Intrinsic[] + { + Intrinsic.X86Pmovzxbw, + Intrinsic.X86Pmovzxwd, + Intrinsic.X86Pmovzxdq + }; + + public static readonly Intrinsic[] X86PsllInstruction = new Intrinsic[] + { + 0, + Intrinsic.X86Psllw, + Intrinsic.X86Pslld, + Intrinsic.X86Psllq + }; + + public static readonly Intrinsic[] X86PsraInstruction = new Intrinsic[] + { + 0, + Intrinsic.X86Psraw, + Intrinsic.X86Psrad + }; + + public static readonly Intrinsic[] X86PsrlInstruction = new Intrinsic[] + { + 0, + Intrinsic.X86Psrlw, + Intrinsic.X86Psrld, + Intrinsic.X86Psrlq + }; + + public static readonly Intrinsic[] X86PsubInstruction = new Intrinsic[] + { + Intrinsic.X86Psubb, + Intrinsic.X86Psubw, + Intrinsic.X86Psubd, + Intrinsic.X86Psubq + }; + + public static readonly Intrinsic[] X86PunpckhInstruction = new Intrinsic[] + { + Intrinsic.X86Punpckhbw, + Intrinsic.X86Punpckhwd, + Intrinsic.X86Punpckhdq, + Intrinsic.X86Punpckhqdq + }; + + public static readonly Intrinsic[] X86PunpcklInstruction = new Intrinsic[] + { + Intrinsic.X86Punpcklbw, + Intrinsic.X86Punpcklwd, + Intrinsic.X86Punpckldq, + Intrinsic.X86Punpcklqdq + }; +#endregion + + public static int GetImmShl(OpCodeSimdShImm op) + { + return op.Imm - (8 << op.Size); + } + + public static int GetImmShr(OpCodeSimdShImm op) + { + return (8 << (op.Size + 1)) - op.Imm; + } + + public static Operand X86GetScalar(ArmEmitterContext context, float value) + { + return X86GetScalar(context, BitConverter.SingleToInt32Bits(value)); + } + + public static Operand X86GetScalar(ArmEmitterContext context, 
double value) + { + return X86GetScalar(context, BitConverter.DoubleToInt64Bits(value)); + } + + public static Operand X86GetScalar(ArmEmitterContext context, int value) + { + return context.VectorCreateScalar(Const(value)); + } + + public static Operand X86GetScalar(ArmEmitterContext context, long value) + { + return context.VectorCreateScalar(Const(value)); + } + + public static Operand X86GetAllElements(ArmEmitterContext context, float value) + { + return X86GetAllElements(context, BitConverter.SingleToInt32Bits(value)); + } + + public static Operand X86GetAllElements(ArmEmitterContext context, double value) + { + return X86GetAllElements(context, BitConverter.DoubleToInt64Bits(value)); + } + + public static Operand X86GetAllElements(ArmEmitterContext context, int value) + { + Operand vector = context.VectorCreateScalar(Const(value)); + + vector = context.AddIntrinsic(Intrinsic.X86Shufps, vector, vector, Const(0)); + + return vector; + } + + public static Operand X86GetAllElements(ArmEmitterContext context, long value) + { + Operand vector = context.VectorCreateScalar(Const(value)); + + vector = context.AddIntrinsic(Intrinsic.X86Movlhps, vector, vector); + + return vector; + } + + public static int X86GetRoundControl(FPRoundingMode roundMode) + { + switch (roundMode) + { + case FPRoundingMode.ToNearest: return 8 | 0; + case FPRoundingMode.TowardsPlusInfinity: return 8 | 2; + case FPRoundingMode.TowardsMinusInfinity: return 8 | 1; + case FPRoundingMode.TowardsZero: return 8 | 3; + } + + throw new ArgumentException($"Invalid rounding mode \"{roundMode}\"."); + } + + public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Intrinsic inst = (op.Size & 1) != 0 ? 
inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n); + + if ((op.Size & 1) != 0) + { + res = context.VectorZeroUpper64(res); + } + else + { + res = context.VectorZeroUpper96(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n, m); + + if ((op.Size & 1) != 0) + { + res = context.VectorZeroUpper64(res); + } + else + { + res = context.VectorZeroUpper96(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static Operand EmitUnaryMathCall(ArmEmitterContext context, _F32_F32 f32, _F64_F64 f64, Operand n) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + return (op.Size & 1) == 0 ? 
context.Call(f32, n) : context.Call(f64, n); + } + + public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Delegate dlg; + + if ((op.Size & 1) == 0) + { + dlg = new _F32_F32_MidpointRounding(MathF.Round); + } + else /* if ((op.Size & 1) == 1) */ + { + dlg = new _F64_F64_MidpointRounding(Math.Round); + } + + return context.Call(dlg, n, Const((int)roundMode)); + } + + public static Operand EmitSoftFloatCall( + ArmEmitterContext context, + _F32_F32 f32, + _F64_F64 f64, + params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Delegate dlg = (op.Size & 1) == 0 ? (Delegate)f32 : (Delegate)f64; + + return context.Call(dlg, callArgs); + } + + public static Operand EmitSoftFloatCall( + ArmEmitterContext context, + _F32_F32_F32 f32, + _F64_F64_F64 f64, + params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Delegate dlg = (op.Size & 1) == 0 ? (Delegate)f32 : (Delegate)f64; + + return context.Call(dlg, callArgs); + } + + public static Operand EmitSoftFloatCall( + ArmEmitterContext context, + _F32_F32_F32_F32 f32, + _F64_F64_F64_F64 f64, + params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Delegate dlg = (op.Size & 1) == 0 ? (Delegate)f32 : (Delegate)f64; + + return context.Call(dlg, callArgs); + } + + public static void EmitScalarBinaryOpByElemF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? 
OperandType.FP64 : OperandType.FP32; + + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0)); + } + + public static void EmitScalarTernaryOpByElemF(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand d = context.VectorExtract(type, GetVec(op.Rd), 0); + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(d, n, m), 0)); + } + + public static void EmitScalarUnaryOpSx(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarBinaryOpSx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size); + Operand m = EmitVectorExtractSx(context, op.Rm, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarUnaryOpZx(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarBinaryOpZx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = 
EmitVectorExtractZx(context, op.Rn, 0, op.Size); + Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarTernaryOpZx(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = EmitVectorExtractZx(context, op.Rd, 0, op.Size); + Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size); + + d = EmitVectorInsert(context, context.VectorZero(), emit(d, n, m), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarUnaryOpF(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n), 0)); + } + + public static void EmitScalarBinaryOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand m = context.VectorExtract(type, GetVec(op.Rm), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0)); + } + + public static void EmitScalarTernaryRaOpF(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? 
OperandType.FP64 : OperandType.FP32; + + Operand a = context.VectorExtract(type, GetVec(op.Ra), 0); + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand m = context.VectorExtract(type, GetVec(op.Rm), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(a, n, m), 0)); + } + + public static void EmitVectorUnaryOpF(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + + res = context.VectorInsert(res, emit(ne), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me = context.VectorExtract(type, GetVec(op.Rm), index); + + res = context.VectorInsert(res, emit(ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpF(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? 
OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand de = context.VectorExtract(type, GetVec(op.Rd), index); + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me = context.VectorExtract(type, GetVec(op.Rm), index); + + res = context.VectorInsert(res, emit(de, ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpByElemF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index); + + res = context.VectorInsert(res, emit(ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpByElemF(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? 
OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand de = context.VectorExtract(type, GetVec(op.Rd), index); + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index); + + res = context.VectorInsert(res, emit(de, ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorUnaryOpSx(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpSx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpSx(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractSx(context, op.Rd, index, op.Size); + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size); + } + 
+ context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorUnaryOpZx(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpZx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpZx(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpByElemSx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtractSx(context, op.Rm, op.Index, op.Size); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { 
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpByElemZx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpByElemZx(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorImmUnaryOp(ArmEmitterContext context, Func1I emit) + { + OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp; + + Operand imm = Const(op.Immediate); + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + res = EmitVectorInsert(context, res, emit(imm), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorImmBinaryOp(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp; + + Operand imm = Const(op.Immediate); + + Operand res = 
context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, imm), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenRmBinaryOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenRmBinaryOp(context, emit, signed: true); + } + + public static void EmitVectorWidenRmBinaryOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenRmBinaryOp(context, emit, signed: false); + } + + private static void EmitVectorWidenRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signed); + Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenRnRmBinaryOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenRnRmBinaryOp(context, emit, signed: true); + } + + public static void EmitVectorWidenRnRmBinaryOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenRnRmBinaryOp(context, emit, signed: false); + } + + private static void EmitVectorWidenRnRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? 
elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenRnRmTernaryOpSx(ArmEmitterContext context, Func3I emit) + { + EmitVectorWidenRnRmTernaryOp(context, emit, signed: true); + } + + public static void EmitVectorWidenRnRmTernaryOpZx(ArmEmitterContext context, Func3I emit) + { + EmitVectorWidenRnRmTernaryOp(context, emit, signed: false); + } + + private static void EmitVectorWidenRnRmTernaryOp(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenBinaryOpByElemSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenBinaryOpByElem(context, emit, signed: true); + } + + public static void EmitVectorWidenBinaryOpByElemZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenBinaryOpByElem(context, emit, signed: false); + } + + private static void EmitVectorWidenBinaryOpByElem(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtract(context, op.Rm, op.Index, 
op.Size, signed);; + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenTernaryOpByElemSx(ArmEmitterContext context, Func3I emit) + { + EmitVectorWidenTernaryOpByElem(context, emit, signed: true); + } + + public static void EmitVectorWidenTernaryOpByElemZx(ArmEmitterContext context, Func3I emit) + { + EmitVectorWidenTernaryOpByElem(context, emit, signed: false); + } + + private static void EmitVectorWidenTernaryOpByElem(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);; + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? 
elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorPairwiseOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorPairwiseOp(context, emit, signed: true); + } + + public static void EmitVectorPairwiseOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorPairwiseOp(context, emit, signed: false); + } + + private static void EmitVectorPairwiseOp(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int pairs = op.GetPairsCount() >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand n0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed); + Operand n1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed); + + Operand m0 = EmitVectorExtract(context, op.Rm, pairIndex, op.Size, signed); + Operand m1 = EmitVectorExtract(context, op.Rm, pairIndex + 1, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(n0, n1), index, op.Size); + res = EmitVectorInsert(context, res, emit(m0, m1), pairs + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorAcrossVectorOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false); + } + + public static void EmitVectorAcrossVectorOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false); + } + + public static void EmitVectorLongAcrossVectorOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true); + } 
+ + public static void EmitVectorLongAcrossVectorOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true); + } + + private static void EmitVectorAcrossVectorOp( + ArmEmitterContext context, + Func2I emit, + bool signed, + bool isLong) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int elems = op.GetBytesCount() >> op.Size; + + Operand res = EmitVectorExtract(context, op.Rn, 0, op.Size, signed); + + for (int index = 1; index < elems; index++) + { + Operand n = EmitVectorExtract(context, op.Rn, index, op.Size, signed); + + res = emit(res, n); + } + + int size = isLong ? op.Size + 1 : op.Size; + + Operand d = EmitVectorInsert(context, context.VectorZero(), res, 0, size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? 
OperandType.FP64 : OperandType.FP32; + + int pairs = op.GetPairsCount() >> sizeF + 2; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand n0 = context.VectorExtract(type, GetVec(op.Rn), pairIndex); + Operand n1 = context.VectorExtract(type, GetVec(op.Rn), pairIndex + 1); + + Operand m0 = context.VectorExtract(type, GetVec(op.Rm), pairIndex); + Operand m1 = context.VectorExtract(type, GetVec(op.Rm), pairIndex + 1); + + res = context.VectorInsert(res, emit(n0, n1), index); + res = context.VectorInsert(res, emit(m0, m1), pairs + index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + if (op.RegisterSize == RegisterSize.Simd64) + { + Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, n, m); + + Operand zero = context.VectorZero(); + + Operand part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, unpck, zero); + Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, unpck); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1)); + } + else /* if (op.RegisterSize == RegisterSize.Simd128) */ + { + const int sm0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0; + const int sm1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0; + + Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(sm0)); + Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(sm1)); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1)); + } + } + else /* if (sizeF == 1) */ + { + Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, n, m); + Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, n, m); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst64, part0, part1)); + } + } + + + [Flags] + public enum 
SaturatingFlags + { + Scalar = 1 << 0, + Signed = 1 << 1, + + Add = 1 << 2, + Sub = 1 << 3, + + Accumulate = 1 << 4, + + ScalarSx = Scalar | Signed, + ScalarZx = Scalar, + + VectorSx = Signed, + VectorZx = 0 + } + + public static void EmitScalarSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit) + { + EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.ScalarSx); + } + + public static void EmitVectorSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit) + { + EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.VectorSx); + } + + private static void EmitSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit, SaturatingFlags flags) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + bool scalar = (flags & SaturatingFlags.Scalar) != 0; + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand de; + + if (op.Size <= 2) + { + de = EmitSatQ(context, emit(ne), op.Size, signedSrc: true, signedDst: true); + } + else /* if (op.Size == 3) */ + { + de = EmitUnarySignedSatQAbsOrNeg(context, emit(ne)); + } + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitScalarSaturatingBinaryOpSx(ArmEmitterContext context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, null, SaturatingFlags.ScalarSx | flags); + } + + public static void EmitScalarSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, null, SaturatingFlags.ScalarZx | flags); + } + + public static void EmitVectorSaturatingBinaryOpSx(ArmEmitterContext context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, null, SaturatingFlags.VectorSx | flags); + } + + public static void EmitVectorSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags) + { + 
EmitSaturatingBinaryOp(context, null, SaturatingFlags.VectorZx | flags); + } + + public static void EmitSaturatingBinaryOp(ArmEmitterContext context, Func2I emit, SaturatingFlags flags) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + bool scalar = (flags & SaturatingFlags.Scalar) != 0; + bool signed = (flags & SaturatingFlags.Signed) != 0; + + bool add = (flags & SaturatingFlags.Add) != 0; + bool sub = (flags & SaturatingFlags.Sub) != 0; + + bool accumulate = (flags & SaturatingFlags.Accumulate) != 0; + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + if (add || sub) + { + OpCodeSimdReg opReg = (OpCodeSimdReg)op; + + for (int index = 0; index < elems; index++) + { + Operand de; + Operand ne = EmitVectorExtract(context, opReg.Rn, index, op.Size, signed); + Operand me = EmitVectorExtract(context, opReg.Rm, index, op.Size, signed); + + if (op.Size <= 2) + { + Operand temp = add ? context.Add (ne, me) + : context.Subtract(ne, me); + + de = EmitSatQ(context, temp, op.Size, signedSrc: true, signedDst: signed); + } + else if (add) /* if (op.Size == 3) */ + { + de = EmitBinarySatQAdd(context, ne, me, signed); + } + else /* if (sub) */ + { + de = EmitBinarySatQSub(context, ne, me, signed); + } + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + } + else if (accumulate) + { + for (int index = 0; index < elems; index++) + { + Operand de; + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, !signed); + Operand me = EmitVectorExtract(context, op.Rd, index, op.Size, signed); + + if (op.Size <= 2) + { + Operand temp = context.Add(ne, me); + + de = EmitSatQ(context, temp, op.Size, signedSrc: true, signedDst: signed); + } + else /* if (op.Size == 3) */ + { + de = EmitBinarySatQAccumulate(context, ne, me, signed); + } + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + } + else + { + OpCodeSimdReg opReg = (OpCodeSimdReg)op; + + for (int index = 0; index < elems; index++) + { + 
Operand ne = EmitVectorExtract(context, opReg.Rn, index, op.Size, signed); + Operand me = EmitVectorExtract(context, opReg.Rm, index, op.Size, signed); + + Operand de = EmitSatQ(context, emit(ne, me), op.Size, true, signed); + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + } + + context.Copy(GetVec(op.Rd), res); + } + + [Flags] + public enum SaturatingNarrowFlags + { + Scalar = 1 << 0, + SignedSrc = 1 << 1, + SignedDst = 1 << 2, + + ScalarSxSx = Scalar | SignedSrc | SignedDst, + ScalarSxZx = Scalar | SignedSrc, + ScalarZxZx = Scalar, + + VectorSxSx = SignedSrc | SignedDst, + VectorSxZx = SignedSrc, + VectorZxZx = 0 + } + + public static void EmitSaturatingNarrowOp(ArmEmitterContext context, SaturatingNarrowFlags flags) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + bool scalar = (flags & SaturatingNarrowFlags.Scalar) != 0; + bool signedSrc = (flags & SaturatingNarrowFlags.SignedSrc) != 0; + bool signedDst = (flags & SaturatingNarrowFlags.SignedDst) != 0; + + int elems = !scalar ? 8 >> op.Size : 1; + + int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0; + + Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd)); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc); + + Operand temp = EmitSatQ(context, ne, op.Size, signedSrc, signedDst); + + res = EmitVectorInsert(context, res, temp, part + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + // TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned). + public static Operand EmitSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedSrc, bool signedDst) + { + if ((uint)sizeDst > 2u) + { + throw new ArgumentOutOfRangeException(nameof(sizeDst)); + } + + Delegate dlg; + + if (signedSrc) + { + dlg = signedDst + ? 
(Delegate)new _S64_S64_S32(SoftFallback.SignedSrcSignedDstSatQ) + : (Delegate)new _U64_S64_S32(SoftFallback.SignedSrcUnsignedDstSatQ); + } + else + { + dlg = signedDst + ? (Delegate)new _S64_U64_S32(SoftFallback.UnsignedSrcSignedDstSatQ) + : (Delegate)new _U64_U64_S32(SoftFallback.UnsignedSrcUnsignedDstSatQ); + } + + return context.Call(dlg, op, Const(sizeDst)); + } + + // TSrc (64bit) == TDst (64bit); signed. + public static Operand EmitUnarySignedSatQAbsOrNeg(ArmEmitterContext context, Operand op) + { + Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size."); + + return context.Call(new _S64_S64(SoftFallback.UnarySignedSatQAbsOrNeg), op); + } + + // TSrcs (64bit) == TDst (64bit); signed, unsigned. + public static Operand EmitBinarySatQAdd(ArmEmitterContext context, Operand op1, Operand op2, bool signed) + { + Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size."); + + Delegate dlg = signed + ? (Delegate)new _S64_S64_S64(SoftFallback.BinarySignedSatQAdd) + : (Delegate)new _U64_U64_U64(SoftFallback.BinaryUnsignedSatQAdd); + + return context.Call(dlg, op1, op2); + } + + // TSrcs (64bit) == TDst (64bit); signed, unsigned. + public static Operand EmitBinarySatQSub(ArmEmitterContext context, Operand op1, Operand op2, bool signed) + { + Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size."); + + Delegate dlg = signed + ? (Delegate)new _S64_S64_S64(SoftFallback.BinarySignedSatQSub) + : (Delegate)new _U64_U64_U64(SoftFallback.BinaryUnsignedSatQSub); + + return context.Call(dlg, op1, op2); + } + + // TSrcs (64bit) == TDst (64bit); signed, unsigned. + public static Operand EmitBinarySatQAccumulate(ArmEmitterContext context, Operand op1, Operand op2, bool signed) + { + Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size."); + + Delegate dlg = signed + ? 
(Delegate)new _S64_U64_S64(SoftFallback.BinarySignedSatQAcc) + : (Delegate)new _U64_S64_U64(SoftFallback.BinaryUnsignedSatQAcc); + + return context.Call(dlg, op1, op2); + } + + public static Operand EmitVectorExtractSx(ArmEmitterContext context, int reg, int index, int size) + { + return EmitVectorExtract(context, reg, index, size, true); + } + + public static Operand EmitVectorExtractZx(ArmEmitterContext context, int reg, int index, int size) + { + return EmitVectorExtract(context, reg, index, size, false); + } + + public static Operand EmitVectorExtract(ArmEmitterContext context, int reg, int index, int size, bool signed) + { + ThrowIfInvalid(index, size); + + Operand res = null; + + switch (size) + { + case 0: + res = context.VectorExtract8(GetVec(reg), index); + break; + + case 1: + res = context.VectorExtract16(GetVec(reg), index); + break; + + case 2: + res = context.VectorExtract(OperandType.I32, GetVec(reg), index); + break; + + case 3: + res = context.VectorExtract(OperandType.I64, GetVec(reg), index); + break; + } + + if (signed) + { + switch (size) + { + case 0: res = context.SignExtend8 (OperandType.I64, res); break; + case 1: res = context.SignExtend16(OperandType.I64, res); break; + case 2: res = context.SignExtend32(OperandType.I64, res); break; + } + } + else + { + switch (size) + { + case 0: res = context.ZeroExtend8 (OperandType.I64, res); break; + case 1: res = context.ZeroExtend16(OperandType.I64, res); break; + case 2: res = context.ZeroExtend32(OperandType.I64, res); break; + } + } + + return res; + } + + public static Operand EmitVectorInsert(ArmEmitterContext context, Operand vector, Operand value, int index, int size) + { + ThrowIfInvalid(index, size); + + if (size < 3) + { + value = context.ConvertI64ToI32(value); + } + + switch (size) + { + case 0: vector = context.VectorInsert8 (vector, value, index); break; + case 1: vector = context.VectorInsert16(vector, value, index); break; + case 2: vector = context.VectorInsert (vector, value, 
index); break; + case 3: vector = context.VectorInsert (vector, value, index); break; + } + + return vector; + } + + private static void ThrowIfInvalid(int index, int size) + { + if ((uint)size > 3u) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + if ((uint)index >= 16u >> size) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical.cs b/ARMeilleure/Instructions/InstEmitSimdLogical.cs new file mode 100644 index 000000000..551752d24 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdLogical.cs @@ -0,0 +1,456 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void And_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseAnd(op1, op2)); + } + } + + public static void Bic_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return context.BitwiseAnd(op1, 
context.BitwiseNot(op2)); + }); + } + } + + public static void Bic_Vi(ArmEmitterContext context) + { + EmitVectorImmBinaryOp(context, (op1, op2) => + { + return context.BitwiseAnd(op1, context.BitwiseNot(op2)); + }); + } + + public static void Bif_V(ArmEmitterContext context) + { + EmitBifBit(context, notRm: true); + } + + public static void Bit_V(ArmEmitterContext context) + { + EmitBifBit(context, notRm: false); + } + + private static void EmitBifBit(ArmEmitterContext context, bool notRm) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d); + + if (notRm) + { + res = context.AddIntrinsic(Intrinsic.X86Pandn, m, res); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Pand, m, res); + } + + res = context.AddIntrinsic(Intrinsic.X86Pxor, d, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand res = context.VectorZero(); + + int elems = op.RegisterSize == RegisterSize.Simd128 ? 
2 : 1; + + for (int index = 0; index < elems; index++) + { + Operand d = EmitVectorExtractZx(context, op.Rd, index, 3); + Operand n = EmitVectorExtractZx(context, op.Rn, index, 3); + Operand m = EmitVectorExtractZx(context, op.Rm, index, 3); + + if (notRm) + { + m = context.BitwiseNot(m); + } + + Operand e = context.BitwiseExclusiveOr(d, n); + + e = context.BitwiseAnd(e, m); + e = context.BitwiseExclusiveOr(e, d); + + res = EmitVectorInsert(context, res, e, index, 3); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Bsl_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m); + + res = context.AddIntrinsic(Intrinsic.X86Pand, res, d); + res = context.AddIntrinsic(Intrinsic.X86Pxor, res, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorTernaryOpZx(context, (op1, op2, op3) => + { + return context.BitwiseExclusiveOr( + context.BitwiseAnd(op1, + context.BitwiseExclusiveOr(op2, op3)), op3); + }); + } + } + + public static void Eor_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseExclusiveOr(op1, op2)); + } + } + + public static void Not_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand mask = X86GetAllElements(context, 
-1L); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, n, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorUnaryOpZx(context, (op1) => context.BitwiseNot(op1)); + } + } + + public static void Orn_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand mask = X86GetAllElements(context, -1L); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, mask); + + res = context.AddIntrinsic(Intrinsic.X86Por, res, n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return context.BitwiseOr(op1, context.BitwiseNot(op2)); + }); + } + } + + public static void Orr_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Por, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseOr(op1, op2)); + } + } + + public static void Orr_Vi(ArmEmitterContext context) + { + EmitVectorImmBinaryOp(context, (op1, op2) => context.BitwiseOr(op1, op2)); + } + + public static void Rbit_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.RegisterSize == RegisterSize.Simd128 ? 
16 : 8; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0); + + ne = context.ConvertI64ToI32(ne); + + Operand de = context.Call(new _U32_U32(SoftFallback.ReverseBits8), ne); + + de = context.ZeroExtend32(OperandType.I64, de); + + res = EmitVectorInsert(context, res, de, index, 0); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Rev16_V(ArmEmitterContext context) + { + if (Optimizations.UseSsse3) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + const long maskE0 = 06L << 56 | 07L << 48 | 04L << 40 | 05L << 32 | 02L << 24 | 03L << 16 | 00L << 8 | 01L << 0; + const long maskE1 = 14L << 56 | 15L << 48 | 12L << 40 | 13L << 32 | 10L << 24 | 11L << 16 | 08L << 8 | 09L << 0; + + Operand mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitRev_V(context, containerSize: 1); + } + } + + public static void Rev32_V(ArmEmitterContext context) + { + if (Optimizations.UseSsse3) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand mask; + + if (op.Size == 0) + { + const long maskE0 = 04L << 56 | 05L << 48 | 06L << 40 | 07L << 32 | 00L << 24 | 01L << 16 | 02L << 8 | 03L << 0; + const long maskE1 = 12L << 56 | 13L << 48 | 14L << 40 | 15L << 32 | 08L << 24 | 09L << 16 | 10L << 8 | 11L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + else /* if (op.Size == 1) */ + { + const long maskE0 = 05L << 56 | 04L << 48 | 07L << 40 | 06L << 32 | 01L << 24 | 00L << 16 | 03L << 8 | 02L << 0; + const long maskE1 = 13L << 56 | 12L << 48 | 15L << 40 | 14L << 32 | 09L << 24 | 08L << 16 | 11L << 8 | 10L << 0; 
+ + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + + Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitRev_V(context, containerSize: 2); + } + } + + public static void Rev64_V(ArmEmitterContext context) + { + if (Optimizations.UseSsse3) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand mask; + + if (op.Size == 0) + { + const long maskE0 = 00L << 56 | 01L << 48 | 02L << 40 | 03L << 32 | 04L << 24 | 05L << 16 | 06L << 8 | 07L << 0; + const long maskE1 = 08L << 56 | 09L << 48 | 10L << 40 | 11L << 32 | 12L << 24 | 13L << 16 | 14L << 8 | 15L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + else if (op.Size == 1) + { + const long maskE0 = 01L << 56 | 00L << 48 | 03L << 40 | 02L << 32 | 05L << 24 | 04L << 16 | 07L << 8 | 06L << 0; + const long maskE1 = 09L << 56 | 08L << 48 | 11L << 40 | 10L << 32 | 13L << 24 | 12L << 16 | 15L << 8 | 14L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + else /* if (op.Size == 2) */ + { + const long maskE0 = 03L << 56 | 02L << 48 | 01L << 40 | 00L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0; + const long maskE1 = 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 15L << 24 | 14L << 16 | 13L << 8 | 12L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + + Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitRev_V(context, containerSize: 3); + } + } + + private static void 
EmitRev_V(ArmEmitterContext context, int containerSize) + { + /* Element-by-element fallback for the REV emitters: destination element "index" receives source element "index ^ containerMask", which reverses element order inside each container of 2^(containerSize - op.Size) elements. */ OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + int containerMask = (1 << (containerSize - op.Size)) - 1; + + for (int index = 0; index < elems; index++) + { + int revIndex = index ^ containerMask; + + Operand ne = EmitVectorExtractZx(context, op.Rn, revIndex, op.Size); + + res = EmitVectorInsert(context, res, ne, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdMemory.cs b/ARMeilleure/Instructions/InstEmitSimdMemory.cs new file mode 100644 index 000000000..22e9ef7a8 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdMemory.cs @@ -0,0 +1,160 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + /* Thin entry points: the "Vms" pair forwards to EmitSimdMemMs and the "Vss" pair to EmitSimdMemSs, with isLoad selecting load or store. */ public static void Ld__Vms(ArmEmitterContext context) + { + EmitSimdMemMs(context, isLoad: true); + } + + public static void Ld__Vss(ArmEmitterContext context) + { + EmitSimdMemSs(context, isLoad: true); + } + + public static void St__Vms(ArmEmitterContext context) + { + EmitSimdMemMs(context, isLoad: false); + } + + public static void St__Vss(ArmEmitterContext context) + { + EmitSimdMemSs(context, isLoad: false); + } + + private static void EmitSimdMemMs(ArmEmitterContext context, bool isLoad) + { + OpCodeSimdMemMs op = (OpCodeSimdMemMs)context.CurrOp; + + Operand n = GetIntOrSP(context, op.Rn); + + /* Running byte offset from the base address; advanced by one element per innermost iteration. */ long offset = 0; + + for (int rep = 0; rep < op.Reps; rep++) + for (int elem = 0; elem < op.Elems; elem++) + for (int sElem = 0; sElem < op.SElems; sElem++) + { + int rtt = (op.Rt + rep + 
sElem) & 0x1f; + + Operand tt = GetVec(rtt); + + Operand address = context.Add(n, Const(offset)); + + if (isLoad) + { + EmitLoadSimd(context, address, tt, rtt, elem, op.Size); + + /* For Simd64 the upper half is cleared once, after the last element of the register has been loaded. */ if (op.RegisterSize == RegisterSize.Simd64 && elem == op.Elems - 1) + { + context.Copy(tt, context.VectorZeroUpper64(tt)); + } + } + else + { + EmitStoreSimd(context, address, rtt, elem, op.Size); + } + + offset += 1 << op.Size; + } + + if (op.WBack) + { + EmitSimdMemWBack(context, offset); + } + } + + /* EmitSimdMemSs: emits one element access per register for op.SElems consecutive registers (register number wraps with & 0x1f), advancing the address by one element each time. In replicate mode the loaded element fills every lane; otherwise only lane op.Index is loaded or stored. Performs base write-back when op.WBack is set. */ private static void EmitSimdMemSs(ArmEmitterContext context, bool isLoad) + { + OpCodeSimdMemSs op = (OpCodeSimdMemSs)context.CurrOp; + + Operand n = GetIntOrSP(context, op.Rn); + + long offset = 0; + + if (op.Replicate) + { + // Only loads uses the replicate mode. + Debug.Assert(isLoad, "Replicate mode is not valid for stores."); + + int elems = op.GetBytesCount() >> op.Size; + + for (int sElem = 0; sElem < op.SElems; sElem++) + { + int rt = (op.Rt + sElem) & 0x1f; + + Operand t = GetVec(rt); + + Operand address = context.Add(n, Const(offset)); + + /* The same address is loaded into every lane index of the register. */ for (int index = 0; index < elems; index++) + { + EmitLoadSimd(context, address, t, rt, index, op.Size); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + context.Copy(t, context.VectorZeroUpper64(t)); + } + + offset += 1 << op.Size; + } + } + else + { + for (int sElem = 0; sElem < op.SElems; sElem++) + { + int rt = (op.Rt + sElem) & 0x1f; + + Operand t = GetVec(rt); + + Operand address = context.Add(n, Const(offset)); + + if (isLoad) + { + EmitLoadSimd(context, address, t, rt, op.Index, op.Size); + } + else + { + EmitStoreSimd(context, address, rt, op.Index, op.Size); + } + + offset += 1 << op.Size; + } + } + + if (op.WBack) + { + EmitSimdMemWBack(context, offset); + } + } + + /* EmitSimdMemWBack: adds either register Rm or, when Rm is the zero-register alias, the constant byte count "offset" to the base register. */ private static void EmitSimdMemWBack(ArmEmitterContext context, long offset) + { + OpCodeMemReg op = (OpCodeMemReg)context.CurrOp; + + Operand n = GetIntOrSP(context, op.Rn); + Operand m; + + if (op.Rm != RegisterAlias.Zr) + { + m = GetIntOrZR(context, op.Rm); + } 
+ else + { + m = Const(offset); + } + + context.Copy(n, context.Add(n, m)); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitSimdMove.cs b/ARMeilleure/Instructions/InstEmitSimdMove.cs new file mode 100644 index 000000000..47359161f --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdMove.cs @@ -0,0 +1,794 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { +#region "Masks" + /* Shuffle-control tables indexed by op.Size. Each long packs eight source byte indices, lowest destination byte in the lowest bits. This table gathers the even-numbered elements (bytes, 16-bit or 32-bit) of a 16-byte vector into 8 bytes. */ private static readonly long[] _masksE0_TrnUzpXtn = new long[] + { + 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, + 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, + 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 + }; + + /* Counterpart of the table above selecting the odd-numbered elements. */ private static readonly long[] _masksE1_TrnUzp = new long[] + { + 15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0, + 15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0, + 15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0 + }; + + /* Low/high halves of the shuffle control used by the 64-bit register path of EmitVectorUnzip; only sizes 0 and 1 have entries. */ private static readonly long[] _masksE0_Uzp = new long[] + { + 13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0, + 11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0 + }; + + private static readonly long[] _masksE1_Uzp = new long[] + { + 15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0, + 15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0 + }; +#endregion + 
+ /* Dup_Gp: broadcasts general-purpose register Rn into the lanes of vector Rd. The SSE2 path first replicates the value inside a 32-bit word (multiply by 0x01010101 / 0x00010001 for sizes 0 / 1), inserts it at lane 0 of a zero vector and spreads it with SHUFPS or MOVLHPS; the fallback inserts the value lane by lane. */ public static void Dup_Gp(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + if (Optimizations.UseSse2) + { + switch (op.Size) + { + case 0: n = context.ZeroExtend8 (n.Type, n); n = context.Multiply(n, Const(n.Type, 0x01010101)); break; + case 1: n = context.ZeroExtend16(n.Type, n); n = context.Multiply(n, Const(n.Type, 0x00010001)); break; + case 2: n = context.ZeroExtend32(n.Type, n); break; + } + + Operand res = context.VectorInsert(context.VectorZero(), n, 0); + + if (op.Size < 3) + { + if (op.RegisterSize == RegisterSize.Simd64) + { + /* Control 0xf0 copies lane 0 into lanes 0-1 and lane 3 (still zero) into lanes 2-3, leaving the upper half clear. */ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0xf0)); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0)); + } + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + res = EmitVectorInsert(context, res, n, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + /* Dup_S: extracts element op.DstIndex of Rn (zero-extended) and writes it to element 0 of an otherwise zero Rd. */ public static void Dup_S(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size); + + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), ne, 0, op.Size)); + } + + /* Dup_V: broadcasts element op.DstIndex of Rn to the lanes of Rd. */ public static void Dup_V(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + if (Optimizations.UseSse2) + { + Operand res = GetVec(op.Rn); + + if (op.Size == 0) + { + /* Shift the wanted byte down to position 0, then double its width with successive unpacks before the final 32-bit broadcast. */ if (op.DstIndex != 0) + { + res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex)); + } + + res = context.AddIntrinsic(Intrinsic.X86Punpcklbw, res, res); + res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res); + res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0)); + } + else if (op.Size == 
1) + { + if (op.DstIndex != 0) + { + /* PSRLDQ shifts by bytes, hence the element index is scaled by 2. */ res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex * 2)); + } + + res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res); + res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0)); + } + else if (op.Size == 2) + { + /* Repeats the 2-bit lane index in all four fields of the SHUFPS control. */ int mask = op.DstIndex * 0b01010101; + + res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(mask)); + } + else if (op.DstIndex == 0 && op.RegisterSize != RegisterSize.Simd64) + { + res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res); + } + else if (op.DstIndex == 1) + { + res = context.AddIntrinsic(Intrinsic.X86Movhlps, res, res); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + /* Fallback: extract the source element once and insert it into every lane. */ Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size); + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + res = EmitVectorInsert(context, res, ne, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + /* Ext_V: builds Rd from the bytes of Rn starting at byte op.Imm4, followed by the leading bytes of Rm. The SSE2 path shifts Rn right and Rm left by whole bytes and ORs the two; for Simd64 the upper halves are cleared around the shifts. */ public static void Ext_V(ArmEmitterContext context) + { + OpCodeSimdExt op = (OpCodeSimdExt)context.CurrOp; + + if (Optimizations.UseSse2) + { + Operand nShifted = GetVec(op.Rn); + + if (op.RegisterSize == RegisterSize.Simd64) + { + nShifted = context.AddIntrinsic(Intrinsic.X86Movlhps, nShifted, context.VectorZero()); + } + + nShifted = context.AddIntrinsic(Intrinsic.X86Psrldq, nShifted, Const(op.Imm4)); + + Operand mShifted = GetVec(op.Rm); + + mShifted = context.AddIntrinsic(Intrinsic.X86Pslldq, mShifted, Const(op.GetBytesCount() - op.Imm4)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + mShifted = context.AddIntrinsic(Intrinsic.X86Movlhps, mShifted, context.VectorZero()); + } + + Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, mShifted); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand res = context.VectorZero(); + + int 
bytes = op.GetBytesCount(); + + /* "position" walks the source byte index and wraps at the register size; bytes are taken from Rn until Imm4 + index reaches "bytes", then from Rm. */ int position = op.Imm4 & (bytes - 1); + + for (int index = 0; index < bytes; index++) + { + int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm; + + Operand e = EmitVectorExtractZx(context, reg, position, 0); + + position = (position + 1) & (bytes - 1); + + res = EmitVectorInsert(context, res, e, index, 0); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + /* Fcsel_S: emits a branch on the instruction condition; Rd receives scalar element 0 of Rn when the condition holds and of Rm otherwise, with the remaining lanes zero. FP32 is used when op.Size is 0, FP64 otherwise. */ public static void Fcsel_S(ArmEmitterContext context) + { + OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp; + + Operand lblTrue = Label(); + Operand lblEnd = Label(); + + Operand isTrue = InstEmitFlowHelper.GetCondTrue(context, op.Cond); + + context.BranchIfTrue(lblTrue, isTrue); + + OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64; + + /* Fall-through path: condition false, select Rm. */ Operand me = context.VectorExtract(type, GetVec(op.Rm), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), me, 0)); + + context.Branch(lblEnd); + + context.MarkLabel(lblTrue); + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0)); + + context.MarkLabel(lblEnd); + } + + /* Fmov_Ftoi: copies element 0 of vector Rn (element size op.Size + 2, zero-extended) into integer register Rd. */ public static void Fmov_Ftoi(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand ne = EmitVectorExtractZx(context, op.Rn, 0, op.Size + 2); + + SetIntOrZR(context, op.Rd, ne); + } + + /* Fmov_Ftoi1: copies element 1 at size 3 of vector Rn into integer register Rd. */ public static void Fmov_Ftoi1(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand ne = EmitVectorExtractZx(context, op.Rn, 1, 3); + + SetIntOrZR(context, op.Rd, ne); + } + + /* Fmov_Itof: writes integer register Rn into element 0 of a zeroed vector Rd. */ public static void Fmov_Itof(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), n, 0, op.Size + 2)); + } + + /* Fmov_Itof1: writes integer register Rn into element 1 (size 3) of Rd, keeping the rest of Rd. */ public static void Fmov_Itof1(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + 
context.Copy(GetVec(op.Rd), EmitVectorInsert(context, GetVec(op.Rd), n, 1, 3)); + } + + /* Fmov_S: copies scalar element 0 of Rn (FP32 when op.Size is 0, else FP64) into a zeroed Rd. */ public static void Fmov_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64; + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0)); + } + + /* Fmov_Si: loads the decoded immediate into Rd as a scalar; the immediate is narrowed to int when op.Size is 0. */ public static void Fmov_Si(ArmEmitterContext context) + { + OpCodeSimdFmov op = (OpCodeSimdFmov)context.CurrOp; + + if (op.Size == 0) + { + context.Copy(GetVec(op.Rd), X86GetScalar(context, (int)op.Immediate)); + } + else + { + context.Copy(GetVec(op.Rd), X86GetScalar(context, op.Immediate)); + } + } + + /* Fmov_Vi: fills Rd with copies of the immediate. The lane count starts at 4 (Simd128) or 2 and is halved per op.Size step; lanes are written at element size op.Size + 2. */ public static void Fmov_Vi(ArmEmitterContext context) + { + OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp; + + Operand e = Const(op.Immediate); + + Operand res = context.VectorZero(); + + int elems = op.RegisterSize == RegisterSize.Simd128 ? 4 : 2; + + for (int index = 0; index < (elems >> op.Size); index++) + { + res = EmitVectorInsert(context, res, e, index, op.Size + 2); + } + + context.Copy(GetVec(op.Rd), res); + } + + /* Ins_Gp: inserts integer register Rn into element op.DstIndex of Rd, preserving the other elements. */ public static void Ins_Gp(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetIntOrZR(context, op.Rn); + + context.Copy(d, EmitVectorInsert(context, d, n, op.DstIndex, op.Size)); + } + + /* Ins_V: copies element op.SrcIndex of Rn into element op.DstIndex of Rd, preserving the other elements. */ public static void Ins_V(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand ne = EmitVectorExtractZx(context, op.Rn, op.SrcIndex, op.Size); + + context.Copy(d, EmitVectorInsert(context, d, ne, op.DstIndex, op.Size)); + } + + /* Movi_V / Mvni_V: use EmitMoviMvni when SSE2 is enabled, otherwise the generic immediate helper (identity for MOVI, bitwise NOT for MVNI). */ public static void Movi_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + EmitMoviMvni(context, not: false); + } + else + { + EmitVectorImmUnaryOp(context, (op1) => op1); + } + } + + public static void Mvni_V(ArmEmitterContext context) + { + if 
(Optimizations.UseSse2) + { + EmitMoviMvni(context, not: true); + } + else + { + EmitVectorImmUnaryOp(context, (op1) => context.BitwiseNot(op1)); + } + } + + /* Smov_S: sign-extending extract of element op.DstIndex of Rn into integer register Rd; for RegisterSize.Simd64 the value is additionally zero-extended from 32 bits. */ public static void Smov_S(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand ne = EmitVectorExtractSx(context, op.Rn, op.DstIndex, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + ne = context.ZeroExtend32(OperandType.I64, ne); + } + + SetIntOrZR(context, op.Rd, ne); + } + + /* Tbl_V: table lookup of the byte indices in Rm across op.Size consecutive table registers starting at Rn. The SSSE3 path uses one PSHUFB per table register and ORs the partial results; the fallback calls a SoftFallback.TblN helper. */ public static void Tbl_V(ArmEmitterContext context) + { + OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp; + + if (Optimizations.UseSsse3) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand mask = X86GetAllElements(context, 0x0F0F0F0F0F0F0F0FL); + + /* Indices greater than 15 (signed compare) become 0xFF; ORing that into the index sets bit 7, which makes PSHUFB write zero for that byte. */ Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, mask); + + mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, m); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mMask); + + for (int index = 1; index < op.Size; index++) + { + Operand ni = GetVec((op.Rn + index) & 0x1f); + + /* Rebase the indices by 16 * index so that this table register covers 0..15. */ Operand indexMask = X86GetAllElements(context, 0x1010101010101010L * index); + + Operand mMinusMask = context.AddIntrinsic(Intrinsic.X86Psubb, m, indexMask); + + Operand mMask2 = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, mMinusMask, mask); + + mMask2 = context.AddIntrinsic(Intrinsic.X86Por, mMask2, mMinusMask); + + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, ni, mMask2); + + res = context.AddIntrinsic(Intrinsic.X86Por, res, res2); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + /* args[0] is the index vector, followed by the table registers. */ Operand[] args = new Operand[1 + op.Size]; + + args[0] = GetVec(op.Rm); + + for (int index = 0; index < op.Size; index++) + { + args[1 + index] = GetVec((op.Rn + index) & 0x1f); + } + + /* NOTE(review): dlg stays null if op.Size is outside 1..4 - presumably the decoder guarantees the range; confirm. */ Delegate dlg = null; + + switch (op.Size) + { + case 1: dlg = op.RegisterSize == RegisterSize.Simd64 + ? 
(Delegate)new _V128_V128_V128(SoftFallback.Tbl1_V64) + : (Delegate)new _V128_V128_V128(SoftFallback.Tbl1_V128); break; + + case 2: dlg = op.RegisterSize == RegisterSize.Simd64 + ? (Delegate)new _V128_V128_V128_V128(SoftFallback.Tbl2_V64) + : (Delegate)new _V128_V128_V128_V128(SoftFallback.Tbl2_V128); break; + + case 3: dlg = op.RegisterSize == RegisterSize.Simd64 + ? (Delegate)new _V128_V128_V128_V128_V128(SoftFallback.Tbl3_V64) + : (Delegate)new _V128_V128_V128_V128_V128(SoftFallback.Tbl3_V128); break; + + case 4: dlg = op.RegisterSize == RegisterSize.Simd64 + ? (Delegate)new _V128_V128_V128_V128_V128_V128(SoftFallback.Tbl4_V64) + : (Delegate)new _V128_V128_V128_V128_V128_V128(SoftFallback.Tbl4_V128); break; + } + + context.Copy(GetVec(op.Rd), context.Call(dlg, args)); + } + } + + /* Trn1_V / Trn2_V: forward to EmitVectorTranspose with part 0 / 1. */ public static void Trn1_V(ArmEmitterContext context) + { + EmitVectorTranspose(context, part: 0); + } + + public static void Trn2_V(ArmEmitterContext context) + { + EmitVectorTranspose(context, part: 1); + } + + /* Umov_S: zero-extending extract of element op.DstIndex of Rn into integer register Rd. */ public static void Umov_S(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size); + + SetIntOrZR(context, op.Rd, ne); + } + + /* Uzp1_V / Uzp2_V: forward to EmitVectorUnzip with part 0 / 1. */ public static void Uzp1_V(ArmEmitterContext context) + { + EmitVectorUnzip(context, part: 0); + } + + public static void Uzp2_V(ArmEmitterContext context) + { + EmitVectorUnzip(context, part: 1); + } + + /* Xtn_V: narrows each element of Rn to half width (keeps the low half). With RegisterSize.Simd128 the result goes to the upper half of Rd and the lower half of Rd is preserved; otherwise it goes to the lower half and the upper half is zero. */ public static void Xtn_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + if (Optimizations.UseSsse3) + { + Operand d = GetVec(op.Rd); + + /* res = low 64 bits of Rd with a zero upper half. */ Operand res = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero()); + + Operand n = GetVec(op.Rn); + + Operand mask = X86GetAllElements(context, _masksE0_TrnUzpXtn[op.Size]); + + /* The same control is in both halves, so the narrowed data appears in both 64-bit halves of res2. */ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask); + + Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 + ? 
Intrinsic.X86Movlhps + : Intrinsic.X86Movhlps; + + res = context.AddIntrinsic(movInst, res, res2); + + context.Copy(GetVec(op.Rd), res); + } + else + { + int elems = 8 >> op.Size; + + /* "part" is the first destination element: the upper half for Simd128, element 0 otherwise. */ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd)); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); + + res = EmitVectorInsert(context, res, ne, part + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + /* Zip1_V / Zip2_V: forward to EmitVectorZip with part 0 / 1. */ public static void Zip1_V(ArmEmitterContext context) + { + EmitVectorZip(context, part: 0); + } + + public static void Zip2_V(ArmEmitterContext context) + { + EmitVectorZip(context, part: 1); + } + + /* EmitMoviMvni: replicates the decoded immediate to fill 32 bits for sizes 0 and 1, optionally inverts it, broadcasts it to all elements (as int for sizes below 3, as long otherwise), clears the upper half for Simd64 and writes Rd. */ private static void EmitMoviMvni(ArmEmitterContext context, bool not) + { + OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp; + + long imm = op.Immediate; + + switch (op.Size) + { + case 0: imm *= 0x01010101; break; + case 1: imm *= 0x00010001; break; + } + + if (not) + { + imm = ~imm; + } + + Operand mask; + + if (op.Size < 3) + { + mask = X86GetAllElements(context, (int)imm); + } + else + { + mask = X86GetAllElements(context, imm); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + mask = context.VectorZeroUpper64(mask); + } + + context.Copy(GetVec(op.Rd), mask); + } + + /* EmitVectorTranspose: part 0 pairs the even-indexed elements of Rn and Rm, part 1 the odd-indexed ones. The SSSE3 path first regroups even elements into the low half and odd elements into the high half of each source (skipped when op.Size is 3), then interleaves with a low or high unpack. */ private static void EmitVectorTranspose(ArmEmitterContext context, int part) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSsse3) + { + Operand mask = null; + + if (op.Size < 3) + { + long maskE0 = _masksE0_TrnUzpXtn[op.Size]; + long maskE1 = _masksE1_TrnUzp [op.Size]; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + + Operand n = GetVec(op.Rn); + + if (op.Size < 3) + { + n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask); + } + + Operand m = GetVec(op.Rm); + + if (op.Size < 3) + { + m = 
context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask); + } + + /* The unpack instruction is looked up by element size; low unpack for part 0, high unpack for part 1. */ Intrinsic punpckInst = part == 0 + ? X86PunpcklInstruction[op.Size] + : X86PunpckhInstruction[op.Size]; + + Operand res = context.AddIntrinsic(punpckInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand res = context.VectorZero(); + + int pairs = op.GetPairsCount() >> op.Size; + + /* Fallback: for each pair, element (2 * index + part) of Rn and of Rm are written to destination elements 2 * index and 2 * index + 1. */ for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand ne = EmitVectorExtractZx(context, op.Rn, pairIndex + part, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, pairIndex + part, op.Size); + + res = EmitVectorInsert(context, res, ne, pairIndex, op.Size); + res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + /* EmitVectorUnzip: part 0 gathers the even-indexed elements of Rn followed by those of Rm, part 1 the odd-indexed ones. */ private static void EmitVectorUnzip(ArmEmitterContext context, int part) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSsse3) + { + if (op.RegisterSize == RegisterSize.Simd128) + { + /* 128-bit path: shuffle each source so even elements sit in its low half and odd elements in its high half (not needed when op.Size is 3), then join the matching halves of Rn and Rm. */ Operand mask = null; + + if (op.Size < 3) + { + long maskE0 = _masksE0_TrnUzpXtn[op.Size]; + long maskE1 = _masksE1_TrnUzp [op.Size]; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + + Operand n = GetVec(op.Rn); + + if (op.Size < 3) + { + n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask); + } + + Operand m = GetVec(op.Rm); + + if (op.Size < 3) + { + m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask); + } + + Intrinsic punpckInst = part == 0 + ? 
Intrinsic.X86Punpcklqdq + : Intrinsic.X86Punpckhqdq; + + Operand res = context.AddIntrinsic(punpckInst, n, m); + + context.Copy(GetVec(op.Rd), res); + } + else + { + /* 64-bit path: interleave the low halves of Rn and Rm, reorder with the _masksE*_Uzp control for sizes 0 and 1, then keep the low (part 0) or high (part 1) 64 bits with a zero upper half. */ Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic punpcklInst = X86PunpcklInstruction[op.Size]; + + Operand res = context.AddIntrinsic(punpcklInst, n, m); + + if (op.Size < 2) + { + long maskE0 = _masksE0_Uzp[op.Size]; + long maskE1 = _masksE1_Uzp[op.Size]; + + Operand mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + + res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask); + } + + Intrinsic punpckInst = part == 0 + ? Intrinsic.X86Punpcklqdq + : Intrinsic.X86Punpckhqdq; + + res = context.AddIntrinsic(punpckInst, res, context.VectorZero()); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + Operand res = context.VectorZero(); + + int pairs = op.GetPairsCount() >> op.Size; + + /* Fallback: element (2 * index + part) of Rn goes to destination element index, the same element of Rm goes to pairs + index. */ for (int index = 0; index < pairs; index++) + { + int idx = index << 1; + + Operand ne = EmitVectorExtractZx(context, op.Rn, idx + part, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, idx + part, op.Size); + + res = EmitVectorInsert(context, res, ne, index, op.Size); + res = EmitVectorInsert(context, res, me, pairs + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + /* EmitVectorZip: interleaves elements of Rn and Rm, taken from the lower half of the sources for part 0 and from the upper half for part 1. */ private static void EmitVectorZip(ArmEmitterContext context, int part) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + Intrinsic punpckInst = part == 0 + ? X86PunpcklInstruction[op.Size] + : X86PunpckhInstruction[op.Size]; + + Operand res = context.AddIntrinsic(punpckInst, n, m); + + context.Copy(GetVec(op.Rd), res); + } + else + { + /* 64-bit registers: a single low unpack already holds both results; the final unpack against zero picks the low or high 64 bits. */ Operand res = context.AddIntrinsic(X86PunpcklInstruction[op.Size], n, m); + + Intrinsic punpckInst = part == 0 + ? 
Intrinsic.X86Punpcklqdq + : Intrinsic.X86Punpckhqdq; + + res = context.AddIntrinsic(punpckInst, res, context.VectorZero()); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + Operand res = context.VectorZero(); + + int pairs = op.GetPairsCount() >> op.Size; + + /* Source elements start at 0 for part 0 and at "pairs" for part 1. */ int baseIndex = part != 0 ? pairs : 0; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand ne = EmitVectorExtractZx(context, op.Rn, baseIndex + index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, baseIndex + index, op.Size); + + res = EmitVectorInsert(context, res, ne, pairIndex, op.Size); + res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitSimdShift.cs b/ARMeilleure/Instructions/InstEmitSimdShift.cs new file mode 100644 index 000000000..1aae491df --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdShift.cs @@ -0,0 +1,1057 @@ +// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h + +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + /* Alias for a two-operand IR emitter callback (two Operand inputs, one Operand result). The generic arguments had been stripped from this line; a bare "Func" does not exist in System. */ using Func2I = Func<Operand, Operand, Operand>; + + static partial class InstEmit + { +#region "Masks" + /* PSHUFB controls indexed by op.Size; each gathers the low half of every double-width element of a 16-byte vector into 8 bytes. */ private static readonly long[] _masks_RshrnShrn = new long[] + { + 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, + 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, + 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 + }; +#endregion + + /* Rshrn_V: rounding shift right and narrow. */ public static void Rshrn_V(ArmEmitterContext context) + { + if (Optimizations.UseSsse3) + { + OpCodeSimdShImm op = 
(OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + /* Half of the last bit shifted out; added before the shift to round the result. */ long roundConst = 1L << (shift - 1); + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + /* Low 64 bits of Rd with a zero upper half. */ Operand dLow = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero()); + + Operand mask = null; + + /* The rounding constant is broadcast at the source element width (op.Size + 1). */ switch (op.Size + 1) + { + case 1: mask = X86GetAllElements(context, (int)roundConst * 0x00010001); break; + case 2: mask = X86GetAllElements(context, (int)roundConst); break; + case 3: mask = X86GetAllElements(context, roundConst); break; + } + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + Operand res = context.AddIntrinsic(addInst, n, mask); + + Intrinsic srlInst = X86PsrlInstruction[op.Size + 1]; + + res = context.AddIntrinsic(srlInst, res, Const(shift)); + + Operand mask2 = X86GetAllElements(context, _masks_RshrnShrn[op.Size]); + + res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask2); + + /* Simd128: narrowed data goes to the upper half and the low half of Rd is kept; otherwise it goes to the low half with a zero upper half. */ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 + ? Intrinsic.X86Movlhps + : Intrinsic.X86Movhlps; + + res = context.AddIntrinsic(movInst, dLow, res); + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShrImmNarrowOpZx(context, round: true); + } + } + + /* Shl_S: scalar shift left by the decoded immediate. */ public static void Shl_S(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift))); + } + + /* Shl_V: vector shift left by the decoded immediate. Uses the SSE2 shift looked up by element size when SSE2 is enabled and op.Size is above 0; size 0 and the non-SSE2 case go through the per-element helper. */ public static void Shl_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + if (Optimizations.UseSse2 && op.Size > 0) + { + Operand n = GetVec(op.Rn); + + Intrinsic sllInst = X86PsllInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sllInst, n, Const(shift)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift))); + } + } + + 
/// <summary>
+        /// Widens the low half of Rn (the high half when the register size is 128 bits) and
+        /// shifts every widened element left by the source element width in bits.
+        /// </summary>
+        public static void Shll_V(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            int shift = 8 << op.Size;
+
+            if (Optimizations.UseSse41)
+            {
+                Operand n = GetVec(op.Rn);
+
+                if (op.RegisterSize == RegisterSize.Simd128)
+                {
+                    // Bring the upper 8 bytes down so the widening below reads them.
+                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+                }
+
+                // The extension bits are shifted out again below (the shift equals the source
+                // element width), so the kind of extension does not affect the result.
+                Intrinsic movsxInst = X86PmovsxInstruction[op.Size];
+
+                Operand res = context.AddIntrinsic(movsxInst, n);
+
+                Intrinsic sllInst = X86PsllInstruction[op.Size + 1];
+
+                res = context.AddIntrinsic(sllInst, res, Const(shift));
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
+            }
+        }
+
+        /// <summary>
+        /// Shifts each double-width element of Rn right (logical) by the immediate and packs the
+        /// low halves of the results into Rd: into the upper 64 bits (lower 64 bits of Rd kept)
+        /// when the register size is 128 bits, otherwise into the lower 64 bits with the upper
+        /// 64 bits zeroed. Falls back to the per-element path without SSSE3.
+        /// </summary>
+        public static void Shrn_V(ArmEmitterContext context)
+        {
+            if (Optimizations.UseSsse3)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                Operand d = GetVec(op.Rd);
+                Operand n = GetVec(op.Rn);
+
+                // Low 64 bits of Rd with the upper 64 bits cleared.
+                Operand dLow = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero());
+
+                Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];
+
+                Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift));
+
+                // The shuffle mask gathers the low half of every shifted element.
+                Operand mask = X86GetAllElements(context, _masks_RshrnShrn[op.Size]);
+
+                Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, nShifted, mask);
+
+                Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
+                    ? Intrinsic.X86Movlhps
+                    : Intrinsic.X86Movhlps;
+
+                res = context.AddIntrinsic(movInst, dLow, res);
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                EmitVectorShrImmNarrowOpZx(context, round: false);
+            }
+        }
+
+        /// <summary>
+        /// For every element: shifts the Rn element left by the immediate and ORs it with the
+        /// low "shift" bits of the matching Rd element, then writes the result to Rd.
+        /// </summary>
+        public static void Sli_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            Operand res = context.VectorZero();
+
+            int elems = op.GetBytesCount() >> op.Size;
+
+            int shift = GetImmShl(op);
+
+            // Selects the low "shift" bits; a shift of 64 - 0 would be invalid, hence the special case.
+            ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0;
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+                Operand neShifted = context.ShiftLeft(ne, Const(shift));
+
+                Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
+
+                Operand deMasked = context.BitwiseAnd(de, Const(mask));
+
+                Operand e = context.BitwiseOr(neShifted, deMasked);
+
+                res = EmitVectorInsert(context, res, e, index, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        /// <summary>
+        /// Per element: calls SoftFallback.SignedShlRegSatQ on the sign-extended Rn and Rm
+        /// elements with the third argument set to 1, and writes the results to Rd.
+        /// </summary>
+        public static void Sqrshl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand res = context.VectorZero();
+
+            int elems = op.GetBytesCount() >> op.Size;
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+                Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
+
+                Operand e = context.Call(new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlRegSatQ), ne, me, Const(1), Const(op.Size));
+
+                res = EmitVectorInsert(context, res, e, index, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        /// <summary>Delegates to EmitRoundShrImmSaturatingNarrowOp with the ScalarSxSx flags.</summary>
+        public static void Sqrshrn_S(ArmEmitterContext context)
+        {
+            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+        }
+
+        /// <summary>Delegates to EmitRoundShrImmSaturatingNarrowOp with the VectorSxSx flags.</summary>
+        public static void Sqrshrn_V(ArmEmitterContext context)
+        {
+            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+        }
+
+        /// <summary>Delegates to EmitRoundShrImmSaturatingNarrowOp with the ScalarSxZx flags.</summary>
+        public static void Sqrshrun_S(ArmEmitterContext context)
+        {
+            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+        }
+
+        /// <summary>Delegates to EmitRoundShrImmSaturatingNarrowOp with the VectorSxZx flags.</summary>
+        public static void Sqrshrun_V(ArmEmitterContext context)
+        {
+            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+        }
+
+        /// <summary>
+        /// Per element: calls SoftFallback.SignedShlRegSatQ on the sign-extended Rn and Rm
+        /// elements with the third argument set to 0, and writes the results to Rd.
+        /// </summary>
+        public static void Sqshl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand res = context.VectorZero();
+
+            int elems = op.GetBytesCount() >> op.Size;
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+                Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
+
+                Operand e = context.Call(new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlRegSatQ), ne, me, Const(0), Const(op.Size));
+
+                res = EmitVectorInsert(context, res, e, index, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        /// <summary>Delegates to EmitShrImmSaturatingNarrowOp with the ScalarSxSx flags.</summary>
+        public static void Sqshrn_S(ArmEmitterContext context)
+        {
+            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+        }
+
+        /// <summary>Delegates to EmitShrImmSaturatingNarrowOp with the VectorSxSx flags.</summary>
+        public static void Sqshrn_V(ArmEmitterContext context)
+        {
+            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+        }
+
+        /// <summary>Delegates to EmitShrImmSaturatingNarrowOp with the ScalarSxZx flags.</summary>
+        public static void Sqshrun_S(ArmEmitterContext context)
+        {
+            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+        }
+
+        /// <summary>Delegates to EmitShrImmSaturatingNarrowOp with the VectorSxZx flags.</summary>
+        public static void Sqshrun_V(ArmEmitterContext context)
+        {
+            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+        }
+
+        /// <summary>
+        /// Per element: calls SoftFallback.SignedShlReg on the sign-extended Rn and Rm
+        /// elements with the third argument set to 1, and writes the results to Rd.
+        /// </summary>
+        public static void Srshl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand res = context.VectorZero();
+
+            int elems = op.GetBytesCount() >> op.Size;
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+                Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
+
+                Operand e = context.Call(new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlReg), ne, me, Const(1), Const(op.Size));
+
+                res = EmitVectorInsert(context, res, e, index, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        /// <summary>Delegates to EmitScalarShrImmOpSx with the Round flag.</summary>
+        public static void Srshr_S(ArmEmitterContext context)
+        {
+            EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
+        }
+
+        /// <summary>
+        /// Rounding arithmetic shift right of every Rn element by the immediate, written to Rd.
+        /// The SSE2 path handles element sizes 1 and 2 only; other sizes use EmitVectorShrImmOpSx.
+        /// </summary>
+        public static void Srshr_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+            {
+                int shift = GetImmShr(op);
+                int eSize = 8 << op.Size;
+
+                Operand n = GetVec(op.Rn);
+
+                // Shifting left by (eSize - shift) and then right by (eSize - 1) leaves bit
+                // (shift - 1) of each element in bit 0: that is the rounding increment.
+                Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+                Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+                Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+                res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+                Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+                Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift));
+
+                Intrinsic addInst = X86PaddInstruction[op.Size];
+
+                res = context.AddIntrinsic(addInst, res, nSra);
+
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                EmitVectorShrImmOpSx(context, ShrImmFlags.Round);
+            }
+        }
+
+        /// <summary>Delegates to EmitScalarShrImmOpSx with the Round and Accumulate flags.</summary>
+        public static void Srsra_S(ArmEmitterContext context)
+        {
+            EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+        }
+
+        /// <summary>
+        /// Rounding arithmetic shift right of every Rn element by the immediate, added to Rd.
+        /// The SSE2 path handles element sizes 1 and 2 only; other sizes use EmitVectorShrImmOpSx.
+        /// </summary>
+        public static void Srsra_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+            {
+                int shift = GetImmShr(op);
+                int eSize = 8 << op.Size;
+
+                Operand d = GetVec(op.Rd);
+                Operand n = GetVec(op.Rn);
+
+                // Isolate bit (shift - 1) of each element in bit 0 (the rounding increment).
+                Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+                Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+                Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+                res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+                Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+                Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift));
+
+                Intrinsic addInst = X86PaddInstruction[op.Size];
+
+                res = context.AddIntrinsic(addInst, res, nSra);
+                res = context.AddIntrinsic(addInst, res, d);
+
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+            }
+        }
+
+        /// <summary>
+        /// Per element: calls SoftFallback.SignedShlReg on the sign-extended Rn and Rm
+        /// elements with the third argument set to 0, and writes the results to Rd.
+        /// </summary>
+        public static void Sshl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand res = context.VectorZero();
+
+            int elems = op.GetBytesCount() >> op.Size;
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+                Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
+
+                Operand e = context.Call(new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlReg), ne, me, Const(0), Const(op.Size));
+
+                res = EmitVectorInsert(context, res, e, index, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        /// <summary>
+        /// Sign-extends the low half of Rn (the high half when the register size is 128 bits)
+        /// to double-width elements and shifts them left by the immediate.
+        /// </summary>
+        public static void Sshll_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            int shift = GetImmShl(op);
+
+            if (Optimizations.UseSse41)
+            {
+                Operand n = GetVec(op.Rn);
+
+                if (op.RegisterSize == RegisterSize.Simd128)
+                {
+                    // Bring the upper 8 bytes down so the widening below reads them.
+                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+                }
+
+                Intrinsic movsxInst = X86PmovsxInstruction[op.Size];
+
+                Operand res = context.AddIntrinsic(movsxInst, n);
+
+                if (shift != 0)
+                {
+                    Intrinsic sllInst = X86PsllInstruction[op.Size + 1];
+
+                    res = context.AddIntrinsic(sllInst, res, Const(shift));
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                EmitVectorShImmWidenBinarySx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
+            }
+        }
+
+        /// <summary>Delegates to EmitShrImmOp with the ScalarSx flags.</summary>
+        public static void Sshr_S(ArmEmitterContext context)
+        {
+            EmitShrImmOp(context, ShrImmFlags.ScalarSx);
+        }
+
+        /// <summary>
+        /// Arithmetic shift right of every Rn element by the immediate, written to Rd.
+        /// The SSE2 path handles element sizes 1 and 2 only; other sizes use EmitShrImmOp.
+        /// </summary>
+        public static void Sshr_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+            {
+                int shift = GetImmShr(op);
+
+                Operand n = GetVec(op.Rn);
+
+                Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+                Operand res = context.AddIntrinsic(sraInst, n, Const(shift));
+
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                EmitShrImmOp(context, ShrImmFlags.VectorSx);
+            }
+        }
+
+        /// <summary>Delegates to EmitScalarShrImmOpSx with the Accumulate flag.</summary>
+        public static void Ssra_S(ArmEmitterContext context)
+        {
+            EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
+        }
+
+        /// <summary>
+        /// Arithmetic shift right of every Rn element by the immediate, added to Rd.
+        /// The SSE2 path handles element sizes 1 and 2 only; other sizes use EmitVectorShrImmOpSx.
+        /// </summary>
+        public static void Ssra_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+            {
+                int shift = GetImmShr(op);
+
+                Operand d = GetVec(op.Rd);
+                Operand n = GetVec(op.Rn);
+
+                Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+                Operand res = context.AddIntrinsic(sraInst, n, Const(shift));
+
+                Intrinsic addInst = X86PaddInstruction[op.Size];
+
+                res = context.AddIntrinsic(addInst, res, d);
+
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(d, res);
+            }
+            else
+            {
+                EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate);
+            }
+        }
+
+        /// <summary>
+        /// Per element: calls SoftFallback.UnsignedShlRegSatQ on the zero-extended Rn and Rm
+        /// elements with the third argument set to 1, and writes the results to Rd.
+        /// </summary>
+        public static void Uqrshl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand res = context.VectorZero();
+
+            int elems = op.GetBytesCount() >> op.Size;
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+                Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
+
+                Operand e = context.Call(new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlRegSatQ), ne, me, Const(1), Const(op.Size));
+
+                res = EmitVectorInsert(context, res, e, index, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        /// <summary>Delegates to EmitRoundShrImmSaturatingNarrowOp with the ScalarZxZx flags.</summary>
+        public static void Uqrshrn_S(ArmEmitterContext context)
+        {
+            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+        }
+
+        /// <summary>Delegates to EmitRoundShrImmSaturatingNarrowOp with the VectorZxZx flags.</summary>
+        public static void Uqrshrn_V(ArmEmitterContext context)
+        {
+            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+        }
+
+        /// <summary>
+        /// Per element: calls SoftFallback.UnsignedShlRegSatQ on the zero-extended Rn and Rm
+        /// elements with the third argument set to 0, and writes the results to Rd.
+        /// </summary>
+        public static void Uqshl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand res = context.VectorZero();
+
+            int elems = op.GetBytesCount() >> op.Size;
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+                Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
+
+                Operand e = context.Call(new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlRegSatQ), ne, me, Const(0), Const(op.Size));
+
+                res = EmitVectorInsert(context, res, e, index, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        /// <summary>Delegates to EmitShrImmSaturatingNarrowOp with the ScalarZxZx flags.</summary>
+        public static void Uqshrn_S(ArmEmitterContext context)
+        {
+            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+        }
+
+        /// <summary>Delegates to EmitShrImmSaturatingNarrowOp with the VectorZxZx flags.</summary>
+        public static void Uqshrn_V(ArmEmitterContext context)
+        {
+            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+        }
+
+        /// <summary>
+        /// Per element: calls SoftFallback.UnsignedShlReg on the zero-extended Rn and Rm
+        /// elements with the third argument set to 1, and writes the results to Rd.
+        /// </summary>
+        public static void Urshl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand res = context.VectorZero();
+
+            int elems = op.GetBytesCount() >> op.Size;
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+                Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
+
+                Operand e = context.Call(new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlReg), ne, me, Const(1), Const(op.Size));
+
+                res = EmitVectorInsert(context, res, e, index, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        /// <summary>Delegates to EmitScalarShrImmOpZx with the Round flag.</summary>
+        public static void Urshr_S(ArmEmitterContext context)
+        {
+            EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
+        }
+
+        /// <summary>
+        /// Rounding logical shift right of every Rn element by the immediate, written to Rd.
+        /// The SSE2 path is used for every element size except 0.
+        /// </summary>
+        public static void Urshr_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            if (Optimizations.UseSse2 && op.Size > 0)
+            {
+                int shift = GetImmShr(op);
+                int eSize = 8 << op.Size;
+
+                Operand n = GetVec(op.Rn);
+
+                // Isolate bit (shift - 1) of each element in bit 0 (the rounding increment).
+                Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+                Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+                Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+                res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+                Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift));
+
+                Intrinsic addInst = X86PaddInstruction[op.Size];
+
+                res = context.AddIntrinsic(addInst, res, nSrl);
+
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                EmitVectorShrImmOpZx(context, ShrImmFlags.Round);
+            }
+        }
+
+        /// <summary>Delegates to EmitScalarShrImmOpZx with the Round and Accumulate flags.</summary>
+        public static void Ursra_S(ArmEmitterContext context)
+        {
+            EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+        }
+
+        /// <summary>
+        /// Rounding logical shift right of every Rn element by the immediate, added to Rd.
+        /// The SSE2 path is used for every element size except 0.
+        /// </summary>
+        public static void Ursra_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            if (Optimizations.UseSse2 && op.Size > 0)
+            {
+                int shift = GetImmShr(op);
+                int eSize = 8 << op.Size;
+
+                Operand d = GetVec(op.Rd);
+                Operand n = GetVec(op.Rn);
+
+                // Isolate bit (shift - 1) of each element in bit 0 (the rounding increment).
+                Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+                Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+                Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+                res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+                Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift));
+
+                Intrinsic addInst = X86PaddInstruction[op.Size];
+
+                res = context.AddIntrinsic(addInst, res, nSrl);
+                res = context.AddIntrinsic(addInst, res, d);
+
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+            }
+        }
+
+        /// <summary>
+        /// Per element: calls SoftFallback.UnsignedShlReg on the zero-extended Rn and Rm
+        /// elements with the third argument set to 0, and writes the results to Rd.
+        /// </summary>
+        public static void Ushl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand res = context.VectorZero();
+
+            int elems = op.GetBytesCount() >> op.Size;
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+                Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
+
+                Operand e = context.Call(new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlReg), ne, me, Const(0), Const(op.Size));
+
+                res = EmitVectorInsert(context, res, e, index, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        /// <summary>
+        /// Zero-extends the low half of Rn (the high half when the register size is 128 bits)
+        /// to double-width elements and shifts them left by the immediate.
+        /// </summary>
+        public static void Ushll_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            int shift = GetImmShl(op);
+
+            if (Optimizations.UseSse41)
+            {
+                Operand n = GetVec(op.Rn);
+
+                if (op.RegisterSize == RegisterSize.Simd128)
+                {
+                    // Bring the upper 8 bytes down so the widening below reads them.
+                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+                }
+
+                Intrinsic movzxInst = X86PmovzxInstruction[op.Size];
+
+                Operand res = context.AddIntrinsic(movzxInst, n);
+
+                if (shift != 0)
+                {
+                    Intrinsic sllInst = X86PsllInstruction[op.Size + 1];
+
+                    res = context.AddIntrinsic(sllInst, res, Const(shift));
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
+            }
+        }
+
+        /// <summary>Delegates to EmitShrImmOp with the ScalarZx flags.</summary>
+        public static void Ushr_S(ArmEmitterContext context)
+        {
+            EmitShrImmOp(context, ShrImmFlags.ScalarZx);
+        }
+
+        /// <summary>
+        /// Logical shift right of every Rn element by the immediate, written to Rd.
+        /// The SSE2 path is used for every element size except 0.
+        /// </summary>
+        public static void Ushr_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            if (Optimizations.UseSse2 && op.Size > 0)
+            {
+                int shift = GetImmShr(op);
+
+                Operand n = GetVec(op.Rn);
+
+                Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+                Operand res = context.AddIntrinsic(srlInst, n, Const(shift));
+
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                EmitShrImmOp(context, ShrImmFlags.VectorZx);
+            }
+        }
+
+        /// <summary>Delegates to EmitScalarShrImmOpZx with the Accumulate flag.</summary>
+        public static void Usra_S(ArmEmitterContext context)
+        {
+            EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
+        }
+
+        /// <summary>
+        /// Logical shift right of every Rn element by the immediate, added to Rd.
+        /// The SSE2 path is used for every element size except 0.
+        /// </summary>
+        public static void Usra_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            if (Optimizations.UseSse2 && op.Size > 0)
+            {
+                int shift = GetImmShr(op);
+
+                Operand d = GetVec(op.Rd);
+                Operand n = GetVec(op.Rn);
+
+                Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+                Operand res = context.AddIntrinsic(srlInst, n, Const(shift));
+
+                Intrinsic addInst = X86PaddInstruction[op.Size];
+
+                res = context.AddIntrinsic(addInst, res, d);
+
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(d, res);
+            }
+            else
+            {
+                EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate);
+            }
+        }
+
+        // Options for EmitShrImmOp. Scalar/Signed select the operand form; Round and
+        // Accumulate are modifiers that callers OR in.
+        [Flags]
+        private enum ShrImmFlags
+        {
+            Scalar = 1 << 0,
+            Signed = 1 << 1,
+
+            Round      = 1 << 2,
+            Accumulate = 1 << 3,
+
+            ScalarSx = Scalar | Signed,
+            ScalarZx = Scalar,
+
+            VectorSx = Signed,
+            VectorZx = 0
+        }
+
+        // Scalar, signed variant of EmitShrImmOp; "flags" carries the extra modifiers.
+        private static void EmitScalarShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
+        {
+            EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags);
+        }
+
+        // Scalar, unsigned variant of EmitShrImmOp; "flags" carries the extra modifiers.
+        private static void EmitScalarShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
+        {
+            EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags);
+        }
+
+        // Vector, signed variant of EmitShrImmOp; "flags" carries the extra modifiers.
+        private static void EmitVectorShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
+        {
+            EmitShrImmOp(context, ShrImmFlags.VectorSx | flags);
+        }
+
+        // Vector, unsigned variant of EmitShrImmOp; "flags" carries the extra modifiers.
+        private static void EmitVectorShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
+        {
+            EmitShrImmOp(context, ShrImmFlags.VectorZx | flags);
+        }
+
+        // Generic shift-right-by-immediate emitter. For each element of Rn (only element 0
+        // when Scalar is set) it optionally adds the rounding constant 1 << (shift - 1),
+        // shifts right (arithmetic when Signed, logical otherwise), optionally adds the
+        // matching Rd element, and inserts the result into a vector that starts zeroed.
+        private static void EmitShrImmOp(ArmEmitterContext context, ShrImmFlags flags)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            Operand res = context.VectorZero();
+
+            bool scalar     = (flags & ShrImmFlags.Scalar)     != 0;
+            bool signed     = (flags & ShrImmFlags.Signed)     != 0;
+            bool round      = (flags & ShrImmFlags.Round)      != 0;
+            bool accumulate = (flags & ShrImmFlags.Accumulate) != 0;
+
+            int shift = GetImmShr(op);
+
+            long roundConst = 1L << (shift - 1);
+
+            int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand e = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
+
+                if (op.Size <= 2)
+                {
+                    if (round)
+                    {
+                        e = context.Add(e, Const(roundConst));
+                    }
+
+                    e = signed
+                        ? context.ShiftRightSI(e, Const(shift))
+                        : context.ShiftRightUI(e, Const(shift));
+                }
+                else /* if (op.Size == 3) */
+                {
+                    // 64-bit elements go through the software helper.
+                    e = EmitShrImm64(context, e, signed, round ? roundConst : 0L, shift);
+                }
+
+                if (accumulate)
+                {
+                    Operand de = EmitVectorExtract(context, op.Rd, index, op.Size, signed);
+
+                    e = context.Add(e, de);
+                }
+
+                res = EmitVectorInsert(context, res, e, index, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        // Per-element narrowing logical shift right, optionally rounded. Results go to the
+        // upper half of Rd (lower half preserved) when the register size is 128 bits,
+        // otherwise to the lower half of a zeroed vector.
+        private static void EmitVectorShrImmNarrowOpZx(ArmEmitterContext context, bool round)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            int shift = GetImmShr(op);
+
+            long roundConst = 1L << (shift - 1);
+
+            int elems = 8 >> op.Size;
+
+            int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+            Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand e = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);
+
+                if (round)
+                {
+                    e = context.Add(e, Const(roundConst));
+                }
+
+                e = context.ShiftRightUI(e, Const(shift));
+
+                res = EmitVectorInsert(context, res, e, part + index, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        // Options for EmitShrImmSaturatingNarrowOp. SignedSrc/SignedDst give the signedness
+        // of the source and of the saturated destination; Round is a modifier.
+        [Flags]
+        private enum ShrImmSaturatingNarrowFlags
+        {
+            Scalar    = 1 << 0,
+            SignedSrc = 1 << 1,
+            SignedDst = 1 << 2,
+
+            Round = 1 << 3,
+
+            ScalarSxSx = Scalar | SignedSrc | SignedDst,
+            ScalarSxZx = Scalar | SignedSrc,
+            ScalarZxZx = Scalar,
+
+            VectorSxSx = SignedSrc | SignedDst,
+            VectorSxZx = SignedSrc,
+            VectorZxZx = 0
+        }
+
+        // Rounding variant of EmitShrImmSaturatingNarrowOp.
+        private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+        {
+            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags);
+        }
+
+        // Per-element shift right by immediate (optionally rounded), saturated through
+        // EmitSatQ and narrowed to half width. Vector forms with a 128-bit register size
+        // write the upper half of Rd and keep the lower half; every other form starts
+        // from a zeroed vector.
+        private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            bool scalar    = (flags & ShrImmSaturatingNarrowFlags.Scalar)    != 0;
+            bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0;
+            bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0;
+            bool round     = (flags & ShrImmSaturatingNarrowFlags.Round)     != 0;
+
+            int shift = GetImmShr(op);
+
+            long roundConst = 1L << (shift - 1);
+
+            int elems = !scalar ? 8 >> op.Size : 1;
+
+            int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;
+
+            Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand e = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);
+
+                if (op.Size <= 1 || !round)
+                {
+                    if (round)
+                    {
+                        e = context.Add(e, Const(roundConst));
+                    }
+
+                    e = signedSrc
+                        ? context.ShiftRightSI(e, Const(shift))
+                        : context.ShiftRightUI(e, Const(shift));
+                }
+                else /* if (op.Size == 2 && round) */
+                {
+                    e = EmitShrImm64(context, e, signedSrc, roundConst, shift); // shift <= 32
+                }
+
+                e = EmitSatQ(context, e, op.Size, signedSrc, signedDst);
+
+                res = EmitVectorInsert(context, res, e, part + index, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        // dst64 = (Int(src64, signed) + roundConst) >> shift;
+        private static Operand EmitShrImm64(
+            ArmEmitterContext context,
+            Operand value,
+            bool signed,
+            long roundConst,
+            int shift)
+        {
+            Delegate dlg = signed
+                ? (Delegate)new _S64_S64_S64_S32(SoftFallback.SignedShrImm64)
+                : (Delegate)new _U64_U64_S64_S32(SoftFallback.UnsignedShrImm64);
+
+            return context.Call(dlg, value, Const(roundConst), Const(shift));
+        }
+
+        // Signed variant of EmitVectorShImmWidenBinaryOp.
+        private static void EmitVectorShImmWidenBinarySx(ArmEmitterContext context, Func2I emit, int imm)
+        {
+            EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: true);
+        }
+
+        // Unsigned variant of EmitVectorShImmWidenBinaryOp.
+        private static void EmitVectorShImmWidenBinaryOp_Zx_Unused() { }
+        private static void EmitVectorShImmWidenBinaryZx(ArmEmitterContext context, Func2I emit, int imm)
+        {
+            EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: false);
+        }
+
+        // Applies "emit" to each extended element of the low half of Rn (the high half
+        // when the register size is 128 bits) and the constant "imm", and inserts the
+        // results as double-width elements.
+        private static void EmitVectorShImmWidenBinaryOp(ArmEmitterContext context, Func2I emit, int imm, bool signed)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand res = context.VectorZero();
+
+            int elems = 8 >> op.Size;
+
+            int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+
+                res = EmitVectorInsert(context, res, emit(ne, Const(imm)), index, op.Size + 1);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSystem.cs b/ARMeilleure/Instructions/InstEmitSystem.cs
new file mode 100644
index 000000000..eeb53c1fe
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSystem.cs
@@ -0,0 +1,138 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+        // DC ZVA zeroes a block of (4 << DczSizeLog2) bytes.
+        private const int DczSizeLog2 = 4;
+
+        /// <summary>Emits nothing: the instruction is executed as a no-op.</summary>
+        public static void Hint(ArmEmitterContext context)
+        {
+            // Execute as no-op.
+        }
+
+        /// <summary>Emits nothing: the instruction is executed as a no-op.</summary>
+        public static void Isb(ArmEmitterContext context)
+        {
+            // Execute as no-op.
+        }
+
+        /// <summary>
+        /// Emits a call to the NativeInterface getter selected by the packed register id
+        /// and stores the result in Rt.
+        /// </summary>
+        /// <exception cref="NotImplementedException">The packed register id has no getter here.</exception>
+        public static void Mrs(ArmEmitterContext context)
+        {
+            OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+            Delegate dlg;
+
+            switch (GetPackedId(op))
+            {
+                case 0b11_011_0000_0000_001: dlg = new _U64(NativeInterface.GetCtrEl0);    break;
+                case 0b11_011_0000_0000_111: dlg = new _U64(NativeInterface.GetDczidEl0);  break;
+                case 0b11_011_0100_0100_000: dlg = new _U64(NativeInterface.GetFpcr);      break;
+                case 0b11_011_0100_0100_001: dlg = new _U64(NativeInterface.GetFpsr);      break;
+                case 0b11_011_1101_0000_010: dlg = new _U64(NativeInterface.GetTpidrEl0);  break;
+                case 0b11_011_1101_0000_011: dlg = new _U64(NativeInterface.GetTpidr);     break;
+                case 0b11_011_1110_0000_000: dlg = new _U64(NativeInterface.GetCntfrqEl0); break;
+                case 0b11_011_1110_0000_001: dlg = new _U64(NativeInterface.GetCntpctEl0); break;
+
+                default: throw new NotImplementedException($"Unknown MRS 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+            }
+
+            SetIntOrZR(context, op.Rt, context.Call(dlg));
+        }
+
+        /// <summary>
+        /// Emits a call to the NativeInterface setter selected by the packed register id,
+        /// passing the value of Rt.
+        /// </summary>
+        /// <exception cref="NotImplementedException">The packed register id has no setter here.</exception>
+        public static void Msr(ArmEmitterContext context)
+        {
+            OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+            Delegate dlg;
+
+            switch (GetPackedId(op))
+            {
+                case 0b11_011_0100_0100_000: dlg = new _Void_U64(NativeInterface.SetFpcr);     break;
+                case 0b11_011_0100_0100_001: dlg = new _Void_U64(NativeInterface.SetFpsr);     break;
+                case 0b11_011_1101_0000_010: dlg = new _Void_U64(NativeInterface.SetTpidrEl0); break;
+
+                default: throw new NotImplementedException($"Unknown MSR 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+            }
+
+            context.Call(dlg, GetIntOrZR(context, op.Rt));
+        }
+
+        /// <summary>Emits nothing.</summary>
+        public static void Nop(ArmEmitterContext context)
+        {
+            // Do nothing.
+        }
+
+        /// <summary>
+        /// Emits DC ZVA as a run of 64-bit zero stores covering one block; every other
+        /// packed id emits nothing.
+        /// </summary>
+        public static void Sys(ArmEmitterContext context)
+        {
+            // This instruction is used to do some operations on the CPU like cache invalidation,
+            // address translation and the like.
+            // Apart from DC ZVA, we treat it as no-op here since we don't have any cache being
+            // emulated anyway.
+            OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+            switch (GetPackedId(op))
+            {
+                case 0b11_011_0111_0100_001:
+                {
+                    // DC ZVA
+                    const long blockSize = 4L << DczSizeLog2;
+
+                    // The block that gets zeroed is the naturally aligned one containing the
+                    // address, so round the address in Rt down to the block size first.
+                    Operand t = context.BitwiseAnd(GetIntOrZR(context, op.Rt), Const(~(blockSize - 1)));
+
+                    for (long offset = 0; offset < blockSize; offset += 8)
+                    {
+                        Operand address = context.Add(t, Const(offset));
+
+                        context.Call(new _Void_U64_U64(NativeInterface.WriteUInt64), address, Const(0L));
+                    }
+
+                    break;
+                }
+
+                // No-op
+                case 0b11_011_0111_1110_001: //DC CIVAC
+                    break;
+            }
+        }
+
+        /// <summary>Packs Op0, Op1, CRn, CRm and Op2 into a single integer id (Op2 in the low bits).</summary>
+        private static int GetPackedId(OpCodeSystem op)
+        {
+            int id;
+
+            id  = op.Op2 << 0;
+            id |= op.CRm << 3;
+            id |= op.CRn << 7;
+            id |= op.Op1 << 11;
+            id |= op.Op0 << 14;
+
+            return id;
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
new file mode 100644
index 000000000..e70ca34bc
--- /dev/null
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -0,0 +1,459 @@
+namespace ARMeilleure.Instructions
+{
+    enum InstName
+    {
+        // Base (AArch64)
+        Adc,
+        Adcs,
+        Add,
+        Adds,
+        Adr,
+        Adrp,
+        And,
+        Ands,
+        Asrv,
+        B,
+        B_Cond,
+        Bfm,
+        Bic,
+        Bics,
+        Bl,
+        Blr,
+        Br,
+        Brk,
+        Cbnz,
+        Cbz,
+        Ccmn,
+        Ccmp,
+        Clrex,
+        Cls,
+        Clz,
+        Crc32b,
+        Crc32h,
+        Crc32w,
+        Crc32x,
+        Crc32cb,
+        Crc32ch,
+        Crc32cw,
+        Crc32cx,
+        Csel,
+        Csinc,
+        Csinv,
+        Csneg,
+        Dmb,
+        Dsb,
+        Eon,
+        Eor,
+        Extr,
+        Hint,
+        Isb,
+        Ldar,
+        Ldaxp,
+        Ldaxr,
+        Ldp,
+        Ldr,
+        Ldr_Literal,
+        Ldrs,
+        Ldxr,
+        Ldxp,
+        Lslv,
+        Lsrv,
+        Madd,
+        Movk,
+        Movn,
+        Movz,
+        Mrs,
+        Msr,
+        Msub,
+        Nop,
+        Orn,
+        Orr,
+        Pfrm,
+        Rbit,
+        Ret,
+        Rev16,
+        Rev32,
+        Rev64,
+        Rorv,
+        Sbc,
+        Sbcs,
+        Sbfm,
+        Sdiv,
+        Smaddl,
+        Smsubl,
+        Smulh,
+        Stlr,
+        Stlxp,
+        Stlxr,
+        Stp,
+        Str,
+        Stxp,
+        Stxr,
+        Sub,
+        Subs,
+        Svc,
+        Sys,
+        Tbnz,
+        Tbz,
+        Ubfm,
+        Udiv,
+        Umaddl,
+        Umsubl,
+        Umulh,
+        Und,
+
+        // FP & SIMD (AArch64)
+        Abs_S,
+        Abs_V,
+        Add_S,
+        Add_V,
+        Addhn_V,
+        Addp_S,
+        Addp_V,
+        Addv_V,
+        Aesd_V,
+        Aese_V,
+        Aesimc_V,
+        Aesmc_V,
+        And_V,
+        Bic_V,
+        Bic_Vi,
+        Bif_V,
+        Bit_V,
+        Bsl_V,
+        Cls_V,
+        Clz_V,
+        Cmeq_S,
+        Cmeq_V,
+        Cmge_S,
+        Cmge_V,
+        Cmgt_S,
+        Cmgt_V,
+        Cmhi_S,
+        Cmhi_V,
+        Cmhs_S,
+        Cmhs_V,
+        Cmle_S,
+        Cmle_V,
+        Cmlt_S,
+        Cmlt_V,
+        Cmtst_S,
+        Cmtst_V,
+        Cnt_V,
+        Dup_Gp,
+        Dup_S,
+        Dup_V,
+        Eor_V,
+        Ext_V,
+        Fabd_S,
+        Fabd_V,
+        Fabs_S,
+        Fabs_V,
+        Fadd_S,
+        Fadd_V,
+        Faddp_S,
+        Faddp_V,
+        Fccmp_S,
+        Fccmpe_S,
+        Fcmeq_S,
+        Fcmeq_V,
+        Fcmge_S,
+        Fcmge_V,
+        Fcmgt_S,
+        Fcmgt_V,
+        Fcmle_S,
+        Fcmle_V,
+        Fcmlt_S,
+        Fcmlt_V,
+        Fcmp_S,
+        Fcmpe_S,
+        Fcsel_S,
+        Fcvt_S,
+        Fcvtas_Gp,
+        Fcvtau_Gp,
+        Fcvtl_V,
+        Fcvtms_Gp,
+        Fcvtmu_Gp,
+        Fcvtn_V,
+        Fcvtns_S,
+        Fcvtns_V,
+        Fcvtnu_S,
+        Fcvtnu_V,
+        Fcvtps_Gp,
+        Fcvtpu_Gp,
+        Fcvtzs_Gp,
+        Fcvtzs_Gp_Fixed,
+        Fcvtzs_S,
+        Fcvtzs_V,
+        Fcvtzs_V_Fixed,
+        Fcvtzu_Gp,
+        Fcvtzu_Gp_Fixed,
+        Fcvtzu_S,
+        Fcvtzu_V,
+        Fcvtzu_V_Fixed,
+        Fdiv_S,
+        Fdiv_V,
+        Fmadd_S,
+        Fmax_S,
+        Fmax_V,
+        Fmaxnm_S,
+        Fmaxnm_V,
+        Fmaxp_V,
+        Fmin_S,
+        Fmin_V,
+        Fminnm_S,
+        Fminnm_V,
+        Fminp_V,
+        Fmla_Se,
+        Fmla_V,
+        Fmla_Ve,
+        Fmls_Se,
+        Fmls_V,
+        Fmls_Ve,
+        Fmov_S,
+        Fmov_Si,
+        Fmov_Vi,
+        Fmov_Ftoi,
+        Fmov_Itof,
+        Fmov_Ftoi1,
+        Fmov_Itof1,
+        Fmsub_S,
+        Fmul_S,
+        Fmul_Se,
+        Fmul_V,
+        Fmul_Ve,
+        Fmulx_S,
+        Fmulx_Se,
+        Fmulx_V,
+        Fmulx_Ve,
+        Fneg_S,
+        Fneg_V,
+        Fnmadd_S,
+        Fnmsub_S,
+        Fnmul_S,
+        Frecpe_S,
+        Frecpe_V,
+        Frecps_S,
+        Frecps_V,
+        Frecpx_S,
+        Frinta_S,
+        Frinta_V,
+        Frinti_S,
+        Frinti_V,
+        Frintm_S,
+        Frintm_V,
+        Frintn_S,
+        Frintn_V,
+        Frintp_S,
+        Frintp_V,
+        Frintx_S,
+        Frintx_V,
+        Frintz_S,
+        Frintz_V,
+        Frsqrte_S,
+        Frsqrte_V,
+        Frsqrts_S,
+        Frsqrts_V,
+        Fsqrt_S,
+        Fsqrt_V,
+        Fsub_S,
+        Fsub_V,
+        Ins_Gp,
+        Ins_V,
+        Ld__Vms,
+        Ld__Vss,
+        Mla_V,
+        Mla_Ve,
+        Mls_V,
+        Mls_Ve,
+        Movi_V,
+        Mul_V,
+        Mul_Ve,
+        Mvni_V,
+        Neg_S,
+        Neg_V,
+        Not_V,
+        Orn_V,
+        Orr_V,
+        Orr_Vi,
+        Raddhn_V,
+        Rbit_V,
+        Rev16_V,
+        Rev32_V,
+        Rev64_V,
+        Rshrn_V,
+        Rsubhn_V,
+        Saba_V,
+        Sabal_V,
+        Sabd_V,
+        Sabdl_V,
+        Sadalp_V,
+        Saddl_V,
+        Saddlp_V,
+        Saddlv_V,
+        Saddw_V,
+        Scvtf_Gp,
+        Scvtf_Gp_Fixed,
+        Scvtf_S,
+        Scvtf_V,
+        Scvtf_V_Fixed,
+        Sha1c_V,
+        Sha1h_V,
+        Sha1m_V,
+        Sha1p_V,
+        Sha1su0_V,
+        Sha1su1_V,
+        Sha256h_V,
+        Sha256h2_V,
+        Sha256su0_V,
+        Sha256su1_V,
+        Shadd_V,
+        Shl_S,
+        Shl_V,
+        Shll_V,
+        Shrn_V,
+        Shsub_V,
+        Sli_V,
+        Smax_V,
+        Smaxp_V,
+        Smaxv_V,
+        Smin_V,
+        Sminp_V,
+        Sminv_V,
+        Smlal_V,
+        Smlal_Ve,
+        Smlsl_V,
+        Smlsl_Ve,
+        Smov_S,
+        Smull_V,
+        Smull_Ve,
+        Sqabs_S,
+        Sqabs_V,
+        Sqadd_S,
+        Sqadd_V,
+        Sqdmulh_S,
+        Sqdmulh_V,
+        Sqneg_S,
+        Sqneg_V,
+        Sqrdmulh_S,
+        Sqrdmulh_V,
+        Sqrshl_V,
+        Sqrshrn_S,
+        Sqrshrn_V,
+        Sqrshrun_S,
+        Sqrshrun_V,
+        Sqshl_V,
+        Sqshrn_S,
+        Sqshrn_V,
+        Sqshrun_S,
+        Sqshrun_V,
+        Sqsub_S,
+        Sqsub_V,
+        Sqxtn_S,
+        Sqxtn_V,
+        Sqxtun_S,
+        Sqxtun_V,
+        Srhadd_V,
+        Srshl_V,
+        Srshr_S,
+        Srshr_V,
+        Srsra_S,
+        Srsra_V,
+        Sshl_V,
+        Sshll_V,
+        Sshr_S,
+        Sshr_V,
+        Ssra_S,
+        Ssra_V,
+        Ssubl_V,
+        Ssubw_V,
+        St__Vms,
+        St__Vss,
+        Sub_S,
+        Sub_V,
+        Subhn_V,
+        Suqadd_S,
+        Suqadd_V,
+        Tbl_V,
+        Trn1_V,
+        Trn2_V,
+        Uaba_V,
+        Uabal_V,
+        Uabd_V,
+        Uabdl_V,
+        Uadalp_V,
+        Uaddl_V,
+        Uaddlp_V,
+        Uaddlv_V,
+        Uaddw_V,
+        Ucvtf_Gp,
+        Ucvtf_Gp_Fixed,
+        Ucvtf_S,
+        Ucvtf_V,
+        Ucvtf_V_Fixed,
+        Uhadd_V,
+        Uhsub_V,
+        Umax_V,
+        Umaxp_V,
+        Umaxv_V,
+        Umin_V,
+        Uminp_V,
+        Uminv_V,
+        Umlal_V,
+        Umlal_Ve,
+        Umlsl_V,
+        Umlsl_Ve,
+        Umov_S,
+        Umull_V,
+        Umull_Ve,
+        Uqadd_S,
+        Uqadd_V,
+        Uqrshl_V,
+        Uqrshrn_S,
+        Uqrshrn_V,
+        Uqshl_V,
+        Uqshrn_S,
+        Uqshrn_V,
+        Uqsub_S,
+        Uqsub_V,
+        Uqxtn_S,
+        Uqxtn_V,
+        Urhadd_V,
+        Urshl_V,
+        Urshr_S,
+        Urshr_V,
+        Ursra_S,
+        Ursra_V,
+        Ushl_V,
+        Ushll_V,
+        Ushr_S,
+        Ushr_V,
+        Usqadd_S,
+        Usqadd_V,
+        Usra_S,
+        Usra_V,
+        Usubl_V,
+        Usubw_V,
+        Uzp1_V,
+        Uzp2_V,
+        Xtn_V,
+        Zip1_V,
+        Zip2_V,
+
+        // Base (AArch32)
+        Blx,
+        Bx,
+        Cmp,
+        Ldm,
+        Ldrb,
+        Ldrd,
+        Ldrh,
+        Ldrsb,
+        Ldrsh,
+        Mov,
+        Stm,
+        Strb,
+        Strd,
+        Strh
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/NativeInterface.cs b/ARMeilleure/Instructions/NativeInterface.cs
new file mode 100644
index 000000000..3a1e91c8e
--- /dev/null
+++ b/ARMeilleure/Instructions/NativeInterface.cs
@@ -0,0 +1,367 @@
+using ARMeilleure.Memory;
+using ARMeilleure.State;
// State that NativeInterface keeps for one registered thread: the CPU
// execution context, the memory manager, and the bookkeeping used by the
// exclusive load/store helpers.
private class ThreadContext
{
    public ExecutionContext Context { get; }
    public MemoryManager    Memory  { get; }

    // Starts out as ulong.MaxValue; ClearExclusive() resets it to the same
    // value, so that value presumably means "no exclusive access pending".
    public ulong ExclusiveAddress   { get; set; } = ulong.MaxValue;

    // Value observed by the last exclusive read (low and high 64 bits).
    public ulong ExclusiveValueLow  { get; set; }
    public ulong ExclusiveValueHigh { get; set; }

    public ThreadContext(ExecutionContext context, MemoryManager memory)
    {
        Memory  = memory;
        Context = context;
    }
}
// Plain (non-exclusive) loads. Each one forwards to the memory manager of the
// current thread, reinterpreting the unsigned address as the signed value the
// memory manager API takes.
public static byte ReadByte(ulong address) => GetMemoryManager().ReadByte((long)address);

public static ushort ReadUInt16(ulong address) => GetMemoryManager().ReadUInt16((long)address);

public static uint ReadUInt32(ulong address) => GetMemoryManager().ReadUInt32((long)address);

public static ulong ReadUInt64(ulong address) => GetMemoryManager().ReadUInt64((long)address);

public static V128 ReadVector128(ulong address) => GetMemoryManager().ReadVector128((long)address);
// Exclusive byte store. Returns 0 when the store was performed and 1 when it
// was not.
public static int WriteByteExclusive(ulong address, byte value)
{
    // The store only proceeds when it targets the granule recorded by the
    // last exclusive read on this thread.
    if (_context.ExclusiveAddress != GetMaskedExclusiveAddress(address))
    {
        return 1;
    }

    // Write only if memory still holds the value seen by the exclusive read.
    bool exchanged = _context.Memory.AtomicCompareExchangeByte(
        (long)address,
        (byte)_context.ExclusiveValueLow,
        (byte)value);

    if (!exchanged)
    {
        return 1;
    }

    ClearExclusive();

    return 0;
}
// Exclusive 32-bit store. Returns 0 when the store was performed and 1 when
// it was not.
public static int WriteUInt32Exclusive(ulong address, uint value)
{
    // The store only proceeds when it targets the granule recorded by the
    // last exclusive read on this thread.
    if (_context.ExclusiveAddress != GetMaskedExclusiveAddress(address))
    {
        return 1;
    }

    // Write only if memory still holds the value seen by the exclusive read.
    bool exchanged = _context.Memory.AtomicCompareExchangeInt32(
        (long)address,
        (int)_context.ExclusiveValueLow,
        (int)value);

    if (!exchanged)
    {
        return 1;
    }

    ClearExclusive();

    return 0;
}
// Register-controlled shift of a signed element. Only the low byte of "shift"
// is used, read as a signed amount: positive shifts left, negative shifts
// right (optionally rounding), zero leaves the value untouched.
// The element width in bits is 8 << size.
public static long SignedShlReg(long value, long shift, bool round, int size)
{
    int elementBits = 8 << size;
    int amount      = (sbyte)shift;

    if (amount == 0)
    {
        return value;
    }

    if (amount < 0)
    {
        return SignedShrReg(value, -amount, round, elementBits);
    }

    // Shifting left by the full element width (or more) leaves nothing behind.
    return amount >= elementBits ? 0L : value << amount;
}

// Unsigned counterpart of SignedShlReg; same interpretation of "shift".
public static ulong UnsignedShlReg(ulong value, ulong shift, bool round, int size)
{
    int elementBits = 8 << size;
    int amount      = (sbyte)shift;

    if (amount == 0)
    {
        return value;
    }

    if (amount < 0)
    {
        return UnsignedShrReg(value, -amount, round, elementBits);
    }

    // Shifting left by the full element width (or more) leaves nothing behind.
    return amount >= elementBits ? 0UL : value << amount;
}
SignedShrReg(value, -shiftLsB, round, eSize); + } + else if (shiftLsB > 0) + { + if (shiftLsB >= eSize) + { + return SignedSignSatQ(value, eSize, context); + } + + if (eSize == 64) + { + long shl = value << shiftLsB; + long shr = shl >> shiftLsB; + + if (shr != value) + { + return SignedSignSatQ(value, eSize, context); + } + else /* if (shr == value) */ + { + return shl; + } + } + else /* if (eSize != 64) */ + { + return SignedSrcSignedDstSatQ(value << shiftLsB, size); + } + } + else /* if (shiftLsB == 0) */ + { + return value; + } + } + + public static ulong UnsignedShlRegSatQ(ulong value, ulong shift, bool round, int size) + { + ExecutionContext context = NativeInterface.GetContext(); + + int eSize = 8 << size; + + int shiftLsB = (sbyte)shift; + + if (shiftLsB < 0) + { + return UnsignedShrReg(value, -shiftLsB, round, eSize); + } + else if (shiftLsB > 0) + { + if (shiftLsB >= eSize) + { + return UnsignedSignSatQ(value, eSize, context); + } + + if (eSize == 64) + { + ulong shl = value << shiftLsB; + ulong shr = shl >> shiftLsB; + + if (shr != value) + { + return UnsignedSignSatQ(value, eSize, context); + } + else /* if (shr == value) */ + { + return shl; + } + } + else /* if (eSize != 64) */ + { + return UnsignedSrcUnsignedDstSatQ(value << shiftLsB, size); + } + } + else /* if (shiftLsB == 0) */ + { + return value; + } + } + + private static long SignedShrReg(long value, int shift, bool round, int eSize) // shift := [1, 128]; eSize := {8, 16, 32, 64}. 
// Unsigned right shift with optional rounding.
// shift := [1, 128]; eSize := {8, 16, 32, 64}.
private static ulong UnsignedShrReg(ulong value, int shift, bool round, int eSize)
{
    if (!round)
    {
        return shift >= eSize ? 0UL : value >> shift;
    }

    if (shift > 64)
    {
        return 0UL;
    }

    // Rounding adds half of the weight of the last bit shifted out.
    ulong half = 1UL << (shift - 1);
    ulong sum  = value + half;

    // For 64-bit elements the addition can wrap; the lost carry is bit 64.
    bool carried = eSize == 64 && sum < value && sum < half;

    if (shift == 64)
    {
        // Only the carry (if any) survives a 64-bit shift.
        return carried ? 1UL : 0UL;
    }

    ulong shifted = sum >> shift;

    // Re-insert the carry at the position bit 64 lands on after the shift.
    return carried ? shifted | (0x8000000000000000UL >> (shift - 1)) : shifted;
}
// Signed 64-bit right shift by an immediate in [1, 64].
// roundConst is either 0 (no rounding) or 1L << (shift - 1) (rounding).
public static long SignedShrImm64(long value, long roundConst, int shift)
{
    bool fullWidth = shift > 63; // i.e. shift == 64

    if (roundConst == 0L)
    {
        if (fullWidth)
        {
            // Only the sign is left after shifting out all 64 bits.
            return value < 0L ? -1L : 0L;
        }

        return value >> shift;
    }

    if (fullWidth)
    {
        return 0L;
    }

    long sum = value + roundConst;

    // A non-negative value whose sum turned negative means the add overflowed;
    // the true result is then non-negative, so shift logically.
    bool overflowed = (~value & (value ^ sum)) < 0L;

    return overflowed ? (long)((ulong)sum >> shift) : sum >> shift;
}

// Unsigned 64-bit right shift by an immediate in [1, 64].
// roundConst is either 0 (no rounding) or 1L << (shift - 1) (rounding).
public static ulong UnsignedShrImm64(ulong value, long roundConst, int shift)
{
    bool fullWidth = shift > 63; // i.e. shift == 64

    if (roundConst == 0L)
    {
        return fullWidth ? 0UL : value >> shift;
    }

    ulong bias = (ulong)roundConst;
    ulong sum  = value + bias;

    // The addition wrapped: the lost carry is bit 64 of the true sum.
    bool carried = sum < value && sum < bias;

    if (fullWidth)
    {
        // Only the carry (if any) survives a 64-bit shift.
        return carried ? 1UL : 0UL;
    }

    ulong shifted = sum >> shift;

    // Re-insert the carry at the position bit 64 lands on after the shift.
    return carried ? shifted | (0x8000000000000000UL >> (shift - 1)) : shifted;
}
// Saturating float/double -> integer conversions.
// NaN converts to 0, values at or beyond the target range clamp to the
// target's MinValue/MaxValue, everything else uses the C# cast (truncation
// towards zero).

public static int SatF32ToS32(float value)
{
    if (float.IsNaN(value))
    {
        return 0;
    }

    if (value >= int.MaxValue)
    {
        return int.MaxValue;
    }

    if (value <= int.MinValue)
    {
        return int.MinValue;
    }

    return (int)value;
}

public static long SatF32ToS64(float value)
{
    if (float.IsNaN(value))
    {
        return 0;
    }

    if (value >= long.MaxValue)
    {
        return long.MaxValue;
    }

    if (value <= long.MinValue)
    {
        return long.MinValue;
    }

    return (long)value;
}

public static uint SatF32ToU32(float value)
{
    if (float.IsNaN(value))
    {
        return 0;
    }

    if (value >= uint.MaxValue)
    {
        return uint.MaxValue;
    }

    if (value <= uint.MinValue)
    {
        return uint.MinValue;
    }

    return (uint)value;
}

public static ulong SatF32ToU64(float value)
{
    if (float.IsNaN(value))
    {
        return 0;
    }

    if (value >= ulong.MaxValue)
    {
        return ulong.MaxValue;
    }

    if (value <= ulong.MinValue)
    {
        return ulong.MinValue;
    }

    return (ulong)value;
}

public static int SatF64ToS32(double value)
{
    if (double.IsNaN(value))
    {
        return 0;
    }

    if (value >= int.MaxValue)
    {
        return int.MaxValue;
    }

    if (value <= int.MinValue)
    {
        return int.MinValue;
    }

    return (int)value;
}

public static long SatF64ToS64(double value)
{
    if (double.IsNaN(value))
    {
        return 0;
    }

    if (value >= long.MaxValue)
    {
        return long.MaxValue;
    }

    if (value <= long.MinValue)
    {
        return long.MinValue;
    }

    return (long)value;
}
// Saturating double -> ulong conversion: NaN gives 0, out-of-range values
// clamp to ulong.MinValue / ulong.MaxValue, the rest is truncated by the cast.
public static ulong SatF64ToU64(double value)
{
    if (double.IsNaN(value))
    {
        return 0;
    }

    if (value >= ulong.MaxValue)
    {
        return ulong.MaxValue;
    }

    if (value <= ulong.MinValue)
    {
        return ulong.MinValue;
    }

    return (ulong)value;
}
// Unsigned saturating subtraction: returns op1 - op2, or ulong.MinValue with
// the FPSR.Qc flag set when the true result would be negative.
public static ulong BinaryUnsignedSatQSub(ulong op1, ulong op2)
{
    // Fetched up front, exactly like the other saturating helpers do.
    ExecutionContext context = NativeInterface.GetContext();

    if (op1 >= op2)
    {
        return op1 - op2;
    }

    context.Fpsr |= FPSR.Qc;

    return ulong.MinValue;
}
// Counts how many bits directly below the top bit of a size-bit value are
// equal to that top bit.
// size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
public static ulong CountLeadingSigns(ulong value, int size)
{
    // A set bit here marks a position whose bit differs from the one above it.
    ulong transitions = value ^ (value >> 1);

    int top   = size - 2;
    int count = 0;

    // Walk down from bit (size - 2) until the first transition is found.
    while (count <= top && ((transitions >> (top - count)) & 1UL) == 0UL)
    {
        count++;
    }

    return (ulong)count;
}
// Population count of the low 8 bits of value.
// "size" is 8 (SIMD&FP Inst.).
public static ulong CountSetBits8(ulong value)
{
    // Sum adjacent bits into 2-bit fields, then 2-bit fields into nibbles,
    // then the two nibbles into the final count.
    ulong pairs   = (value & 0x55ul) + ((value >> 1) & 0x55ul);
    ulong nibbles = (pairs & 0x33ul) + ((pairs >> 2) & 0x33ul);

    return (nibbles & 0x0ful) + (nibbles >> 4);
}
// Bit-reflected polynomials for CRC-32 and CRC-32C.
private const uint Crc32RevPoly  = 0xedb88320;
private const uint Crc32cRevPoly = 0x82f63b78;

// CRC-32 accumulate over 1, 2, 4 and 8 bytes of "value" (lowest byte first).
public static uint Crc32b(uint crc, byte value)
{
    return Crc32(crc, Crc32RevPoly, value);
}

public static uint Crc32h(uint crc, ushort value)
{
    return Crc32h(crc, Crc32RevPoly, value);
}

public static uint Crc32w(uint crc, uint value)
{
    return Crc32w(crc, Crc32RevPoly, value);
}

public static uint Crc32x(uint crc, ulong value)
{
    return Crc32x(crc, Crc32RevPoly, value);
}

// CRC-32C accumulate over 1, 2, 4 and 8 bytes of "value" (lowest byte first).
public static uint Crc32cb(uint crc, byte value)
{
    return Crc32(crc, Crc32cRevPoly, value);
}

public static uint Crc32ch(uint crc, ushort value)
{
    return Crc32h(crc, Crc32cRevPoly, value);
}

public static uint Crc32cw(uint crc, uint value)
{
    return Crc32w(crc, Crc32cRevPoly, value);
}

public static uint Crc32cx(uint crc, ulong value)
{
    return Crc32x(crc, Crc32cRevPoly, value);
}

private static uint Crc32h(uint crc, uint poly, ushort val)
{
    return Crc32Bytes(crc, poly, val, sizeof(ushort));
}

private static uint Crc32w(uint crc, uint poly, uint val)
{
    return Crc32Bytes(crc, poly, val, sizeof(uint));
}

private static uint Crc32x(uint crc, uint poly, ulong val)
{
    return Crc32Bytes(crc, poly, val, sizeof(ulong));
}

// Feeds the lowest byteCount bytes of val into the CRC, least significant
// byte first.
private static uint Crc32Bytes(uint crc, uint poly, ulong val, int byteCount)
{
    for (int index = 0; index < byteCount; index++)
    {
        crc = Crc32(crc, poly, (byte)(val >> (index * 8)));
    }

    return crc;
}

// Folds one byte into the CRC, one bit at a time (reflected algorithm).
private static uint Crc32(uint crc, uint poly, byte val)
{
    crc ^= val;

    for (int step = 0; step < 8; step++)
    {
        bool lowBitSet = (crc & 1) != 0;

        crc >>= 1;

        if (lowBitSet)
        {
            crc ^= poly;
        }
    }

    return crc;
}
V128 roundKey) + { + return CryptoHelper.AesInvSubBytes(CryptoHelper.AesInvShiftRows(value ^ roundKey)); + } + + public static V128 Encrypt(V128 value, V128 roundKey) + { + return CryptoHelper.AesSubBytes(CryptoHelper.AesShiftRows(value ^ roundKey)); + } + + public static V128 InverseMixColumns(V128 value) + { + return CryptoHelper.AesInvMixColumns(value); + } + + public static V128 MixColumns(V128 value) + { + return CryptoHelper.AesMixColumns(value); + } +#endregion + +#region "Sha1" + public static V128 HashChoose(V128 hash_abcd, uint hash_e, V128 wk) + { + for (int e = 0; e <= 3; e++) + { + uint t = ShaChoose(hash_abcd.GetUInt32(1), + hash_abcd.GetUInt32(2), + hash_abcd.GetUInt32(3)); + + hash_e += Rol(hash_abcd.GetUInt32(0), 5) + t + wk.GetUInt32(e); + + t = Rol(hash_abcd.GetUInt32(1), 30); + + hash_abcd.Insert(1, t); + + Rol32_160(ref hash_e, ref hash_abcd); + } + + return hash_abcd; + } + + public static uint FixedRotate(uint hash_e) + { + return hash_e.Rol(30); + } + + public static V128 HashMajority(V128 hash_abcd, uint hash_e, V128 wk) + { + for (int e = 0; e <= 3; e++) + { + uint t = ShaMajority(hash_abcd.GetUInt32(1), + hash_abcd.GetUInt32(2), + hash_abcd.GetUInt32(3)); + + hash_e += Rol(hash_abcd.GetUInt32(0), 5) + t + wk.GetUInt32(e); + + t = Rol(hash_abcd.GetUInt32(1), 30); + + hash_abcd.Insert(1, t); + + Rol32_160(ref hash_e, ref hash_abcd); + } + + return hash_abcd; + } + + public static V128 HashParity(V128 hash_abcd, uint hash_e, V128 wk) + { + for (int e = 0; e <= 3; e++) + { + uint t = ShaParity(hash_abcd.GetUInt32(1), + hash_abcd.GetUInt32(2), + hash_abcd.GetUInt32(3)); + + hash_e += Rol(hash_abcd.GetUInt32(0), 5) + t + wk.GetUInt32(e); + + t = Rol(hash_abcd.GetUInt32(1), 30); + + hash_abcd.Insert(1, t); + + Rol32_160(ref hash_e, ref hash_abcd); + } + + return hash_abcd; + } + + public static V128 Sha1SchedulePart1(V128 w0_3, V128 w4_7, V128 w8_11) + { + ulong t2 = w4_7.GetUInt64(0); + ulong t1 = w0_3.GetUInt64(1); + + V128 result = new 
// Bitwise "choose": each result bit comes from y where x is 1, from z where
// x is 0.
private static uint ShaChoose(uint x, uint y, uint z)
{
    uint fromY = x & y;
    uint fromZ = ~x & z;

    return fromY | fromZ;
}

// Bitwise "majority": each result bit is the value held by at least two of
// the three inputs.
private static uint ShaMajority(uint x, uint y, uint z)
{
    return (x & y) ^ (x & z) ^ (y & z);
}

// Bitwise "parity": exclusive-or of the three inputs.
private static uint ShaParity(uint x, uint y, uint z)
{
    uint xy = x ^ y;

    return xy ^ z;
}
e + 1 : 0); + + elt = elt.Ror(7) ^ elt.Ror(18) ^ elt.Lsr(3); + + elt += w0_3.GetUInt32(e); + + result.Insert(e, elt); + } + + return result; + } + + public static V128 Sha256SchedulePart2(V128 w0_3, V128 w8_11, V128 w12_15) + { + V128 result = new V128(); + + ulong t1 = w12_15.GetUInt64(1); + + for (int e = 0; e <= 1; e++) + { + uint elt = t1.ULongPart(e); + + elt = elt.Ror(17) ^ elt.Ror(19) ^ elt.Lsr(10); + + elt += w0_3.GetUInt32(e) + w8_11.GetUInt32(e + 1); + + result.Insert(e, elt); + } + + t1 = result.GetUInt64(0); + + for (int e = 2; e <= 3; e++) + { + uint elt = t1.ULongPart(e - 2); + + elt = elt.Ror(17) ^ elt.Ror(19) ^ elt.Lsr(10); + + elt += w0_3.GetUInt32(e) + (e == 2 ? w8_11 : w12_15).GetUInt32(e == 2 ? 3 : 0); + + result.Insert(e, elt); + } + + return result; + } + + private static V128 Sha256Hash(V128 x, V128 y, V128 w, bool part1) + { + for (int e = 0; e <= 3; e++) + { + uint chs = ShaChoose(y.GetUInt32(0), + y.GetUInt32(1), + y.GetUInt32(2)); + + uint maj = ShaMajority(x.GetUInt32(0), + x.GetUInt32(1), + x.GetUInt32(2)); + + uint t1 = y.GetUInt32(3) + ShaHashSigma1(y.GetUInt32(0)) + chs + w.GetUInt32(e); + + uint t2 = t1 + x.GetUInt32(3); + + x.Insert(3, t2); + + t2 = t1 + ShaHashSigma0(x.GetUInt32(0)) + maj; + + y.Insert(3, t2); + + Rol32_256(ref y, ref x); + } + + return part1 ? 
// Reverses the order of the low 8 bits of value; higher input bits are
// discarded by the masks.
public static uint ReverseBits8(uint value)
{
    // Swap neighbouring bits, then bit pairs, then the two nibbles.
    uint bits  = ((value >> 1) & 0x55) | ((value & 0x55) << 1);
    uint pairs = ((bits >> 2) & 0x33) | ((bits & 0x33) << 2);

    return (pairs >> 4) | ((pairs & 0x0f) << 4);
}

// Reverses the order of all 32 bits of value.
public static uint ReverseBits32(uint value)
{
    // Swap progressively larger groups: bits, pairs, nibbles, bytes, halves.
    uint bits    = ((value   >> 1) & 0x55555555) | ((value   & 0x55555555) << 1);
    uint pairs   = ((bits    >> 2) & 0x33333333) | ((bits    & 0x33333333) << 2);
    uint nibbles = ((pairs   >> 4) & 0x0f0f0f0f) | ((pairs   & 0x0f0f0f0f) << 4);
    uint bytes   = ((nibbles >> 8) & 0x00ff00ff) | ((nibbles & 0x00ff00ff) << 8);

    return (bytes >> 16) | (bytes << 16);
}

// Reverses the order of all 64 bits of value.
public static ulong ReverseBits64(ulong value)
{
    // Swap progressively larger groups: bits, pairs, nibbles, bytes,
    // half-words, words.
    ulong bits    = ((value   >> 1 ) & 0x5555555555555555) | ((value   & 0x5555555555555555) << 1 );
    ulong pairs   = ((bits    >> 2 ) & 0x3333333333333333) | ((bits    & 0x3333333333333333) << 2 );
    ulong nibbles = ((pairs   >> 4 ) & 0x0f0f0f0f0f0f0f0f) | ((pairs   & 0x0f0f0f0f0f0f0f0f) << 4 );
    ulong bytes   = ((nibbles >> 8 ) & 0x00ff00ff00ff00ff) | ((nibbles & 0x00ff00ff00ff00ff) << 8 );
    ulong halves  = ((bytes   >> 16) & 0x0000ffff0000ffff) | ((bytes   & 0x0000ffff0000ffff) << 16);

    return (halves >> 32) | (halves << 32);
}
/// <summary>
/// Swaps the two bytes inside every 16-bit halfword of <paramref name="value"/>.
/// </summary>
public static ulong ReverseBytes16_64(ulong value) => ReverseBytes(value, RevSize.Rev16);

/// <summary>
/// Reverses the four bytes inside every 32-bit word of <paramref name="value"/>.
/// </summary>
public static ulong ReverseBytes32_64(ulong value) => ReverseBytes(value, RevSize.Rev32);

/// <summary>Width of the container whose bytes are reversed by <see cref="ReverseBytes"/>.</summary>
private enum RevSize
{
    Rev16,
    Rev32,
    Rev64
}

/// <summary>
/// Reverses the byte order inside each <paramref name="size"/>-wide container of a 64-bit value.
/// </summary>
/// <param name="value">The 64-bit value to transform.</param>
/// <param name="size">Container width: 16, 32 or 64 bits.</param>
/// <returns>The value with the bytes of every container reversed.</returns>
/// <exception cref="ArgumentException"><paramref name="size"/> is not a defined <see cref="RevSize"/>.</exception>
private static ulong ReverseBytes(ulong value, RevSize size)
{
    // Stage 1: swap the bytes of every halfword. This is the complete answer for Rev16.
    value = ((value & 0xff00ff00ff00ff00) >> 8) | ((value & 0x00ff00ff00ff00ff) << 8);

    if (size == RevSize.Rev16)
    {
        return value;
    }

    // Stage 2: swap the halfwords of every word, which completes a 32-bit byte reversal.
    value = ((value & 0xffff0000ffff0000) >> 16) | ((value & 0x0000ffff0000ffff) << 16);

    if (size == RevSize.Rev32)
    {
        return value;
    }

    // Stage 3: swap the two words, which completes a 64-bit byte reversal.
    value = ((value & 0xffffffff00000000) >> 32) | ((value & 0x00000000ffffffff) << 32);

    if (size == RevSize.Rev64)
    {
        return value;
    }

    // The single-string ArgumentException constructor takes a message, not a parameter name,
    // so pass both to get a readable message and a populated ParamName.
    throw new ArgumentException($"Invalid reversal size \"{size}\".", nameof(size));
}
/// <summary>
/// Converts the half-precision value encoded in <paramref name="valueBits"/> to single precision.
/// </summary>
/// <param name="valueBits">Raw 16-bit encoding of the source value.</param>
/// <returns>The converted single-precision value.</returns>
public static float FPConvert(ushort valueBits)
{
    ExecutionContext context = NativeInterface.GetContext();

    double real = valueBits.FPUnpackCv(out FPType type, out bool sign, context);

    switch (type)
    {
        case FPType.SNaN:
        case FPType.QNaN:
        {
            // With FPCR.Dn set the default NaN replaces the NaN derived from the input bits.
            bool useDefaultNaN = (context.Fpcr & FPCR.Dn) != 0;

            float nan = useDefaultNaN ? FPDefaultNaN() : FPConvertNaN(valueBits);

            // Only a signalling NaN reports Invalid Operation.
            if (type == FPType.SNaN)
            {
                FPProcessException(FPException.InvalidOp, context);
            }

            return nan;
        }

        case FPType.Infinity:
            return FPInfinity(sign);

        case FPType.Zero:
            return FPZero(sign);

        default:
            // Any remaining value goes through the rounding path.
            return FPRoundCv(real, context);
    }
}

/// <summary>Returns the default NaN, expressed as the negation of <see cref="float.NaN"/>.</summary>
private static float FPDefaultNaN() => -float.NaN;

/// <summary>Returns negative infinity when <paramref name="sign"/> is set, positive infinity otherwise.</summary>
private static float FPInfinity(bool sign)
{
    if (sign)
    {
        return float.NegativeInfinity;
    }

    return float.PositiveInfinity;
}

/// <summary>Returns negative zero when <paramref name="sign"/> is set, positive zero otherwise.</summary>
private static float FPZero(bool sign)
{
    if (sign)
    {
        return -0f;
    }

    return +0f;
}

/// <summary>Returns the largest-magnitude finite single with the requested sign.</summary>
private static float FPMaxNormal(bool sign)
{
    if (sign)
    {
        return float.MinValue;
    }

    return float.MaxValue;
}
+ real = Math.Pow(2d, -14) * ((double)frac16 * Math.Pow(2d, -10)); + } + } + else if (exp16 == 0x1Fu && (context.Fpcr & FPCR.Ahp) == 0) + { + if (frac16 == 0u) + { + type = FPType.Infinity; + real = Math.Pow(2d, 1000); + } + else + { + type = (~frac16 & 0x0200u) == 0u ? FPType.QNaN : FPType.SNaN; + real = 0d; + } + } + else + { + type = FPType.Nonzero; // Normal. + real = Math.Pow(2d, (int)exp16 - 15) * (1d + (double)frac16 * Math.Pow(2d, -10)); + } + + return sign ? -real : real; + } + + private static float FPRoundCv(double real, ExecutionContext context) + { + const int minimumExp = -126; + + const int e = 8; + const int f = 23; + + bool sign; + double mantissa; + + if (real < 0d) + { + sign = true; + mantissa = -real; + } + else + { + sign = false; + mantissa = real; + } + + int exponent = 0; + + while (mantissa < 1d) + { + mantissa *= 2d; + exponent--; + } + + while (mantissa >= 2d) + { + mantissa /= 2d; + exponent++; + } + + if ((context.Fpcr & FPCR.Fz) != 0 && exponent < minimumExp) + { + context.Fpsr |= FPSR.Ufc; + + return FPZero(sign); + } + + uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0); + + if (biasedExp == 0u) + { + mantissa /= Math.Pow(2d, minimumExp - exponent); + } + + uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f)); + double error = mantissa * Math.Pow(2d, f) - (double)intMant; + + if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0)) + { + FPProcessException(FPException.Underflow, context); + } + + bool overflowToInf; + bool roundUp; + + switch (context.Fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: + roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u)); + overflowToInf = true; + break; + + case FPRoundingMode.TowardsPlusInfinity: + roundUp = (error != 0d && !sign); + overflowToInf = !sign; + break; + + case FPRoundingMode.TowardsMinusInfinity: + roundUp = (error != 0d && sign); + overflowToInf = sign; + break; + + case FPRoundingMode.TowardsZero: + roundUp = 
/// <summary>
/// Builds the single-precision NaN that corresponds to a half-precision NaN encoding.
/// </summary>
/// <param name="valueBits">Raw 16-bit encoding of the half-precision NaN.</param>
private static float FPConvertNaN(ushort valueBits)
{
    uint half = valueBits;

    // Move the sign from bit 15 to bit 31 and the low nine fraction bits up to bits 21..13.
    uint signBit = (half & 0x8000u) << 16;
    uint payload = (half & 0x01FFu) << 13;

    // 0x7FC00000 sets every exponent bit plus fraction bit 22.
    return BitConverter.Int32BitsToSingle((int)(signBit | 0x7FC00000u | payload));
}

/// <summary>
/// Records floating-point exception <paramref name="exc"/> in FPSR, unless FPCR has the
/// matching enable bit set, in which case trapping would be required.
/// </summary>
/// <exception cref="NotImplementedException">The enable bit for <paramref name="exc"/> is set in FPCR.</exception>
private static void FPProcessException(FPException exc, ExecutionContext context)
{
    int statusBit = (int)exc;
    int enableBit = statusBit + 8; // The enable bit sits eight positions above the status bit index.

    bool trapRequested = (context.Fpcr & (FPCR)(1 << enableBit)) != 0;

    if (trapRequested)
    {
        throw new NotImplementedException("Floating-point trap handling.");
    }

    context.Fpsr |= (FPSR)(1 << statusBit);
}
/// <summary>Returns the half-precision default NaN encoding (0x7E00).</summary>
private static ushort FPDefaultNaN() => (ushort)0x7E00u;

/// <summary>Returns the half-precision infinity encoding with the requested sign bit.</summary>
private static ushort FPInfinity(bool sign)
{
    uint signBit = sign ? 0x8000u : 0x0000u;

    return (ushort)(signBit | 0x7C00u);
}

/// <summary>Returns the half-precision zero encoding with the requested sign bit.</summary>
private static ushort FPZero(bool sign)
{
    uint signBit = sign ? 0x8000u : 0x0000u;

    return (ushort)signBit;
}

/// <summary>Returns the largest finite half-precision encoding with the requested sign bit.</summary>
private static ushort FPMaxNormal(bool sign)
{
    uint signBit = sign ? 0x8000u : 0x0000u;

    return (ushort)(signBit | 0x7BFFu);
}
-real : real; + } + + private static ushort FPRoundCv(double real, ExecutionContext context) + { + const int minimumExp = -14; + + const int e = 5; + const int f = 10; + + bool sign; + double mantissa; + + if (real < 0d) + { + sign = true; + mantissa = -real; + } + else + { + sign = false; + mantissa = real; + } + + int exponent = 0; + + while (mantissa < 1d) + { + mantissa *= 2d; + exponent--; + } + + while (mantissa >= 2d) + { + mantissa /= 2d; + exponent++; + } + + uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0); + + if (biasedExp == 0u) + { + mantissa /= Math.Pow(2d, minimumExp - exponent); + } + + uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f)); + double error = mantissa * Math.Pow(2d, f) - (double)intMant; + + if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0)) + { + FPProcessException(FPException.Underflow, context); + } + + bool overflowToInf; + bool roundUp; + + switch (context.Fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: + roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u)); + overflowToInf = true; + break; + + case FPRoundingMode.TowardsPlusInfinity: + roundUp = (error != 0d && !sign); + overflowToInf = !sign; + break; + + case FPRoundingMode.TowardsMinusInfinity: + roundUp = (error != 0d && sign); + overflowToInf = sign; + break; + + case FPRoundingMode.TowardsZero: + roundUp = false; + overflowToInf = false; + break; + } + + if (roundUp) + { + intMant++; + + if (intMant == 1u << f) + { + biasedExp = 1u; + } + + if (intMant == 1u << (f + 1)) + { + biasedExp++; + intMant >>= 1; + } + } + + ushort resultBits; + + if ((context.Fpcr & FPCR.Ahp) == 0) + { + if (biasedExp >= (1u << e) - 1u) + { + resultBits = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); + + FPProcessException(FPException.Overflow, context); + + error = 1d; + } + else + { + resultBits = (ushort)((sign ? 
/// <summary>
/// Builds the half-precision NaN that corresponds to a single-precision NaN encoding.
/// </summary>
/// <param name="valueBits">Raw 32-bit encoding of the single-precision NaN.</param>
private static ushort FPConvertNaN(uint valueBits)
{
    // Bring the sign down from bit 31 to bit 15 and fraction bits 21..13 down to bits 8..0.
    uint signBit = (valueBits >> 16) & 0x8000u;
    uint payload = (valueBits >> 13) & 0x01FFu;

    // 0x7E00 sets every exponent bit plus the top fraction bit of the half-precision format.
    return (ushort)(signBit | 0x7E00u | payload);
}

/// <summary>
/// Records floating-point exception <paramref name="exc"/> in FPSR, unless FPCR has the
/// matching enable bit set, in which case trapping would be required.
/// </summary>
/// <exception cref="NotImplementedException">The enable bit for <paramref name="exc"/> is set in FPCR.</exception>
private static void FPProcessException(FPException exc, ExecutionContext context)
{
    int flagIndex = (int)exc;

    // The enable bit for an exception is located eight bits above its status flag index.
    FPCR enableMask = (FPCR)(1 << (flagIndex + 8));

    if ((context.Fpcr & enableMask) != 0)
    {
        throw new NotImplementedException("Floating-point trap handling.");
    }

    context.Fpsr |= (FPSR)(1 << flagIndex);
}
/// <summary>
/// Compares two single-precision values and returns a 4-bit result code.
/// </summary>
/// <param name="value1">Left operand.</param>
/// <param name="value2">Right operand.</param>
/// <param name="signalNaNs">When true, any NaN operand (quiet or signalling) raises Invalid Operation.</param>
/// <returns>
/// 0b0011 when either operand is a NaN, 0b0110 when equal, 0b1000 when
/// <paramref name="value1"/> is smaller and 0b0010 otherwise.
/// NOTE(review): the codes look like NZCV condition flags; confirm against the caller.
/// </returns>
public static int FPCompare(float value1, float value2, bool signalNaNs)
{
    ExecutionContext context = NativeInterface.GetContext();

    // The operand signs and raw bits are not needed here, so they are discarded,
    // matching the other comparison helpers in this class.
    value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
    value2 = value2.FPUnpack(out FPType type2, out _, out _, context);

    int result;

    if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
    {
        result = 0b0011;

        // A signalling NaN always reports Invalid Operation; a quiet NaN only when requested.
        if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs)
        {
            FPProcessException(FPException.InvalidOp, context);
        }
    }
    else
    {
        if (value1 == value2)
        {
            result = 0b0110;
        }
        else if (value1 < value2)
        {
            result = 0b1000;
        }
        else
        {
            result = 0b0010;
        }
    }

    return result;
}

/// <summary>
/// Tests two single-precision values for equality.
/// </summary>
/// <returns>
/// The value produced by <c>ZerosOrOnes(true)</c> when the operands compare equal, and by
/// <c>ZerosOrOnes(false)</c> when they differ or either one is a NaN.
/// </returns>
public static float FPCompareEQ(float value1, float value2)
{
    ExecutionContext context = NativeInterface.GetContext();

    value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
    value2 = value2.FPUnpack(out FPType type2, out _, out _, context);

    float result;

    if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN)
    {
        result = ZerosOrOnes(false);

        // Equality is a quiet comparison: only a signalling NaN reports Invalid Operation.
        if (type1 == FPType.SNaN || type2 == FPType.SNaN)
        {
            FPProcessException(FPException.InvalidOp, context);
        }
    }
    else
    {
        result = ZerosOrOnes(value1 == value2);
    }

    return result;
}
/// <summary>
/// Tests whether <paramref name="value1"/> is strictly greater than <paramref name="value2"/>.
/// Any NaN operand, quiet or signalling, yields the false mask and reports Invalid Operation.
/// </summary>
public static float FPCompareGT(float value1, float value2)
{
    ExecutionContext context = NativeInterface.GetContext();

    value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
    value2 = value2.FPUnpack(out FPType type2, out _, out _, context);

    bool nan1 = type1 == FPType.SNaN || type1 == FPType.QNaN;
    bool nan2 = type2 == FPType.SNaN || type2 == FPType.QNaN;

    if (nan1 || nan2)
    {
        float unordered = ZerosOrOnes(false);

        FPProcessException(FPException.InvalidOp, context);

        return unordered;
    }

    return ZerosOrOnes(value1 > value2);
}

/// <summary>Less-or-equal test, implemented as greater-or-equal with the operands swapped.</summary>
public static float FPCompareLE(float value1, float value2) => FPCompareGE(value2, value1);

/// <summary>Less-than test, implemented as greater-than with the operands swapped.</summary>
public static float FPCompareLT(float value1, float value2) => FPCompareGT(value2, value1);

/// <summary>
/// Divides <paramref name="value1"/> by <paramref name="value2"/>, handling NaN, infinity and
/// zero operands explicitly before falling back to the native division.
/// </summary>
public static float FPDiv(float value1, float value2)
{
    ExecutionContext context = NativeInterface.GetContext();

    value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
    value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);

    float nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);

    if (done)
    {
        return nanResult;
    }

    bool inf1  = type1 == FPType.Infinity;
    bool inf2  = type2 == FPType.Infinity;
    bool zero1 = type1 == FPType.Zero;
    bool zero2 = type2 == FPType.Zero;

    // inf / inf and 0 / 0 have no defined value.
    if ((inf1 && inf2) || (zero1 && zero2))
    {
        float nan = FPDefaultNaN();

        FPProcessException(FPException.InvalidOp, context);

        return nan;
    }

    if (inf1 || zero2)
    {
        float infinity = FPInfinity(sign1 ^ sign2);

        // Only a finite dividend over zero counts as a division by zero.
        if (!inf1)
        {
            FPProcessException(FPException.DivideByZero, context);
        }

        return infinity;
    }

    if (zero1 || inf2)
    {
        return FPZero(sign1 ^ sign2);
    }

    float quotient = value1 / value2;

    // In flush-to-zero mode a subnormal result becomes a signed zero and sets FPSR.Ufc.
    bool flushToZero = (context.Fpcr & FPCR.Fz) != 0;

    if (flushToZero && float.IsSubnormal(quotient))
    {
        context.Fpsr |= FPSR.Ufc;

        quotient = FPZero(quotient < 0f);
    }

    return quotient;
}
/// <summary>
/// Maximum of two values where a single quiet NaN operand is treated as missing: it is
/// replaced by negative infinity before delegating to <see cref="FPMax"/>.
/// </summary>
public static float FPMaxNum(float value1, float value2)
{
    ExecutionContext context = NativeInterface.GetContext();

    // Only the operand classes are needed here; the values themselves are left untouched.
    value1.FPUnpack(out FPType type1, out _, out _, context);
    value2.FPUnpack(out FPType type2, out _, out _, context);

    bool quiet1 = type1 == FPType.QNaN;
    bool quiet2 = type2 == FPType.QNaN;

    // Exactly one quiet NaN: substitute it so the other operand wins the comparison.
    if (quiet1 != quiet2)
    {
        if (quiet1)
        {
            value1 = FPInfinity(true);
        }
        else
        {
            value2 = FPInfinity(true);
        }
    }

    return FPMax(value1, value2);
}
/// <summary>
/// Minimum of two values where a single quiet NaN operand is treated as missing: it is
/// replaced by positive infinity before delegating to <see cref="FPMin"/>.
/// </summary>
public static float FPMinNum(float value1, float value2)
{
    ExecutionContext context = NativeInterface.GetContext();

    // Only the operand classes are needed here; the values themselves are left untouched.
    value1.FPUnpack(out FPType type1, out _, out _, context);
    value2.FPUnpack(out FPType type2, out _, out _, context);

    bool quiet1 = type1 == FPType.QNaN;
    bool quiet2 = type2 == FPType.QNaN;

    // Exactly one quiet NaN: substitute it so the other operand wins the comparison.
    if (quiet1 != quiet2)
    {
        if (quiet1)
        {
            value1 = FPInfinity(false);
        }
        else
        {
            value2 = FPInfinity(false);
        }
    }

    return FPMin(value1, value2);
}

/// <summary>
/// Multiplies two values, handling NaN, infinity and zero operands explicitly before
/// falling back to the native multiplication.
/// </summary>
public static float FPMul(float value1, float value2)
{
    ExecutionContext context = NativeInterface.GetContext();

    value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
    value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);

    float nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);

    if (done)
    {
        return nanResult;
    }

    bool inf1  = type1 == FPType.Infinity;
    bool inf2  = type2 == FPType.Infinity;
    bool zero1 = type1 == FPType.Zero;
    bool zero2 = type2 == FPType.Zero;

    // inf * 0 has no defined value.
    if ((inf1 && zero2) || (zero1 && inf2))
    {
        float nan = FPDefaultNaN();

        FPProcessException(FPException.InvalidOp, context);

        return nan;
    }

    if (inf1 || inf2)
    {
        return FPInfinity(sign1 ^ sign2);
    }

    if (zero1 || zero2)
    {
        return FPZero(sign1 ^ sign2);
    }

    float product = value1 * value2;

    // In flush-to-zero mode a subnormal result becomes a signed zero and sets FPSR.Ufc.
    bool flushToZero = (context.Fpcr & FPCR.Fz) != 0;

    if (flushToZero && float.IsSubnormal(product))
    {
        context.Fpsr |= FPSR.Ufc;

        product = FPZero(product < 0f);
    }

    return product;
}
/// <summary>
/// Computes <paramref name="valueA"/> minus the product of <paramref name="value1"/> and
/// <paramref name="value2"/> by negating <paramref name="value1"/> and delegating to
/// <see cref="FPMulAdd"/>.
/// </summary>
/// <param name="valueA">The addend.</param>
/// <param name="value1">First factor; negated before the multiply-add.</param>
/// <param name="value2">Second factor.</param>
/// <returns>The result returned by <see cref="FPMulAdd"/> for the negated first factor.</returns>
public static float FPMulSub(float valueA, float value1, float value2)
{
    // No execution context is fetched here: this wrapper never reads it, and FPMulAdd
    // obtains its own.
    value1 = value1.FPNeg();

    return FPMulAdd(valueA, value1, value2);
}
out bool sign1, out uint op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(sign1 ^ sign2); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPRecipEstimate(float value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value.FPUnpack(out FPType type, out bool sign, out uint op, context); + + float result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else if (type == FPType.Infinity) + { + result = FPZero(sign); + } + else if (type == FPType.Zero) + { + result = FPInfinity(sign); + + FPProcessException(FPException.DivideByZero, context); + } + else if (MathF.Abs(value) < MathF.Pow(2f, -128)) + { + bool overflowToInf; + + switch (context.Fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: overflowToInf = true; break; + case FPRoundingMode.TowardsPlusInfinity: overflowToInf = !sign; break; + case FPRoundingMode.TowardsMinusInfinity: overflowToInf = sign; break; + case FPRoundingMode.TowardsZero: overflowToInf = false; break; + } + + result = overflowToInf ? 
FPInfinity(sign) : FPMaxNormal(sign); + + FPProcessException(FPException.Overflow, context); + FPProcessException(FPException.Inexact, context); + } + else if ((context.Fpcr & FPCR.Fz) != 0 && (MathF.Abs(value) >= MathF.Pow(2f, 126))) + { + result = FPZero(sign); + + context.Fpsr |= FPSR.Ufc; + } + else + { + ulong fraction = (ulong)(op & 0x007FFFFFu) << 29; + uint exp = (op & 0x7F800000u) >> 23; + + if (exp == 0u) + { + if ((fraction & 0x0008000000000000ul) == 0ul) + { + fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2; + exp -= 1u; + } + else + { + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + } + } + + uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); + + uint resultExp = 253u - exp; + + uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u; + + fraction = (ulong)(estimate & 0xFFu) << 44; + + if (resultExp == 0u) + { + fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1; + } + else if (resultExp + 1u == 0u) + { + fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2; + resultExp = 0u; + } + + result = BitConverter.Int32BitsToSingle( + (int)((sign ? 
/// <summary>
/// Reciprocal step: computes 2 - (<paramref name="value1"/> * <paramref name="value2"/>),
/// implemented by negating <paramref name="value1"/> and adding the product to 2.
/// </summary>
public static float FPRecipStepFused(float value1, float value2)
{
    ExecutionContext context = NativeInterface.GetContext();

    value1 = value1.FPNeg();

    value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
    value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);

    float nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);

    if (done)
    {
        return nanResult;
    }

    bool inf1  = type1 == FPType.Infinity;
    bool inf2  = type2 == FPType.Infinity;
    bool zero1 = type1 == FPType.Zero;
    bool zero2 = type2 == FPType.Zero;

    // inf * 0 is defined to produce exactly +2 for this operation.
    if ((inf1 && zero2) || (zero1 && inf2))
    {
        return FPTwo(false);
    }

    if (inf1 || inf2)
    {
        return FPInfinity(sign1 ^ sign2);
    }

    // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T);
    // https://github.com/dotnet/corefx/issues/31903

    float step = 2f + (value1 * value2);

    // In flush-to-zero mode a subnormal result becomes a signed zero and sets FPSR.Ufc.
    bool flushToZero = (context.Fpcr & FPCR.Fz) != 0;

    if (flushToZero && float.IsSubnormal(step))
    {
        context.Fpsr |= FPSR.Ufc;

        step = FPZero(step < 0f);
    }

    return step;
}
/// <summary>
/// Produces a table-driven estimate of the reciprocal square root of <paramref name="value"/>.
/// </summary>
/// <returns>
/// A processed NaN for NaN input, a signed infinity (with Divide By Zero) for zero, the
/// default NaN (with Invalid Operation) for negative input, +0 for +infinity, and otherwise
/// a value assembled from <c>SoftFloat.RecipSqrtEstimateTable</c>.
/// </returns>
public static float FPRSqrtEstimate(float value)
{
    ExecutionContext context = NativeInterface.GetContext();

    value.FPUnpack(out FPType type, out bool sign, out uint op, context);

    if (type == FPType.SNaN || type == FPType.QNaN)
    {
        return FPProcessNaN(type, op, context);
    }

    if (type == FPType.Zero)
    {
        float infinity = FPInfinity(sign);

        FPProcessException(FPException.DivideByZero, context);

        return infinity;
    }

    if (sign)
    {
        float nan = FPDefaultNaN();

        FPProcessException(FPException.InvalidOp, context);

        return nan;
    }

    if (type == FPType.Infinity)
    {
        return FPZero(false);
    }

    const ulong topFractionBit  = 0x0008000000000000ul;
    const ulong lowFractionBits = 0x0007FFFFFFFFFFFFul;
    const ulong implicitOne     = 0x0010000000000000ul;

    // Place the 23-bit fraction in bits 51..29 of a 64-bit working value.
    ulong fraction = (ulong)(op & 0x007FFFFFu) << 29;
    uint exp = (op & 0x7F800000u) >> 23;

    if (exp == 0u)
    {
        // Shift left until the top fraction bit is set, lowering the exponent
        // (with unsigned wrap-around) once per shift, then shift that bit out.
        while ((fraction & topFractionBit) == 0ul)
        {
            fraction = (fraction & lowFractionBits) << 1;
            exp -= 1u;
        }

        fraction = (fraction & lowFractionBits) << 1;
    }

    // The table index keeps 8 fraction bits for an even exponent and 7 for an odd one.
    bool evenExp = (exp & 1u) == 0u;

    ulong keptFraction = evenExp ? 0x000FF00000000000ul : 0x000FE00000000000ul;
    int indexShift = evenExp ? 44 : 45;

    uint scaled = (uint)(((fraction & keptFraction) | implicitOne) >> indexShift);

    uint resultExp = (380u - exp) >> 1;

    uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u;

    // The result is always positive: exponent in bits 30..23, estimate in bits 22..15.
    return BitConverter.Int32BitsToSingle((int)((resultExp & 0xFFu) << 23 | (estimate & 0xFFu) << 15));
}
/// <summary>
/// Computes the square root of <paramref name="value"/>, handling NaN, zero, infinity and
/// negative operands explicitly before falling back to <see cref="MathF.Sqrt"/>.
/// </summary>
public static float FPSqrt(float value)
{
    ExecutionContext context = NativeInterface.GetContext();

    value = value.FPUnpack(out FPType type, out bool sign, out uint op, context);

    if (type == FPType.SNaN || type == FPType.QNaN)
    {
        return FPProcessNaN(type, op, context);
    }

    // The root of a zero keeps the sign of the zero.
    if (type == FPType.Zero)
    {
        return FPZero(sign);
    }

    if (type == FPType.Infinity && !sign)
    {
        return FPInfinity(sign);
    }

    // Every remaining negative operand, including negative infinity, is invalid.
    if (sign)
    {
        float nan = FPDefaultNaN();

        FPProcessException(FPException.InvalidOp, context);

        return nan;
    }

    float root = MathF.Sqrt(value);

    // In flush-to-zero mode a subnormal result becomes a signed zero and sets FPSR.Ufc.
    bool flushToZero = (context.Fpcr & FPCR.Fz) != 0;

    if (flushToZero && float.IsSubnormal(root))
    {
        context.Fpsr |= FPSR.Ufc;

        root = FPZero(root < 0f);
    }

    return root;
}
} + else if ((inf1 && !sign1) || (inf2 && sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && !sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == !sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 - value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + private static float FPDefaultNaN() + { + return -float.NaN; + } + + private static float FPInfinity(bool sign) + { + return sign ? float.NegativeInfinity : float.PositiveInfinity; + } + + private static float FPZero(bool sign) + { + return sign ? -0f : +0f; + } + + private static float FPMaxNormal(bool sign) + { + return sign ? float.MinValue : float.MaxValue; + } + + private static float FPTwo(bool sign) + { + return sign ? -2f : +2f; + } + + private static float FPOnePointFive(bool sign) + { + return sign ? -1.5f : +1.5f; + } + + private static float FPNeg(this float value) + { + return -value; + } + + private static float ZerosOrOnes(bool ones) + { + return BitConverter.Int32BitsToSingle(ones ? -1 : 0); + } + + private static float FPUnpack( + this float value, + out FPType type, + out bool sign, + out uint valueBits, + ExecutionContext context) + { + valueBits = (uint)BitConverter.SingleToInt32Bits(value); + + sign = (~valueBits & 0x80000000u) == 0u; + + if ((valueBits & 0x7F800000u) == 0u) + { + if ((valueBits & 0x007FFFFFu) == 0u || (context.Fpcr & FPCR.Fz) != 0) + { + type = FPType.Zero; + value = FPZero(sign); + + if ((valueBits & 0x007FFFFFu) != 0u) + { + FPProcessException(FPException.InputDenorm, context); + } + } + else + { + type = FPType.Nonzero; + } + } + else if ((~valueBits & 0x7F800000u) == 0u) + { + if ((valueBits & 0x007FFFFFu) == 0u) + { + type = FPType.Infinity; + } + else + { + type = (~valueBits & 0x00400000u) == 0u ? 
FPType.QNaN : FPType.SNaN; + value = FPZero(sign); + } + } + else + { + type = FPType.Nonzero; + } + + return value; + } + + private static float FPProcessNaNs( + FPType type1, + FPType type2, + uint op1, + uint op2, + out bool done, + ExecutionContext context) + { + done = true; + + if (type1 == FPType.SNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.SNaN) + { + return FPProcessNaN(type2, op2, context); + } + else if (type1 == FPType.QNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.QNaN) + { + return FPProcessNaN(type2, op2, context); + } + + done = false; + + return FPZero(false); + } + + private static float FPProcessNaNs3( + FPType type1, + FPType type2, + FPType type3, + uint op1, + uint op2, + uint op3, + out bool done, + ExecutionContext context) + { + done = true; + + if (type1 == FPType.SNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.SNaN) + { + return FPProcessNaN(type2, op2, context); + } + else if (type3 == FPType.SNaN) + { + return FPProcessNaN(type3, op3, context); + } + else if (type1 == FPType.QNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.QNaN) + { + return FPProcessNaN(type2, op2, context); + } + else if (type3 == FPType.QNaN) + { + return FPProcessNaN(type3, op3, context); + } + + done = false; + + return FPZero(false); + } + + private static float FPProcessNaN(FPType type, uint op, ExecutionContext context) + { + if (type == FPType.SNaN) + { + op |= 1u << 22; + + FPProcessException(FPException.InvalidOp, context); + } + + if ((context.Fpcr & FPCR.Dn) != 0) + { + return FPDefaultNaN(); + } + + return BitConverter.Int32BitsToSingle((int)op); + } + + private static void FPProcessException(FPException exc, ExecutionContext context) + { + int enable = (int)exc + 8; + + if ((context.Fpcr & (FPCR)(1 << enable)) != 0) + { + throw new NotImplementedException("Floating-point trap handling."); + } + else + { + 
context.Fpsr |= (FPSR)(1 << (int)exc); + } + } + } + + static class SoftFloat64 + { + public static double FPAdd(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == !sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if ((inf1 && !sign1) || (inf2 && !sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 + value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static int FPCompare(double value1, double value2, bool signalNaNs) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context); + + int result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = 0b0011; + + if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs) + { + FPProcessException(FPException.InvalidOp, context); + } + } + else + { + if (value1 == value2) + { + result = 0b0110; + } + else if (value1 < value2) + { + result = 0b1000; + } + else + { + result = 0b0010; + } + } + + return result; + } + + public static 
double FPCompareEQ(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + + double result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + if (type1 == FPType.SNaN || type2 == FPType.SNaN) + { + FPProcessException(FPException.InvalidOp, context); + } + } + else + { + result = ZerosOrOnes(value1 == value2); + } + + return result; + } + + public static double FPCompareGE(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + + double result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + FPProcessException(FPException.InvalidOp, context); + } + else + { + result = ZerosOrOnes(value1 >= value2); + } + + return result; + } + + public static double FPCompareGT(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + + double result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + FPProcessException(FPException.InvalidOp, context); + } + else + { + result = ZerosOrOnes(value1 > value2); + } + + return result; + } + + public static double FPCompareLE(double value1, double value2) + { + return FPCompareGE(value2, value1); + } + + public static double FPCompareLT(double value1, double value2) + { + return FPCompareGT(value2, value1); + } + + public static double 
FPDiv(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && inf2) || (zero1 && zero2)) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if (inf1 || zero2) + { + result = FPInfinity(sign1 ^ sign2); + + if (!inf1) + { + FPProcessException(FPException.DivideByZero, context); + } + } + else if (zero1 || inf2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 / value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPMax(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + if (value1 > value2) + { + if (type1 == FPType.Infinity) + { + result = FPInfinity(sign1); + } + else if (type1 == FPType.Zero) + { + result = FPZero(sign1 && sign2); + } + else + { + result = value1; + } + } + else + { + if (type2 == FPType.Infinity) + { + result = FPInfinity(sign2); + } + else if (type2 == FPType.Zero) + { + result = FPZero(sign1 && sign2); + } + else + { + result = value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result 
= FPZero(result < 0d); + } + } + } + } + + return result; + } + + public static double FPMaxNum(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1.FPUnpack(out FPType type1, out _, out _, context); + value2.FPUnpack(out FPType type2, out _, out _, context); + + if (type1 == FPType.QNaN && type2 != FPType.QNaN) + { + value1 = FPInfinity(true); + } + else if (type1 != FPType.QNaN && type2 == FPType.QNaN) + { + value2 = FPInfinity(true); + } + + return FPMax(value1, value2); + } + + public static double FPMin(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + if (value1 < value2) + { + if (type1 == FPType.Infinity) + { + result = FPInfinity(sign1); + } + else if (type1 == FPType.Zero) + { + result = FPZero(sign1 || sign2); + } + else + { + result = value1; + } + } + else + { + if (type2 == FPType.Infinity) + { + result = FPInfinity(sign2); + } + else if (type2 == FPType.Zero) + { + result = FPZero(sign1 || sign2); + } + else + { + result = value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + } + + return result; + } + + public static double FPMinNum(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1.FPUnpack(out FPType type1, out _, out _, context); + value2.FPUnpack(out FPType type2, out _, out _, context); + + if (type1 == FPType.QNaN && type2 != FPType.QNaN) + { + value1 = FPInfinity(false); + } + else if (type1 != FPType.QNaN && type2 == FPType.QNaN) + { + value2 = FPInfinity(false); + } + + return FPMin(value1, value2); + } + + public static 
double FPMul(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPMulAdd(double valueA, double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out ulong addend, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + double result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context); + + if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2))) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + + if (!done) + { + bool infA = typeA == FPType.Infinity; bool zeroA = typeA == FPType.Zero; + + bool signP = sign1 ^ sign2; + bool infP = inf1 || inf2; + bool zeroP 
= zero1 || zero2; + + if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP)) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if ((infA && !signA) || (infP && !signP)) + { + result = FPInfinity(false); + } + else if ((infA && signA) || (infP && signP)) + { + result = FPInfinity(true); + } + else if (zeroA && zeroP && signA == signP) + { + result = FPZero(signA); + } + else + { + // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = valueA + (value1 * value2); + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPMulSub(double valueA, double value1, double value2) + { + value1 = value1.FPNeg(); + + return FPMulAdd(valueA, value1, value2); + } + + public static double FPMulX(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(sign1 ^ sign2); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPRecipEstimate(double value) + { + ExecutionContext 
context = NativeInterface.GetContext(); + + value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + + double result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else if (type == FPType.Infinity) + { + result = FPZero(sign); + } + else if (type == FPType.Zero) + { + result = FPInfinity(sign); + + FPProcessException(FPException.DivideByZero, context); + } + else if (Math.Abs(value) < Math.Pow(2d, -1024)) + { + bool overflowToInf; + + switch (context.Fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: overflowToInf = true; break; + case FPRoundingMode.TowardsPlusInfinity: overflowToInf = !sign; break; + case FPRoundingMode.TowardsMinusInfinity: overflowToInf = sign; break; + case FPRoundingMode.TowardsZero: overflowToInf = false; break; + } + + result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); + + FPProcessException(FPException.Overflow, context); + FPProcessException(FPException.Inexact, context); + } + else if ((context.Fpcr & FPCR.Fz) != 0 && (Math.Abs(value) >= Math.Pow(2d, 1022))) + { + result = FPZero(sign); + + context.Fpsr |= FPSR.Ufc; + } + else + { + ulong fraction = op & 0x000FFFFFFFFFFFFFul; + uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52); + + if (exp == 0u) + { + if ((fraction & 0x0008000000000000ul) == 0ul) + { + fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2; + exp -= 1u; + } + else + { + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + } + } + + uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); + + uint resultExp = 2045u - exp; + + uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u; + + fraction = (ulong)(estimate & 0xFFu) << 44; + + if (resultExp == 0u) + { + fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1; + } + else if (resultExp + 1u == 0u) + { + fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2; + resultExp = 0u; 
+ } + + result = BitConverter.Int64BitsToDouble( + (long)((sign ? 1ul : 0ul) << 63 | (resultExp & 0x7FFul) << 52 | (fraction & 0x000FFFFFFFFFFFFFul))); + } + + return result; + } + + public static double FPRecipStepFused(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPNeg(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(false); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else + { + // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = 2d + (value1 * value2); + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPRecpX(double value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + + double result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else + { + ulong notExp = (~op >> 52) & 0x7FFul; + ulong maxExp = 0x7FEul; + + result = BitConverter.Int64BitsToDouble( + (long)((sign ? 1ul : 0ul) << 63 | (notExp == 0x7FFul ? 
maxExp : notExp) << 52)); + } + + return result; + } + + public static double FPRSqrtEstimate(double value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + + double result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else if (type == FPType.Zero) + { + result = FPInfinity(sign); + + FPProcessException(FPException.DivideByZero, context); + } + else if (sign) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else if (type == FPType.Infinity) + { + result = FPZero(false); + } + else + { + ulong fraction = op & 0x000FFFFFFFFFFFFFul; + uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52); + + if (exp == 0u) + { + while ((fraction & 0x0008000000000000ul) == 0ul) + { + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + exp -= 1u; + } + + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + } + + uint scaled; + + if ((exp & 1u) == 0u) + { + scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); + } + else + { + scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45); + } + + uint resultExp = (3068u - exp) >> 1; + + uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u; + + result = BitConverter.Int64BitsToDouble((long)((resultExp & 0x7FFul) << 52 | (estimate & 0xFFul) << 44)); + } + + return result; + } + + public static double FPRSqrtStepFused(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPNeg(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == 
FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPOnePointFive(false); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else + { + // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + result = (3d + (value1 * value2)) / 2d; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPSqrt(double value) + { + ExecutionContext context = NativeInterface.GetContext(); + + value = value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + + double result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context); + } + else if (type == FPType.Zero) + { + result = FPZero(sign); + } + else if (type == FPType.Infinity && !sign) + { + result = FPInfinity(sign); + } + else if (sign) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context); + } + else + { + result = Math.Sqrt(value); + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + + return result; + } + + public static double FPSub(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == sign2) + { + result = FPDefaultNaN(); + + 
FPProcessException(FPException.InvalidOp, context); + } + else if ((inf1 && !sign1) || (inf2 && sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && !sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == !sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 - value2; + + if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + private static double FPDefaultNaN() + { + return -double.NaN; + } + + private static double FPInfinity(bool sign) + { + return sign ? double.NegativeInfinity : double.PositiveInfinity; + } + + private static double FPZero(bool sign) + { + return sign ? -0d : +0d; + } + + private static double FPMaxNormal(bool sign) + { + return sign ? double.MinValue : double.MaxValue; + } + + private static double FPTwo(bool sign) + { + return sign ? -2d : +2d; + } + + private static double FPOnePointFive(bool sign) + { + return sign ? -1.5d : +1.5d; + } + + private static double FPNeg(this double value) + { + return -value; + } + + private static double ZerosOrOnes(bool ones) + { + return BitConverter.Int64BitsToDouble(ones ? 
-1L : 0L); + } + + private static double FPUnpack( + this double value, + out FPType type, + out bool sign, + out ulong valueBits, + ExecutionContext context) + { + valueBits = (ulong)BitConverter.DoubleToInt64Bits(value); + + sign = (~valueBits & 0x8000000000000000ul) == 0ul; + + if ((valueBits & 0x7FF0000000000000ul) == 0ul) + { + if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul || (context.Fpcr & FPCR.Fz) != 0) + { + type = FPType.Zero; + value = FPZero(sign); + + if ((valueBits & 0x000FFFFFFFFFFFFFul) != 0ul) + { + FPProcessException(FPException.InputDenorm, context); + } + } + else + { + type = FPType.Nonzero; + } + } + else if ((~valueBits & 0x7FF0000000000000ul) == 0ul) + { + if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul) + { + type = FPType.Infinity; + } + else + { + type = (~valueBits & 0x0008000000000000ul) == 0ul ? FPType.QNaN : FPType.SNaN; + value = FPZero(sign); + } + } + else + { + type = FPType.Nonzero; + } + + return value; + } + + private static double FPProcessNaNs( + FPType type1, + FPType type2, + ulong op1, + ulong op2, + out bool done, + ExecutionContext context) + { + done = true; + + if (type1 == FPType.SNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.SNaN) + { + return FPProcessNaN(type2, op2, context); + } + else if (type1 == FPType.QNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.QNaN) + { + return FPProcessNaN(type2, op2, context); + } + + done = false; + + return FPZero(false); + } + + private static double FPProcessNaNs3( + FPType type1, + FPType type2, + FPType type3, + ulong op1, + ulong op2, + ulong op3, + out bool done, + ExecutionContext context) + { + done = true; + + if (type1 == FPType.SNaN) + { + return FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.SNaN) + { + return FPProcessNaN(type2, op2, context); + } + else if (type3 == FPType.SNaN) + { + return FPProcessNaN(type3, op3, context); + } + else if (type1 == FPType.QNaN) + { + return 
FPProcessNaN(type1, op1, context); + } + else if (type2 == FPType.QNaN) + { + return FPProcessNaN(type2, op2, context); + } + else if (type3 == FPType.QNaN) + { + return FPProcessNaN(type3, op3, context); + } + + done = false; + + return FPZero(false); + } + + private static double FPProcessNaN(FPType type, ulong op, ExecutionContext context) + { + if (type == FPType.SNaN) + { + op |= 1ul << 51; + + FPProcessException(FPException.InvalidOp, context); + } + + if ((context.Fpcr & FPCR.Dn) != 0) + { + return FPDefaultNaN(); + } + + return BitConverter.Int64BitsToDouble((long)op); + } + + private static void FPProcessException(FPException exc, ExecutionContext context) + { + int enable = (int)exc + 8; + + if ((context.Fpcr & (FPCR)(1 << enable)) != 0) + { + throw new NotImplementedException("Floating-point trap handling."); + } + else + { + context.Fpsr |= (FPSR)(1 << (int)exc); + } + } + } +} diff --git a/ARMeilleure/IntermediateRepresentation/BasicBlock.cs b/ARMeilleure/IntermediateRepresentation/BasicBlock.cs new file mode 100644 index 000000000..06839f309 --- /dev/null +++ b/ARMeilleure/IntermediateRepresentation/BasicBlock.cs @@ -0,0 +1,83 @@ +using System.Collections.Generic; + +namespace ARMeilleure.IntermediateRepresentation +{ + class BasicBlock + { + public int Index { get; set; } + + public LinkedListNode Node { get; set; } + + public LinkedList Operations { get; } + + private BasicBlock _next; + private BasicBlock _branch; + + public BasicBlock Next + { + get => _next; + set => _next = AddSuccessor(_next, value); + } + + public BasicBlock Branch + { + get => _branch; + set => _branch = AddSuccessor(_branch, value); + } + + public List Predecessors { get; } + + public HashSet DominanceFrontiers { get; } + + public BasicBlock ImmediateDominator { get; set; } + + public BasicBlock() + { + Operations = new LinkedList(); + + Predecessors = new List(); + + DominanceFrontiers = new HashSet(); + + Index = -1; + } + + public BasicBlock(int index) : this() + { + 
Index = index; + } + + private BasicBlock AddSuccessor(BasicBlock oldBlock, BasicBlock newBlock) + { + oldBlock?.Predecessors.Remove(this); + newBlock?.Predecessors.Add(this); + + return newBlock; + } + + public void Append(Node node) + { + // If the branch block is not null, then the list of operations + // should end with a branch instruction. We insert the new operation + // before this branch. + if (_branch != null || (Operations.Last != null && IsLeafBlock())) + { + Operations.AddBefore(Operations.Last, node); + } + else + { + Operations.AddLast(node); + } + } + + private bool IsLeafBlock() + { + return _branch == null && _next == null; + } + + public Node GetLastOp() + { + return Operations.Last?.Value; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/IntermediateRepresentation/Instruction.cs b/ARMeilleure/IntermediateRepresentation/Instruction.cs new file mode 100644 index 000000000..4c4ecb8f2 --- /dev/null +++ b/ARMeilleure/IntermediateRepresentation/Instruction.cs @@ -0,0 +1,79 @@ +namespace ARMeilleure.IntermediateRepresentation +{ + enum Instruction + { + Add, + BitwiseAnd, + BitwiseExclusiveOr, + BitwiseNot, + BitwiseOr, + Branch, + BranchIfFalse, + BranchIfTrue, + ByteSwap, + Call, + CompareAndSwap128, + CompareEqual, + CompareGreater, + CompareGreaterOrEqual, + CompareGreaterOrEqualUI, + CompareGreaterUI, + CompareLess, + CompareLessOrEqual, + CompareLessOrEqualUI, + CompareLessUI, + CompareNotEqual, + ConditionalSelect, + ConvertI64ToI32, + ConvertToFP, + ConvertToFPUI, + Copy, + CountLeadingZeros, + Divide, + DivideUI, + Load, + Load16, + Load8, + LoadArgument, + Multiply, + Multiply64HighSI, + Multiply64HighUI, + Negate, + Return, + RotateRight, + ShiftLeft, + ShiftRightSI, + ShiftRightUI, + SignExtend16, + SignExtend32, + SignExtend8, + StackAlloc, + Store, + Store16, + Store8, + Subtract, + VectorCreateScalar, + VectorExtract, + VectorExtract16, + VectorExtract8, + VectorInsert, + VectorInsert16, + VectorInsert8, + VectorOne, + 
VectorZero, + VectorZeroUpper64, + VectorZeroUpper96, + ZeroExtend16, + ZeroExtend32, + ZeroExtend8, + + Clobber, + CpuId, + Extended, + Fill, + LoadFromContext, + Spill, + SpillArg, + StoreToContext + } +} \ No newline at end of file diff --git a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs new file mode 100644 index 000000000..1fe29e855 --- /dev/null +++ b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs @@ -0,0 +1,138 @@ +namespace ARMeilleure.IntermediateRepresentation +{ + enum Intrinsic + { + X86Addpd, + X86Addps, + X86Addsd, + X86Addss, + X86Andnpd, + X86Andnps, + X86Cmppd, + X86Cmpps, + X86Cmpsd, + X86Cmpss, + X86Comisdeq, + X86Comisdge, + X86Comisdlt, + X86Comisseq, + X86Comissge, + X86Comisslt, + X86Cvtdq2pd, + X86Cvtdq2ps, + X86Cvtpd2dq, + X86Cvtpd2ps, + X86Cvtps2dq, + X86Cvtps2pd, + X86Cvtsd2si, + X86Cvtsd2ss, + X86Cvtss2sd, + X86Divpd, + X86Divps, + X86Divsd, + X86Divss, + X86Haddpd, + X86Haddps, + X86Maxpd, + X86Maxps, + X86Maxsd, + X86Maxss, + X86Minpd, + X86Minps, + X86Minsd, + X86Minss, + X86Movhlps, + X86Movlhps, + X86Mulpd, + X86Mulps, + X86Mulsd, + X86Mulss, + X86Paddb, + X86Paddd, + X86Paddq, + X86Paddw, + X86Pand, + X86Pandn, + X86Pavgb, + X86Pavgw, + X86Pblendvb, + X86Pcmpeqb, + X86Pcmpeqd, + X86Pcmpeqq, + X86Pcmpeqw, + X86Pcmpgtb, + X86Pcmpgtd, + X86Pcmpgtq, + X86Pcmpgtw, + X86Pmaxsb, + X86Pmaxsd, + X86Pmaxsw, + X86Pmaxub, + X86Pmaxud, + X86Pmaxuw, + X86Pminsb, + X86Pminsd, + X86Pminsw, + X86Pminub, + X86Pminud, + X86Pminuw, + X86Pmovsxbw, + X86Pmovsxdq, + X86Pmovsxwd, + X86Pmovzxbw, + X86Pmovzxdq, + X86Pmovzxwd, + X86Pmulld, + X86Pmullw, + X86Popcnt, + X86Por, + X86Pshufb, + X86Pslld, + X86Pslldq, + X86Psllq, + X86Psllw, + X86Psrad, + X86Psraw, + X86Psrld, + X86Psrlq, + X86Psrldq, + X86Psrlw, + X86Psubb, + X86Psubd, + X86Psubq, + X86Psubw, + X86Punpckhbw, + X86Punpckhdq, + X86Punpckhqdq, + X86Punpckhwd, + X86Punpcklbw, + X86Punpckldq, + X86Punpcklqdq, + X86Punpcklwd, + X86Pxor, 
/// <summary>
/// Operand of kind <see cref="OperandKind.Memory"/> that carries the pieces of an address:
/// a base operand, an optional index operand, a scale for the index and a constant displacement.
/// </summary>
/// <remarks>
/// This type only stores the components; how they are combined into an address is decided
/// by whoever consumes the operand.
/// </remarks>
class MemoryOperand : Operand
{
    /// <summary>Operand holding the base address.</summary>
    public Operand BaseAddress { get; set; }

    /// <summary>Optional index operand; null when no index is used.</summary>
    public Operand Index { get; set; }

    /// <summary>Scale associated with <see cref="Index"/>.</summary>
    public Multiplier Scale { get; }

    /// <summary>Constant displacement.</summary>
    public int Displacement { get; }

    /// <summary>
    /// Creates a memory operand of the given value type.
    /// </summary>
    /// <param name="type">Type of the value stored at the location.</param>
    /// <param name="baseAddress">Base address operand.</param>
    /// <param name="index">Optional index operand.</param>
    /// <param name="scale">Scale for the index, defaults to x1.</param>
    /// <param name="displacement">Constant displacement, defaults to zero.</param>
    public MemoryOperand(
        OperandType type,
        Operand     baseAddress,
        Operand     index        = null,
        Multiplier  scale        = Multiplier.x1,
        int         displacement = 0) : base(OperandKind.Memory, type)
    {
        // Immutable parts first, then the two operands that may be replaced later.
        Displacement = displacement;
        Scale        = scale;
        Index        = index;
        BaseAddress  = baseAddress;
    }
}
// Scale factor selector (used as the type of MemoryOperand.Scale).
// The numeric value of each member is the base-2 logarithm of the factor in its name:
// x1 -> 0, x2 -> 1, x4 -> 2, x8 -> 3.
// NOTE(review): presumably chosen so the value can be used directly as a shift amount
// or as an x86 SIB scale field - confirm against the code generator.
enum Multiplier
{
    x1 = 0,
    x2 = 1,
    x4 = 2,
    x8 = 3
}
/// <summary>
/// Replaces all destination operands of this node.
/// </summary>
/// <remarks>
/// Every previous destination that is a local variable has this node removed from its
/// assignment list, and every new local variable destination gets this node appended to it.
/// The supplied array is always copied, so later changes made by the caller to
/// <paramref name="destinations"/> can not bypass that bookkeeping.
/// Null elements are accepted and simply not tracked, matching <c>SetDestination</c>.
/// </remarks>
/// <param name="destinations">New destination operands; may be empty but not null.</param>
/// <exception cref="ArgumentNullException"><paramref name="destinations"/> is null.</exception>
public void SetDestinations(Operand[] destinations)
{
    // Validate before touching any state, so a bad call does not leave the
    // assignment lists half updated.
    if (destinations == null)
    {
        throw new ArgumentNullException(nameof(destinations));
    }

    // _destinations is still null when called from the constructor.
    if (_destinations != null)
    {
        for (int index = 0; index < _destinations.Length; index++)
        {
            Operand oldOp = _destinations[index];

            if (oldOp != null && oldOp.Kind == OperandKind.LocalVariable)
            {
                oldOp.Assignments.Remove(_asgUseNodes[index]);
            }
        }
    }

    _destinations = new Operand[destinations.Length];

    _asgUseNodes = new LinkedListNode<Node>[destinations.Length];

    for (int index = 0; index < destinations.Length; index++)
    {
        Operand newOp = destinations[index];

        _destinations[index] = newOp;

        if (newOp != null && newOp.Kind == OperandKind.LocalVariable)
        {
            _asgUseNodes[index] = newOp.Assignments.AddLast(this);
        }
    }
}
/// <summary>
/// A single IR operand: a constant, register, local variable, label, memory reference
/// or undefined value, depending on <see cref="Kind"/>.
/// </summary>
/// <remarks>
/// <see cref="Value"/> is a raw 64-bit payload whose meaning depends on the kind:
/// the constant bits, the packed register (type in bits 24 and up, index in the low 24 bits),
/// or the number assigned to a local variable through <see cref="NumberLocal"/>.
/// </remarks>
class Operand
{
    public OperandKind Kind { get; }

    public OperandType Type { get; }

    public ulong Value { get; private set; }

    /// <summary>Nodes that write to this operand (maintained by <c>Node</c>).</summary>
    public LinkedList<Node> Assignments { get; }

    /// <summary>Nodes that read this operand (maintained by <c>Node</c>).</summary>
    public LinkedList<Node> Uses { get; }

    private Operand()
    {
        Assignments = new LinkedList<Node>();
        Uses        = new LinkedList<Node>();
    }

    public Operand(OperandKind kind, OperandType type = OperandType.None) : this()
    {
        Kind = kind;
        Type = type;
    }

    /// <summary>32-bit integer constant; the value is zero extended into <see cref="Value"/>.</summary>
    public Operand(int value) : this(OperandKind.Constant, OperandType.I32)
    {
        uint bits = (uint)value;

        Value = bits;
    }

    /// <summary>32-bit integer constant from an unsigned value.</summary>
    public Operand(uint value) : this(OperandKind.Constant, OperandType.I32)
    {
        Value = value;
    }

    /// <summary>64-bit integer constant.</summary>
    public Operand(long value) : this(OperandKind.Constant, OperandType.I64)
    {
        Value = (ulong)value;
    }

    /// <summary>64-bit integer constant from an unsigned value.</summary>
    public Operand(ulong value) : this(OperandKind.Constant, OperandType.I64)
    {
        Value = value;
    }

    /// <summary>Single precision constant, stored as its raw bit pattern.</summary>
    public Operand(float value) : this(OperandKind.Constant, OperandType.FP32)
    {
        // The int to ulong conversion sign extends, so the upper half of Value
        // is all ones when the sign bit of the float is set.
        int bits = BitConverter.SingleToInt32Bits(value);

        Value = (ulong)bits;
    }

    /// <summary>Double precision constant, stored as its raw bit pattern.</summary>
    public Operand(double value) : this(OperandKind.Constant, OperandType.FP64)
    {
        long bits = BitConverter.DoubleToInt64Bits(value);

        Value = (ulong)bits;
    }

    /// <summary>Register operand; the register type and index are packed into <see cref="Value"/>.</summary>
    public Operand(int index, RegisterType regType, OperandType type) : this(OperandKind.Register, type)
    {
        int packed = ((int)regType << 24) | index;

        Value = (ulong)packed;
    }

    /// <summary>Unpacks the register stored by the register constructor.</summary>
    public Register GetRegister()
    {
        int          index   = (int)Value & 0xffffff;
        RegisterType regType = (RegisterType)(Value >> 24);

        return new Register(index, regType);
    }

    // The accessors below reinterpret (truncate) the raw payload.

    public byte AsByte() => (byte)Value;

    public short AsInt16() => (short)Value;

    public int AsInt32() => (int)Value;

    public long AsInt64() => (long)Value;

    public float AsFloat() => BitConverter.Int32BitsToSingle((int)Value);

    public double AsDouble() => BitConverter.Int64BitsToDouble((long)Value);

    /// <summary>Assigns a number to a local variable operand.</summary>
    /// <exception cref="InvalidOperationException">The operand is not a local variable.</exception>
    internal void NumberLocal(int number)
    {
        if (Kind == OperandKind.LocalVariable)
        {
            Value = (ulong)number;

            return;
        }

        throw new InvalidOperationException("The operand is not a local variable.");
    }

    /// <summary>
    /// Local variables hash by identity; every other kind hashes by value, kind and type.
    /// </summary>
    public override int GetHashCode()
    {
        return Kind == OperandKind.LocalVariable
            ? base.GetHashCode()
            : (int)Value ^ ((int)Kind << 16) ^ ((int)Type << 20);
    }
}
/// <summary>
/// Queries on <see cref="OperandType"/> values.
/// </summary>
static class OperandTypeExtensions
{
    /// <summary>Returns true for I32 and I64, false for every other type.</summary>
    public static bool IsInteger(this OperandType type)
    {
        switch (type)
        {
            case OperandType.I32:
            case OperandType.I64:
                return true;

            default:
                return false;
        }
    }

    /// <summary>Maps an operand type to the register type used to hold it.</summary>
    /// <exception cref="InvalidOperationException">The type has no register type (for example None).</exception>
    public static RegisterType ToRegisterType(this OperandType type)
    {
        switch (type)
        {
            case OperandType.I32:
            case OperandType.I64:
                return RegisterType.Integer;

            case OperandType.FP32:
            case OperandType.FP64:
            case OperandType.V128:
                return RegisterType.Vector;

            default:
                throw new InvalidOperationException($"Invalid operand type \"{type}\".");
        }
    }

    /// <summary>Returns the size in bytes of a value of the given type.</summary>
    /// <exception cref="InvalidOperationException">The type has no size (for example None).</exception>
    public static int GetSizeInBytes(this OperandType type)
    {
        switch (type)
        {
            case OperandType.I32:
            case OperandType.FP32:
                return 4;

            case OperandType.I64:
            case OperandType.FP64:
                return 8;

            case OperandType.V128:
                return 16;

            default:
                throw new InvalidOperationException($"Invalid operand type \"{type}\".");
        }
    }
}
/// <summary>
/// Identifies a register by its index and <see cref="RegisterType"/>.
/// Two registers are equal when both components match.
/// </summary>
struct Register : IEquatable<Register>
{
    public int Index { get; }

    public RegisterType Type { get; }

    public Register(int index, RegisterType type)
    {
        Index = index;
        Type  = type;
    }

    /// <summary>Low 16 bits of the index combined with the type shifted into the upper bits.</summary>
    public override int GetHashCode()
    {
        int low  = (ushort)Index;
        int high = (int)Type << 16;

        return low | high;
    }

    public static bool operator ==(Register x, Register y)
    {
        return x.Index == y.Index && x.Type == y.Type;
    }

    public static bool operator !=(Register x, Register y)
    {
        return !(x == y);
    }

    public override bool Equals(object obj)
    {
        if (obj is Register reg)
        {
            return Equals(reg);
        }

        return false;
    }

    public bool Equals(Register other)
    {
        return Index == other.Index && Type == other.Type;
    }
}
/// <summary>
/// Read and write access to fixed-width integers (8, 16, 32 and 64 bits, signed and
/// unsigned) at a position given as a 64-bit value.
/// </summary>
/// <remarks>
/// NOTE(review): byte order, alignment requirements and the behaviour for invalid
/// positions are not specified by this interface - confirm against the implementation.
/// </remarks>
public interface IMemory
{
    // Signed reads.
    sbyte ReadSByte(long position);

    short ReadInt16(long position);

    int ReadInt32(long position);

    long ReadInt64(long position);

    // Unsigned reads.
    byte ReadByte(long position);

    ushort ReadUInt16(long position);

    uint ReadUInt32(long position);

    ulong ReadUInt64(long position);

    // Signed writes.
    void WriteSByte(long position, sbyte value);

    void WriteInt16(long position, short value);

    void WriteInt32(long position, int value);

    void WriteInt64(long position, long value);

    // Unsigned writes.
    void WriteByte(long position, byte value);

    void WriteUInt16(long position, ushort value);

    void WriteUInt32(long position, uint value);

    void WriteUInt64(long position, ulong value);
}
/// <summary>
/// Convenience routines built on top of <see cref="IMemoryManager"/>: zero filling,
/// reading and writing blittable structures, and reading zero terminated ASCII strings.
/// </summary>
public static class MemoryHelper
{
    /// <summary>
    /// Writes <paramref name="size"/> zero bytes starting at <paramref name="position"/>.
    /// </summary>
    /// <param name="memory">Memory to write to.</param>
    /// <param name="position">Position of the first byte to clear.</param>
    /// <param name="size">Number of bytes to clear.</param>
    public static void FillWithZeros(IMemoryManager memory, long position, int size)
    {
        // Largest multiple of 8 that fits; that part is cleared 8 bytes at a time.
        int size8 = size & ~(8 - 1);

        for (int offs = 0; offs < size8; offs += 8)
        {
            memory.WriteInt64(position + offs, 0);
        }

        // Remaining 0 to 7 bytes. The bound is the total size: the previous bound
        // of (size - size8) was always at or below the start offset for sizes of
        // 8 or more, which left the tail untouched.
        for (int offs = size8; offs < size; offs++)
        {
            memory.WriteByte(position + offs, 0);
        }
    }

    /// <summary>
    /// Reads a structure of type <typeparamref name="T"/> from memory.
    /// </summary>
    /// <typeparam name="T">Structure type; its marshalled size decides how many bytes are read.</typeparam>
    /// <param name="memory">Memory to read from.</param>
    /// <param name="position">Position of the first byte of the structure.</param>
    /// <returns>The structure built from the bytes that were read.</returns>
    public static unsafe T Read<T>(IMemoryManager memory, long position) where T : struct
    {
        long size = Marshal.SizeOf<T>();

        byte[] data = memory.ReadBytes(position, size);

        fixed (byte* ptr = data)
        {
            return Marshal.PtrToStructure<T>((IntPtr)ptr);
        }
    }

    /// <summary>
    /// Writes a structure of type <typeparamref name="T"/> to memory.
    /// </summary>
    /// <typeparam name="T">Structure type; its marshalled size decides how many bytes are written.</typeparam>
    /// <param name="memory">Memory to write to.</param>
    /// <param name="position">Position where the first byte is written.</param>
    /// <param name="value">Structure to write.</param>
    public static unsafe void Write<T>(IMemoryManager memory, long position, T value) where T : struct
    {
        long size = Marshal.SizeOf<T>();

        byte[] data = new byte[size];

        fixed (byte* ptr = data)
        {
            Marshal.StructureToPtr<T>(value, (IntPtr)ptr, false);
        }

        memory.WriteBytes(position, data);
    }

    /// <summary>
    /// Reads bytes up to (not including) the first zero byte and decodes them as ASCII.
    /// </summary>
    /// <param name="memory">Memory to read from.</param>
    /// <param name="position">Position of the first character.</param>
    /// <param name="maxSize">
    /// Maximum number of bytes to read, or -1 (the default) to read until a zero byte is found.
    /// </param>
    /// <returns>The decoded string, without the terminator.</returns>
    public static string ReadAsciiString(IMemoryManager memory, long position, long maxSize = -1)
    {
        using (MemoryStream ms = new MemoryStream())
        {
            for (long offs = 0; offs < maxSize || maxSize == -1; offs++)
            {
                byte value = memory.ReadByte(position + offs);

                if (value == 0)
                {
                    break;
                }

                ms.WriteByte(value);
            }

            return Encoding.ASCII.GetString(ms.ToArray());
        }
    }
}
/// <summary>
/// Allocates <paramref name="size"/> bytes using the allocator of the current platform.
/// </summary>
/// <param name="size">Number of bytes to allocate.</param>
/// <returns>Pointer to the allocated block.</returns>
/// <exception cref="PlatformNotSupportedException">
/// The current OS is not Windows, Linux or macOS.
/// </exception>
public static IntPtr Allocate(ulong size)
{
    if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
    {
        // The Windows backend takes the size as a native sized integer.
        return MemoryManagementWindows.Allocate(new IntPtr((long)size));
    }

    bool isUnix = RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
                  RuntimeInformation.IsOSPlatform(OSPlatform.OSX);

    if (!isUnix)
    {
        throw new PlatformNotSupportedException();
    }

    return MemoryManagementUnix.Allocate(size);
}
/// <summary>
/// Maps <paramref name="size"/> bytes of private, anonymous, read/write memory.
/// </summary>
/// <remarks>
/// One extra page is mapped in front of the returned block. The requested size is stored
/// in the 8 bytes immediately before the returned pointer, which is where <c>Free</c>
/// reads it back to know how much to unmap.
/// </remarks>
/// <param name="size">Number of usable bytes requested.</param>
/// <returns>Pointer to the first usable byte (one page past the start of the mapping).</returns>
/// <exception cref="OutOfMemoryException">The mapping could not be created.</exception>
public static IntPtr Allocate(ulong size)
{
    ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE);

    const MmapProts prot = MmapProts.PROT_READ | MmapProts.PROT_WRITE;

    const MmapFlags flags = MmapFlags.MAP_PRIVATE | MmapFlags.MAP_ANONYMOUS;

    IntPtr ptr = Syscall.mmap(IntPtr.Zero, size + pageSize, prot, flags, -1, 0);

    // mmap reports failure with MAP_FAILED, which is (void*)-1, not a null pointer.
    // The null check is kept as well so nothing that used to be rejected is accepted now.
    if (ptr == new IntPtr(-1L) || ptr == IntPtr.Zero)
    {
        throw new OutOfMemoryException();
    }

    unsafe
    {
        // Skip the header page, then record the size right before the usable area.
        ptr = new IntPtr(ptr.ToInt64() + (long)pageSize);

        *((ulong*)ptr - 1) = size;
    }

    return ptr;
}
/// <summary>
/// Unmaps a block given its usable pointer.
/// </summary>
/// <remarks>
/// Reads the block size from the 8 bytes right before <paramref name="address"/>, steps
/// back one page to the start of the mapping, and unmaps the size plus that extra page.
/// </remarks>
/// <param name="address">Pointer to the first usable byte of the block.</param>
/// <returns>True when munmap returned zero.</returns>
public static bool Free(IntPtr address)
{
    ulong headerSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE);

    ulong  blockSize;
    IntPtr mappingStart;

    unsafe
    {
        ulong* userPtr = (ulong*)address;

        blockSize = userPtr[-1];

        mappingStart = new IntPtr(address.ToInt64() - (long)headerSize);
    }

    int result = Syscall.munmap(mappingStart, blockSize + headerSize);

    return result == 0;
}
/// <summary>
/// Reserves and commits read/write memory with the WriteWatch allocation flag set,
/// so that writes to it can later be queried through <c>GetModifiedPages</c>.
/// </summary>
/// <param name="size">Number of bytes to allocate.</param>
/// <returns>Pointer to the allocated region.</returns>
/// <exception cref="OutOfMemoryException">VirtualAlloc returned a null pointer.</exception>
public static IntPtr AllocateWriteTracked(IntPtr size)
{
    IntPtr region = VirtualAlloc(
        IntPtr.Zero,
        size,
        AllocationType.Reserve | AllocationType.Commit | AllocationType.WriteWatch,
        MemoryProtection.ReadWrite);

    if (region != IntPtr.Zero)
    {
        return region;
    }

    throw new OutOfMemoryException();
}
/// <summary>
/// Queries the pages written to in a write tracked region and resets the tracking state
/// (the call is made with the Reset flag).
/// </summary>
/// <param name="address">Start of the region to query.</param>
/// <param name="size">Size of the region in bytes.</param>
/// <param name="addresses">Buffer receiving the addresses of the written pages; its length is the capacity passed to the OS.</param>
/// <param name="count">Number of entries of <paramref name="addresses"/> reported by the OS.</param>
/// <returns>True when GetWriteWatch returned zero.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool GetModifiedPages(
    IntPtr       address,
    IntPtr       size,
    IntPtr[]     addresses,
    out ulong    count)
{
    // On input this is the buffer capacity, on output the number of entries written.
    ulong entries = (ulong)addresses.Length;

    int status = GetWriteWatch(
        WriteWatchFlags.Reset,
        address,
        size,
        addresses,
        ref entries,
        out _);

    count = entries;

    return status == 0;
}
/// <summary>
/// Creates a memory manager over the given backing memory and allocates the root page table.
/// </summary>
/// <param name="ram">Pointer to the backing memory that mappings point into.</param>
/// <param name="addressSpaceBits">Width of the address space in bits.</param>
/// <param name="useFlatPageTable">
/// True to use a single table level covering the whole address space, false for 9 bits per level.
/// </param>
/// <exception cref="ArgumentOutOfRangeException">
/// The resulting number of bits per level is negative or above 30, which can not be
/// represented by the 32-bit level size and mask.
/// </exception>
public MemoryManager(
    IntPtr ram,
    int    addressSpaceBits = 48,
    bool   useFlatPageTable = false)
{
    // When flat page table is requested, we use a single
    // array for the mappings of the entire address space.
    // This has better performance, but also high memory usage.
    // The multi level page table uses 9 bits per level, so
    // the memory usage is lower, but the performance is also
    // lower, since each address translation requires multiple reads.
    int levelBits = useFlatPageTable ? addressSpaceBits - PageBits : 9;

    // PtLevelSize and PtLevelMask are ints: with more than 30 bits the shift below
    // wraps around and the table silently ends up far too small, so every
    // translation would alias. Refuse such configurations instead.
    if (levelBits < 0 || levelBits > 30)
    {
        throw new ArgumentOutOfRangeException(nameof(addressSpaceBits));
    }

    Ram = ram;

    _ramPtr = (byte*)ram;

    AddressSpaceBits = addressSpaceBits;
    AddressSpaceSize = 1L << addressSpaceBits;

    PtLevelBits = levelBits;
    PtLevelSize = 1 << PtLevelBits;
    PtLevelMask = PtLevelSize - 1;

    // Multiply as 64-bit values; the int product overflows from 28 bits per level on.
    _pageTable = Allocate((ulong)PtLevelSize * (ulong)IntPtr.Size);
}
/// <summary>
/// Atomically sets or clears flag bits on the page table entry of a position.
/// </summary>
/// <remarks>
/// The entry is updated with a compare-and-exchange that is retried until it succeeds,
/// so an update made to the same entry by another thread in between is not lost.
/// </remarks>
/// <param name="position">Position whose page table entry is modified.</param>
/// <param name="flag">Bits to set or clear.</param>
/// <param name="setFlag">True to set the bits, false to clear them.</param>
private void ModifyPtEntryFlag(long position, long flag, bool setFlag)
{
    // The entry location does not depend on the entry value, so look it up once
    // rather than on every retry.
    IntPtr* ptPtr = GetPtPtr(position);

    while (true)
    {
        IntPtr old = *ptPtr;

        long modified = old.ToInt64();

        if (setFlag)
        {
            modified |= flag;
        }
        else
        {
            modified &= ~flag;
        }

        IntPtr origValue = Interlocked.CompareExchange(ref *ptPtr, new IntPtr(modified), old);

        // The exchange only happened if the entry still had the value we read.
        if (origValue == old)
        {
            break;
        }
    }
}
+ IntPtr newPtr = Allocate((ulong)(PtLevelSize * IntPtr.Size)); + + // Try to swap the current pointer (should be zero), with the allocated one. + nextPtr = Interlocked.CompareExchange(ref *ptePtr, newPtr, IntPtr.Zero); + + // If the old pointer is not null, then another thread already has set it. + if (nextPtr != IntPtr.Zero) + { + Free(newPtr); + } + else + { + nextPtr = newPtr; + } + } + } + + return ptePtr; + } + + public bool IsRegionModified(long position, long size) + { + if (!HasWriteWatchSupport) + { + return IsRegionModifiedFallback(position, size); + } + + IntPtr address = Translate(position); + + IntPtr baseAddr = address; + IntPtr expectedAddr = address; + + long pendingPages = 0; + + long pages = size / PageSize; + + bool modified = false; + + bool IsAnyPageModified() + { + IntPtr pendingSize = new IntPtr(pendingPages * PageSize); + + IntPtr[] addresses = new IntPtr[pendingPages]; + + bool result = GetModifiedPages(baseAddr, pendingSize, addresses, out ulong count); + + if (result) + { + return count != 0; + } + else + { + return true; + } + } + + while (pages-- > 0) + { + if (address != expectedAddr) + { + modified |= IsAnyPageModified(); + + baseAddr = address; + + pendingPages = 0; + } + + expectedAddr = address + PageSize; + + pendingPages++; + + if (pages == 0) + { + break; + } + + position += PageSize; + + address = Translate(position); + } + + if (pendingPages != 0) + { + modified |= IsAnyPageModified(); + } + + return modified; + } + + private unsafe bool IsRegionModifiedFallback(long position, long size) + { + long endAddr = (position + size + PageMask) & ~PageMask; + + bool modified = false; + + while ((ulong)position < (ulong)endAddr) + { + if (IsValidPosition(position)) + { + byte* ptr = ((byte**)_pageTable)[position >> PageBits]; + + ulong ptrUlong = (ulong)ptr; + + if ((ptrUlong & PteFlagNotModified) == 0) + { + modified = true; + + SetPtEntryFlag(position, PteFlagNotModified); + } + } + else + { + modified = true; + } + + position += 
PageSize; + } + + return modified; + } + + public bool TryGetHostAddress(long position, long size, out IntPtr ptr) + { + if (IsContiguous(position, size)) + { + ptr = (IntPtr)Translate(position); + + return true; + } + + ptr = IntPtr.Zero; + + return false; + } + + private bool IsContiguous(long position, long size) + { + long endPos = position + size; + + position &= ~PageMask; + + long expectedPa = GetPhysicalAddress(position); + + while ((ulong)position < (ulong)endPos) + { + long pa = GetPhysicalAddress(position); + + if (pa != expectedPa) + { + return false; + } + + position += PageSize; + expectedPa += PageSize; + } + + return true; + } + + public bool IsValidPosition(long position) + { + return (ulong)position < (ulong)AddressSpaceSize; + } + + internal V128 AtomicLoadInt128(long position) + { + if ((position & 0xf) != 0) + { + AbortWithAlignmentFault(position); + } + + IntPtr ptr = TranslateWrite(position); + + return MemoryManagerPal.AtomicLoad128(ptr); + } + + internal bool AtomicCompareExchangeByte(long position, byte expected, byte desired) + { + int* ptr = (int*)Translate(position); + + int currentValue = *ptr; + + int expected32 = (currentValue & ~byte.MaxValue) | expected; + int desired32 = (currentValue & ~byte.MaxValue) | desired; + + return Interlocked.CompareExchange(ref *ptr, desired32, expected32) == expected32; + } + + internal bool AtomicCompareExchangeInt16(long position, short expected, short desired) + { + if ((position & 1) != 0) + { + AbortWithAlignmentFault(position); + } + + int* ptr = (int*)Translate(position); + + int currentValue = *ptr; + + int expected32 = (currentValue & ~ushort.MaxValue) | (ushort)expected; + int desired32 = (currentValue & ~ushort.MaxValue) | (ushort)desired; + + return Interlocked.CompareExchange(ref *ptr, desired32, expected32) == expected32; + } + + public bool AtomicCompareExchangeInt32(long position, int expected, int desired) + { + if ((position & 3) != 0) + { + AbortWithAlignmentFault(position); + } + + 
int* ptr = (int*)TranslateWrite(position); + + return Interlocked.CompareExchange(ref *ptr, desired, expected) == expected; + } + + internal bool AtomicCompareExchangeInt64(long position, long expected, long desired) + { + if ((position & 7) != 0) + { + AbortWithAlignmentFault(position); + } + + long* ptr = (long*)TranslateWrite(position); + + return Interlocked.CompareExchange(ref *ptr, desired, expected) == expected; + } + + internal bool AtomicCompareExchangeInt128(long position, V128 expected, V128 desired) + { + if ((position & 0xf) != 0) + { + AbortWithAlignmentFault(position); + } + + IntPtr ptr = TranslateWrite(position); + + return MemoryManagerPal.CompareAndSwap128(ptr, expected, desired) == expected; + } + + public int AtomicIncrementInt32(long position) + { + if ((position & 3) != 0) + { + AbortWithAlignmentFault(position); + } + + int* ptr = (int*)TranslateWrite(position); + + return Interlocked.Increment(ref *ptr); + } + + public int AtomicDecrementInt32(long position) + { + if ((position & 3) != 0) + { + AbortWithAlignmentFault(position); + } + + int* ptr = (int*)TranslateWrite(position); + + return Interlocked.Decrement(ref *ptr); + } + + private void AbortWithAlignmentFault(long position) + { + // TODO: Abort mode and exception support on the CPU. 
+ throw new InvalidOperationException($"Tried to compare exchange a misaligned address 0x{position:X16}."); + } + + public sbyte ReadSByte(long position) + { + return (sbyte)ReadByte(position); + } + + public short ReadInt16(long position) + { + return (short)ReadUInt16(position); + } + + public int ReadInt32(long position) + { + return (int)ReadUInt32(position); + } + + public long ReadInt64(long position) + { + return (long)ReadUInt64(position); + } + + public byte ReadByte(long position) + { + return *((byte*)Translate(position)); + } + + public ushort ReadUInt16(long position) + { + if ((position & 1) == 0) + { + return *((ushort*)Translate(position)); + } + else + { + return (ushort)(ReadByte(position + 0) << 0 | + ReadByte(position + 1) << 8); + } + } + + public uint ReadUInt32(long position) + { + if ((position & 3) == 0) + { + return *((uint*)Translate(position)); + } + else + { + return (uint)(ReadUInt16(position + 0) << 0 | + ReadUInt16(position + 2) << 16); + } + } + + public ulong ReadUInt64(long position) + { + if ((position & 7) == 0) + { + return *((ulong*)Translate(position)); + } + else + { + return (ulong)ReadUInt32(position + 0) << 0 | + (ulong)ReadUInt32(position + 4) << 32; + } + } + + public V128 ReadVector128(long position) + { + return new V128(ReadUInt64(position), ReadUInt64(position + 8)); + } + + public byte[] ReadBytes(long position, long size) + { + long endAddr = position + size; + + if ((ulong)size > int.MaxValue) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + if ((ulong)endAddr < (ulong)position) + { + throw new ArgumentOutOfRangeException(nameof(position)); + } + + byte[] data = new byte[size]; + + int offset = 0; + + while ((ulong)position < (ulong)endAddr) + { + long pageLimit = (position + PageSize) & ~(long)PageMask; + + if ((ulong)pageLimit > (ulong)endAddr) + { + pageLimit = endAddr; + } + + int copySize = (int)(pageLimit - position); + + Marshal.Copy(Translate(position), data, offset, copySize); + + 
position += copySize; + offset += copySize; + } + + return data; + } + + public void ReadBytes(long position, byte[] data, int startIndex, int size) + { + // Note: This will be moved later. + long endAddr = position + size; + + if ((ulong)size > int.MaxValue) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + if ((ulong)endAddr < (ulong)position) + { + throw new ArgumentOutOfRangeException(nameof(position)); + } + + int offset = startIndex; + + while ((ulong)position < (ulong)endAddr) + { + long pageLimit = (position + PageSize) & ~(long)PageMask; + + if ((ulong)pageLimit > (ulong)endAddr) + { + pageLimit = endAddr; + } + + int copySize = (int)(pageLimit - position); + + Marshal.Copy(Translate(position), data, offset, copySize); + + position += copySize; + offset += copySize; + } + } + + public void WriteSByte(long position, sbyte value) + { + WriteByte(position, (byte)value); + } + + public void WriteInt16(long position, short value) + { + WriteUInt16(position, (ushort)value); + } + + public void WriteInt32(long position, int value) + { + WriteUInt32(position, (uint)value); + } + + public void WriteInt64(long position, long value) + { + WriteUInt64(position, (ulong)value); + } + + public void WriteByte(long position, byte value) + { + *((byte*)TranslateWrite(position)) = value; + } + + public void WriteUInt16(long position, ushort value) + { + if ((position & 1) == 0) + { + *((ushort*)TranslateWrite(position)) = value; + } + else + { + WriteByte(position + 0, (byte)(value >> 0)); + WriteByte(position + 1, (byte)(value >> 8)); + } + } + + public void WriteUInt32(long position, uint value) + { + if ((position & 3) == 0) + { + *((uint*)TranslateWrite(position)) = value; + } + else + { + WriteUInt16(position + 0, (ushort)(value >> 0)); + WriteUInt16(position + 2, (ushort)(value >> 16)); + } + } + + public void WriteUInt64(long position, ulong value) + { + if ((position & 7) == 0) + { + *((ulong*)TranslateWrite(position)) = value; + } + else + { + 
WriteUInt32(position + 0, (uint)(value >> 0)); + WriteUInt32(position + 4, (uint)(value >> 32)); + } + } + + public void WriteVector128(long position, V128 value) + { + WriteUInt64(position + 0, value.GetUInt64(0)); + WriteUInt64(position + 8, value.GetUInt64(1)); + } + + public void WriteBytes(long position, byte[] data) + { + long endAddr = position + data.Length; + + if ((ulong)endAddr < (ulong)position) + { + throw new ArgumentOutOfRangeException(nameof(position)); + } + + int offset = 0; + + while ((ulong)position < (ulong)endAddr) + { + long pageLimit = (position + PageSize) & ~(long)PageMask; + + if ((ulong)pageLimit > (ulong)endAddr) + { + pageLimit = endAddr; + } + + int copySize = (int)(pageLimit - position); + + Marshal.Copy(data, offset, TranslateWrite(position), copySize); + + position += copySize; + offset += copySize; + } + } + + public void WriteBytes(long position, byte[] data, int startIndex, int size) + { + // Note: This will be moved later. + long endAddr = position + size; + + if ((ulong)endAddr < (ulong)position) + { + throw new ArgumentOutOfRangeException(nameof(position)); + } + + int offset = startIndex; + + while ((ulong)position < (ulong)endAddr) + { + long pageLimit = (position + PageSize) & ~(long)PageMask; + + if ((ulong)pageLimit > (ulong)endAddr) + { + pageLimit = endAddr; + } + + int copySize = (int)(pageLimit - position); + + Marshal.Copy(data, offset, Translate(position), copySize); + + position += copySize; + offset += copySize; + } + } + + public void CopyBytes(long src, long dst, long size) + { + // Note: This will be moved later. 
+ if (IsContiguous(src, size) && + IsContiguous(dst, size)) + { + byte* srcPtr = (byte*)Translate(src); + byte* dstPtr = (byte*)Translate(dst); + + Buffer.MemoryCopy(srcPtr, dstPtr, size, size); + } + else + { + WriteBytes(dst, ReadBytes(src, size)); + } + } + + public void Dispose() + { + Dispose(true); + } + + protected virtual void Dispose(bool disposing) + { + IntPtr ptr = Interlocked.Exchange(ref _pageTable, IntPtr.Zero); + + if (ptr != IntPtr.Zero) + { + FreePageTableEntry(ptr, PageBits); + } + } + + private void FreePageTableEntry(IntPtr ptr, int levelBitEnd) + { + levelBitEnd += PtLevelBits; + + if (levelBitEnd >= AddressSpaceBits) + { + Free(ptr); + + return; + } + + for (int index = 0; index < PtLevelSize; index++) + { + IntPtr ptePtr = ((IntPtr*)ptr)[index]; + + if (ptePtr != IntPtr.Zero) + { + FreePageTableEntry(ptePtr, levelBitEnd); + } + } + + Free(ptr); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Memory/MemoryManagerPal.cs b/ARMeilleure/Memory/MemoryManagerPal.cs new file mode 100644 index 000000000..64191a0ac --- /dev/null +++ b/ARMeilleure/Memory/MemoryManagerPal.cs @@ -0,0 +1,77 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +namespace ARMeilleure.Memory +{ + static class MemoryManagerPal + { + private delegate V128 CompareExchange128(IntPtr address, V128 expected, V128 desired); + + private static CompareExchange128 _compareExchange128; + + private static object _lock; + + static MemoryManagerPal() + { + _lock = new object(); + } + + public static V128 AtomicLoad128(IntPtr address) + { + return GetCompareAndSwap128()(address, V128.Zero, V128.Zero); + } + + public static V128 CompareAndSwap128(IntPtr address, V128 expected, V128 desired) + { + return GetCompareAndSwap128()(address, expected, desired); + } + + private static CompareExchange128 GetCompareAndSwap128() + { + if (_compareExchange128 == null) + { + GenerateCompareAndSwap128(); + } + + 
return _compareExchange128; + } + + private static void GenerateCompareAndSwap128() + { + lock (_lock) + { + if (_compareExchange128 != null) + { + return; + } + + EmitterContext context = new EmitterContext(); + + Operand address = context.LoadArgument(OperandType.I64, 0); + Operand expected = context.LoadArgument(OperandType.V128, 1); + Operand desired = context.LoadArgument(OperandType.V128, 2); + + Operand result = context.CompareAndSwap128(address, expected, desired); + + context.Return(result); + + ControlFlowGraph cfg = context.GetControlFlowGraph(); + + OperandType[] argTypes = new OperandType[] + { + OperandType.I64, + OperandType.V128, + OperandType.V128 + }; + + _compareExchange128 = Compiler.Compile( + cfg, + argTypes, + OperandType.V128, + CompilerOptions.HighCq); + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Memory/MemoryProtection.cs b/ARMeilleure/Memory/MemoryProtection.cs new file mode 100644 index 000000000..6bc16f8ea --- /dev/null +++ b/ARMeilleure/Memory/MemoryProtection.cs @@ -0,0 +1,17 @@ +using System; + +namespace ARMeilleure.Memory +{ + [Flags] + public enum MemoryProtection + { + None = 0, + Read = 1 << 0, + Write = 1 << 1, + Execute = 1 << 2, + + ReadAndWrite = Read | Write, + ReadAndExecute = Read | Execute, + ReadWriteExecute = Read | Write | Execute + } +} \ No newline at end of file diff --git a/ARMeilleure/Memory/MemoryProtectionException.cs b/ARMeilleure/Memory/MemoryProtectionException.cs new file mode 100644 index 000000000..6313ce6a1 --- /dev/null +++ b/ARMeilleure/Memory/MemoryProtectionException.cs @@ -0,0 +1,9 @@ +using System; + +namespace ARMeilleure.Memory +{ + class MemoryProtectionException : Exception + { + public MemoryProtectionException(MemoryProtection protection) : base($"Failed to set memory protection to \"{protection}\".") { } + } +} \ No newline at end of file diff --git a/ARMeilleure/Optimizations.cs b/ARMeilleure/Optimizations.cs new file mode 100644 index 000000000..0b9885dc9 --- 
/dev/null
+++ b/ARMeilleure/Optimizations.cs
@@ -0,0 +1,37 @@
+using ARMeilleure.CodeGen.X86;
+
+namespace ARMeilleure
+{
+    /// <summary>Process-wide switches read by the JIT; every public switch defaults to true.</summary>
+    public static class Optimizations
+    {
+        public static bool AssumeStrictAbiCompliance { get; set; } = true;
+
+        public static bool FastFP { get; set; } = true;
+
+        // Requested settings: each one permits, but does not force, use of the named x86 extension.
+        public static bool UseSseIfAvailable { get; set; } = true;
+        public static bool UseSse2IfAvailable { get; set; } = true;
+        public static bool UseSse3IfAvailable { get; set; } = true;
+        public static bool UseSsse3IfAvailable { get; set; } = true;
+        public static bool UseSse41IfAvailable { get; set; } = true;
+        public static bool UseSse42IfAvailable { get; set; } = true;
+        public static bool UsePopCntIfAvailable { get; set; } = true;
+
+        /// <summary>Pass-through to <c>HardwareCapabilities.ForceLegacySse</c>.</summary>
+        public static bool ForceLegacySse
+        {
+            get { return HardwareCapabilities.ForceLegacySse; }
+            set { HardwareCapabilities.ForceLegacySse = value; }
+        }
+
+        // Effective settings: requested above AND reported as supported by HardwareCapabilities.
+        internal static bool UseSse { get { return UseSseIfAvailable && HardwareCapabilities.SupportsSse; } }
+        internal static bool UseSse2 { get { return UseSse2IfAvailable && HardwareCapabilities.SupportsSse2; } }
+        internal static bool UseSse3 { get { return UseSse3IfAvailable && HardwareCapabilities.SupportsSse3; } }
+        internal static bool UseSsse3 { get { return UseSsse3IfAvailable && HardwareCapabilities.SupportsSsse3; } }
+        internal static bool UseSse41 { get { return UseSse41IfAvailable && HardwareCapabilities.SupportsSse41; } }
+        internal static bool UseSse42 { get { return UseSse42IfAvailable && HardwareCapabilities.SupportsSse42; } }
+        internal static bool UsePopCnt { get { return UsePopCntIfAvailable && HardwareCapabilities.SupportsPopcnt; } }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/Aarch32Mode.cs b/ARMeilleure/State/Aarch32Mode.cs
new file mode 100644
index 000000000..395e288aa
--- /dev/null
+++ b/ARMeilleure/State/Aarch32Mode.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.State
+{
+    // AArch32 processor modes; the 5-bit values presumably mirror the architectural mode field encoding (TODO confirm).
+    enum Aarch32Mode
+    {
+        User = 0b10000,
+        Fiq = 0b10001,
+        Irq = 0b10010,
+        Supervisor = 0b10011,
+        Monitor = 0b10110,
+        Abort = 0b10111,
+        Hypervisor = 0b11010,
+        Undefined = 0b11011,
+        System = 0b11111
+    }
+}
\ 
No newline at end of file
diff --git a/ARMeilleure/State/ExecutionContext.cs b/ARMeilleure/State/ExecutionContext.cs
new file mode 100644
index 000000000..22cfcb694
--- /dev/null
+++ b/ARMeilleure/State/ExecutionContext.cs
@@ -0,0 +1,146 @@
+using System;
+using System.Diagnostics;
+
+namespace ARMeilleure.State
+{
+    /// <summary>Guest CPU state for one execution context; register storage lives in a <see cref="NativeContext"/>.</summary>
+    public class ExecutionContext : IExecutionContext
+    {
+        // Value written to the native context counter on construction and after every interrupt check.
+        private const int MinCountForCheck = 40000;
+
+        private NativeContext _nativeContext;
+
+        internal IntPtr NativeContextPtr => _nativeContext.BasePtr;
+
+        // Volatile: presumably RequestInterrupt is called from a different thread than CheckInterrupt (TODO confirm).
+        private volatile bool _interrupted;
+
+        private static readonly Stopwatch _tickCounter;
+
+        private static readonly double _hostTickFreq;
+
+        public uint CtrEl0 => 0x8444c004;
+        public uint DczidEl0 => 0x00000004;
+
+        public ulong CntfrqEl0 { get; set; }
+
+        /// <summary>Time elapsed on the shared stopwatch (in seconds) multiplied by <see cref="CntfrqEl0"/>.</summary>
+        public ulong CntpctEl0
+        {
+            get
+            {
+                double ticks = _tickCounter.ElapsedTicks * _hostTickFreq;
+
+                return (ulong)(ticks * CntfrqEl0);
+            }
+        }
+
+        public long TpidrEl0 { get; set; }
+        public long Tpidr { get; set; }
+
+        public FPCR Fpcr { get; set; }
+        public FPSR Fpsr { get; set; }
+
+        public bool IsAarch32 { get; set; }
+
+        /// <summary>Aarch64 unless <see cref="IsAarch32"/> is set; then Thumb or Arm depending on the T flag.</summary>
+        internal ExecutionMode ExecutionMode
+        {
+            get
+            {
+                if (IsAarch32)
+                {
+                    return GetPstateFlag(PState.TFlag)
+                        ? ExecutionMode.Aarch32Thumb
+                        : ExecutionMode.Aarch32Arm;
+                }
+                else
+                {
+                    return ExecutionMode.Aarch64;
+                }
+            }
+        }
+
+        public bool Running { get; set; }
+
+        public event EventHandler Interrupt;
+        public event EventHandler Break;
+        public event EventHandler SupervisorCall;
+        public event EventHandler Undefined;
+
+        static ExecutionContext()
+        {
+            _hostTickFreq = 1.0 / Stopwatch.Frequency;
+
+            _tickCounter = new Stopwatch();
+
+            _tickCounter.Start();
+        }
+
+        public ExecutionContext()
+        {
+            _nativeContext = new NativeContext();
+
+            Running = true;
+
+            _nativeContext.SetCounter(MinCountForCheck);
+        }
+
+        public ulong GetX(int index) => _nativeContext.GetX(index);
+        public void SetX(int index, ulong value) => _nativeContext.SetX(index, value);
+
+        public V128 GetV(int index) => _nativeContext.GetV(index);
+        public void SetV(int index, V128 value) => _nativeContext.SetV(index, value);
+
+        public bool GetPstateFlag(PState flag) => _nativeContext.GetPstateFlag(flag);
+        public void SetPstateFlag(PState flag, bool value) => _nativeContext.SetPstateFlag(flag, value);
+
+        /// <summary>Raises <see cref="Interrupt"/> if one was requested, then resets the native counter.</summary>
+        internal void CheckInterrupt()
+        {
+            if (_interrupted)
+            {
+                _interrupted = false;
+
+                Interrupt?.Invoke(this, EventArgs.Empty);
+            }
+
+            _nativeContext.SetCounter(MinCountForCheck);
+        }
+
+        /// <summary>Flags an interrupt; it is delivered by the next <see cref="CheckInterrupt"/> call.</summary>
+        public void RequestInterrupt()
+        {
+            _interrupted = true;
+        }
+
+        internal void OnBreak(ulong address, int imm)
+        {
+            Break?.Invoke(this, new InstExceptionEventArgs(address, imm));
+        }
+
+        internal void OnSupervisorCall(ulong address, int imm)
+        {
+            SupervisorCall?.Invoke(this, new InstExceptionEventArgs(address, imm));
+        }
+
+        internal void OnUndefined(ulong address, int opCode)
+        {
+            Undefined?.Invoke(this, new InstUndefinedEventArgs(address, opCode));
+        }
+
+        /// <summary>Releases the native context; a second call is a no-op instead of a second free.</summary>
+        public void Dispose()
+        {
+            NativeContext nativeContext = _nativeContext;
+
+            if (nativeContext != null)
+            {
+                _nativeContext = null;
+
+                nativeContext.Dispose();
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/ExecutionMode.cs b/ARMeilleure/State/ExecutionMode.cs
new file mode 100644
index 000000000..eaed9d27f
--- /dev/null
+++ 
b/ARMeilleure/State/ExecutionMode.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.State
+{
+    // Instruction set state of a context; derived in ExecutionContext from IsAarch32 and the T flag.
+    enum ExecutionMode
+    {
+        Aarch32Arm,
+        Aarch32Thumb,
+        Aarch64
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/FPCR.cs b/ARMeilleure/State/FPCR.cs
new file mode 100644
index 000000000..511681fa9
--- /dev/null
+++ b/ARMeilleure/State/FPCR.cs
@@ -0,0 +1,24 @@
+using System;
+
+namespace ARMeilleure.State
+{
+    [Flags]
+    public enum FPCR
+    {
+        Ufe = 1 << 11,
+        Fz = 1 << 24,
+        Dn = 1 << 25,
+        Ahp = 1 << 26
+    }
+
+    public static class FPCRExtensions
+    {
+        private const int RModeShift = 22;
+
+        /// <summary>Extracts the 2-bit field at bits 23:22 of <paramref name="fpcr"/> as a rounding mode.</summary>
+        public static FPRoundingMode GetRoundingMode(this FPCR fpcr)
+        {
+            return (FPRoundingMode)(((int)fpcr >> RModeShift) & 3);
+        }
+    }
+}
diff --git a/ARMeilleure/State/FPException.cs b/ARMeilleure/State/FPException.cs
new file mode 100644
index 000000000..e24e07af1
--- /dev/null
+++ b/ARMeilleure/State/FPException.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.State
+{
+    enum FPException
+    {
+        InvalidOp = 0,
+        DivideByZero = 1,
+        Overflow = 2,
+        Underflow = 3,
+        Inexact = 4,
+        InputDenorm = 7
+    }
+}
diff --git a/ARMeilleure/State/FPRoundingMode.cs b/ARMeilleure/State/FPRoundingMode.cs
new file mode 100644
index 000000000..ee4f87668
--- /dev/null
+++ b/ARMeilleure/State/FPRoundingMode.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.State
+{
+    public enum FPRoundingMode
+    {
+        ToNearest = 0,
+        TowardsPlusInfinity = 1,
+        TowardsMinusInfinity = 2,
+        TowardsZero = 3
+    }
+}
diff --git a/ARMeilleure/State/FPSR.cs b/ARMeilleure/State/FPSR.cs
new file mode 100644
index 000000000..c20dc4393
--- /dev/null
+++ b/ARMeilleure/State/FPSR.cs
@@ -0,0 +1,11 @@
+using System;
+
+namespace ARMeilleure.State
+{
+    [Flags]
+    public enum FPSR
+    {
+        Ufc = 1 << 3,
+        Qc = 1 << 27
+    }
+}
diff --git a/ARMeilleure/State/FPType.cs b/ARMeilleure/State/FPType.cs
new file mode 100644
index 000000000..84e0db8da
--- /dev/null
+++ b/ARMeilleure/State/FPType.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.State
+{
+    enum FPType
+    {
+        Nonzero,
+        Zero,
+        Infinity,
+        QNaN,
+        SNaN
+    }
+}
diff --git a/ARMeilleure/State/IExecutionContext.cs b/ARMeilleure/State/IExecutionContext.cs
new file mode 100644
index 000000000..df91b7a1e
--- /dev/null
+++ b/ARMeilleure/State/IExecutionContext.cs
@@ -0,0 +1,37 @@
+using System;
+
+namespace ARMeilleure.State
+{
+    public interface IExecutionContext : IDisposable
+    {
+        uint CtrEl0 { get; }
+        uint DczidEl0 { get; }
+
+        ulong CntfrqEl0 { get; set; }
+        ulong CntpctEl0 { get; }
+
+        long TpidrEl0 { get; set; }
+        long Tpidr { get; set; }
+
+        FPCR Fpcr { get; set; }
+        FPSR Fpsr { get; set; }
+
+        bool IsAarch32 { get; set; }
+
+        bool Running { get; set; }
+
+        event EventHandler Interrupt;
+        event EventHandler Break;
+        event EventHandler SupervisorCall;
+        event EventHandler Undefined;
+
+        ulong GetX(int index);
+        void SetX(int index, ulong value);
+
+        V128 GetV(int index);
+
+        bool GetPstateFlag(PState flag);
+
+        void RequestInterrupt();
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/InstExceptionEventArgs.cs b/ARMeilleure/State/InstExceptionEventArgs.cs
new file mode 100644
index 000000000..c2460e4b4
--- /dev/null
+++ b/ARMeilleure/State/InstExceptionEventArgs.cs
@@ -0,0 +1,16 @@
+using System;
+
+namespace ARMeilleure.State
+{
+    public class InstExceptionEventArgs : EventArgs
+    {
+        public ulong Address { get; }
+        public int Id { get; }
+
+        public InstExceptionEventArgs(ulong address, int id)
+        {
+            Address = address;
+            Id = id;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/InstUndefinedEventArgs.cs b/ARMeilleure/State/InstUndefinedEventArgs.cs
new file mode 100644
index 000000000..c02b648e1
--- /dev/null
+++ b/ARMeilleure/State/InstUndefinedEventArgs.cs
@@ -0,0 +1,16 @@
+using System;
+
+namespace ARMeilleure.State
+{
+    public class InstUndefinedEventArgs : EventArgs
+    {
+        public ulong Address { get; }
+        public int OpCode { get; }
+
+        public InstUndefinedEventArgs(ulong address, int opCode)
+        {
+            Address = address;
+            OpCode = opCode;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/NativeContext.cs b/ARMeilleure/State/NativeContext.cs
new file mode 100644
index 000000000..4e6a5302f
--- /dev/null
+++ b/ARMeilleure/State/NativeContext.cs
@@ -0,0 +1,161 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.State
+{
+    // Register file kept in a block obtained from MemoryManagement.Allocate and addressed by byte offsets from BasePtr.
+    class NativeContext : IDisposable
+    {
+        private const int IntSize = 8;
+        private const int VecSize = 16;
+        private const int FlagSize = 4;
+        private const int ExtraSize = 4;
+
+        // Layout, in order: integer registers, vector registers, flags, then one 4-byte counter (ExtraSize).
+        private const int TotalSize = RegisterConsts.IntRegsCount * IntSize +
+                                      RegisterConsts.VecRegsCount * VecSize +
+                                      RegisterConsts.FlagsCount * FlagSize + ExtraSize;
+
+        public IntPtr BasePtr { get; }
+
+        public NativeContext()
+        {
+            BasePtr = MemoryManagement.Allocate(TotalSize);
+        }
+
+        public ulong GetX(int index)
+        {
+            if ((uint)index >= RegisterConsts.IntRegsCount)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+
+            return (ulong)Marshal.ReadInt64(BasePtr, index * IntSize);
+        }
+
+        public void SetX(int index, ulong value)
+        {
+            if ((uint)index >= RegisterConsts.IntRegsCount)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+
+            Marshal.WriteInt64(BasePtr, index * IntSize, (long)value);
+        }
+
+        public V128 GetV(int index)
+        {
+            if ((uint)index >= RegisterConsts.VecRegsCount)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+
+            int offset = RegisterConsts.IntRegsCount * IntSize + index * VecSize;
+
+            return new V128(
+                Marshal.ReadInt64(BasePtr, offset + 0),
+                Marshal.ReadInt64(BasePtr, offset + 8));
+        }
+
+        public void SetV(int index, V128 value)
+        {
+            if ((uint)index >= RegisterConsts.VecRegsCount)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+
+            int offset = RegisterConsts.IntRegsCount * IntSize + index * VecSize;
+
+            Marshal.WriteInt64(BasePtr, offset + 0, value.GetInt64(0));
+            Marshal.WriteInt64(BasePtr, offset + 8, value.GetInt64(1));
+        }
+
+        public bool GetPstateFlag(PState flag)
+        {
+            if ((uint)flag >= RegisterConsts.FlagsCount)
+            {
+                throw new ArgumentException($"Invalid flag \"{flag}\" specified.");
+            }
+
+            int offset =
+                RegisterConsts.IntRegsCount * IntSize +
+                RegisterConsts.VecRegsCount * VecSize + (int)flag * FlagSize;
+
+            int value = Marshal.ReadInt32(BasePtr, offset);
+
+            return value != 0;
+        }
+
+        public void SetPstateFlag(PState flag, bool value)
+        {
+            if ((uint)flag >= RegisterConsts.FlagsCount)
+            {
+                throw new ArgumentException($"Invalid flag \"{flag}\" specified.");
+            }
+
+            int offset =
+                RegisterConsts.IntRegsCount * IntSize +
+                RegisterConsts.VecRegsCount * VecSize + (int)flag * FlagSize;
+
+            Marshal.WriteInt32(BasePtr, offset, value ? 1 : 0);
+        }
+
+        public int GetCounter()
+        {
+            return Marshal.ReadInt32(BasePtr, GetCounterOffset());
+        }
+
+        public void SetCounter(int value)
+        {
+            Marshal.WriteInt32(BasePtr, GetCounterOffset(), value);
+        }
+
+        // Byte offset of a register from BasePtr; throws ArgumentException when it would end past TotalSize.
+        public static int GetRegisterOffset(Register reg)
+        {
+            int offset, size;
+
+            if (reg.Type == RegisterType.Integer)
+            {
+                offset = reg.Index * IntSize;
+
+                size = IntSize;
+            }
+            else if (reg.Type == RegisterType.Vector)
+            {
+                offset = RegisterConsts.IntRegsCount * IntSize + reg.Index * VecSize;
+
+                size = VecSize;
+            }
+            else /* if (reg.Type == RegisterType.Flag) */
+            {
+                offset = RegisterConsts.IntRegsCount * IntSize +
+                         RegisterConsts.VecRegsCount * VecSize + reg.Index * FlagSize;
+
+                size = FlagSize;
+            }
+
+            if ((uint)(offset + size) > (uint)TotalSize)
+            {
+                throw new ArgumentException("Invalid register.");
+            }
+
+            return offset;
+        }
+
+        // Byte offset of the counter, which sits directly after the flags area.
+        public static int GetCounterOffset()
+        {
+            return RegisterConsts.IntRegsCount * IntSize +
+                   RegisterConsts.VecRegsCount * VecSize +
+                   RegisterConsts.FlagsCount * FlagSize;
+        }
+
+        public void Dispose()
+        {
+            MemoryManagement.Free(BasePtr);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/PState.cs b/ARMeilleure/State/PState.cs
new file mode 100644
index 
000000000..ce755e952
--- /dev/null
+++ b/ARMeilleure/State/PState.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.State
+{
+    /// <summary>
+    /// PSTATE flag identifiers. Each value is a bit position (0-31), not a bit mask: NativeContext
+    /// range-checks it against RegisterConsts.FlagsCount and uses it as a slot index.
+    /// </summary>
+    public enum PState
+    {
+        TFlag = 5,
+        EFlag = 9,
+
+        VFlag = 28,
+        CFlag = 29,
+        ZFlag = 30,
+        NFlag = 31
+    }
+}
diff --git a/ARMeilleure/State/RegisterAlias.cs b/ARMeilleure/State/RegisterAlias.cs
new file mode 100644
index 000000000..ae0d45628
--- /dev/null
+++ b/ARMeilleure/State/RegisterAlias.cs
@@ -0,0 +1,42 @@
+namespace ARMeilleure.State
+{
+    // Named register indices; the Usr/Hyp/Irq/Svc/Abt/Und/Fiq suffixes presumably name AArch32 banked registers (TODO confirm).
+    static class RegisterAlias
+    {
+        public const int R8Usr = 8;
+        public const int R9Usr = 9;
+        public const int R10Usr = 10;
+        public const int R11Usr = 11;
+        public const int R12Usr = 12;
+        public const int SpUsr = 13;
+        public const int LrUsr = 14;
+
+        public const int SpHyp = 15;
+
+        public const int LrIrq = 16;
+        public const int SpIrq = 17;
+
+        public const int LrSvc = 18;
+        public const int SpSvc = 19;
+
+        public const int LrAbt = 20;
+        public const int SpAbt = 21;
+
+        public const int LrUnd = 22;
+        public const int SpUnd = 23;
+
+        public const int R8Fiq = 24;
+        public const int R9Fiq = 25;
+        public const int R10Fiq = 26;
+        public const int R11Fiq = 27;
+        public const int R12Fiq = 28;
+        public const int SpFiq = 29;
+        public const int LrFiq = 30;
+
+        public const int Aarch32Lr = 14;
+        public const int Aarch32Pc = 15;
+
+        public const int Lr = 30;
+        public const int Zr = 31;
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/RegisterConsts.cs b/ARMeilleure/State/RegisterConsts.cs
new file mode 100644
index 000000000..a85117bb2
--- /dev/null
+++ b/ARMeilleure/State/RegisterConsts.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.State
+{
+    // Sizes of the three register classes (integer, vector, flag) and sums derived from them.
+    static class RegisterConsts
+    {
+        public const int IntRegsCount = 32;
+        public const int VecRegsCount = 32;
+        public const int FlagsCount = 32;
+        public const int IntAndVecRegsCount = IntRegsCount + VecRegsCount;
+        public const int TotalCount = IntRegsCount + VecRegsCount + FlagsCount;
+
+        public const int ZeroIndex = 31;
+    }
+}
\ 
No newline at end of file
diff --git a/ARMeilleure/State/V128.cs b/ARMeilleure/State/V128.cs
new file mode 100644
index 000000000..eeb9ff1ca
--- /dev/null
+++ b/ARMeilleure/State/V128.cs
@@ -0,0 +1,251 @@
+using System;
+
+namespace ARMeilleure.State
+{
+    // 128-bit vector value held as two 64-bit halves: _e0 is the low half
+    // (bits 0-63) and _e1 is the high half (bits 64-127).
+    public struct V128 : IEquatable<V128>
+    {
+        private ulong _e0;
+        private ulong _e1;
+
+        private static readonly V128 _zero = new V128(0, 0);
+
+        public static V128 Zero => _zero;
+
+        public V128(float value) : this(value, 0, 0, 0) { }
+
+        public V128(double value) : this(value, 0) { }
+
+        public V128(float e0, float e1, float e2, float e3)
+        {
+            _e0 = (ulong)(uint)BitConverter.SingleToInt32Bits(e0) << 0;
+            _e0 |= (ulong)(uint)BitConverter.SingleToInt32Bits(e1) << 32;
+            _e1 = (ulong)(uint)BitConverter.SingleToInt32Bits(e2) << 0;
+            _e1 |= (ulong)(uint)BitConverter.SingleToInt32Bits(e3) << 32;
+        }
+
+        public V128(double e0, double e1)
+        {
+            _e0 = (ulong)BitConverter.DoubleToInt64Bits(e0);
+            _e1 = (ulong)BitConverter.DoubleToInt64Bits(e1);
+        }
+
+        public V128(int e0, int e1, int e2, int e3)
+        {
+            _e0 = (ulong)(uint)e0 << 0;
+            _e0 |= (ulong)(uint)e1 << 32;
+            _e1 = (ulong)(uint)e2 << 0;
+            _e1 |= (ulong)(uint)e3 << 32;
+        }
+
+        public V128(uint e0, uint e1, uint e2, uint e3)
+        {
+            _e0 = (ulong)e0 << 0;
+            _e0 |= (ulong)e1 << 32;
+            _e1 = (ulong)e2 << 0;
+            _e1 |= (ulong)e3 << 32;
+        }
+
+        public V128(long e0, long e1)
+        {
+            _e0 = (ulong)e0;
+            _e1 = (ulong)e1;
+        }
+
+        public V128(ulong e0, ulong e1)
+        {
+            _e0 = e0;
+            _e1 = e1;
+        }
+
+        // Reads the low half from bytes 0-7 of "data" and the high half from bytes 8-15.
+        public V128(byte[] data)
+        {
+            _e0 = (ulong)BitConverter.ToInt64(data, 0);
+            _e1 = (ulong)BitConverter.ToInt64(data, 8);
+        }
+
+        // Replaces the 32-bit element at "index" (0-3), leaving the other bits untouched.
+        public void Insert(int index, uint value)
+        {
+            switch (index)
+            {
+                case 0: _e0 = (_e0 & 0xffffffff00000000) | ((ulong)value << 0); break;
+                case 1: _e0 = (_e0 & 0x00000000ffffffff) | ((ulong)value << 32); break;
+                case 2: _e1 = (_e1 & 0xffffffff00000000) | ((ulong)value << 0); break;
+                case 3: _e1 = (_e1 & 0x00000000ffffffff) | ((ulong)value << 32); break;
+
+                default: throw new ArgumentOutOfRangeException(nameof(index));
+            }
+        }
+
+        // Replaces the 64-bit element at "index" (0 = low half, 1 = high half).
+        public void Insert(int index, ulong value)
+        {
+            switch (index)
+            {
+                case 0: _e0 = value; break;
+                case 1: _e1 = value; break;
+
+                default: throw new ArgumentOutOfRangeException(nameof(index));
+            }
+        }
+
+        public float AsFloat()
+        {
+            return GetFloat(0);
+        }
+
+        public double AsDouble()
+        {
+            return GetDouble(0);
+        }
+
+        public float GetFloat(int index)
+        {
+            return BitConverter.Int32BitsToSingle(GetInt32(index));
+        }
+
+        public double GetDouble(int index)
+        {
+            return BitConverter.Int64BitsToDouble(GetInt64(index));
+        }
+
+        public int GetInt32(int index) => (int)GetUInt32(index);
+        public long GetInt64(int index) => (long)GetUInt64(index);
+
+        public uint GetUInt32(int index)
+        {
+            switch (index)
+            {
+                case 0: return (uint)(_e0 >> 0);
+                case 1: return (uint)(_e0 >> 32);
+                case 2: return (uint)(_e1 >> 0);
+                case 3: return (uint)(_e1 >> 32);
+            }
+
+            throw new ArgumentOutOfRangeException(nameof(index));
+        }
+
+        public ulong GetUInt64(int index)
+        {
+            switch (index)
+            {
+                case 0: return _e0;
+                case 1: return _e1;
+            }
+
+            throw new ArgumentOutOfRangeException(nameof(index));
+        }
+
+        // Returns the value as 16 bytes: the low half first, then the high half.
+        public byte[] ToArray()
+        {
+            byte[] e0Data = BitConverter.GetBytes(_e0);
+            byte[] e1Data = BitConverter.GetBytes(_e1);
+
+            byte[] data = new byte[16];
+
+            Buffer.BlockCopy(e0Data, 0, data, 0, 8);
+            Buffer.BlockCopy(e1Data, 0, data, 8, 8);
+
+            return data;
+        }
+
+        public override int GetHashCode()
+        {
+            return HashCode.Combine(_e0, _e1);
+        }
+
+        public static V128 operator ~(V128 x)
+        {
+            return new V128(~x._e0, ~x._e1);
+        }
+
+        public static V128 operator &(V128 x, V128 y)
+        {
+            return new V128(x._e0 & y._e0, x._e1 & y._e1);
+        }
+
+        public static V128 operator |(V128 x, V128 y)
+        {
+            return new V128(x._e0 | y._e0, x._e1 | y._e1);
+        }
+
+        public static V128 operator ^(V128 x, V128 y)
+        {
+            return new V128(x._e0 ^ y._e0, x._e1 ^ y._e1);
+        }
+
+        // Logical left shift of the whole 128-bit value. The shift count is
+        // masked to 7 bits (0-127), in the same way C# masks shift counts
+        // for its built-in integer types.
+        public static V128 operator <<(V128 x, int shift)
+        {
+            shift &= 127;
+
+            if (shift == 0)
+            {
+                // "x._e0 >> 64" is evaluated as "x._e0 >> 0", which would leak
+                // the low half into the high half, so a zero shift is handled apart.
+                return x;
+            }
+
+            if (shift >= 64)
+            {
+                return new V128(0UL, x._e0 << (shift - 64));
+            }
+
+            ulong shiftOut = x._e0 >> (64 - shift);
+
+            return new V128(x._e0 << shift, (x._e1 << shift) | shiftOut);
+        }
+
+        // Logical (zero filling) right shift of the whole 128-bit value. The
+        // shift count is masked to 7 bits (0-127), like the left shift.
+        public static V128 operator >>(V128 x, int shift)
+        {
+            shift &= 127;
+
+            if (shift == 0)
+            {
+                return x;
+            }
+
+            if (shift >= 64)
+            {
+                return new V128(x._e1 >> (shift - 64), 0UL);
+            }
+
+            ulong shiftOut = x._e1 & ((1UL << shift) - 1);
+
+            return new V128((x._e0 >> shift) | (shiftOut << (64 - shift)), x._e1 >> shift);
+        }
+
+        public static bool operator ==(V128 x, V128 y)
+        {
+            return x.Equals(y);
+        }
+
+        public static bool operator !=(V128 x, V128 y)
+        {
+            return !x.Equals(y);
+        }
+
+        public override bool Equals(object obj)
+        {
+            return obj is V128 vector && Equals(vector);
+        }
+
+        public bool Equals(V128 other)
+        {
+            return other._e0 == _e0 && other._e1 == _e1;
+        }
+
+        public override string ToString()
+        {
+            return $"0x{_e1:X16}{_e0:X16}";
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Statistics.cs b/ARMeilleure/Statistics.cs
new file mode 100644
index 000000000..e80ee59d6
--- /dev/null
+++ b/ARMeilleure/Statistics.cs
@@ -0,0 +1,92 @@
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text;
+
+namespace ARMeilleure
+{
+    public static class Statistics
+    {
+        private const int ReportMaxFunctions = 100;
+
+        [ThreadStatic]
+        private static Stopwatch _executionTimer;
+
+        private static ConcurrentDictionary<ulong, long> _ticksPerFunction;
+
+        static Statistics()
+        {
+            _ticksPerFunction = new ConcurrentDictionary<ulong, long>();
+        }
+
+        public static void InitializeTimer()
+        {
+#if M_PROFILE
+            if (_executionTimer == null)
+            {
+                _executionTimer = new Stopwatch();
+            }
+#endif
+        }
+
+        internal static void StartTimer()
+        {
+#if M_PROFILE
+            _executionTimer.Restart();
+#endif
+        }
+
+        internal static void StopTimer(ulong funcAddr)
+        {
+#if M_PROFILE
+            _executionTimer.Stop();
+
+            long ticks = _executionTimer.ElapsedTicks;
+
+            _ticksPerFunction.AddOrUpdate(funcAddr, ticks, (key, oldTicks) => oldTicks + ticks);
+#endif
+        }
+
+        internal static void ResumeTimer()
+        {
+#if M_PROFILE
+            _executionTimer.Start();
+#endif
+        }
+
+        internal static void
PauseTimer()
+        {
+#if M_PROFILE
+            _executionTimer.Stop();
+#endif
+        }
+        // Builds a text table of the functions with the most accumulated ticks, limited to ReportMaxFunctions rows.
+        public static string GetReport()
+        {
+            int count = 0;
+
+            StringBuilder sb = new StringBuilder();
+
+            sb.AppendLine(" Function address | Time");
+            sb.AppendLine("--------------------------");
+
+            KeyValuePair<ulong, long>[] funcTable = _ticksPerFunction.ToArray();
+
+            foreach (KeyValuePair<ulong, long> kv in funcTable.OrderByDescending(x => x.Value))
+            {
+                long timeInMs = (kv.Value * 1000) / Stopwatch.Frequency;
+
+                sb.AppendLine($" 0x{kv.Key:X16} | {timeInMs} ms");
+                // Count the row just written before comparing, so exactly ReportMaxFunctions rows are emitted.
+                if (++count >= ReportMaxFunctions)
+                {
+                    break;
+                }
+            }
+
+            return sb.ToString();
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/ArmEmitterContext.cs b/ARMeilleure/Translation/ArmEmitterContext.cs
new file mode 100644
index 000000000..d35e985e6
--- /dev/null
+++ b/ARMeilleure/Translation/ArmEmitterContext.cs
@@ -0,0 +1,153 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.Instructions;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using System.Collections.Generic;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Translation
+{
+    class ArmEmitterContext : EmitterContext
+    {
+        private Dictionary<ulong, Operand> _labels;
+
+        private OpCode _optOpLastCompare;
+        private OpCode _optOpLastFlagSet;
+
+        private Operand _optCmpTempN;
+        private Operand _optCmpTempM;
+
+        private Block _currBlock;
+
+        public Block CurrBlock
+        {
+            get
+            {
+                return _currBlock;
+            }
+            set
+            {
+                _currBlock = value;
+
+                ResetBlockState();
+            }
+        }
+
+        public OpCode CurrOp { get; set; }
+
+        public MemoryManager Memory { get; }
+
+        public Aarch32Mode Mode { get; }
+
+        public ArmEmitterContext(MemoryManager memory, Aarch32Mode mode)
+        {
+            Memory = memory;
+            Mode = mode;
+
+            _labels = new Dictionary<ulong, Operand>();
+        }
+
+        public Operand GetLabel(ulong address)
+        {
+            if (!_labels.TryGetValue(address, out Operand label))
+            {
+                label = Label();
+
+                _labels.Add(address, label);
+            }
+
+            return
label; + } + + public void MarkComparison(Operand n, Operand m) + { + _optOpLastCompare = CurrOp; + + _optCmpTempN = Copy(n); + _optCmpTempM = Copy(m); + } + + public void MarkFlagSet(PState stateFlag) + { + // Set this only if any of the NZCV flag bits were modified. + // This is used to ensure that when emiting a direct IL branch + // instruction for compare + branch sequences, we're not expecting + // to use comparison values from an old instruction, when in fact + // the flags were already overwritten by another instruction further along. + if (stateFlag >= PState.VFlag) + { + _optOpLastFlagSet = CurrOp; + } + } + + private void ResetBlockState() + { + _optOpLastCompare = null; + _optOpLastFlagSet = null; + } + + public Operand TryGetComparisonResult(Condition condition) + { + if (_optOpLastCompare == null || _optOpLastCompare != _optOpLastFlagSet) + { + return null; + } + + Operand n = _optCmpTempN; + Operand m = _optCmpTempM; + + InstName cmpName = _optOpLastCompare.Instruction.Name; + + if (cmpName == InstName.Subs) + { + switch (condition) + { + case Condition.Eq: return ICompareEqual (n, m); + case Condition.Ne: return ICompareNotEqual (n, m); + case Condition.GeUn: return ICompareGreaterOrEqualUI(n, m); + case Condition.LtUn: return ICompareLessUI (n, m); + case Condition.GtUn: return ICompareGreaterUI (n, m); + case Condition.LeUn: return ICompareLessOrEqualUI (n, m); + case Condition.Ge: return ICompareGreaterOrEqual (n, m); + case Condition.Lt: return ICompareLess (n, m); + case Condition.Gt: return ICompareGreater (n, m); + case Condition.Le: return ICompareLessOrEqual (n, m); + } + } + else if (cmpName == InstName.Adds && _optOpLastCompare is IOpCodeAluImm op) + { + // There are several limitations that needs to be taken into account for CMN comparisons: + // - The unsigned comparisons are not valid, as they depend on the + // carry flag value, and they will have different values for addition and + // subtraction. 
For addition, it's carry, and for subtraction, it's borrow. + // So, we need to make sure we're not doing a unsigned compare for the CMN case. + // - We can only do the optimization for the immediate variants, + // because when the second operand value is exactly INT_MIN, we can't + // negate the value as theres no positive counterpart. + // Such invalid values can't be encoded on the immediate encodings. + if (op.RegisterSize == RegisterSize.Int32) + { + m = Const((int)-op.Immediate); + } + else + { + m = Const(-op.Immediate); + } + + switch (condition) + { + case Condition.Eq: return ICompareEqual (n, m); + case Condition.Ne: return ICompareNotEqual (n, m); + case Condition.Ge: return ICompareGreaterOrEqual(n, m); + case Condition.Lt: return ICompareLess (n, m); + case Condition.Gt: return ICompareGreater (n, m); + case Condition.Le: return ICompareLessOrEqual (n, m); + } + } + + return null; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Translation/Compiler.cs b/ARMeilleure/Translation/Compiler.cs new file mode 100644 index 000000000..4075a7f06 --- /dev/null +++ b/ARMeilleure/Translation/Compiler.cs @@ -0,0 +1,47 @@ +using ARMeilleure.CodeGen; +using ARMeilleure.CodeGen.X86; +using ARMeilleure.Diagnostics; +using ARMeilleure.IntermediateRepresentation; +using System; +using System.Runtime.InteropServices; + +namespace ARMeilleure.Translation +{ + static class Compiler + { + public static T Compile( + ControlFlowGraph cfg, + OperandType[] funcArgTypes, + OperandType funcReturnType, + CompilerOptions options) + { + Logger.StartPass(PassName.Dominance); + + Dominance.FindDominators(cfg); + Dominance.FindDominanceFrontiers(cfg); + + Logger.EndPass(PassName.Dominance); + + Logger.StartPass(PassName.SsaConstruction); + + if ((options & CompilerOptions.SsaForm) != 0) + { + Ssa.Construct(cfg); + } + else + { + RegisterToLocal.Rename(cfg); + } + + Logger.EndPass(PassName.SsaConstruction, cfg); + + CompilerContext cctx = new CompilerContext(cfg, 
funcArgTypes, funcReturnType, options); + + CompiledFunction func = CodeGenerator.Generate(cctx); + + IntPtr codePtr = JitCache.Map(func); + + return Marshal.GetDelegateForFunctionPointer(codePtr); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Translation/CompilerContext.cs b/ARMeilleure/Translation/CompilerContext.cs new file mode 100644 index 000000000..cfe5ad1e5 --- /dev/null +++ b/ARMeilleure/Translation/CompilerContext.cs @@ -0,0 +1,26 @@ +using ARMeilleure.IntermediateRepresentation; + +namespace ARMeilleure.Translation +{ + struct CompilerContext + { + public ControlFlowGraph Cfg { get; } + + public OperandType[] FuncArgTypes { get; } + public OperandType FuncReturnType { get; } + + public CompilerOptions Options { get; } + + public CompilerContext( + ControlFlowGraph cfg, + OperandType[] funcArgTypes, + OperandType funcReturnType, + CompilerOptions options) + { + Cfg = cfg; + FuncArgTypes = funcArgTypes; + FuncReturnType = funcReturnType; + Options = options; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Translation/CompilerOptions.cs b/ARMeilleure/Translation/CompilerOptions.cs new file mode 100644 index 000000000..53998ec6f --- /dev/null +++ b/ARMeilleure/Translation/CompilerOptions.cs @@ -0,0 +1,16 @@ +using System; + +namespace ARMeilleure.Translation +{ + [Flags] + enum CompilerOptions + { + None = 0, + SsaForm = 1 << 0, + Optimize = 1 << 1, + Lsra = 1 << 2, + + MediumCq = SsaForm | Optimize, + HighCq = SsaForm | Optimize | Lsra + } +} \ No newline at end of file diff --git a/ARMeilleure/Translation/ControlFlowGraph.cs b/ARMeilleure/Translation/ControlFlowGraph.cs new file mode 100644 index 000000000..758f1f968 --- /dev/null +++ b/ARMeilleure/Translation/ControlFlowGraph.cs @@ -0,0 +1,158 @@ +using ARMeilleure.IntermediateRepresentation; +using System; +using System.Collections.Generic; +using System.Diagnostics; + +namespace ARMeilleure.Translation +{ + class ControlFlowGraph + { + public BasicBlock Entry { get; } + 
+ public LinkedList Blocks { get; } + + public BasicBlock[] PostOrderBlocks { get; } + + public int[] PostOrderMap { get; } + + public ControlFlowGraph(BasicBlock entry, LinkedList blocks) + { + Entry = entry; + Blocks = blocks; + + RemoveUnreachableBlocks(blocks); + + HashSet visited = new HashSet(); + + Stack blockStack = new Stack(); + + PostOrderBlocks = new BasicBlock[blocks.Count]; + + PostOrderMap = new int[blocks.Count]; + + visited.Add(entry); + + blockStack.Push(entry); + + int index = 0; + + while (blockStack.TryPop(out BasicBlock block)) + { + if (block.Next != null && visited.Add(block.Next)) + { + blockStack.Push(block); + blockStack.Push(block.Next); + } + else if (block.Branch != null && visited.Add(block.Branch)) + { + blockStack.Push(block); + blockStack.Push(block.Branch); + } + else + { + PostOrderMap[block.Index] = index; + + PostOrderBlocks[index++] = block; + } + } + } + + private void RemoveUnreachableBlocks(LinkedList blocks) + { + HashSet visited = new HashSet(); + + Queue workQueue = new Queue(); + + visited.Add(Entry); + + workQueue.Enqueue(Entry); + + while (workQueue.TryDequeue(out BasicBlock block)) + { + Debug.Assert(block.Index != -1, "Invalid block index."); + + if (block.Next != null && visited.Add(block.Next)) + { + workQueue.Enqueue(block.Next); + } + + if (block.Branch != null && visited.Add(block.Branch)) + { + workQueue.Enqueue(block.Branch); + } + } + + if (visited.Count < blocks.Count) + { + // Remove unreachable blocks and renumber. 
+                int index = 0;
+
+                for (LinkedListNode<BasicBlock> node = blocks.First; node != null;)
+                {
+                    LinkedListNode<BasicBlock> nextNode = node.Next;
+
+                    BasicBlock block = node.Value;
+
+                    if (!visited.Contains(block))
+                    {
+                        block.Next = null;
+                        block.Branch = null;
+
+                        blocks.Remove(node);
+                    }
+                    else
+                    {
+                        block.Index = index++;
+                    }
+
+                    node = nextNode;
+                }
+            }
+        }
+        // Inserts a new block on the edge going from "predecessor" to "successor" and returns that new block.
+        public BasicBlock SplitEdge(BasicBlock predecessor, BasicBlock successor)
+        {
+            BasicBlock splitBlock = new BasicBlock(Blocks.Count);
+
+            if (predecessor.Next == successor)
+            {
+                predecessor.Next = splitBlock;
+            }
+
+            if (predecessor.Branch == successor)
+            {
+                predecessor.Branch = splitBlock;
+            }
+
+            if (splitBlock.Predecessors.Count == 0)
+            {
+                throw new ArgumentException("Predecessor and successor are not connected.");
+            }
+
+            // Insert the new block on the list of blocks.
+            BasicBlock succPrev = successor.Node.Previous?.Value;
+
+            if (succPrev != null && succPrev != predecessor && succPrev.Next == successor)
+            {
+                // Can't insert after the predecessor or before the successor.
+                // Here, we insert it before the successor by also splitting another
+                // edge (the one between the block before "successor" and "successor").
+                BasicBlock splitBlock2 = new BasicBlock(splitBlock.Index + 1);
+
+                succPrev.Next = splitBlock2;
+
+                splitBlock2.Branch = successor;
+
+                splitBlock2.Operations.AddLast(new Operation(Instruction.Branch, null));
+                // Keep the list node, like the emitter does, so this block can itself be a split target later.
+                splitBlock2.Node = Blocks.AddBefore(successor.Node, splitBlock2);
+            }
+
+            splitBlock.Next = successor;
+            // Keep the list node; "successor.Node" is dereferenced above when this block is the successor.
+            splitBlock.Node = Blocks.AddBefore(successor.Node, splitBlock);
+
+            return splitBlock;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/DelegateCache.cs b/ARMeilleure/Translation/DelegateCache.cs
new file mode 100644
index 000000000..7328c61a6
--- /dev/null
+++ b/ARMeilleure/Translation/DelegateCache.cs
@@ -0,0 +1,37 @@
+using System;
+using System.Collections.Concurrent;
+using System.Reflection;
+
+namespace ARMeilleure.Translation
+{
+    // Holds one delegate per method in a static dictionary from which nothing
+    // is ever removed, so cached delegates are not garbage collected.
+    static class DelegateCache
+    {
+        private static ConcurrentDictionary<string, Delegate> _delegates;
+
+        static DelegateCache()
+        {
+            _delegates = new ConcurrentDictionary<string, Delegate>();
+        }
+
+        // Returns the delegate already cached for the method "dlg" targets,
+        // or caches and returns "dlg" itself when there is none yet.
+        public static Delegate GetOrAdd(Delegate dlg)
+        {
+            return _delegates.GetOrAdd(GetKey(dlg.Method), (key) => dlg);
+        }
+
+        // Builds the cache key from the declaring type, method name and parameter types.
+        // NOTE(review): the delegate target is not part of the key, so this
+        // presumably expects static methods only - confirm against callers.
+        private static string GetKey(MethodInfo info)
+        {
+            // Overloads share the declaring type and name; without the parameter
+            // types they would collide and GetOrAdd would return the wrong overload.
+            string parameters = string.Join(", ", Array.ConvertAll(info.GetParameters(), p => p.ParameterType.ToString()));
+
+            return $"{info.DeclaringType?.FullName}.{info.Name}({parameters})";
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/Dominance.cs b/ARMeilleure/Translation/Dominance.cs
new file mode 100644
index 000000000..bb55169ed
--- /dev/null
+++ b/ARMeilleure/Translation/Dominance.cs
@@ -0,0 +1,95 @@
+using ARMeilleure.IntermediateRepresentation;
+using System.Diagnostics;
+
+namespace ARMeilleure.Translation
+{
+    static class Dominance
+    {
+        // Those methods are an implementation of the algorithms on "A Simple, Fast Dominance Algorithm".
+ // https://www.cs.rice.edu/~keith/EMBED/dom.pdf + public static void FindDominators(ControlFlowGraph cfg) + { + BasicBlock Intersect(BasicBlock block1, BasicBlock block2) + { + while (block1 != block2) + { + while (cfg.PostOrderMap[block1.Index] < cfg.PostOrderMap[block2.Index]) + { + block1 = block1.ImmediateDominator; + } + + while (cfg.PostOrderMap[block2.Index] < cfg.PostOrderMap[block1.Index]) + { + block2 = block2.ImmediateDominator; + } + } + + return block1; + } + + cfg.Entry.ImmediateDominator = cfg.Entry; + + Debug.Assert(cfg.Entry == cfg.PostOrderBlocks[cfg.PostOrderBlocks.Length - 1]); + + bool modified; + + do + { + modified = false; + + for (int blkIndex = cfg.PostOrderBlocks.Length - 2; blkIndex >= 0; blkIndex--) + { + BasicBlock block = cfg.PostOrderBlocks[blkIndex]; + + BasicBlock newIDom = null; + + foreach (BasicBlock predecessor in block.Predecessors) + { + if (predecessor.ImmediateDominator != null) + { + if (newIDom != null) + { + newIDom = Intersect(predecessor, newIDom); + } + else + { + newIDom = predecessor; + } + } + } + + if (block.ImmediateDominator != newIDom) + { + block.ImmediateDominator = newIDom; + + modified = true; + } + } + } + while (modified); + } + + public static void FindDominanceFrontiers(ControlFlowGraph cfg) + { + foreach (BasicBlock block in cfg.Blocks) + { + if (block.Predecessors.Count < 2) + { + continue; + } + + for (int pBlkIndex = 0; pBlkIndex < block.Predecessors.Count; pBlkIndex++) + { + BasicBlock current = block.Predecessors[pBlkIndex]; + + while (current != block.ImmediateDominator) + { + current.DominanceFrontiers.Add(block); + + current = current.ImmediateDominator; + } + } + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Translation/EmitterContext.cs b/ARMeilleure/Translation/EmitterContext.cs new file mode 100644 index 000000000..13cf677c7 --- /dev/null +++ b/ARMeilleure/Translation/EmitterContext.cs @@ -0,0 +1,562 @@ +using ARMeilleure.IntermediateRepresentation; +using 
ARMeilleure.State; +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; + +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Translation +{ + class EmitterContext + { + private Dictionary _irLabels; + + private LinkedList _irBlocks; + + private BasicBlock _irBlock; + + private bool _needsNewBlock; + + public EmitterContext() + { + _irLabels = new Dictionary(); + + _irBlocks = new LinkedList(); + + _needsNewBlock = true; + } + + public Operand Add(Operand op1, Operand op2) + { + return Add(Instruction.Add, Local(op1.Type), op1, op2); + } + + public Operand BitwiseAnd(Operand op1, Operand op2) + { + return Add(Instruction.BitwiseAnd, Local(op1.Type), op1, op2); + } + + public Operand BitwiseExclusiveOr(Operand op1, Operand op2) + { + return Add(Instruction.BitwiseExclusiveOr, Local(op1.Type), op1, op2); + } + + public Operand BitwiseNot(Operand op1) + { + return Add(Instruction.BitwiseNot, Local(op1.Type), op1); + } + + public Operand BitwiseOr(Operand op1, Operand op2) + { + return Add(Instruction.BitwiseOr, Local(op1.Type), op1, op2); + } + + public void Branch(Operand label) + { + Add(Instruction.Branch, null); + + BranchToLabel(label); + } + + public void BranchIfFalse(Operand label, Operand op1) + { + Add(Instruction.BranchIfFalse, null, op1); + + BranchToLabel(label); + } + + public void BranchIfTrue(Operand label, Operand op1) + { + Add(Instruction.BranchIfTrue, null, op1); + + BranchToLabel(label); + } + + public Operand ByteSwap(Operand op1) + { + return Add(Instruction.ByteSwap, Local(op1.Type), op1); + } + + public Operand Call(Delegate func, params Operand[] callArgs) + { + // Add the delegate to the cache to ensure it will not be garbage collected. 
+ func = DelegateCache.GetOrAdd(func); + + IntPtr ptr = Marshal.GetFunctionPointerForDelegate(func); + + OperandType returnType = GetOperandType(func.Method.ReturnType); + + return Call(Const(ptr.ToInt64()), returnType, callArgs); + } + + private static Dictionary _typeCodeToOperandTypeMap = + new Dictionary() + { + { TypeCode.Boolean, OperandType.I32 }, + { TypeCode.Byte, OperandType.I32 }, + { TypeCode.Char, OperandType.I32 }, + { TypeCode.Double, OperandType.FP64 }, + { TypeCode.Int16, OperandType.I32 }, + { TypeCode.Int32, OperandType.I32 }, + { TypeCode.Int64, OperandType.I64 }, + { TypeCode.SByte, OperandType.I32 }, + { TypeCode.Single, OperandType.FP32 }, + { TypeCode.UInt16, OperandType.I32 }, + { TypeCode.UInt32, OperandType.I32 }, + { TypeCode.UInt64, OperandType.I64 } + }; + + private static OperandType GetOperandType(Type type) + { + if (_typeCodeToOperandTypeMap.TryGetValue(Type.GetTypeCode(type), out OperandType ot)) + { + return ot; + } + else if (type == typeof(V128)) + { + return OperandType.V128; + } + else if (type == typeof(void)) + { + return OperandType.None; + } + + throw new ArgumentException($"Invalid type \"{type.Name}\"."); + } + + public Operand Call(Operand address, OperandType returnType, params Operand[] callArgs) + { + Operand[] args = new Operand[callArgs.Length + 1]; + + args[0] = address; + + Array.Copy(callArgs, 0, args, 1, callArgs.Length); + + if (returnType != OperandType.None) + { + return Add(Instruction.Call, Local(returnType), args); + } + else + { + return Add(Instruction.Call, null, args); + } + } + + public Operand CompareAndSwap128(Operand address, Operand expected, Operand desired) + { + return Add(Instruction.CompareAndSwap128, Local(OperandType.V128), address, expected, desired); + } + + public Operand ConditionalSelect(Operand op1, Operand op2, Operand op3) + { + return Add(Instruction.ConditionalSelect, Local(op2.Type), op1, op2, op3); + } + + public Operand ConvertI64ToI32(Operand op1) + { + if (op1.Type != 
OperandType.I64) + { + throw new ArgumentException($"Invalid operand type \"{op1.Type}\"."); + } + + return Add(Instruction.ConvertI64ToI32, Local(OperandType.I32), op1); + } + + public Operand ConvertToFP(OperandType type, Operand op1) + { + return Add(Instruction.ConvertToFP, Local(type), op1); + } + + public Operand ConvertToFPUI(OperandType type, Operand op1) + { + return Add(Instruction.ConvertToFPUI, Local(type), op1); + } + + public Operand Copy(Operand op1) + { + return Add(Instruction.Copy, Local(op1.Type), op1); + } + + public Operand Copy(Operand dest, Operand op1) + { + if (dest.Kind != OperandKind.Register) + { + throw new ArgumentException($"Invalid dest operand kind \"{dest.Kind}\"."); + } + + return Add(Instruction.Copy, dest, op1); + } + + public Operand CountLeadingZeros(Operand op1) + { + return Add(Instruction.CountLeadingZeros, Local(op1.Type), op1); + } + + internal Operand CpuId() + { + return Add(Instruction.CpuId, Local(OperandType.I64)); + } + + public Operand Divide(Operand op1, Operand op2) + { + return Add(Instruction.Divide, Local(op1.Type), op1, op2); + } + + public Operand DivideUI(Operand op1, Operand op2) + { + return Add(Instruction.DivideUI, Local(op1.Type), op1, op2); + } + + public Operand ICompareEqual(Operand op1, Operand op2) + { + return Add(Instruction.CompareEqual, Local(OperandType.I32), op1, op2); + } + + public Operand ICompareGreater(Operand op1, Operand op2) + { + return Add(Instruction.CompareGreater, Local(OperandType.I32), op1, op2); + } + + public Operand ICompareGreaterOrEqual(Operand op1, Operand op2) + { + return Add(Instruction.CompareGreaterOrEqual, Local(OperandType.I32), op1, op2); + } + + public Operand ICompareGreaterOrEqualUI(Operand op1, Operand op2) + { + return Add(Instruction.CompareGreaterOrEqualUI, Local(OperandType.I32), op1, op2); + } + + public Operand ICompareGreaterUI(Operand op1, Operand op2) + { + return Add(Instruction.CompareGreaterUI, Local(OperandType.I32), op1, op2); + } + + public 
Operand ICompareLess(Operand op1, Operand op2) + { + return Add(Instruction.CompareLess, Local(OperandType.I32), op1, op2); + } + + public Operand ICompareLessOrEqual(Operand op1, Operand op2) + { + return Add(Instruction.CompareLessOrEqual, Local(OperandType.I32), op1, op2); + } + + public Operand ICompareLessOrEqualUI(Operand op1, Operand op2) + { + return Add(Instruction.CompareLessOrEqualUI, Local(OperandType.I32), op1, op2); + } + + public Operand ICompareLessUI(Operand op1, Operand op2) + { + return Add(Instruction.CompareLessUI, Local(OperandType.I32), op1, op2); + } + + public Operand ICompareNotEqual(Operand op1, Operand op2) + { + return Add(Instruction.CompareNotEqual, Local(OperandType.I32), op1, op2); + } + + public Operand Load(OperandType type, Operand address) + { + return Add(Instruction.Load, Local(type), address); + } + + public Operand Load16(Operand address) + { + return Add(Instruction.Load16, Local(OperandType.I32), address); + } + + public Operand Load8(Operand address) + { + return Add(Instruction.Load8, Local(OperandType.I32), address); + } + + public Operand LoadArgument(OperandType type, int index) + { + return Add(Instruction.LoadArgument, Local(type), Const(index)); + } + + public void LoadFromContext() + { + _needsNewBlock = true; + + Add(Instruction.LoadFromContext); + } + + public Operand Multiply(Operand op1, Operand op2) + { + return Add(Instruction.Multiply, Local(op1.Type), op1, op2); + } + + public Operand Multiply64HighSI(Operand op1, Operand op2) + { + return Add(Instruction.Multiply64HighSI, Local(OperandType.I64), op1, op2); + } + + public Operand Multiply64HighUI(Operand op1, Operand op2) + { + return Add(Instruction.Multiply64HighUI, Local(OperandType.I64), op1, op2); + } + + public Operand Negate(Operand op1) + { + return Add(Instruction.Negate, Local(op1.Type), op1); + } + + public void Return() + { + Add(Instruction.Return); + + _needsNewBlock = true; + } + + public void Return(Operand op1) + { + 
Add(Instruction.Return, null, op1); + + _needsNewBlock = true; + } + + public Operand RotateRight(Operand op1, Operand op2) + { + return Add(Instruction.RotateRight, Local(op1.Type), op1, op2); + } + + public Operand ShiftLeft(Operand op1, Operand op2) + { + return Add(Instruction.ShiftLeft, Local(op1.Type), op1, op2); + } + + public Operand ShiftRightSI(Operand op1, Operand op2) + { + return Add(Instruction.ShiftRightSI, Local(op1.Type), op1, op2); + } + + public Operand ShiftRightUI(Operand op1, Operand op2) + { + return Add(Instruction.ShiftRightUI, Local(op1.Type), op1, op2); + } + + public Operand SignExtend16(OperandType type, Operand op1) + { + return Add(Instruction.SignExtend16, Local(type), op1); + } + + public Operand SignExtend32(OperandType type, Operand op1) + { + return Add(Instruction.SignExtend32, Local(type), op1); + } + + public Operand SignExtend8(OperandType type, Operand op1) + { + return Add(Instruction.SignExtend8, Local(type), op1); + } + + public void Store(Operand address, Operand value) + { + Add(Instruction.Store, null, address, value); + } + + public void Store16(Operand address, Operand value) + { + Add(Instruction.Store16, null, address, value); + } + + public void Store8(Operand address, Operand value) + { + Add(Instruction.Store8, null, address, value); + } + + public void StoreToContext() + { + Add(Instruction.StoreToContext); + + _needsNewBlock = true; + } + + public Operand Subtract(Operand op1, Operand op2) + { + return Add(Instruction.Subtract, Local(op1.Type), op1, op2); + } + + public Operand VectorCreateScalar(Operand value) + { + return Add(Instruction.VectorCreateScalar, Local(OperandType.V128), value); + } + + public Operand VectorExtract(OperandType type, Operand vector, int index) + { + return Add(Instruction.VectorExtract, Local(type), vector, Const(index)); + } + + public Operand VectorExtract16(Operand vector, int index) + { + return Add(Instruction.VectorExtract16, Local(OperandType.I32), vector, Const(index)); + } 
+ + public Operand VectorExtract8(Operand vector, int index) + { + return Add(Instruction.VectorExtract8, Local(OperandType.I32), vector, Const(index)); + } + + public Operand VectorInsert(Operand vector, Operand value, int index) + { + return Add(Instruction.VectorInsert, Local(OperandType.V128), vector, value, Const(index)); + } + + public Operand VectorInsert16(Operand vector, Operand value, int index) + { + return Add(Instruction.VectorInsert16, Local(OperandType.V128), vector, value, Const(index)); + } + + public Operand VectorInsert8(Operand vector, Operand value, int index) + { + return Add(Instruction.VectorInsert8, Local(OperandType.V128), vector, value, Const(index)); + } + + public Operand VectorZero() + { + return Add(Instruction.VectorZero, Local(OperandType.V128)); + } + + public Operand VectorZeroUpper64(Operand vector) + { + return Add(Instruction.VectorZeroUpper64, Local(OperandType.V128), vector); + } + + public Operand VectorZeroUpper96(Operand vector) + { + return Add(Instruction.VectorZeroUpper96, Local(OperandType.V128), vector); + } + + public Operand ZeroExtend16(OperandType type, Operand op1) + { + return Add(Instruction.ZeroExtend16, Local(type), op1); + } + + public Operand ZeroExtend32(OperandType type, Operand op1) + { + return Add(Instruction.ZeroExtend32, Local(type), op1); + } + + public Operand ZeroExtend8(OperandType type, Operand op1) + { + return Add(Instruction.ZeroExtend8, Local(type), op1); + } + + private Operand Add(Instruction inst, Operand dest = null, params Operand[] sources) + { + if (_needsNewBlock) + { + NewNextBlock(); + } + + Operation operation = new Operation(inst, dest, sources); + + _irBlock.Operations.AddLast(operation); + + return dest; + } + + public Operand AddIntrinsic(Intrinsic intrin, params Operand[] args) + { + return Add(intrin, Local(OperandType.V128), args); + } + + public Operand AddIntrinsicInt(Intrinsic intrin, params Operand[] args) + { + return Add(intrin, Local(OperandType.I32), args); + } + + 
// Emits an intrinsic operation whose result is a fresh I64 local.
public Operand AddIntrinsicLong(Intrinsic intrin, params Operand[] args)
    => Add(intrin, Local(OperandType.I64), args);

// Appends a new IntrinsicOperation (intrin, dest, sources) to the end of the
// current block and returns "dest". A pending new block is opened first.
private Operand Add(Intrinsic intrin, Operand dest, params Operand[] sources)
{
    if (_needsNewBlock)
    {
        NewNextBlock();
    }

    _irBlock.Operations.AddLast(new IntrinsicOperation(intrin, dest, sources));

    return dest;
}

// Sets the branch target of the current block to the block bound to "label",
// creating (and registering) a placeholder block when the label has not been
// seen yet. The next emitted operation will start a new block.
private void BranchToLabel(Operand label)
{
    if (!_irLabels.TryGetValue(label, out BasicBlock target))
    {
        target = new BasicBlock();

        _irLabels.Add(label, target);
    }

    _irBlock.Branch = target;

    _needsNewBlock = true;
}

// Binds "label" to the position being emitted.
public void MarkLabel(Operand label)
{
    if (!_irLabels.TryGetValue(label, out BasicBlock labelBlock))
    {
        // Label not referenced so far: start a new block and bind the label to it.
        NewNextBlock();

        _irLabels.Add(label, _irBlock);

        return;
    }

    // The label already has a block (created by an earlier branch to it);
    // give it its index, add it to the block list and make it current.
    labelBlock.Index = _irBlocks.Count;
    labelBlock.Node  = _irBlocks.AddLast(labelBlock);

    NextBlock(labelBlock);
}

// Creates a block indexed at the end of the block list, appends it and makes it current.
private void NewNextBlock()
{
    BasicBlock created = new BasicBlock(_irBlocks.Count);

    created.Node = _irBlocks.AddLast(created);

    NextBlock(created);
}

// Makes "nextBlock" the current block. The previous block (if any) is linked
// to it through Next unless it ends with a Branch or Return operation.
private void NextBlock(BasicBlock nextBlock)
{
    bool fallsThrough = _irBlock != null && !EndsWithUnconditional(_irBlock);

    if (fallsThrough)
    {
        _irBlock.Next = nextBlock;
    }

    _irBlock = nextBlock;

    _needsNewBlock = false;
}

// True when the last node of "block" is an Operation whose instruction is
// Branch or Return; false otherwise (including when there is no such node).
private static bool EndsWithUnconditional(BasicBlock block)
{
    if (block.GetLastOp() is Operation last)
    {
        switch (last.Instruction)
        {
            case Instruction.Branch:
            case Instruction.Return:
                return true;
        }
    }

    return false;
}

// Builds the control flow graph from the emitted blocks, using the first block as entry.
public ControlFlowGraph GetControlFlowGraph()
    => new ControlFlowGraph(_irBlocks.First.Value, _irBlocks);
} // End of the enclosing class.
} // End of the enclosing namespace.

// File: ARMeilleure/Translation/GuestFunction.cs (new file mode 100644, index 000000000..ac131a0d1)
using System;

namespace ARMeilleure.Translation
{
    // Signature of a compiled guest function: takes the native context pointer
    // and returns a 64-bit value.
    delegate ulong GuestFunction(IntPtr nativeContextPtr);
}

// File: ARMeilleure/Translation/ITranslator.cs (new file mode 100644, index 000000000..1063d3a65)
using ARMeilleure.State;

namespace ARMeilleure.Translation
{
    public interface ITranslator
    {
        // Runs guest code for the given execution context starting at "address".
        void Execute(IExecutionContext context, ulong address);
    }
}

// File: ARMeilleure/Translation/JitCache.cs (new file mode 100644, index 000000000..73f04a966)
using ARMeilleure.CodeGen;
using ARMeilleure.Memory;
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;

namespace ARMeilleure.Translation
{
    // Bump allocator over a single memory region that holds the compiled code,
    // plus a list describing where each compiled function lives in that region.
    static class JitCache
    {
        private const int PageSize = 4 * 1024;
        private const int PageMask = PageSize - 1;

        private const int CodeAlignment = 4; // Bytes

        private const int CacheSize = 512 * 1024 * 1024;

        private static readonly IntPtr _basePointer;

        // Offset (from _basePointer) of the next free byte.
        private static int _offset;

        private static readonly List<JitCacheEntry> _cacheEntries;

        private static readonly object _lock;

        static JitCache()
        {
            _basePointer = MemoryManagement.Allocate(CacheSize);

            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                JitUnwindWindows.InstallFunctionTableHandler(_basePointer, CacheSize);

                // The first page is used for the table based SEH structs.
                _offset = PageSize;
            }

            _cacheEntries = new List<JitCacheEntry>();

            _lock = new object();
        }

        // Copies the code of "func" into the cache, adjusts page protection,
        // records a cache entry for it and returns the address of the copy.
        public static IntPtr Map(CompiledFunction func)
        {
            byte[] code = func.Code;

            lock (_lock)
            {
                int funcOffset = Allocate(code.Length);

                IntPtr funcPtr = _basePointer + funcOffset;

                Marshal.Copy(code, 0, funcPtr, code.Length);

                ReprotectRange(funcOffset, code.Length);

                Add(new JitCacheEntry(funcOffset, code.Length, func.UnwindInfo));

                return funcPtr;
            }
        }

        private static void ReprotectRange(int offset, int size)
        {
            // Map pages that are already full as RX.
            // Map pages that are not full yet as RWX.
            // On unix, the address must be page aligned.
            int endOffs = offset + size;

            int pageStart = offset  & ~PageMask;
            int pageEnd   = endOffs & ~PageMask;

            int fullPagesSize = pageEnd - pageStart;

            if (fullPagesSize != 0)
            {
                IntPtr funcPtr = _basePointer + pageStart;

                MemoryManagement.Reprotect(funcPtr, (ulong)fullPagesSize, MemoryProtection.ReadAndExecute);
            }

            int remaining = endOffs - pageEnd;

            if (remaining != 0)
            {
                IntPtr funcPtr = _basePointer + pageEnd;

                MemoryManagement.Reprotect(funcPtr, (ulong)remaining, MemoryProtection.ReadWriteExecute);
            }
        }

        // Reserves "codeSize" bytes (rounded up to CodeAlignment) and returns the
        // offset of the reservation. Throws OutOfMemoryException when the cache
        // cannot hold it; in that case the allocation cursor is left untouched.
        private static int Allocate(int codeSize)
        {
            codeSize = checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1);

            int allocOffset = _offset;

            // Computed in 64 bits so the sum cannot wrap, and validated before
            // _offset is committed so a failed request does not corrupt the cursor.
            long newOffset = (long)allocOffset + codeSize;

            if (newOffset > CacheSize)
            {
                throw new OutOfMemoryException();
            }

            _offset = (int)newOffset;

            return allocOffset;
        }

        private static void Add(JitCacheEntry entry)
        {
            _cacheEntries.Add(entry);
        }

        // Looks up the entry whose [Offset, Offset + Size) range contains "offset".
        // Returns false (and a default entry) when no function covers it.
        public static bool TryFind(int offset, out JitCacheEntry entry)
        {
            lock (_lock)
            {
                foreach (JitCacheEntry cacheEntry in _cacheEntries)
                {
                    int endOffset = cacheEntry.Offset + cacheEntry.Size;

                    if (offset >= cacheEntry.Offset && offset < endOffset)
                    {
                        entry = cacheEntry;

                        return true;
                    }
                }
            }

            entry = default(JitCacheEntry);

            return false;
        }
    }
}
// File: ARMeilleure/Translation/JitCacheEntry.cs (new file mode 100644, index 000000000..87d020e68)
using ARMeilleure.CodeGen.Unwinding;

namespace ARMeilleure.Translation
{
    // Describes one compiled function stored in the JIT cache: where it starts
    // (offset from the cache base), how many bytes it takes, and its unwind info.
    struct JitCacheEntry
    {
        public int Offset { get; }
        public int Size   { get; }

        public UnwindInfo UnwindInfo { get; }

        public JitCacheEntry(int offset, int size, UnwindInfo unwindInfo)
        {
            Offset     = offset;
            Size       = size;
            UnwindInfo = unwindInfo;
        }
    }
}

// File: ARMeilleure/Translation/JitUnwindWindows.cs (new file mode 100644, index 000000000..108dc2c56)
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Runtime.InteropServices;

namespace ARMeilleure.Translation
{
    // Supplies unwind data for JIT compiled code to Windows through a function
    // table callback registered with RtlInstallFunctionTableCallback.
    static class JitUnwindWindows
    {
        // 9 single-slot codes + 10 two-slot codes + 3 slots for the large allocation.
        private const int MaxUnwindCodesArraySize = 9 + 10 * 2 + 3;

        private struct RuntimeFunction
        {
            public uint BeginAddress;
            public uint EndAddress;
            public uint UnwindData;
        }

        private struct UnwindInfo
        {
            public byte VersionAndFlags;
            public byte SizeOfProlog;
            public byte CountOfUnwindCodes;
            public byte FrameRegister;
            public unsafe fixed ushort UnwindCodes[MaxUnwindCodesArraySize];
        }

        private enum UnwindOperation
        {
            PushNonvol    = 0,
            AllocLarge    = 1,
            AllocSmall    = 2,
            SetFpreg      = 3,
            SaveNonvol    = 4,
            SaveNonvolFar = 5,
            SaveXmm128    = 8,
            SaveXmm128Far = 9,
            PushMachframe = 10
        }

        private unsafe delegate RuntimeFunction* GetRuntimeFunctionCallback(ulong controlPc, IntPtr context);

        [DllImport("kernel32.dll", CharSet = CharSet.Unicode)]
        private static unsafe extern bool RtlInstallFunctionTableCallback(
            ulong                      tableIdentifier,
            ulong                      baseAddress,
            uint                       length,
            GetRuntimeFunctionCallback callback,
            IntPtr                     context,
            string                     outOfProcessCallbackDll);

        // Kept in a static field so the delegate handed to native code stays referenced.
        private static GetRuntimeFunctionCallback _getRuntimeFunctionCallback;

        private static int _sizeOfRuntimeFunction;

        private unsafe static RuntimeFunction* _runtimeFunction;

        private unsafe static UnwindInfo* _unwindInfo;

        // Registers the function table callback for the code cache region.
        // The RuntimeFunction and UnwindInfo structs returned by the callback are
        // placed at the very start of that region, one right after the other.
        // Throws InvalidOperationException when the registration fails.
        public static void InstallFunctionTableHandler(IntPtr codeCachePointer, uint codeCacheLength)
        {
            ulong codeCachePtr = (ulong)codeCachePointer.ToInt64();

            _sizeOfRuntimeFunction = Marshal.SizeOf<RuntimeFunction>();

            bool result;

            unsafe
            {
                _runtimeFunction = (RuntimeFunction*)codeCachePointer;

                _unwindInfo = (UnwindInfo*)(codeCachePointer + _sizeOfRuntimeFunction);

                _getRuntimeFunctionCallback = new GetRuntimeFunctionCallback(FunctionTableHandler);

                result = RtlInstallFunctionTableCallback(
                    codeCachePtr | 3,
                    codeCachePtr,
                    codeCacheLength,
                    _getRuntimeFunctionCallback,
                    codeCachePointer,
                    null);
            }

            if (!result)
            {
                throw new InvalidOperationException("Failure installing function table callback.");
            }
        }

        // Callback invoked with a program counter inside the code cache ("context"
        // is the cache base pointer passed at registration). Fills the shared
        // RuntimeFunction/UnwindInfo structs for the function containing that
        // address and returns a pointer to them, or null when none is found.
        private static unsafe RuntimeFunction* FunctionTableHandler(ulong controlPc, IntPtr context)
        {
            int offset = (int)((long)controlPc - context.ToInt64());

            if (!JitCache.TryFind(offset, out JitCacheEntry funcEntry))
            {
                // Not found.
                return null;
            }

            var unwindInfo = funcEntry.UnwindInfo;

            int codeIndex = 0;

            // Start from the fixed allocation size and step back 16 bytes for
            // every vector register entry; the loop below walks forward again.
            int spOffset = unwindInfo.FixedAllocSize;

            foreach (var entry in unwindInfo.PushEntries)
            {
                if (entry.Type == RegisterType.Vector)
                {
                    spOffset -= 16;
                }
            }

            // Vector register saves, emitted in reverse order of the push entries.
            for (int index = unwindInfo.PushEntries.Length - 1; index >= 0; index--)
            {
                var entry = unwindInfo.PushEntries[index];

                if (entry.Type == RegisterType.Vector)
                {
                    ushort uwop = PackUwop(UnwindOperation.SaveXmm128, entry.StreamEndOffset, entry.Index);

                    // NOTE(review): the Windows x64 unwind format documents the slot
                    // following UWOP_SAVE_XMM128 as the stack offset scaled by 16; this
                    // stores the unscaled value - confirm against the prologue emitter.
                    _unwindInfo->UnwindCodes[codeIndex++] = uwop;
                    _unwindInfo->UnwindCodes[codeIndex++] = (ushort)spOffset;

                    spOffset += 16;
                }
            }

            // The stack allocation codes must be appended after the vector saves.
            // Writing them to the fixed slots 0..2 would overwrite any SaveXmm128
            // codes emitted above and leave unwritten slots counted at the end.
            _unwindInfo->UnwindCodes[codeIndex++] = PackUwop(UnwindOperation.AllocLarge, unwindInfo.PrologueSize, 1);
            _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(unwindInfo.FixedAllocSize >> 0);
            _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(unwindInfo.FixedAllocSize >> 16);

            // Integer register pushes, also in reverse order of the push entries.
            for (int index = unwindInfo.PushEntries.Length - 1; index >= 0; index--)
            {
                var entry = unwindInfo.PushEntries[index];

                if (entry.Type == RegisterType.Integer)
                {
                    ushort uwop = PackUwop(UnwindOperation.PushNonvol, entry.StreamEndOffset, entry.Index);

                    _unwindInfo->UnwindCodes[codeIndex++] = uwop;
                }
            }

            _unwindInfo->VersionAndFlags    = 1;
            _unwindInfo->SizeOfProlog       = (byte)unwindInfo.PrologueSize;
            _unwindInfo->CountOfUnwindCodes = (byte)codeIndex;
            _unwindInfo->FrameRegister      = 0;

            // Addresses are offsets relative to the base address given at registration.
            _runtimeFunction->BeginAddress = (uint)funcEntry.Offset;
            _runtimeFunction->EndAddress   = (uint)(funcEntry.Offset + funcEntry.Size);
            _runtimeFunction->UnwindData   = (uint)_sizeOfRuntimeFunction;

            return _runtimeFunction;
        }

        // Packs one unwind code slot: prolog offset in bits 0-7, operation code
        // from bit 8 and operation info from bit 12.
        private static ushort PackUwop(UnwindOperation uwop, int prologOffset, int opInfo)
        {
            return (ushort)(prologOffset | ((int)uwop << 8) | (opInfo << 12));
        }
    }
}

// File: ARMeilleure/Translation/PriorityQueue.cs (new file mode 100644)
// File: ARMeilleure/Translation/PriorityQueue.cs (index 000000000..ab593dc07)
using System.Collections.Concurrent;

namespace ARMeilleure.Translation
{
    // A set of concurrent FIFO queues, one per priority level.
    // Lower priority indices are drained first.
    class PriorityQueue<T>
    {
        private ConcurrentQueue<T>[] _queues;

        // Creates "priorities" empty queues (valid priorities are 0 .. priorities - 1).
        public PriorityQueue(int priorities)
        {
            _queues = new ConcurrentQueue<T>[priorities];

            for (int index = 0; index < priorities; index++)
            {
                _queues[index] = new ConcurrentQueue<T>();
            }
        }

        public void Enqueue(int priority, T value)
        {
            _queues[priority].Enqueue(value);
        }

        // Dequeues from the first non-empty queue, scanning from priority 0 upwards.
        // Returns false (and default(T)) when every queue is empty.
        public bool TryDequeue(out T value)
        {
            for (int index = 0; index < _queues.Length; index++)
            {
                if (_queues[index].TryDequeue(out value))
                {
                    return true;
                }
            }

            value = default(T);

            return false;
        }
    }
}

// File: ARMeilleure/Translation/RegisterToLocal.cs (new file mode 100644, index 000000000..aa9180182)
using ARMeilleure.IntermediateRepresentation;
using System.Collections.Generic;

using static ARMeilleure.IntermediateRepresentation.OperandHelper;

namespace ARMeilleure.Translation
{
    static class RegisterToLocal
    {
        // Replaces every register operand (destinations and sources) in the graph
        // with a local operand; each distinct register maps to exactly one local.
        public static void Rename(ControlFlowGraph cfg)
        {
            Dictionary<Register, Operand> registerToLocalMap = new Dictionary<Register, Operand>();

            Operand GetLocal(Operand op)
            {
                Register register = op.GetRegister();

                if (!registerToLocalMap.TryGetValue(register, out Operand local))
                {
                    local = Local(op.Type);

                    registerToLocalMap.Add(register, local);
                }

                return local;
            }

            foreach (BasicBlock block in cfg.Blocks)
            {
                foreach (Node node in block.Operations)
                {
                    Operand dest = node.Destination;

                    if (dest != null && dest.Kind == OperandKind.Register)
                    {
                        node.Destination = GetLocal(dest);
                    }

                    for (int index = 0; index < node.SourcesCount; index++)
                    {
                        Operand source = node.GetSource(index);

                        if (source.Kind == OperandKind.Register)
                        {
                            node.SetSource(index, GetLocal(source));
                        }
                    }
                }
            }
        }
    }
}

// File: ARMeilleure/Translation/RegisterUsage.cs (new file mode 100644, index 000000000..4164786b9)
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
using System;

using static ARMeilleure.IntermediateRepresentation.OperandHelper;

namespace ARMeilleure.Translation
{
    // Works out which registers each block reads and writes, and inserts the
    // loads from / stores to the native context that those uses require.
    static class RegisterUsage
    {
        // Bits 9 to 15 of the integer mask.
        private const long CallerSavedIntRegistersMask = 0x7fL << 9;

        // Bits 60 to 63 of the integer mask. Flags live at bit (RegsCount + index)
        // (see GetMask), so these are the flags with index 28 to 31.
        private const long PStateNzcvFlagsMask = 0xfL << 60;

        // Bits 16 to 31 of the vector mask.
        private const long CallerSavedVecRegistersMask = 0xffffL << 16;

        private const int RegsCount = 32;
        private const int RegsMask  = RegsCount - 1;

        // Pair of 64-bit sets: IntMask holds integer registers (low bits) and
        // flags (from bit RegsCount up), VecMask holds vector registers.
        private struct RegisterMask : IEquatable<RegisterMask>
        {
            public long IntMask { get; set; }
            public long VecMask { get; set; }

            public RegisterMask(long intMask, long vecMask)
            {
                IntMask = intMask;
                VecMask = vecMask;
            }

            public static RegisterMask operator &(RegisterMask x, RegisterMask y)
            {
                return new RegisterMask(x.IntMask & y.IntMask, x.VecMask & y.VecMask);
            }

            public static RegisterMask operator |(RegisterMask x, RegisterMask y)
            {
                return new RegisterMask(x.IntMask | y.IntMask, x.VecMask | y.VecMask);
            }

            public static RegisterMask operator ~(RegisterMask x)
            {
                return new RegisterMask(~x.IntMask, ~x.VecMask);
            }

            public static bool operator ==(RegisterMask x, RegisterMask y)
            {
                return x.Equals(y);
            }

            public static bool operator !=(RegisterMask x, RegisterMask y)
            {
                return !x.Equals(y);
            }

            public override bool Equals(object obj)
            {
                return obj is RegisterMask regMask && Equals(regMask);
            }

            public bool Equals(RegisterMask other)
            {
                return IntMask == other.IntMask && VecMask == other.VecMask;
            }

            public override int GetHashCode()
            {
                return HashCode.Combine(IntMask, VecMask);
            }
        }

        // Runs the pass over "cfg". "isCompleteFunction" is forwarded to
        // StoreLocals, where it allows stores of caller saved registers to be
        // dropped when Optimizations.AssumeStrictAbiCompliance is set.
        public static void RunPass(ControlFlowGraph cfg, bool isCompleteFunction)
        {
            // Compute local register inputs and outputs used inside blocks.
            RegisterMask[] localInputs  = new RegisterMask[cfg.Blocks.Count];
            RegisterMask[] localOutputs = new RegisterMask[cfg.Blocks.Count];

            foreach (BasicBlock block in cfg.Blocks)
            {
                // The Node members are used directly (as RegisterToLocal.Rename does),
                // so no cast is needed and a node that is not an Operation cannot
                // cause a null dereference.
                foreach (Node node in block.Operations)
                {
                    for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++)
                    {
                        Operand source = node.GetSource(srcIndex);

                        if (source.Kind != OperandKind.Register)
                        {
                            continue;
                        }

                        Register register = source.GetRegister();

                        // A register read only counts as a block input when the
                        // block has not written it before this point.
                        localInputs[block.Index] |= GetMask(register) & ~localOutputs[block.Index];
                    }

                    if (node.Destination != null && node.Destination.Kind == OperandKind.Register)
                    {
                        localOutputs[block.Index] |= GetMask(node.Destination.GetRegister());
                    }
                }
            }

            // Compute global register inputs and outputs used across blocks.
            RegisterMask[] globalCmnOutputs = new RegisterMask[cfg.Blocks.Count];

            RegisterMask[] globalInputs  = new RegisterMask[cfg.Blocks.Count];
            RegisterMask[] globalOutputs = new RegisterMask[cfg.Blocks.Count];

            bool modified;

            bool firstPass = true;

            // Iterate until no mask changes.
            do
            {
                modified = false;

                // Compute register outputs.
                for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
                {
                    BasicBlock block = cfg.PostOrderBlocks[index];

                    if (block.Predecessors.Count != 0 && !HasContextLoad(block))
                    {
                        BasicBlock predecessor = block.Predecessors[0];

                        // cmnOutputs: registers written on every predecessor (intersection).
                        // outputs:    registers written on any predecessor (union).
                        RegisterMask cmnOutputs = localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];

                        RegisterMask outputs = globalOutputs[predecessor.Index];

                        for (int pIndex = 1; pIndex < block.Predecessors.Count; pIndex++)
                        {
                            predecessor = block.Predecessors[pIndex];

                            cmnOutputs &= localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];

                            outputs |= globalOutputs[predecessor.Index];
                        }

                        // Registers written on some, but not all, predecessors
                        // become inputs of this block.
                        globalInputs[block.Index] |= outputs & ~cmnOutputs;

                        if (!firstPass)
                        {
                            cmnOutputs &= globalCmnOutputs[block.Index];
                        }

                        if (Exchange(globalCmnOutputs, block.Index, cmnOutputs))
                        {
                            modified = true;
                        }

                        outputs |= localOutputs[block.Index];

                        if (Exchange(globalOutputs, block.Index, globalOutputs[block.Index] | outputs))
                        {
                            modified = true;
                        }
                    }
                    else if (Exchange(globalOutputs, block.Index, localOutputs[block.Index]))
                    {
                        modified = true;
                    }
                }

                // Compute register inputs.
                for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
                {
                    BasicBlock block = cfg.PostOrderBlocks[index];

                    RegisterMask inputs = localInputs[block.Index];

                    if (block.Next != null)
                    {
                        inputs |= globalInputs[block.Next.Index];
                    }

                    if (block.Branch != null)
                    {
                        inputs |= globalInputs[block.Branch.Index];
                    }

                    inputs &= ~globalCmnOutputs[block.Index];

                    if (Exchange(globalInputs, block.Index, globalInputs[block.Index] | inputs))
                    {
                        modified = true;
                    }
                }

                firstPass = false;
            }
            while (modified);

            // Insert load and store context instructions where needed.
            foreach (BasicBlock block in cfg.Blocks)
            {
                bool hasContextLoad = HasContextLoad(block);

                if (hasContextLoad)
                {
                    // Drop the marker operation; the real loads are inserted below.
                    block.Operations.RemoveFirst();
                }

                // The only block without any predecessor should be the entry block.
                // It always needs a context load as it is the first block to run.
                if (block.Predecessors.Count == 0 || hasContextLoad)
                {
                    LoadLocals(block, globalInputs[block.Index].VecMask, RegisterType.Vector);
                    LoadLocals(block, globalInputs[block.Index].IntMask, RegisterType.Integer);
                }

                bool hasContextStore = HasContextStore(block);

                if (hasContextStore)
                {
                    // Drop the marker operation; the real stores are appended below.
                    block.Operations.RemoveLast();
                }

                if (EndsWithReturn(block) || hasContextStore)
                {
                    StoreLocals(block, globalOutputs[block.Index].IntMask, RegisterType.Integer, isCompleteFunction);
                    StoreLocals(block, globalOutputs[block.Index].VecMask, RegisterType.Vector,  isCompleteFunction);
                }
            }
        }

        // True when the block starts with a LoadFromContext operation that has no sources.
        private static bool HasContextLoad(BasicBlock block)
        {
            return StartsWith(block, Instruction.LoadFromContext) && block.Operations.First.Value.SourcesCount == 0;
        }

        // True when the block ends with a StoreToContext operation that has no sources.
        private static bool HasContextStore(BasicBlock block)
        {
            return EndsWith(block, Instruction.StoreToContext) && block.GetLastOp().SourcesCount == 0;
        }

        private static bool StartsWith(BasicBlock block, Instruction inst)
        {
            if (block.Operations.Count == 0)
            {
                return false;
            }

            return block.Operations.First.Value is Operation operation && operation.Instruction == inst;
        }

        private static bool EndsWith(BasicBlock block, Instruction inst)
        {
            if (block.Operations.Count == 0)
            {
                return false;
            }

            return block.Operations.Last.Value is Operation operation && operation.Instruction == inst;
        }

        // Returns the single-bit mask for "register": integer registers use bit
        // "index" of IntMask, flags use bit "RegsCount + index" of IntMask and
        // vector registers use bit "index" of VecMask.
        private static RegisterMask GetMask(Register register)
        {
            long intMask = 0;
            long vecMask = 0;

            switch (register.Type)
            {
                case RegisterType.Flag:    intMask = (1L << RegsCount) << register.Index; break;
                case RegisterType.Integer: intMask =  1L << register.Index;               break;
                case RegisterType.Vector:  vecMask =  1L << register.Index;               break;
            }

            return new RegisterMask(intMask, vecMask);
        }

        // Stores "value" at masks[blkIndex] and reports whether it differs from the old value.
        private static bool Exchange(RegisterMask[] masks, int blkIndex, RegisterMask value)
        {
            RegisterMask oldValue = masks[blkIndex];

            masks[blkIndex] = value;

            return oldValue != value;
        }

        // Inserts, at the start of "block", a LoadArgument of argument 0 followed by
        // an Add (address) + Load pair for every register whose bit is set in
        // "inputs". Everything is added with AddFirst, hence the descending bit
        // order and the LoadArgument being added last.
        private static void LoadLocals(BasicBlock block, long inputs, RegisterType baseType)
        {
            Operand arg0 = Local(OperandType.I64);

            for (int bit = 63; bit >= 0; bit--)
            {
                long mask = 1L << bit;

                if ((inputs & mask) == 0)
                {
                    continue;
                }

                Operand dest = GetRegFromBit(bit, baseType);

                long offset = NativeContext.GetRegisterOffset(dest.GetRegister());

                Operand addr = Local(OperandType.I64);

                Operation loadOp = new Operation(Instruction.Load, dest, addr);

                block.Operations.AddFirst(loadOp);

                Operation calcOffsOp = new Operation(Instruction.Add, addr, arg0, Const(offset));

                block.Operations.AddFirst(calcOffsOp);
            }

            Operation loadArg0 = new Operation(Instruction.LoadArgument, arg0, Const(0));

            block.Operations.AddFirst(loadArg0);
        }

        // Appends to "block" a LoadArgument of argument 0 followed by an Add
        // (address) + Store pair for every register whose bit is set in "outputs".
        // Caller saved registers are removed from "outputs" first when
        // Optimizations.AssumeStrictAbiCompliance is set and the function is complete.
        private static void StoreLocals(BasicBlock block, long outputs, RegisterType baseType, bool isCompleteFunction)
        {
            if (Optimizations.AssumeStrictAbiCompliance && isCompleteFunction)
            {
                if (baseType == RegisterType.Integer || baseType == RegisterType.Flag)
                {
                    outputs = ClearCallerSavedIntRegs(outputs);
                }
                else /* if (baseType == RegisterType.Vector) */
                {
                    outputs = ClearCallerSavedVecRegs(outputs);
                }
            }

            Operand arg0 = Local(OperandType.I64);

            Operation loadArg0 = new Operation(Instruction.LoadArgument, arg0, Const(0));

            block.Append(loadArg0);

            for (int bit = 0; bit < 64; bit++)
            {
                long mask = 1L << bit;

                if ((outputs & mask) == 0)
                {
                    continue;
                }

                Operand source = GetRegFromBit(bit, baseType);

                long offset = NativeContext.GetRegisterOffset(source.GetRegister());

                Operand addr = Local(OperandType.I64);

                Operation calcOffsOp = new Operation(Instruction.Add, addr, arg0, Const(offset));

                block.Append(calcOffsOp);

                Operation storeOp = new Operation(Instruction.Store, null, addr, source);

                block.Append(storeOp);
            }
        }

        // Inverse of GetMask for one bit: bits below RegsCount map to a register of
        // "baseType"; higher bits are only valid for the integer mask, where they
        // map to flags. Throws ArgumentOutOfRangeException otherwise.
        private static Operand GetRegFromBit(int bit, RegisterType baseType)
        {
            if (bit < RegsCount)
            {
                return new Operand(bit, baseType, GetOperandType(baseType));
            }
            else if (baseType == RegisterType.Integer)
            {
                return new Operand(bit & RegsMask, RegisterType.Flag, OperandType.I32);
            }
            else
            {
                throw new ArgumentOutOfRangeException(nameof(bit));
            }
        }

        private static OperandType GetOperandType(RegisterType type)
        {
            switch (type)
            {
                case RegisterType.Flag:    return OperandType.I32;
                case RegisterType.Integer: return OperandType.I64;
                case RegisterType.Vector:  return OperandType.V128;
            }

            throw new ArgumentException($"Invalid register type \"{type}\".");
        }

        private static bool EndsWithReturn(BasicBlock block)
        {
            if (!(block.GetLastOp() is Operation operation))
            {
                return false;
            }

            return operation.Instruction == Instruction.Return;
        }

        private static long ClearCallerSavedIntRegs(long mask)
        {
            // TODO: ARM32 support.
            mask &= ~(CallerSavedIntRegistersMask | PStateNzcvFlagsMask);

            return mask;
        }

        private static long ClearCallerSavedVecRegs(long mask)
        {
            // TODO: ARM32 support.
            mask &= ~CallerSavedVecRegistersMask;

            return mask;
        }
    }
}

// File: ARMeilleure/Translation/SsaConstruction.cs (new file mode 100644, index 000000000..ccf525915)
using ARMeilleure.Common;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
using System.Collections.Generic;

using static ARMeilleure.IntermediateRepresentation.OperandHelper;

namespace ARMeilleure.Translation
{
    static partial class Ssa
    {
        // Per-block bookkeeping: the last definition of each register made in the
        // block, and the set of registers for which the block needs a Phi.
        private class DefMap
        {
            private Dictionary<Register, Operand> _map;

            private BitMap _phiMasks;

            public DefMap()
            {
                _map = new Dictionary<Register, Operand>();

                _phiMasks = new BitMap(RegisterConsts.TotalCount);
            }

            public bool TryAddOperand(Register reg, Operand operand)
            {
                return _map.TryAdd(reg, operand);
            }

            public bool TryGetOperand(Register reg, out Operand operand)
            {
                return _map.TryGetValue(reg, out operand);
            }

            public bool AddPhi(Register reg)
            {
                return _phiMasks.Set(GetIdFromRegister(reg));
            }

            public bool HasPhi(Register reg)
            {
                return _phiMasks.IsSet(GetIdFromRegister(reg));
            }
        }

        // Rewrites register operands into locals, inserting Phi nodes where a use
        // can be reached by definitions coming from different blocks.
        public static void Construct(ControlFlowGraph cfg)
        {
            DefMap[] globalDefs = new DefMap[cfg.Blocks.Count];

            foreach (BasicBlock block in cfg.Blocks)
            {
                globalDefs[block.Index] = new DefMap();
            }

            Queue<BasicBlock> dfPhiBlocks = new Queue<BasicBlock>();

            // First pass, get all defs and locals uses.
            foreach (BasicBlock block in cfg.Blocks)
            {
                Operand[] localDefs = new Operand[RegisterConsts.TotalCount];

                LinkedListNode<Node> node = block.Operations.First;

                // Replaces a register use with the local defined earlier in this
                // block, if there is one; otherwise the operand is left untouched.
                Operand RenameLocal(Operand operand)
                {
                    if (operand != null && operand.Kind == OperandKind.Register)
                    {
                        Operand local = localDefs[GetIdFromRegister(operand.GetRegister())];

                        operand = local ?? operand;
                    }

                    return operand;
                }

                while (node != null)
                {
                    if (node.Value is Operation operation)
                    {
                        for (int index = 0; index < operation.SourcesCount; index++)
                        {
                            operation.SetSource(index, RenameLocal(operation.GetSource(index)));
                        }

                        Operand dest = operation.Destination;

                        if (dest != null && dest.Kind == OperandKind.Register)
                        {
                            Operand local = Local(dest.Type);

                            localDefs[GetIdFromRegister(dest.GetRegister())] = local;

                            operation.Destination = local;
                        }
                    }

                    node = node.Next;
                }

                for (int index = 0; index < RegisterConsts.TotalCount; index++)
                {
                    Operand local = localDefs[index];

                    if (local == null)
                    {
                        continue;
                    }

                    Register reg = GetRegisterFromId(index);

                    globalDefs[block.Index].TryAddOperand(reg, local);

                    // Flag a Phi for this register on the dominance frontier of the
                    // defining block, and transitively on the frontiers of every
                    // block that gets newly flagged.
                    dfPhiBlocks.Enqueue(block);

                    while (dfPhiBlocks.TryDequeue(out BasicBlock dfPhiBlock))
                    {
                        foreach (BasicBlock domFrontier in dfPhiBlock.DominanceFrontiers)
                        {
                            if (globalDefs[domFrontier.Index].AddPhi(reg))
                            {
                                dfPhiBlocks.Enqueue(domFrontier);
                            }
                        }
                    }
                }
            }

            // Second pass, rename variables with definitions on different blocks.
            foreach (BasicBlock block in cfg.Blocks)
            {
                Operand[] localDefs = new Operand[RegisterConsts.TotalCount];

                LinkedListNode<Node> node = block.Operations.First;

                // Replaces a remaining register use with the definition that reaches
                // this block; the result is cached per register for the block.
                Operand RenameGlobal(Operand operand)
                {
                    if (operand != null && operand.Kind == OperandKind.Register)
                    {
                        int key = GetIdFromRegister(operand.GetRegister());

                        Operand local = localDefs[key];

                        if (local == null)
                        {
                            local = FindDef(globalDefs, block, operand);

                            localDefs[key] = local;
                        }

                        operand = local;
                    }

                    return operand;
                }

                while (node != null)
                {
                    if (node.Value is Operation operation)
                    {
                        for (int index = 0; index < operation.SourcesCount; index++)
                        {
                            operation.SetSource(index, RenameGlobal(operation.GetSource(index)));
                        }
                    }

                    node = node.Next;
                }
            }
        }

        // Finds the definition of "operand" that reaches the start of "current":
        // a Phi on the block itself when one is flagged, otherwise whatever is
        // found walking up from the immediate dominator. Returns Undef() when
        // nothing is found.
        private static Operand FindDef(DefMap[] globalDefs, BasicBlock current, Operand operand)
        {
            if (globalDefs[current.Index].HasPhi(operand.GetRegister()))
            {
                return InsertPhi(globalDefs, current, operand);
            }

            if (current != current.ImmediateDominator)
            {
                return FindDefOnPred(globalDefs, current.ImmediateDominator, operand);
            }

            return Undef();
        }

        // Finds the definition of "operand" available at the end of "current",
        // walking up the immediate dominator chain until a block with a definition
        // or a flagged Phi is found. The walk stops at the block that is its own
        // immediate dominator, returning Undef() if nothing was found.
        private static Operand FindDefOnPred(DefMap[] globalDefs, BasicBlock current, Operand operand)
        {
            BasicBlock previous;

            do
            {
                DefMap defMap = globalDefs[current.Index];

                Register reg = operand.GetRegister();

                if (defMap.TryGetOperand(reg, out Operand lastDef))
                {
                    return lastDef;
                }

                if (defMap.HasPhi(reg))
                {
                    return InsertPhi(globalDefs, current, operand);
                }

                previous = current;
                current  = current.ImmediateDominator;
            }
            while (previous != current);

            return Undef();
        }

        private static Operand InsertPhi(DefMap[] globalDefs, BasicBlock block, Operand operand)
        {
            // This block has a Phi that has not been materialized yet, but that
            // would define a new version of the variable we're looking for. We need
            // to materialize the Phi, add all the block/operand pairs into the Phi, and
            // then use the definition from that Phi.
            Operand local = Local(operand.Type);

            PhiNode phi = new PhiNode(local, block.Predecessors.Count);

            AddPhi(block, phi);

            // Registered before the sources are resolved, so a lookup that comes
            // back to this block finds the Phi's local as the definition.
            globalDefs[block.Index].TryAddOperand(operand.GetRegister(), local);

            for (int index = 0; index < block.Predecessors.Count; index++)
            {
                BasicBlock predecessor = block.Predecessors[index];

                phi.SetBlock(index, predecessor);
                phi.SetSource(index, FindDefOnPred(globalDefs, predecessor, operand));
            }

            return local;
        }

        // Inserts "phi" after the run of Phi nodes at the start of "block",
        // or as the first node when the block does not start with a Phi.
        private static void AddPhi(BasicBlock block, PhiNode phi)
        {
            LinkedListNode<Node> node = block.Operations.First;

            if (node != null)
            {
                while (node.Next?.Value is PhiNode)
                {
                    node = node.Next;
                }
            }

            if (node?.Value is PhiNode)
            {
                block.Operations.AddAfter(node, phi);
            }
            else
            {
                block.Operations.AddFirst(phi);
            }
        }

        // Maps a register to a dense id: integer registers first, then vector
        // registers, then flags.
        private static int GetIdFromRegister(Register reg)
        {
            if (reg.Type == RegisterType.Integer)
            {
                return reg.Index;
            }
            else if (reg.Type == RegisterType.Vector)
            {
                return RegisterConsts.IntRegsCount + reg.Index;
            }
            else /* if (reg.Type == RegisterType.Flag) */
            {
                return RegisterConsts.IntAndVecRegsCount + reg.Index;
            }
        }

        // Inverse of GetIdFromRegister.
        private static Register GetRegisterFromId(int id)
        {
            if (id < RegisterConsts.IntRegsCount)
            {
                return new Register(id, RegisterType.Integer);
            }
            else if (id < RegisterConsts.IntAndVecRegsCount)
            {
                return new Register(id - RegisterConsts.IntRegsCount, RegisterType.Vector);
            }
            else /* if (id < RegisterConsts.TotalCount) */
            {
                return new Register(id - RegisterConsts.IntAndVecRegsCount, RegisterType.Flag);
            }
        }
    }
}

// File: ARMeilleure/Translation/SsaDeconstruction.cs (new file mode 100644, index 000000000..2ba78bdf4)
using ARMeilleure.IntermediateRepresentation;
using System.Collections.Generic;

using static ARMeilleure.IntermediateRepresentation.OperandHelper;

namespace ARMeilleure.Translation
{
    static partial class Ssa
    {
        // Removes the Phi nodes found at the start of each block. For every Phi, a
        // copy of each source into a temporary local is appended to the matching
        // predecessor, and a copy from that temporary into the Phi destination
        // takes the place of the Phi.
        public static void Deconstruct(ControlFlowGraph cfg)
        {
            foreach (BasicBlock block in cfg.Blocks)
            {
                LinkedListNode<Node> node = block.Operations.First;

                while (node?.Value is PhiNode phi)
                {
                    LinkedListNode<Node> nextNode = node.Next;

                    Operand local = Local(phi.Destination.Type);

                    for (int index = 0; index < phi.SourcesCount; index++)
                    {
                        BasicBlock predecessor = phi.GetBlock(index);

                        Operand source = phi.GetSource(index);

                        predecessor.Append(new Operation(Instruction.Copy, local, source));

                        phi.SetSource(index, null);
                    }

                    Operation copyOp = new Operation(Instruction.Copy, phi.Destination, local);

                    block.Operations.AddBefore(node, copyOp);

                    phi.Destination = null;

                    block.Operations.Remove(node);

                    node = nextNode;
                }
            }
        }
    }
}

// File: ARMeilleure/Translation/TranslatedFunction.cs (new file mode 100644, index 000000000..06069cf8f)
using System.Threading;

namespace ARMeilleure.Translation
{
    // A compiled guest function together with the call counter that decides
    // when it should be queued for recompilation.
    class TranslatedFunction
    {
        private const int MinCallsForRejit = 100;

        private GuestFunction _func;

        private bool _rejit;
        private int  _callCount;

        public TranslatedFunction(GuestFunction func, bool rejit)
        {
            _func  = func;
            _rejit = rejit;
        }

        // Invokes the compiled code with the native context of "context".
        public ulong Execute(State.ExecutionContext context)
        {
            return _func(context.NativeContextPtr);
        }

        // Increments the call counter (only when rejit is enabled) and returns true
        // exactly on the call that brings it to MinCallsForRejit.
        public bool ShouldRejit()
        {
            return _rejit && Interlocked.Increment(ref _callCount) == MinCallsForRejit;
        }
    }
}

// File: ARMeilleure/Translation/Translator.cs (new file mode 100644, index 000000000..6a7451214)
+++ b/ARMeilleure/Translation/Translator.cs @@ -0,0 +1,253 @@ +using ARMeilleure.Decoders; +using ARMeilleure.Diagnostics; +using ARMeilleure.Instructions; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Memory; +using ARMeilleure.State; +using System; +using System.Collections.Concurrent; +using System.Threading; + +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Translation +{ + public class Translator : ITranslator + { + private const ulong CallFlag = InstEmitFlowHelper.CallFlag; + + private MemoryManager _memory; + + private ConcurrentDictionary _funcs; + + private PriorityQueue _backgroundQueue; + + private AutoResetEvent _backgroundTranslatorEvent; + + private volatile int _threadCount; + + public Translator(MemoryManager memory) + { + _memory = memory; + + _funcs = new ConcurrentDictionary(); + + _backgroundQueue = new PriorityQueue(2); + + _backgroundTranslatorEvent = new AutoResetEvent(false); + } + + private void TranslateQueuedSubs() + { + while (_threadCount != 0) + { + if (_backgroundQueue.TryDequeue(out ulong address)) + { + TranslatedFunction func = Translate(address, ExecutionMode.Aarch64, highCq: true); + + _funcs.AddOrUpdate(address, func, (key, oldFunc) => func); + } + else + { + _backgroundTranslatorEvent.WaitOne(); + } + } + } + + public void Execute(IExecutionContext ctx, ulong address) + { + State.ExecutionContext context = (State.ExecutionContext)ctx; + + if (Interlocked.Increment(ref _threadCount) == 1) + { + Thread backgroundTranslatorThread = new Thread(TranslateQueuedSubs); + + backgroundTranslatorThread.Priority = ThreadPriority.Lowest; + backgroundTranslatorThread.Start(); + } + + Statistics.InitializeTimer(); + + NativeInterface.RegisterThread(context, _memory); + + do + { + address = ExecuteSingle(context, address); + } + while (context.Running && (address & ~1UL) != 0); + + NativeInterface.UnregisterThread(); + + if (Interlocked.Decrement(ref _threadCount) == 0) + { + 
_backgroundTranslatorEvent.Set(); + } + } + + public ulong ExecuteSingle(State.ExecutionContext context, ulong address) + { + TranslatedFunction func = GetOrTranslate(address, context.ExecutionMode); + + Statistics.StartTimer(); + + ulong nextAddr = func.Execute(context); + + Statistics.StopTimer(address); + + return nextAddr; + } + + private TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode) + { + // TODO: Investigate how we should handle code at unaligned addresses. + // Currently, those low bits are used to store special flags. + bool isCallTarget = (address & CallFlag) != 0; + + address &= ~CallFlag; + + if (!_funcs.TryGetValue(address, out TranslatedFunction func)) + { + func = Translate(address, mode, highCq: false); + + _funcs.TryAdd(address, func); + } + else if (isCallTarget && func.ShouldRejit()) + { + _backgroundQueue.Enqueue(0, address); + + _backgroundTranslatorEvent.Set(); + } + + return func; + } + + private TranslatedFunction Translate(ulong address, ExecutionMode mode, bool highCq) + { + ArmEmitterContext context = new ArmEmitterContext(_memory, Aarch32Mode.User); + + Logger.StartPass(PassName.Decoding); + + Block[] blocks = highCq + ? Decoder.DecodeFunction (_memory, address, mode) + : Decoder.DecodeBasicBlock(_memory, address, mode); + + Logger.EndPass(PassName.Decoding); + + Logger.StartPass(PassName.Translation); + + EmitSynchronization(context); + + if (blocks[0].Address != address) + { + context.Branch(context.GetLabel(address)); + } + + ControlFlowGraph cfg = EmitAndGetCFG(context, blocks); + + Logger.EndPass(PassName.Translation); + + Logger.StartPass(PassName.RegisterUsage); + + RegisterUsage.RunPass(cfg, isCompleteFunction: false); + + Logger.EndPass(PassName.RegisterUsage); + + OperandType[] argTypes = new OperandType[] { OperandType.I64 }; + + CompilerOptions options = highCq + ? 
CompilerOptions.HighCq + : CompilerOptions.None; + + GuestFunction func = Compiler.Compile(cfg, argTypes, OperandType.I64, options); + + return new TranslatedFunction(func, rejit: !highCq); + } + + private static ControlFlowGraph EmitAndGetCFG(ArmEmitterContext context, Block[] blocks) + { + for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++) + { + Block block = blocks[blkIndex]; + + context.CurrBlock = block; + + context.MarkLabel(context.GetLabel(block.Address)); + + for (int opcIndex = 0; opcIndex < block.OpCodes.Count; opcIndex++) + { + OpCode opCode = block.OpCodes[opcIndex]; + + context.CurrOp = opCode; + + bool isLastOp = opcIndex == block.OpCodes.Count - 1; + + if (isLastOp && block.Branch != null && block.Branch.Address <= block.Address) + { + EmitSynchronization(context); + } + + Operand lblPredicateSkip = null; + + if (opCode is OpCode32 op && op.Cond < Condition.Al) + { + lblPredicateSkip = Label(); + + InstEmitFlowHelper.EmitCondBranch(context, lblPredicateSkip, op.Cond.Invert()); + } + + if (opCode.Instruction.Emitter != null) + { + opCode.Instruction.Emitter(context); + } + else + { + throw new InvalidOperationException($"Invalid instruction \"{opCode.Instruction.Name}\"."); + } + + if (lblPredicateSkip != null) + { + context.MarkLabel(lblPredicateSkip); + + // If this is the last op on the block, and there's no "next" block + // after this one, then we have to return right now, with the address + // of the next instruction to be executed (in the case that the condition + // is false, and the branch was not taken, as all basic blocks should end + // with some kind of branch). 
+ if (isLastOp && block.Next == null) + { + context.Return(Const(opCode.Address + (ulong)opCode.OpCodeSizeInBytes)); + } + } + } + } + + return context.GetControlFlowGraph(); + } + + private static void EmitSynchronization(EmitterContext context) + { + long countOffs = NativeContext.GetCounterOffset(); + + Operand countAddr = context.Add(context.LoadArgument(OperandType.I64, 0), Const(countOffs)); + + Operand count = context.Load(OperandType.I32, countAddr); + + Operand lblNonZero = Label(); + Operand lblExit = Label(); + + context.BranchIfTrue(lblNonZero, count); + + context.Call(new _Void(NativeInterface.CheckSynchronization)); + + context.Branch(lblExit); + + context.MarkLabel(lblNonZero); + + count = context.Subtract(count, Const(1)); + + context.Store(countAddr, count); + + context.MarkLabel(lblExit); + } + } +} \ No newline at end of file diff --git a/ChocolArm64/ChocolArm64.csproj b/ChocolArm64/ChocolArm64.csproj index ea98003f9..cccdd94df 100644 --- a/ChocolArm64/ChocolArm64.csproj +++ b/ChocolArm64/ChocolArm64.csproj @@ -2,7 +2,7 @@ netcoreapp2.1 - win10-x64;osx-x64;linux-x64 + win-x64;osx-x64;linux-x64 Debug;Release;Profile Debug;Profile Release @@ -33,6 +33,7 @@ + diff --git a/ChocolArm64/CpuThread.cs b/ChocolArm64/CpuThread.cs deleted file mode 100644 index ad1fd6f3c..000000000 --- a/ChocolArm64/CpuThread.cs +++ /dev/null @@ -1,66 +0,0 @@ -using ChocolArm64.Memory; -using ChocolArm64.State; -using ChocolArm64.Translation; -using System; -using System.Threading; - -namespace ChocolArm64 -{ - public class CpuThread - { - public CpuThreadState ThreadState { get; private set; } - public MemoryManager Memory { get; private set; } - - private Translator _translator; - - public Thread Work; - - public event EventHandler WorkFinished; - - private int _isExecuting; - - public CpuThread(Translator translator, MemoryManager memory, long entrypoint) - { - _translator = translator; - Memory = memory; - - ThreadState = new CpuThreadState(); - - ThreadState.Running = 
true; - - Work = new Thread(delegate() - { - translator.ExecuteSubroutine(this, entrypoint); - - WorkFinished?.Invoke(this, EventArgs.Empty); - }); - } - - public bool Execute() - { - if (Interlocked.Exchange(ref _isExecuting, 1) == 1) - { - return false; - } - - Work.Start(); - - return true; - } - - public void StopExecution() - { - ThreadState.Running = false; - } - - public void RequestInterrupt() - { - ThreadState.RequestInterrupt(); - } - - public bool IsCurrentThread() - { - return Thread.CurrentThread == Work; - } - } -} \ No newline at end of file diff --git a/ChocolArm64/Instructions/InstEmitMemoryHelper.cs b/ChocolArm64/Instructions/InstEmitMemoryHelper.cs index dbb588867..08c8265b5 100644 --- a/ChocolArm64/Instructions/InstEmitMemoryHelper.cs +++ b/ChocolArm64/Instructions/InstEmitMemoryHelper.cs @@ -462,11 +462,11 @@ namespace ChocolArm64.Instructions switch (size) { - case 0: fallbackMethodName = nameof(MemoryManager.WriteVector8); break; - case 1: fallbackMethodName = nameof(MemoryManager.WriteVector16); break; - case 2: fallbackMethodName = nameof(MemoryManager.WriteVector32); break; - case 3: fallbackMethodName = nameof(MemoryManager.WriteVector64); break; - case 4: fallbackMethodName = nameof(MemoryManager.WriteVector128); break; + case 0: fallbackMethodName = nameof(MemoryManager.WriteVector8); break; + case 1: fallbackMethodName = nameof(MemoryManager.WriteVector16); break; + case 2: fallbackMethodName = nameof(MemoryManager.WriteVector32); break; + case 3: fallbackMethodName = nameof(MemoryManager.WriteVector64); break; + case 4: fallbackMethodName = nameof(MemoryManager.WriteVector128Internal); break; } context.EmitCall(typeof(MemoryManager), fallbackMethodName); diff --git a/ChocolArm64/Instructions/InstEmitSystem.cs b/ChocolArm64/Instructions/InstEmitSystem.cs index d0d60b9d5..ac264de92 100644 --- a/ChocolArm64/Instructions/InstEmitSystem.cs +++ b/ChocolArm64/Instructions/InstEmitSystem.cs @@ -31,8 +31,8 @@ namespace ChocolArm64.Instructions 
{ case 0b11_011_0000_0000_001: propName = nameof(CpuThreadState.CtrEl0); break; case 0b11_011_0000_0000_111: propName = nameof(CpuThreadState.DczidEl0); break; - case 0b11_011_0100_0100_000: propName = nameof(CpuThreadState.Fpcr); break; - case 0b11_011_0100_0100_001: propName = nameof(CpuThreadState.Fpsr); break; + case 0b11_011_0100_0100_000: propName = nameof(CpuThreadState.CFpcr); break; + case 0b11_011_0100_0100_001: propName = nameof(CpuThreadState.CFpsr); break; case 0b11_011_1101_0000_010: propName = nameof(CpuThreadState.TpidrEl0); break; case 0b11_011_1101_0000_011: propName = nameof(CpuThreadState.Tpidr); break; case 0b11_011_1110_0000_000: propName = nameof(CpuThreadState.CntfrqEl0); break; @@ -65,8 +65,8 @@ namespace ChocolArm64.Instructions switch (GetPackedId(op)) { - case 0b11_011_0100_0100_000: propName = nameof(CpuThreadState.Fpcr); break; - case 0b11_011_0100_0100_001: propName = nameof(CpuThreadState.Fpsr); break; + case 0b11_011_0100_0100_000: propName = nameof(CpuThreadState.CFpcr); break; + case 0b11_011_0100_0100_001: propName = nameof(CpuThreadState.CFpsr); break; case 0b11_011_1101_0000_010: propName = nameof(CpuThreadState.TpidrEl0); break; default: throw new NotImplementedException($"Unknown MSR at {op.Position:x16}"); diff --git a/ChocolArm64/Instructions/SoftFloat.cs b/ChocolArm64/Instructions/SoftFloat.cs index 3521ad152..e78932cc4 100644 --- a/ChocolArm64/Instructions/SoftFloat.cs +++ b/ChocolArm64/Instructions/SoftFloat.cs @@ -82,7 +82,7 @@ namespace ChocolArm64.Instructions { public static float FPConvert(ushort valueBits, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat16_32.FPConvert: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat16_32.FPConvert: state.Fpcr = 0x{state.CFpcr:X8}"); double real = valueBits.FPUnpackCv(out FpType type, out bool sign, state); @@ -322,13 +322,13 @@ namespace ChocolArm64.Instructions { int enable = (int)exc + 8; - if ((state.Fpcr & (1 << 
enable)) != 0) + if ((state.CFpcr & (1 << enable)) != 0) { throw new NotImplementedException("Floating-point trap handling."); } else { - state.Fpsr |= 1 << (int)exc; + state.CFpsr |= 1 << (int)exc; } } } @@ -337,7 +337,7 @@ namespace ChocolArm64.Instructions { public static ushort FPConvert(float value, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32_16.FPConvert: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32_16.FPConvert: state.Fpcr = 0x{state.CFpcr:X8}"); double real = value.FPUnpackCv(out FpType type, out bool sign, out uint valueBits, state); @@ -609,13 +609,13 @@ namespace ChocolArm64.Instructions { int enable = (int)exc + 8; - if ((state.Fpcr & (1 << enable)) != 0) + if ((state.CFpcr & (1 << enable)) != 0) { throw new NotImplementedException("Floating-point trap handling."); } else { - state.Fpsr |= 1 << (int)exc; + state.CFpsr |= 1 << (int)exc; } } } @@ -624,7 +624,7 @@ namespace ChocolArm64.Instructions { public static float FPAdd(float value1, float value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPAdd: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPAdd: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state); value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state); @@ -672,7 +672,7 @@ namespace ChocolArm64.Instructions public static int FPCompare(float value1, float value2, bool signalNaNs, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompare: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPCompare: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out bool sign1, out _, state); value2 = value2.FPUnpack(out FpType type2, out bool sign2, out _, state); @@ -709,7 +709,7 @@ namespace ChocolArm64.Instructions public static float 
FPCompareEQ(float value1, float value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareEQ: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPCompareEQ: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out _, out _, state); value2 = value2.FPUnpack(out FpType type2, out _, out _, state); @@ -735,7 +735,7 @@ namespace ChocolArm64.Instructions public static float FPCompareGE(float value1, float value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareGE: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPCompareGE: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out _, out _, state); value2 = value2.FPUnpack(out FpType type2, out _, out _, state); @@ -758,7 +758,7 @@ namespace ChocolArm64.Instructions public static float FPCompareGT(float value1, float value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareGT: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPCompareGT: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out _, out _, state); value2 = value2.FPUnpack(out FpType type2, out _, out _, state); @@ -782,7 +782,7 @@ namespace ChocolArm64.Instructions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static float FPCompareLE(float value1, float value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareLE: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPCompareLE: state.Fpcr = 0x{state.CFpcr:X8}"); return FPCompareGE(value2, value1, state); } @@ -790,14 +790,14 @@ namespace ChocolArm64.Instructions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static float FPCompareLT(float value1, float value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, 
$"SoftFloat32.FPCompareLT: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPCompareLT: state.Fpcr = 0x{state.CFpcr:X8}"); return FPCompareGT(value2, value1, state); } public static float FPDiv(float value1, float value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPDiv: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state); value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state); @@ -846,7 +846,7 @@ namespace ChocolArm64.Instructions public static float FPMax(float value1, float value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMax: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMax: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state); value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state); @@ -899,7 +899,7 @@ namespace ChocolArm64.Instructions public static float FPMaxNum(float value1, float value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMaxNum: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMaxNum: state.Fpcr = 0x{state.CFpcr:X8}"); value1.FPUnpack(out FpType type1, out _, out _, state); value2.FPUnpack(out FpType type2, out _, out _, state); @@ -918,7 +918,7 @@ namespace ChocolArm64.Instructions public static float FPMin(float value1, float value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMin: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMin: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state); value2 = value2.FPUnpack(out FpType type2, out 
bool sign2, out uint op2, state); @@ -971,7 +971,7 @@ namespace ChocolArm64.Instructions public static float FPMinNum(float value1, float value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMinNum: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMinNum: state.Fpcr = 0x{state.CFpcr:X8}"); value1.FPUnpack(out FpType type1, out _, out _, state); value2.FPUnpack(out FpType type2, out _, out _, state); @@ -990,7 +990,7 @@ namespace ChocolArm64.Instructions public static float FPMul(float value1, float value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMul: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMul: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state); value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state); @@ -1038,7 +1038,7 @@ namespace ChocolArm64.Instructions float value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMulAdd: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMulAdd: state.Fpcr = 0x{state.CFpcr:X8}"); valueA = valueA.FPUnpack(out FpType typeA, out bool signA, out uint addend, state); value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state); @@ -1108,7 +1108,7 @@ namespace ChocolArm64.Instructions float value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMulSub: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMulSub: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPNeg(); @@ -1117,7 +1117,7 @@ namespace ChocolArm64.Instructions public static float FPMulX(float value1, float value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMulX: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, 
$"SoftFloat32.FPMulX: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state); value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state); @@ -1159,7 +1159,7 @@ namespace ChocolArm64.Instructions public static float FPRecipEstimate(float value, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRecipEstimate: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPRecipEstimate: state.Fpcr = 0x{state.CFpcr:X8}"); value.FPUnpack(out FpType type, out bool sign, out uint op, state); @@ -1248,7 +1248,7 @@ namespace ChocolArm64.Instructions public static float FPRecipStepFused(float value1, float value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRecipStepFused: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPRecipStepFused: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPNeg(); @@ -1291,7 +1291,7 @@ namespace ChocolArm64.Instructions public static float FPRecpX(float value, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRecpX: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPRecpX: state.Fpcr = 0x{state.CFpcr:X8}"); value.FPUnpack(out FpType type, out bool sign, out uint op, state); @@ -1315,7 +1315,7 @@ namespace ChocolArm64.Instructions public static float FPRSqrtEstimate(float value, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRSqrtEstimate: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPRSqrtEstimate: state.Fpcr = 0x{state.CFpcr:X8}"); value.FPUnpack(out FpType type, out bool sign, out uint op, state); @@ -1380,7 +1380,7 @@ namespace ChocolArm64.Instructions public static float FPRSqrtStepFused(float value1, float value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, 
$"SoftFloat32.FPRSqrtStepFused: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPRSqrtStepFused: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPNeg(); @@ -1423,7 +1423,7 @@ namespace ChocolArm64.Instructions public static float FPSqrt(float value, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPSqrt: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPSqrt: state.Fpcr = 0x{state.CFpcr:X8}"); value = value.FPUnpack(out FpType type, out bool sign, out uint op, state); @@ -1464,7 +1464,7 @@ namespace ChocolArm64.Instructions public static float FPSub(float value1, float value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPSub: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPSub: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state); value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state); @@ -1693,13 +1693,13 @@ namespace ChocolArm64.Instructions { int enable = (int)exc + 8; - if ((state.Fpcr & (1 << enable)) != 0) + if ((state.CFpcr & (1 << enable)) != 0) { throw new NotImplementedException("Floating-point trap handling."); } else { - state.Fpsr |= 1 << (int)exc; + state.CFpsr |= 1 << (int)exc; } } } @@ -1708,7 +1708,7 @@ namespace ChocolArm64.Instructions { public static double FPAdd(double value1, double value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPAdd: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPAdd: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state); value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state); @@ -1756,7 +1756,7 @@ namespace ChocolArm64.Instructions public static int FPCompare(double value1, double value2, bool 
signalNaNs, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompare: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPCompare: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out bool sign1, out _, state); value2 = value2.FPUnpack(out FpType type2, out bool sign2, out _, state); @@ -1793,7 +1793,7 @@ namespace ChocolArm64.Instructions public static double FPCompareEQ(double value1, double value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareEQ: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPCompareEQ: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out _, out _, state); value2 = value2.FPUnpack(out FpType type2, out _, out _, state); @@ -1819,7 +1819,7 @@ namespace ChocolArm64.Instructions public static double FPCompareGE(double value1, double value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareGE: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPCompareGE: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out _, out _, state); value2 = value2.FPUnpack(out FpType type2, out _, out _, state); @@ -1842,7 +1842,7 @@ namespace ChocolArm64.Instructions public static double FPCompareGT(double value1, double value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareGT: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPCompareGT: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out _, out _, state); value2 = value2.FPUnpack(out FpType type2, out _, out _, state); @@ -1866,7 +1866,7 @@ namespace ChocolArm64.Instructions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static double FPCompareLE(double value1, double value2, CpuThreadState state) { - 
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareLE: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPCompareLE: state.Fpcr = 0x{state.CFpcr:X8}"); return FPCompareGE(value2, value1, state); } @@ -1874,14 +1874,14 @@ namespace ChocolArm64.Instructions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static double FPCompareLT(double value1, double value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareLT: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPCompareLT: state.Fpcr = 0x{state.CFpcr:X8}"); return FPCompareGT(value2, value1, state); } public static double FPDiv(double value1, double value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPDiv: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state); value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state); @@ -1930,7 +1930,7 @@ namespace ChocolArm64.Instructions public static double FPMax(double value1, double value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMax: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMax: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state); value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state); @@ -1983,7 +1983,7 @@ namespace ChocolArm64.Instructions public static double FPMaxNum(double value1, double value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMaxNum: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMaxNum: state.Fpcr = 0x{state.CFpcr:X8}"); value1.FPUnpack(out FpType type1, out _, out _, state); 
value2.FPUnpack(out FpType type2, out _, out _, state); @@ -2002,7 +2002,7 @@ namespace ChocolArm64.Instructions public static double FPMin(double value1, double value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMin: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMin: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state); value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state); @@ -2055,7 +2055,7 @@ namespace ChocolArm64.Instructions public static double FPMinNum(double value1, double value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMinNum: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMinNum: state.Fpcr = 0x{state.CFpcr:X8}"); value1.FPUnpack(out FpType type1, out _, out _, state); value2.FPUnpack(out FpType type2, out _, out _, state); @@ -2074,7 +2074,7 @@ namespace ChocolArm64.Instructions public static double FPMul(double value1, double value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMul: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMul: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state); value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state); @@ -2122,7 +2122,7 @@ namespace ChocolArm64.Instructions double value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMulAdd: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMulAdd: state.Fpcr = 0x{state.CFpcr:X8}"); valueA = valueA.FPUnpack(out FpType typeA, out bool signA, out ulong addend, state); value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state); @@ -2192,7 +2192,7 @@ namespace ChocolArm64.Instructions double 
value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMulSub: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMulSub: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPNeg(); @@ -2201,7 +2201,7 @@ namespace ChocolArm64.Instructions public static double FPMulX(double value1, double value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMulX: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMulX: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state); value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state); @@ -2243,7 +2243,7 @@ namespace ChocolArm64.Instructions public static double FPRecipEstimate(double value, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRecipEstimate: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPRecipEstimate: state.Fpcr = 0x{state.CFpcr:X8}"); value.FPUnpack(out FpType type, out bool sign, out ulong op, state); @@ -2332,7 +2332,7 @@ namespace ChocolArm64.Instructions public static double FPRecipStepFused(double value1, double value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRecipStepFused: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPRecipStepFused: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPNeg(); @@ -2375,7 +2375,7 @@ namespace ChocolArm64.Instructions public static double FPRecpX(double value, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRecpX: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPRecpX: state.Fpcr = 0x{state.CFpcr:X8}"); value.FPUnpack(out FpType type, out bool sign, out ulong op, state); @@ -2399,7 +2399,7 @@ namespace ChocolArm64.Instructions public static double 
FPRSqrtEstimate(double value, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRSqrtEstimate: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPRSqrtEstimate: state.Fpcr = 0x{state.CFpcr:X8}"); value.FPUnpack(out FpType type, out bool sign, out ulong op, state); @@ -2464,7 +2464,7 @@ namespace ChocolArm64.Instructions public static double FPRSqrtStepFused(double value1, double value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRSqrtStepFused: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPRSqrtStepFused: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPNeg(); @@ -2507,7 +2507,7 @@ namespace ChocolArm64.Instructions public static double FPSqrt(double value, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPSqrt: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPSqrt: state.Fpcr = 0x{state.CFpcr:X8}"); value = value.FPUnpack(out FpType type, out bool sign, out ulong op, state); @@ -2548,7 +2548,7 @@ namespace ChocolArm64.Instructions public static double FPSub(double value1, double value2, CpuThreadState state) { - Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPSub: state.Fpcr = 0x{state.Fpcr:X8}"); + Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPSub: state.Fpcr = 0x{state.CFpcr:X8}"); value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state); value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state); @@ -2777,13 +2777,13 @@ namespace ChocolArm64.Instructions { int enable = (int)exc + 8; - if ((state.Fpcr & (1 << enable)) != 0) + if ((state.CFpcr & (1 << enable)) != 0) { throw new NotImplementedException("Floating-point trap handling."); } else { - state.Fpsr |= 1 << (int)exc; + state.CFpsr |= 1 << (int)exc; } } } diff --git a/ChocolArm64/Memory/MemoryManager.cs b/ChocolArm64/Memory/MemoryManager.cs index 
364f6b58a..2347f1eb4 100644 --- a/ChocolArm64/Memory/MemoryManager.cs +++ b/ChocolArm64/Memory/MemoryManager.cs @@ -11,7 +11,7 @@ using static ChocolArm64.Memory.MemoryManagement; namespace ChocolArm64.Memory { - public unsafe class MemoryManager : IMemory, IDisposable + public unsafe class MemoryManager : ARMeilleure.Memory.IMemoryManager { public const int PageBits = 12; public const int PageSize = 1 << PageBits; @@ -880,7 +880,7 @@ namespace ChocolArm64.Memory } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void WriteVector128(long position, Vector128 value) + public void WriteVector128Internal(long position, Vector128 value) { if (Sse.IsSupported && (position & 15) == 0) { @@ -893,6 +893,12 @@ namespace ChocolArm64.Memory } } + public void WriteVector128(long position, ARMeilleure.State.V128 value) + { + WriteUInt64(position + 0, value.GetUInt64(0)); + WriteUInt64(position + 8, value.GetUInt64(1)); + } + public void WriteBytes(long position, byte[] data) { long endAddr = position + data.Length; diff --git a/ChocolArm64/Optimizations.cs b/ChocolArm64/Optimizations.cs index cbb8131f5..24828ebfb 100644 --- a/ChocolArm64/Optimizations.cs +++ b/ChocolArm64/Optimizations.cs @@ -1,24 +1,27 @@ using System.Runtime.Intrinsics.X86; -public static class Optimizations +namespace ChocolArm64 { - public static bool AssumeStrictAbiCompliance { get; set; } + public static class Optimizations + { + public static bool AssumeStrictAbiCompliance { get; set; } = true; - public static bool FastFP { get; set; } = true; + public static bool FastFP { get; set; } = true; - private const bool UseAllSseIfAvailable = true; + private const bool UseAllSseIfAvailable = true; - public static bool UseSseIfAvailable { get; set; } = UseAllSseIfAvailable; - public static bool UseSse2IfAvailable { get; set; } = UseAllSseIfAvailable; - public static bool UseSse3IfAvailable { get; set; } = UseAllSseIfAvailable; - public static bool UseSsse3IfAvailable { get; set; } = 
UseAllSseIfAvailable; - public static bool UseSse41IfAvailable { get; set; } = UseAllSseIfAvailable; - public static bool UseSse42IfAvailable { get; set; } = UseAllSseIfAvailable; + public static bool UseSseIfAvailable { get; set; } = UseAllSseIfAvailable; + public static bool UseSse2IfAvailable { get; set; } = UseAllSseIfAvailable; + public static bool UseSse3IfAvailable { get; set; } = UseAllSseIfAvailable; + public static bool UseSsse3IfAvailable { get; set; } = UseAllSseIfAvailable; + public static bool UseSse41IfAvailable { get; set; } = UseAllSseIfAvailable; + public static bool UseSse42IfAvailable { get; set; } = UseAllSseIfAvailable; - internal static bool UseSse => UseSseIfAvailable && Sse.IsSupported; - internal static bool UseSse2 => UseSse2IfAvailable && Sse2.IsSupported; - internal static bool UseSse3 => UseSse3IfAvailable && Sse3.IsSupported; - internal static bool UseSsse3 => UseSsse3IfAvailable && Ssse3.IsSupported; - internal static bool UseSse41 => UseSse41IfAvailable && Sse41.IsSupported; - internal static bool UseSse42 => UseSse42IfAvailable && Sse42.IsSupported; + internal static bool UseSse => UseSseIfAvailable && Sse.IsSupported; + internal static bool UseSse2 => UseSse2IfAvailable && Sse2.IsSupported; + internal static bool UseSse3 => UseSse3IfAvailable && Sse3.IsSupported; + internal static bool UseSsse3 => UseSsse3IfAvailable && Ssse3.IsSupported; + internal static bool UseSse41 => UseSse41IfAvailable && Sse41.IsSupported; + internal static bool UseSse42 => UseSse42IfAvailable && Sse42.IsSupported; + } } \ No newline at end of file diff --git a/ChocolArm64/State/CpuThreadState.cs b/ChocolArm64/State/CpuThreadState.cs index 424f17258..e4baaefa4 100644 --- a/ChocolArm64/State/CpuThreadState.cs +++ b/ChocolArm64/State/CpuThreadState.cs @@ -1,13 +1,14 @@ -using ChocolArm64.Events; using ChocolArm64.Translation; using System; using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; +using static 
ChocolArm64.Instructions.VectorHelper; + namespace ChocolArm64.State { - public class CpuThreadState + public class CpuThreadState : ARMeilleure.State.IExecutionContext { private const int MinCountForCheck = 40000; @@ -24,7 +25,7 @@ namespace ChocolArm64.State V16, V17, V18, V19, V20, V21, V22, V23, V24, V25, V26, V27, V28, V29, V30, V31; - public bool Aarch32; + public bool IsAarch32 { get; set; } public bool Thumb; public bool BigEndian; @@ -45,8 +46,20 @@ namespace ChocolArm64.State public long TpidrEl0 { get; set; } public long Tpidr { get; set; } - public int Fpcr { get; set; } - public int Fpsr { get; set; } + public int CFpcr { get; set; } + public int CFpsr { get; set; } + + public ARMeilleure.State.FPCR Fpcr + { + get => (ARMeilleure.State.FPCR)CFpcr; + set => CFpcr = (int)value; + } + + public ARMeilleure.State.FPSR Fpsr + { + get => (ARMeilleure.State.FPSR)CFpsr; + set => CFpsr = (int)value; + } public int Psr { @@ -73,10 +86,10 @@ namespace ChocolArm64.State } } - public event EventHandler Interrupt; - public event EventHandler Break; - public event EventHandler SvcCall; - public event EventHandler Undefined; + public event EventHandler Interrupt; + public event EventHandler Break; + public event EventHandler SupervisorCall; + public event EventHandler Undefined; private static Stopwatch _tickCounter; @@ -92,6 +105,8 @@ namespace ChocolArm64.State public CpuThreadState() { ClearExclusiveAddress(); + + Running = true; } static CpuThreadState() @@ -151,29 +166,165 @@ namespace ChocolArm64.State } } - internal void RequestInterrupt() + public ulong GetX(int index) + { + switch (index) + { + case 0: return X0; + case 1: return X1; + case 2: return X2; + case 3: return X3; + case 4: return X4; + case 5: return X5; + case 6: return X6; + case 7: return X7; + case 8: return X8; + case 9: return X9; + case 10: return X10; + case 11: return X11; + case 12: return X12; + case 13: return X13; + case 14: return X14; + case 15: return X15; + case 16: return X16; + 
case 17: return X17; + case 18: return X18; + case 19: return X19; + case 20: return X20; + case 21: return X21; + case 22: return X22; + case 23: return X23; + case 24: return X24; + case 25: return X25; + case 26: return X26; + case 27: return X27; + case 28: return X28; + case 29: return X29; + case 30: return X30; + case 31: return X31; + + default: throw new ArgumentOutOfRangeException(nameof(index)); + } + } + + public void SetX(int index, ulong value) + { + switch (index) + { + case 0: X0 = value; break; + case 1: X1 = value; break; + case 2: X2 = value; break; + case 3: X3 = value; break; + case 4: X4 = value; break; + case 5: X5 = value; break; + case 6: X6 = value; break; + case 7: X7 = value; break; + case 8: X8 = value; break; + case 9: X9 = value; break; + case 10: X10 = value; break; + case 11: X11 = value; break; + case 12: X12 = value; break; + case 13: X13 = value; break; + case 14: X14 = value; break; + case 15: X15 = value; break; + case 16: X16 = value; break; + case 17: X17 = value; break; + case 18: X18 = value; break; + case 19: X19 = value; break; + case 20: X20 = value; break; + case 21: X21 = value; break; + case 22: X22 = value; break; + case 23: X23 = value; break; + case 24: X24 = value; break; + case 25: X25 = value; break; + case 26: X26 = value; break; + case 27: X27 = value; break; + case 28: X28 = value; break; + case 29: X29 = value; break; + case 30: X30 = value; break; + case 31: X31 = value; break; + + default: throw new ArgumentOutOfRangeException(nameof(index)); + } + } + + public ARMeilleure.State.V128 GetV(int index) + { + switch (index) + { + case 0: return new ARMeilleure.State.V128(VectorExtractIntZx(V0, 0, 3), VectorExtractIntZx(V0, 1, 3)); + case 1: return new ARMeilleure.State.V128(VectorExtractIntZx(V1, 0, 3), VectorExtractIntZx(V1, 1, 3)); + case 2: return new ARMeilleure.State.V128(VectorExtractIntZx(V2, 0, 3), VectorExtractIntZx(V2, 1, 3)); + case 3: return new ARMeilleure.State.V128(VectorExtractIntZx(V3, 0, 3), 
VectorExtractIntZx(V3, 1, 3)); + case 4: return new ARMeilleure.State.V128(VectorExtractIntZx(V4, 0, 3), VectorExtractIntZx(V4, 1, 3)); + case 5: return new ARMeilleure.State.V128(VectorExtractIntZx(V5, 0, 3), VectorExtractIntZx(V5, 1, 3)); + case 6: return new ARMeilleure.State.V128(VectorExtractIntZx(V6, 0, 3), VectorExtractIntZx(V6, 1, 3)); + case 7: return new ARMeilleure.State.V128(VectorExtractIntZx(V7, 0, 3), VectorExtractIntZx(V7, 1, 3)); + case 8: return new ARMeilleure.State.V128(VectorExtractIntZx(V8, 0, 3), VectorExtractIntZx(V8, 1, 3)); + case 9: return new ARMeilleure.State.V128(VectorExtractIntZx(V9, 0, 3), VectorExtractIntZx(V9, 1, 3)); + case 10: return new ARMeilleure.State.V128(VectorExtractIntZx(V10, 0, 3), VectorExtractIntZx(V10, 1, 3)); + case 11: return new ARMeilleure.State.V128(VectorExtractIntZx(V11, 0, 3), VectorExtractIntZx(V11, 1, 3)); + case 12: return new ARMeilleure.State.V128(VectorExtractIntZx(V12, 0, 3), VectorExtractIntZx(V12, 1, 3)); + case 13: return new ARMeilleure.State.V128(VectorExtractIntZx(V13, 0, 3), VectorExtractIntZx(V13, 1, 3)); + case 14: return new ARMeilleure.State.V128(VectorExtractIntZx(V14, 0, 3), VectorExtractIntZx(V14, 1, 3)); + case 15: return new ARMeilleure.State.V128(VectorExtractIntZx(V15, 0, 3), VectorExtractIntZx(V15, 1, 3)); + case 16: return new ARMeilleure.State.V128(VectorExtractIntZx(V16, 0, 3), VectorExtractIntZx(V16, 1, 3)); + case 17: return new ARMeilleure.State.V128(VectorExtractIntZx(V17, 0, 3), VectorExtractIntZx(V17, 1, 3)); + case 18: return new ARMeilleure.State.V128(VectorExtractIntZx(V18, 0, 3), VectorExtractIntZx(V18, 1, 3)); + case 19: return new ARMeilleure.State.V128(VectorExtractIntZx(V19, 0, 3), VectorExtractIntZx(V19, 1, 3)); + case 20: return new ARMeilleure.State.V128(VectorExtractIntZx(V20, 0, 3), VectorExtractIntZx(V20, 1, 3)); + case 21: return new ARMeilleure.State.V128(VectorExtractIntZx(V21, 0, 3), VectorExtractIntZx(V21, 1, 3)); + case 22: return new 
ARMeilleure.State.V128(VectorExtractIntZx(V22, 0, 3), VectorExtractIntZx(V22, 1, 3)); + case 23: return new ARMeilleure.State.V128(VectorExtractIntZx(V23, 0, 3), VectorExtractIntZx(V23, 1, 3)); + case 24: return new ARMeilleure.State.V128(VectorExtractIntZx(V24, 0, 3), VectorExtractIntZx(V24, 1, 3)); + case 25: return new ARMeilleure.State.V128(VectorExtractIntZx(V25, 0, 3), VectorExtractIntZx(V25, 1, 3)); + case 26: return new ARMeilleure.State.V128(VectorExtractIntZx(V26, 0, 3), VectorExtractIntZx(V26, 1, 3)); + case 27: return new ARMeilleure.State.V128(VectorExtractIntZx(V27, 0, 3), VectorExtractIntZx(V27, 1, 3)); + case 28: return new ARMeilleure.State.V128(VectorExtractIntZx(V28, 0, 3), VectorExtractIntZx(V28, 1, 3)); + case 29: return new ARMeilleure.State.V128(VectorExtractIntZx(V29, 0, 3), VectorExtractIntZx(V29, 1, 3)); + case 30: return new ARMeilleure.State.V128(VectorExtractIntZx(V30, 0, 3), VectorExtractIntZx(V30, 1, 3)); + case 31: return new ARMeilleure.State.V128(VectorExtractIntZx(V31, 0, 3), VectorExtractIntZx(V31, 1, 3)); + + default: throw new ArgumentOutOfRangeException(nameof(index)); + } + } + + public bool GetPstateFlag(ARMeilleure.State.PState flag) + { + switch (flag) + { + case ARMeilleure.State.PState.NFlag: return Negative; + case ARMeilleure.State.PState.ZFlag: return Zero; + case ARMeilleure.State.PState.CFlag: return Carry; + case ARMeilleure.State.PState.VFlag: return Overflow; + + default: throw new ArgumentOutOfRangeException(nameof(flag)); + } + } + + public void RequestInterrupt() { _interrupted = true; } internal void OnBreak(long position, int imm) { - Break?.Invoke(this, new InstExceptionEventArgs(position, imm)); + Break?.Invoke(this, new ARMeilleure.State.InstExceptionEventArgs((ulong)position, imm)); } internal void OnSvcCall(long position, int imm) { - SvcCall?.Invoke(this, new InstExceptionEventArgs(position, imm)); + SupervisorCall?.Invoke(this, new ARMeilleure.State.InstExceptionEventArgs((ulong)position, imm)); } 
internal void OnUndefined(long position, int rawOpCode) { - Undefined?.Invoke(this, new InstUndefinedEventArgs(position, rawOpCode)); + Undefined?.Invoke(this, new ARMeilleure.State.InstUndefinedEventArgs((ulong)position, rawOpCode)); } internal ExecutionMode GetExecutionMode() { - if (!Aarch32) + if (!IsAarch32) { return ExecutionMode.Aarch64; } @@ -185,17 +336,19 @@ namespace ChocolArm64.State internal bool GetFpcrFlag(Fpcr flag) { - return (Fpcr & (1 << (int)flag)) != 0; + return (CFpcr & (1 << (int)flag)) != 0; } internal void SetFpsrFlag(Fpsr flag) { - Fpsr |= 1 << (int)flag; + CFpsr |= 1 << (int)flag; } internal RoundMode FPRoundingMode() { - return (RoundMode)((Fpcr >> (int)State.Fpcr.RMode) & 3); + return (RoundMode)((CFpcr >> (int)State.Fpcr.RMode) & 3); } + + public void Dispose() { } } } diff --git a/ChocolArm64/Translation/Translator.cs b/ChocolArm64/Translation/Translator.cs index 0803df09b..ab8f474a4 100644 --- a/ChocolArm64/Translation/Translator.cs +++ b/ChocolArm64/Translation/Translator.cs @@ -9,7 +9,7 @@ using System.Threading; namespace ChocolArm64.Translation { - public class Translator + public class Translator : ARMeilleure.Translation.ITranslator { private MemoryManager _memory; @@ -38,24 +38,18 @@ namespace ChocolArm64.Translation _queue = new TranslatorQueue(); } - internal void ExecuteSubroutine(CpuThread thread, long position) + public void Execute(ARMeilleure.State.IExecutionContext ctx, ulong address) { + CpuThreadState state = (CpuThreadState)ctx; + + long position = (long)address; + if (Interlocked.Increment(ref _threadCount) == 1) { _backgroundTranslator = new Thread(TranslateQueuedSubs); _backgroundTranslator.Start(); } - ExecuteSubroutine(thread.ThreadState, position); - - if (Interlocked.Decrement(ref _threadCount) == 0) - { - _queue.ForceSignal(); - } - } - - private void ExecuteSubroutine(CpuThreadState state, long position) - { state.CurrentTranslator = this; do @@ -75,6 +69,11 @@ namespace ChocolArm64.Translation while 
(position != 0 && state.Running); state.CurrentTranslator = null; + + if (Interlocked.Decrement(ref _threadCount) == 0) + { + _queue.ForceSignal(); + } } internal ArmSubroutine GetOrTranslateSubroutine(CpuThreadState state, long position, CallType cs) diff --git a/Ryujinx.Audio/Ryujinx.Audio.csproj b/Ryujinx.Audio/Ryujinx.Audio.csproj index a6a34f40f..e25066eee 100644 --- a/Ryujinx.Audio/Ryujinx.Audio.csproj +++ b/Ryujinx.Audio/Ryujinx.Audio.csproj @@ -2,7 +2,7 @@ netcoreapp2.1 - win10-x64;osx-x64;linux-x64 + win-x64;osx-x64;linux-x64 Debug;Release;Profile Debug;Profile Release diff --git a/Ryujinx.Common/Ryujinx.Common.csproj b/Ryujinx.Common/Ryujinx.Common.csproj index cf078db85..86c6c570d 100644 --- a/Ryujinx.Common/Ryujinx.Common.csproj +++ b/Ryujinx.Common/Ryujinx.Common.csproj @@ -2,7 +2,7 @@ netcoreapp2.1 - win10-x64;osx-x64;linux-x64 + win-x64;osx-x64;linux-x64 Debug;Release;Profile Debug;Profile Release @@ -25,7 +25,7 @@ TRACE;USE_PROFILING true - + diff --git a/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs b/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs index 2e78cf142..62dae00b5 100644 --- a/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs +++ b/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using OpenTK.Graphics.OpenGL; using Ryujinx.Graphics.Gal; using Ryujinx.Graphics.Memory; @@ -229,7 +229,7 @@ namespace Ryujinx.Graphics.Texture public static byte[] ReadTexture(IMemory memory, GalImage image, long position) { - MemoryManager cpuMemory; + IMemoryManager cpuMemory; if (memory is NvGpuVmm vmm) { @@ -237,7 +237,7 @@ namespace Ryujinx.Graphics.Texture } else { - cpuMemory = (MemoryManager)memory; + cpuMemory = (IMemoryManager)memory; } ISwizzle swizzle = TextureHelper.GetSwizzle(image); @@ -251,7 +251,6 @@ namespace Ryujinx.Graphics.Texture // Note: Each row of the texture needs to be aligned to 4 bytes. 
int pitch = (width * bytesPerPixel + 3) & ~3; - int dataLayerSize = height * pitch * depth; byte[] data = new byte[dataLayerSize * image.LayerCount]; diff --git a/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs b/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs index 1de81008e..22b803db3 100644 --- a/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs +++ b/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.Common; using Ryujinx.Graphics.Gal; using Ryujinx.Graphics.Memory; @@ -38,7 +38,7 @@ namespace Ryujinx.Graphics.Texture } } - public static (MemoryManager Memory, long Position) GetMemoryAndPosition( + public static (IMemoryManager Memory, long Position) GetMemoryAndPosition( IMemory memory, long position) { @@ -47,7 +47,7 @@ namespace Ryujinx.Graphics.Texture return (vmm.Memory, vmm.GetPhysicalAddress(position)); } - return ((MemoryManager)memory, position); + return ((IMemoryManager)memory, position); } } } diff --git a/Ryujinx.Graphics/Memory/NvGpuVmm.cs b/Ryujinx.Graphics/Memory/NvGpuVmm.cs index fea99587d..d8ccd6c74 100644 --- a/Ryujinx.Graphics/Memory/NvGpuVmm.cs +++ b/Ryujinx.Graphics/Memory/NvGpuVmm.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.Graphics.Gal; using System; @@ -23,7 +23,7 @@ namespace Ryujinx.Graphics.Memory private const int PtLvl0Bit = PtPageBits + PtLvl1Bits; private const int PtLvl1Bit = PtPageBits; - public MemoryManager Memory { get; private set; } + public IMemoryManager Memory { get; private set; } private NvGpuVmmCache _cache; @@ -32,7 +32,7 @@ namespace Ryujinx.Graphics.Memory private long[][] _pageTable; - public NvGpuVmm(MemoryManager memory) + public NvGpuVmm(IMemoryManager memory) { Memory = memory; diff --git a/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs b/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs index ab5ea288c..37ead4e0a 100644 --- a/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs +++ 
b/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using System.Collections.Concurrent; namespace Ryujinx.Graphics.Memory @@ -12,9 +12,9 @@ namespace Ryujinx.Graphics.Memory private ConcurrentDictionary[] _cachedPages; - private MemoryManager _memory; + private IMemoryManager _memory; - public NvGpuVmmCache(MemoryManager memory) + public NvGpuVmmCache(IMemoryManager memory) { _memory = memory; diff --git a/Ryujinx.Graphics/Ryujinx.Graphics.csproj b/Ryujinx.Graphics/Ryujinx.Graphics.csproj index 740008955..e2bf16930 100644 --- a/Ryujinx.Graphics/Ryujinx.Graphics.csproj +++ b/Ryujinx.Graphics/Ryujinx.Graphics.csproj @@ -2,7 +2,7 @@ netcoreapp2.1 - win10-x64;osx-x64;linux-x64 + win-x64;osx-x64;linux-x64 Debug;Release;Profile Debug;Profile Release @@ -32,8 +32,9 @@ - + + diff --git a/Ryujinx.Graphics/VDec/VideoDecoder.cs b/Ryujinx.Graphics/VDec/VideoDecoder.cs index 3ebb93f42..9bf60c31b 100644 --- a/Ryujinx.Graphics/VDec/VideoDecoder.cs +++ b/Ryujinx.Graphics/VDec/VideoDecoder.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.Graphics.Gal; using Ryujinx.Graphics.Memory; using Ryujinx.Graphics.Texture; diff --git a/Ryujinx.HLE/DeviceMemory.cs b/Ryujinx.HLE/DeviceMemory.cs index 3553a6e71..0ead17473 100644 --- a/Ryujinx.HLE/DeviceMemory.cs +++ b/Ryujinx.HLE/DeviceMemory.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using System; using System.Runtime.InteropServices; diff --git a/Ryujinx.HLE/Exceptions/UndefinedInstructionException.cs b/Ryujinx.HLE/Exceptions/UndefinedInstructionException.cs index 84bb1fc59..dfbd6c272 100644 --- a/Ryujinx.HLE/Exceptions/UndefinedInstructionException.cs +++ b/Ryujinx.HLE/Exceptions/UndefinedInstructionException.cs @@ -8,6 +8,6 @@ namespace Ryujinx.HLE.Exceptions public UndefinedInstructionException() : base() { } - public UndefinedInstructionException(long position, int opCode) : base(string.Format(ExMsg, position, 
opCode)) { } + public UndefinedInstructionException(ulong address, int opCode) : base(string.Format(ExMsg, address, opCode)) { } } } \ No newline at end of file diff --git a/Ryujinx.HLE/HOS/Homebrew.cs b/Ryujinx.HLE/HOS/Homebrew.cs index b11a46404..8e54f82c1 100644 --- a/Ryujinx.HLE/HOS/Homebrew.cs +++ b/Ryujinx.HLE/HOS/Homebrew.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using System.Text; namespace Ryujinx.HLE.HOS @@ -8,7 +8,7 @@ namespace Ryujinx.HLE.HOS public const string TemporaryNroSuffix = ".ryu_tmp.nro"; // http://switchbrew.org/index.php?title=Homebrew_ABI - public static void WriteHbAbiData(MemoryManager memory, long position, int mainThreadHandle, string switchPath) + public static void WriteHbAbiData(IMemoryManager memory, long position, int mainThreadHandle, string switchPath) { // MainThreadHandle. WriteConfigEntry(memory, ref position, 1, 0, mainThreadHandle); @@ -31,7 +31,7 @@ namespace Ryujinx.HLE.HOS } private static void WriteConfigEntry( - MemoryManager memory, + IMemoryManager memory, ref long position, int key, int flags = 0, @@ -46,7 +46,7 @@ namespace Ryujinx.HLE.HOS position += 0x18; } - public static string ReadHbAbiNextLoadPath(MemoryManager memory, long position) + public static string ReadHbAbiNextLoadPath(IMemoryManager memory, long position) { string fileName = null; diff --git a/Ryujinx.HLE/HOS/Horizon.cs b/Ryujinx.HLE/HOS/Horizon.cs index f8bb345f2..5873223ef 100644 --- a/Ryujinx.HLE/HOS/Horizon.cs +++ b/Ryujinx.HLE/HOS/Horizon.cs @@ -110,6 +110,8 @@ namespace Ryujinx.HLE.HOS public int GlobalAccessLogMode { get; set; } + public bool UseLegacyJit { get; set; } + internal long HidBaseAddress { get; private set; } public Horizon(Switch device) diff --git a/Ryujinx.HLE/HOS/Ipc/IpcHandler.cs b/Ryujinx.HLE/HOS/Ipc/IpcHandler.cs index e940d774c..50ab3d100 100644 --- a/Ryujinx.HLE/HOS/Ipc/IpcHandler.cs +++ b/Ryujinx.HLE/HOS/Ipc/IpcHandler.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; 
using Ryujinx.HLE.HOS.Kernel.Common; using Ryujinx.HLE.HOS.Kernel.Ipc; using Ryujinx.HLE.HOS.Kernel.Process; @@ -13,7 +13,7 @@ namespace Ryujinx.HLE.HOS.Ipc public static KernelResult IpcCall( Switch device, KProcess process, - MemoryManager memory, + IMemoryManager memory, KThread thread, KClientSession session, IpcMessage request, diff --git a/Ryujinx.HLE/HOS/Kernel/Common/KernelTransfer.cs b/Ryujinx.HLE/HOS/Kernel/Common/KernelTransfer.cs index 0fcb31483..62330d6ba 100644 --- a/Ryujinx.HLE/HOS/Kernel/Common/KernelTransfer.cs +++ b/Ryujinx.HLE/HOS/Kernel/Common/KernelTransfer.cs @@ -1,5 +1,5 @@ using Ryujinx.HLE.HOS.Kernel.Process; -using ChocolArm64.Memory; +using ARMeilleure.Memory; namespace Ryujinx.HLE.HOS.Kernel.Common { diff --git a/Ryujinx.HLE/HOS/Kernel/Memory/KMemoryManager.cs b/Ryujinx.HLE/HOS/Kernel/Memory/KMemoryManager.cs index 448ae54c0..fd80b3b9e 100644 --- a/Ryujinx.HLE/HOS/Kernel/Memory/KMemoryManager.cs +++ b/Ryujinx.HLE/HOS/Kernel/Memory/KMemoryManager.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.Common; using Ryujinx.HLE.HOS.Kernel.Common; using Ryujinx.HLE.HOS.Kernel.Process; @@ -29,7 +29,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Memory private LinkedList _blocks; - private MemoryManager _cpuMemory; + private IMemoryManager _cpuMemory; private Horizon _system; @@ -72,7 +72,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Memory private MersenneTwister _randomNumberGenerator; - public KMemoryManager(Horizon system, MemoryManager cpuMemory) + public KMemoryManager(Horizon system, IMemoryManager cpuMemory) { _system = system; _cpuMemory = cpuMemory; diff --git a/Ryujinx.HLE/HOS/Kernel/Process/HleProcessDebugger.cs b/Ryujinx.HLE/HOS/Kernel/Process/HleProcessDebugger.cs index 223bf5dae..e2ca44b59 100644 --- a/Ryujinx.HLE/HOS/Kernel/Process/HleProcessDebugger.cs +++ b/Ryujinx.HLE/HOS/Kernel/Process/HleProcessDebugger.cs @@ -1,5 +1,5 @@ -using ChocolArm64.Memory; -using ChocolArm64.State; +using ARMeilleure.Memory; +using 
ARMeilleure.State; using Ryujinx.HLE.HOS.Diagnostics.Demangler; using Ryujinx.HLE.HOS.Kernel.Memory; using Ryujinx.HLE.Loaders.Elf; @@ -40,7 +40,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Process _images = new List(); } - public string GetGuestStackTrace(CpuThreadState threadState) + public string GetGuestStackTrace(IExecutionContext context) { EnsureLoaded(); @@ -74,7 +74,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Process } // TODO: ARM32. - long framePointer = (long)threadState.X29; + long framePointer = (long)context.GetX(29); trace.AppendLine($"Process: {_owner.Name}, PID: {_owner.Pid}"); @@ -218,7 +218,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Process } } - private void LoadMod0Symbols(MemoryManager memory, long textOffset) + private void LoadMod0Symbols(IMemoryManager memory, long textOffset) { long mod0Offset = textOffset + memory.ReadUInt32(textOffset + 4); @@ -288,7 +288,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Process } } - private ElfSymbol GetSymbol(MemoryManager memory, long address, long strTblAddr) + private ElfSymbol GetSymbol(IMemoryManager memory, long address, long strTblAddr) { int nameIndex = memory.ReadInt32(address + 0); int info = memory.ReadByte (address + 4); diff --git a/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs b/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs index 1b5a67722..beb376f64 100644 --- a/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs +++ b/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs @@ -1,9 +1,7 @@ -using ChocolArm64; -using ChocolArm64.Events; -using ChocolArm64.Memory; -using ChocolArm64.Translation; +using ARMeilleure.Memory; +using ARMeilleure.State; +using ARMeilleure.Translation; using Ryujinx.Common; -using Ryujinx.Common.Logging; using Ryujinx.HLE.Exceptions; using Ryujinx.HLE.HOS.Kernel.Common; using Ryujinx.HLE.HOS.Kernel.Memory; @@ -80,9 +78,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Process public bool IsPaused { get; private set; } - public MemoryManager CpuMemory { get; private set; } + public IMemoryManager CpuMemory { get; private set; } - 
public Translator Translator { get; private set; } + public ITranslator Translator { get; private set; } private SvcHandler _svcHandler; @@ -793,11 +791,11 @@ namespace Ryujinx.HLE.HOS.Kernel.Process } } - public void SubscribeThreadEventHandlers(CpuThread context) + public void SubscribeThreadEventHandlers(IExecutionContext context) { - context.ThreadState.Interrupt += InterruptHandler; - context.ThreadState.SvcCall += _svcHandler.SvcCall; - context.ThreadState.Undefined += UndefinedInstructionHandler; + context.Interrupt += InterruptHandler; + context.SupervisorCall += _svcHandler.SvcCall; + context.Undefined += UndefinedInstructionHandler; } private void InterruptHandler(object sender, EventArgs e) @@ -1001,9 +999,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Process { foreach (KThread thread in _threads) { - thread.Context.StopExecution(); + thread.Context.Running = false; - System.Scheduler.CoreManager.Set(thread.Context.Work); + System.Scheduler.CoreManager.Set(thread.HostThread); } } } @@ -1024,13 +1022,20 @@ namespace Ryujinx.HLE.HOS.Kernel.Process bool useFlatPageTable = memRegion == MemoryRegion.Application; - CpuMemory = new MemoryManager(_system.Device.Memory.RamPointer, addrSpaceBits, useFlatPageTable); + if (_system.UseLegacyJit) + { + CpuMemory = new ChocolArm64.Memory.MemoryManager(_system.Device.Memory.RamPointer, addrSpaceBits, useFlatPageTable); + + Translator = new ChocolArm64.Translation.Translator((ChocolArm64.Memory.MemoryManager)CpuMemory); + } + else + { + CpuMemory = new MemoryManager(_system.Device.Memory.RamPointer, addrSpaceBits, useFlatPageTable); + + Translator = new Translator((MemoryManager)CpuMemory); + } MemoryManager = new KMemoryManager(_system, CpuMemory); - - Translator = new Translator(CpuMemory); - - Translator.CpuTrace += CpuTraceHandler; } public void PrintCurrentThreadStackTrace() @@ -1038,14 +1043,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Process System.Scheduler.GetCurrentThread().PrintGuestStackTrace(); } - private void 
CpuTraceHandler(object sender, CpuTraceEventArgs e) - { - Logger.PrintInfo(LogClass.Cpu, $"Executing at 0x{e.Position:X16}."); - } - private void UndefinedInstructionHandler(object sender, InstUndefinedEventArgs e) { - throw new UndefinedInstructionException(e.Position, e.RawOpCode); + throw new UndefinedInstructionException(e.Address, e.OpCode); } } } \ No newline at end of file diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcHandler.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcHandler.cs index cf881a793..7509ae048 100644 --- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcHandler.cs +++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcHandler.cs @@ -1,5 +1,4 @@ -using ChocolArm64.Events; -using ChocolArm64.State; +using ARMeilleure.State; using Ryujinx.HLE.HOS.Kernel.Process; using System; @@ -7,9 +6,9 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall { partial class SvcHandler { - private Switch _device; - private KProcess _process; - private Horizon _system; + private Switch _device; + private KProcess _process; + private Horizon _system; public SvcHandler(Switch device, KProcess process) { @@ -20,16 +19,16 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall public void SvcCall(object sender, InstExceptionEventArgs e) { - Action svcFunc = SvcTable.GetSvcFunc(e.Id); + Action svcFunc = SvcTable.GetSvcFunc(e.Id); if (svcFunc == null) { throw new NotImplementedException($"SVC 0x{e.Id:X4} is not implemented."); } - CpuThreadState threadState = (CpuThreadState)sender; + IExecutionContext context = (IExecutionContext)sender; - svcFunc(this, threadState); + svcFunc(this, context); } } } \ No newline at end of file diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcIpc.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcIpc.cs index eb7595c0a..7c1c981bf 100644 --- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcIpc.cs +++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcIpc.cs @@ -83,7 +83,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall public KernelResult SendSyncRequest64(int 
handle) { - return SendSyncRequest((ulong)_system.Scheduler.GetCurrentThread().Context.ThreadState.Tpidr, 0x100, handle); + return SendSyncRequest((ulong)_system.Scheduler.GetCurrentThread().Context.Tpidr, 0x100, handle); } public KernelResult SendSyncRequestWithUserBuffer64(ulong messagePtr, ulong size, int handle) diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcSystem.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcSystem.cs index 5f971131c..094e1935f 100644 --- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcSystem.cs +++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcSystem.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.Common; using Ryujinx.Common.Logging; using Ryujinx.HLE.Exceptions; @@ -138,7 +138,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall public ulong GetSystemTick64() { - return _system.Scheduler.GetCurrentThread().Context.ThreadState.CntpctEl0; + return _system.Scheduler.GetCurrentThread().Context.CntpctEl0; } public KernelResult GetProcessId64(int handle, out long pid) diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcTable.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcTable.cs index 23934649f..c1a31da9b 100644 --- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcTable.cs +++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcTable.cs @@ -1,4 +1,4 @@ -using ChocolArm64.State; +using ARMeilleure.State; using Ryujinx.Common.Logging; using Ryujinx.HLE.HOS.Kernel.Common; using System; @@ -14,7 +14,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall private static Dictionary _svcFuncs64; - private static Action[] _svcTable64; + private static Action[] _svcTable64; static SvcTable() { @@ -77,10 +77,10 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall { 0x78, nameof(SvcHandler.UnmapProcessCodeMemory64) } }; - _svcTable64 = new Action[0x80]; + _svcTable64 = new Action[0x80]; } - public static Action GetSvcFunc(int svcId) + public static Action GetSvcFunc(int svcId) { if (_svcTable64[svcId] != null) { @@ -95,9 +95,9 @@ 
namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall return null; } - private static Action GenerateMethod(string svcName) + private static Action GenerateMethod(string svcName) { - Type[] argTypes = new Type[] { typeof(SvcHandler), typeof(CpuThreadState) }; + Type[] argTypes = new Type[] { typeof(SvcHandler), typeof(IExecutionContext) }; DynamicMethod method = new DynamicMethod(svcName, null, argTypes); @@ -183,7 +183,11 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall generator.Emit(OpCodes.Conv_I); generator.Emit(OpCodes.Ldarg_1); - generator.Emit(OpCodes.Ldfld, GetStateFieldX(byRefArgsCount + index)); + generator.Emit(OpCodes.Ldc_I4, byRefArgsCount + index); + + MethodInfo info = typeof(IExecutionContext).GetMethod(nameof(IExecutionContext.GetX)); + + generator.Emit(OpCodes.Call, info); generator.Emit(OpCodes.Box, typeof(ulong)); @@ -227,7 +231,11 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall else { generator.Emit(OpCodes.Ldarg_1); - generator.Emit(OpCodes.Ldfld, GetStateFieldX(byRefArgsCount + index)); + generator.Emit(OpCodes.Ldc_I4, byRefArgsCount + index); + + MethodInfo info = typeof(IExecutionContext).GetMethod(nameof(IExecutionContext.GetX)); + + generator.Emit(OpCodes.Call, info); ConvertToArgType(argType); } @@ -258,51 +266,44 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall generator.Emit(OpCodes.Stloc, tempLocal); generator.Emit(OpCodes.Ldarg_1); + generator.Emit(OpCodes.Ldc_I4, outRegIndex++); generator.Emit(OpCodes.Ldloc, tempLocal); ConvertToFieldType(retType); - generator.Emit(OpCodes.Stfld, GetStateFieldX(outRegIndex++)); + MethodInfo info = typeof(IExecutionContext).GetMethod(nameof(IExecutionContext.SetX)); + + generator.Emit(OpCodes.Call, info); } for (int index = 0; index < locals.Count; index++) { generator.Emit(OpCodes.Ldarg_1); + generator.Emit(OpCodes.Ldc_I4, outRegIndex++); generator.Emit(OpCodes.Ldloc, locals[index]); ConvertToFieldType(locals[index].LocalType); - generator.Emit(OpCodes.Stfld, GetStateFieldX(outRegIndex++)); + 
MethodInfo info = typeof(IExecutionContext).GetMethod(nameof(IExecutionContext.SetX)); + + generator.Emit(OpCodes.Call, info); } // Zero out the remaining unused registers. while (outRegIndex < SvcFuncMaxArguments) { generator.Emit(OpCodes.Ldarg_1); + generator.Emit(OpCodes.Ldc_I4, outRegIndex++); generator.Emit(OpCodes.Ldc_I8, 0L); - generator.Emit(OpCodes.Stfld, GetStateFieldX(outRegIndex++)); + + MethodInfo info = typeof(IExecutionContext).GetMethod(nameof(IExecutionContext.SetX)); + + generator.Emit(OpCodes.Call, info); } generator.Emit(OpCodes.Ret); - return (Action)method.CreateDelegate(typeof(Action)); - } - - private static FieldInfo GetStateFieldX(int index) - { - switch (index) - { - case 0: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X0)); - case 1: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X1)); - case 2: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X2)); - case 3: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X3)); - case 4: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X4)); - case 5: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X5)); - case 6: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X6)); - case 7: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X7)); - } - - throw new ArgumentOutOfRangeException(nameof(index)); + return (Action)method.CreateDelegate(typeof(Action)); } private static void CheckIfTypeIsSupported(Type type, string svcName) diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThread.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThread.cs index e1f018c19..e49da023a 100644 --- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThread.cs +++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThread.cs @@ -1,4 +1,5 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; +using ARMeilleure.State; using Ryujinx.HLE.HOS.Kernel.Common; using Ryujinx.HLE.HOS.Kernel.Process; using Ryujinx.HLE.HOS.Kernel.Threading; @@ -347,83 
+348,91 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall return KernelResult.InvalidThread; } - MemoryManager memory = currentProcess.CpuMemory; + IMemoryManager memory = currentProcess.CpuMemory; - memory.WriteUInt64((long)address + 0x0, thread.Context.ThreadState.X0); - memory.WriteUInt64((long)address + 0x8, thread.Context.ThreadState.X1); - memory.WriteUInt64((long)address + 0x10, thread.Context.ThreadState.X2); - memory.WriteUInt64((long)address + 0x18, thread.Context.ThreadState.X3); - memory.WriteUInt64((long)address + 0x20, thread.Context.ThreadState.X4); - memory.WriteUInt64((long)address + 0x28, thread.Context.ThreadState.X5); - memory.WriteUInt64((long)address + 0x30, thread.Context.ThreadState.X6); - memory.WriteUInt64((long)address + 0x38, thread.Context.ThreadState.X7); - memory.WriteUInt64((long)address + 0x40, thread.Context.ThreadState.X8); - memory.WriteUInt64((long)address + 0x48, thread.Context.ThreadState.X9); - memory.WriteUInt64((long)address + 0x50, thread.Context.ThreadState.X10); - memory.WriteUInt64((long)address + 0x58, thread.Context.ThreadState.X11); - memory.WriteUInt64((long)address + 0x60, thread.Context.ThreadState.X12); - memory.WriteUInt64((long)address + 0x68, thread.Context.ThreadState.X13); - memory.WriteUInt64((long)address + 0x70, thread.Context.ThreadState.X14); - memory.WriteUInt64((long)address + 0x78, thread.Context.ThreadState.X15); - memory.WriteUInt64((long)address + 0x80, thread.Context.ThreadState.X16); - memory.WriteUInt64((long)address + 0x88, thread.Context.ThreadState.X17); - memory.WriteUInt64((long)address + 0x90, thread.Context.ThreadState.X18); - memory.WriteUInt64((long)address + 0x98, thread.Context.ThreadState.X19); - memory.WriteUInt64((long)address + 0xa0, thread.Context.ThreadState.X20); - memory.WriteUInt64((long)address + 0xa8, thread.Context.ThreadState.X21); - memory.WriteUInt64((long)address + 0xb0, thread.Context.ThreadState.X22); - memory.WriteUInt64((long)address + 0xb8, 
thread.Context.ThreadState.X23); - memory.WriteUInt64((long)address + 0xc0, thread.Context.ThreadState.X24); - memory.WriteUInt64((long)address + 0xc8, thread.Context.ThreadState.X25); - memory.WriteUInt64((long)address + 0xd0, thread.Context.ThreadState.X26); - memory.WriteUInt64((long)address + 0xd8, thread.Context.ThreadState.X27); - memory.WriteUInt64((long)address + 0xe0, thread.Context.ThreadState.X28); - memory.WriteUInt64((long)address + 0xe8, thread.Context.ThreadState.X29); - memory.WriteUInt64((long)address + 0xf0, thread.Context.ThreadState.X30); - memory.WriteUInt64((long)address + 0xf8, thread.Context.ThreadState.X31); + memory.WriteUInt64((long)address + 0x0, thread.Context.GetX(0)); + memory.WriteUInt64((long)address + 0x8, thread.Context.GetX(1)); + memory.WriteUInt64((long)address + 0x10, thread.Context.GetX(2)); + memory.WriteUInt64((long)address + 0x18, thread.Context.GetX(3)); + memory.WriteUInt64((long)address + 0x20, thread.Context.GetX(4)); + memory.WriteUInt64((long)address + 0x28, thread.Context.GetX(5)); + memory.WriteUInt64((long)address + 0x30, thread.Context.GetX(6)); + memory.WriteUInt64((long)address + 0x38, thread.Context.GetX(7)); + memory.WriteUInt64((long)address + 0x40, thread.Context.GetX(8)); + memory.WriteUInt64((long)address + 0x48, thread.Context.GetX(9)); + memory.WriteUInt64((long)address + 0x50, thread.Context.GetX(10)); + memory.WriteUInt64((long)address + 0x58, thread.Context.GetX(11)); + memory.WriteUInt64((long)address + 0x60, thread.Context.GetX(12)); + memory.WriteUInt64((long)address + 0x68, thread.Context.GetX(13)); + memory.WriteUInt64((long)address + 0x70, thread.Context.GetX(14)); + memory.WriteUInt64((long)address + 0x78, thread.Context.GetX(15)); + memory.WriteUInt64((long)address + 0x80, thread.Context.GetX(16)); + memory.WriteUInt64((long)address + 0x88, thread.Context.GetX(17)); + memory.WriteUInt64((long)address + 0x90, thread.Context.GetX(18)); + memory.WriteUInt64((long)address + 0x98, 
thread.Context.GetX(19)); + memory.WriteUInt64((long)address + 0xa0, thread.Context.GetX(20)); + memory.WriteUInt64((long)address + 0xa8, thread.Context.GetX(21)); + memory.WriteUInt64((long)address + 0xb0, thread.Context.GetX(22)); + memory.WriteUInt64((long)address + 0xb8, thread.Context.GetX(23)); + memory.WriteUInt64((long)address + 0xc0, thread.Context.GetX(24)); + memory.WriteUInt64((long)address + 0xc8, thread.Context.GetX(25)); + memory.WriteUInt64((long)address + 0xd0, thread.Context.GetX(26)); + memory.WriteUInt64((long)address + 0xd8, thread.Context.GetX(27)); + memory.WriteUInt64((long)address + 0xe0, thread.Context.GetX(28)); + memory.WriteUInt64((long)address + 0xe8, thread.Context.GetX(29)); + memory.WriteUInt64((long)address + 0xf0, thread.Context.GetX(30)); + memory.WriteUInt64((long)address + 0xf8, thread.Context.GetX(31)); memory.WriteInt64((long)address + 0x100, thread.LastPc); - memory.WriteUInt64((long)address + 0x108, (ulong)thread.Context.ThreadState.Psr); + memory.WriteUInt64((long)address + 0x108, (ulong)GetPsr(thread.Context)); - memory.WriteVector128((long)address + 0x110, thread.Context.ThreadState.V0); - memory.WriteVector128((long)address + 0x120, thread.Context.ThreadState.V1); - memory.WriteVector128((long)address + 0x130, thread.Context.ThreadState.V2); - memory.WriteVector128((long)address + 0x140, thread.Context.ThreadState.V3); - memory.WriteVector128((long)address + 0x150, thread.Context.ThreadState.V4); - memory.WriteVector128((long)address + 0x160, thread.Context.ThreadState.V5); - memory.WriteVector128((long)address + 0x170, thread.Context.ThreadState.V6); - memory.WriteVector128((long)address + 0x180, thread.Context.ThreadState.V7); - memory.WriteVector128((long)address + 0x190, thread.Context.ThreadState.V8); - memory.WriteVector128((long)address + 0x1a0, thread.Context.ThreadState.V9); - memory.WriteVector128((long)address + 0x1b0, thread.Context.ThreadState.V10); - memory.WriteVector128((long)address + 0x1c0, 
thread.Context.ThreadState.V11); - memory.WriteVector128((long)address + 0x1d0, thread.Context.ThreadState.V12); - memory.WriteVector128((long)address + 0x1e0, thread.Context.ThreadState.V13); - memory.WriteVector128((long)address + 0x1f0, thread.Context.ThreadState.V14); - memory.WriteVector128((long)address + 0x200, thread.Context.ThreadState.V15); - memory.WriteVector128((long)address + 0x210, thread.Context.ThreadState.V16); - memory.WriteVector128((long)address + 0x220, thread.Context.ThreadState.V17); - memory.WriteVector128((long)address + 0x230, thread.Context.ThreadState.V18); - memory.WriteVector128((long)address + 0x240, thread.Context.ThreadState.V19); - memory.WriteVector128((long)address + 0x250, thread.Context.ThreadState.V20); - memory.WriteVector128((long)address + 0x260, thread.Context.ThreadState.V21); - memory.WriteVector128((long)address + 0x270, thread.Context.ThreadState.V22); - memory.WriteVector128((long)address + 0x280, thread.Context.ThreadState.V23); - memory.WriteVector128((long)address + 0x290, thread.Context.ThreadState.V24); - memory.WriteVector128((long)address + 0x2a0, thread.Context.ThreadState.V25); - memory.WriteVector128((long)address + 0x2b0, thread.Context.ThreadState.V26); - memory.WriteVector128((long)address + 0x2c0, thread.Context.ThreadState.V27); - memory.WriteVector128((long)address + 0x2d0, thread.Context.ThreadState.V28); - memory.WriteVector128((long)address + 0x2e0, thread.Context.ThreadState.V29); - memory.WriteVector128((long)address + 0x2f0, thread.Context.ThreadState.V30); - memory.WriteVector128((long)address + 0x300, thread.Context.ThreadState.V31); + memory.WriteVector128((long)address + 0x110, thread.Context.GetV(0)); + memory.WriteVector128((long)address + 0x120, thread.Context.GetV(1)); + memory.WriteVector128((long)address + 0x130, thread.Context.GetV(2)); + memory.WriteVector128((long)address + 0x140, thread.Context.GetV(3)); + memory.WriteVector128((long)address + 0x150, thread.Context.GetV(4)); + 
memory.WriteVector128((long)address + 0x160, thread.Context.GetV(5)); + memory.WriteVector128((long)address + 0x170, thread.Context.GetV(6)); + memory.WriteVector128((long)address + 0x180, thread.Context.GetV(7)); + memory.WriteVector128((long)address + 0x190, thread.Context.GetV(8)); + memory.WriteVector128((long)address + 0x1a0, thread.Context.GetV(9)); + memory.WriteVector128((long)address + 0x1b0, thread.Context.GetV(10)); + memory.WriteVector128((long)address + 0x1c0, thread.Context.GetV(11)); + memory.WriteVector128((long)address + 0x1d0, thread.Context.GetV(12)); + memory.WriteVector128((long)address + 0x1e0, thread.Context.GetV(13)); + memory.WriteVector128((long)address + 0x1f0, thread.Context.GetV(14)); + memory.WriteVector128((long)address + 0x200, thread.Context.GetV(15)); + memory.WriteVector128((long)address + 0x210, thread.Context.GetV(16)); + memory.WriteVector128((long)address + 0x220, thread.Context.GetV(17)); + memory.WriteVector128((long)address + 0x230, thread.Context.GetV(18)); + memory.WriteVector128((long)address + 0x240, thread.Context.GetV(19)); + memory.WriteVector128((long)address + 0x250, thread.Context.GetV(20)); + memory.WriteVector128((long)address + 0x260, thread.Context.GetV(21)); + memory.WriteVector128((long)address + 0x270, thread.Context.GetV(22)); + memory.WriteVector128((long)address + 0x280, thread.Context.GetV(23)); + memory.WriteVector128((long)address + 0x290, thread.Context.GetV(24)); + memory.WriteVector128((long)address + 0x2a0, thread.Context.GetV(25)); + memory.WriteVector128((long)address + 0x2b0, thread.Context.GetV(26)); + memory.WriteVector128((long)address + 0x2c0, thread.Context.GetV(27)); + memory.WriteVector128((long)address + 0x2d0, thread.Context.GetV(28)); + memory.WriteVector128((long)address + 0x2e0, thread.Context.GetV(29)); + memory.WriteVector128((long)address + 0x2f0, thread.Context.GetV(30)); + memory.WriteVector128((long)address + 0x300, thread.Context.GetV(31)); - memory.WriteInt32((long)address + 
0x310, thread.Context.ThreadState.Fpcr); - memory.WriteInt32((long)address + 0x314, thread.Context.ThreadState.Fpsr); - memory.WriteInt64((long)address + 0x318, thread.Context.ThreadState.Tpidr); + memory.WriteInt32((long)address + 0x310, (int)thread.Context.Fpcr); + memory.WriteInt32((long)address + 0x314, (int)thread.Context.Fpsr); + memory.WriteInt64((long)address + 0x318, thread.Context.Tpidr); return KernelResult.Success; } + + private static int GetPsr(IExecutionContext context) + { + return (context.GetPstateFlag(PState.NFlag) ? (1 << 31) : 0) | + (context.GetPstateFlag(PState.ZFlag) ? (1 << 30) : 0) | + (context.GetPstateFlag(PState.CFlag) ? (1 << 29) : 0) | + (context.GetPstateFlag(PState.VFlag) ? (1 << 28) : 0); + } } } \ No newline at end of file diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs b/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs index 42eed26a0..0b9511348 100644 --- a/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs +++ b/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs @@ -36,12 +36,12 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading { KCoreContext coreContext = CoreContexts[core]; - if (coreContext.ContextSwitchNeeded && (coreContext.CurrentThread?.Context.IsCurrentThread() ?? false)) + if (coreContext.ContextSwitchNeeded && (coreContext.CurrentThread?.IsCurrentHostThread() ?? false)) { coreContext.ContextSwitch(); } - if (coreContext.CurrentThread?.Context.IsCurrentThread() ?? false) + if (coreContext.CurrentThread?.IsCurrentHostThread() ?? false) { selectedCount++; } @@ -70,14 +70,14 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading { // If this is not the thread that is currently executing, we need // to request an interrupt to allow safely starting another thread. 
- if (!currentThread.Context.IsCurrentThread()) + if (!currentThread.IsCurrentHostThread()) { currentThread.Context.RequestInterrupt(); return; } - CoreManager.Reset(currentThread.Context.Work); + CoreManager.Reset(currentThread.HostThread); } // Advance current core and try picking a thread, @@ -92,9 +92,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading if (coreContext.CurrentThread != null) { - CoreManager.Set(coreContext.CurrentThread.Context.Work); + CoreManager.Set(coreContext.CurrentThread.HostThread); - coreContext.CurrentThread.Context.Execute(); + coreContext.CurrentThread.Execute(); break; } @@ -134,14 +134,14 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading public void ExitThread(KThread thread) { - thread.Context.StopExecution(); + thread.Context.Running = false; - CoreManager.Exit(thread.Context.Work); + CoreManager.Exit(thread.HostThread); } public void RemoveThread(KThread thread) { - CoreManager.RemoveThread(thread.Context.Work); + CoreManager.RemoveThread(thread.HostThread); } } } \ No newline at end of file diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/KCoreContext.cs b/Ryujinx.HLE/HOS/Kernel/Threading/KCoreContext.cs index 979071772..0aa12b0dd 100644 --- a/Ryujinx.HLE/HOS/Kernel/Threading/KCoreContext.cs +++ b/Ryujinx.HLE/HOS/Kernel/Threading/KCoreContext.cs @@ -58,7 +58,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading if (CurrentThread != null) { - _coreManager.Reset(CurrentThread.Context.Work); + _coreManager.Reset(CurrentThread.HostThread); } CurrentThread = SelectedThread; @@ -70,9 +70,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading CurrentThread.TotalTimeRunning += currentTime - CurrentThread.LastScheduledTime; CurrentThread.LastScheduledTime = currentTime; - _coreManager.Set(CurrentThread.Context.Work); + _coreManager.Set(CurrentThread.HostThread); - CurrentThread.Context.Execute(); + CurrentThread.Execute(); } } } diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/KCriticalSection.cs b/Ryujinx.HLE/HOS/Kernel/Threading/KCriticalSection.cs index 
39c857b5d..b7013bb7b 100644 --- a/Ryujinx.HLE/HOS/Kernel/Threading/KCriticalSection.cs +++ b/Ryujinx.HLE/HOS/Kernel/Threading/KCriticalSection.cs @@ -1,4 +1,4 @@ -using ChocolArm64; +using ARMeilleure; using System.Threading; namespace Ryujinx.HLE.HOS.Kernel.Threading @@ -53,14 +53,14 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading if (coreContext.ContextSwitchNeeded) { - CpuThread currentHleThread = coreContext.CurrentThread?.Context; + KThread currentThread = coreContext.CurrentThread; - if (currentHleThread == null) + if (currentThread == null) { // Nothing is running, we can perform the context switch immediately. coreContext.ContextSwitch(); } - else if (currentHleThread.IsCurrentThread()) + else if (currentThread.IsCurrentHostThread()) { // Thread running on the current core, context switch will block. doContextSwitch = true; @@ -68,7 +68,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading else { // Thread running on another core, request a interrupt. - currentHleThread.RequestInterrupt(); + currentThread.Context.RequestInterrupt(); } } } diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/KScheduler.cs b/Ryujinx.HLE/HOS/Kernel/Threading/KScheduler.cs index 8d2cdfce6..dd5422b8e 100644 --- a/Ryujinx.HLE/HOS/Kernel/Threading/KScheduler.cs +++ b/Ryujinx.HLE/HOS/Kernel/Threading/KScheduler.cs @@ -203,7 +203,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading { for (int core = 0; core < CpuCoresCount; core++) { - if (CoreContexts[core].CurrentThread?.Context.IsCurrentThread() ?? false) + if (CoreContexts[core].CurrentThread?.IsCurrentHostThread() ?? 
false) { return CoreContexts[core].CurrentThread; } diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs b/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs index 50c71ea91..54d5d06c8 100644 --- a/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs +++ b/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs @@ -1,5 +1,5 @@ -using ChocolArm64; -using ChocolArm64.Memory; +using ARMeilleure.Memory; +using ARMeilleure.State; using Ryujinx.Common.Logging; using Ryujinx.HLE.HOS.Kernel.Common; using Ryujinx.HLE.HOS.Kernel.Process; @@ -7,12 +7,17 @@ using System; using System.Collections.Generic; using System.Linq; using System.Text; +using System.Threading; namespace Ryujinx.HLE.HOS.Kernel.Threading { class KThread : KSynchronizationObject, IKFutureSchedulerObject { - public CpuThread Context { get; private set; } + private int _hostThreadRunning; + + public Thread HostThread { get; private set; } + + public IExecutionContext Context { get; private set; } public long AffinityMask { get; set; } @@ -152,30 +157,35 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading is64Bits = true; } - Context = new CpuThread(owner.Translator, owner.CpuMemory, (long)entrypoint); + HostThread = new Thread(() => ThreadStart(entrypoint)); - bool isAarch32 = (Owner.MmuFlags & 1) == 0; - - Context.ThreadState.Aarch32 = isAarch32; - - Context.ThreadState.X0 = argsPtr; - - if (isAarch32) + if (System.UseLegacyJit) { - Context.ThreadState.X13 = (uint)stackTop; + Context = new ChocolArm64.State.CpuThreadState(); } else { - Context.ThreadState.X31 = stackTop; + Context = new ARMeilleure.State.ExecutionContext(); } - Context.ThreadState.CntfrqEl0 = 19200000; - Context.ThreadState.Tpidr = (long)_tlsAddress; + bool isAarch32 = (Owner.MmuFlags & 1) == 0; + + Context.SetX(0, argsPtr); + + if (isAarch32) + { + Context.SetX(13, (uint)stackTop); + } + else + { + Context.SetX(31, stackTop); + } + + Context.CntfrqEl0 = 19200000; + Context.Tpidr = (long)_tlsAddress; owner.SubscribeThreadEventHandlers(Context); - Context.WorkFinished += 
ThreadFinishedHandler; - ThreadUid = System.GetThreadUid(); _hasBeenInitialized = true; @@ -1002,8 +1012,8 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading public void SetEntryArguments(long argsPtr, int threadHandle) { - Context.ThreadState.X0 = (ulong)argsPtr; - Context.ThreadState.X1 = (ulong)threadHandle; + Context.SetX(0, (ulong)argsPtr); + Context.SetX(1, (ulong)threadHandle); } public void TimeUp() @@ -1013,7 +1023,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading public string GetGuestStackTrace() { - return Owner.Debugger.GetGuestStackTrace(Context.ThreadState); + return Owner.Debugger.GetGuestStackTrace(Context); } public void PrintGuestStackTrace() @@ -1026,12 +1036,32 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading Logger.PrintInfo(LogClass.Cpu, trace.ToString()); } - private void ThreadFinishedHandler(object sender, EventArgs e) + public void Execute() + { + if (Interlocked.CompareExchange(ref _hostThreadRunning, 1, 0) == 0) + { + HostThread.Start(); + } + } + + private void ThreadStart(ulong entrypoint) + { + Owner.Translator.Execute(Context, entrypoint); + + ThreadExit(); + } + + private void ThreadExit() { System.Scheduler.ExitThread(this); System.Scheduler.RemoveThread(this); } + public bool IsCurrentHostThread() + { + return Thread.CurrentThread == HostThread; + } + public override bool IsSignaled() { return _hasExited; diff --git a/Ryujinx.HLE/HOS/ProgramLoader.cs b/Ryujinx.HLE/HOS/ProgramLoader.cs index af974e18f..0bc6447e5 100644 --- a/Ryujinx.HLE/HOS/ProgramLoader.cs +++ b/Ryujinx.HLE/HOS/ProgramLoader.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.Common; using Ryujinx.Common.Logging; using Ryujinx.HLE.HOS.Kernel.Common; diff --git a/Ryujinx.HLE/HOS/ServiceCtx.cs b/Ryujinx.HLE/HOS/ServiceCtx.cs index 99b2d5afe..df74ba0a8 100644 --- a/Ryujinx.HLE/HOS/ServiceCtx.cs +++ b/Ryujinx.HLE/HOS/ServiceCtx.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.HLE.HOS.Ipc; using 
Ryujinx.HLE.HOS.Kernel.Ipc; using Ryujinx.HLE.HOS.Kernel.Process; @@ -11,7 +11,7 @@ namespace Ryujinx.HLE.HOS { public Switch Device { get; } public KProcess Process { get; } - public MemoryManager Memory { get; } + public IMemoryManager Memory { get; } public KThread Thread { get; } public KClientSession Session { get; } public IpcMessage Request { get; } @@ -22,7 +22,7 @@ namespace Ryujinx.HLE.HOS public ServiceCtx( Switch device, KProcess process, - MemoryManager memory, + IMemoryManager memory, KThread thread, KClientSession session, IpcMessage request, diff --git a/Ryujinx.HLE/HOS/Services/Acc/IProfile.cs b/Ryujinx.HLE/HOS/Services/Acc/IProfile.cs index 050e44971..10210afed 100644 --- a/Ryujinx.HLE/HOS/Services/Acc/IProfile.cs +++ b/Ryujinx.HLE/HOS/Services/Acc/IProfile.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.Common.Logging; using Ryujinx.HLE.HOS.SystemState; using Ryujinx.HLE.Utilities; diff --git a/Ryujinx.HLE/HOS/Services/Aud/AudioOut/IAudioOut.cs b/Ryujinx.HLE/HOS/Services/Aud/AudioOut/IAudioOut.cs index 4191dfd67..751d3f704 100644 --- a/Ryujinx.HLE/HOS/Services/Aud/AudioOut/IAudioOut.cs +++ b/Ryujinx.HLE/HOS/Services/Aud/AudioOut/IAudioOut.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.Audio; using Ryujinx.HLE.HOS.Ipc; using Ryujinx.HLE.HOS.Kernel.Common; diff --git a/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/IAudioRenderer.cs b/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/IAudioRenderer.cs index 599f3d81f..e8baf8192 100644 --- a/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/IAudioRenderer.cs +++ b/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/IAudioRenderer.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.Audio; using Ryujinx.Audio.Adpcm; using Ryujinx.Common.Logging; @@ -24,7 +24,7 @@ namespace Ryujinx.HLE.HOS.Services.Aud.AudioRenderer private KEvent _updateEvent; - private MemoryManager _memory; + private IMemoryManager _memory; 
private IAalOutput _audioOut; @@ -40,7 +40,7 @@ namespace Ryujinx.HLE.HOS.Services.Aud.AudioRenderer public IAudioRenderer( Horizon system, - MemoryManager memory, + IMemoryManager memory, IAalOutput audioOut, AudioRendererParameter Params) { diff --git a/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/VoiceContext.cs b/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/VoiceContext.cs index 93a16a617..aaff20a5b 100644 --- a/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/VoiceContext.cs +++ b/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/VoiceContext.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.Audio.Adpcm; using System; @@ -65,7 +65,7 @@ namespace Ryujinx.HLE.HOS.Services.Aud.AudioRenderer _outStatus.VoiceDropsCount = 0; } - public int[] GetBufferData(MemoryManager memory, int maxSamples, out int samplesCount) + public int[] GetBufferData(IMemoryManager memory, int maxSamples, out int samplesCount) { if (!Playing) { @@ -122,7 +122,7 @@ namespace Ryujinx.HLE.HOS.Services.Aud.AudioRenderer return output; } - private void UpdateBuffer(MemoryManager memory) + private void UpdateBuffer(IMemoryManager memory) { // TODO: Implement conversion for formats other // than interleaved stereo (2 channels). 
diff --git a/Ryujinx.HLE/HOS/Services/Aud/IAudioOutManager.cs b/Ryujinx.HLE/HOS/Services/Aud/IAudioOutManager.cs index ad0dd0445..bea0f3f20 100644 --- a/Ryujinx.HLE/HOS/Services/Aud/IAudioOutManager.cs +++ b/Ryujinx.HLE/HOS/Services/Aud/IAudioOutManager.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.Audio; using Ryujinx.Common.Logging; using Ryujinx.HLE.HOS.Kernel.Threading; diff --git a/Ryujinx.HLE/HOS/Services/Ldr/IRoInterface.cs b/Ryujinx.HLE/HOS/Services/Ldr/IRoInterface.cs index b8780730d..748a600d5 100644 --- a/Ryujinx.HLE/HOS/Services/Ldr/IRoInterface.cs +++ b/Ryujinx.HLE/HOS/Services/Ldr/IRoInterface.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.Common; using Ryujinx.HLE.HOS.Ipc; using Ryujinx.HLE.HOS.Kernel.Common; diff --git a/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs b/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs index 50ab7e01f..261c1c5ae 100644 --- a/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs +++ b/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.Common.Logging; using Ryujinx.HLE.HOS.Ipc; using Ryujinx.HLE.HOS.Kernel.Common; diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvGpuAS/NvGpuASIoctl.cs b/Ryujinx.HLE/HOS/Services/Nv/NvGpuAS/NvGpuASIoctl.cs index 3b96ed6bc..47d15a7e5 100644 --- a/Ryujinx.HLE/HOS/Services/Nv/NvGpuAS/NvGpuASIoctl.cs +++ b/Ryujinx.HLE/HOS/Services/Nv/NvGpuAS/NvGpuASIoctl.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.Common.Logging; using Ryujinx.Graphics.Memory; using Ryujinx.HLE.HOS.Kernel.Process; diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvGpuGpu/NvGpuGpuIoctl.cs b/Ryujinx.HLE/HOS/Services/Nv/NvGpuGpu/NvGpuGpuIoctl.cs index 4f276d5d9..04b0c63cd 100644 --- a/Ryujinx.HLE/HOS/Services/Nv/NvGpuGpu/NvGpuGpuIoctl.cs +++ b/Ryujinx.HLE/HOS/Services/Nv/NvGpuGpu/NvGpuGpuIoctl.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using 
ARMeilleure.Memory; using Ryujinx.Common.Logging; using System; using System.Diagnostics; diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs b/Ryujinx.HLE/HOS/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs index c5f296363..e7879f4a2 100644 --- a/Ryujinx.HLE/HOS/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs +++ b/Ryujinx.HLE/HOS/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.Common.Logging; using Ryujinx.Graphics.Memory; using Ryujinx.HLE.HOS.Kernel.Process; diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs b/Ryujinx.HLE/HOS/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs index 35f1a9491..2a84b677f 100644 --- a/Ryujinx.HLE/HOS/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs +++ b/Ryujinx.HLE/HOS/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.Common.Logging; using Ryujinx.HLE.HOS.Kernel.Process; using System; diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvMap/NvMapIoctl.cs b/Ryujinx.HLE/HOS/Services/Nv/NvMap/NvMapIoctl.cs index 722866622..d9c579a2a 100644 --- a/Ryujinx.HLE/HOS/Services/Nv/NvMap/NvMapIoctl.cs +++ b/Ryujinx.HLE/HOS/Services/Nv/NvMap/NvMapIoctl.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.Common.Logging; using Ryujinx.Graphics.Memory; using Ryujinx.HLE.HOS.Kernel.Process; diff --git a/Ryujinx.HLE/HOS/Services/Time/Clock/StandardSteadyClockCore.cs b/Ryujinx.HLE/HOS/Services/Time/Clock/StandardSteadyClockCore.cs index fea5bf2f6..5b2d6c84e 100644 --- a/Ryujinx.HLE/HOS/Services/Time/Clock/StandardSteadyClockCore.cs +++ b/Ryujinx.HLE/HOS/Services/Time/Clock/StandardSteadyClockCore.cs @@ -40,7 +40,7 @@ namespace Ryujinx.HLE.HOS.Services.Time.Clock ClockSourceId = GetClockSourceId() }; - TimeSpanType ticksTimeSpan = TimeSpanType.FromTicks(thread.Context.ThreadState.CntpctEl0, thread.Context.ThreadState.CntfrqEl0); + 
TimeSpanType ticksTimeSpan = TimeSpanType.FromTicks(thread.Context.CntpctEl0, thread.Context.CntfrqEl0); result.TimePoint = _setupValue + ticksTimeSpan.ToSeconds(); diff --git a/Ryujinx.HLE/HOS/Services/Time/Clock/TickBasedSteadyClockCore.cs b/Ryujinx.HLE/HOS/Services/Time/Clock/TickBasedSteadyClockCore.cs index 7a69b014b..6cd4c80b4 100644 --- a/Ryujinx.HLE/HOS/Services/Time/Clock/TickBasedSteadyClockCore.cs +++ b/Ryujinx.HLE/HOS/Services/Time/Clock/TickBasedSteadyClockCore.cs @@ -30,7 +30,7 @@ namespace Ryujinx.HLE.HOS.Services.Time.Clock ClockSourceId = GetClockSourceId() }; - TimeSpanType ticksTimeSpan = TimeSpanType.FromTicks(thread.Context.ThreadState.CntpctEl0, thread.Context.ThreadState.CntfrqEl0); + TimeSpanType ticksTimeSpan = TimeSpanType.FromTicks(thread.Context.CntpctEl0, thread.Context.CntfrqEl0); result.TimePoint = ticksTimeSpan.ToSeconds(); diff --git a/Ryujinx.HLE/HOS/Services/Time/IStaticService.cs b/Ryujinx.HLE/HOS/Services/Time/IStaticService.cs index 9ee038d58..d9c5b4f25 100644 --- a/Ryujinx.HLE/HOS/Services/Time/IStaticService.cs +++ b/Ryujinx.HLE/HOS/Services/Time/IStaticService.cs @@ -141,7 +141,7 @@ namespace Ryujinx.HLE.HOS.Services.Time if (currentTimePoint.ClockSourceId == otherContext.SteadyTimePoint.ClockSourceId) { - TimeSpanType ticksTimeSpan = TimeSpanType.FromTicks(context.Thread.Context.ThreadState.CntpctEl0, context.Thread.Context.ThreadState.CntfrqEl0); + TimeSpanType ticksTimeSpan = TimeSpanType.FromTicks(context.Thread.Context.CntpctEl0, context.Thread.Context.CntfrqEl0); long baseTimePoint = otherContext.Offset + currentTimePoint.TimePoint - ticksTimeSpan.ToSeconds(); context.ResponseData.Write(baseTimePoint); diff --git a/Ryujinx.HLE/HOS/Services/Time/ITimeZoneService.cs b/Ryujinx.HLE/HOS/Services/Time/ITimeZoneService.cs index 895bb1f3e..b820de38f 100644 --- a/Ryujinx.HLE/HOS/Services/Time/ITimeZoneService.cs +++ b/Ryujinx.HLE/HOS/Services/Time/ITimeZoneService.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using 
ARMeilleure.Memory; using Ryujinx.Common; using Ryujinx.Common.Logging; using Ryujinx.HLE.HOS.Services.Time.TimeZone; @@ -106,7 +106,7 @@ namespace Ryujinx.HLE.HOS.Services.Time string locationName = Encoding.ASCII.GetString(context.RequestData.ReadBytes(0x24)).TrimEnd('\0'); ResultCode resultCode = TimeZoneManager.Instance.LoadTimeZoneRules(out TimeZoneRule rules, locationName); - + // Write TimeZoneRule if success if (resultCode == 0) { diff --git a/Ryujinx.HLE/HOS/Services/Vi/IApplicationDisplayService.cs b/Ryujinx.HLE/HOS/Services/Vi/IApplicationDisplayService.cs index 2f1e68e8f..15db6ff2b 100644 --- a/Ryujinx.HLE/HOS/Services/Vi/IApplicationDisplayService.cs +++ b/Ryujinx.HLE/HOS/Services/Vi/IApplicationDisplayService.cs @@ -1,4 +1,4 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using Ryujinx.HLE.HOS.Ipc; using Ryujinx.HLE.HOS.Kernel.Common; using System; diff --git a/Ryujinx.HLE/Ryujinx.HLE.csproj b/Ryujinx.HLE/Ryujinx.HLE.csproj index 78e5c2a3a..3a12a179f 100644 --- a/Ryujinx.HLE/Ryujinx.HLE.csproj +++ b/Ryujinx.HLE/Ryujinx.HLE.csproj @@ -2,7 +2,7 @@ netcoreapp2.1 - win10-x64;osx-x64;linux-x64 + win-x64;osx-x64;linux-x64 Debug;Release;Profile Debug;Profile Release 7.1 @@ -38,16 +38,18 @@ - + + + diff --git a/Ryujinx.HLE/Utilities/StructReader.cs b/Ryujinx.HLE/Utilities/StructReader.cs index 441dfd195..36e5c7d19 100644 --- a/Ryujinx.HLE/Utilities/StructReader.cs +++ b/Ryujinx.HLE/Utilities/StructReader.cs @@ -1,15 +1,15 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using System.Runtime.InteropServices; namespace Ryujinx.HLE.Utilities { class StructReader { - private MemoryManager _memory; + private IMemoryManager _memory; public long Position { get; private set; } - public StructReader(MemoryManager memory, long position) + public StructReader(IMemoryManager memory, long position) { _memory = memory; Position = position; diff --git a/Ryujinx.HLE/Utilities/StructWriter.cs b/Ryujinx.HLE/Utilities/StructWriter.cs index 86cfeedd7..c156956db 
100644 --- a/Ryujinx.HLE/Utilities/StructWriter.cs +++ b/Ryujinx.HLE/Utilities/StructWriter.cs @@ -1,15 +1,15 @@ -using ChocolArm64.Memory; +using ARMeilleure.Memory; using System.Runtime.InteropServices; namespace Ryujinx.HLE.Utilities { class StructWriter { - private MemoryManager _memory; + private IMemoryManager _memory; public long Position { get; private set; } - public StructWriter(MemoryManager memory, long position) + public StructWriter(IMemoryManager memory, long position) { _memory = memory; Position = position; diff --git a/Ryujinx.LLE/Luea.csproj b/Ryujinx.LLE/Luea.csproj index 719a0ef38..895f27eef 100644 --- a/Ryujinx.LLE/Luea.csproj +++ b/Ryujinx.LLE/Luea.csproj @@ -2,7 +2,7 @@ netcoreapp2.1 - win10-x64;osx-x64;linux-x64 + win-x64;osx-x64;linux-x64 Exe Debug;Release;Profile Debug;Profile Release diff --git a/Ryujinx.Profiler/Ryujinx.Profiler.csproj b/Ryujinx.Profiler/Ryujinx.Profiler.csproj index 5a4c8f4f9..bcc2d17d2 100644 --- a/Ryujinx.Profiler/Ryujinx.Profiler.csproj +++ b/Ryujinx.Profiler/Ryujinx.Profiler.csproj @@ -2,7 +2,7 @@ netcoreapp2.1 - win10-x64;osx-x64;linux-x64 + win-x64;osx-x64;linux-x64 true Debug;Release;Profile Debug;Profile Release diff --git a/Ryujinx.ShaderTools/Ryujinx.ShaderTools.csproj b/Ryujinx.ShaderTools/Ryujinx.ShaderTools.csproj index 04cab8328..a2ff36d9b 100644 --- a/Ryujinx.ShaderTools/Ryujinx.ShaderTools.csproj +++ b/Ryujinx.ShaderTools/Ryujinx.ShaderTools.csproj @@ -2,7 +2,7 @@ netcoreapp2.1 - win10-x64;osx-x64;linux-x64 + win-x64;osx-x64;linux-x64 Exe Debug;Release;Profile Debug;Profile Release diff --git a/Ryujinx.Tests.Unicorn/IndexedProperty.cs b/Ryujinx.Tests.Unicorn/IndexedProperty.cs index a4365026b..65d445fc0 100644 --- a/Ryujinx.Tests.Unicorn/IndexedProperty.cs +++ b/Ryujinx.Tests.Unicorn/IndexedProperty.cs @@ -4,24 +4,24 @@ namespace Ryujinx.Tests.Unicorn { public class IndexedProperty { - readonly Action SetAction; - readonly Func GetFunc; + private Func _getFunc; + private Action _setAction; public 
IndexedProperty(Func getFunc, Action setAction) { - GetFunc = getFunc; - SetAction = setAction; + _getFunc = getFunc; + _setAction = setAction; } - public TValue this[TIndex i] + public TValue this[TIndex index] { get { - return GetFunc(i); + return _getFunc(index); } set { - SetAction(i, value); + _setAction(index, value); } } } diff --git a/Ryujinx.Tests.Unicorn/Native/Interface.cs b/Ryujinx.Tests.Unicorn/Native/Interface.cs index 006585b5c..59b1da079 100644 --- a/Ryujinx.Tests.Unicorn/Native/Interface.cs +++ b/Ryujinx.Tests.Unicorn/Native/Interface.cs @@ -16,11 +16,13 @@ namespace Ryujinx.Tests.Unicorn.Native public static void MarshalArrayOf(IntPtr input, int length, out T[] output) { int size = Marshal.SizeOf(typeof(T)); + output = new T[length]; for (int i = 0; i < length; i++) { IntPtr item = new IntPtr(input.ToInt64() + i * size); + output[i] = Marshal.PtrToStructure(item); } } @@ -29,7 +31,7 @@ namespace Ryujinx.Tests.Unicorn.Native public static extern uint uc_version(out uint major, out uint minor); [DllImport("unicorn", CallingConvention = CallingConvention.Cdecl)] - public static extern UnicornError uc_open(uint arch, uint mode, out IntPtr uc); + public static extern UnicornError uc_open(UnicornArch arch, UnicornMode mode, out IntPtr uc); [DllImport("unicorn", CallingConvention = CallingConvention.Cdecl)] public static extern UnicornError uc_close(IntPtr uc); diff --git a/Ryujinx.Tests.Unicorn/Native/UnicornArch.cs b/Ryujinx.Tests.Unicorn/Native/UnicornArch.cs index 73710faa8..ff633293e 100644 --- a/Ryujinx.Tests.Unicorn/Native/UnicornArch.cs +++ b/Ryujinx.Tests.Unicorn/Native/UnicornArch.cs @@ -1,6 +1,6 @@ namespace Ryujinx.Tests.Unicorn.Native { - public enum UnicornArch + public enum UnicornArch : uint { UC_ARCH_ARM = 1, // ARM architecture (including Thumb, Thumb-2) UC_ARCH_ARM64, // ARM-64, also called AArch64 diff --git a/Ryujinx.Tests.Unicorn/Native/UnicornMode.cs b/Ryujinx.Tests.Unicorn/Native/UnicornMode.cs index 5cd835169..8045f2dac 100644 
--- a/Ryujinx.Tests.Unicorn/Native/UnicornMode.cs +++ b/Ryujinx.Tests.Unicorn/Native/UnicornMode.cs @@ -1,7 +1,7 @@ // ReSharper disable InconsistentNaming namespace Ryujinx.Tests.Unicorn.Native { - public enum UnicornMode + public enum UnicornMode : uint { UC_MODE_LITTLE_ENDIAN = 0, // little-endian mode (default mode) UC_MODE_BIG_ENDIAN = 1 << 30, // big-endian mode diff --git a/Ryujinx.Tests.Unicorn/Ryujinx.Tests.Unicorn.csproj b/Ryujinx.Tests.Unicorn/Ryujinx.Tests.Unicorn.csproj index 5a99b39f1..d15a405bc 100644 --- a/Ryujinx.Tests.Unicorn/Ryujinx.Tests.Unicorn.csproj +++ b/Ryujinx.Tests.Unicorn/Ryujinx.Tests.Unicorn.csproj @@ -2,7 +2,7 @@ netcoreapp2.1 - win10-x64;osx-x64;linux-x64 + win-x64;osx-x64;linux-x64 true Debug;Release;Profile Debug;Profile Release @@ -23,7 +23,6 @@ - diff --git a/Ryujinx.Tests.Unicorn/SimdValue.cs b/Ryujinx.Tests.Unicorn/SimdValue.cs new file mode 100644 index 000000000..2b5284305 --- /dev/null +++ b/Ryujinx.Tests.Unicorn/SimdValue.cs @@ -0,0 +1,112 @@ +using System; + +namespace Ryujinx.Tests.Unicorn +{ + public struct SimdValue : IEquatable + { + private ulong _e0; + private ulong _e1; + + public SimdValue(ulong e0, ulong e1) + { + _e0 = e0; + _e1 = e1; + } + + public SimdValue(byte[] data) + { + _e0 = (ulong)BitConverter.ToInt64(data, 0); + _e1 = (ulong)BitConverter.ToInt64(data, 8); + } + + public float AsFloat() + { + return GetFloat(0); + } + + public double AsDouble() + { + return GetDouble(0); + } + + public float GetFloat(int index) + { + return BitConverter.Int32BitsToSingle(GetInt32(index)); + } + + public double GetDouble(int index) + { + return BitConverter.Int64BitsToDouble(GetInt64(index)); + } + + public int GetInt32(int index) => (int)GetUInt32(index); + public long GetInt64(int index) => (long)GetUInt64(index); + + public uint GetUInt32(int index) + { + switch (index) + { + case 0: return (uint)(_e0 >> 0); + case 1: return (uint)(_e0 >> 32); + case 2: return (uint)(_e1 >> 0); + case 3: return (uint)(_e1 >> 32); + } 
+ + throw new ArgumentOutOfRangeException(nameof(index)); + } + + public ulong GetUInt64(int index) + { + switch (index) + { + case 0: return _e0; + case 1: return _e1; + } + + throw new ArgumentOutOfRangeException(nameof(index)); + } + + public byte[] ToArray() + { + byte[] e0Data = BitConverter.GetBytes(_e0); + byte[] e1Data = BitConverter.GetBytes(_e1); + + byte[] data = new byte[16]; + + Buffer.BlockCopy(e0Data, 0, data, 0, 8); + Buffer.BlockCopy(e1Data, 0, data, 8, 8); + + return data; + } + + public override int GetHashCode() + { + return HashCode.Combine(_e0, _e1); + } + + public static bool operator ==(SimdValue x, SimdValue y) + { + return x.Equals(y); + } + + public static bool operator !=(SimdValue x, SimdValue y) + { + return !x.Equals(y); + } + + public override bool Equals(object obj) + { + return obj is SimdValue vector && Equals(vector); + } + + public bool Equals(SimdValue other) + { + return other._e0 == _e0 && other._e1 == _e1; + } + + public override string ToString() + { + return $"0x{_e1:X16}{_e0:X16}"; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Tests.Unicorn/UnicornAArch64.cs b/Ryujinx.Tests.Unicorn/UnicornAArch64.cs index 0425d1d3a..4453d18d0 100644 --- a/Ryujinx.Tests.Unicorn/UnicornAArch64.cs +++ b/Ryujinx.Tests.Unicorn/UnicornAArch64.cs @@ -1,8 +1,5 @@ using Ryujinx.Tests.Unicorn.Native; using System; -using System.Diagnostics.Contracts; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; namespace Ryujinx.Tests.Unicorn { @@ -15,95 +12,96 @@ namespace Ryujinx.Tests.Unicorn get { return new IndexedProperty( - (int i) => GetX(i), + (int i) => GetX(i), (int i, ulong value) => SetX(i, value)); } } - public IndexedProperty> Q + public IndexedProperty Q { get { - return new IndexedProperty>( - (int i) => GetQ(i), - (int i, Vector128 value) => SetQ(i, value)); + return new IndexedProperty( + (int i) => GetQ(i), + (int i, SimdValue value) => SetQ(i, value)); } } public ulong LR { - get { return 
GetRegister(ArmRegister.LR); } - set { SetRegister(ArmRegister.LR, value); } + get => GetRegister(ArmRegister.LR); + set => SetRegister(ArmRegister.LR, value); } public ulong SP { - get { return GetRegister(ArmRegister.SP); } - set { SetRegister(ArmRegister.SP, value); } + get => GetRegister(ArmRegister.SP); + set => SetRegister(ArmRegister.SP, value); } public ulong PC { - get { return GetRegister(ArmRegister.PC); } - set { SetRegister(ArmRegister.PC, value); } + get => GetRegister(ArmRegister.PC); + set => SetRegister(ArmRegister.PC, value); } public uint Pstate { - get { return (uint)GetRegister(ArmRegister.PSTATE); } - set { SetRegister(ArmRegister.PSTATE, (uint)value); } + get => (uint)GetRegister(ArmRegister.PSTATE); + set => SetRegister(ArmRegister.PSTATE, (uint)value); } public int Fpcr { - get { return (int)GetRegister(ArmRegister.FPCR); } - set { SetRegister(ArmRegister.FPCR, (uint)value); } + get => (int)GetRegister(ArmRegister.FPCR); + set => SetRegister(ArmRegister.FPCR, (uint)value); } public int Fpsr { - get { return (int)GetRegister(ArmRegister.FPSR); } - set { SetRegister(ArmRegister.FPSR, (uint)value); } + get => (int)GetRegister(ArmRegister.FPSR); + set => SetRegister(ArmRegister.FPSR, (uint)value); } public bool OverflowFlag { - get { return (Pstate & 0x10000000u) != 0; } - set { Pstate = (Pstate & ~0x10000000u) | (value ? 0x10000000u : 0u); } + get => (Pstate & 0x10000000u) != 0; + set => Pstate = (Pstate & ~0x10000000u) | (value ? 0x10000000u : 0u); } public bool CarryFlag { - get { return (Pstate & 0x20000000u) != 0; } - set { Pstate = (Pstate & ~0x20000000u) | (value ? 0x20000000u : 0u); } + get => (Pstate & 0x20000000u) != 0; + set => Pstate = (Pstate & ~0x20000000u) | (value ? 0x20000000u : 0u); } public bool ZeroFlag { - get { return (Pstate & 0x40000000u) != 0; } - set { Pstate = (Pstate & ~0x40000000u) | (value ? 0x40000000u : 0u); } + get => (Pstate & 0x40000000u) != 0; + set => Pstate = (Pstate & ~0x40000000u) | (value ? 
0x40000000u : 0u); } public bool NegativeFlag { - get { return (Pstate & 0x80000000u) != 0; } - set { Pstate = (Pstate & ~0x80000000u) | (value ? 0x80000000u : 0u); } + get => (Pstate & 0x80000000u) != 0; + set => Pstate = (Pstate & ~0x80000000u) | (value ? 0x80000000u : 0u); } public UnicornAArch64() { - Interface.Checked(Interface.uc_open((uint)UnicornArch.UC_ARCH_ARM64, (uint)UnicornMode.UC_MODE_LITTLE_ENDIAN, out uc)); + Interface.Checked(Interface.uc_open(UnicornArch.UC_ARCH_ARM64, UnicornMode.UC_MODE_LITTLE_ENDIAN, out uc)); + SetRegister(ArmRegister.CPACR_EL1, 0x00300000); } ~UnicornAArch64() { - Interface.Checked(Interface.uc_close(uc)); + Interface.Checked(Native.Interface.uc_close(uc)); } public void RunForCount(ulong count) { - Interface.Checked(Interface.uc_emu_start(uc, PC, 0xFFFFFFFFFFFFFFFFu, 0, count)); + Interface.Checked(Native.Interface.uc_emu_start(uc, this.PC, 0xFFFFFFFFFFFFFFFFu, 0, count)); } public void Step() @@ -111,7 +109,7 @@ namespace Ryujinx.Tests.Unicorn RunForCount(1); } - internal static ArmRegister[] X_registers = new ArmRegister[31] + private static ArmRegister[] XRegisters = new ArmRegister[31] { ArmRegister.X0, ArmRegister.X1, @@ -146,7 +144,7 @@ namespace Ryujinx.Tests.Unicorn ArmRegister.X30, }; - internal static ArmRegister[] Q_registers = new ArmRegister[32] + private static ArmRegister[] QRegisters = new ArmRegister[32] { ArmRegister.Q0, ArmRegister.Q1, @@ -182,97 +180,104 @@ namespace Ryujinx.Tests.Unicorn ArmRegister.Q31, }; - internal ulong GetRegister(ArmRegister register) - { - byte[] value_bytes = new byte[8]; - Interface.Checked(Interface.uc_reg_read(uc, (int)register, value_bytes)); - return (ulong)BitConverter.ToInt64(value_bytes, 0); - } - - internal void SetRegister(ArmRegister register, ulong value) - { - byte[] value_bytes = BitConverter.GetBytes(value); - Interface.Checked(Interface.uc_reg_write(uc, (int)register, value_bytes)); - } - - internal Vector128 GetVector(ArmRegister register) - { - byte[] 
value_bytes = new byte[16]; - Interface.Checked(Interface.uc_reg_read(uc, (int)register, value_bytes)); - unsafe - { - fixed (byte* p = &value_bytes[0]) - { - return Sse.LoadVector128((float*)p); - } - } - } - - internal void SetVector(ArmRegister register, Vector128 value) - { - byte[] value_bytes = new byte[16]; - unsafe - { - fixed (byte* p = &value_bytes[0]) - { - Sse.Store((float*)p, value); - } - } - Interface.Checked(Interface.uc_reg_write(uc, (int)register, value_bytes)); - } - public ulong GetX(int index) { - Contract.Requires(index <= 30, "invalid register"); + if ((uint)index > 30) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } - return GetRegister(X_registers[index]); + return GetRegister(XRegisters[index]); } public void SetX(int index, ulong value) { - Contract.Requires(index <= 30, "invalid register"); + if ((uint)index > 30) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } - SetRegister(X_registers[index], value); + SetRegister(XRegisters[index], value); } - public Vector128 GetQ(int index) + public SimdValue GetQ(int index) { - Contract.Requires(index <= 31, "invalid vector"); + if ((uint)index > 31) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } - return GetVector(Q_registers[index]); + return GetVector(QRegisters[index]); } - public void SetQ(int index, Vector128 value) + public void SetQ(int index, SimdValue value) { - Contract.Requires(index <= 31, "invalid vector"); + if ((uint)index > 31) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } - SetVector(Q_registers[index], value); + SetVector(QRegisters[index], value); + } + + private ulong GetRegister(ArmRegister register) + { + byte[] data = new byte[8]; + + Interface.Checked(Native.Interface.uc_reg_read(uc, (int)register, data)); + + return (ulong)BitConverter.ToInt64(data, 0); + } + + private void SetRegister(ArmRegister register, ulong value) + { + byte[] data = BitConverter.GetBytes(value); + + 
Interface.Checked(Interface.uc_reg_write(uc, (int)register, data)); + } + + private SimdValue GetVector(ArmRegister register) + { + byte[] data = new byte[16]; + + Interface.Checked(Interface.uc_reg_read(uc, (int)register, data)); + + return new SimdValue(data); + } + + private void SetVector(ArmRegister register, SimdValue value) + { + byte[] data = value.ToArray(); + + Interface.Checked(Interface.uc_reg_write(uc, (int)register, data)); } public byte[] MemoryRead(ulong address, ulong size) { byte[] value = new byte[size]; + Interface.Checked(Interface.uc_mem_read(uc, address, value, size)); + return value; } - public byte MemoryRead8 (ulong address) { return MemoryRead(address, 1)[0]; } - public UInt16 MemoryRead16(ulong address) { return (UInt16)BitConverter.ToInt16(MemoryRead(address, 2), 0); } - public UInt32 MemoryRead32(ulong address) { return (UInt32)BitConverter.ToInt32(MemoryRead(address, 4), 0); } - public UInt64 MemoryRead64(ulong address) { return (UInt64)BitConverter.ToInt64(MemoryRead(address, 8), 0); } + public byte MemoryRead8 (ulong address) => MemoryRead(address, 1)[0]; + public UInt16 MemoryRead16(ulong address) => (UInt16)BitConverter.ToInt16(MemoryRead(address, 2), 0); + public UInt32 MemoryRead32(ulong address) => (UInt32)BitConverter.ToInt32(MemoryRead(address, 4), 0); + public UInt64 MemoryRead64(ulong address) => (UInt64)BitConverter.ToInt64(MemoryRead(address, 8), 0); public void MemoryWrite(ulong address, byte[] value) { Interface.Checked(Interface.uc_mem_write(uc, address, value, (ulong)value.Length)); } - public void MemoryWrite8 (ulong address, byte value) { MemoryWrite(address, new byte[]{value}); } - public void MemoryWrite16(ulong address, Int16 value) { MemoryWrite(address, BitConverter.GetBytes(value)); } - public void MemoryWrite16(ulong address, UInt16 value) { MemoryWrite(address, BitConverter.GetBytes(value)); } - public void MemoryWrite32(ulong address, Int32 value) { MemoryWrite(address, BitConverter.GetBytes(value)); } - 
public void MemoryWrite32(ulong address, UInt32 value) { MemoryWrite(address, BitConverter.GetBytes(value)); } - public void MemoryWrite64(ulong address, Int64 value) { MemoryWrite(address, BitConverter.GetBytes(value)); } - public void MemoryWrite64(ulong address, UInt64 value) { MemoryWrite(address, BitConverter.GetBytes(value)); } + public void MemoryWrite8 (ulong address, byte value) => MemoryWrite(address, new byte[]{value}); + public void MemoryWrite16(ulong address, Int16 value) => MemoryWrite(address, BitConverter.GetBytes(value)); + public void MemoryWrite16(ulong address, UInt16 value) => MemoryWrite(address, BitConverter.GetBytes(value)); + public void MemoryWrite32(ulong address, Int32 value) => MemoryWrite(address, BitConverter.GetBytes(value)); + public void MemoryWrite32(ulong address, UInt32 value) => MemoryWrite(address, BitConverter.GetBytes(value)); + public void MemoryWrite64(ulong address, Int64 value) => MemoryWrite(address, BitConverter.GetBytes(value)); + public void MemoryWrite64(ulong address, UInt64 value) => MemoryWrite(address, BitConverter.GetBytes(value)); public void MemoryMap(ulong address, ulong size, MemoryPermission permissions) { @@ -289,21 +294,12 @@ namespace Ryujinx.Tests.Unicorn Interface.Checked(Interface.uc_mem_protect(uc, address, size, (uint)permissions)); } - public void DumpMemoryInformation() - { - Interface.Checked(Interface.uc_mem_regions(uc, out IntPtr regions_raw, out uint length)); - Interface.MarshalArrayOf(regions_raw, (int)length, out var regions); - foreach (var region in regions) - { - Console.WriteLine("region: begin {0:X16} end {1:X16} perms {2:X8}", region.begin, region.end, region.perms); - } - } - public static bool IsAvailable() { try { - Interface.uc_version(out uint major, out uint minor); + Interface.uc_version(out _, out _); + return true; } catch (DllNotFoundException) diff --git a/Ryujinx.Tests/Cpu/CpuTest.cs b/Ryujinx.Tests/Cpu/CpuTest.cs index b147cf446..1e7b75c61 100644 --- 
a/Ryujinx.Tests/Cpu/CpuTest.cs +++ b/Ryujinx.Tests/Cpu/CpuTest.cs @@ -1,7 +1,6 @@ -using ChocolArm64; -using ChocolArm64.Memory; -using ChocolArm64.State; -using ChocolArm64.Translation; +using ARMeilleure.Memory; +using ARMeilleure.State; +using ARMeilleure.Translation; using NUnit.Framework; @@ -9,24 +8,24 @@ using Ryujinx.Tests.Unicorn; using System; using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using System.Threading; namespace Ryujinx.Tests.Cpu { [TestFixture] public class CpuTest { - protected long Position { get; private set; } - private long _size; + private ulong _currAddress; + private long _size; - private long _entryPoint; + private ulong _entryPoint; private IntPtr _ramPointer; private MemoryManager _memory; - private CpuThread _thread; + + private ExecutionContext _context; + + private Translator _translator; private static bool _unicornAvailable; private UnicornAArch64 _unicornEmu; @@ -44,24 +43,24 @@ namespace Ryujinx.Tests.Cpu [SetUp] public void Setup() { - Position = 0x1000; - _size = 0x1000; + _currAddress = 0x1000; + _size = 0x1000; - _entryPoint = Position; + _entryPoint = _currAddress; _ramPointer = Marshal.AllocHGlobal(new IntPtr(_size)); _memory = new MemoryManager(_ramPointer); - _memory.Map(Position, 0, _size); + _memory.Map((long)_currAddress, 0, _size); - Translator translator = new Translator(_memory); + _context = new ExecutionContext(); - _thread = new CpuThread(translator, _memory, _entryPoint); + _translator = new Translator(_memory); if (_unicornAvailable) { _unicornEmu = new UnicornAArch64(); - _unicornEmu.MemoryMap((ulong)Position, (ulong)_size, MemoryPermission.READ | MemoryPermission.EXEC); - _unicornEmu.PC = (ulong)_entryPoint; + _unicornEmu.MemoryMap(_currAddress, (ulong)_size, MemoryPermission.READ | MemoryPermission.EXEC); + _unicornEmu.PC = _entryPoint; } } @@ -70,7 +69,8 @@ namespace Ryujinx.Tests.Cpu { Marshal.FreeHGlobal(_ramPointer); _memory = null; - 
_thread = null; + _context = null; + _translator = null; _unicornEmu = null; } @@ -82,51 +82,61 @@ namespace Ryujinx.Tests.Cpu protected void Opcode(uint opcode) { - _thread.Memory.WriteUInt32(Position, opcode); + _memory.WriteUInt32((long)_currAddress, opcode); if (_unicornAvailable) { - _unicornEmu.MemoryWrite32((ulong)Position, opcode); + _unicornEmu.MemoryWrite32((ulong)_currAddress, opcode); } - Position += 4; + _currAddress += 4; } - protected void SetThreadState(ulong x0 = 0, ulong x1 = 0, ulong x2 = 0, ulong x3 = 0, ulong x31 = 0, - Vector128 v0 = default(Vector128), - Vector128 v1 = default(Vector128), - Vector128 v2 = default(Vector128), - Vector128 v3 = default(Vector128), - Vector128 v4 = default(Vector128), - Vector128 v5 = default(Vector128), - Vector128 v30 = default(Vector128), - Vector128 v31 = default(Vector128), - bool overflow = false, bool carry = false, bool zero = false, bool negative = false, - int fpcr = 0x0, int fpsr = 0x0) + protected ExecutionContext GetContext() => _context; + + protected void SetContext(ulong x0 = 0, + ulong x1 = 0, + ulong x2 = 0, + ulong x3 = 0, + ulong x31 = 0, + V128 v0 = default(V128), + V128 v1 = default(V128), + V128 v2 = default(V128), + V128 v3 = default(V128), + V128 v4 = default(V128), + V128 v5 = default(V128), + V128 v30 = default(V128), + V128 v31 = default(V128), + bool overflow = false, + bool carry = false, + bool zero = false, + bool negative = false, + int fpcr = 0, + int fpsr = 0) { - _thread.ThreadState.X0 = x0; - _thread.ThreadState.X1 = x1; - _thread.ThreadState.X2 = x2; - _thread.ThreadState.X3 = x3; + _context.SetX(0, x0); + _context.SetX(1, x1); + _context.SetX(2, x2); + _context.SetX(3, x3); - _thread.ThreadState.X31 = x31; + _context.SetX(31, x31); - _thread.ThreadState.V0 = v0; - _thread.ThreadState.V1 = v1; - _thread.ThreadState.V2 = v2; - _thread.ThreadState.V3 = v3; - _thread.ThreadState.V4 = v4; - _thread.ThreadState.V5 = v5; - _thread.ThreadState.V30 = v30; - _thread.ThreadState.V31 = 
v31; + _context.SetV(0, v0); + _context.SetV(1, v1); + _context.SetV(2, v2); + _context.SetV(3, v3); + _context.SetV(4, v4); + _context.SetV(5, v5); + _context.SetV(30, v30); + _context.SetV(31, v31); - _thread.ThreadState.Overflow = overflow; - _thread.ThreadState.Carry = carry; - _thread.ThreadState.Zero = zero; - _thread.ThreadState.Negative = negative; + _context.SetPstateFlag(PState.VFlag, overflow); + _context.SetPstateFlag(PState.CFlag, carry); + _context.SetPstateFlag(PState.ZFlag, zero); + _context.SetPstateFlag(PState.NFlag, negative); - _thread.ThreadState.Fpcr = fpcr; - _thread.ThreadState.Fpsr = fpsr; + _context.Fpcr = (FPCR)fpcr; + _context.Fpsr = (FPSR)fpsr; if (_unicornAvailable) { @@ -137,14 +147,14 @@ namespace Ryujinx.Tests.Cpu _unicornEmu.SP = x31; - _unicornEmu.Q[0] = v0; - _unicornEmu.Q[1] = v1; - _unicornEmu.Q[2] = v2; - _unicornEmu.Q[3] = v3; - _unicornEmu.Q[4] = v4; - _unicornEmu.Q[5] = v5; - _unicornEmu.Q[30] = v30; - _unicornEmu.Q[31] = v31; + _unicornEmu.Q[0] = V128ToSimdValue(v0); + _unicornEmu.Q[1] = V128ToSimdValue(v1); + _unicornEmu.Q[2] = V128ToSimdValue(v2); + _unicornEmu.Q[3] = V128ToSimdValue(v3); + _unicornEmu.Q[4] = V128ToSimdValue(v4); + _unicornEmu.Q[5] = V128ToSimdValue(v5); + _unicornEmu.Q[30] = V128ToSimdValue(v30); + _unicornEmu.Q[31] = V128ToSimdValue(v31); _unicornEmu.OverflowFlag = overflow; _unicornEmu.CarryFlag = carry; @@ -158,43 +168,41 @@ namespace Ryujinx.Tests.Cpu protected void ExecuteOpcodes() { - using (ManualResetEvent wait = new ManualResetEvent(false)) - { - _thread.ThreadState.Break += (sender, e) => _thread.StopExecution(); - _thread.WorkFinished += (sender, e) => wait.Set(); - - _thread.Execute(); - wait.WaitOne(); - } + _translator.Execute(_context, _entryPoint); if (_unicornAvailable) { - _unicornEmu.RunForCount((ulong)(Position - _entryPoint - 8) / 4); + _unicornEmu.RunForCount((ulong)(_currAddress - _entryPoint - 4) / 4); } } - protected CpuThreadState GetThreadState() => _thread.ThreadState; - - 
protected CpuThreadState SingleOpcode(uint opcode, - ulong x0 = 0, ulong x1 = 0, ulong x2 = 0, ulong x3 = 0, ulong x31 = 0, - Vector128 v0 = default(Vector128), - Vector128 v1 = default(Vector128), - Vector128 v2 = default(Vector128), - Vector128 v3 = default(Vector128), - Vector128 v4 = default(Vector128), - Vector128 v5 = default(Vector128), - Vector128 v30 = default(Vector128), - Vector128 v31 = default(Vector128), - bool overflow = false, bool carry = false, bool zero = false, bool negative = false, - int fpcr = 0x0, int fpsr = 0x0) + protected ExecutionContext SingleOpcode(uint opcode, + ulong x0 = 0, + ulong x1 = 0, + ulong x2 = 0, + ulong x3 = 0, + ulong x31 = 0, + V128 v0 = default(V128), + V128 v1 = default(V128), + V128 v2 = default(V128), + V128 v3 = default(V128), + V128 v4 = default(V128), + V128 v5 = default(V128), + V128 v30 = default(V128), + V128 v31 = default(V128), + bool overflow = false, + bool carry = false, + bool zero = false, + bool negative = false, + int fpcr = 0, + int fpsr = 0) { Opcode(opcode); - Opcode(0xD4200000); // BRK #0 Opcode(0xD65F03C0); // RET - SetThreadState(x0, x1, x2, x3, x31, v0, v1, v2, v3, v4, v5, v30, v31, overflow, carry, zero, negative, fpcr, fpsr); + SetContext(x0, x1, x2, x3, x31, v0, v1, v2, v3, v4, v5, v30, v31, overflow, carry, zero, negative, fpcr, fpsr); ExecuteOpcodes(); - return GetThreadState(); + return GetContext(); } /// Rounding Mode control field. 
@@ -279,101 +287,101 @@ namespace Ryujinx.Tests.Cpu ManageFpSkips(fpSkips); } - Assert.That(_thread.ThreadState.X0, Is.EqualTo(_unicornEmu.X[0])); - Assert.That(_thread.ThreadState.X1, Is.EqualTo(_unicornEmu.X[1])); - Assert.That(_thread.ThreadState.X2, Is.EqualTo(_unicornEmu.X[2])); - Assert.That(_thread.ThreadState.X3, Is.EqualTo(_unicornEmu.X[3])); - Assert.That(_thread.ThreadState.X4, Is.EqualTo(_unicornEmu.X[4])); - Assert.That(_thread.ThreadState.X5, Is.EqualTo(_unicornEmu.X[5])); - Assert.That(_thread.ThreadState.X6, Is.EqualTo(_unicornEmu.X[6])); - Assert.That(_thread.ThreadState.X7, Is.EqualTo(_unicornEmu.X[7])); - Assert.That(_thread.ThreadState.X8, Is.EqualTo(_unicornEmu.X[8])); - Assert.That(_thread.ThreadState.X9, Is.EqualTo(_unicornEmu.X[9])); - Assert.That(_thread.ThreadState.X10, Is.EqualTo(_unicornEmu.X[10])); - Assert.That(_thread.ThreadState.X11, Is.EqualTo(_unicornEmu.X[11])); - Assert.That(_thread.ThreadState.X12, Is.EqualTo(_unicornEmu.X[12])); - Assert.That(_thread.ThreadState.X13, Is.EqualTo(_unicornEmu.X[13])); - Assert.That(_thread.ThreadState.X14, Is.EqualTo(_unicornEmu.X[14])); - Assert.That(_thread.ThreadState.X15, Is.EqualTo(_unicornEmu.X[15])); - Assert.That(_thread.ThreadState.X16, Is.EqualTo(_unicornEmu.X[16])); - Assert.That(_thread.ThreadState.X17, Is.EqualTo(_unicornEmu.X[17])); - Assert.That(_thread.ThreadState.X18, Is.EqualTo(_unicornEmu.X[18])); - Assert.That(_thread.ThreadState.X19, Is.EqualTo(_unicornEmu.X[19])); - Assert.That(_thread.ThreadState.X20, Is.EqualTo(_unicornEmu.X[20])); - Assert.That(_thread.ThreadState.X21, Is.EqualTo(_unicornEmu.X[21])); - Assert.That(_thread.ThreadState.X22, Is.EqualTo(_unicornEmu.X[22])); - Assert.That(_thread.ThreadState.X23, Is.EqualTo(_unicornEmu.X[23])); - Assert.That(_thread.ThreadState.X24, Is.EqualTo(_unicornEmu.X[24])); - Assert.That(_thread.ThreadState.X25, Is.EqualTo(_unicornEmu.X[25])); - Assert.That(_thread.ThreadState.X26, Is.EqualTo(_unicornEmu.X[26])); - 
Assert.That(_thread.ThreadState.X27, Is.EqualTo(_unicornEmu.X[27])); - Assert.That(_thread.ThreadState.X28, Is.EqualTo(_unicornEmu.X[28])); - Assert.That(_thread.ThreadState.X29, Is.EqualTo(_unicornEmu.X[29])); - Assert.That(_thread.ThreadState.X30, Is.EqualTo(_unicornEmu.X[30])); + Assert.That(_context.GetX(0), Is.EqualTo(_unicornEmu.X[0])); + Assert.That(_context.GetX(1), Is.EqualTo(_unicornEmu.X[1])); + Assert.That(_context.GetX(2), Is.EqualTo(_unicornEmu.X[2])); + Assert.That(_context.GetX(3), Is.EqualTo(_unicornEmu.X[3])); + Assert.That(_context.GetX(4), Is.EqualTo(_unicornEmu.X[4])); + Assert.That(_context.GetX(5), Is.EqualTo(_unicornEmu.X[5])); + Assert.That(_context.GetX(6), Is.EqualTo(_unicornEmu.X[6])); + Assert.That(_context.GetX(7), Is.EqualTo(_unicornEmu.X[7])); + Assert.That(_context.GetX(8), Is.EqualTo(_unicornEmu.X[8])); + Assert.That(_context.GetX(9), Is.EqualTo(_unicornEmu.X[9])); + Assert.That(_context.GetX(10), Is.EqualTo(_unicornEmu.X[10])); + Assert.That(_context.GetX(11), Is.EqualTo(_unicornEmu.X[11])); + Assert.That(_context.GetX(12), Is.EqualTo(_unicornEmu.X[12])); + Assert.That(_context.GetX(13), Is.EqualTo(_unicornEmu.X[13])); + Assert.That(_context.GetX(14), Is.EqualTo(_unicornEmu.X[14])); + Assert.That(_context.GetX(15), Is.EqualTo(_unicornEmu.X[15])); + Assert.That(_context.GetX(16), Is.EqualTo(_unicornEmu.X[16])); + Assert.That(_context.GetX(17), Is.EqualTo(_unicornEmu.X[17])); + Assert.That(_context.GetX(18), Is.EqualTo(_unicornEmu.X[18])); + Assert.That(_context.GetX(19), Is.EqualTo(_unicornEmu.X[19])); + Assert.That(_context.GetX(20), Is.EqualTo(_unicornEmu.X[20])); + Assert.That(_context.GetX(21), Is.EqualTo(_unicornEmu.X[21])); + Assert.That(_context.GetX(22), Is.EqualTo(_unicornEmu.X[22])); + Assert.That(_context.GetX(23), Is.EqualTo(_unicornEmu.X[23])); + Assert.That(_context.GetX(24), Is.EqualTo(_unicornEmu.X[24])); + Assert.That(_context.GetX(25), Is.EqualTo(_unicornEmu.X[25])); + Assert.That(_context.GetX(26), 
Is.EqualTo(_unicornEmu.X[26])); + Assert.That(_context.GetX(27), Is.EqualTo(_unicornEmu.X[27])); + Assert.That(_context.GetX(28), Is.EqualTo(_unicornEmu.X[28])); + Assert.That(_context.GetX(29), Is.EqualTo(_unicornEmu.X[29])); + Assert.That(_context.GetX(30), Is.EqualTo(_unicornEmu.X[30])); - Assert.That(_thread.ThreadState.X31, Is.EqualTo(_unicornEmu.SP)); + Assert.That(_context.GetX(31), Is.EqualTo(_unicornEmu.SP)); if (fpTolerances == FpTolerances.None) { - Assert.That(_thread.ThreadState.V0, Is.EqualTo(_unicornEmu.Q[0])); + Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0])); } else { ManageFpTolerances(fpTolerances); } - Assert.That(_thread.ThreadState.V1, Is.EqualTo(_unicornEmu.Q[1])); - Assert.That(_thread.ThreadState.V2, Is.EqualTo(_unicornEmu.Q[2])); - Assert.That(_thread.ThreadState.V3, Is.EqualTo(_unicornEmu.Q[3])); - Assert.That(_thread.ThreadState.V4, Is.EqualTo(_unicornEmu.Q[4])); - Assert.That(_thread.ThreadState.V5, Is.EqualTo(_unicornEmu.Q[5])); - Assert.That(_thread.ThreadState.V6, Is.EqualTo(_unicornEmu.Q[6])); - Assert.That(_thread.ThreadState.V7, Is.EqualTo(_unicornEmu.Q[7])); - Assert.That(_thread.ThreadState.V8, Is.EqualTo(_unicornEmu.Q[8])); - Assert.That(_thread.ThreadState.V9, Is.EqualTo(_unicornEmu.Q[9])); - Assert.That(_thread.ThreadState.V10, Is.EqualTo(_unicornEmu.Q[10])); - Assert.That(_thread.ThreadState.V11, Is.EqualTo(_unicornEmu.Q[11])); - Assert.That(_thread.ThreadState.V12, Is.EqualTo(_unicornEmu.Q[12])); - Assert.That(_thread.ThreadState.V13, Is.EqualTo(_unicornEmu.Q[13])); - Assert.That(_thread.ThreadState.V14, Is.EqualTo(_unicornEmu.Q[14])); - Assert.That(_thread.ThreadState.V15, Is.EqualTo(_unicornEmu.Q[15])); - Assert.That(_thread.ThreadState.V16, Is.EqualTo(_unicornEmu.Q[16])); - Assert.That(_thread.ThreadState.V17, Is.EqualTo(_unicornEmu.Q[17])); - Assert.That(_thread.ThreadState.V18, Is.EqualTo(_unicornEmu.Q[18])); - Assert.That(_thread.ThreadState.V19, Is.EqualTo(_unicornEmu.Q[19])); - 
Assert.That(_thread.ThreadState.V20, Is.EqualTo(_unicornEmu.Q[20])); - Assert.That(_thread.ThreadState.V21, Is.EqualTo(_unicornEmu.Q[21])); - Assert.That(_thread.ThreadState.V22, Is.EqualTo(_unicornEmu.Q[22])); - Assert.That(_thread.ThreadState.V23, Is.EqualTo(_unicornEmu.Q[23])); - Assert.That(_thread.ThreadState.V24, Is.EqualTo(_unicornEmu.Q[24])); - Assert.That(_thread.ThreadState.V25, Is.EqualTo(_unicornEmu.Q[25])); - Assert.That(_thread.ThreadState.V26, Is.EqualTo(_unicornEmu.Q[26])); - Assert.That(_thread.ThreadState.V27, Is.EqualTo(_unicornEmu.Q[27])); - Assert.That(_thread.ThreadState.V28, Is.EqualTo(_unicornEmu.Q[28])); - Assert.That(_thread.ThreadState.V29, Is.EqualTo(_unicornEmu.Q[29])); - Assert.That(_thread.ThreadState.V30, Is.EqualTo(_unicornEmu.Q[30])); - Assert.That(_thread.ThreadState.V31, Is.EqualTo(_unicornEmu.Q[31])); + Assert.That(V128ToSimdValue(_context.GetV(1)), Is.EqualTo(_unicornEmu.Q[1])); + Assert.That(V128ToSimdValue(_context.GetV(2)), Is.EqualTo(_unicornEmu.Q[2])); + Assert.That(V128ToSimdValue(_context.GetV(3)), Is.EqualTo(_unicornEmu.Q[3])); + Assert.That(V128ToSimdValue(_context.GetV(4)), Is.EqualTo(_unicornEmu.Q[4])); + Assert.That(V128ToSimdValue(_context.GetV(5)), Is.EqualTo(_unicornEmu.Q[5])); + Assert.That(V128ToSimdValue(_context.GetV(6)), Is.EqualTo(_unicornEmu.Q[6])); + Assert.That(V128ToSimdValue(_context.GetV(7)), Is.EqualTo(_unicornEmu.Q[7])); + Assert.That(V128ToSimdValue(_context.GetV(8)), Is.EqualTo(_unicornEmu.Q[8])); + Assert.That(V128ToSimdValue(_context.GetV(9)), Is.EqualTo(_unicornEmu.Q[9])); + Assert.That(V128ToSimdValue(_context.GetV(10)), Is.EqualTo(_unicornEmu.Q[10])); + Assert.That(V128ToSimdValue(_context.GetV(11)), Is.EqualTo(_unicornEmu.Q[11])); + Assert.That(V128ToSimdValue(_context.GetV(12)), Is.EqualTo(_unicornEmu.Q[12])); + Assert.That(V128ToSimdValue(_context.GetV(13)), Is.EqualTo(_unicornEmu.Q[13])); + Assert.That(V128ToSimdValue(_context.GetV(14)), Is.EqualTo(_unicornEmu.Q[14])); + 
Assert.That(V128ToSimdValue(_context.GetV(15)), Is.EqualTo(_unicornEmu.Q[15])); + Assert.That(V128ToSimdValue(_context.GetV(16)), Is.EqualTo(_unicornEmu.Q[16])); + Assert.That(V128ToSimdValue(_context.GetV(17)), Is.EqualTo(_unicornEmu.Q[17])); + Assert.That(V128ToSimdValue(_context.GetV(18)), Is.EqualTo(_unicornEmu.Q[18])); + Assert.That(V128ToSimdValue(_context.GetV(19)), Is.EqualTo(_unicornEmu.Q[19])); + Assert.That(V128ToSimdValue(_context.GetV(20)), Is.EqualTo(_unicornEmu.Q[20])); + Assert.That(V128ToSimdValue(_context.GetV(21)), Is.EqualTo(_unicornEmu.Q[21])); + Assert.That(V128ToSimdValue(_context.GetV(22)), Is.EqualTo(_unicornEmu.Q[22])); + Assert.That(V128ToSimdValue(_context.GetV(23)), Is.EqualTo(_unicornEmu.Q[23])); + Assert.That(V128ToSimdValue(_context.GetV(24)), Is.EqualTo(_unicornEmu.Q[24])); + Assert.That(V128ToSimdValue(_context.GetV(25)), Is.EqualTo(_unicornEmu.Q[25])); + Assert.That(V128ToSimdValue(_context.GetV(26)), Is.EqualTo(_unicornEmu.Q[26])); + Assert.That(V128ToSimdValue(_context.GetV(27)), Is.EqualTo(_unicornEmu.Q[27])); + Assert.That(V128ToSimdValue(_context.GetV(28)), Is.EqualTo(_unicornEmu.Q[28])); + Assert.That(V128ToSimdValue(_context.GetV(29)), Is.EqualTo(_unicornEmu.Q[29])); + Assert.That(V128ToSimdValue(_context.GetV(30)), Is.EqualTo(_unicornEmu.Q[30])); + Assert.That(V128ToSimdValue(_context.GetV(31)), Is.EqualTo(_unicornEmu.Q[31])); - Assert.That(_thread.ThreadState.Fpcr, Is.EqualTo(_unicornEmu.Fpcr)); - Assert.That(_thread.ThreadState.Fpsr & (int)fpsrMask, Is.EqualTo(_unicornEmu.Fpsr & (int)fpsrMask)); + Assert.That((int)_context.Fpcr, Is.EqualTo(_unicornEmu.Fpcr)); + Assert.That((int)_context.Fpsr & (int)fpsrMask, Is.EqualTo(_unicornEmu.Fpsr & (int)fpsrMask)); - Assert.That(_thread.ThreadState.Overflow, Is.EqualTo(_unicornEmu.OverflowFlag)); - Assert.That(_thread.ThreadState.Carry, Is.EqualTo(_unicornEmu.CarryFlag)); - Assert.That(_thread.ThreadState.Zero, Is.EqualTo(_unicornEmu.ZeroFlag)); - 
Assert.That(_thread.ThreadState.Negative, Is.EqualTo(_unicornEmu.NegativeFlag)); + Assert.That(_context.GetPstateFlag(PState.VFlag), Is.EqualTo(_unicornEmu.OverflowFlag)); + Assert.That(_context.GetPstateFlag(PState.CFlag), Is.EqualTo(_unicornEmu.CarryFlag)); + Assert.That(_context.GetPstateFlag(PState.ZFlag), Is.EqualTo(_unicornEmu.ZeroFlag)); + Assert.That(_context.GetPstateFlag(PState.NFlag), Is.EqualTo(_unicornEmu.NegativeFlag)); } private void ManageFpSkips(FpSkips fpSkips) { if (fpSkips.HasFlag(FpSkips.IfNaNS)) { - if (float.IsNaN(VectorExtractSingle(_unicornEmu.Q[0], (byte)0))) + if (float.IsNaN(_unicornEmu.Q[0].AsFloat())) { Assert.Ignore("NaN test."); } } else if (fpSkips.HasFlag(FpSkips.IfNaND)) { - if (double.IsNaN(VectorExtractDouble(_unicornEmu.Q[0], (byte)0))) + if (double.IsNaN(_unicornEmu.Q[0].AsDouble())) { Assert.Ignore("NaN test."); } @@ -398,158 +406,68 @@ namespace Ryujinx.Tests.Cpu private void ManageFpTolerances(FpTolerances fpTolerances) { - if (!Is.EqualTo(_unicornEmu.Q[0]).ApplyTo(_thread.ThreadState.V0).IsSuccess) + bool IsNormalOrSubnormalS(float f) => float.IsNormal(f) || float.IsSubnormal(f); + bool IsNormalOrSubnormalD(double d) => double.IsNormal(d) || double.IsSubnormal(d); + + if (!Is.EqualTo(_unicornEmu.Q[0]).ApplyTo(V128ToSimdValue(_context.GetV(0))).IsSuccess) { if (fpTolerances == FpTolerances.UpToOneUlpsS) { - if (IsNormalOrSubnormalS(VectorExtractSingle(_unicornEmu.Q[0], (byte)0)) && - IsNormalOrSubnormalS(VectorExtractSingle(_thread.ThreadState.V0, (byte)0))) + if (IsNormalOrSubnormalS(_unicornEmu.Q[0].AsFloat()) && + IsNormalOrSubnormalS(_context.GetV(0).AsFloat())) { - Assert.That (VectorExtractSingle(_thread.ThreadState.V0, (byte)0), - Is.EqualTo(VectorExtractSingle(_unicornEmu.Q[0], (byte)0)).Within(1).Ulps); - Assert.That (VectorExtractSingle(_thread.ThreadState.V0, (byte)1), - Is.EqualTo(VectorExtractSingle(_unicornEmu.Q[0], (byte)1)).Within(1).Ulps); - Assert.That (VectorExtractSingle(_thread.ThreadState.V0, (byte)2), 
- Is.EqualTo(VectorExtractSingle(_unicornEmu.Q[0], (byte)2)).Within(1).Ulps); - Assert.That (VectorExtractSingle(_thread.ThreadState.V0, (byte)3), - Is.EqualTo(VectorExtractSingle(_unicornEmu.Q[0], (byte)3)).Within(1).Ulps); + Assert.That (_context.GetV(0).GetFloat(0), + Is.EqualTo(_unicornEmu.Q[0].GetFloat(0)).Within(1).Ulps); + Assert.That (_context.GetV(0).GetFloat(1), + Is.EqualTo(_unicornEmu.Q[0].GetFloat(1)).Within(1).Ulps); + Assert.That (_context.GetV(0).GetFloat(2), + Is.EqualTo(_unicornEmu.Q[0].GetFloat(2)).Within(1).Ulps); + Assert.That (_context.GetV(0).GetFloat(3), + Is.EqualTo(_unicornEmu.Q[0].GetFloat(3)).Within(1).Ulps); Console.WriteLine(fpTolerances); } else { - Assert.That(_thread.ThreadState.V0, Is.EqualTo(_unicornEmu.Q[0])); + Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0])); } } if (fpTolerances == FpTolerances.UpToOneUlpsD) { - if (IsNormalOrSubnormalD(VectorExtractDouble(_unicornEmu.Q[0], (byte)0)) && - IsNormalOrSubnormalD(VectorExtractDouble(_thread.ThreadState.V0, (byte)0))) + if (IsNormalOrSubnormalD(_unicornEmu.Q[0].AsDouble()) && + IsNormalOrSubnormalD(_context.GetV(0).AsDouble())) { - Assert.That (VectorExtractDouble(_thread.ThreadState.V0, (byte)0), - Is.EqualTo(VectorExtractDouble(_unicornEmu.Q[0], (byte)0)).Within(1).Ulps); - Assert.That (VectorExtractDouble(_thread.ThreadState.V0, (byte)1), - Is.EqualTo(VectorExtractDouble(_unicornEmu.Q[0], (byte)1)).Within(1).Ulps); + Assert.That (_context.GetV(0).GetDouble(0), + Is.EqualTo(_unicornEmu.Q[0].GetDouble(0)).Within(1).Ulps); + Assert.That (_context.GetV(0).GetDouble(1), + Is.EqualTo(_unicornEmu.Q[0].GetDouble(1)).Within(1).Ulps); Console.WriteLine(fpTolerances); } else { - Assert.That(_thread.ThreadState.V0, Is.EqualTo(_unicornEmu.Q[0])); + Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0])); } } } - - bool IsNormalOrSubnormalS(float f) => float.IsNormal(f) || float.IsSubnormal(f); - - bool IsNormalOrSubnormalD(double d) => 
double.IsNormal(d) || double.IsSubnormal(d); } - protected static Vector128 MakeVectorE0(double e0) + private static SimdValue V128ToSimdValue(V128 value) { - if (!Sse2.IsSupported) - { - throw new PlatformNotSupportedException(); - } - - return Sse.StaticCast(Sse2.SetVector128(0, BitConverter.DoubleToInt64Bits(e0))); + return new SimdValue(value.GetUInt64(0), value.GetUInt64(1)); } - protected static Vector128 MakeVectorE0E1(double e0, double e1) - { - if (!Sse2.IsSupported) - { - throw new PlatformNotSupportedException(); - } + protected static V128 MakeVectorScalar(float value) => new V128(value); + protected static V128 MakeVectorScalar(double value) => new V128(value); - return Sse.StaticCast( - Sse2.SetVector128(BitConverter.DoubleToInt64Bits(e1), BitConverter.DoubleToInt64Bits(e0))); - } + protected static V128 MakeVectorE0(ulong e0) => new V128(e0, 0); + protected static V128 MakeVectorE1(ulong e1) => new V128(0, e1); - protected static Vector128 MakeVectorE1(double e1) - { - if (!Sse2.IsSupported) - { - throw new PlatformNotSupportedException(); - } + protected static V128 MakeVectorE0E1(ulong e0, ulong e1) => new V128(e0, e1); - return Sse.StaticCast(Sse2.SetVector128(BitConverter.DoubleToInt64Bits(e1), 0)); - } - - protected static float VectorExtractSingle(Vector128 vector, byte index) - { - if (!Sse41.IsSupported) - { - throw new PlatformNotSupportedException(); - } - - int value = Sse41.Extract(Sse.StaticCast(vector), index); - - return BitConverter.Int32BitsToSingle(value); - } - - protected static double VectorExtractDouble(Vector128 vector, byte index) - { - if (!Sse41.IsSupported) - { - throw new PlatformNotSupportedException(); - } - - long value = Sse41.Extract(Sse.StaticCast(vector), index); - - return BitConverter.Int64BitsToDouble(value); - } - - protected static Vector128 MakeVectorE0(ulong e0) - { - if (!Sse2.IsSupported) - { - throw new PlatformNotSupportedException(); - } - - return Sse.StaticCast(Sse2.SetVector128(0, e0)); - } - - 
protected static Vector128 MakeVectorE0E1(ulong e0, ulong e1) - { - if (!Sse2.IsSupported) - { - throw new PlatformNotSupportedException(); - } - - return Sse.StaticCast(Sse2.SetVector128(e1, e0)); - } - - protected static Vector128 MakeVectorE1(ulong e1) - { - if (!Sse2.IsSupported) - { - throw new PlatformNotSupportedException(); - } - - return Sse.StaticCast(Sse2.SetVector128(e1, 0)); - } - - protected static ulong GetVectorE0(Vector128 vector) - { - if (!Sse41.IsSupported) - { - throw new PlatformNotSupportedException(); - } - - return Sse41.Extract(Sse.StaticCast(vector), (byte)0); - } - - protected static ulong GetVectorE1(Vector128 vector) - { - if (!Sse41.IsSupported) - { - throw new PlatformNotSupportedException(); - } - - return Sse41.Extract(Sse.StaticCast(vector), (byte)1); - } + protected static ulong GetVectorE0(V128 vector) => vector.GetUInt64(0); + protected static ulong GetVectorE1(V128 vector) => vector.GetUInt64(1); protected static ushort GenNormalH() { diff --git a/Ryujinx.Tests/Cpu/CpuTestAluBinary.cs b/Ryujinx.Tests/Cpu/CpuTestAluBinary.cs new file mode 100644 index 000000000..2823477fc --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestAluBinary.cs @@ -0,0 +1,238 @@ +#define AluBinary + +using NUnit.Framework; + +namespace Ryujinx.Tests.Cpu +{ + [Category("AluBinary")] + public sealed class CpuTestAluBinary : CpuTest + { +#if AluBinary + private const int RndCnt = 2; + + [Test, Pairwise, Description("CRC32X , , "), Ignore("Unicorn fails.")] + public void Crc32x([Values(0u, 31u)] uint rd, + [Values(1u, 31u)] uint rn, + [Values(2u, 31u)] uint rm, + [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values((ulong)0x00_00_00_00_00_00_00_00, + (ulong)0x7F_FF_FF_FF_FF_FF_FF_FF, + (ulong)0x80_00_00_00_00_00_00_00, + (ulong)0xFF_FF_FF_FF_FF_FF_FF_FF)] [Random(RndCnt)] ulong xm) + { + uint opcode = 0x9AC04C00; // CRC32X W0, W0, X0 + opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + + uint w31 = 
TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, x1: wn, x2: xm, x31: w31); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("CRC32W , , "), Ignore("Unicorn fails.")] + public void Crc32w([Values(0u, 31u)] uint rd, + [Values(1u, 31u)] uint rn, + [Values(2u, 31u)] uint rm, + [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values((uint)0x00_00_00_00, (uint)0x7F_FF_FF_FF, + (uint)0x80_00_00_00, (uint)0xFF_FF_FF_FF)] [Random(RndCnt)] uint wm) + { + uint opcode = 0x1AC04800; // CRC32W W0, W0, W0 + opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + + uint w31 = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, x1: wn, x2: wm, x31: w31); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("CRC32H , , "), Ignore("Unicorn fails.")] + public void Crc32h([Values(0u, 31u)] uint rd, + [Values(1u, 31u)] uint rn, + [Values(2u, 31u)] uint rm, + [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values((ushort)0x00_00, (ushort)0x7F_FF, + (ushort)0x80_00, (ushort)0xFF_FF)] [Random(RndCnt)] ushort wm) + { + uint opcode = 0x1AC04400; // CRC32H W0, W0, W0 + opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + + uint w31 = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, x1: wn, x2: wm, x31: w31); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("CRC32B , , "), Ignore("Unicorn fails.")] + public void Crc32b([Values(0u, 31u)] uint rd, + [Values(1u, 31u)] uint rn, + [Values(2u, 31u)] uint rm, + [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values((byte)0x00, (byte)0x7F, + (byte)0x80, (byte)0xFF)] [Random(RndCnt)] byte wm) + { + uint opcode = 0x1AC04000; // CRC32B W0, W0, W0 + opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + + uint w31 = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, x1: wn, x2: wm, x31: w31); + + CompareAgainstUnicorn(); + } + + [Test, 
Pairwise, Description("CRC32CX , , ")] + public void Crc32cx([Values(0u, 31u)] uint rd, + [Values(1u, 31u)] uint rn, + [Values(2u, 31u)] uint rm, + [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values((ulong)0x00_00_00_00_00_00_00_00, + (ulong)0x7F_FF_FF_FF_FF_FF_FF_FF, + (ulong)0x80_00_00_00_00_00_00_00, + (ulong)0xFF_FF_FF_FF_FF_FF_FF_FF)] [Random(RndCnt)] ulong xm) + { + uint opcode = 0x9AC05C00; // CRC32CX W0, W0, X0 + opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + + uint w31 = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, x1: wn, x2: xm, x31: w31); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("CRC32CW , , ")] + public void Crc32cw([Values(0u, 31u)] uint rd, + [Values(1u, 31u)] uint rn, + [Values(2u, 31u)] uint rm, + [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values((uint)0x00_00_00_00, (uint)0x7F_FF_FF_FF, + (uint)0x80_00_00_00, (uint)0xFF_FF_FF_FF)] [Random(RndCnt)] uint wm) + { + uint opcode = 0x1AC05800; // CRC32CW W0, W0, W0 + opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + + uint w31 = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, x1: wn, x2: wm, x31: w31); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("CRC32CH , , ")] + public void Crc32ch([Values(0u, 31u)] uint rd, + [Values(1u, 31u)] uint rn, + [Values(2u, 31u)] uint rm, + [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values((ushort)0x00_00, (ushort)0x7F_FF, + (ushort)0x80_00, (ushort)0xFF_FF)] [Random(RndCnt)] ushort wm) + { + uint opcode = 0x1AC05400; // CRC32CH W0, W0, W0 + opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + + uint w31 = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, x1: wn, x2: wm, x31: w31); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("CRC32CB , , ")] + public void Crc32cb([Values(0u, 31u)] uint rd, + [Values(1u, 31u)] uint rn, + 
[Values(2u, 31u)] uint rm, + [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values((byte)0x00, (byte)0x7F, + (byte)0x80, (byte)0xFF)] [Random(RndCnt)] byte wm) + { + uint opcode = 0x1AC05000; // CRC32CB W0, W0, W0 + opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + + uint w31 = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, x1: wn, x2: wm, x31: w31); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("SDIV , , ")] + public void Sdiv_64bit([Values(0u, 31u)] uint rd, + [Values(1u, 31u)] uint rn, + [Values(2u, 31u)] uint rm, + [Values(0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, + 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul)] [Random(RndCnt)] ulong xn, + [Values(0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, + 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul)] [Random(RndCnt)] ulong xm) + { + uint opcode = 0x9AC00C00; // SDIV X0, X0, X0 + opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + + ulong x31 = TestContext.CurrentContext.Random.NextULong(); + + SingleOpcode(opcode, x1: xn, x2: xm, x31: x31); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("SDIV , , ")] + public void Sdiv_32bit([Values(0u, 31u)] uint rd, + [Values(1u, 31u)] uint rn, + [Values(2u, 31u)] uint rm, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm) + { + uint opcode = 0x1AC00C00; // SDIV W0, W0, W0 + opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + + uint w31 = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, x1: wn, x2: wm, x31: w31); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("UDIV , , ")] + public void Udiv_64bit([Values(0u, 31u)] uint rd, + [Values(1u, 31u)] uint rn, + [Values(2u, 31u)] uint rm, + [Values(0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, + 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul)] 
[Random(RndCnt)] ulong xn, + [Values(0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, + 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul)] [Random(RndCnt)] ulong xm) + { + uint opcode = 0x9AC00800; // UDIV X0, X0, X0 + opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + + ulong x31 = TestContext.CurrentContext.Random.NextULong(); + + SingleOpcode(opcode, x1: xn, x2: xm, x31: x31); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("UDIV , , ")] + public void Udiv_32bit([Values(0u, 31u)] uint rd, + [Values(1u, 31u)] uint rn, + [Values(2u, 31u)] uint rm, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm) + { + uint opcode = 0x1AC00800; // UDIV W0, W0, W0 + opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + + uint w31 = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, x1: wn, x2: wm, x31: w31); + + CompareAgainstUnicorn(); + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestAluRs.cs b/Ryujinx.Tests/Cpu/CpuTestAluRs.cs index 2d4013e2f..418dd56d2 100644 --- a/Ryujinx.Tests/Cpu/CpuTestAluRs.cs +++ b/Ryujinx.Tests/Cpu/CpuTestAluRs.cs @@ -394,154 +394,6 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } - [Test, Pairwise, Description("CRC32X , , "), Ignore("Unicorn fails.")] - public void Crc32x([Values(0u, 31u)] uint rd, - [Values(1u, 31u)] uint rn, - [Values(2u, 31u)] uint rm, - [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, - [Values((ulong)0x00_00_00_00_00_00_00_00, - (ulong)0x7F_FF_FF_FF_FF_FF_FF_FF, - (ulong)0x80_00_00_00_00_00_00_00, - (ulong)0xFF_FF_FF_FF_FF_FF_FF_FF)] [Random(RndCnt)] ulong xm) - { - uint opcode = 0x9AC04C00; // CRC32X W0, W0, X0 - opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - - uint w31 = TestContext.CurrentContext.Random.NextUInt(); - - SingleOpcode(opcode, x1: wn, x2: xm, x31: w31); - - 
CompareAgainstUnicorn(); - } - - [Test, Pairwise, Description("CRC32W , , "), Ignore("Unicorn fails.")] - public void Crc32w([Values(0u, 31u)] uint rd, - [Values(1u, 31u)] uint rn, - [Values(2u, 31u)] uint rm, - [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, - [Values((uint)0x00_00_00_00, (uint)0x7F_FF_FF_FF, - (uint)0x80_00_00_00, (uint)0xFF_FF_FF_FF)] [Random(RndCnt)] uint wm) - { - uint opcode = 0x1AC04800; // CRC32W W0, W0, W0 - opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - - uint w31 = TestContext.CurrentContext.Random.NextUInt(); - - SingleOpcode(opcode, x1: wn, x2: wm, x31: w31); - - CompareAgainstUnicorn(); - } - - [Test, Pairwise, Description("CRC32H , , "), Ignore("Unicorn fails.")] - public void Crc32h([Values(0u, 31u)] uint rd, - [Values(1u, 31u)] uint rn, - [Values(2u, 31u)] uint rm, - [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, - [Values((ushort)0x00_00, (ushort)0x7F_FF, - (ushort)0x80_00, (ushort)0xFF_FF)] [Random(RndCnt)] ushort wm) - { - uint opcode = 0x1AC04400; // CRC32H W0, W0, W0 - opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - - uint w31 = TestContext.CurrentContext.Random.NextUInt(); - - SingleOpcode(opcode, x1: wn, x2: wm, x31: w31); - - CompareAgainstUnicorn(); - } - - [Test, Pairwise, Description("CRC32B , , "), Ignore("Unicorn fails.")] - public void Crc32b([Values(0u, 31u)] uint rd, - [Values(1u, 31u)] uint rn, - [Values(2u, 31u)] uint rm, - [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, - [Values((byte)0x00, (byte)0x7F, - (byte)0x80, (byte)0xFF)] [Random(RndCnt)] byte wm) - { - uint opcode = 0x1AC04000; // CRC32B W0, W0, W0 - opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - - uint w31 = TestContext.CurrentContext.Random.NextUInt(); - - SingleOpcode(opcode, x1: wn, x2: wm, x31: w31); - - CompareAgainstUnicorn(); - } - - [Test, Pairwise, Description("CRC32CX , , ")] - public void Crc32cx([Values(0u, 31u)] uint rd, - [Values(1u, 
31u)] uint rn, - [Values(2u, 31u)] uint rm, - [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, - [Values((ulong)0x00_00_00_00_00_00_00_00, - (ulong)0x7F_FF_FF_FF_FF_FF_FF_FF, - (ulong)0x80_00_00_00_00_00_00_00, - (ulong)0xFF_FF_FF_FF_FF_FF_FF_FF)] [Random(RndCnt)] ulong xm) - { - uint opcode = 0x9AC05C00; // CRC32CX W0, W0, X0 - opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - - uint w31 = TestContext.CurrentContext.Random.NextUInt(); - - SingleOpcode(opcode, x1: wn, x2: xm, x31: w31); - - CompareAgainstUnicorn(); - } - - [Test, Pairwise, Description("CRC32CW , , ")] - public void Crc32cw([Values(0u, 31u)] uint rd, - [Values(1u, 31u)] uint rn, - [Values(2u, 31u)] uint rm, - [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, - [Values((uint)0x00_00_00_00, (uint)0x7F_FF_FF_FF, - (uint)0x80_00_00_00, (uint)0xFF_FF_FF_FF)] [Random(RndCnt)] uint wm) - { - uint opcode = 0x1AC05800; // CRC32CW W0, W0, W0 - opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - - uint w31 = TestContext.CurrentContext.Random.NextUInt(); - - SingleOpcode(opcode, x1: wn, x2: wm, x31: w31); - - CompareAgainstUnicorn(); - } - - [Test, Pairwise, Description("CRC32CH , , ")] - public void Crc32ch([Values(0u, 31u)] uint rd, - [Values(1u, 31u)] uint rn, - [Values(2u, 31u)] uint rm, - [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, - [Values((ushort)0x00_00, (ushort)0x7F_FF, - (ushort)0x80_00, (ushort)0xFF_FF)] [Random(RndCnt)] ushort wm) - { - uint opcode = 0x1AC05400; // CRC32CH W0, W0, W0 - opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - - uint w31 = TestContext.CurrentContext.Random.NextUInt(); - - SingleOpcode(opcode, x1: wn, x2: wm, x31: w31); - - CompareAgainstUnicorn(); - } - - [Test, Pairwise, Description("CRC32CB , , ")] - public void Crc32cb([Values(0u, 31u)] uint rd, - [Values(1u, 31u)] uint rn, - [Values(2u, 31u)] uint rm, - [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, - 
[Values((byte)0x00, (byte)0x7F, - (byte)0x80, (byte)0xFF)] [Random(RndCnt)] byte wm) - { - uint opcode = 0x1AC05000; // CRC32CB W0, W0, W0 - opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - - uint w31 = TestContext.CurrentContext.Random.NextUInt(); - - SingleOpcode(opcode, x1: wn, x2: wm, x31: w31); - - CompareAgainstUnicorn(); - } - [Test, Pairwise, Description("EON , , {, #}")] public void Eon_64bit([Values(0u, 31u)] uint rd, [Values(1u, 31u)] uint rn, @@ -954,44 +806,6 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } - [Test, Pairwise, Description("SDIV , , ")] - public void Sdiv_64bit([Values(0u, 31u)] uint rd, - [Values(1u, 31u)] uint rn, - [Values(2u, 31u)] uint rm, - [Values(0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, - 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul)] [Random(RndCnt)] ulong xn, - [Values(0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, - 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul)] [Random(RndCnt)] ulong xm) - { - uint opcode = 0x9AC00C00; // SDIV X0, X0, X0 - opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - - ulong x31 = TestContext.CurrentContext.Random.NextULong(); - - SingleOpcode(opcode, x1: xn, x2: xm, x31: x31); - - CompareAgainstUnicorn(); - } - - [Test, Pairwise, Description("SDIV , , ")] - public void Sdiv_32bit([Values(0u, 31u)] uint rd, - [Values(1u, 31u)] uint rn, - [Values(2u, 31u)] uint rm, - [Values(0x00000000u, 0x7FFFFFFFu, - 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, - [Values(0x00000000u, 0x7FFFFFFFu, - 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm) - { - uint opcode = 0x1AC00C00; // SDIV W0, W0, W0 - opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - - uint w31 = TestContext.CurrentContext.Random.NextUInt(); - - SingleOpcode(opcode, x1: wn, x2: wm, x31: w31); - - CompareAgainstUnicorn(); - } - [Test, Pairwise, Description("SUB , , {, #}")] public void Sub_64bit([Values(0u, 31u)] uint rd, [Values(1u, 31u)] uint rn, @@ -1079,44 +893,6 @@ namespace 
Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } - - [Test, Pairwise, Description("UDIV , , ")] - public void Udiv_64bit([Values(0u, 31u)] uint rd, - [Values(1u, 31u)] uint rn, - [Values(2u, 31u)] uint rm, - [Values(0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, - 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul)] [Random(RndCnt)] ulong xn, - [Values(0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, - 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul)] [Random(RndCnt)] ulong xm) - { - uint opcode = 0x9AC00800; // UDIV X0, X0, X0 - opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - - ulong x31 = TestContext.CurrentContext.Random.NextULong(); - - SingleOpcode(opcode, x1: xn, x2: xm, x31: x31); - - CompareAgainstUnicorn(); - } - - [Test, Pairwise, Description("UDIV , , ")] - public void Udiv_32bit([Values(0u, 31u)] uint rd, - [Values(1u, 31u)] uint rn, - [Values(2u, 31u)] uint rm, - [Values(0x00000000u, 0x7FFFFFFFu, - 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, - [Values(0x00000000u, 0x7FFFFFFFu, - 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm) - { - uint opcode = 0x1AC00800; // UDIV W0, W0, W0 - opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - - uint w31 = TestContext.CurrentContext.Random.NextUInt(); - - SingleOpcode(opcode, x1: wn, x2: wm, x31: w31); - - CompareAgainstUnicorn(); - } #endif } } diff --git a/Ryujinx.Tests/Cpu/CpuTestMisc.cs b/Ryujinx.Tests/Cpu/CpuTestMisc.cs index e976c2c00..6d2440c18 100644 --- a/Ryujinx.Tests/Cpu/CpuTestMisc.cs +++ b/Ryujinx.Tests/Cpu/CpuTestMisc.cs @@ -1,11 +1,9 @@ #define Misc -using ChocolArm64.State; +using ARMeilleure.State; using NUnit.Framework; -using System.Runtime.Intrinsics.X86; - namespace Ryujinx.Tests.Cpu { [Category("Misc")] @@ -32,10 +30,9 @@ namespace Ryujinx.Tests.Cpu opCmn |= ((shift & 3) << 22) | ((imm & 4095) << 10); opCset |= ((cond & 15) << 12); - SetThreadState(x0: xn); + SetContext(x0: xn); Opcode(opCmn); Opcode(opCset); - Opcode(0xD4200000); // BRK #0 Opcode(0xD65F03C0); // RET 
ExecuteOpcodes(); @@ -58,10 +55,9 @@ namespace Ryujinx.Tests.Cpu opCmn |= ((shift & 3) << 22) | ((imm & 4095) << 10); opCset |= ((cond & 15) << 12); - SetThreadState(x0: wn); + SetContext(x0: wn); Opcode(opCmn); Opcode(opCset); - Opcode(0xD4200000); // BRK #0 Opcode(0xD65F03C0); // RET ExecuteOpcodes(); @@ -84,10 +80,9 @@ namespace Ryujinx.Tests.Cpu opCmp |= ((shift & 3) << 22) | ((imm & 4095) << 10); opCset |= ((cond & 15) << 12); - SetThreadState(x0: xn); + SetContext(x0: xn); Opcode(opCmp); Opcode(opCset); - Opcode(0xD4200000); // BRK #0 Opcode(0xD65F03C0); // RET ExecuteOpcodes(); @@ -110,10 +105,9 @@ namespace Ryujinx.Tests.Cpu opCmp |= ((shift & 3) << 22) | ((imm & 4095) << 10); opCset |= ((cond & 15) << 12); - SetThreadState(x0: wn); + SetContext(x0: wn); Opcode(opCmp); Opcode(opCset); - Opcode(0xD4200000); // BRK #0 Opcode(0xD65F03C0); // RET ExecuteOpcodes(); @@ -136,11 +130,10 @@ namespace Ryujinx.Tests.Cpu SUB W0, W0, #3 MUL W0, W1, W0 SDIV W0, W2, W0 - BRK #0 RET */ - SetThreadState(x0: a); + SetContext(x0: a); Opcode(0x11000C02); Opcode(0x51001401); Opcode(0x1B017C42); @@ -148,11 +141,10 @@ namespace Ryujinx.Tests.Cpu Opcode(0x51000C00); Opcode(0x1B007C20); Opcode(0x1AC00C40); - Opcode(0xD4200000); Opcode(0xD65F03C0); ExecuteOpcodes(); - Assert.That(GetThreadState().X0, Is.Zero); + Assert.That(GetContext().GetX(0), Is.Zero); } [Explicit] @@ -185,24 +177,20 @@ namespace Ryujinx.Tests.Cpu FADD S0, S0, S1 FDIV S0, S2, S0 FMUL S0, S0, S0 - BRK #0 RET */ - SetThreadState( - v0: Sse.SetScalarVector128(a), - v1: Sse.SetScalarVector128(b)); + SetContext(v0: MakeVectorScalar(a), v1: MakeVectorScalar(b)); Opcode(0x1E2E1002); Opcode(0x1E201840); Opcode(0x1E211841); Opcode(0x1E212800); Opcode(0x1E201840); Opcode(0x1E200800); - Opcode(0xD4200000); Opcode(0xD65F03C0); ExecuteOpcodes(); - Assert.That(Sse41.Extract(GetThreadState().V0, (byte)0), Is.EqualTo(16f)); + Assert.That(GetContext().GetV(0).AsFloat(), Is.EqualTo(16f)); } [Explicit] @@ -235,24 +223,20 @@ 
namespace Ryujinx.Tests.Cpu FADD D0, D0, D1 FDIV D0, D2, D0 FMUL D0, D0, D0 - BRK #0 RET */ - SetThreadState( - v0: Sse.StaticCast(Sse2.SetScalarVector128(a)), - v1: Sse.StaticCast(Sse2.SetScalarVector128(b))); + SetContext(v0: MakeVectorScalar(a), v1: MakeVectorScalar(b)); Opcode(0x1E6E1002); Opcode(0x1E601840); Opcode(0x1E611841); Opcode(0x1E612800); Opcode(0x1E601840); Opcode(0x1E600800); - Opcode(0xD4200000); Opcode(0xD65F03C0); ExecuteOpcodes(); - Assert.That(VectorExtractDouble(GetThreadState().V0, (byte)0), Is.EqualTo(16d)); + Assert.That(GetContext().GetV(0).AsDouble(), Is.EqualTo(16d)); } [Test, Ignore("The Tester supports only one return point.")] @@ -279,9 +263,9 @@ namespace Ryujinx.Tests.Cpu /* 0x0000000000001000: MOV W4, W0 - 0x0000000000001004: CBZ W0, #0x3C + 0x0000000000001004: CBZ W0, #0x34 0x0000000000001008: CMP W0, #1 - 0x000000000000100C: B.LS #0x48 + 0x000000000000100C: B.LS #0x34 0x0000000000001010: MOVZ W2, #0x2 0x0000000000001014: MOVZ X1, #0x1 0x0000000000001018: MOVZ X3, #0 @@ -290,22 +274,19 @@ namespace Ryujinx.Tests.Cpu 0x0000000000001024: MOV X3, X1 0x0000000000001028: MOV X1, X0 0x000000000000102C: CMP W4, W2 - 0x0000000000001030: B.HS #0x1C - 0x0000000000001034: BRK #0 - 0x0000000000001038: RET - 0x000000000000103C: MOVZ X0, #0 - 0x0000000000001040: BRK #0 + 0x0000000000001030: B.HS #-0x14 + 0x0000000000001034: RET + 0x0000000000001038: MOVZ X0, #0 + 0x000000000000103C: RET + 0x0000000000001040: MOVZ X0, #0x1 0x0000000000001044: RET - 0x0000000000001048: MOVZ X0, #0x1 - 0x000000000000104C: BRK #0 - 0x0000000000001050: RET */ - SetThreadState(x0: a); + SetContext(x0: a); Opcode(0x2A0003E4); - Opcode(0x340001C0); + Opcode(0x340001A0); Opcode(0x7100041F); - Opcode(0x540001E9); + Opcode(0x540001A9); Opcode(0x52800042); Opcode(0xD2800021); Opcode(0xD2800003); @@ -315,17 +296,14 @@ namespace Ryujinx.Tests.Cpu Opcode(0xAA0003E1); Opcode(0x6B02009F); Opcode(0x54FFFF62); - Opcode(0xD4200000); Opcode(0xD65F03C0); Opcode(0xD2800000); - 
Opcode(0xD4200000); Opcode(0xD65F03C0); Opcode(0xD2800020); - Opcode(0xD4200000); Opcode(0xD65F03C0); ExecuteOpcodes(); - Assert.That(GetThreadState().X0, Is.EqualTo(Fn(a))); + Assert.That(GetContext().GetX(0), Is.EqualTo(Fn(a))); } [Explicit] @@ -338,18 +316,16 @@ namespace Ryujinx.Tests.Cpu 0x0000000000001000: MOV X0, #2 0x0000000000001004: MOV X1, #3 0x0000000000001008: ADD X0, X0, X1 - 0x000000000000100C: BRK #0 - 0x0000000000001010: RET + 0x000000000000100C: RET */ Opcode(0xD2800040); Opcode(0xD2800061); Opcode(0x8B010000); - Opcode(0xD4200000); Opcode(0xD65F03C0); ExecuteOpcodes(); - Assert.That(GetThreadState().X0, Is.EqualTo(result)); + Assert.That(GetContext().GetX(0), Is.EqualTo(result)); Reset(); @@ -357,18 +333,16 @@ namespace Ryujinx.Tests.Cpu 0x0000000000001000: MOV X0, #3 0x0000000000001004: MOV X1, #2 0x0000000000001008: ADD X0, X0, X1 - 0x000000000000100C: BRK #0 - 0x0000000000001010: RET + 0x000000000000100C: RET */ Opcode(0xD2800060); Opcode(0xD2800041); Opcode(0x8B010000); - Opcode(0xD4200000); Opcode(0xD65F03C0); ExecuteOpcodes(); - Assert.That(GetThreadState().X0, Is.EqualTo(result)); + Assert.That(GetContext().GetX(0), Is.EqualTo(result)); } [Explicit] @@ -379,9 +353,9 @@ namespace Ryujinx.Tests.Cpu public void SanityCheck(ulong a) { uint opcode = 0xD503201F; // NOP - CpuThreadState threadState = SingleOpcode(opcode, x0: a); + ExecutionContext context = SingleOpcode(opcode, x0: a); - Assert.That(threadState.X0, Is.EqualTo(a)); + Assert.That(context.GetX(0), Is.EqualTo(a)); } #endif } diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs index b446d953e..30dec59ac 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -1,10 +1,11 @@ #define Simd +using ARMeilleure.State; + using NUnit.Framework; using System; using System.Collections.Generic; -using System.Runtime.Intrinsics; namespace Ryujinx.Tests.Cpu { @@ -1175,8 +1176,8 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x5EE0B800; // 
ABS D0, D0 opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1194,8 +1195,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1213,8 +1214,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1230,8 +1231,8 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x5EF1B800; // ADDP D0, V0.2D opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1249,8 +1250,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -1268,8 +1269,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -1287,8 +1288,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 
v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1306,8 +1307,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1325,8 +1326,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1344,8 +1345,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1363,8 +1364,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1382,8 +1383,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1399,8 +1400,8 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x5EE09800; // CMEQ D0, D0, #0 opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); 
@@ -1418,8 +1419,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1437,8 +1438,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1454,8 +1455,8 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x7EE08800; // CMGE D0, D0, #0 opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1473,8 +1474,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1492,8 +1493,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1509,8 +1510,8 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x5EE08800; // CMGT D0, D0, #0 opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1528,8 +1529,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = 
MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1547,8 +1548,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1564,8 +1565,8 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x7EE09800; // CMLE D0, D0, #0 opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1583,8 +1584,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1602,8 +1603,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1619,8 +1620,8 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x5EE0A800; // CMLT D0, D0, #0 opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1638,8 +1639,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: 
v0, v1: v1); @@ -1657,8 +1658,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1674,8 +1675,8 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x0E205800; // CNT V0.8B, V0.8B opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1691,8 +1692,8 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x4E205800; // CNT V0.16B, V0.16B opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -1704,8 +1705,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1S_F_")] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1722,8 +1723,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1D_F_")] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE1(z); + V128 v1 = MakeVectorE0(a); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1746,8 +1747,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1768,8 +1769,8 
@@ namespace Ryujinx.Tests.Cpu { opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1786,8 +1787,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_2S_F_")] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1804,8 +1805,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1D_F_")] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE1(z); + V128 v1 = MakeVectorE0E1(a, a); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1822,8 +1823,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1S_F_")] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1839,8 +1840,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1D_F_")] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE1(z); + V128 v1 = MakeVectorE0(a); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1862,8 +1863,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1883,8 +1884,8 @@ 
namespace Ryujinx.Tests.Cpu { opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1899,7 +1900,7 @@ namespace Ryujinx.Tests.Cpu public void F_Cmp_Cmpe_S_S([ValueSource("_F_Cmp_Cmpe_S_S_")] uint opcodes, [ValueSource("_1S_F_")] ulong a) { - Vector128 v1 = MakeVectorE0(a); + V128 v1 = MakeVectorE0(a); bool v = TestContext.CurrentContext.Random.NextBool(); bool c = TestContext.CurrentContext.Random.NextBool(); @@ -1915,7 +1916,7 @@ namespace Ryujinx.Tests.Cpu public void F_Cmp_Cmpe_S_D([ValueSource("_F_Cmp_Cmpe_S_D_")] uint opcodes, [ValueSource("_1D_F_")] ulong a) { - Vector128 v1 = MakeVectorE0(a); + V128 v1 = MakeVectorE0(a); bool v = TestContext.CurrentContext.Random.NextBool(); bool c = TestContext.CurrentContext.Random.NextBool(); @@ -1932,8 +1933,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1S_F_")] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE1(z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -1945,8 +1946,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1D_F_")] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -1958,8 +1959,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1S_F_")] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -1971,8 +1972,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1H_F_")] ulong a) { ulong z = 
TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -1984,8 +1985,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1S_F_W_")] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -1997,8 +1998,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1D_F_X_")] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE1(z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2016,8 +2017,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2033,8 +2034,8 @@ namespace Ryujinx.Tests.Cpu { opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2053,8 +2054,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? 
a : 0ul); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -2079,8 +2080,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2099,8 +2100,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -2125,8 +2126,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2143,7 +2144,7 @@ namespace Ryujinx.Tests.Cpu ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32; uint w31 = TestContext.CurrentContext.Random.NextUInt(); - Vector128 v1 = MakeVectorE0(a); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, x0: x0, x31: w31, v1: v1); @@ -2159,7 +2160,7 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); ulong x31 = TestContext.CurrentContext.Random.NextULong(); - Vector128 v1 = MakeVectorE0(a); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, x31: x31, v1: v1); @@ -2175,7 +2176,7 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); ulong x31 = TestContext.CurrentContext.Random.NextULong(); - Vector128 v1 = MakeVectorE1(a); + V128 v1 = MakeVectorE1(a); SingleOpcode(opcodes, x31: x31, v1: v1); @@ -2192,7 +2193,7 @@ namespace Ryujinx.Tests.Cpu uint w31 = 
TestContext.CurrentContext.Random.NextUInt(); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); + V128 v0 = MakeVectorE0E1(z, z); SingleOpcode(opcodes, x1: wn, x31: w31, v0: v0); @@ -2209,7 +2210,7 @@ namespace Ryujinx.Tests.Cpu ulong x31 = TestContext.CurrentContext.Random.NextULong(); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); + V128 v0 = MakeVectorE1(z); SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0); @@ -2226,7 +2227,7 @@ namespace Ryujinx.Tests.Cpu ulong x31 = TestContext.CurrentContext.Random.NextULong(); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0(z); + V128 v0 = MakeVectorE0(z); SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0); @@ -2238,8 +2239,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1S_F_")] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2251,8 +2252,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1D_F_")] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE1(z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2265,8 +2266,8 @@ namespace Ryujinx.Tests.Cpu [Values(RMode.Rn)] RMode rMode) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -2285,8 +2286,8 @@ namespace Ryujinx.Tests.Cpu [Values(RMode.Rn)] RMode rMode) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE1(z); + V128 v1 = 
MakeVectorE0(a); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -2311,8 +2312,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -2335,8 +2336,8 @@ namespace Ryujinx.Tests.Cpu { opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -2354,8 +2355,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1S_F_")] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2367,8 +2368,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1D_F_")] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE1(z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2386,8 +2387,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2403,8 +2404,8 @@ namespace Ryujinx.Tests.Cpu { opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2417,8 +2418,8 @@ namespace 
Ryujinx.Tests.Cpu [Values] RMode rMode) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); int fpcr = (int)rMode << (int)Fpcr.RMode; @@ -2433,8 +2434,8 @@ namespace Ryujinx.Tests.Cpu [Values] RMode rMode) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE1(z); + V128 v1 = MakeVectorE0(a); int fpcr = (int)rMode << (int)Fpcr.RMode; @@ -2455,8 +2456,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); int fpcr = (int)rMode << (int)Fpcr.RMode; @@ -2475,8 +2476,8 @@ namespace Ryujinx.Tests.Cpu { opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); int fpcr = (int)rMode << (int)Fpcr.RMode; @@ -2494,8 +2495,8 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x7EE0B800; // NEG D0, D0 opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2513,8 +2514,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2532,8 +2533,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, 
a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2549,8 +2550,8 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x2E205800; // NOT V0.8B, V0.8B opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2566,8 +2567,8 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x6E205800; // NOT V0.16B, V0.16B opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2583,8 +2584,8 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x2E605800; // RBIT V0.8B, V0.8B opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2600,8 +2601,8 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x6E605800; // RBIT V0.16B, V0.16B opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2617,8 +2618,8 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x0E201800; // REV16 V0.8B, V0.8B opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2634,8 +2635,8 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x4E201800; // REV16 V0.16B, V0.16B opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = 
MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2653,8 +2654,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2672,8 +2673,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2691,8 +2692,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2710,8 +2711,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2729,8 +2730,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2748,8 +2749,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2767,8 +2768,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode 
|= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2786,8 +2787,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2805,8 +2806,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2824,8 +2825,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2837,8 +2838,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1S_")] [Random(RndCnt)] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2850,8 +2851,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1D_")] [Random(RndCnt)] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE1(z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2869,8 +2870,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = 
MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2886,8 +2887,8 @@ namespace Ryujinx.Tests.Cpu { opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2903,8 +2904,8 @@ namespace Ryujinx.Tests.Cpu { opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z0, z1); - Vector128 v1 = MakeVectorE0E1(a0, a1); + V128 v0 = MakeVectorE0E1(z0, z1); + V128 v1 = MakeVectorE0E1(a0, a1); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2920,8 +2921,8 @@ namespace Ryujinx.Tests.Cpu { opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z0, z1); - Vector128 v1 = MakeVectorE0E1(a0, a1); + V128 v0 = MakeVectorE0E1(z0, z1); + V128 v1 = MakeVectorE0E1(a0, a1); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -2941,8 +2942,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((size & 3) << 22); opcode |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? 
a : 0ul); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2960,8 +2961,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2979,8 +2980,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -2998,8 +2999,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3017,8 +3018,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3036,8 +3037,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3055,8 +3056,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3074,8 +3075,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 
22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3093,8 +3094,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3112,8 +3113,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3131,8 +3132,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3150,8 +3151,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3169,8 +3170,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3188,8 +3189,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); 
SingleOpcode(opcode, v0: v0, v1: v1); @@ -3207,8 +3208,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3226,8 +3227,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3245,8 +3246,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3264,8 +3265,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3283,8 +3284,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3302,8 +3303,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3321,8 +3322,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 
22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3340,8 +3341,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3359,8 +3360,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3378,8 +3379,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3397,8 +3398,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3416,8 +3417,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -3435,8 +3436,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); 
SingleOpcode(opcode, v0: v0, v1: v1); @@ -3454,8 +3455,8 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs index 4702b986d..fd8ec9c57 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs @@ -1,11 +1,9 @@ // https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf -using ChocolArm64.State; +using ARMeilleure.State; using NUnit.Framework; -using System.Runtime.Intrinsics; - namespace Ryujinx.Tests.Cpu { public class CpuTestSimdCrypto : CpuTest @@ -23,20 +21,20 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x4E285800; // AESD V0.16B, V0.16B opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(roundKeyL ^ valueL, roundKeyH ^ valueH); - Vector128 v1 = MakeVectorE0E1(roundKeyL, roundKeyH); + V128 v0 = MakeVectorE0E1(roundKeyL ^ valueL, roundKeyH ^ valueH); + V128 v1 = MakeVectorE0E1(roundKeyL, roundKeyH); - CpuThreadState threadState = SingleOpcode(opcode, v0: v0, v1: v1); + ExecutionContext context = SingleOpcode(opcode, v0: v0, v1: v1); Assert.Multiple(() => { - Assert.That(GetVectorE0(threadState.V0), Is.EqualTo(resultL)); - Assert.That(GetVectorE1(threadState.V0), Is.EqualTo(resultH)); + Assert.That(GetVectorE0(context.GetV(0)), Is.EqualTo(resultL)); + Assert.That(GetVectorE1(context.GetV(0)), Is.EqualTo(resultH)); }); Assert.Multiple(() => { - Assert.That(GetVectorE0(threadState.V1), Is.EqualTo(roundKeyL)); - Assert.That(GetVectorE1(threadState.V1), Is.EqualTo(roundKeyH)); + Assert.That(GetVectorE0(context.GetV(1)), Is.EqualTo(roundKeyL)); + Assert.That(GetVectorE1(context.GetV(1)), Is.EqualTo(roundKeyH)); }); 
CompareAgainstUnicorn(); @@ -55,20 +53,20 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x4E284800; // AESE V0.16B, V0.16B opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(roundKeyL ^ valueL, roundKeyH ^ valueH); - Vector128 v1 = MakeVectorE0E1(roundKeyL, roundKeyH); + V128 v0 = MakeVectorE0E1(roundKeyL ^ valueL, roundKeyH ^ valueH); + V128 v1 = MakeVectorE0E1(roundKeyL, roundKeyH); - CpuThreadState threadState = SingleOpcode(opcode, v0: v0, v1: v1); + ExecutionContext context = SingleOpcode(opcode, v0: v0, v1: v1); Assert.Multiple(() => { - Assert.That(GetVectorE0(threadState.V0), Is.EqualTo(resultL)); - Assert.That(GetVectorE1(threadState.V0), Is.EqualTo(resultH)); + Assert.That(GetVectorE0(context.GetV(0)), Is.EqualTo(resultL)); + Assert.That(GetVectorE1(context.GetV(0)), Is.EqualTo(resultH)); }); Assert.Multiple(() => { - Assert.That(GetVectorE0(threadState.V1), Is.EqualTo(roundKeyL)); - Assert.That(GetVectorE1(threadState.V1), Is.EqualTo(roundKeyH)); + Assert.That(GetVectorE0(context.GetV(1)), Is.EqualTo(roundKeyL)); + Assert.That(GetVectorE1(context.GetV(1)), Is.EqualTo(roundKeyH)); }); CompareAgainstUnicorn(); @@ -85,24 +83,24 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x4E287800; // AESIMC V0.16B, V0.16B opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v = MakeVectorE0E1(valueL, valueH); + V128 v = MakeVectorE0E1(valueL, valueH); - CpuThreadState threadState = SingleOpcode( + ExecutionContext context = SingleOpcode( opcode, - v0: rn == 0u ? v : default(Vector128), - v1: rn == 1u ? v : default(Vector128)); + v0: rn == 0u ? v : default(V128), + v1: rn == 1u ? 
v : default(V128)); Assert.Multiple(() => { - Assert.That(GetVectorE0(threadState.V0), Is.EqualTo(resultL)); - Assert.That(GetVectorE1(threadState.V0), Is.EqualTo(resultH)); + Assert.That(GetVectorE0(context.GetV(0)), Is.EqualTo(resultL)); + Assert.That(GetVectorE1(context.GetV(0)), Is.EqualTo(resultH)); }); if (rn == 1u) { Assert.Multiple(() => { - Assert.That(GetVectorE0(threadState.V1), Is.EqualTo(valueL)); - Assert.That(GetVectorE1(threadState.V1), Is.EqualTo(valueH)); + Assert.That(GetVectorE0(context.GetV(1)), Is.EqualTo(valueL)); + Assert.That(GetVectorE1(context.GetV(1)), Is.EqualTo(valueH)); }); } @@ -120,24 +118,24 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x4E286800; // AESMC V0.16B, V0.16B opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v = MakeVectorE0E1(valueL, valueH); + V128 v = MakeVectorE0E1(valueL, valueH); - CpuThreadState threadState = SingleOpcode( + ExecutionContext context = SingleOpcode( opcode, - v0: rn == 0u ? v : default(Vector128), - v1: rn == 1u ? v : default(Vector128)); + v0: rn == 0u ? v : default(V128), + v1: rn == 1u ? 
v : default(V128)); Assert.Multiple(() => { - Assert.That(GetVectorE0(threadState.V0), Is.EqualTo(resultL)); - Assert.That(GetVectorE1(threadState.V0), Is.EqualTo(resultH)); + Assert.That(GetVectorE0(context.GetV(0)), Is.EqualTo(resultL)); + Assert.That(GetVectorE1(context.GetV(0)), Is.EqualTo(resultH)); }); if (rn == 1u) { Assert.Multiple(() => { - Assert.That(GetVectorE0(threadState.V1), Is.EqualTo(valueL)); - Assert.That(GetVectorE1(threadState.V1), Is.EqualTo(valueH)); + Assert.That(GetVectorE0(context.GetV(1)), Is.EqualTo(valueL)); + Assert.That(GetVectorE1(context.GetV(1)), Is.EqualTo(valueH)); }); } diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs index 8e2058553..17a2853f1 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs @@ -1,10 +1,11 @@ #define SimdCvt +using ARMeilleure.State; + using NUnit.Framework; using System; using System.Collections.Generic; -using System.Runtime.Intrinsics; namespace Ryujinx.Tests.Cpu { @@ -378,7 +379,7 @@ namespace Ryujinx.Tests.Cpu ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32; uint w31 = TestContext.CurrentContext.Random.NextUInt(); - Vector128 v1 = MakeVectorE0(a); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, x0: x0, x31: w31, v1: v1); @@ -394,7 +395,7 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); ulong x31 = TestContext.CurrentContext.Random.NextULong(); - Vector128 v1 = MakeVectorE0(a); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, x31: x31, v1: v1); @@ -411,7 +412,7 @@ namespace Ryujinx.Tests.Cpu ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32; uint w31 = TestContext.CurrentContext.Random.NextUInt(); - Vector128 v1 = MakeVectorE0(a); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, x0: x0, x31: w31, v1: v1); @@ -427,7 +428,7 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); ulong x31 = 
TestContext.CurrentContext.Random.NextULong(); - Vector128 v1 = MakeVectorE0(a); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, x31: x31, v1: v1); @@ -448,7 +449,7 @@ namespace Ryujinx.Tests.Cpu ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32; uint w31 = TestContext.CurrentContext.Random.NextUInt(); - Vector128 v1 = MakeVectorE0(a); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, x0: x0, x31: w31, v1: v1); @@ -468,7 +469,7 @@ namespace Ryujinx.Tests.Cpu opcodes |= (scale << 10); ulong x31 = TestContext.CurrentContext.Random.NextULong(); - Vector128 v1 = MakeVectorE0(a); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, x31: x31, v1: v1); @@ -489,7 +490,7 @@ namespace Ryujinx.Tests.Cpu ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32; uint w31 = TestContext.CurrentContext.Random.NextUInt(); - Vector128 v1 = MakeVectorE0(a); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, x0: x0, x31: w31, v1: v1); @@ -509,7 +510,7 @@ namespace Ryujinx.Tests.Cpu opcodes |= (scale << 10); ulong x31 = TestContext.CurrentContext.Random.NextULong(); - Vector128 v1 = MakeVectorE0(a); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, x31: x31, v1: v1); @@ -526,7 +527,7 @@ namespace Ryujinx.Tests.Cpu uint w31 = TestContext.CurrentContext.Random.NextUInt(); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); + V128 v0 = MakeVectorE0E1(z, z); SingleOpcode(opcodes, x1: wn, x31: w31, v0: v0); @@ -543,7 +544,7 @@ namespace Ryujinx.Tests.Cpu uint w31 = TestContext.CurrentContext.Random.NextUInt(); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); + V128 v0 = MakeVectorE1(z); SingleOpcode(opcodes, x1: wn, x31: w31, v0: v0); @@ -560,7 +561,7 @@ namespace Ryujinx.Tests.Cpu ulong x31 = TestContext.CurrentContext.Random.NextULong(); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); + V128 v0 = MakeVectorE0E1(z, z); 
SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0); @@ -577,7 +578,7 @@ namespace Ryujinx.Tests.Cpu ulong x31 = TestContext.CurrentContext.Random.NextULong(); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); + V128 v0 = MakeVectorE1(z); SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0); @@ -598,7 +599,7 @@ namespace Ryujinx.Tests.Cpu uint w31 = TestContext.CurrentContext.Random.NextUInt(); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); + V128 v0 = MakeVectorE0E1(z, z); SingleOpcode(opcodes, x1: wn, x31: w31, v0: v0); @@ -619,7 +620,7 @@ namespace Ryujinx.Tests.Cpu uint w31 = TestContext.CurrentContext.Random.NextUInt(); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); + V128 v0 = MakeVectorE1(z); SingleOpcode(opcodes, x1: wn, x31: w31, v0: v0); @@ -640,7 +641,7 @@ namespace Ryujinx.Tests.Cpu ulong x31 = TestContext.CurrentContext.Random.NextULong(); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); + V128 v0 = MakeVectorE0E1(z, z); SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0); @@ -661,7 +662,7 @@ namespace Ryujinx.Tests.Cpu ulong x31 = TestContext.CurrentContext.Random.NextULong(); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); + V128 v0 = MakeVectorE1(z); SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdExt.cs b/Ryujinx.Tests/Cpu/CpuTestSimdExt.cs index b8548169b..0ab40cad2 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdExt.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdExt.cs @@ -1,8 +1,8 @@ #define SimdExt -using NUnit.Framework; +using ARMeilleure.State; -using System.Runtime.Intrinsics; +using NUnit.Framework; namespace Ryujinx.Tests.Cpu { @@ -37,9 +37,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= (imm4 << 11); - Vector128 v0 = MakeVectorE0E1(z, z); - 
Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -61,9 +61,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= (imm4 << 11); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs b/Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs index 4ccd43dbb..825a1c78c 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs @@ -1,9 +1,10 @@ #define SimdFcond +using ARMeilleure.State; + using NUnit.Framework; using System.Collections.Generic; -using System.Runtime.Intrinsics; namespace Ryujinx.Tests.Cpu { @@ -152,8 +153,8 @@ namespace Ryujinx.Tests.Cpu { opcodes |= ((cond & 15) << 12) | ((nzcv & 15) << 0); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); bool v = TestContext.CurrentContext.Random.NextBool(); bool c = TestContext.CurrentContext.Random.NextBool(); @@ -177,8 +178,8 @@ namespace Ryujinx.Tests.Cpu { opcodes |= ((cond & 15) << 12) | ((nzcv & 15) << 0); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); bool v = TestContext.CurrentContext.Random.NextBool(); bool c = TestContext.CurrentContext.Random.NextBool(); @@ -202,9 +203,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((cond & 15) << 12); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = 
MakeVectorE0(b); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); @@ -223,9 +224,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((cond & 15) << 12); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE1(z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdFmov.cs b/Ryujinx.Tests/Cpu/CpuTestSimdFmov.cs index a7e0e0f96..534dba57d 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdFmov.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdFmov.cs @@ -1,8 +1,8 @@ #define SimdFmov -using NUnit.Framework; +using ARMeilleure.State; -using System.Runtime.Intrinsics; +using NUnit.Framework; namespace Ryujinx.Tests.Cpu { @@ -36,7 +36,7 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((imm8 & 0xFFu) << 13); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); + V128 v0 = MakeVectorE0E1(z, z); SingleOpcode(opcodes, v0: v0); @@ -50,7 +50,7 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((imm8 & 0xFFu) << 13); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); + V128 v0 = MakeVectorE1(z); SingleOpcode(opcodes, v0: v0); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdImm.cs b/Ryujinx.Tests/Cpu/CpuTestSimdImm.cs index ce8f63bc6..1ea74a112 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdImm.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdImm.cs @@ -1,9 +1,10 @@ #define SimdImm +using ARMeilleure.State; + using NUnit.Framework; using System.Collections.Generic; -using System.Runtime.Intrinsics; namespace Ryujinx.Tests.Cpu { @@ -203,7 +204,7 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((amount & 1) << 13); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); + V128 v0 = MakeVectorE0E1(z, z); SingleOpcode(opcodes, v0: v0); @@ -224,7 +225,7 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((amount & 3) << 13); opcodes |= ((q & 1) << 30); 
- Vector128 v0 = MakeVectorE0E1(z, z); + V128 v0 = MakeVectorE0E1(z, z); SingleOpcode(opcodes, v0: v0); @@ -241,7 +242,7 @@ namespace Ryujinx.Tests.Cpu opcodes |= (abc << 16) | (defgh << 5); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); + V128 v0 = MakeVectorE1(z); SingleOpcode(opcodes, v0: v0); @@ -288,7 +289,7 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((q & 1) << 30); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(q == 0u ? z : 0ul); + V128 v0 = MakeVectorE1(q == 0u ? z : 0ul); SingleOpcode(opcodes, v0: v0); @@ -309,7 +310,7 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((q & 1) << 30); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(q == 0u ? z : 0ul); + V128 v0 = MakeVectorE1(q == 0u ? z : 0ul); SingleOpcode(opcodes, v0: v0); @@ -330,7 +331,7 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((q & 1) << 30); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(q == 0u ? z : 0ul); + V128 v0 = MakeVectorE1(q == 0u ? z : 0ul); SingleOpcode(opcodes, v0: v0); @@ -351,7 +352,7 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((q & 1) << 30); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(q == 0u ? z : 0ul); + V128 v0 = MakeVectorE1(q == 0u ? 
z : 0ul); SingleOpcode(opcodes, v0: v0); @@ -370,7 +371,7 @@ namespace Ryujinx.Tests.Cpu opcodes |= (abc << 16) | (defgh << 5); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); + V128 v0 = MakeVectorE1(z); SingleOpcode(opcodes, v0: v0); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs b/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs index ea3727041..031ed0f2c 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs @@ -1,8 +1,8 @@ #define SimdIns -using NUnit.Framework; +using ARMeilleure.State; -using System.Runtime.Intrinsics; +using NUnit.Framework; namespace Ryujinx.Tests.Cpu { @@ -86,7 +86,7 @@ namespace Ryujinx.Tests.Cpu uint w31 = TestContext.CurrentContext.Random.NextUInt(); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); + V128 v0 = MakeVectorE0E1(z, z); SingleOpcode(opcode, x1: wn, x31: w31, v0: v0); @@ -103,7 +103,7 @@ namespace Ryujinx.Tests.Cpu ulong x31 = TestContext.CurrentContext.Random.NextULong(); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); + V128 v0 = MakeVectorE0E1(z, z); SingleOpcode(opcode, x1: xn, x31: x31, v0: v0); @@ -122,8 +122,8 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -142,8 +142,8 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -162,8 +162,8 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = 
MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -182,8 +182,8 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -207,8 +207,8 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); opcode |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -232,8 +232,8 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); opcode |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -257,8 +257,8 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); opcode |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -282,8 +282,8 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); opcode |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -306,7 +306,7 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); uint w31 = TestContext.CurrentContext.Random.NextUInt(); - Vector128 v0 = MakeVectorE0E1(z, z); + V128 v0 = MakeVectorE0E1(z, z); SingleOpcode(opcode, x1: wn, x31: w31, v0: v0); @@ -329,7 +329,7 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); uint w31 = TestContext.CurrentContext.Random.NextUInt(); - 
Vector128 v0 = MakeVectorE0E1(z, z); + V128 v0 = MakeVectorE0E1(z, z); SingleOpcode(opcode, x1: wn, x31: w31, v0: v0); @@ -352,7 +352,7 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); uint w31 = TestContext.CurrentContext.Random.NextUInt(); - Vector128 v0 = MakeVectorE0E1(z, z); + V128 v0 = MakeVectorE0E1(z, z); SingleOpcode(opcode, x1: wn, x31: w31, v0: v0); @@ -375,7 +375,7 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); ulong x31 = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); + V128 v0 = MakeVectorE0E1(z, z); SingleOpcode(opcode, x1: xn, x31: x31, v0: v0); @@ -400,8 +400,8 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); opcode |= (imm4 << 11); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -426,8 +426,8 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); opcode |= (imm4 << 11); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -452,8 +452,8 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); opcode |= (imm4 << 11); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -478,8 +478,8 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); opcode |= (imm4 << 11); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, v0: v0, v1: v1); @@ -502,7 +502,7 @@ namespace Ryujinx.Tests.Cpu ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32; uint w31 = TestContext.CurrentContext.Random.NextUInt(); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v1 = MakeVectorE0E1(a, a); 
SingleOpcode(opcode, x0: x0, x31: w31, v1: v1); @@ -525,7 +525,7 @@ namespace Ryujinx.Tests.Cpu ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32; uint w31 = TestContext.CurrentContext.Random.NextUInt(); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, x0: x0, x31: w31, v1: v1); @@ -547,7 +547,7 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); ulong x31 = TestContext.CurrentContext.Random.NextULong(); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, x31: x31, v1: v1); @@ -569,7 +569,7 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); ulong x31 = TestContext.CurrentContext.Random.NextULong(); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, x31: x31, v1: v1); @@ -591,7 +591,7 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); ulong x31 = TestContext.CurrentContext.Random.NextULong(); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, x31: x31, v1: v1); @@ -614,7 +614,7 @@ namespace Ryujinx.Tests.Cpu ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32; uint w31 = TestContext.CurrentContext.Random.NextUInt(); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, x0: x0, x31: w31, v1: v1); @@ -637,7 +637,7 @@ namespace Ryujinx.Tests.Cpu ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32; uint w31 = TestContext.CurrentContext.Random.NextUInt(); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, x0: x0, x31: w31, v1: v1); @@ -660,7 +660,7 @@ namespace Ryujinx.Tests.Cpu ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32; uint w31 = TestContext.CurrentContext.Random.NextUInt(); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, x0: x0, x31: w31, v1: v1); @@ -682,7 +682,7 @@ namespace 
Ryujinx.Tests.Cpu opcode |= (imm5 << 16); ulong x31 = TestContext.CurrentContext.Random.NextULong(); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcode, x31: x31, v1: v1); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs index d9b828013..9b767db40 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs @@ -1,9 +1,10 @@ #define SimdReg +using ARMeilleure.State; + using NUnit.Framework; using System.Collections.Generic; -using System.Runtime.Intrinsics; namespace Ryujinx.Tests.Cpu { @@ -570,9 +571,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x5EE08400; // ADD D0, D0, D0 opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -592,9 +593,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -614,9 +615,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -636,9 +637,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - 
Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -658,9 +659,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -680,9 +681,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -702,9 +703,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -722,9 +723,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x0E201C00; // AND V0.8B, V0.8B, V0.8B opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -742,9 +743,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x4E201C00; // AND V0.16B, V0.16B, V0.16B opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | 
((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -762,9 +763,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x0E601C00; // BIC V0.8B, V0.8B, V0.8B opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -782,9 +783,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x4E601C00; // BIC V0.16B, V0.16B, V0.16B opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -802,9 +803,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x2EE01C00; // BIF V0.8B, V0.8B, V0.8B opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -822,9 +823,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x6EE01C00; // BIF V0.16B, V0.16B, V0.16B opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -842,9 +843,9 
@@ namespace Ryujinx.Tests.Cpu uint opcode = 0x2EA01C00; // BIT V0.8B, V0.8B, V0.8B opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -862,9 +863,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x6EA01C00; // BIT V0.16B, V0.16B, V0.16B opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -882,9 +883,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x2E601C00; // BSL V0.8B, V0.8B, V0.8B opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -902,9 +903,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x6E601C00; // BSL V0.16B, V0.16B, V0.16B opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -922,9 +923,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x7EE08C00; // CMEQ D0, D0, D0 opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = 
MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -944,9 +945,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -966,9 +967,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -986,9 +987,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x5EE03C00; // CMGE D0, D0, D0 opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1008,9 +1009,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1030,9 +1031,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = 
MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1050,9 +1051,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x5EE03400; // CMGT D0, D0, D0 opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1072,9 +1073,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1094,9 +1095,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1114,9 +1115,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x7EE03400; // CMHI D0, D0, D0 opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1136,9 +1137,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = 
MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1158,9 +1159,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1178,9 +1179,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x7EE03C00; // CMHS D0, D0, D0 opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1200,9 +1201,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1222,9 +1223,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1242,9 +1243,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x5EE08C00; // CMTST D0, D0, D0 opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 
= MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1264,9 +1265,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1286,9 +1287,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1306,9 +1307,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x2E201C00; // EOR V0.8B, V0.8B, V0.8B opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1326,9 +1327,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x6E201C00; // EOR V0.16B, V0.16B, V0.16B opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1341,9 +1342,9 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1S_F_")] ulong b) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - 
Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1361,9 +1362,9 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1D_F_")] ulong b) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE1(z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1388,9 +1389,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); - Vector128 v2 = MakeVectorE0E1(b, b * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); + V128 v2 = MakeVectorE0E1(b, b * q); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1413,9 +1414,9 @@ namespace Ryujinx.Tests.Cpu { opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1433,9 +1434,9 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1S_F_")] ulong b) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1452,9 +1453,9 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1D_F_")] ulong b) { ulong z = TestContext.CurrentContext.Random.NextULong(); - 
Vector128 v0 = MakeVectorE1(z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE1(z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1478,9 +1479,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); - Vector128 v2 = MakeVectorE0E1(b, b * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); + V128 v2 = MakeVectorE0E1(b, b * q); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1502,9 +1503,9 @@ namespace Ryujinx.Tests.Cpu { opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1520,8 +1521,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1S_F_")] ulong a, [ValueSource("_1S_F_")] ulong b) { - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); bool v = TestContext.CurrentContext.Random.NextBool(); bool c = TestContext.CurrentContext.Random.NextBool(); @@ -1538,8 +1539,8 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1D_F_")] ulong a, [ValueSource("_1D_F_")] ulong b) { - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); bool v = TestContext.CurrentContext.Random.NextBool(); bool c = TestContext.CurrentContext.Random.NextBool(); @@ -1558,10 +1559,10 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1S_F_")] ulong c) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = 
MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); - Vector128 v3 = MakeVectorE0(c); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); + V128 v3 = MakeVectorE0(c); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1580,10 +1581,10 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1D_F_")] ulong c) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); - Vector128 v3 = MakeVectorE0(c); + V128 v0 = MakeVectorE1(z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); + V128 v3 = MakeVectorE0(c); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1601,9 +1602,9 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1S_F_")] ulong b) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1621,9 +1622,9 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1D_F_")] ulong b) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE1(z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1648,9 +1649,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); - Vector128 v2 = MakeVectorE0E1(b, b * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); + V128 v2 = MakeVectorE0E1(b, b * q); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1673,9 +1674,9 @@ namespace 
Ryujinx.Tests.Cpu { opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1700,9 +1701,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); - Vector128 v2 = MakeVectorE0E1(b, b * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); + V128 v2 = MakeVectorE0E1(b, b * q); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1725,9 +1726,9 @@ namespace Ryujinx.Tests.Cpu { opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1745,9 +1746,9 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1S_F_")] ulong b) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1765,9 +1766,9 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1D_F_")] ulong b) { ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE1(z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); int rnd = 
(int)TestContext.CurrentContext.Random.NextUInt(); @@ -1792,9 +1793,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); - Vector128 v2 = MakeVectorE0E1(b, b * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); + V128 v2 = MakeVectorE0E1(b, b * q); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1817,9 +1818,9 @@ namespace Ryujinx.Tests.Cpu { opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1844,9 +1845,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); @@ -1866,9 +1867,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); @@ -1886,9 +1887,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x0EE01C00; // ORN V0.8B, V0.8B, V0.8B opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = 
MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1906,9 +1907,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x4EE01C00; // ORN V0.16B, V0.16B, V0.16B opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1926,9 +1927,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x0EA01C00; // ORR V0.8B, V0.8B, V0.8B opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1946,9 +1947,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x4EA01C00; // ORR V0.16B, V0.16B, V0.16B opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1968,9 +1969,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -1990,9 +1991,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = 
MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2012,9 +2013,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2034,9 +2035,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2056,9 +2057,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2078,9 +2079,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2100,9 +2101,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode 
|= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2122,9 +2123,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE1(a); - Vector128 v2 = MakeVectorE1(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE1(a); + V128 v2 = MakeVectorE1(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2144,9 +2145,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2166,9 +2167,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2188,9 +2189,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2210,9 +2211,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 
22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE1(a); - Vector128 v2 = MakeVectorE1(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE1(a); + V128 v2 = MakeVectorE1(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2232,9 +2233,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2254,9 +2255,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE1(a); - Vector128 v2 = MakeVectorE1(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE1(a); + V128 v2 = MakeVectorE1(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2276,9 +2277,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2298,9 +2299,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE1(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE1(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2318,9 +2319,9 @@ namespace Ryujinx.Tests.Cpu { opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z0, z1); 
- Vector128 v1 = MakeVectorE0E1(a0, a1); - Vector128 v2 = MakeVectorE0E1(b0, b1); + V128 v0 = MakeVectorE0E1(z0, z1); + V128 v1 = MakeVectorE0E1(a0, a1); + V128 v2 = MakeVectorE0E1(b0, b1); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); @@ -2338,9 +2339,9 @@ namespace Ryujinx.Tests.Cpu { opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z0, z1); - Vector128 v1 = MakeVectorE0E1(a0, a1); - Vector128 v2 = MakeVectorE0E1(b0, b1); + V128 v0 = MakeVectorE0E1(z0, z1); + V128 v1 = MakeVectorE0E1(a0, a1); + V128 v2 = MakeVectorE0E1(b0, b1); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); @@ -2360,9 +2361,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2382,9 +2383,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2404,9 +2405,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2426,9 +2427,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = 
MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2450,9 +2451,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((size & 3) << 22); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); - Vector128 v2 = MakeVectorE0E1(b, b * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); + V128 v2 = MakeVectorE0E1(b, b * q); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); @@ -2472,9 +2473,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); @@ -2494,9 +2495,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE1(a); - Vector128 v2 = MakeVectorE1(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE1(a); + V128 v2 = MakeVectorE1(b); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); @@ -2516,9 +2517,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2538,9 +2539,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = 
MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2560,9 +2561,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2582,9 +2583,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2604,9 +2605,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2626,9 +2627,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2648,9 +2649,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = 
MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2670,9 +2671,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2692,9 +2693,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2714,9 +2715,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2736,9 +2737,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2758,9 +2759,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, 
z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2780,9 +2781,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2802,9 +2803,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2824,9 +2825,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); @@ -2846,9 +2847,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); @@ -2868,9 +2869,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - 
Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2890,9 +2891,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE1(a); - Vector128 v2 = MakeVectorE1(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE1(a); + V128 v2 = MakeVectorE1(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2912,9 +2913,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2934,9 +2935,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE1(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE1(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2954,9 +2955,9 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x7EE08400; // SUB D0, D0, D0 opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2976,9 +2977,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - 
Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -2998,9 +2999,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3020,9 +3021,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3042,9 +3043,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3064,9 +3065,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3086,9 +3087,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= 
((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, ~z); - Vector128 v1 = MakeVectorE0E1(a, ~a); - Vector128 v2 = MakeVectorE0E1(b, ~b); + V128 v0 = MakeVectorE0E1(z, ~z); + V128 v1 = MakeVectorE0E1(a, ~a); + V128 v2 = MakeVectorE0E1(b, ~b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3108,9 +3109,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3130,9 +3131,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, ~z); - Vector128 v1 = MakeVectorE0E1(a, ~a); - Vector128 v2 = MakeVectorE0E1(b, ~b); + V128 v0 = MakeVectorE0E1(z, ~z); + V128 v1 = MakeVectorE0E1(a, ~a); + V128 v2 = MakeVectorE0E1(b, ~b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3152,9 +3153,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3174,9 +3175,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3196,9 +3197,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 
5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3218,9 +3219,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE1(a); - Vector128 v2 = MakeVectorE1(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE1(a); + V128 v2 = MakeVectorE1(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3240,9 +3241,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3262,9 +3263,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3284,9 +3285,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3306,9 +3307,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 
0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE1(a); - Vector128 v2 = MakeVectorE1(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE1(a); + V128 v2 = MakeVectorE1(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3328,9 +3329,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3350,9 +3351,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE1(a); - Vector128 v2 = MakeVectorE1(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE1(a); + V128 v2 = MakeVectorE1(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3372,9 +3373,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3394,9 +3395,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE1(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE1(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3416,9 +3417,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= 
((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3438,9 +3439,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3460,9 +3461,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3482,9 +3483,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3504,9 +3505,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3526,9 +3527,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= 
((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3548,9 +3549,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3570,9 +3571,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3592,9 +3593,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3614,9 +3615,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3636,9 +3637,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= 
((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3658,9 +3659,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3680,9 +3681,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3702,9 +3703,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE1(a); - Vector128 v2 = MakeVectorE1(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE1(a); + V128 v2 = MakeVectorE1(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3724,9 +3725,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3746,9 +3747,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 
3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE1(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE1(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3768,9 +3769,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3790,9 +3791,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, ~z); - Vector128 v1 = MakeVectorE0E1(a, ~a); - Vector128 v2 = MakeVectorE0E1(b, ~b); + V128 v0 = MakeVectorE0E1(z, ~z); + V128 v1 = MakeVectorE0E1(a, ~a); + V128 v2 = MakeVectorE0E1(b, ~b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3812,9 +3813,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3834,9 +3835,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, ~z); - Vector128 v1 = MakeVectorE0E1(a, ~a); - Vector128 v2 = MakeVectorE0E1(b, ~b); + V128 v0 = MakeVectorE0E1(z, ~z); + V128 v1 = MakeVectorE0E1(a, ~a); + V128 v2 = MakeVectorE0E1(b, ~b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3856,9 +3857,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 
0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3878,9 +3879,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, ~z); - Vector128 v1 = MakeVectorE0E1(a, ~a); - Vector128 v2 = MakeVectorE0E1(b, ~b); + V128 v0 = MakeVectorE0E1(z, ~z); + V128 v1 = MakeVectorE0E1(a, ~a); + V128 v2 = MakeVectorE0E1(b, ~b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3900,9 +3901,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0(b); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); @@ -3922,9 +3923,9 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= ((size & 3) << 22); - Vector128 v0 = MakeVectorE0E1(z, ~z); - Vector128 v1 = MakeVectorE0E1(a, ~a); - Vector128 v2 = MakeVectorE0E1(b, ~b); + V128 v0 = MakeVectorE0E1(z, ~z); + V128 v1 = MakeVectorE0E1(a, ~a); + V128 v2 = MakeVectorE0E1(b, ~b); SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs b/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs index 64f9bc6cc..23e0e3646 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs @@ -1,8 +1,8 @@ #define SimdRegElem -using NUnit.Framework; +using ARMeilleure.State; -using System.Runtime.Intrinsics; +using NUnit.Framework; namespace Ryujinx.Tests.Cpu { @@ -95,9 +95,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= (l << 21) | (m << 20) | (h << 11); 
opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); - Vector128 v2 = MakeVectorE0E1(b, b * h); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); + V128 v2 = MakeVectorE0E1(b, b * h); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); @@ -122,9 +122,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= (l << 21) | (h << 11); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); - Vector128 v2 = MakeVectorE0E1(b, b * h); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); + V128 v2 = MakeVectorE0E1(b, b * h); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); @@ -150,9 +150,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= (l << 21) | (m << 20) | (h << 11); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul); - Vector128 v2 = MakeVectorE0E1(b, b * h); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul); + V128 v2 = MakeVectorE0E1(b, b * h); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); @@ -177,9 +177,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= (l << 21) | (h << 11); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul); - Vector128 v2 = MakeVectorE0E1(b, b * h); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? 
a : 0ul); + V128 v2 = MakeVectorE0E1(b, b * h); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs b/Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs index 51027195b..38197fd5f 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs @@ -1,9 +1,10 @@ #define SimdRegElemF +using ARMeilleure.State; + using NUnit.Framework; using System.Collections.Generic; -using System.Runtime.Intrinsics; namespace Ryujinx.Tests.Cpu { @@ -230,9 +231,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= (l << 21) | (h << 11); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0E1(b, b * h); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0E1(b, b * h); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -255,9 +256,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= h << 11; - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0E1(b, b * h); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0E1(b, b * h); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -287,9 +288,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= (l << 21) | (h << 11); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); - Vector128 v2 = MakeVectorE0E1(b, b * h); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); + V128 v2 = MakeVectorE0E1(b, b * h); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -316,9 +317,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= h << 11; - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b * h); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b * h); int rnd = 
(int)TestContext.CurrentContext.Random.NextUInt(); @@ -342,9 +343,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= (l << 21) | (h << 11); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0E1(b, b * h); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0E1(b, b * h); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -367,9 +368,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= h << 11; ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE1(z); - Vector128 v1 = MakeVectorE0(a); - Vector128 v2 = MakeVectorE0E1(b, b * h); + V128 v0 = MakeVectorE1(z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0E1(b, b * h); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -399,9 +400,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= (l << 21) | (h << 11); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); - Vector128 v2 = MakeVectorE0E1(b, b * h); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); + V128 v2 = MakeVectorE0E1(b, b * h); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -428,9 +429,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= h << 11; - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); - Vector128 v2 = MakeVectorE0E1(b, b * h); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); + V128 v2 = MakeVectorE0E1(b, b * h); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs b/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs index 54ed044d9..fbbc9f9fb 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs @@ -1,10 +1,11 @@ #define SimdShImm +using ARMeilleure.State; + using NUnit.Framework; using 
System; using System.Collections.Generic; -using System.Runtime.Intrinsics; namespace Ryujinx.Tests.Cpu { @@ -488,8 +489,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= (immHb << 16); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -509,8 +510,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= (immHb << 16); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -532,8 +533,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= (immHb << 16); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -553,8 +554,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= (immHb << 16); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -574,8 +575,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= (immHb << 16); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -597,8 +598,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= (immHb << 16); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -620,8 +621,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= (immHb << 16); opcodes |= ((q & 1) << 
30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -643,8 +644,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= (immHb << 16); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -664,8 +665,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= (immHb << 16); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -687,8 +688,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= (immHb << 16); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -710,8 +711,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= (immHb << 16); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -733,8 +734,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= (immHb << 16); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? 
a : 0ul); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -754,8 +755,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= (immHb << 16); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -777,8 +778,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= (immHb << 16); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -800,8 +801,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= (immHb << 16); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -823,8 +824,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= (immHb << 16); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a * q); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a * q); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -844,8 +845,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= (immHb << 16); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -867,8 +868,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= (immHb << 16); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -890,8 +891,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= (immHb << 16); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = 
MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -913,8 +914,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= (immHb << 16); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(a, a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -934,8 +935,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= (immHb << 16); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -955,8 +956,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= (immHb << 16); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -976,8 +977,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= (immHb << 16); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -999,8 +1000,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= (immHb << 16); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -1022,8 +1023,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= (immHb << 16); opcodes |= ((q & 1) << 30); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); @@ -1045,8 +1046,8 @@ namespace Ryujinx.Tests.Cpu opcodes |= (immHb << 16); opcodes |= ((q & 1) << 30); - Vector128 v0 = 
MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); SingleOpcode(opcodes, v0: v0, v1: v1); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdTbl.cs b/Ryujinx.Tests/Cpu/CpuTestSimdTbl.cs index 69195af20..5e6546aba 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdTbl.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdTbl.cs @@ -1,9 +1,10 @@ #define SimdTbl +using ARMeilleure.State; + using NUnit.Framework; using System.Collections.Generic; -using System.Runtime.Intrinsics; namespace Ryujinx.Tests.Cpu { @@ -146,9 +147,9 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((q & 1) << 30); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(table0, table0); - Vector128 v2 = MakeVectorE0E1(indexes, q == 1u ? indexes : 0ul); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(table0, table0); + V128 v2 = MakeVectorE0E1(indexes, q == 1u ? indexes : 0ul); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); @@ -169,10 +170,10 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((q & 1) << 30); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(table0, table0); - Vector128 v2 = MakeVectorE0E1(table1, table1); - Vector128 v3 = MakeVectorE0E1(indexes, q == 1u ? indexes : 0ul); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(table0, table0); + V128 v2 = MakeVectorE0E1(table1, table1); + V128 v3 = MakeVectorE0E1(indexes, q == 1u ? 
indexes : 0ul); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2, v3: v3); @@ -193,10 +194,10 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((q & 1) << 30); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v30 = MakeVectorE0E1(z, z); - Vector128 v31 = MakeVectorE0E1(table0, table0); - Vector128 v0 = MakeVectorE0E1(table1, table1); - Vector128 v1 = MakeVectorE0E1(indexes, indexes); + V128 v30 = MakeVectorE0E1(z, z); + V128 v31 = MakeVectorE0E1(table0, table0); + V128 v0 = MakeVectorE0E1(table1, table1); + V128 v1 = MakeVectorE0E1(indexes, indexes); SingleOpcode(opcodes, v0: v0, v1: v1, v30: v30, v31: v31); @@ -218,11 +219,11 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((q & 1) << 30); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(table0, table0); - Vector128 v2 = MakeVectorE0E1(table1, table1); - Vector128 v3 = MakeVectorE0E1(table2, table2); - Vector128 v4 = MakeVectorE0E1(indexes, q == 1u ? indexes : 0ul); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(table0, table0); + V128 v2 = MakeVectorE0E1(table1, table1); + V128 v3 = MakeVectorE0E1(table2, table2); + V128 v4 = MakeVectorE0E1(indexes, q == 1u ? 
indexes : 0ul); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2, v3: v3, v4: v4); @@ -244,11 +245,11 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((q & 1) << 30); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v30 = MakeVectorE0E1(z, z); - Vector128 v31 = MakeVectorE0E1(table0, table0); - Vector128 v0 = MakeVectorE0E1(table1, table1); - Vector128 v1 = MakeVectorE0E1(table2, table2); - Vector128 v2 = MakeVectorE0E1(indexes, indexes); + V128 v30 = MakeVectorE0E1(z, z); + V128 v31 = MakeVectorE0E1(table0, table0); + V128 v0 = MakeVectorE0E1(table1, table1); + V128 v1 = MakeVectorE0E1(table2, table2); + V128 v2 = MakeVectorE0E1(indexes, indexes); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2, v30: v30, v31: v31); @@ -271,12 +272,12 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((q & 1) << 30); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0E1(table0, table0); - Vector128 v2 = MakeVectorE0E1(table1, table1); - Vector128 v3 = MakeVectorE0E1(table2, table2); - Vector128 v4 = MakeVectorE0E1(table3, table3); - Vector128 v5 = MakeVectorE0E1(indexes, q == 1u ? indexes : 0ul); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(table0, table0); + V128 v2 = MakeVectorE0E1(table1, table1); + V128 v3 = MakeVectorE0E1(table2, table2); + V128 v4 = MakeVectorE0E1(table3, table3); + V128 v5 = MakeVectorE0E1(indexes, q == 1u ? 
indexes : 0ul); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2, v3: v3, v4: v4, v5: v5); @@ -299,12 +300,12 @@ namespace Ryujinx.Tests.Cpu opcodes |= ((q & 1) << 30); ulong z = TestContext.CurrentContext.Random.NextULong(); - Vector128 v30 = MakeVectorE0E1(z, z); - Vector128 v31 = MakeVectorE0E1(table0, table0); - Vector128 v0 = MakeVectorE0E1(table1, table1); - Vector128 v1 = MakeVectorE0E1(table2, table2); - Vector128 v2 = MakeVectorE0E1(table3, table3); - Vector128 v3 = MakeVectorE0E1(indexes, indexes); + V128 v30 = MakeVectorE0E1(z, z); + V128 v31 = MakeVectorE0E1(table0, table0); + V128 v0 = MakeVectorE0E1(table1, table1); + V128 v1 = MakeVectorE0E1(table2, table2); + V128 v2 = MakeVectorE0E1(table3, table3); + V128 v3 = MakeVectorE0E1(indexes, indexes); SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2, v3: v3, v30: v30, v31: v31); diff --git a/Ryujinx.Tests/Ryujinx.Tests.csproj b/Ryujinx.Tests/Ryujinx.Tests.csproj index fd305a4ad..0ebc81960 100644 --- a/Ryujinx.Tests/Ryujinx.Tests.csproj +++ b/Ryujinx.Tests/Ryujinx.Tests.csproj @@ -2,7 +2,7 @@ netcoreapp2.1 - win10-x64;osx-x64;linux-x64 + win-x64;osx-x64;linux-x64 Exe false @@ -30,12 +30,11 @@ - - + diff --git a/Ryujinx.sln b/Ryujinx.sln index b928a06d6..8177f8617 100644 --- a/Ryujinx.sln +++ b/Ryujinx.sln @@ -28,7 +28,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Common", "Ryujinx.C EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Profiler", "Ryujinx.Profiler\Ryujinx.Profiler.csproj", "{4E69B67F-8CA7-42CF-A9E1-CCB0915DFB34}" EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tests", "Tests", "{464D8AB7-B056-4A99-B207-B8DCFB47AAA9}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ARMeilleure", "ARMeilleure\ARMeilleure.csproj", "{ABF09A5E-2D8B-4B6F-A51D-5CE414DDB15A}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -130,10 +130,6 @@ Global GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE 
EndGlobalSection - GlobalSection(NestedProjects) = preSolution - {EBB55AEA-C7D7-4DEB-BF96-FA1789E225E9} = {464D8AB7-B056-4A99-B207-B8DCFB47AAA9} - {D8F72938-78EF-4E8C-BAFE-531C9C3C8F15} = {464D8AB7-B056-4A99-B207-B8DCFB47AAA9} - EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {110169B3-3328-4730-8AB0-BA05BEF75C1A} EndGlobalSection diff --git a/Ryujinx/Config.jsonc b/Ryujinx/Config.jsonc index 454c52ae8..2acb7f38d 100644 --- a/Ryujinx/Config.jsonc +++ b/Ryujinx/Config.jsonc @@ -21,7 +21,7 @@ // Enable printing guest logs "logging_enable_guest": true, - + // Enable printing FS access logs. fs_global_access_log_mode must be 2 or 3 "logging_enable_fs_access_log": false, @@ -53,8 +53,8 @@ // Sets the "GlobalAccessLogMode". Possible modes are 0-3 "fs_global_access_log_mode": 0, - // Enable or disable aggressive CPU optimizations - "enable_aggressive_cpu_opts": true, + // Use old ChocolArm64 ARM emulator + "enable_legacy_jit": false, // Enable or disable ignoring missing services, this may cause instability "ignore_missing_services": false, diff --git a/Ryujinx/Configuration.cs b/Ryujinx/Configuration.cs index c15fff2aa..7c9182052 100644 --- a/Ryujinx/Configuration.cs +++ b/Ryujinx/Configuration.cs @@ -1,3 +1,4 @@ +using ARMeilleure; using LibHac.Fs; using OpenTK.Input; using Ryujinx.Common; @@ -108,9 +109,9 @@ namespace Ryujinx public int FsGlobalAccessLogMode { get; private set; } /// - /// Enable or Disable aggressive CPU optimizations + /// Use old ChocolArm64 ARM emulator /// - public bool EnableAggressiveCpuOpts { get; private set; } + public bool EnableLegacyJit { get; private set; } /// /// Enable or disable ignoring missing services @@ -239,10 +240,7 @@ namespace Ryujinx device.System.GlobalAccessLogMode = Instance.FsGlobalAccessLogMode; - if (Instance.EnableAggressiveCpuOpts) - { - Optimizations.AssumeStrictAbiCompliance = true; - } + device.System.UseLegacyJit = Instance.EnableLegacyJit; 
ServiceConfiguration.IgnoreMissingServices = Instance.IgnoreMissingServices; diff --git a/Ryujinx/Ryujinx.csproj b/Ryujinx/Ryujinx.csproj index 4ff06fa07..80b03f46b 100644 --- a/Ryujinx/Ryujinx.csproj +++ b/Ryujinx/Ryujinx.csproj @@ -2,7 +2,7 @@ netcoreapp2.1 - win10-x64;osx-x64;linux-x64 + win-x64;osx-x64;linux-x64 Exe true Debug;Release;Profile Debug;Profile Release @@ -24,12 +24,12 @@ - + diff --git a/Ryujinx/_schema.json b/Ryujinx/_schema.json index cdaf52682..c1a64c674 100644 --- a/Ryujinx/_schema.json +++ b/Ryujinx/_schema.json @@ -20,7 +20,7 @@ "enable_multicore_scheduling", "enable_fs_integrity_checks", "fs_global_access_log_mode", - "enable_aggressive_cpu_opts", + "enable_legacy_jit", "controller_type", "enable_keyboard", "keyboard_controls", @@ -462,7 +462,7 @@ "$id": "#/properties/fs_global_access_log_mode", "type": "integer", "title": "Enable FS access log", - "description": "Enables FS access log output. Possible modes are 0-3. Modes 2 and 3 output to the console.", + "description": "Enables FS access log output. Possible modes are 0-3. Modes 2 and 3 output to the console", "default": 0, "minimum": 0, "examples": [ @@ -472,12 +472,12 @@ 3 ] }, - "enable_aggressive_cpu_opts": { - "$id": "#/properties/enable_aggressive_cpu_opts", + "enable_legacy_jit": { + "$id": "#/properties/enable_legacy_jit", "type": "boolean", - "title": "Enable Aggressive CPU Optimizations", - "description": "Enable or disable aggressive CPU optimizations", - "default": true, + "title": "Enable legacy JIT", + "description": "Use old ChocolArm64 ARM emulator", + "default": false, "examples": [ true, false