From 69093cf2d69490862aff974f170cee63a0016fd0 Mon Sep 17 00:00:00 2001 From: FICTURE7 Date: Sat, 9 Oct 2021 01:15:44 +0400 Subject: [PATCH] Optimize LSRA (#2563) * Optimize `TryAllocateRegWithoutSpill` a bit * Add a fast path for when all registers are live. * Do not query `GetOverlapPosition` if the register is already in use (i.e., free position is 0). * Do not allocate the child split list if the interval is not a parent * Turn `LiveRange` into a reference struct `LiveRange` is now a reference wrapping struct like `Operand` and `Operation`. It has also been changed into a singly linked list. In micro-benchmarks, traversing the linked list was faster than binary search on `List`, surprisingly even for quite large input sizes (e.g., 1,000,000). This could be because the generated code for traversing the linked list is much cleaner and no virtual dispatch happens when checking whether intervals overlap. * Turn `LiveInterval` into an iterator The LSRA allocates in forward order and never inspects previous `LiveInterval`s once they have expired. Something similar can be done for the `LiveRange`s within the `LiveInterval`s themselves. The `LiveInterval` is turned into an iterator which expires the `LiveRange`s within it. The iterator is moved forward along with the interval walking code, i.e., AllocateInterval(context, interval, cIndex). * Remove `LinearScanAllocator.Sources` Local functions are less likely to allocate than lambdas. * Optimize `GetOverlapPosition(interval)` a bit Time complexity should now be O(n+m) instead of O(nm). * Optimize `NumberLocals` a bit Use the same idea as in `HybridAllocator` to store the visited state in the MSB of the Operand's value instead of using a `HashSet`. * Optimize `InsertSplitCopies` a bit Avoid allocating a redundant `CopyResolver`. * Optimize `InsertSplitCopiesAtEdges` a bit Avoid redundant allocations of `CopyResolver`. * Use stack allocation for `freePositions` Avoid redundant computations. 
* Add `UseList` Replace `SortedIntegerList` with an even more specialized data structure. It allocates memory on the arena allocators and does not require copying use positions when splitting it. * Turn `LiveInterval` into a reference struct `LiveInterval` is now a reference wrapping struct like `Operand` and `Operation`. The rationale for turning this into a reference wrapping struct is that a `LiveInterval` is associated with each local variable, and these intervals may themselves be split further. I've seen translations having up to 8000 local variables. To make the `LiveInterval` unmanaged, a new data structure called `LiveIntervalList` was added to store child splits. This differs from `SortedList<,>` because it can contain intervals with the same start position. Really wish we had something more like C++ templates in C#. :^( * Optimize `GetSplitChild` a bit No need to inspect the remaining ranges if we've reached a range which starts after the position, since the split list is ordered. * Optimize `CopyResolver` a bit Lazily allocate the fill, spill and parallel copy structures since most of the time only one of them is needed. * Optimize `BitMap.Enumerator` a bit Marking `MoveNext` as `AggressiveInlining` allows RyuJIT to promote the `Enumerator` struct into registers completely, reducing load/store code a lot since it does not have to store the struct on the stack for ABI purposes. * Use stack allocation for `use/blockedPositions` * Optimize `AllocateWithSpill` a bit * Address feedback * Make `LiveInterval.AddRange(,)` more conservative Produces no diff against master, but just for good measure. 
--- ARMeilleure/Allocators.cs | 4 + .../RegisterAllocators/CopyResolver.cs | 51 +- .../RegisterAllocators/LinearScanAllocator.cs | 386 ++++++++------- .../RegisterAllocators/LiveInterval.cs | 446 +++++++++--------- .../RegisterAllocators/LiveIntervalList.cs | 40 ++ .../CodeGen/RegisterAllocators/LiveRange.cs | 73 ++- .../CodeGen/RegisterAllocators/UseList.cs | 84 ++++ ARMeilleure/Common/BitMap.cs | 14 +- ARMeilleure/Common/SortedIntegerList.cs | 73 --- 9 files changed, 675 insertions(+), 496 deletions(-) create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/UseList.cs delete mode 100644 ARMeilleure/Common/SortedIntegerList.cs diff --git a/ARMeilleure/Allocators.cs b/ARMeilleure/Allocators.cs index df762f4c4..deabf9a26 100644 --- a/ARMeilleure/Allocators.cs +++ b/ARMeilleure/Allocators.cs @@ -10,11 +10,15 @@ namespace ARMeilleure [ThreadStatic] private static ArenaAllocator _operands; [ThreadStatic] private static ArenaAllocator _operations; [ThreadStatic] private static ArenaAllocator _references; + [ThreadStatic] private static ArenaAllocator _liveRanges; + [ThreadStatic] private static ArenaAllocator _liveIntervals; public static ArenaAllocator Default => GetAllocator(ref _default, 256 * 1024, 4); public static ArenaAllocator Operands => GetAllocator(ref _operands, 64 * 1024, 8); public static ArenaAllocator Operations => GetAllocator(ref _operations, 64 * 1024, 8); public static ArenaAllocator References => GetAllocator(ref _references, 64 * 1024, 8); + public static ArenaAllocator LiveRanges => GetAllocator(ref _liveRanges, 64 * 1024, 8); + public static ArenaAllocator LiveIntervals => GetAllocator(ref _liveIntervals, 64 * 1024, 8); [MethodImpl(MethodImplOptions.AggressiveInlining)] private static ArenaAllocator GetAllocator(ref ArenaAllocator alloc, uint pageSize, uint pageCount) diff --git a/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs 
b/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs index cc731b74f..df4b6db1b 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs @@ -1,6 +1,7 @@ using ARMeilleure.IntermediateRepresentation; using System; using System.Collections.Generic; + using static ARMeilleure.IntermediateRepresentation.Operand.Factory; using static ARMeilleure.IntermediateRepresentation.Operation.Factory; @@ -25,7 +26,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } } - private List _copies; + private readonly List _copies; public int Count => _copies.Count; @@ -146,21 +147,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } } - private Queue _fillQueue = new Queue(); - private Queue _spillQueue = new Queue(); - - private ParallelCopy _parallelCopy; + private Queue _fillQueue = null; + private Queue _spillQueue = null; + private ParallelCopy _parallelCopy = null; public bool HasCopy { get; private set; } - public CopyResolver() - { - _fillQueue = new Queue(); - _spillQueue = new Queue(); - - _parallelCopy = new ParallelCopy(); - } - public void AddSplit(LiveInterval left, LiveInterval right) { if (left.Local != right.Local) @@ -194,8 +186,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators private void AddSplitFill(LiveInterval left, LiveInterval right, OperandType type) { - Operand register = GetRegister(right.Register, type); + if (_fillQueue == null) + { + _fillQueue = new Queue(); + } + Operand register = GetRegister(right.Register, type); Operand offset = Const(left.SpillOffset); _fillQueue.Enqueue(Operation(Instruction.Fill, register, offset)); @@ -205,8 +201,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators private void AddSplitSpill(LiveInterval left, LiveInterval right, OperandType type) { - Operand offset = Const(right.SpillOffset); + if (_spillQueue == null) + { + _spillQueue = new Queue(); + } + Operand offset = Const(right.SpillOffset); Operand register = 
GetRegister(left.Register, type); _spillQueue.Enqueue(Operation(Instruction.Spill, default, offset, register)); @@ -216,6 +216,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators private void AddSplitCopy(LiveInterval left, LiveInterval right, OperandType type) { + if (_parallelCopy == null) + { + _parallelCopy = new ParallelCopy(); + } + _parallelCopy.AddCopy(right.Register, left.Register, type); HasCopy = true; @@ -225,16 +230,22 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { List sequence = new List(); - while (_spillQueue.TryDequeue(out Operation spillOp)) + if (_spillQueue != null) { - sequence.Add(spillOp); + while (_spillQueue.TryDequeue(out Operation spillOp)) + { + sequence.Add(spillOp); + } } - _parallelCopy.Sequence(sequence); + _parallelCopy?.Sequence(sequence); - while (_fillQueue.TryDequeue(out Operation fillOp)) + if (_fillQueue != null) { - sequence.Add(fillOp); + while (_fillQueue.TryDequeue(out Operation fillOp)) + { + sequence.Add(fillOp); + } } return sequence.ToArray(); diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs index fd1420a2e..d8a40365b 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs @@ -20,17 +20,13 @@ namespace ARMeilleure.CodeGen.RegisterAllocators private const int RegistersCount = 16; private HashSet _blockEdges; - private LiveRange[] _blockRanges; - private BitMap[] _blockLiveIn; private List _intervals; - private LiveInterval[] _parentIntervals; private List<(IntrusiveList, Operation)> _operationNodes; - private int _operationsCount; private class AllocationContext @@ -45,6 +41,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators public int IntUsedRegisters { get; set; } public int VecUsedRegisters { get; set; } + private readonly int[] _intFreePositions; + private readonly int[] _vecFreePositions; + private readonly int 
_intFreePositionsCount; + private readonly int _vecFreePositionsCount; + public AllocationContext(StackAllocator stackAlloc, RegisterMasks masks, int intervalsCount) { StackAlloc = stackAlloc; @@ -52,6 +53,43 @@ namespace ARMeilleure.CodeGen.RegisterAllocators Active = new BitMap(Allocators.Default, intervalsCount); Inactive = new BitMap(Allocators.Default, intervalsCount); + + PopulateFreePositions(RegisterType.Integer, out _intFreePositions, out _intFreePositionsCount); + PopulateFreePositions(RegisterType.Vector, out _vecFreePositions, out _vecFreePositionsCount); + + void PopulateFreePositions(RegisterType type, out int[] positions, out int count) + { + positions = new int[RegistersCount]; + count = BitOperations.PopCount((uint)masks.GetAvailableRegisters(type)); + + int mask = masks.GetAvailableRegisters(type); + + for (int i = 0; i < positions.Length; i++) + { + if ((mask & (1 << i)) != 0) + { + positions[i] = int.MaxValue; + } + } + } + } + + public void GetFreePositions(RegisterType type, in Span positions, out int count) + { + if (type == RegisterType.Integer) + { + _intFreePositions.CopyTo(positions); + + count = _intFreePositionsCount; + } + else + { + Debug.Assert(type == RegisterType.Vector); + + _vecFreePositions.CopyTo(positions); + + count = _vecFreePositionsCount; + } } public void MoveActiveToInactive(int bit) @@ -132,6 +170,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { LiveInterval interval = _intervals[iIndex]; + interval.Forward(current.GetStart()); + if (interval.GetEnd() < current.GetStart()) { context.Active.Clear(iIndex); @@ -147,6 +187,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { LiveInterval interval = _intervals[iIndex]; + interval.Forward(current.GetStart()); + if (interval.GetEnd() < current.GetStart()) { context.Inactive.Clear(iIndex); @@ -167,45 +209,48 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { RegisterType regType = current.Local.Type.ToRegisterType(); - int availableRegisters = 
context.Masks.GetAvailableRegisters(regType); + Span freePositions = stackalloc int[RegistersCount]; - int[] freePositions = new int[RegistersCount]; - - for (int index = 0; index < RegistersCount; index++) - { - if ((availableRegisters & (1 << index)) != 0) - { - freePositions[index] = int.MaxValue; - } - } + context.GetFreePositions(regType, freePositions, out int freePositionsCount); foreach (int iIndex in context.Active) { LiveInterval interval = _intervals[iIndex]; + Register reg = interval.Register; - if (interval.Register.Type == regType) + if (reg.Type == regType) { - freePositions[interval.Register.Index] = 0; + freePositions[reg.Index] = 0; + freePositionsCount--; } } + // If all registers are already active, return early. No point in inspecting the inactive set to look for + // holes. + if (freePositionsCount == 0) + { + return false; + } + foreach (int iIndex in context.Inactive) { LiveInterval interval = _intervals[iIndex]; + Register reg = interval.Register; - if (interval.Register.Type == regType) + ref int freePosition = ref freePositions[reg.Index]; + + if (reg.Type == regType && freePosition != 0) { int overlapPosition = interval.GetOverlapPosition(current); - if (overlapPosition != LiveInterval.NotFound && freePositions[interval.Register.Index] > overlapPosition) + if (overlapPosition != LiveInterval.NotFound && freePosition > overlapPosition) { - freePositions[interval.Register.Index] = overlapPosition; + freePosition = overlapPosition; } } } int selectedReg = GetHighestValueIndex(freePositions); - int selectedNextUse = freePositions[selectedReg]; // Intervals starts and ends at odd positions, unless they span an entire @@ -227,8 +272,6 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } else if (selectedNextUse < current.GetEnd()) { - Debug.Assert(selectedNextUse > current.GetStart(), "Trying to split interval at the start."); - LiveInterval splitChild = current.Split(selectedNextUse); if (splitChild.UsesCount != 0) @@ -263,44 +306,35 @@ 
namespace ARMeilleure.CodeGen.RegisterAllocators { RegisterType regType = current.Local.Type.ToRegisterType(); - int availableRegisters = context.Masks.GetAvailableRegisters(regType); + Span usePositions = stackalloc int[RegistersCount]; + Span blockedPositions = stackalloc int[RegistersCount]; - int[] usePositions = new int[RegistersCount]; - int[] blockedPositions = new int[RegistersCount]; - - for (int index = 0; index < RegistersCount; index++) - { - if ((availableRegisters & (1 << index)) != 0) - { - usePositions[index] = int.MaxValue; - - blockedPositions[index] = int.MaxValue; - } - } - - void SetUsePosition(int index, int position) - { - usePositions[index] = Math.Min(usePositions[index], position); - } - - void SetBlockedPosition(int index, int position) - { - blockedPositions[index] = Math.Min(blockedPositions[index], position); - - SetUsePosition(index, position); - } + context.GetFreePositions(regType, usePositions, out _); + context.GetFreePositions(regType, blockedPositions, out _); foreach (int iIndex in context.Active) { LiveInterval interval = _intervals[iIndex]; + Register reg = interval.Register; - if (!interval.IsFixed && interval.Register.Type == regType) + if (reg.Type == regType) { - int nextUse = interval.NextUseAfter(current.GetStart()); + ref int usePosition = ref usePositions[reg.Index]; + ref int blockedPosition = ref blockedPositions[reg.Index]; - if (nextUse != -1) + if (interval.IsFixed) { - SetUsePosition(interval.Register.Index, nextUse); + usePosition = 0; + blockedPosition = 0; + } + else + { + int nextUse = interval.NextUseAfter(current.GetStart()); + + if (nextUse != LiveInterval.NotFound && usePosition > nextUse) + { + usePosition = nextUse; + } } } } @@ -308,45 +342,36 @@ namespace ARMeilleure.CodeGen.RegisterAllocators foreach (int iIndex in context.Inactive) { LiveInterval interval = _intervals[iIndex]; + Register reg = interval.Register; - if (!interval.IsFixed && interval.Register.Type == regType && 
interval.Overlaps(current)) + if (reg.Type == regType) { - int nextUse = interval.NextUseAfter(current.GetStart()); + ref int usePosition = ref usePositions[reg.Index]; + ref int blockedPosition = ref blockedPositions[reg.Index]; - if (nextUse != -1) + if (interval.IsFixed) { - SetUsePosition(interval.Register.Index, nextUse); + int overlapPosition = interval.GetOverlapPosition(current); + + if (overlapPosition != LiveInterval.NotFound) + { + blockedPosition = Math.Min(blockedPosition, overlapPosition); + usePosition = Math.Min(usePosition, overlapPosition); + } } - } - } - - foreach (int iIndex in context.Active) - { - LiveInterval interval = _intervals[iIndex]; - - if (interval.IsFixed && interval.Register.Type == regType) - { - SetBlockedPosition(interval.Register.Index, 0); - } - } - - foreach (int iIndex in context.Inactive) - { - LiveInterval interval = _intervals[iIndex]; - - if (interval.IsFixed && interval.Register.Type == regType) - { - int overlapPosition = interval.GetOverlapPosition(current); - - if (overlapPosition != LiveInterval.NotFound) + else if (interval.Overlaps(current)) { - SetBlockedPosition(interval.Register.Index, overlapPosition); + int nextUse = interval.NextUseAfter(current.GetStart()); + + if (nextUse != LiveInterval.NotFound && usePosition > nextUse) + { + usePosition = nextUse; + } } } } int selectedReg = GetHighestValueIndex(usePositions); - int currentFirstUse = current.FirstUse(); Debug.Assert(currentFirstUse >= 0, "Current interval has no uses."); @@ -405,24 +430,24 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } } - private static int GetHighestValueIndex(int[] array) + private static int GetHighestValueIndex(Span span) { - int higuest = array[0]; + int highest = span[0]; - if (higuest == int.MaxValue) + if (highest == int.MaxValue) { return 0; } int selected = 0; - for (int index = 1; index < array.Length; index++) + for (int index = 1; index < span.Length; index++) { - int current = array[index]; + int current = 
span[index]; - if (higuest < current) + if (highest < current) { - higuest = current; + highest = current; selected = index; if (current == int.MaxValue) @@ -543,21 +568,21 @@ namespace ARMeilleure.CodeGen.RegisterAllocators CopyResolver GetCopyResolver(int position) { - CopyResolver copyResolver = new CopyResolver(); - - if (copyResolvers.TryAdd(position, copyResolver)) + if (!copyResolvers.TryGetValue(position, out CopyResolver copyResolver)) { - return copyResolver; + copyResolver = new CopyResolver(); + + copyResolvers.Add(position, copyResolver); } - return copyResolvers[position]; + return copyResolver; } foreach (LiveInterval interval in _intervals.Where(x => x.IsSplit)) { LiveInterval previous = interval; - foreach (LiveInterval splitChild in interval.SplitChilds()) + foreach (LiveInterval splitChild in interval.SplitChildren()) { int splitPosition = splitChild.GetStart(); @@ -607,6 +632,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return block.Index >= blocksCount; } + // Reset iterators to beginning because GetSplitChild depends on the state of the iterator. 
+ foreach (LiveInterval interval in _intervals) + { + interval.Reset(); + } + for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) { if (IsSplitEdgeBlock(block)) @@ -629,7 +660,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators succIndex = successor.GetSuccessor(0).Index; } - CopyResolver copyResolver = new CopyResolver(); + CopyResolver copyResolver = null; foreach (int iIndex in _blockLiveIn[succIndex]) { @@ -646,13 +677,18 @@ namespace ARMeilleure.CodeGen.RegisterAllocators LiveInterval left = interval.GetSplitChild(lEnd); LiveInterval right = interval.GetSplitChild(rStart); - if (left != null && right != null && left != right) + if (left != default && right != default && left != right) { + if (copyResolver == null) + { + copyResolver = new CopyResolver(); + } + copyResolver.AddSplit(left, right); } } - if (!copyResolver.HasCopy) + if (copyResolver == null || !copyResolver.HasCopy) { continue; } @@ -699,10 +735,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { Operand register = GetRegister(current); - IList usePositions = current.UsePositions(); - for (int i = usePositions.Count - 1; i >= 0; i--) + foreach (int usePosition in current.UsePositions()) { - int usePosition = -usePositions[i]; (_, Operation operation) = GetOperationNode(usePosition); for (int index = 0; index < operation.SourcesCount; index++) @@ -759,7 +793,6 @@ namespace ARMeilleure.CodeGen.RegisterAllocators private void NumberLocals(ControlFlowGraph cfg) { _operationNodes = new List<(IntrusiveList, Operation)>(); - _intervals = new List(); for (int index = 0; index < RegistersCount; index++) @@ -768,7 +801,18 @@ namespace ARMeilleure.CodeGen.RegisterAllocators _intervals.Add(new LiveInterval(new Register(index, RegisterType.Vector))); } - HashSet visited = new HashSet(); + // The "visited" state is stored in the MSB of the local's value. 
+ const ulong VisitedMask = 1ul << 63; + + bool IsVisited(Operand local) + { + return (local.GetValueUnsafe() & VisitedMask) != 0; + } + + void SetVisited(Operand local) + { + local.GetValueUnsafe() |= VisitedMask; + } _operationsCount = 0; @@ -784,11 +828,13 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { Operand dest = node.GetDestination(i); - if (dest.Kind == OperandKind.LocalVariable && visited.Add(dest)) + if (dest.Kind == OperandKind.LocalVariable && !IsVisited(dest)) { dest.NumberLocal(_intervals.Count); _intervals.Add(new LiveInterval(dest)); + + SetVisited(dest); } } } @@ -824,19 +870,45 @@ namespace ARMeilleure.CodeGen.RegisterAllocators for (Operation node = block.Operations.First; node != default; node = node.ListNext) { - Sources(node, (source) => + for (int i = 0; i < node.SourcesCount; i++) { - int id = GetOperandId(source); - - if (!liveKill.IsSet(id)) - { - liveGen.Set(id); - } - }); + VisitSource(node.GetSource(i)); + } for (int i = 0; i < node.DestinationsCount; i++) { - Operand dest = node.GetDestination(i); + VisitDestination(node.GetDestination(i)); + } + + void VisitSource(Operand source) + { + if (IsLocalOrRegister(source.Kind)) + { + int id = GetOperandId(source); + + if (!liveKill.IsSet(id)) + { + liveGen.Set(id); + } + } + else if (source.Kind == OperandKind.Memory) + { + MemoryOperand memOp = source.GetMemory(); + + if (memOp.BaseAddress != default) + { + VisitSource(memOp.BaseAddress); + } + + if (memOp.Index != default) + { + VisitSource(memOp.Index); + } + } + } + + void VisitDestination(Operand dest) + { liveKill.Set(GetOperandId(dest)); } } @@ -920,34 +992,65 @@ namespace ARMeilleure.CodeGen.RegisterAllocators continue; } - foreach (Operation node in BottomOperations(block)) + for (Operation node = block.Operations.Last; node != default; node = node.ListPrevious) { operationPos -= InstructionGap; for (int i = 0; i < node.DestinationsCount; i++) { - Operand dest = node.GetDestination(i); - LiveInterval interval = 
_intervals[GetOperandId(dest)]; - - interval.SetStart(operationPos + 1); - interval.AddUsePosition(operationPos + 1); + VisitDestination(node.GetDestination(i)); } - Sources(node, (source) => + for (int i = 0; i < node.SourcesCount; i++) { - LiveInterval interval = _intervals[GetOperandId(source)]; - - interval.AddRange(blockStart, operationPos + 1); - interval.AddUsePosition(operationPos); - }); + VisitSource(node.GetSource(i)); + } if (node.Instruction == Instruction.Call) { AddIntervalCallerSavedReg(context.Masks.IntCallerSavedRegisters, operationPos, RegisterType.Integer); AddIntervalCallerSavedReg(context.Masks.VecCallerSavedRegisters, operationPos, RegisterType.Vector); } + + void VisitSource(Operand source) + { + if (IsLocalOrRegister(source.Kind)) + { + LiveInterval interval = _intervals[GetOperandId(source)]; + + interval.AddRange(blockStart, operationPos + 1); + interval.AddUsePosition(operationPos); + } + else if (source.Kind == OperandKind.Memory) + { + MemoryOperand memOp = source.GetMemory(); + + if (memOp.BaseAddress != default) + { + VisitSource(memOp.BaseAddress); + } + + if (memOp.Index != default) + { + VisitSource(memOp.Index); + } + } + } + + void VisitDestination(Operand dest) + { + LiveInterval interval = _intervals[GetOperandId(dest)]; + + interval.SetStart(operationPos + 1); + interval.AddUsePosition(operationPos + 1); + } } } + + foreach (LiveInterval interval in _parentIntervals) + { + interval.Reset(); + } } private void AddIntervalCallerSavedReg(int mask, int operationPos, RegisterType regType) @@ -987,45 +1090,6 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return (register.Index << 1) | (register.Type == RegisterType.Vector ? 
1 : 0); } - private static IEnumerable BottomOperations(BasicBlock block) - { - Operation node = block.Operations.Last; - - while (node != default) - { - yield return node; - - node = node.ListPrevious; - } - } - - private static void Sources(Operation node, Action action) - { - for (int index = 0; index < node.SourcesCount; index++) - { - Operand source = node.GetSource(index); - - if (IsLocalOrRegister(source.Kind)) - { - action(source); - } - else if (source.Kind == OperandKind.Memory) - { - MemoryOperand memOp = source.GetMemory(); - - if (memOp.BaseAddress != default) - { - action(memOp.BaseAddress); - } - - if (memOp.Index != default) - { - action(memOp.Index); - } - } - } - } - private static bool IsLocalOrRegister(OperandKind kind) { return kind == OperandKind.LocalVariable || diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs index be5876521..77ad95416 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs @@ -1,341 +1,291 @@ -using ARMeilleure.Common; using ARMeilleure.IntermediateRepresentation; using System; using System.Collections.Generic; using System.Diagnostics; -using System.Linq; namespace ARMeilleure.CodeGen.RegisterAllocators { - class LiveInterval : IComparable + unsafe readonly struct LiveInterval : IComparable { public const int NotFound = -1; - private LiveInterval _parent; - - private SortedIntegerList _usePositions; - - public int UsesCount => _usePositions.Count; - - private List _ranges; - - private SortedList _childs; - - public bool IsSplit => _childs.Count != 0; - - public Operand Local { get; } - - public Register Register { get; set; } - - public int SpillOffset { get; private set; } - - public bool IsSpilled => SpillOffset != -1; - public bool IsFixed { get; } - - public bool IsEmpty => _ranges.Count == 0; - - public LiveInterval(Operand local = default, LiveInterval parent = null) + 
private struct Data { - Local = local; - _parent = parent ?? this; + public int End; + public int SpillOffset; - _usePositions = new SortedIntegerList(); + public LiveRange FirstRange; + public LiveRange PrevRange; + public LiveRange CurrRange; - _ranges = new List(); + public LiveInterval Parent; - _childs = new SortedList(); + public UseList Uses; + public LiveIntervalList Children; + + public Operand Local; + public Register Register; + + public bool IsFixed; + } + + private readonly Data* _data; + + private ref int End => ref _data->End; + private ref LiveRange FirstRange => ref _data->FirstRange; + private ref LiveRange CurrRange => ref _data->CurrRange; + private ref LiveRange PrevRange => ref _data->PrevRange; + private ref LiveInterval Parent => ref _data->Parent; + private ref UseList Uses => ref _data->Uses; + private ref LiveIntervalList Children => ref _data->Children; + + public Operand Local => _data->Local; + public ref Register Register => ref _data->Register; + public ref int SpillOffset => ref _data->SpillOffset; + + public bool IsFixed => _data->IsFixed; + public bool IsEmpty => FirstRange == default; + public bool IsSplit => Children.Count != 0; + public bool IsSpilled => SpillOffset != -1; + + public int UsesCount => Uses.Count; + + public LiveInterval(Operand local = default, LiveInterval parent = default) + { + _data = Allocators.LiveIntervals.Allocate(); + *_data = default; + + _data->IsFixed = false; + _data->Local = local; + + Parent = parent == default ? 
this : parent; + Uses = new UseList(); + Children = new LiveIntervalList(); + + FirstRange = default; + CurrRange = default; + PrevRange = default; SpillOffset = -1; } - public LiveInterval(Register register) : this() + public LiveInterval(Register register) : this(local: default, parent: default) { - IsFixed = true; + _data->IsFixed = true; + Register = register; } - public void SetStart(int position) + public void Reset() { - if (_ranges.Count != 0) - { - Debug.Assert(position != _ranges[0].End); + PrevRange = default; + CurrRange = FirstRange; + } - _ranges[0] = new LiveRange(position, _ranges[0].End); - } - else + public void Forward(int position) + { + LiveRange prev = PrevRange; + LiveRange curr = CurrRange; + + while (curr != default && curr.Start < position && !curr.Overlaps(position)) { - _ranges.Add(new LiveRange(position, position + 1)); + prev = curr; + curr = curr.Next; } + + PrevRange = prev; + CurrRange = curr; } public int GetStart() { - if (_ranges.Count == 0) - { - throw new InvalidOperationException("Empty interval."); - } + Debug.Assert(!IsEmpty, "Empty LiveInterval cannot have a start position."); - return _ranges[0].Start; + return FirstRange.Start; } - public void SetEnd(int position) + public void SetStart(int position) { - if (_ranges.Count != 0) + if (FirstRange != default) { - int lastIdx = _ranges.Count - 1; + Debug.Assert(position != FirstRange.End); - Debug.Assert(position != _ranges[lastIdx].Start); - - _ranges[lastIdx] = new LiveRange(_ranges[lastIdx].Start, position); + FirstRange.Start = position; } else { - _ranges.Add(new LiveRange(position, position + 1)); + FirstRange = new LiveRange(position, position + 1); + End = position + 1; } } public int GetEnd() { - if (_ranges.Count == 0) - { - throw new InvalidOperationException("Empty interval."); - } + Debug.Assert(!IsEmpty, "Empty LiveInterval cannot have an end position."); - return _ranges[_ranges.Count - 1].End; + return End; } public void AddRange(int start, int end) { - if 
(start >= end) + Debug.Assert(start < end, $"Invalid range start position {start}, {end}"); + + if (FirstRange != default) { - throw new ArgumentException("Invalid range start position " + start + ", " + end); - } - - int index = _ranges.BinarySearch(new LiveRange(start, end)); - - if (index >= 0) - { - // New range insersects with an existing range, we need to remove - // all the intersecting ranges before adding the new one. - // We also extend the new range as needed, based on the values of - // the existing ranges being removed. - int lIndex = index; - int rIndex = index; - - while (lIndex > 0 && _ranges[lIndex - 1].End >= start) + // If the new range ends exactly where the first range start, then coalesce together. + if (end == FirstRange.Start) { - lIndex--; + FirstRange.Start = start; + + return; } - - while (rIndex + 1 < _ranges.Count && _ranges[rIndex + 1].Start <= end) + // If the new range is already contained, then coalesce together. + else if (FirstRange.Overlaps(start, end)) { - rIndex++; - } + FirstRange.Start = Math.Min(FirstRange.Start, start); + FirstRange.End = Math.Max(FirstRange.End, end); + End = Math.Max(End, end); - if (start > _ranges[lIndex].Start) - { - start = _ranges[lIndex].Start; - } - - if (end < _ranges[rIndex].End) - { - end = _ranges[rIndex].End; - } - - _ranges.RemoveRange(lIndex, (rIndex - lIndex) + 1); - - InsertRange(lIndex, start, end); - } - else - { - InsertRange(~index, start, end); - } - } - - private void InsertRange(int index, int start, int end) - { - // Here we insert a new range on the ranges list. - // If possible, we extend an existing range rather than inserting a new one. - // We can extend an existing range if any of the following conditions are true: - // - The new range starts right after the end of the previous range on the list. - // - The new range ends right before the start of the next range on the list. - // If both cases are true, we can extend either one. 
We prefer to extend the - // previous range, and then remove the next one, but theres no specific reason - // for that, extending either one will do. - int? extIndex = null; - - if (index > 0 && _ranges[index - 1].End == start) - { - start = _ranges[index - 1].Start; - - extIndex = index - 1; - } - - if (index < _ranges.Count && _ranges[index].Start == end) - { - end = _ranges[index].End; - - if (extIndex.HasValue) - { - _ranges.RemoveAt(index); - } - else - { - extIndex = index; + Debug.Assert(FirstRange.Next == default || !FirstRange.Overlaps(FirstRange.Next)); + return; } } - if (extIndex.HasValue) - { - _ranges[extIndex.Value] = new LiveRange(start, end); - } - else - { - _ranges.Insert(index, new LiveRange(start, end)); - } + FirstRange = new LiveRange(start, end, FirstRange); + End = Math.Max(End, end); + + Debug.Assert(FirstRange.Next == default || !FirstRange.Overlaps(FirstRange.Next)); } public void AddUsePosition(int position) { - // Inserts are in descending order, but ascending is faster for SortedIntegerList<>. - // We flip the ordering, then iterate backwards when using the final list. 
- _usePositions.Add(-position); + Uses.Add(position); } public bool Overlaps(int position) { - return _ranges.BinarySearch(new LiveRange(position, position + 1)) >= 0; - } + LiveRange curr = CurrRange; - public bool Overlaps(LiveInterval other) - { - foreach (LiveRange range in other._ranges) + while (curr != default && curr.Start <= position) { - if (_ranges.BinarySearch(range) >= 0) + if (curr.Overlaps(position)) { return true; } + + curr = curr.Next; } return false; } + public bool Overlaps(LiveInterval other) + { + return GetOverlapPosition(other) != NotFound; + } + public int GetOverlapPosition(LiveInterval other) { - foreach (LiveRange range in other._ranges) - { - int overlapIndex = _ranges.BinarySearch(range); + LiveRange a = CurrRange; + LiveRange b = other.CurrRange; - if (overlapIndex >= 0) + while (a != default) + { + while (b != default && b.Start < a.Start) { - // It's possible that we have multiple overlaps within a single interval, - // in this case, we pick the one with the lowest start position, since - // we return the first overlap position. - while (overlapIndex > 0 && _ranges[overlapIndex - 1].End > range.Start) + if (a.Overlaps(b)) { - overlapIndex--; + return a.Start; } - LiveRange overlappingRange = _ranges[overlapIndex]; - - return overlappingRange.Start; + b = b.Next; } + + if (b == default) + { + break; + } + else if (a.Overlaps(b)) + { + return a.Start; + } + + a = a.Next; } return NotFound; } - public IEnumerable SplitChilds() + public ReadOnlySpan SplitChildren() { - return _childs.Values; + return Parent.Children.Span; } - public IList UsePositions() + public ReadOnlySpan UsePositions() { - return _usePositions.GetList(); + return Uses.Span; } public int FirstUse() { - if (_usePositions.Count == 0) - { - return NotFound; - } - - return -_usePositions.Last(); + return Uses.FirstUse; } public int NextUseAfter(int position) { - int index = _usePositions.FindLessEqualIndex(-position); - return (index >= 0) ? 
-_usePositions[index] : NotFound; - } - - public void RemoveAfter(int position) - { - int index = _usePositions.FindLessEqualIndex(-position); - _usePositions.RemoveRange(0, index + 1); + return Uses.NextUse(position); } public LiveInterval Split(int position) { - LiveInterval right = new LiveInterval(Local, _parent); + LiveInterval result = new(Local, Parent); + result.End = End; - int splitIndex = 0; + LiveRange prev = PrevRange; + LiveRange curr = CurrRange; - for (; splitIndex < _ranges.Count; splitIndex++) + while (curr != default && curr.Start < position && !curr.Overlaps(position)) { - LiveRange range = _ranges[splitIndex]; - - if (position > range.Start && position < range.End) - { - right._ranges.Add(new LiveRange(position, range.End)); - - range = new LiveRange(range.Start, position); - - _ranges[splitIndex++] = range; - - break; - } - - if (range.Start >= position) - { - break; - } + prev = curr; + curr = curr.Next; } - if (splitIndex < _ranges.Count) + if (curr.Start >= position) { - int count = _ranges.Count - splitIndex; + prev.Next = default; - right._ranges.AddRange(_ranges.GetRange(splitIndex, count)); + result.FirstRange = curr; - _ranges.RemoveRange(splitIndex, count); + End = prev.End; + } + else + { + result.FirstRange = new LiveRange(position, curr.End, curr.Next); + + curr.End = position; + curr.Next = default; + + End = curr.End; } - int addAfter = _usePositions.FindLessEqualIndex(-position); - for (int index = addAfter; index >= 0; index--) - { - int usePosition = _usePositions[index]; - right._usePositions.Add(usePosition); - } + result.Uses = Uses.Split(position); - RemoveAfter(position); + AddSplitChild(result); - Debug.Assert(_ranges.Count != 0, "Left interval is empty after split."); + Debug.Assert(!IsEmpty, "Left interval is empty after split."); + Debug.Assert(!result.IsEmpty, "Right interval is empty after split."); - Debug.Assert(right._ranges.Count != 0, "Right interval is empty after split."); + // Make sure the iterator in the 
new split is pointing to the start. + result.Reset(); - AddSplitChild(right); - - return right; + return result; } private void AddSplitChild(LiveInterval child) { - Debug.Assert(!child.IsEmpty, "Trying to insert a empty interval."); + Debug.Assert(!child.IsEmpty, "Trying to insert an empty interval."); - _parent._childs.Add(child.GetStart(), child); + Parent.Children.Add(child); } public LiveInterval GetSplitChild(int position) @@ -345,20 +295,24 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return this; } - foreach (LiveInterval splitChild in _childs.Values) + foreach (LiveInterval splitChild in SplitChildren()) { if (splitChild.Overlaps(position)) { return splitChild; } + else if (splitChild.GetStart() > position) + { + break; + } } - return null; + return default; } public bool TrySpillWithSiblingOffset() { - foreach (LiveInterval splitChild in _parent._childs.Values) + foreach (LiveInterval splitChild in SplitChildren()) { if (splitChild.IsSpilled) { @@ -376,19 +330,65 @@ namespace ARMeilleure.CodeGen.RegisterAllocators SpillOffset = offset; } - public int CompareTo(LiveInterval other) + public int CompareTo(LiveInterval interval) { - if (_ranges.Count == 0 || other._ranges.Count == 0) + if (FirstRange == default || interval.FirstRange == default) { - return _ranges.Count.CompareTo(other._ranges.Count); + return 0; } - return _ranges[0].Start.CompareTo(other._ranges[0].Start); + return GetStart().CompareTo(interval.GetStart()); + } + + public bool Equals(LiveInterval interval) + { + return interval._data == _data; + } + + public override bool Equals(object obj) + { + return obj is LiveInterval interval && Equals(interval); + } + + public static bool operator ==(LiveInterval a, LiveInterval b) + { + return a.Equals(b); + } + + public static bool operator !=(LiveInterval a, LiveInterval b) + { + return !a.Equals(b); + } + + public override int GetHashCode() + { + return HashCode.Combine((IntPtr)_data); } public override string ToString() { - return 
string.Join("; ", _ranges); + LiveInterval self = this; + + IEnumerable GetRanges() + { + LiveRange curr = self.CurrRange; + + while (curr != default) + { + if (curr == self.CurrRange) + { + yield return "*" + curr; + } + else + { + yield return curr.ToString(); + } + + curr = curr.Next; + } + } + + return string.Join(", ", GetRanges()); } } } \ No newline at end of file diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs b/ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs new file mode 100644 index 000000000..06b979ead --- /dev/null +++ b/ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs @@ -0,0 +1,40 @@ +using System; + +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + unsafe struct LiveIntervalList + { + private LiveInterval* _items; + private int _count; + private int _capacity; + + public int Count => _count; + public Span Span => new(_items, _count); + + public void Add(LiveInterval interval) + { + if (_count + 1 > _capacity) + { + var oldSpan = Span; + + _capacity = Math.Max(4, _capacity * 2); + _items = Allocators.References.Allocate((uint)_capacity); + + var newSpan = Span; + + oldSpan.CopyTo(newSpan); + } + + int position = interval.GetStart(); + int i = _count - 1; + + while (i >= 0 && _items[i].GetStart() > position) + { + _items[i + 1] = _items[i--]; + } + + _items[i + 1] = interval; + _count++; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs b/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs index b5faeffd5..e38b5190d 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs @@ -2,30 +2,73 @@ using System; namespace ARMeilleure.CodeGen.RegisterAllocators { - struct LiveRange : IComparable + unsafe readonly struct LiveRange : IEquatable { - public int Start { get; } - public int End { get; } - - public LiveRange(int start, int end) + private struct Data { - Start = start; - End = end; + 
public int Start; + public int End; + public LiveRange Next; } - public int CompareTo(LiveRange other) - { - if (Start < other.End && other.Start < End) - { - return 0; - } + private readonly Data* _data; - return Start.CompareTo(other.Start); + public ref int Start => ref _data->Start; + public ref int End => ref _data->End; + public ref LiveRange Next => ref _data->Next; + + public LiveRange(int start, int end, LiveRange next = default) + { + _data = Allocators.LiveRanges.Allocate(); + + Start = start; + End = end; + Next = next; + } + + public bool Overlaps(int start, int end) + { + return Start < end && start < End; + } + + public bool Overlaps(LiveRange range) + { + return Start < range.End && range.Start < End; + } + + public bool Overlaps(int position) + { + return position >= Start && position < End; + } + + public bool Equals(LiveRange range) + { + return range._data == _data; + } + + public override bool Equals(object obj) + { + return obj is LiveRange range && Equals(range); + } + + public static bool operator ==(LiveRange a, LiveRange b) + { + return a.Equals(b); + } + + public static bool operator !=(LiveRange a, LiveRange b) + { + return !a.Equals(b); + } + + public override int GetHashCode() + { + return HashCode.Combine((IntPtr)_data); } public override string ToString() { - return $"[{Start}, {End}["; + return $"[{Start}, {End})"; } } } \ No newline at end of file diff --git a/ARMeilleure/CodeGen/RegisterAllocators/UseList.cs b/ARMeilleure/CodeGen/RegisterAllocators/UseList.cs new file mode 100644 index 000000000..c89f0854d --- /dev/null +++ b/ARMeilleure/CodeGen/RegisterAllocators/UseList.cs @@ -0,0 +1,84 @@ +using System; + +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + unsafe struct UseList + { + private int* _items; + private int _capacity; + private int _count; + + public int Count => _count; + public int FirstUse => _count > 0 ? 
_items[_count - 1] : LiveInterval.NotFound; + public Span Span => new(_items, _count); + + public void Add(int position) + { + if (_count + 1 > _capacity) + { + var oldSpan = Span; + + _capacity = Math.Max(4, _capacity * 2); + _items = Allocators.Default.Allocate((uint)_capacity); + + var newSpan = Span; + + oldSpan.CopyTo(newSpan); + } + + // Use positions are usually inserted in descending order, so inserting in descending order is faster, + // since the number of half exchanges is reduced. + int i = _count - 1; + + while (i >= 0 && _items[i] < position) + { + _items[i + 1] = _items[i--]; + } + + _items[i + 1] = position; + _count++; + } + + public int NextUse(int position) + { + int index = NextUseIndex(position); + + return index != LiveInterval.NotFound ? _items[index] : LiveInterval.NotFound; + } + + public int NextUseIndex(int position) + { + int i = _count - 1; + + if (i == -1 || position > _items[0]) + { + return LiveInterval.NotFound; + } + + while (i >= 0 && _items[i] < position) + { + i--; + } + + return i; + } + + public UseList Split(int position) + { + int index = NextUseIndex(position); + + // Since the list is in descending order, the new split list takes the front of the list and the current + // list takes the back of the list. 
+ UseList result = new(); + result._count = index + 1; + result._capacity = result._count; + result._items = _items; + + _count = _count - result._count; + _capacity = _count; + _items = _items + result._count; + + return result; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Common/BitMap.cs b/ARMeilleure/Common/BitMap.cs index 4872c442e..27ef031f3 100644 --- a/ARMeilleure/Common/BitMap.cs +++ b/ARMeilleure/Common/BitMap.cs @@ -2,6 +2,7 @@ using System; using System.Collections; using System.Collections.Generic; using System.Numerics; +using System.Runtime.CompilerServices; namespace ARMeilleure.Common { @@ -170,12 +171,12 @@ namespace ARMeilleure.Common public struct Enumerator : IEnumerator { - private int _index; + private long _index; private long _mask; private int _bit; private readonly BitMap _map; - public int Current => _index * IntSize + _bit; + public int Current => (int)_index * IntSize + _bit; object IEnumerator.Current => Current; public Enumerator(BitMap map) @@ -186,6 +187,7 @@ namespace ARMeilleure.Common _map = map; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool MoveNext() { if (_mask != 0) @@ -193,14 +195,18 @@ namespace ARMeilleure.Common _mask &= ~(1L << _bit); } + // Manually hoist these loads, because RyuJIT does not. 
+ long count = (uint)_map._count; + long* masks = _map._masks; + while (_mask == 0) { - if (++_index >= _map._count) + if (++_index >= count) { return false; } - _mask = _map._masks[_index]; + _mask = masks[_index]; } _bit = BitOperations.TrailingZeroCount(_mask); diff --git a/ARMeilleure/Common/SortedIntegerList.cs b/ARMeilleure/Common/SortedIntegerList.cs deleted file mode 100644 index cceab62bf..000000000 --- a/ARMeilleure/Common/SortedIntegerList.cs +++ /dev/null @@ -1,73 +0,0 @@ -using System; -using System.Collections.Generic; - -namespace ARMeilleure.Common -{ - public class SortedIntegerList - { - private List _items; - - public int Count => _items.Count; - - public int this[int index] - { - get - { - return _items[index]; - } - set - { - _items[index] = value; - } - } - - public SortedIntegerList() - { - _items = new List(); - } - - public bool Add(int value) - { - if (_items.Count == 0 || value > Last()) - { - _items.Add(value); - return true; - } - else - { - int index = _items.BinarySearch(value); - if (index >= 0) - { - return false; - } - - _items.Insert(-1 - index, value); - return true; - } - } - - public int FindLessEqualIndex(int value) - { - int index = _items.BinarySearch(value); - return (index < 0) ? (-2 - index) : index; - } - - public void RemoveRange(int index, int count) - { - if (count > 0) - { - _items.RemoveRange(index, count); - } - } - - public int Last() - { - return _items[Count - 1]; - } - - public List GetList() - { - return _items; - } - } -}