Ryujinx/ARMeilleure/Translation/RegisterUsage.cs
FICTURE7 fbf40424f4
Add an early TailMerge pass (#2721)
* Add an early `TailMerge` pass

Some translations can have a lot of guest calls and since for each guest
call there is a call guard which may return. This can produce a lot of
epilogue code for returns. This pass merges the epilogue into a single
block.

```
Using filter 'hcq'.
Using metric 'code size'.

Total diff: -1648111 (-7.19 %) (bytes):
  Base: 22913847
  Diff: 21265736

Improved: 4567, regressed: 14, unchanged: 144
```

* Set PTC version

* Address feedback

* Handle `void` returning functions

* Actually handle `void` returning functions

* Fix `RegisterToLocal` logging
2021-10-18 19:51:22 -03:00

394 lines
No EOL
14 KiB
C#

using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
using System;
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
namespace ARMeilleure.Translation
{
static class RegisterUsage
{
private const int RegsCount = 32;
private const int RegsMask = RegsCount - 1;
private struct RegisterMask : IEquatable<RegisterMask>
{
public long IntMask => Mask.GetElement(0);
public long VecMask => Mask.GetElement(1);
public Vector128<long> Mask { get; }
public RegisterMask(Vector128<long> mask)
{
Mask = mask;
}
public RegisterMask(long intMask, long vecMask)
{
Mask = Vector128.Create(intMask, vecMask);
}
public static RegisterMask operator &(RegisterMask x, RegisterMask y)
{
if (Sse2.IsSupported)
{
return new RegisterMask(Sse2.And(x.Mask, y.Mask));
}
return new RegisterMask(x.IntMask & y.IntMask, x.VecMask & y.VecMask);
}
public static RegisterMask operator |(RegisterMask x, RegisterMask y)
{
if (Sse2.IsSupported)
{
return new RegisterMask(Sse2.Or(x.Mask, y.Mask));
}
return new RegisterMask(x.IntMask | y.IntMask, x.VecMask | y.VecMask);
}
public static RegisterMask operator ~(RegisterMask x)
{
if (Sse2.IsSupported)
{
return new RegisterMask(Sse2.AndNot(x.Mask, Vector128<long>.AllBitsSet));
}
return new RegisterMask(~x.IntMask, ~x.VecMask);
}
public static bool operator ==(RegisterMask x, RegisterMask y)
{
return x.Equals(y);
}
public static bool operator !=(RegisterMask x, RegisterMask y)
{
return !x.Equals(y);
}
public override bool Equals(object obj)
{
return obj is RegisterMask regMask && Equals(regMask);
}
public bool Equals(RegisterMask other)
{
return Mask.Equals(other.Mask);
}
public override int GetHashCode()
{
return Mask.GetHashCode();
}
}
public static void RunPass(ControlFlowGraph cfg, ExecutionMode mode)
{
// Compute local register inputs and outputs used inside blocks.
RegisterMask[] localInputs = new RegisterMask[cfg.Blocks.Count];
RegisterMask[] localOutputs = new RegisterMask[cfg.Blocks.Count];
for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
{
for (Operation node = block.Operations.First; node != default; node = node.ListNext)
{
for (int index = 0; index < node.SourcesCount; index++)
{
Operand source = node.GetSource(index);
if (source.Kind == OperandKind.Register)
{
Register register = source.GetRegister();
localInputs[block.Index] |= GetMask(register) & ~localOutputs[block.Index];
}
}
if (node.Destination != default && node.Destination.Kind == OperandKind.Register)
{
localOutputs[block.Index] |= GetMask(node.Destination.GetRegister());
}
}
}
// Compute global register inputs and outputs used across blocks.
RegisterMask[] globalCmnOutputs = new RegisterMask[cfg.Blocks.Count];
RegisterMask[] globalInputs = new RegisterMask[cfg.Blocks.Count];
RegisterMask[] globalOutputs = new RegisterMask[cfg.Blocks.Count];
bool modified;
bool firstPass = true;
do
{
modified = false;
// Compute register outputs.
for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
{
BasicBlock block = cfg.PostOrderBlocks[index];
if (block.Predecessors.Count != 0 && !HasContextLoad(block))
{
BasicBlock predecessor = block.Predecessors[0];
RegisterMask cmnOutputs = localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];
RegisterMask outputs = globalOutputs[predecessor.Index];
for (int pIndex = 1; pIndex < block.Predecessors.Count; pIndex++)
{
predecessor = block.Predecessors[pIndex];
cmnOutputs &= localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];
outputs |= globalOutputs[predecessor.Index];
}
globalInputs[block.Index] |= outputs & ~cmnOutputs;
if (!firstPass)
{
cmnOutputs &= globalCmnOutputs[block.Index];
}
modified |= Exchange(globalCmnOutputs, block.Index, cmnOutputs);
outputs |= localOutputs[block.Index];
modified |= Exchange(globalOutputs, block.Index, globalOutputs[block.Index] | outputs);
}
else
{
modified |= Exchange(globalOutputs, block.Index, localOutputs[block.Index]);
}
}
// Compute register inputs.
for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
{
BasicBlock block = cfg.PostOrderBlocks[index];
RegisterMask inputs = localInputs[block.Index];
for (int i = 0; i < block.SuccessorsCount; i++)
{
inputs |= globalInputs[block.GetSuccessor(i).Index];
}
inputs &= ~globalCmnOutputs[block.Index];
modified |= Exchange(globalInputs, block.Index, globalInputs[block.Index] | inputs);
}
firstPass = false;
}
while (modified);
// Insert load and store context instructions where needed.
for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
{
bool hasContextLoad = HasContextLoad(block);
if (hasContextLoad)
{
block.Operations.Remove(block.Operations.First);
}
Operand arg = default;
// The only block without any predecessor should be the entry block.
// It always needs a context load as it is the first block to run.
if (block.Predecessors.Count == 0 || hasContextLoad)
{
long vecMask = globalInputs[block.Index].VecMask;
long intMask = globalInputs[block.Index].IntMask;
if (vecMask != 0 || intMask != 0)
{
arg = Local(OperandType.I64);
Operation loadArg = block.Operations.AddFirst(Operation(Instruction.LoadArgument, arg, Const(0)));
LoadLocals(block, vecMask, RegisterType.Vector, mode, loadArg, arg);
LoadLocals(block, intMask, RegisterType.Integer, mode, loadArg, arg);
}
}
bool hasContextStore = HasContextStore(block);
if (hasContextStore)
{
block.Operations.Remove(block.Operations.Last);
}
if (EndsWithReturn(block) || hasContextStore)
{
long vecMask = globalOutputs[block.Index].VecMask;
long intMask = globalOutputs[block.Index].IntMask;
if (vecMask != 0 || intMask != 0)
{
if (arg == default)
{
arg = Local(OperandType.I64);
block.Append(Operation(Instruction.LoadArgument, arg, Const(0)));
}
StoreLocals(block, intMask, RegisterType.Integer, mode, arg);
StoreLocals(block, vecMask, RegisterType.Vector, mode, arg);
}
}
}
}
private static bool HasContextLoad(BasicBlock block)
{
return StartsWith(block, Instruction.LoadFromContext) && block.Operations.First.SourcesCount == 0;
}
private static bool HasContextStore(BasicBlock block)
{
return EndsWith(block, Instruction.StoreToContext) && block.Operations.Last.SourcesCount == 0;
}
private static bool StartsWith(BasicBlock block, Instruction inst)
{
if (block.Operations.Count > 0)
{
Operation first = block.Operations.First;
return first != default && first.Instruction == inst;
}
return false;
}
private static bool EndsWith(BasicBlock block, Instruction inst)
{
if (block.Operations.Count > 0)
{
Operation last = block.Operations.Last;
return last != default && last.Instruction == inst;
}
return false;
}
private static RegisterMask GetMask(Register register)
{
long intMask = 0;
long vecMask = 0;
switch (register.Type)
{
case RegisterType.Flag: intMask = (1L << RegsCount) << register.Index; break;
case RegisterType.Integer: intMask = 1L << register.Index; break;
case RegisterType.FpFlag: vecMask = (1L << RegsCount) << register.Index; break;
case RegisterType.Vector: vecMask = 1L << register.Index; break;
}
return new RegisterMask(intMask, vecMask);
}
private static bool Exchange(RegisterMask[] masks, int blkIndex, RegisterMask value)
{
ref RegisterMask curValue = ref masks[blkIndex];
bool changed = curValue != value;
curValue = value;
return changed;
}
private static void LoadLocals(
BasicBlock block,
long inputs,
RegisterType baseType,
ExecutionMode mode,
Operation loadArg,
Operand arg)
{
while (inputs != 0)
{
int bit = 63 - BitOperations.LeadingZeroCount((ulong)inputs);
Operand dest = GetRegFromBit(bit, baseType, mode);
Operand offset = Const((long)NativeContext.GetRegisterOffset(dest.GetRegister()));
Operand addr = Local(OperandType.I64);
block.Operations.AddAfter(loadArg, Operation(Instruction.Load, dest, addr));
block.Operations.AddAfter(loadArg, Operation(Instruction.Add, addr, arg, offset));
inputs &= ~(1L << bit);
}
}
private static void StoreLocals(
BasicBlock block,
long outputs,
RegisterType baseType,
ExecutionMode mode,
Operand arg)
{
while (outputs != 0)
{
int bit = BitOperations.TrailingZeroCount(outputs);
Operand source = GetRegFromBit(bit, baseType, mode);
Operand offset = Const((long)NativeContext.GetRegisterOffset(source.GetRegister()));
Operand addr = Local(OperandType.I64);
block.Append(Operation(Instruction.Add, addr, arg, offset));
block.Append(Operation(Instruction.Store, default, addr, source));
outputs &= ~(1L << bit);
}
}
private static Operand GetRegFromBit(int bit, RegisterType baseType, ExecutionMode mode)
{
if (bit < RegsCount)
{
return Register(bit, baseType, GetOperandType(baseType, mode));
}
else if (baseType == RegisterType.Integer)
{
return Register(bit & RegsMask, RegisterType.Flag, OperandType.I32);
}
else if (baseType == RegisterType.Vector)
{
return Register(bit & RegsMask, RegisterType.FpFlag, OperandType.I32);
}
else
{
throw new ArgumentOutOfRangeException(nameof(bit));
}
}
private static OperandType GetOperandType(RegisterType type, ExecutionMode mode)
{
switch (type)
{
case RegisterType.Flag: return OperandType.I32;
case RegisterType.FpFlag: return OperandType.I32;
case RegisterType.Integer: return (mode == ExecutionMode.Aarch64) ? OperandType.I64 : OperandType.I32;
case RegisterType.Vector: return OperandType.V128;
}
throw new ArgumentException($"Invalid register type \"{type}\".");
}
private static bool EndsWithReturn(BasicBlock block)
{
Operation last = block.Operations.Last;
return last != default && last.Instruction == Instruction.Return;
}
}
}