271 lines
9.2 KiB
C#
271 lines
9.2 KiB
C#
|
using ARMeilleure.CodeGen.Optimizations;
|
||
|
using ARMeilleure.IntermediateRepresentation;
|
||
|
using ARMeilleure.Translation;
|
||
|
using System.Collections.Generic;
|
||
|
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
|
||
|
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
|
||
|
|
||
|
namespace ARMeilleure.CodeGen.Arm64
|
||
|
{
|
||
|
static class Arm64Optimizer
|
||
|
{
|
||
|
private const int MaxConstantUses = 10000;
|
||
|
|
||
|
public static void RunPass(ControlFlowGraph cfg)
|
||
|
{
|
||
|
var constants = new Dictionary<ulong, Operand>();
|
||
|
|
||
|
Operand GetConstantCopy(BasicBlock block, Operation operation, Operand source)
|
||
|
{
|
||
|
// If the constant has many uses, we also force a new constant mov to be added, in order
|
||
|
// to avoid overflow of the counts field (that is limited to 16 bits).
|
||
|
if (!constants.TryGetValue(source.Value, out var constant) || constant.UsesCount > MaxConstantUses)
|
||
|
{
|
||
|
constant = Local(source.Type);
|
||
|
|
||
|
Operation copyOp = Operation(Instruction.Copy, constant, source);
|
||
|
|
||
|
block.Operations.AddBefore(operation, copyOp);
|
||
|
|
||
|
constants[source.Value] = constant;
|
||
|
}
|
||
|
|
||
|
return constant;
|
||
|
}
|
||
|
|
||
|
for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
|
||
|
{
|
||
|
constants.Clear();
|
||
|
|
||
|
Operation nextNode;
|
||
|
|
||
|
for (Operation node = block.Operations.First; node != default; node = nextNode)
|
||
|
{
|
||
|
nextNode = node.ListNext;
|
||
|
|
||
|
// Insert copies for constants that can't fit on a 32-bit immediate.
|
||
|
// Doing this early unblocks a few optimizations.
|
||
|
if (node.Instruction == Instruction.Add)
|
||
|
{
|
||
|
Operand src1 = node.GetSource(0);
|
||
|
Operand src2 = node.GetSource(1);
|
||
|
|
||
|
if (src1.Kind == OperandKind.Constant && (src1.Relocatable || ConstTooLong(src1, OperandType.I32)))
|
||
|
{
|
||
|
node.SetSource(0, GetConstantCopy(block, node, src1));
|
||
|
}
|
||
|
|
||
|
if (src2.Kind == OperandKind.Constant && (src2.Relocatable || ConstTooLong(src2, OperandType.I32)))
|
||
|
{
|
||
|
node.SetSource(1, GetConstantCopy(block, node, src2));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Try to fold something like:
|
||
|
// lsl x1, x1, #2
|
||
|
// add x0, x0, x1
|
||
|
// ldr x0, [x0]
|
||
|
// add x2, x2, #16
|
||
|
// ldr x2, [x2]
|
||
|
// Into:
|
||
|
// ldr x0, [x0, x1, lsl #2]
|
||
|
// ldr x2, [x2, #16]
|
||
|
if (IsMemoryLoadOrStore(node.Instruction))
|
||
|
{
|
||
|
OperandType type;
|
||
|
|
||
|
if (node.Destination != default)
|
||
|
{
|
||
|
type = node.Destination.Type;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
type = node.GetSource(1).Type;
|
||
|
}
|
||
|
|
||
|
Operand memOp = GetMemoryOperandOrNull(node.GetSource(0), type);
|
||
|
|
||
|
if (memOp != default)
|
||
|
{
|
||
|
node.SetSource(0, memOp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
Optimizer.RemoveUnusedNodes(cfg);
|
||
|
}
|
||
|
|
||
|
private static Operand GetMemoryOperandOrNull(Operand addr, OperandType type)
|
||
|
{
|
||
|
Operand baseOp = addr;
|
||
|
|
||
|
// First we check if the address is the result of a local X with immediate
|
||
|
// addition. If that is the case, then the baseOp is X, and the memory operand immediate
|
||
|
// becomes the addition immediate. Otherwise baseOp keeps being the address.
|
||
|
int imm = GetConstOp(ref baseOp, type);
|
||
|
if (imm != 0)
|
||
|
{
|
||
|
return MemoryOp(type, baseOp, default, Multiplier.x1, imm);
|
||
|
}
|
||
|
|
||
|
// Now we check if the baseOp is the result of a local Y with a local Z addition.
|
||
|
// If that is the case, we now set baseOp to Y and indexOp to Z. We further check
|
||
|
// if Z is the result of a left shift of local W by a value == 0 or == Log2(AccessSize),
|
||
|
// if that is the case, we set indexOp to W and adjust the scale value of the memory operand
|
||
|
// to match that of the left shift.
|
||
|
// There is one missed case, which is the address being a shift result, but this is
|
||
|
// probably not worth optimizing as it should never happen.
|
||
|
(Operand indexOp, Multiplier scale) = GetIndexOp(ref baseOp, type);
|
||
|
|
||
|
// If baseOp is still equal to address, then there's nothing that can be optimized.
|
||
|
if (baseOp == addr)
|
||
|
{
|
||
|
return default;
|
||
|
}
|
||
|
|
||
|
return MemoryOp(type, baseOp, indexOp, scale, 0);
|
||
|
}
|
||
|
|
||
|
private static int GetConstOp(ref Operand baseOp, OperandType accessType)
|
||
|
{
|
||
|
Operation operation = GetAsgOpWithInst(baseOp, Instruction.Add);
|
||
|
|
||
|
if (operation == default)
|
||
|
{
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
Operand src1 = operation.GetSource(0);
|
||
|
Operand src2 = operation.GetSource(1);
|
||
|
|
||
|
Operand constOp;
|
||
|
Operand otherOp;
|
||
|
|
||
|
if (src1.Kind == OperandKind.Constant && src2.Kind == OperandKind.LocalVariable)
|
||
|
{
|
||
|
constOp = src1;
|
||
|
otherOp = src2;
|
||
|
}
|
||
|
else if (src1.Kind == OperandKind.LocalVariable && src2.Kind == OperandKind.Constant)
|
||
|
{
|
||
|
constOp = src2;
|
||
|
otherOp = src1;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
// If we have addition by a constant that we can't encode on the instruction,
|
||
|
// then we can't optimize it further.
|
||
|
if (ConstTooLong(constOp, accessType))
|
||
|
{
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
baseOp = otherOp;
|
||
|
|
||
|
return constOp.AsInt32();
|
||
|
}
|
||
|
|
||
|
private static (Operand, Multiplier) GetIndexOp(ref Operand baseOp, OperandType accessType)
|
||
|
{
|
||
|
Operand indexOp = default;
|
||
|
|
||
|
Multiplier scale = Multiplier.x1;
|
||
|
|
||
|
Operation addOp = GetAsgOpWithInst(baseOp, Instruction.Add);
|
||
|
|
||
|
if (addOp == default)
|
||
|
{
|
||
|
return (indexOp, scale);
|
||
|
}
|
||
|
|
||
|
Operand src1 = addOp.GetSource(0);
|
||
|
Operand src2 = addOp.GetSource(1);
|
||
|
|
||
|
if (src1.Kind != OperandKind.LocalVariable || src2.Kind != OperandKind.LocalVariable)
|
||
|
{
|
||
|
return (indexOp, scale);
|
||
|
}
|
||
|
|
||
|
baseOp = src1;
|
||
|
indexOp = src2;
|
||
|
|
||
|
Operation shlOp = GetAsgOpWithInst(src1, Instruction.ShiftLeft);
|
||
|
|
||
|
bool indexOnSrc2 = false;
|
||
|
|
||
|
if (shlOp == default)
|
||
|
{
|
||
|
shlOp = GetAsgOpWithInst(src2, Instruction.ShiftLeft);
|
||
|
|
||
|
indexOnSrc2 = true;
|
||
|
}
|
||
|
|
||
|
if (shlOp != default)
|
||
|
{
|
||
|
Operand shSrc = shlOp.GetSource(0);
|
||
|
Operand shift = shlOp.GetSource(1);
|
||
|
|
||
|
int maxShift = Assembler.GetScaleForType(accessType);
|
||
|
|
||
|
if (shSrc.Kind == OperandKind.LocalVariable &&
|
||
|
shift.Kind == OperandKind.Constant &&
|
||
|
(shift.Value == 0 || shift.Value == (ulong)maxShift))
|
||
|
{
|
||
|
scale = shift.Value switch
|
||
|
{
|
||
|
1 => Multiplier.x2,
|
||
|
2 => Multiplier.x4,
|
||
|
3 => Multiplier.x8,
|
||
|
4 => Multiplier.x16,
|
||
|
_ => Multiplier.x1
|
||
|
};
|
||
|
|
||
|
baseOp = indexOnSrc2 ? src1 : src2;
|
||
|
indexOp = shSrc;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return (indexOp, scale);
|
||
|
}
|
||
|
|
||
|
private static Operation GetAsgOpWithInst(Operand op, Instruction inst)
|
||
|
{
|
||
|
// If we have multiple assignments, folding is not safe
|
||
|
// as the value may be different depending on the
|
||
|
// control flow path.
|
||
|
if (op.AssignmentsCount != 1)
|
||
|
{
|
||
|
return default;
|
||
|
}
|
||
|
|
||
|
Operation asgOp = op.Assignments[0];
|
||
|
|
||
|
if (asgOp.Instruction != inst)
|
||
|
{
|
||
|
return default;
|
||
|
}
|
||
|
|
||
|
return asgOp;
|
||
|
}
|
||
|
|
||
|
private static bool IsMemoryLoadOrStore(Instruction inst)
|
||
|
{
|
||
|
return inst == Instruction.Load || inst == Instruction.Store;
|
||
|
}
|
||
|
|
||
|
private static bool ConstTooLong(Operand constOp, OperandType accessType)
|
||
|
{
|
||
|
if ((uint)constOp.Value != constOp.Value)
|
||
|
{
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
return !CodeGenCommon.ConstFitsOnUImm12(constOp.AsInt32(), accessType);
|
||
|
}
|
||
|
}
|
||
|
}
|