Ryujinx/ARMeilleure/Decoders/Decoder.cs
LDj3SNuD 5e724cf24e
Add Profiled Persistent Translation Cache. (#769)
* Delete DelegateTypes.cs

* Delete DelegateCache.cs

* Add files via upload

* Update Horizon.cs

* Update Program.cs

* Update MainWindow.cs

* Update Aot.cs

* Update RelocEntry.cs

* Update Translator.cs

* Update MemoryManager.cs

* Update InstEmitMemoryHelper.cs

* Update Delegates.cs

* Nit.

* Nit.

* Nit.

* 10 fewer MSIL bytes for us

* Add comment. Nits.

* Update Translator.cs

* Update Aot.cs

* Nits.

* Opt..

* Opt..

* Opt..

* Opt..

* Allow to change compression level.

* Update MemoryManager.cs

* Update Translator.cs

* Manage corner cases during the save phase. Nits.

* Update Aot.cs

* Translator response tweak for Aot disabled. Nit.

* Nit.

* Nits.

* Create DelegateHelpers.cs

* Update Delegates.cs

* Nit.

* Nit.

* Nits.

* Fix due to #784.

* Fixes due to #757 & #841.

* Fix due to #846.

* Fix due to #847.

* Use MethodInfo for managed method calls.

Use IR methods instead of managed methods about Max/Min (S/U).
Follow-ups & Nits.

* Add missing exception messages.

Reintroduce slow path for Fmov_Vi.
Implement slow path for Fmov_Si.

* Switch to the new folder structure.

Nits.

* Impl. index-based relocation information. Impl. cache file version field.

* Nit.

* Address gdkchan comments.

Mainly:
- fixed cache file corruption issue on exit; - exposed a way to disable AOT on the GUI.

* Address AcK77 comment.

* Address Thealexbarney, jduncanator & emmauss comments.

Header magic, CpuId (FI) & Aot -> Ptc.

* Adaptation to the new application reloading system.

Improvements to the call system of managed methods.
Follow-ups.
Nits.

* Get the same boot times as on master when PTC is disabled.

* Profiled Aot.

* A32 support (#897).

* #975 support (1 of 2).

* #975 support (2 of 2).

* Rebase fix & nits.

* Some fixes and nits (still one bug left).

* One fix & nits.

* Tests fix (by gdk) & nits.

* Support translations not only in high quality and rejit.

Nits.

* Added possibility to skip translations and continue execution, using `ESC` key.

* Update SettingsWindow.cs

* Update GLRenderer.cs

* Update Ptc.cs

* Disabled Profiled PTC by default as requested in the past by gdk.

* Fix rejit bug. Increased number of parallel translations. Add stack unwinding stuffs support (1 of 2).

Nits.

* Add stack unwinding stuffs support (2 of 2). Tuned number of parallel translations.

* Restored the ability to assemble jumps with 8-bit offset when Profiled PTC is disabled or during profiling.

Modifications due to rebase.
Nits.

* Limited profiling of the functions to be translated to the addresses belonging to the range of static objects only.

* Nits.

* Nits.

* Update Delegates.cs

* Nit.

* Update InstEmitSimdArithmetic.cs

* Address riperiperi comments.

* Fixed the issue of unjustifiably longer boot times at the second boot than at the first boot, measured at the same time or reference point and with the same number of translated functions.

* Implemented a simple redundant load/save mechanism.

Halved the value of Decoder.MaxInstsPerFunction more appropriate for the current performance of the Translator.
Replaced by Logger.PrintError to Logger.PrintDebug in TexturePool.cs about the supposed invalid texture format to avoid the spawn of the log.
Nits.

* Nit.

Improved Logger.PrintError in TexturePool.cs to avoid log spawn.
Added missing code for FZ handling (in output) for fp max/min instructions (slow paths).

* Add configuration migration for PTC

Co-authored-by: Thog <me@thog.eu>
2020-06-16 20:28:02 +02:00

339 lines
11 KiB
C#

using ARMeilleure.Decoders.Optimizations;
using ARMeilleure.Instructions;
using ARMeilleure.Memory;
using ARMeilleure.State;
using System;
using System.Collections.Generic;
namespace ARMeilleure.Decoders
{
static class Decoder
{
// We define a limit on the number of instructions that a function may have,
// this prevents functions being potentially too large, which would
// take too long to compile and use too much memory.
private const int MaxInstsPerFunction = 2500;
// For lower code quality translation, we set a lower limit since we're blocking execution.
private const int MaxInstsPerFunctionLowCq = 500;
public static Block[] DecodeBasicBlock(IMemoryManager memory, ulong address, ExecutionMode mode)
{
Block block = new Block(address);
FillBlock(memory, mode, block, ulong.MaxValue);
return new Block[] { block };
}
public static Block[] DecodeFunction(IMemoryManager memory, ulong address, ExecutionMode mode, bool highCq)
{
List<Block> blocks = new List<Block>();
Queue<Block> workQueue = new Queue<Block>();
Dictionary<ulong, Block> visited = new Dictionary<ulong, Block>();
int opsCount = 0;
int instructionLimit = highCq ? MaxInstsPerFunction : MaxInstsPerFunctionLowCq;
Block GetBlock(ulong blkAddress)
{
if (!visited.TryGetValue(blkAddress, out Block block))
{
if (opsCount > instructionLimit || !memory.IsMapped(blkAddress))
{
return null;
}
block = new Block(blkAddress);
workQueue.Enqueue(block);
visited.Add(blkAddress, block);
}
return block;
}
GetBlock(address);
while (workQueue.TryDequeue(out Block currBlock))
{
// Check if the current block is inside another block.
if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex))
{
Block nBlock = blocks[nBlkIndex];
if (nBlock.Address == currBlock.Address)
{
throw new InvalidOperationException("Found duplicate block address on the list.");
}
nBlock.Split(currBlock);
blocks.Insert(nBlkIndex + 1, currBlock);
continue;
}
// If we have a block after the current one, set the limit address.
ulong limitAddress = ulong.MaxValue;
if (nBlkIndex != blocks.Count)
{
Block nBlock = blocks[nBlkIndex];
int nextIndex = nBlkIndex + 1;
if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
{
limitAddress = blocks[nextIndex].Address;
}
else if (nBlock.Address > currBlock.Address)
{
limitAddress = blocks[nBlkIndex].Address;
}
}
FillBlock(memory, mode, currBlock, limitAddress);
opsCount += currBlock.OpCodes.Count;
if (currBlock.OpCodes.Count != 0)
{
// Set child blocks. "Branch" is the block the branch instruction
// points to (when taken), "Next" is the block at the next address,
// executed when the branch is not taken. For Unconditional Branches
// (except BL/BLR that are sub calls) or end of executable, Next is null.
OpCode lastOp = currBlock.GetLastOp();
bool isCall = IsCall(lastOp);
if (lastOp is IOpCodeBImm op && !isCall)
{
currBlock.Branch = GetBlock((ulong)op.Immediate);
}
if (!IsUnconditionalBranch(lastOp) || isCall)
{
currBlock.Next = GetBlock(currBlock.EndAddress);
}
}
// Insert the new block on the list (sorted by address).
if (blocks.Count != 0)
{
Block nBlock = blocks[nBlkIndex];
blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock);
}
else
{
blocks.Add(currBlock);
}
}
TailCallRemover.RunPass(address, blocks);
return blocks.ToArray();
}
public static bool BinarySearch(List<Block> blocks, ulong address, out int index)
{
index = 0;
int left = 0;
int right = blocks.Count - 1;
while (left <= right)
{
int size = right - left;
int middle = left + (size >> 1);
Block block = blocks[middle];
index = middle;
if (address >= block.Address && address < block.EndAddress)
{
return true;
}
if (address < block.Address)
{
right = middle - 1;
}
else
{
left = middle + 1;
}
}
return false;
}
private static void FillBlock(
IMemoryManager memory,
ExecutionMode mode,
Block block,
ulong limitAddress)
{
ulong address = block.Address;
OpCode opCode;
do
{
if (address >= limitAddress)
{
break;
}
opCode = DecodeOpCode(memory, address, mode);
block.OpCodes.Add(opCode);
address += (ulong)opCode.OpCodeSizeInBytes;
}
while (!(IsBranch(opCode) || IsException(opCode)));
block.EndAddress = address;
}
private static bool IsBranch(OpCode opCode)
{
return opCode is OpCodeBImm ||
opCode is OpCodeBReg || IsAarch32Branch(opCode);
}
private static bool IsUnconditionalBranch(OpCode opCode)
{
return opCode is OpCodeBImmAl ||
opCode is OpCodeBReg || IsAarch32UnconditionalBranch(opCode);
}
private static bool IsAarch32UnconditionalBranch(OpCode opCode)
{
if (!(opCode is OpCode32 op))
{
return false;
}
// Note: On ARM32, most instructions have conditional execution,
// so there's no "Always" (unconditional) branch like on ARM64.
// We need to check if the condition is "Always" instead.
return IsAarch32Branch(op) && op.Cond >= Condition.Al;
}
private static bool IsAarch32Branch(OpCode opCode)
{
// Note: On ARM32, most ALU operations can write to R15 (PC),
// so we must consider such operations as a branch in potential aswell.
if (opCode is IOpCode32Alu opAlu && opAlu.Rd == RegisterAlias.Aarch32Pc)
{
return true;
}
// Same thing for memory operations. We have the cases where PC is a target
// register (Rt == 15 or (mask & (1 << 15)) != 0), and cases where there is
// a write back to PC (wback == true && Rn == 15), however the later may
// be "undefined" depending on the CPU, so compilers should not produce that.
if (opCode is IOpCode32Mem || opCode is IOpCode32MemMult)
{
int rt, rn;
bool wBack, isLoad;
if (opCode is IOpCode32Mem opMem)
{
rt = opMem.Rt;
rn = opMem.Rn;
wBack = opMem.WBack;
isLoad = opMem.IsLoad;
// For the dual load, we also need to take into account the
// case were Rt2 == 15 (PC).
if (rt == 14 && opMem.Instruction.Name == InstName.Ldrd)
{
rt = RegisterAlias.Aarch32Pc;
}
}
else if (opCode is IOpCode32MemMult opMemMult)
{
const int pcMask = 1 << RegisterAlias.Aarch32Pc;
rt = (opMemMult.RegisterMask & pcMask) != 0 ? RegisterAlias.Aarch32Pc : 0;
rn = opMemMult.Rn;
wBack = opMemMult.PostOffset != 0;
isLoad = opMemMult.IsLoad;
}
else
{
throw new NotImplementedException($"The type \"{opCode.GetType().Name}\" is not implemented on the decoder.");
}
if ((rt == RegisterAlias.Aarch32Pc && isLoad) ||
(rn == RegisterAlias.Aarch32Pc && wBack))
{
return true;
}
}
// Explicit branch instructions.
return opCode is IOpCode32BImm ||
opCode is IOpCode32BReg;
}
private static bool IsCall(OpCode opCode)
{
return opCode.Instruction.Name == InstName.Bl ||
opCode.Instruction.Name == InstName.Blr ||
opCode.Instruction.Name == InstName.Blx;
}
private static bool IsException(OpCode opCode)
{
return opCode.Instruction.Name == InstName.Brk ||
opCode.Instruction.Name == InstName.Svc ||
opCode.Instruction.Name == InstName.Trap ||
opCode.Instruction.Name == InstName.Und;
}
public static OpCode DecodeOpCode(IMemoryManager memory, ulong address, ExecutionMode mode)
{
int opCode = memory.Read<int>(address);
InstDescriptor inst;
OpCodeTable.MakeOp makeOp;
if (mode == ExecutionMode.Aarch64)
{
(inst, makeOp) = OpCodeTable.GetInstA64(opCode);
}
else
{
if (mode == ExecutionMode.Aarch32Arm)
{
(inst, makeOp) = OpCodeTable.GetInstA32(opCode);
}
else /* if (mode == ExecutionMode.Aarch32Thumb) */
{
(inst, makeOp) = OpCodeTable.GetInstT32(opCode);
}
}
if (makeOp != null)
{
return (OpCode)makeOp(inst, address, opCode);
}
else
{
return new OpCode(inst, address, opCode);
}
}
}
}