diff --git a/src/ARMeilleure/Common/AddressTable.cs b/src/ARMeilleure/Common/AddressTable.cs index fcab3a202f..5b6d48bbcc 100644 --- a/src/ARMeilleure/Common/AddressTable.cs +++ b/src/ARMeilleure/Common/AddressTable.cs @@ -56,6 +56,8 @@ namespace ARMeilleure.Common private bool _disposed; private TEntry** _table; private readonly List _pages; + private readonly TEntry* _fallbackTable; + private TEntry _fill; /// /// Gets the bits used by the of the instance. @@ -70,7 +72,18 @@ namespace ARMeilleure.Common /// /// Gets or sets the default fill value of newly created leaf pages. /// - public TEntry Fill { get; set; } + public TEntry Fill + { + get + { + return _fill; + } + set + { + *_fallbackTable = value; + _fill = value; + } + } /// /// Gets the base address of the . @@ -89,6 +102,19 @@ namespace ARMeilleure.Common } } + /// + /// Gets a pointer to a single entry table containing only the leaf fill value. + /// + public IntPtr Fallback + { + get + { + ObjectDisposedException.ThrowIf(_disposed, this); + + return (IntPtr)_fallbackTable; + } + } + /// /// Constructs a new instance of the class with the specified list of /// . @@ -113,6 +139,8 @@ namespace ARMeilleure.Common { Mask |= level.Mask; } + + _fallbackTable = (TEntry*)NativeAllocator.Instance.Allocate((ulong)sizeof(TEntry)); } /// @@ -237,6 +265,8 @@ namespace ARMeilleure.Common Marshal.FreeHGlobal(page); } + Marshal.FreeHGlobal((IntPtr)_fallbackTable); + _disposed = true; } } diff --git a/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs b/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs index 2009bafdac..fbfdcefce4 100644 --- a/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs +++ b/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs @@ -193,6 +193,8 @@ namespace ARMeilleure.Instructions Operand hostAddress; + var table = context.FunctionTable; + // If address is mapped onto the function table, we can skip the table walk. Otherwise we fallback // onto the dispatch stub. if (guestAddress.Kind == OperandKind.Constant && context.FunctionTable.IsValid(guestAddress.Value)) @@ -203,6 +205,45 @@ namespace ARMeilleure.Instructions hostAddress = context.Load(OperandType.I64, hostAddressAddr); } + else if (table.Levels.Length == 2) + { + // Inline table lookup. Only enabled when the sparse function table is enabled with 2 levels. + // Deliberately attempts to avoid branches. + + var level0 = table.Levels[0]; + + // Currently no bounds check. Maybe conditionally do this for unsafe host mapped. + Operand index = context.ShiftLeft(context.ShiftRightUI(guestAddress, Const(level0.Index)), Const(3)); + + Operand tableBase = !context.HasPtc ? + Const(table.Base) : + Const(table.Base, Ptc.FunctionTableSymbol); + + Operand page = context.Load(OperandType.I64, context.Add(tableBase, index)); + + // Second level + var level1 = table.Levels[1]; + + int clearBits = 64 - (level1.Index + level1.Length); + + Operand index2 = context.ShiftLeft( + context.ShiftRightUI(context.ShiftLeft(guestAddress, Const(clearBits)), Const(clearBits + level1.Index)), + Const(3) + ); + + // TODO: could possibly make a fallback page that level 1 is filled with that contains dispatch stub on all pages + // Would save this load and the comparisons + // 16MB of the same value is a bit wasteful so it could replicate with remapping. + + Operand fallback = !context.HasPtc ? + Const((long)context.FunctionTable.Fallback) : + Const((long)context.FunctionTable.Fallback, Ptc.DispatchFallbackSymbol); + + Operand pageIsZero = context.ICompareEqual(page, Const(0L)); + + // Small trick to keep this branchless - if the page is zero, load a fallback table entry that always contains the dispatch stub. + hostAddress = context.Load(OperandType.I64, context.ConditionalSelect(pageIsZero, fallback, context.Add(page, index2))); + } else { hostAddress = !context.HasPtc ? diff --git a/src/ARMeilleure/Translation/PTC/Ptc.cs b/src/ARMeilleure/Translation/PTC/Ptc.cs index c2eed7a552..569c28739c 100644 --- a/src/ARMeilleure/Translation/PTC/Ptc.cs +++ b/src/ARMeilleure/Translation/PTC/Ptc.cs @@ -40,6 +40,8 @@ namespace ARMeilleure.Translation.PTC public static readonly Symbol PageTableSymbol = new(SymbolType.Special, 1); public static readonly Symbol CountTableSymbol = new(SymbolType.Special, 2); public static readonly Symbol DispatchStubSymbol = new(SymbolType.Special, 3); + public static readonly Symbol FunctionTableSymbol = new(SymbolType.Special, 4); + public static readonly Symbol DispatchFallbackSymbol = new(SymbolType.Special, 5); private const byte FillingByte = 0x00; private const CompressionLevel SaveCompressionLevel = CompressionLevel.Fastest; @@ -705,6 +707,14 @@ namespace ARMeilleure.Translation.PTC { imm = translator.Stubs.DispatchStub; } + else if (symbol == FunctionTableSymbol) + { + imm = translator.FunctionTable.Base; + } + else if (symbol == DispatchFallbackSymbol) + { + imm = translator.FunctionTable.Fallback; + } if (imm == null) { diff --git a/src/ARMeilleure/Translation/Translator.cs b/src/ARMeilleure/Translation/Translator.cs index 014b12035b..c3796cb99b 100644 --- a/src/ARMeilleure/Translation/Translator.cs +++ b/src/ARMeilleure/Translation/Translator.cs @@ -22,6 +22,8 @@ namespace ARMeilleure.Translation { public class Translator { + private const bool UseSparseTable = true; + private static readonly AddressTable.Level[] _levels64Bit = new AddressTable.Level[] { @@ -42,6 +44,20 @@ namespace ARMeilleure.Translation new( 1, 6), }; + private static readonly AddressTable.Level[] _levels64BitSparse = + new AddressTable.Level[] + { + new(23, 16), + new( 2, 21), + }; + + private static readonly AddressTable.Level[] _levels32BitSparse = + new AddressTable.Level[] + { + new(22, 10), + new( 1, 21), + }; + private readonly IJitMemoryAllocator _allocator; private readonly ConcurrentQueue> _oldFuncs; @@ -70,9 +86,20 @@ namespace ARMeilleure.Translation JitCache.Initialize(allocator); + AddressTable.Level[] levels; + + if (UseSparseTable) + { + levels = for64Bits ? _levels64BitSparse : _levels32BitSparse; + } + else + { + levels = for64Bits ? _levels64Bit : _levels32Bit; + } + CountTable = new EntryTable(); Functions = new TranslatorCache(); - FunctionTable = new AddressTable(for64Bits ? _levels64Bit : _levels32Bit); + FunctionTable = new AddressTable(levels); Stubs = new TranslatorStubs(FunctionTable); FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub;