Ryujinx/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
riperiperi 54ea2285f0
POWER - Performance Optimizations With Extensive Ramifications (#2286)
* Refactoring of KMemoryManager class

* Replace some trivial uses of DRAM address with VA

* Get rid of GetDramAddressFromVa

* Abstracting more operations on derived page table class

* Run auto-format on KPageTableBase

* Managed to make TryConvertVaToPa private, few uses remains now

* Implement guest physical pages ref counting, remove manual freeing

* Make DoMmuOperation private and call new abstract methods only from the base class

* Pass pages count rather than size on Map/UnmapMemory

* Change memory managers to take host pointers

* Fix a guest memory leak and simplify KPageTable

* Expose new methods for host range query and mapping

* Some refactoring of MapPagesFromClientProcess to allow proper page ref counting and mapping without KPageLists

* Remove more uses of AddVaRangeToPageList, now only one remains (shared memory page checking)

* Add a SharedMemoryStorage class, will be useful for host mapping

* Sayonara AddVaRangeToPageList, you served us well

* Start to implement host memory mapping (WIP)

* Support memory tracking through host exception handling

* Fix some access violations from HLE service guest memory access and CPU

* Fix memory tracking

* Fix mapping list bugs, including a race and a error adding mapping ranges

* Simple page table for memory tracking

* Simple "volatile" region handle mode

* Update UBOs directly (experimental, rough)

* Fix the overlap check

* Only set non-modified buffers as volatile

* Fix some memory tracking issues

* Fix possible race in MapBufferFromClientProcess (block list updates were not locked)

* Write uniform update to memory immediately, only defer the buffer set.

* Fix some memory tracking issues

* Pass correct pages count on shared memory unmap

* Armeilleure Signal Handler v1 + Unix changes

Unix currently behaves like windows, rather than remapping physical

* Actually check if the host platform is unix

* Fix decommit on linux.

* Implement windows 10 placeholder shared memory, fix a buffer issue.

* Make PTC version something that will never match with master

* Remove testing variable for block count

* Add reference count for memory manager, fix dispose

Can still deadlock with OpenAL

* Add address validation, use page table for mapped check, add docs

Might clean up the page table traversing routines.

* Implement batched mapping/tracking.

* Move documentation, fix tests.

* Cleanup uniform buffer update stuff.

* Remove unnecessary assignment.

* Add unsafe host mapped memory switch

On by default. Would be good to turn this off for untrusted code (homebrew, exefs mods) and give the user the option to turn it on manually, though that requires some UI work.

* Remove C# exception handlers

They have issues due to current .NET limitations, so the meilleure one fully replaces them for now.

* Fix MapPhysicalMemory on the software MemoryManager.

* Null check for GetHostAddress, docs

* Add configuration for setting memory manager mode (not in UI yet)

* Add config to UI

* Fix type mismatch on Unix signal handler code emit

* Fix 6GB DRAM mode.

The size can be greater than `uint.MaxValue` when the DRAM is >4GB.

* Address some feedback.

* More detailed error if backing memory cannot be mapped.

* SetLastError on all OS functions for consistency

* Force pages dirty with UBO update instead of setting them directly.

Seems to be much faster across a few games. Need retesting.

* Rebase, configuration rework, fix mem tracking regression

* Fix race in FreePages

* Set memory managers null after decrementing ref count

* Remove readonly keyword, as this is now modified.

* Use a local variable for the signal handler rather than a register.

* Fix bug with buffer resize, and index/uniform buffer binding.

Should fix flickering in games.

* Add InvalidAccessHandler to MemoryTracking

Doesn't do anything yet

* Call invalid access handler on unmapped read/write.

Same rules as the regular memory manager.

* Make unsafe mapped memory its own MemoryManagerType

* Move FlushUboDirty into UpdateState.

* Buffer dirty cache, rather than ubo cache

Much cleaner, may be reusable for Inline2Memory updates.

* This doesn't return anything anymore.

* Add sigaction remove methods, correct a few function signatures.

* Return empty list of physical regions for size 0.

* Also on AddressSpaceManager

Co-authored-by: gdkchan <gab.dark.100@gmail.com>
2021-05-24 22:52:44 +02:00

609 lines
22 KiB
C#

using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Memory;
using ARMeilleure.Translation;
using ARMeilleure.Translation.PTC;
using System;
using System.Reflection;
using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
namespace ARMeilleure.Instructions
{
static class InstEmitMemoryHelper
{
private const int PageBits = 12;
private const int PageMask = (1 << PageBits) - 1;
private enum Extension
{
Zx,
Sx32,
Sx64
}
public static void EmitLoadZx(ArmEmitterContext context, Operand address, int rt, int size)
{
EmitLoad(context, address, Extension.Zx, rt, size);
}
public static void EmitLoadSx32(ArmEmitterContext context, Operand address, int rt, int size)
{
EmitLoad(context, address, Extension.Sx32, rt, size);
}
public static void EmitLoadSx64(ArmEmitterContext context, Operand address, int rt, int size)
{
EmitLoad(context, address, Extension.Sx64, rt, size);
}
private static void EmitLoad(ArmEmitterContext context, Operand address, Extension ext, int rt, int size)
{
bool isSimd = IsSimd(context);
if ((uint)size > (isSimd ? 4 : 3))
{
throw new ArgumentOutOfRangeException(nameof(size));
}
if (isSimd)
{
EmitReadVector(context, address, context.VectorZero(), rt, 0, size);
}
else
{
EmitReadInt(context, address, rt, size);
}
if (!isSimd && !(context.CurrOp is OpCode32 && rt == State.RegisterAlias.Aarch32Pc))
{
Operand value = GetInt(context, rt);
if (ext == Extension.Sx32 || ext == Extension.Sx64)
{
OperandType destType = ext == Extension.Sx64 ? OperandType.I64 : OperandType.I32;
switch (size)
{
case 0: value = context.SignExtend8 (destType, value); break;
case 1: value = context.SignExtend16(destType, value); break;
case 2: value = context.SignExtend32(destType, value); break;
}
}
SetInt(context, rt, value);
}
}
public static void EmitLoadSimd(
ArmEmitterContext context,
Operand address,
Operand vector,
int rt,
int elem,
int size)
{
EmitReadVector(context, address, vector, rt, elem, size);
}
public static void EmitStore(ArmEmitterContext context, Operand address, int rt, int size)
{
bool isSimd = IsSimd(context);
if ((uint)size > (isSimd ? 4 : 3))
{
throw new ArgumentOutOfRangeException(nameof(size));
}
if (isSimd)
{
EmitWriteVector(context, address, rt, 0, size);
}
else
{
EmitWriteInt(context, address, rt, size);
}
}
public static void EmitStoreSimd(
ArmEmitterContext context,
Operand address,
int rt,
int elem,
int size)
{
EmitWriteVector(context, address, rt, elem, size);
}
private static bool IsSimd(ArmEmitterContext context)
{
return context.CurrOp is IOpCodeSimd &&
!(context.CurrOp is OpCodeSimdMemMs ||
context.CurrOp is OpCodeSimdMemSs);
}
private static void EmitReadInt(ArmEmitterContext context, Operand address, int rt, int size)
{
Operand lblSlowPath = Label();
Operand lblEnd = Label();
Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false, size);
Operand value = null;
switch (size)
{
case 0: value = context.Load8 (physAddr); break;
case 1: value = context.Load16(physAddr); break;
case 2: value = context.Load (OperandType.I32, physAddr); break;
case 3: value = context.Load (OperandType.I64, physAddr); break;
}
SetInt(context, rt, value);
if (!context.Memory.Type.IsHostMapped())
{
context.Branch(lblEnd);
context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
EmitReadIntFallback(context, address, rt, size);
context.MarkLabel(lblEnd);
}
}
public static Operand EmitReadIntAligned(ArmEmitterContext context, Operand address, int size)
{
if ((uint)size > 4)
{
throw new ArgumentOutOfRangeException(nameof(size));
}
Operand physAddr = EmitPtPointerLoad(context, address, null, write: false, size);
return size switch
{
0 => context.Load8(physAddr),
1 => context.Load16(physAddr),
2 => context.Load(OperandType.I32, physAddr),
3 => context.Load(OperandType.I64, physAddr),
_ => context.Load(OperandType.V128, physAddr)
};
}
private static void EmitReadVector(
ArmEmitterContext context,
Operand address,
Operand vector,
int rt,
int elem,
int size)
{
Operand lblSlowPath = Label();
Operand lblEnd = Label();
Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false, size);
Operand value = null;
switch (size)
{
case 0: value = context.VectorInsert8 (vector, context.Load8(physAddr), elem); break;
case 1: value = context.VectorInsert16(vector, context.Load16(physAddr), elem); break;
case 2: value = context.VectorInsert (vector, context.Load(OperandType.I32, physAddr), elem); break;
case 3: value = context.VectorInsert (vector, context.Load(OperandType.I64, physAddr), elem); break;
case 4: value = context.Load (OperandType.V128, physAddr); break;
}
context.Copy(GetVec(rt), value);
if (!context.Memory.Type.IsHostMapped())
{
context.Branch(lblEnd);
context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
EmitReadVectorFallback(context, address, vector, rt, elem, size);
context.MarkLabel(lblEnd);
}
}
private static Operand VectorCreate(ArmEmitterContext context, Operand value)
{
return context.VectorInsert(context.VectorZero(), value, 0);
}
private static void EmitWriteInt(ArmEmitterContext context, Operand address, int rt, int size)
{
Operand lblSlowPath = Label();
Operand lblEnd = Label();
Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: true, size);
Operand value = GetInt(context, rt);
if (size < 3 && value.Type == OperandType.I64)
{
value = context.ConvertI64ToI32(value);
}
switch (size)
{
case 0: context.Store8 (physAddr, value); break;
case 1: context.Store16(physAddr, value); break;
case 2: context.Store (physAddr, value); break;
case 3: context.Store (physAddr, value); break;
}
if (!context.Memory.Type.IsHostMapped())
{
context.Branch(lblEnd);
context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
EmitWriteIntFallback(context, address, rt, size);
context.MarkLabel(lblEnd);
}
}
public static void EmitWriteIntAligned(ArmEmitterContext context, Operand address, Operand value, int size)
{
if ((uint)size > 4)
{
throw new ArgumentOutOfRangeException(nameof(size));
}
Operand physAddr = EmitPtPointerLoad(context, address, null, write: true, size);
if (size < 3 && value.Type == OperandType.I64)
{
value = context.ConvertI64ToI32(value);
}
if (size == 0)
{
context.Store8(physAddr, value);
}
else if (size == 1)
{
context.Store16(physAddr, value);
}
else
{
context.Store(physAddr, value);
}
}
private static void EmitWriteVector(
ArmEmitterContext context,
Operand address,
int rt,
int elem,
int size)
{
Operand lblSlowPath = Label();
Operand lblEnd = Label();
Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: true, size);
Operand value = GetVec(rt);
switch (size)
{
case 0: context.Store8 (physAddr, context.VectorExtract8(value, elem)); break;
case 1: context.Store16(physAddr, context.VectorExtract16(value, elem)); break;
case 2: context.Store (physAddr, context.VectorExtract(OperandType.I32, value, elem)); break;
case 3: context.Store (physAddr, context.VectorExtract(OperandType.I64, value, elem)); break;
case 4: context.Store (physAddr, value); break;
}
if (!context.Memory.Type.IsHostMapped())
{
context.Branch(lblEnd);
context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
EmitWriteVectorFallback(context, address, rt, elem, size);
context.MarkLabel(lblEnd);
}
}
public static Operand EmitPtPointerLoad(ArmEmitterContext context, Operand address, Operand lblSlowPath, bool write, int size)
{
if (context.Memory.Type.IsHostMapped())
{
return EmitHostMappedPointer(context, address);
}
int ptLevelBits = context.Memory.AddressSpaceBits - PageBits;
int ptLevelSize = 1 << ptLevelBits;
int ptLevelMask = ptLevelSize - 1;
Operand addrRotated = size != 0 ? context.RotateRight(address, Const(size)) : address;
Operand addrShifted = context.ShiftRightUI(addrRotated, Const(PageBits - size));
Operand pte = Ptc.State == PtcState.Disabled
? Const(context.Memory.PageTablePointer.ToInt64())
: Const(context.Memory.PageTablePointer.ToInt64(), true, Ptc.PageTablePointerIndex);
Operand pteOffset = context.BitwiseAnd(addrShifted, Const(addrShifted.Type, ptLevelMask));
if (pteOffset.Type == OperandType.I32)
{
pteOffset = context.ZeroExtend32(OperandType.I64, pteOffset);
}
pte = context.Load(OperandType.I64, context.Add(pte, context.ShiftLeft(pteOffset, Const(3))));
if (addrShifted.Type == OperandType.I32)
{
addrShifted = context.ZeroExtend32(OperandType.I64, addrShifted);
}
// If the VA is out of range, or not aligned to the access size, force PTE to 0 by masking it.
pte = context.BitwiseAnd(pte, context.ShiftRightSI(context.Add(addrShifted, Const(-(long)ptLevelSize)), Const(63)));
if (lblSlowPath != null)
{
if (write)
{
context.BranchIf(lblSlowPath, pte, Const(0L), Comparison.LessOrEqual);
pte = context.BitwiseAnd(pte, Const(0xffffffffffffUL)); // Ignore any software protection bits. (they are still used by C# memory access)
}
else
{
pte = context.ShiftLeft(pte, Const(1));
context.BranchIf(lblSlowPath, pte, Const(0L), Comparison.LessOrEqual);
pte = context.ShiftRightUI(pte, Const(1));
}
}
else
{
// When no label is provided to jump to a slow path if the address is invalid,
// we do the validation ourselves, and throw if needed.
Operand lblNotWatched = Label();
// Is the page currently being tracked for read/write? If so we need to call SignalMemoryTracking.
context.BranchIf(lblNotWatched, pte, Const(0L), Comparison.GreaterOrEqual, BasicBlockFrequency.Cold);
// Signal memory tracking. Size here doesn't matter as address is assumed to be size aligned here.
context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.SignalMemoryTracking)), address, Const(1UL), Const(write ? 1 : 0));
context.MarkLabel(lblNotWatched);
pte = context.BitwiseAnd(pte, Const(0xffffffffffffUL)); // Ignore any software protection bits. (they are still used by C# memory access)
Operand lblNonNull = Label();
// Skip exception if the PTE address is non-null (not zero).
context.BranchIfTrue(lblNonNull, pte, BasicBlockFrequency.Cold);
// The call is not expected to return (it should throw).
context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)), address);
context.MarkLabel(lblNonNull);
}
Operand pageOffset = context.BitwiseAnd(address, Const(address.Type, PageMask));
if (pageOffset.Type == OperandType.I32)
{
pageOffset = context.ZeroExtend32(OperandType.I64, pageOffset);
}
return context.Add(pte, pageOffset);
}
public static Operand EmitHostMappedPointer(ArmEmitterContext context, Operand address)
{
if (address.Type == OperandType.I32)
{
address = context.ZeroExtend32(OperandType.I64, address);
}
if (context.Memory.Type == MemoryManagerType.HostMapped)
{
Operand mask = Const(ulong.MaxValue >> (64 - context.Memory.AddressSpaceBits));
address = context.BitwiseAnd(address, mask);
}
Operand baseAddr = Ptc.State == PtcState.Disabled
? Const(context.Memory.PageTablePointer.ToInt64())
: Const(context.Memory.PageTablePointer.ToInt64(), true, Ptc.PageTablePointerIndex);
return context.Add(baseAddr, address);
}
private static void EmitReadIntFallback(ArmEmitterContext context, Operand address, int rt, int size)
{
MethodInfo info = null;
switch (size)
{
case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadByte)); break;
case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt16)); break;
case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt32)); break;
case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt64)); break;
}
SetInt(context, rt, context.Call(info, address));
}
private static void EmitReadVectorFallback(
ArmEmitterContext context,
Operand address,
Operand vector,
int rt,
int elem,
int size)
{
MethodInfo info = null;
switch (size)
{
case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadByte)); break;
case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt16)); break;
case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt32)); break;
case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt64)); break;
case 4: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadVector128)); break;
}
Operand value = context.Call(info, address);
switch (size)
{
case 0: value = context.VectorInsert8 (vector, value, elem); break;
case 1: value = context.VectorInsert16(vector, value, elem); break;
case 2: value = context.VectorInsert (vector, value, elem); break;
case 3: value = context.VectorInsert (vector, value, elem); break;
}
context.Copy(GetVec(rt), value);
}
private static void EmitWriteIntFallback(ArmEmitterContext context, Operand address, int rt, int size)
{
MethodInfo info = null;
switch (size)
{
case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteByte)); break;
case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt16)); break;
case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt32)); break;
case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt64)); break;
}
Operand value = GetInt(context, rt);
if (size < 3 && value.Type == OperandType.I64)
{
value = context.ConvertI64ToI32(value);
}
context.Call(info, address, value);
}
private static void EmitWriteVectorFallback(
ArmEmitterContext context,
Operand address,
int rt,
int elem,
int size)
{
MethodInfo info = null;
switch (size)
{
case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteByte)); break;
case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt16)); break;
case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt32)); break;
case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt64)); break;
case 4: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteVector128)); break;
}
Operand value = null;
if (size < 4)
{
switch (size)
{
case 0: value = context.VectorExtract8 (GetVec(rt), elem); break;
case 1: value = context.VectorExtract16(GetVec(rt), elem); break;
case 2: value = context.VectorExtract (OperandType.I32, GetVec(rt), elem); break;
case 3: value = context.VectorExtract (OperandType.I64, GetVec(rt), elem); break;
}
}
else
{
value = GetVec(rt);
}
context.Call(info, address, value);
}
private static Operand GetInt(ArmEmitterContext context, int rt)
{
return context.CurrOp is OpCode32 ? GetIntA32(context, rt) : GetIntOrZR(context, rt);
}
private static void SetInt(ArmEmitterContext context, int rt, Operand value)
{
if (context.CurrOp is OpCode32)
{
SetIntA32(context, rt, value);
}
else
{
SetIntOrZR(context, rt, value);
}
}
// ARM32 helpers.
public static Operand GetMemM(ArmEmitterContext context, bool setCarry = true)
{
switch (context.CurrOp)
{
case OpCode32MemRsImm op: return GetMShiftedByImmediate(context, op, setCarry);
case OpCode32MemReg op: return GetIntA32(context, op.Rm);
case OpCode32Mem op: return Const(op.Immediate);
case OpCode32SimdMemImm op: return Const(op.Immediate);
default: throw InvalidOpCodeType(context.CurrOp);
}
}
private static Exception InvalidOpCodeType(OpCode opCode)
{
return new InvalidOperationException($"Invalid OpCode type \"{opCode?.GetType().Name ?? "null"}\".");
}
public static Operand GetMShiftedByImmediate(ArmEmitterContext context, OpCode32MemRsImm op, bool setCarry)
{
Operand m = GetIntA32(context, op.Rm);
int shift = op.Immediate;
if (shift == 0)
{
switch (op.ShiftType)
{
case ShiftType.Lsr: shift = 32; break;
case ShiftType.Asr: shift = 32; break;
case ShiftType.Ror: shift = 1; break;
}
}
if (shift != 0)
{
setCarry &= false;
switch (op.ShiftType)
{
case ShiftType.Lsl: m = InstEmitAluHelper.GetLslC(context, m, setCarry, shift); break;
case ShiftType.Lsr: m = InstEmitAluHelper.GetLsrC(context, m, setCarry, shift); break;
case ShiftType.Asr: m = InstEmitAluHelper.GetAsrC(context, m, setCarry, shift); break;
case ShiftType.Ror:
if (op.Immediate != 0)
{
m = InstEmitAluHelper.GetRorC(context, m, setCarry, shift);
}
else
{
m = InstEmitAluHelper.GetRrxC(context, m, setCarry);
}
break;
}
}
return m;
}
}
}