From 61634dd415fb71b3ae85871a0873d00195b0900c Mon Sep 17 00:00:00 2001 From: gdkchan Date: Wed, 16 Dec 2020 17:07:42 -0300 Subject: [PATCH] Clear JIT cache on exit (#1518) * Initial cache memory allocator implementation * Get rid of CallFlag * Perform cache cleanup on exit * Basic cache invalidation * Thats not how conditionals works in C# it seems * Set PTC version to PR number * Address PR feedback * Update InstEmitFlowHelper.cs * Flag clear on address is no longer needed * Do not include exit block in function size calculation * Dispose jump table * For future use * InternalVersion = 1519 (force retest). Co-authored-by: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> --- ARMeilleure/Common/BitMap.cs | 34 +++- ARMeilleure/Instructions/InstEmitAluHelper.cs | 5 +- .../Instructions/InstEmitFlowHelper.cs | 38 ++-- ARMeilleure/Instructions/InstEmitHelper.cs | 2 +- ARMeilleure/Instructions/NativeInterface.cs | 24 ++- ARMeilleure/Memory/ReservedRegion.cs | 5 + ARMeilleure/Translation/ArmEmitterContext.cs | 19 +- ARMeilleure/Translation/Cache/CacheEntry.cs | 26 +++ .../Translation/Cache/CacheMemoryAllocator.cs | 96 +++++++++ ARMeilleure/Translation/Cache/JitCache.cs | 182 ++++++++++++++++++ .../{ => Cache}/JitUnwindWindows.cs | 10 +- .../Translation/{ => Cache}/JumpTable.cs | 143 ++++++++++---- .../Cache/JumpTableEntryAllocator.cs | 72 +++++++ ARMeilleure/Translation/Compiler.cs | 4 +- ARMeilleure/Translation/Delegates.cs | 1 + ARMeilleure/Translation/DirectCallStubs.cs | 1 - ARMeilleure/Translation/JitCache.cs | 142 -------------- ARMeilleure/Translation/JitCacheEntry.cs | 19 -- ARMeilleure/Translation/PTC/Ptc.cs | 24 ++- ARMeilleure/Translation/PTC/PtcJumpTable.cs | 126 +++++++----- ARMeilleure/Translation/TranslatedFunction.cs | 15 +- ARMeilleure/Translation/Translator.cs | 170 +++++++++++++--- Ryujinx.Cpu/CpuContext.cs | 16 +- Ryujinx.Cpu/MemoryManager.cs | 10 + 24 files changed, 827 insertions(+), 357 deletions(-) create mode 100644 ARMeilleure/Translation/Cache/CacheEntry.cs create mode 100644 ARMeilleure/Translation/Cache/CacheMemoryAllocator.cs create mode 100644 ARMeilleure/Translation/Cache/JitCache.cs rename ARMeilleure/Translation/{ => Cache}/JitUnwindWindows.cs (95%) rename ARMeilleure/Translation/{ => Cache}/JumpTable.cs (57%) create mode 100644 ARMeilleure/Translation/Cache/JumpTableEntryAllocator.cs delete mode 100644 ARMeilleure/Translation/JitCache.cs delete mode 100644 ARMeilleure/Translation/JitCacheEntry.cs diff --git a/ARMeilleure/Common/BitMap.cs b/ARMeilleure/Common/BitMap.cs index 35100536..b1c9a673 100644 --- a/ARMeilleure/Common/BitMap.cs +++ b/ARMeilleure/Common/BitMap.cs @@ -4,12 +4,12 @@ using System.Numerics; namespace ARMeilleure.Common { - class BitMap : IEnumerator + class BitMap : IEnumerator, IEnumerable { private const int IntSize = 64; private const int IntMask = IntSize - 1; - private List _masks; + private readonly List _masks; private int _enumIndex; private long _enumMask; @@ -57,7 +57,7 @@ namespace ARMeilleure.Common EnsureCapacity(bit + 1); int wordIndex = bit / IntSize; - int wordBit = bit & IntMask; + int wordBit = bit & IntMask; long wordMask = 1L << wordBit; @@ -76,7 +76,7 @@ namespace ARMeilleure.Common EnsureCapacity(bit + 1); int wordIndex = bit / IntSize; - int wordBit = bit & IntMask; + int wordBit = bit & IntMask; long wordMask = 1L << wordBit; @@ -88,11 +88,26 @@ namespace ARMeilleure.Common EnsureCapacity(bit + 1); int wordIndex = bit / IntSize; - int wordBit = bit & IntMask; + int wordBit = bit & IntMask; return (_masks[wordIndex] & (1L << wordBit)) != 0; } + public int FindFirstUnset() + { + for (int index = 0; index < _masks.Count; index++) + { + long mask = _masks[index]; + + if (mask != -1L) + { + return BitOperations.TrailingZeroCount(~mask) + index * IntSize; + } + } + + return _masks.Count * IntSize; + } + public bool Set(BitMap map) { EnsureCapacity(map._masks.Count * IntSize); @@ -135,7 +150,7 @@ namespace ARMeilleure.Common return modified; } - #region IEnumerable Methods + #region IEnumerable Methods // Note: The bit enumerator is embedded in this class to avoid creating garbage when enumerating. @@ -147,6 +162,11 @@ namespace ARMeilleure.Common } } + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + public IEnumerator GetEnumerator() { Reset(); @@ -180,6 +200,6 @@ namespace ARMeilleure.Common public void Dispose() { } -#endregion + #endregion } } \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitAluHelper.cs b/ARMeilleure/Instructions/InstEmitAluHelper.cs index 8c71d4cb..caef66c2 100644 --- a/ARMeilleure/Instructions/InstEmitAluHelper.cs +++ b/ARMeilleure/Instructions/InstEmitAluHelper.cs @@ -119,15 +119,12 @@ namespace ARMeilleure.Instructions if (IsThumb(context.CurrOp)) { bool isReturn = IsA32Return(context); - if (!isReturn) { context.StoreToContext(); } - Operand addr = context.BitwiseOr(value, Const(1)); - - InstEmitFlowHelper.EmitVirtualJump(context, addr, isReturn); + InstEmitFlowHelper.EmitVirtualJump(context, value, isReturn); } else { diff --git a/ARMeilleure/Instructions/InstEmitFlowHelper.cs b/ARMeilleure/Instructions/InstEmitFlowHelper.cs index 0e7ca9ec..216f9b89 100644 --- a/ARMeilleure/Instructions/InstEmitFlowHelper.cs +++ b/ARMeilleure/Instructions/InstEmitFlowHelper.cs @@ -2,8 +2,8 @@ using ARMeilleure.Decoders; using ARMeilleure.IntermediateRepresentation; using ARMeilleure.State; using ARMeilleure.Translation; +using ARMeilleure.Translation.Cache; using ARMeilleure.Translation.PTC; -using System; using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.IntermediateRepresentation.OperandHelper; @@ -12,8 +12,6 @@ namespace ARMeilleure.Instructions { static class InstEmitFlowHelper { - public const ulong CallFlag = 1; - public static void EmitCondBranch(ArmEmitterContext context, Operand target, Condition cond) { if (cond != Condition.Al) @@ -144,7 +142,7 @@ namespace ARMeilleure.Instructions public static void EmitCall(ArmEmitterContext context, ulong immediate) { - bool isRecursive = immediate == (ulong)context.BaseAddress; + bool isRecursive = immediate == context.EntryAddress; EmitJumpTableBranch(context, Const(immediate), isRecursive); } @@ -176,7 +174,7 @@ namespace ARMeilleure.Instructions Operand lblContinue = context.GetLabel(nextAddr.Value); // We need to clear out the call flag for the return address before comparing it. - context.BranchIf(lblContinue, context.BitwiseAnd(returnAddress, Const(~CallFlag)), nextAddr, Comparison.Equal, BasicBlockFrequency.Cold); + context.BranchIf(lblContinue, returnAddress, nextAddr, Comparison.Equal, BasicBlockFrequency.Cold); context.Return(returnAddress); } @@ -209,9 +207,9 @@ namespace ARMeilleure.Instructions } } - public static void EmitTailContinue(ArmEmitterContext context, Operand address, bool allowRejit = false) + public static void EmitTailContinue(ArmEmitterContext context, Operand address, bool allowRejit) { - // Left option here as it may be useful if we need to return to managed rather than tail call in future. + // Left option here as it may be useful if we need to return to managed rather than tail call in future. // (eg. for debug) bool useTailContinue = true; @@ -225,12 +223,9 @@ namespace ARMeilleure.Instructions } else { - if (allowRejit) - { - address = context.BitwiseOr(address, Const(CallFlag)); - } - - Operand fallbackAddr = context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress)), address); + Operand fallbackAddr = context.Call(typeof(NativeInterface).GetMethod(allowRejit + ? nameof(NativeInterface.GetFunctionAddress) + : nameof(NativeInterface.GetFunctionAddressWithoutRejit)), address); EmitNativeCall(context, fallbackAddr, true); } @@ -251,7 +246,6 @@ namespace ARMeilleure.Instructions private static void EmitBranchFallback(ArmEmitterContext context, Operand address, bool isJump) { - address = context.BitwiseOr(address, Const(address.Type, (long)CallFlag)); // Set call flag. Operand fallbackAddr = context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress)), address); EmitNativeCall(context, fallbackAddr, isJump); @@ -270,9 +264,9 @@ namespace ARMeilleure.Instructions Operand gotResult = context.CompareAndSwap(tableAddress, Const(0L), address); // Is the address ours? (either taken via CompareAndSwap (0), or what was already here) - context.BranchIfFalse(entrySkipLabel, + context.BranchIfFalse(entrySkipLabel, context.BitwiseOr( - context.ICompareEqual(gotResult, address), + context.ICompareEqual(gotResult, address), context.ICompareEqual(gotResult, Const(0L))) ); @@ -281,7 +275,7 @@ namespace ARMeilleure.Instructions Operand targetFunction = context.Load(OperandType.I64, targetFunctionPtr); // Call the function. - // We pass in the entry address as the guest address, as the entry may need to be updated by the + // We pass in the entry address as the guest address, as the entry may need to be updated by the // indirect call stub. EmitNativeCallWithGuestAddress(context, targetFunction, tableAddress, isJump); @@ -295,7 +289,7 @@ namespace ARMeilleure.Instructions { // If this is the last entry, avoid emitting the additional label and add. EmitTableEntry(fallbackLabel); - } + } else { Operand nextLabel = Label(); @@ -323,10 +317,10 @@ namespace ARMeilleure.Instructions address = context.ZeroExtend32(OperandType.I64, address); } - // TODO: Constant folding. Indirect calls are slower in the best case and emit more code so we want to + // TODO: Constant folding. Indirect calls are slower in the best case and emit more code so we want to // avoid them when possible. bool isConst = address.Kind == OperandKind.Constant; - long constAddr = (long)address.Value; + ulong constAddr = address.Value; if (!context.HighCq) { @@ -337,7 +331,7 @@ namespace ARMeilleure.Instructions else if (!isConst) { // Virtual branch/call - store first used addresses on a small table for fast lookup. - int entry = context.JumpTable.ReserveDynamicEntry(isJump); + int entry = context.JumpTable.ReserveDynamicEntry(context.EntryAddress, isJump); int jumpOffset = entry * JumpTable.JumpTableStride * JumpTable.DynamicTableElems; @@ -357,7 +351,7 @@ namespace ARMeilleure.Instructions } else { - int entry = context.JumpTable.ReserveTableEntry(context.BaseAddress & (~3L), constAddr, isJump); + int entry = context.JumpTable.ReserveTableEntry(context.EntryAddress, constAddr, isJump); int jumpOffset = entry * JumpTable.JumpTableStride + 8; // Offset directly to the host address. diff --git a/ARMeilleure/Instructions/InstEmitHelper.cs b/ARMeilleure/Instructions/InstEmitHelper.cs index d1bbe3f1..cd515c0c 100644 --- a/ARMeilleure/Instructions/InstEmitHelper.cs +++ b/ARMeilleure/Instructions/InstEmitHelper.cs @@ -172,7 +172,7 @@ namespace ARMeilleure.Instructions SetFlag(context, PState.TFlag, mode); - Operand addr = context.ConditionalSelect(mode, context.BitwiseOr(pc, Const((int)InstEmitFlowHelper.CallFlag)), context.BitwiseAnd(pc, Const(~3))); + Operand addr = context.ConditionalSelect(mode, pc, context.BitwiseAnd(pc, Const(~3))); InstEmitFlowHelper.EmitVirtualJump(context, addr, isReturn); } diff --git a/ARMeilleure/Instructions/NativeInterface.cs b/ARMeilleure/Instructions/NativeInterface.cs index bc1be21d..b2b200b9 100644 --- a/ARMeilleure/Instructions/NativeInterface.cs +++ b/ARMeilleure/Instructions/NativeInterface.cs @@ -23,16 +23,16 @@ namespace ARMeilleure.Instructions } [ThreadStatic] - private static ThreadContext _context; + private static ThreadContext Context; public static void RegisterThread(ExecutionContext context, IMemoryManager memory, Translator translator) { - _context = new ThreadContext(context, memory, translator); + Context = new ThreadContext(context, memory, translator); } public static void UnregisterThread() { - _context = null; + Context = null; } public static void Break(ulong address, int imm) @@ -231,14 +231,24 @@ namespace ARMeilleure.Instructions public static ulong GetFunctionAddress(ulong address) { - TranslatedFunction function = _context.Translator.GetOrTranslate(address, GetContext().ExecutionMode); + return GetFunctionAddressWithHint(address, true); + } + + public static ulong GetFunctionAddressWithoutRejit(ulong address) + { + return GetFunctionAddressWithHint(address, false); + } + + private static ulong GetFunctionAddressWithHint(ulong address, bool hintRejit) + { + TranslatedFunction function = Context.Translator.GetOrTranslate(address, GetContext().ExecutionMode, hintRejit); return (ulong)function.FuncPtr.ToInt64(); } public static ulong GetIndirectFunctionAddress(ulong address, ulong entryAddress) { - TranslatedFunction function = _context.Translator.GetOrTranslate(address, GetContext().ExecutionMode); + TranslatedFunction function = Context.Translator.GetOrTranslate(address, GetContext().ExecutionMode, hintRejit: true); ulong ptr = (ulong)function.FuncPtr.ToInt64(); @@ -266,12 +276,12 @@ namespace ARMeilleure.Instructions public static ExecutionContext GetContext() { - return _context.Context; + return Context.Context; } public static IMemoryManager GetMemoryManager() { - return _context.Memory; + return Context.Memory; } } } \ No newline at end of file diff --git a/ARMeilleure/Memory/ReservedRegion.cs b/ARMeilleure/Memory/ReservedRegion.cs index dc6eb9da..d6349108 100644 --- a/ARMeilleure/Memory/ReservedRegion.cs +++ b/ARMeilleure/Memory/ReservedRegion.cs @@ -49,5 +49,10 @@ namespace ARMeilleure.Memory } } } + + public void Dispose() + { + Block.Dispose(); + } } } diff --git a/ARMeilleure/Translation/ArmEmitterContext.cs b/ARMeilleure/Translation/ArmEmitterContext.cs index 7ce6a3f4..8f153192 100644 --- a/ARMeilleure/Translation/ArmEmitterContext.cs +++ b/ARMeilleure/Translation/ArmEmitterContext.cs @@ -3,6 +3,7 @@ using ARMeilleure.Instructions; using ARMeilleure.IntermediateRepresentation; using ARMeilleure.Memory; using ARMeilleure.State; +using ARMeilleure.Translation.Cache; using System.Collections.Generic; using static ARMeilleure.IntermediateRepresentation.OperandHelper; @@ -39,21 +40,19 @@ namespace ARMeilleure.Translation public IMemoryManager Memory { get; } - public Aarch32Mode Mode { get; } - public JumpTable JumpTable { get; } - public long BaseAddress { get; } - + public ulong EntryAddress { get; } public bool HighCq { get; } + public Aarch32Mode Mode { get; } - public ArmEmitterContext(IMemoryManager memory, JumpTable jumpTable, long baseAddress, bool highCq, Aarch32Mode mode) + public ArmEmitterContext(IMemoryManager memory, JumpTable jumpTable, ulong entryAddress, bool highCq, Aarch32Mode mode) { - Memory = memory; - JumpTable = jumpTable; - BaseAddress = baseAddress; - HighCq = highCq; - Mode = mode; + Memory = memory; + JumpTable = jumpTable; + EntryAddress = entryAddress; + HighCq = highCq; + Mode = mode; _labels = new Dictionary(); } diff --git a/ARMeilleure/Translation/Cache/CacheEntry.cs b/ARMeilleure/Translation/Cache/CacheEntry.cs new file mode 100644 index 00000000..fce984c3 --- /dev/null +++ b/ARMeilleure/Translation/Cache/CacheEntry.cs @@ -0,0 +1,26 @@ +using ARMeilleure.CodeGen.Unwinding; +using System; +using System.Diagnostics.CodeAnalysis; + +namespace ARMeilleure.Translation.Cache +{ + struct CacheEntry : IComparable + { + public int Offset { get; } + public int Size { get; } + + public UnwindInfo UnwindInfo { get; } + + public CacheEntry(int offset, int size, UnwindInfo unwindInfo) + { + Offset = offset; + Size = size; + UnwindInfo = unwindInfo; + } + + public int CompareTo([AllowNull] CacheEntry other) + { + return Offset.CompareTo(other.Offset); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Translation/Cache/CacheMemoryAllocator.cs b/ARMeilleure/Translation/Cache/CacheMemoryAllocator.cs new file mode 100644 index 00000000..3111e886 --- /dev/null +++ b/ARMeilleure/Translation/Cache/CacheMemoryAllocator.cs @@ -0,0 +1,96 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; + +namespace ARMeilleure.Translation.Cache +{ + class CacheMemoryAllocator + { + private struct MemoryBlock : IComparable + { + public int Offset { get; } + public int Size { get; } + + public MemoryBlock(int offset, int size) + { + Offset = offset; + Size = size; + } + + public int CompareTo([AllowNull] MemoryBlock other) + { + return Offset.CompareTo(other.Offset); + } + } + + private readonly List _blocks = new List(); + + public CacheMemoryAllocator(int capacity) + { + _blocks.Add(new MemoryBlock(0, capacity)); + } + + public int Allocate(int size) + { + for (int i = 0; i < _blocks.Count; i++) + { + MemoryBlock block = _blocks[i]; + + if (block.Size > size) + { + _blocks[i] = new MemoryBlock(block.Offset + size, block.Size - size); + return block.Offset; + } + else if (block.Size == size) + { + _blocks.RemoveAt(i); + return block.Offset; + } + } + + // We don't have enough free memory to perform the allocation. + return -1; + } + + public void Free(int offset, int size) + { + Insert(new MemoryBlock(offset, size)); + } + + private void Insert(MemoryBlock block) + { + int index = _blocks.BinarySearch(block); + + if (index < 0) + { + index = ~index; + } + + if (index < _blocks.Count) + { + MemoryBlock next = _blocks[index]; + + int endOffs = block.Offset + block.Size; + + if (next.Offset == endOffs) + { + block = new MemoryBlock(block.Offset, block.Size + next.Size); + _blocks.RemoveAt(index); + } + } + + if (index > 0) + { + MemoryBlock prev = _blocks[index - 1]; + + if (prev.Offset + prev.Size == block.Offset) + { + block = new MemoryBlock(block.Offset - prev.Size, block.Size + prev.Size); + _blocks.RemoveAt(--index); + } + } + + _blocks.Insert(index, block); + } + } +} diff --git a/ARMeilleure/Translation/Cache/JitCache.cs b/ARMeilleure/Translation/Cache/JitCache.cs new file mode 100644 index 00000000..db45c608 --- /dev/null +++ b/ARMeilleure/Translation/Cache/JitCache.cs @@ -0,0 +1,182 @@ +using ARMeilleure.CodeGen; +using ARMeilleure.CodeGen.Unwinding; +using ARMeilleure.Memory; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Runtime.InteropServices; + +namespace ARMeilleure.Translation.Cache +{ + static class JitCache + { + private const int PageSize = 4 * 1024; + private const int PageMask = PageSize - 1; + + private const int CodeAlignment = 4; // Bytes. + private const int CacheSize = 2047 * 1024 * 1024; + + private static ReservedRegion _jitRegion; + + private static CacheMemoryAllocator _cacheAllocator; + + private static readonly List _cacheEntries = new List(); + + private static readonly object _lock = new object(); + private static bool _initialized; + + public static void Initialize(IJitMemoryAllocator allocator) + { + if (_initialized) return; + + lock (_lock) + { + if (_initialized) return; + + _jitRegion = new ReservedRegion(allocator, CacheSize); + + _cacheAllocator = new CacheMemoryAllocator(CacheSize); + + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + JitUnwindWindows.InstallFunctionTableHandler(_jitRegion.Pointer, CacheSize, _jitRegion.Pointer + Allocate(PageSize)); + } + + _initialized = true; + } + } + + public static IntPtr Map(CompiledFunction func) + { + byte[] code = func.Code; + + lock (_lock) + { + Debug.Assert(_initialized); + + int funcOffset = Allocate(code.Length); + + IntPtr funcPtr = _jitRegion.Pointer + funcOffset; + + ReprotectAsWritable(funcOffset, code.Length); + + Marshal.Copy(code, 0, funcPtr, code.Length); + + ReprotectAsExecutable(funcOffset, code.Length); + + Add(funcOffset, code.Length, func.UnwindInfo); + + return funcPtr; + } + } + + public static void Unmap(IntPtr pointer) + { + lock (_lock) + { + Debug.Assert(_initialized); + + int funcOffset = (int)(pointer.ToInt64() - _jitRegion.Pointer.ToInt64()); + + bool result = TryFind(funcOffset, out CacheEntry entry); + Debug.Assert(result); + + _cacheAllocator.Free(funcOffset, AlignCodeSize(entry.Size)); + + Remove(funcOffset); + } + } + + private static void ReprotectAsWritable(int offset, int size) + { + int endOffs = offset + size; + + int regionStart = offset & ~PageMask; + int regionEnd = (endOffs + PageMask) & ~PageMask; + + _jitRegion.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart)); + } + + private static void ReprotectAsExecutable(int offset, int size) + { + int endOffs = offset + size; + + int regionStart = offset & ~PageMask; + int regionEnd = (endOffs + PageMask) & ~PageMask; + + _jitRegion.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart)); + } + + private static int Allocate(int codeSize) + { + codeSize = AlignCodeSize(codeSize); + + int allocOffset = _cacheAllocator.Allocate(codeSize); + + if (allocOffset < 0) + { + throw new OutOfMemoryException("JIT Cache exhausted."); + } + + _jitRegion.ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize); + + return allocOffset; + } + + private static int AlignCodeSize(int codeSize) + { + return checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1); + } + + private static void Add(int offset, int size, UnwindInfo unwindInfo) + { + CacheEntry entry = new CacheEntry(offset, size, unwindInfo); + + int index = _cacheEntries.BinarySearch(entry); + + if (index < 0) + { + index = ~index; + } + + _cacheEntries.Insert(index, entry); + } + + private static void Remove(int offset) + { + int index = _cacheEntries.BinarySearch(new CacheEntry(offset, 0, default)); + + if (index < 0) + { + index = ~index - 1; + } + + if (index >= 0) + { + _cacheEntries.RemoveAt(index); + } + } + + public static bool TryFind(int offset, out CacheEntry entry) + { + lock (_lock) + { + int index = _cacheEntries.BinarySearch(new CacheEntry(offset, 0, default)); + + if (index < 0) + { + index = ~index - 1; + } + + if (index >= 0) + { + entry = _cacheEntries[index]; + return true; + } + } + + entry = default; + return false; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Translation/JitUnwindWindows.cs b/ARMeilleure/Translation/Cache/JitUnwindWindows.cs similarity index 95% rename from ARMeilleure/Translation/JitUnwindWindows.cs rename to ARMeilleure/Translation/Cache/JitUnwindWindows.cs index e118d129..072c0f51 100644 --- a/ARMeilleure/Translation/JitUnwindWindows.cs +++ b/ARMeilleure/Translation/Cache/JitUnwindWindows.cs @@ -5,7 +5,7 @@ using System; using System.Diagnostics; using System.Runtime.InteropServices; -namespace ARMeilleure.Translation +namespace ARMeilleure.Translation.Cache { static class JitUnwindWindows { @@ -59,7 +59,7 @@ namespace ARMeilleure.Translation private unsafe static UnwindInfo* _unwindInfo; - public static void InstallFunctionTableHandler(IntPtr codeCachePointer, uint codeCacheLength) + public static void InstallFunctionTableHandler(IntPtr codeCachePointer, uint codeCacheLength, IntPtr workBufferPtr) { ulong codeCachePtr = (ulong)codeCachePointer.ToInt64(); @@ -69,9 +69,9 @@ namespace ARMeilleure.Translation unsafe { - _runtimeFunction = (RuntimeFunction*)codeCachePointer; + _runtimeFunction = (RuntimeFunction*)workBufferPtr; - _unwindInfo = (UnwindInfo*)(codeCachePointer + _sizeOfRuntimeFunction); + _unwindInfo = (UnwindInfo*)(workBufferPtr + _sizeOfRuntimeFunction); _getRuntimeFunctionCallback = new GetRuntimeFunctionCallback(FunctionTableHandler); @@ -94,7 +94,7 @@ namespace ARMeilleure.Translation { int offset = (int)((long)controlPc - context.ToInt64()); - if (!JitCache.TryFind(offset, out JitCacheEntry funcEntry)) + if (!JitCache.TryFind(offset, out CacheEntry funcEntry)) { return null; // Not found. } diff --git a/ARMeilleure/Translation/JumpTable.cs b/ARMeilleure/Translation/Cache/JumpTable.cs similarity index 57% rename from ARMeilleure/Translation/JumpTable.cs rename to ARMeilleure/Translation/Cache/JumpTable.cs index fe7a6ec3..71a036d8 100644 --- a/ARMeilleure/Translation/JumpTable.cs +++ b/ARMeilleure/Translation/Cache/JumpTable.cs @@ -1,17 +1,15 @@ using ARMeilleure.Diagnostics; using ARMeilleure.Memory; +using ARMeilleure.Translation.PTC; using System; using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics; using System.Runtime.InteropServices; -using System.Threading; -namespace ARMeilleure.Translation +namespace ARMeilleure.Translation.Cache { - using PTC; - - class JumpTable + class JumpTable : IDisposable { // The jump table is a block of (guestAddress, hostAddress) function mappings. // Each entry corresponds to one branch in a JIT compiled function. The entries are @@ -44,28 +42,32 @@ namespace ARMeilleure.Translation private const int DynamicTableSize = 1048576; private const int DynamicTableByteSize = DynamicTableSize * DynamicTableStride; + private const int DynamicEntryTag = 1 << 31; + private readonly ReservedRegion _jumpRegion; private readonly ReservedRegion _dynamicRegion; - private int _tableEnd = 0; - private int _dynTableEnd = 0; - - public IntPtr JumpPointer => _jumpRegion.Pointer; + public IntPtr JumpPointer => _jumpRegion.Pointer; public IntPtr DynamicPointer => _dynamicRegion.Pointer; - public int TableEnd => _tableEnd; - public int DynTableEnd => _dynTableEnd; + public JumpTableEntryAllocator Table { get; } + public JumpTableEntryAllocator DynTable { get; } - public ConcurrentDictionary Targets { get; } - public ConcurrentDictionary> Dependants { get; } // TODO: Attach to TranslatedFunction or a wrapper class. + public ConcurrentDictionary Targets { get; } + public ConcurrentDictionary> Dependants { get; } // TODO: Attach to TranslatedFunction or a wrapper class. + public ConcurrentDictionary> Owners { get; } public JumpTable(IJitMemoryAllocator allocator) { - _jumpRegion = new ReservedRegion(allocator, JumpTableByteSize); + _jumpRegion = new ReservedRegion(allocator, JumpTableByteSize); _dynamicRegion = new ReservedRegion(allocator, DynamicTableByteSize); - Targets = new ConcurrentDictionary(); - Dependants = new ConcurrentDictionary>(); + Table = new JumpTableEntryAllocator(); + DynTable = new JumpTableEntryAllocator(); + + Targets = new ConcurrentDictionary(); + Dependants = new ConcurrentDictionary>(); + Owners = new ConcurrentDictionary>(); Symbols.Add((ulong)_jumpRegion.Pointer.ToInt64(), JumpTableByteSize, JumpTableStride, "JMP_TABLE"); Symbols.Add((ulong)_dynamicRegion.Pointer.ToInt64(), DynamicTableByteSize, DynamicTableStride, "DYN_TABLE"); @@ -73,9 +75,6 @@ namespace ARMeilleure.Translation public void Initialize(PtcJumpTable ptcJumpTable, ConcurrentDictionary funcs) { - _tableEnd = ptcJumpTable.TableEnd; - _dynTableEnd = ptcJumpTable.DynTableEnd; - foreach (ulong guestAddress in ptcJumpTable.Targets) { if (funcs.TryGetValue(guestAddress, out TranslatedFunction func)) @@ -90,18 +89,22 @@ namespace ARMeilleure.Translation foreach (var item in ptcJumpTable.Dependants) { - Dependants.TryAdd(item.Key, new LinkedList(item.Value)); + Dependants.TryAdd(item.Key, new List(item.Value)); + } + + foreach (var item in ptcJumpTable.Owners) + { + Owners.TryAdd(item.Key, new List(item.Value)); } } public void RegisterFunction(ulong address, TranslatedFunction func) { - address &= ~3UL; Targets.AddOrUpdate(address, func, (key, oldFunc) => func); long funcPtr = func.FuncPtr.ToInt64(); // Update all jump table entries that target this address. - if (Dependants.TryGetValue(address, out LinkedList myDependants)) + if (Dependants.TryGetValue(address, out List myDependants)) { lock (myDependants) { @@ -115,41 +118,55 @@ namespace ARMeilleure.Translation } } - public int ReserveTableEntry(long ownerAddress, long address, bool isJump) + public int ReserveTableEntry(ulong ownerGuestAddress, ulong address, bool isJump) { - int entry = Interlocked.Increment(ref _tableEnd); + int entry = Table.AllocateEntry(); ExpandIfNeededJumpTable(entry); // Is the address we have already registered? If so, put the function address in the jump table. // If not, it will point to the direct call stub. long value = DirectCallStubs.DirectCallStub(isJump).ToInt64(); - if (Targets.TryGetValue((ulong)address, out TranslatedFunction func)) + if (Targets.TryGetValue(address, out TranslatedFunction func)) { value = func.FuncPtr.ToInt64(); } // Make sure changes to the function at the target address update this jump table entry. - LinkedList targetDependants = Dependants.GetOrAdd((ulong)address, (addr) => new LinkedList()); + List targetDependants = Dependants.GetOrAdd(address, (addr) => new List()); lock (targetDependants) { - targetDependants.AddLast(entry); + targetDependants.Add(entry); + } + + // Keep track of ownership for jump table entries. + List ownerEntries = Owners.GetOrAdd(ownerGuestAddress, (addr) => new List()); + lock (ownerEntries) + { + ownerEntries.Add(entry); } IntPtr addr = GetEntryAddressJumpTable(entry); - Marshal.WriteInt64(addr, 0, address); + Marshal.WriteInt64(addr, 0, (long)address); Marshal.WriteInt64(addr, 8, value); return entry; } - public int ReserveDynamicEntry(bool isJump) + public int ReserveDynamicEntry(ulong ownerGuestAddress, bool isJump) { - int entry = Interlocked.Increment(ref _dynTableEnd); + int entry = DynTable.AllocateEntry(); ExpandIfNeededDynamicTable(entry); + // Keep track of ownership for jump table entries. + List ownerEntries = Owners.GetOrAdd(ownerGuestAddress, (addr) => new List()); + lock (ownerEntries) + { + ownerEntries.Add(entry | DynamicEntryTag); + } + // Initialize all host function pointers to the indirect call stub. IntPtr addr = GetEntryAddressDynamicTable(entry); long stubPtr = DirectCallStubs.IndirectCallStub(isJump).ToInt64(); @@ -162,13 +179,51 @@ namespace ARMeilleure.Translation return entry; } - public void ExpandIfNeededJumpTable(int entries) + // For future use. + public void RemoveFunctionEntries(ulong guestAddress) { - Debug.Assert(entries > 0); - - if (entries < JumpTableSize) + if (Owners.TryRemove(guestAddress, out List list)) { - _jumpRegion.ExpandIfNeeded((ulong)((entries + 1) * JumpTableStride)); + for (int i = 0; i < list.Count; i++) + { + int entry = list[i]; + + bool isDynamic = (entry & DynamicEntryTag) != 0; + + entry &= ~DynamicEntryTag; + + if (isDynamic) + { + IntPtr addr = GetEntryAddressDynamicTable(entry); + + for (int j = 0; j < DynamicTableElems; j++) + { + Marshal.WriteInt64(addr + j * JumpTableStride, 0, 0L); + Marshal.WriteInt64(addr + j * JumpTableStride, 8, 0L); + } + + DynTable.FreeEntry(entry); + } + else + { + IntPtr addr = GetEntryAddressJumpTable(entry); + + Marshal.WriteInt64(addr, 0, 0L); + Marshal.WriteInt64(addr, 8, 0L); + + Table.FreeEntry(entry); + } + } + } + } + + public void ExpandIfNeededJumpTable(int entry) + { + Debug.Assert(entry >= 0); + + if (entry < JumpTableSize) + { + _jumpRegion.ExpandIfNeeded((ulong)((entry + 1) * JumpTableStride)); } else { @@ -176,13 +231,13 @@ namespace ARMeilleure.Translation } } - public void ExpandIfNeededDynamicTable(int entries) + public void ExpandIfNeededDynamicTable(int entry) { - Debug.Assert(entries > 0); + Debug.Assert(entry >= 0); - if (entries < DynamicTableSize) + if (entry < DynamicTableSize) { - _dynamicRegion.ExpandIfNeeded((ulong)((entries + 1) * DynamicTableStride)); + _dynamicRegion.ExpandIfNeeded((ulong)((entry + 1) * DynamicTableStride)); } else { @@ -192,16 +247,22 @@ namespace ARMeilleure.Translation public IntPtr GetEntryAddressJumpTable(int entry) { - Debug.Assert(entry >= 1 && entry <= _tableEnd); + Debug.Assert(Table.EntryIsValid(entry)); return _jumpRegion.Pointer + entry * JumpTableStride; } public IntPtr GetEntryAddressDynamicTable(int entry) { - Debug.Assert(entry >= 1 && entry <= _dynTableEnd); + Debug.Assert(DynTable.EntryIsValid(entry)); return _dynamicRegion.Pointer + entry * DynamicTableStride; } + + public void Dispose() + { + _jumpRegion.Dispose(); + _dynamicRegion.Dispose(); + } } } diff --git a/ARMeilleure/Translation/Cache/JumpTableEntryAllocator.cs b/ARMeilleure/Translation/Cache/JumpTableEntryAllocator.cs new file mode 100644 index 00000000..ae2c075e --- /dev/null +++ b/ARMeilleure/Translation/Cache/JumpTableEntryAllocator.cs @@ -0,0 +1,72 @@ +using ARMeilleure.Common; +using System.Collections.Generic; +using System.Diagnostics; + +namespace ARMeilleure.Translation.Cache +{ + class JumpTableEntryAllocator + { + private readonly BitMap _bitmap; + private int _freeHint; + + public JumpTableEntryAllocator() + { + _bitmap = new BitMap(); + } + + public bool EntryIsValid(int entryIndex) + { + lock (_bitmap) + { + return _bitmap.IsSet(entryIndex); + } + } + + public void SetEntry(int entryIndex) + { + lock (_bitmap) + { + _bitmap.Set(entryIndex); + } + } + + public int AllocateEntry() + { + lock (_bitmap) + { + int entryIndex; + + if (!_bitmap.IsSet(_freeHint)) + { + entryIndex = _freeHint; + } + else + { + entryIndex = _bitmap.FindFirstUnset(); + } + + _freeHint = entryIndex + 1; + + bool wasSet = _bitmap.Set(entryIndex); + Debug.Assert(wasSet); + + return entryIndex; + } + } + + public void FreeEntry(int entryIndex) + { + lock (_bitmap) + { + _bitmap.Clear(entryIndex); + + _freeHint = entryIndex; + } + } + + public IEnumerable GetEntries() + { + return _bitmap; + } + } +} diff --git a/ARMeilleure/Translation/Compiler.cs b/ARMeilleure/Translation/Compiler.cs index 934c0db6..af718e21 100644 --- a/ARMeilleure/Translation/Compiler.cs +++ b/ARMeilleure/Translation/Compiler.cs @@ -2,13 +2,13 @@ using ARMeilleure.CodeGen; using ARMeilleure.CodeGen.X86; using ARMeilleure.Diagnostics; using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation.Cache; +using ARMeilleure.Translation.PTC; using System; using System.Runtime.InteropServices; namespace ARMeilleure.Translation { - using PTC; - static class Compiler { public static T Compile( diff --git a/ARMeilleure/Translation/Delegates.cs b/ARMeilleure/Translation/Delegates.cs index 20289d62..a45192df 100644 --- a/ARMeilleure/Translation/Delegates.cs +++ b/ARMeilleure/Translation/Delegates.cs @@ -113,6 +113,7 @@ namespace ARMeilleure.Translation SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFpscr))); // A32 only. SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFpsr))); SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddressWithoutRejit))); SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetIndirectFunctionAddress))); SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetTpidr))); SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetTpidr32))); // A32 only. diff --git a/ARMeilleure/Translation/DirectCallStubs.cs b/ARMeilleure/Translation/DirectCallStubs.cs index 7c11fdb2..df7ca16e 100644 --- a/ARMeilleure/Translation/DirectCallStubs.cs +++ b/ARMeilleure/Translation/DirectCallStubs.cs @@ -77,7 +77,6 @@ namespace ARMeilleure.Translation Operand address = context.Load(OperandType.I64, context.Add(nativeContextPtr, Const((long)NativeContext.GetCallAddressOffset()))); - address = context.BitwiseOr(address, Const(address.Type, 1)); // Set call flag. Operand functionAddr = context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress)), address); EmitCall(context, functionAddr, tailCall); diff --git a/ARMeilleure/Translation/JitCache.cs b/ARMeilleure/Translation/JitCache.cs deleted file mode 100644 index a828b4ed..00000000 --- a/ARMeilleure/Translation/JitCache.cs +++ /dev/null @@ -1,142 +0,0 @@ -using ARMeilleure.CodeGen; -using ARMeilleure.Memory; -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Runtime.InteropServices; - -namespace ARMeilleure.Translation -{ - static class JitCache - { - private const int PageSize = 4 * 1024; - private const int PageMask = PageSize - 1; - - private const int CodeAlignment = 4; // Bytes. - private const int CacheSize = 2047 * 1024 * 1024; - - private static ReservedRegion _jitRegion; - private static int _offset; - - private static readonly List _cacheEntries = new List(); - - private static readonly object _lock = new object(); - private static bool _initialized; - - public static void Initialize(IJitMemoryAllocator allocator) - { - if (_initialized) return; - - lock (_lock) - { - if (_initialized) return; - - _jitRegion = new ReservedRegion(allocator, CacheSize); - - if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) - { - _jitRegion.ExpandIfNeeded((ulong)PageSize); - - JitUnwindWindows.InstallFunctionTableHandler(_jitRegion.Pointer, CacheSize); - - // The first page is used for the table based SEH structs. - _offset = PageSize; - } - - _initialized = true; - } - } - - public static IntPtr Map(CompiledFunction func) - { - byte[] code = func.Code; - - lock (_lock) - { - Debug.Assert(_initialized); - - int funcOffset = Allocate(code.Length); - - IntPtr funcPtr = _jitRegion.Pointer + funcOffset; - - Marshal.Copy(code, 0, funcPtr, code.Length); - - ReprotectRange(funcOffset, code.Length); - - Add(new JitCacheEntry(funcOffset, code.Length, func.UnwindInfo)); - - return funcPtr; - } - } - - private static void ReprotectRange(int offset, int size) - { - // Map pages that are already full as RX. - // Map pages that are not full yet as RWX. - // On unix, the address must be page aligned. - int endOffs = offset + size; - - int pageStart = offset & ~PageMask; - int pageEnd = endOffs & ~PageMask; - - int fullPagesSize = pageEnd - pageStart; - - if (fullPagesSize != 0) - { - _jitRegion.Block.MapAsRx((ulong)pageStart, (ulong)fullPagesSize); - } - - int remaining = endOffs - pageEnd; - - if (remaining != 0) - { - _jitRegion.Block.MapAsRwx((ulong)pageEnd, (ulong)remaining); - } - } - - private static int Allocate(int codeSize) - { - codeSize = checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1); - - int allocOffset = _offset; - - _offset += codeSize; - - if (_offset > CacheSize) - { - throw new OutOfMemoryException("JIT Cache exhausted."); - } - - _jitRegion.ExpandIfNeeded((ulong)_offset); - - return allocOffset; - } - - private static void Add(JitCacheEntry entry) - { - _cacheEntries.Add(entry); - } - - public static bool TryFind(int offset, out JitCacheEntry entry) - { - lock (_lock) - { - foreach (JitCacheEntry cacheEntry in _cacheEntries) - { - int endOffset = cacheEntry.Offset + cacheEntry.Size; - - if (offset >= cacheEntry.Offset && offset < endOffset) - { - entry = cacheEntry; - - return true; - } - } - } - - entry = default; - - return false; - } - } -} \ No newline at end of file diff --git a/ARMeilleure/Translation/JitCacheEntry.cs b/ARMeilleure/Translation/JitCacheEntry.cs deleted file mode 100644 index 87d020e6..00000000 --- a/ARMeilleure/Translation/JitCacheEntry.cs +++ /dev/null @@ -1,19 +0,0 @@ -using ARMeilleure.CodeGen.Unwinding; - -namespace ARMeilleure.Translation -{ - struct JitCacheEntry - { - public int Offset { get; } - public int Size { get; } - - public UnwindInfo UnwindInfo { get; } - - public JitCacheEntry(int offset, int size, UnwindInfo unwindInfo) - { - Offset = offset; - Size = size; - UnwindInfo = unwindInfo; - } - } -} \ No newline at end of file diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index 05dd352e..1cdaa8fe 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -2,6 +2,7 @@ using ARMeilleure.CodeGen; using ARMeilleure.CodeGen.Unwinding; using ARMeilleure.CodeGen.X86; using ARMeilleure.Memory; +using ARMeilleure.Translation.Cache; using Ryujinx.Common.Configuration; using Ryujinx.Common.Logging; using System; @@ -21,7 +22,7 @@ namespace ARMeilleure.Translation.PTC { private const string HeaderMagic = "PTChd"; - private const int InternalVersion = 1801; //! To be incremented manually for each change to the ARMeilleure project. + private const int InternalVersion = 1519; //! To be incremented manually for each change to the ARMeilleure project. private const string ActualDir = "0"; private const string BackupDir = "1"; @@ -433,9 +434,9 @@ namespace ARMeilleure.Translation.PTC UnwindInfo unwindInfo = ReadUnwindInfo(unwindInfosReader); - TranslatedFunction func = FastTranslate(code, unwindInfo, infoEntry.HighCq); + TranslatedFunction func = FastTranslate(code, infoEntry.GuestSize, unwindInfo, infoEntry.HighCq); - funcs.AddOrUpdate((ulong)infoEntry.Address, func, (key, oldFunc) => func.HighCq && !oldFunc.HighCq ? func : oldFunc); + funcs.AddOrUpdate(infoEntry.Address, func, (key, oldFunc) => func.HighCq && !oldFunc.HighCq ? func : oldFunc); } } @@ -457,7 +458,8 @@ namespace ARMeilleure.Translation.PTC { InfoEntry infoEntry = new InfoEntry(); - infoEntry.Address = infosReader.ReadInt64(); + infoEntry.Address = infosReader.ReadUInt64(); + infoEntry.GuestSize = infosReader.ReadUInt64(); infoEntry.HighCq = infosReader.ReadBoolean(); infoEntry.CodeLen = infosReader.ReadInt32(); infoEntry.RelocEntriesCount = infosReader.ReadInt32(); @@ -541,7 +543,7 @@ namespace ARMeilleure.Translation.PTC return new UnwindInfo(pushEntries, prologueSize); } - private static TranslatedFunction FastTranslate(byte[] code, UnwindInfo unwindInfo, bool highCq) + private static TranslatedFunction FastTranslate(byte[] code, ulong guestSize, UnwindInfo unwindInfo, bool highCq) { CompiledFunction cFunc = new CompiledFunction(code, unwindInfo); @@ -549,7 +551,7 @@ namespace ARMeilleure.Translation.PTC GuestFunction gFunc = Marshal.GetDelegateForFunctionPointer(codePtr); - TranslatedFunction tFunc = new TranslatedFunction(gFunc, highCq); + TranslatedFunction tFunc = new TranslatedFunction(gFunc, guestSize, highCq); return tFunc; } @@ -632,12 +634,13 @@ namespace ARMeilleure.Translation.PTC Logger.Info?.Print(LogClass.Ptc, $"{funcsCount + _translateCount} of {ProfiledFuncsCount} functions to translate - {_rejitCount} functions rejited"); } - internal static void WriteInfoCodeReloc(long address, bool highCq, PtcInfo ptcInfo) + internal static void WriteInfoCodeReloc(ulong address, ulong guestSize, bool highCq, PtcInfo ptcInfo) { lock (_lock) { // WriteInfo. - _infosWriter.Write((long)address); // InfoEntry.Address + _infosWriter.Write((ulong)address); // InfoEntry.Address + _infosWriter.Write((ulong)guestSize); // InfoEntry.GuestSize _infosWriter.Write((bool)highCq); // InfoEntry.HighCq _infosWriter.Write((int)ptcInfo.CodeStream.Length); // InfoEntry.CodeLen _infosWriter.Write((int)ptcInfo.RelocEntriesCount); // InfoEntry.RelocEntriesCount @@ -673,9 +676,10 @@ namespace ARMeilleure.Translation.PTC private struct InfoEntry { - public const int Stride = 17; // Bytes. + public const int Stride = 25; // Bytes. - public long Address; + public ulong Address; + public ulong GuestSize; public bool HighCq; public int CodeLen; public int RelocEntriesCount; diff --git a/ARMeilleure/Translation/PTC/PtcJumpTable.cs b/ARMeilleure/Translation/PTC/PtcJumpTable.cs index 0a3ae240..e4af68d6 100644 --- a/ARMeilleure/Translation/PTC/PtcJumpTable.cs +++ b/ARMeilleure/Translation/PTC/PtcJumpTable.cs @@ -1,3 +1,4 @@ +using ARMeilleure.Translation.Cache; using System; using System.Collections.Concurrent; using System.Collections.Generic; @@ -8,25 +9,53 @@ namespace ARMeilleure.Translation.PTC [Serializable] class PtcJumpTable { - private readonly List> _jumpTable; - private readonly List> _dynamicTable; + [Serializable] + private struct TableEntry + { + public int EntryIndex; + public long GuestAddress; + public TAddress HostAddress; + + public TableEntry(int entryIndex, long guestAddress, TAddress hostAddress) + { + EntryIndex = entryIndex; + GuestAddress = guestAddress; + HostAddress = hostAddress; + } + } + + private enum DirectHostAddress + { + CallStub, + TailCallStub, + Host + } + + private enum IndirectHostAddress + { + CallStub, + TailCallStub + } + + private readonly List> _jumpTable; + private readonly List> _dynamicTable; private readonly List _targets; - private readonly Dictionary> _dependants; - - public int TableEnd => _jumpTable.Count; - public int DynTableEnd => _dynamicTable.Count; + private readonly Dictionary> _dependants; + private readonly Dictionary> _owners; public List Targets => _targets; - public Dictionary> Dependants => _dependants; + public Dictionary> Dependants => _dependants; + public Dictionary> Owners => _owners; public PtcJumpTable() { - _jumpTable = new List>(); - _dynamicTable = new List>(); + _jumpTable = new List>(); + _dynamicTable = new List>(); - _targets = new List(); - _dependants = new Dictionary>(); + _targets = new List(); + _dependants = new Dictionary>(); + _owners = new Dictionary>(); } public void Initialize(JumpTable jumpTable) @@ -42,7 +71,14 @@ namespace ARMeilleure.Translation.PTC foreach (var item in jumpTable.Dependants) { - _dependants.Add(item.Key, new LinkedList(item.Value)); + _dependants.Add(item.Key, new List(item.Value)); + } + + _owners.Clear(); + + foreach (var item in jumpTable.Owners) + { + _owners.Add(item.Key, new List(item.Value)); } } @@ -53,20 +89,17 @@ namespace ARMeilleure.Translation.PTC _targets.Clear(); _dependants.Clear(); + _owners.Clear(); } public void WriteJumpTable(JumpTable jumpTable, ConcurrentDictionary funcs) { - jumpTable.ExpandIfNeededJumpTable(TableEnd); - - int entry = 0; + // Writes internal state to jump table in-memory, after PTC was loaded. foreach (var item in _jumpTable) { - entry += 1; - - long guestAddress = item.Key; - DirectHostAddress directHostAddress = item.Value; + long guestAddress = item.GuestAddress; + DirectHostAddress directHostAddress = item.HostAddress; long hostAddress; @@ -94,6 +127,11 @@ namespace ARMeilleure.Translation.PTC throw new InvalidOperationException(nameof(directHostAddress)); } + int entry = item.EntryIndex; + + jumpTable.Table.SetEntry(entry); + jumpTable.ExpandIfNeededJumpTable(entry); + IntPtr addr = jumpTable.GetEntryAddressJumpTable(entry); Marshal.WriteInt64(addr, 0, guestAddress); @@ -103,21 +141,17 @@ namespace ARMeilleure.Translation.PTC public void WriteDynamicTable(JumpTable jumpTable) { + // Writes internal state to jump table in-memory, after PTC was loaded. + if (JumpTable.DynamicTableElems > 1) { throw new NotSupportedException(); } - jumpTable.ExpandIfNeededDynamicTable(DynTableEnd); - - int entry = 0; - foreach (var item in _dynamicTable) { - entry += 1; - - long guestAddress = item.Key; - IndirectHostAddress indirectHostAddress = item.Value; + long guestAddress = item.GuestAddress; + IndirectHostAddress indirectHostAddress = item.HostAddress; long hostAddress; @@ -134,6 +168,11 @@ namespace ARMeilleure.Translation.PTC throw new InvalidOperationException(nameof(indirectHostAddress)); } + int entry = item.EntryIndex; + + jumpTable.DynTable.SetEntry(entry); + jumpTable.ExpandIfNeededDynamicTable(entry); + IntPtr addr = jumpTable.GetEntryAddressDynamicTable(entry); Marshal.WriteInt64(addr, 0, guestAddress); @@ -143,14 +182,18 @@ namespace ARMeilleure.Translation.PTC public void ReadJumpTable(JumpTable jumpTable) { + // Reads in-memory jump table state and store internally for PTC serialization. + _jumpTable.Clear(); - for (int entry = 1; entry <= jumpTable.TableEnd; entry++) + IEnumerable entries = jumpTable.Table.GetEntries(); + + foreach (int entry in entries) { IntPtr addr = jumpTable.GetEntryAddressJumpTable(entry); long guestAddress = Marshal.ReadInt64(addr, 0); - long hostAddress = Marshal.ReadInt64(addr, 8); + long hostAddress = Marshal.ReadInt64(addr, 8); DirectHostAddress directHostAddress; @@ -167,12 +210,14 @@ namespace ARMeilleure.Translation.PTC directHostAddress = DirectHostAddress.Host; } - _jumpTable.Add(new KeyValuePair(guestAddress, directHostAddress)); + _jumpTable.Add(new TableEntry(entry, guestAddress, directHostAddress)); } } public void ReadDynamicTable(JumpTable jumpTable) { + // Reads in-memory jump table state and store internally for PTC serialization. + if (JumpTable.DynamicTableElems > 1) { throw new NotSupportedException(); @@ -180,12 +225,14 @@ namespace ARMeilleure.Translation.PTC _dynamicTable.Clear(); - for (int entry = 1; entry <= jumpTable.DynTableEnd; entry++) + IEnumerable entries = jumpTable.DynTable.GetEntries(); + + foreach (int entry in entries) { IntPtr addr = jumpTable.GetEntryAddressDynamicTable(entry); long guestAddress = Marshal.ReadInt64(addr, 0); - long hostAddress = Marshal.ReadInt64(addr, 8); + long hostAddress = Marshal.ReadInt64(addr, 8); IndirectHostAddress indirectHostAddress; @@ -202,21 +249,8 @@ namespace ARMeilleure.Translation.PTC throw new InvalidOperationException($"({nameof(hostAddress)} = 0x{hostAddress:X16})"); } - _dynamicTable.Add(new KeyValuePair(guestAddress, indirectHostAddress)); + _dynamicTable.Add(new TableEntry(entry, guestAddress, indirectHostAddress)); } } - - private enum DirectHostAddress - { - CallStub, - TailCallStub, - Host - } - - private enum IndirectHostAddress - { - CallStub, - TailCallStub - } } } \ No newline at end of file diff --git a/ARMeilleure/Translation/TranslatedFunction.cs b/ARMeilleure/Translation/TranslatedFunction.cs index 36fae50a..54c050f6 100644 --- a/ARMeilleure/Translation/TranslatedFunction.cs +++ b/ARMeilleure/Translation/TranslatedFunction.cs @@ -4,23 +4,24 @@ using System.Threading; namespace ARMeilleure.Translation { - sealed class TranslatedFunction + class TranslatedFunction { private const int MinCallsForRejit = 100; private readonly GuestFunction _func; // Ensure that this delegate will not be garbage collected. - private int _callCount = 0; + private int _callCount; - public bool HighCq { get; } + public ulong GuestSize { get; } + public bool HighCq { get; } public IntPtr FuncPtr { get; } - public TranslatedFunction(GuestFunction func, bool highCq) + public TranslatedFunction(GuestFunction func, ulong guestSize, bool highCq) { _func = func; - - HighCq = highCq; - FuncPtr = Marshal.GetFunctionPointerForDelegate(func); + GuestSize = guestSize; + HighCq = highCq; + FuncPtr = Marshal.GetFunctionPointerForDelegate(func); } public ulong Execute(State.ExecutionContext context) diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs index 80f7b962..ffcd551c 100644 --- a/ARMeilleure/Translation/Translator.cs +++ b/ARMeilleure/Translation/Translator.cs @@ -4,8 +4,13 @@ using ARMeilleure.Instructions; using ARMeilleure.IntermediateRepresentation; using ARMeilleure.Memory; using ARMeilleure.State; +using ARMeilleure.Translation.Cache; +using ARMeilleure.Translation.PTC; using System; using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; using System.Threading; using static ARMeilleure.IntermediateRepresentation.OperandHelper; @@ -13,21 +18,19 @@ using static ARMeilleure.IntermediateRepresentation.OperationHelper; namespace ARMeilleure.Translation { - using PTC; - public class Translator { - private const ulong CallFlag = InstEmitFlowHelper.CallFlag; - + private readonly IJitMemoryAllocator _allocator; private readonly IMemoryManager _memory; private readonly ConcurrentDictionary _funcs; + private readonly ConcurrentQueue> _oldFuncs; private readonly ConcurrentStack _backgroundStack; - private readonly AutoResetEvent _backgroundTranslatorEvent; + private readonly ReaderWriterLock _backgroundTranslatorLock; - private readonly JumpTable _jumpTable; + private JumpTable _jumpTable; private volatile int _threadCount; @@ -36,35 +39,36 @@ namespace ARMeilleure.Translation public Translator(IJitMemoryAllocator allocator, IMemoryManager memory) { + _allocator = allocator; _memory = memory; _funcs = new ConcurrentDictionary(); + _oldFuncs = new ConcurrentQueue>(); _backgroundStack = new ConcurrentStack(); - _backgroundTranslatorEvent = new AutoResetEvent(false); - - _jumpTable = new JumpTable(allocator); + _backgroundTranslatorLock = new ReaderWriterLock(); JitCache.Initialize(allocator); DirectCallStubs.InitializeStubs(); - - if (Ptc.State == PtcState.Enabled) - { - Ptc.LoadTranslations(_funcs, memory.PageTablePointer, _jumpTable); - } } private void TranslateStackedSubs() { while (_threadCount != 0) { + _backgroundTranslatorLock.AcquireReaderLock(Timeout.Infinite); + if (_backgroundStack.TryPop(out RejitRequest request)) { TranslatedFunction func = Translate(_memory, _jumpTable, request.Address, request.Mode, highCq: true); - _funcs.AddOrUpdate(request.Address, func, (key, oldFunc) => func); + _funcs.AddOrUpdate(request.Address, func, (key, oldFunc) => + { + EnqueueForDeletion(key, oldFunc); + return func; + }); _jumpTable.RegisterFunction(request.Address, func); @@ -72,9 +76,12 @@ namespace ARMeilleure.Translation { PtcProfiler.UpdateEntry(request.Address, request.Mode, highCq: true); } + + _backgroundTranslatorLock.ReleaseReaderLock(); } else { + _backgroundTranslatorLock.ReleaseReaderLock(); _backgroundTranslatorEvent.WaitOne(); } } @@ -88,8 +95,12 @@ namespace ARMeilleure.Translation { IsReadyForTranslation.WaitOne(); + Debug.Assert(_jumpTable == null); + _jumpTable = new JumpTable(_allocator); + if (Ptc.State == PtcState.Enabled) { + Ptc.LoadTranslations(_funcs, _memory.PageTablePointer, _jumpTable); Ptc.MakeAndSaveTranslations(_funcs, _memory, _jumpTable); } @@ -126,13 +137,18 @@ namespace ARMeilleure.Translation { address = ExecuteSingle(context, address); } - while (context.Running && (address & ~1UL) != 0); + while (context.Running && address != 0); NativeInterface.UnregisterThread(); if (Interlocked.Decrement(ref _threadCount) == 0) { _backgroundTranslatorEvent.Set(); + + ClearJitCache(); + + _jumpTable.Dispose(); + _jumpTable = null; } } @@ -149,19 +165,19 @@ namespace ARMeilleure.Translation return nextAddr; } - internal TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode) + internal TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode, bool hintRejit = false) { - // TODO: Investigate how we should handle code at unaligned addresses. - // Currently, those low bits are used to store special flags. - bool isCallTarget = (address & CallFlag) != 0; - - address &= ~CallFlag; - if (!_funcs.TryGetValue(address, out TranslatedFunction func)) { func = Translate(_memory, _jumpTable, address, mode, highCq: false); - _funcs.TryAdd(address, func); + TranslatedFunction getFunc = _funcs.GetOrAdd(address, func); + + if (getFunc != func) + { + JitCache.Unmap(func.FuncPtr); + func = getFunc; + } if (PtcProfiler.Enabled) { @@ -169,10 +185,9 @@ namespace ARMeilleure.Translation } } - if (isCallTarget && func.ShouldRejit()) + if (hintRejit && func.ShouldRejit()) { _backgroundStack.Push(new RejitRequest(address, mode)); - _backgroundTranslatorEvent.Set(); } @@ -181,7 +196,7 @@ namespace ARMeilleure.Translation internal static TranslatedFunction Translate(IMemoryManager memory, JumpTable jumpTable, ulong address, ExecutionMode mode, bool highCq) { - ArmEmitterContext context = new ArmEmitterContext(memory, jumpTable, (long)address, highCq, Aarch32Mode.User); + ArmEmitterContext context = new ArmEmitterContext(memory, jumpTable, address, highCq, Aarch32Mode.User); PrepareOperandPool(highCq); PrepareOperationPool(highCq); @@ -201,7 +216,9 @@ namespace ARMeilleure.Translation context.Branch(context.GetLabel(address)); } - ControlFlowGraph cfg = EmitAndGetCFG(context, blocks); + ControlFlowGraph cfg = EmitAndGetCFG(context, blocks, out Range funcRange); + + ulong funcSize = funcRange.End - funcRange.Start; Logger.EndPass(PassName.Translation); @@ -227,22 +244,50 @@ namespace ARMeilleure.Translation { func = Compiler.Compile(cfg, argTypes, OperandType.I64, options, ptcInfo); - Ptc.WriteInfoCodeReloc((long)address, highCq, ptcInfo); + Ptc.WriteInfoCodeReloc(address, funcSize, highCq, ptcInfo); } } ResetOperandPool(highCq); ResetOperationPool(highCq); - return new TranslatedFunction(func, highCq); + return new TranslatedFunction(func, funcSize, highCq); } - private static ControlFlowGraph EmitAndGetCFG(ArmEmitterContext context, Block[] blocks) + private struct Range { + public ulong Start { get; } + public ulong End { get; } + + public Range(ulong start, ulong end) + { + Start = start; + End = end; + } + } + + private static ControlFlowGraph EmitAndGetCFG(ArmEmitterContext context, Block[] blocks, out Range range) + { + ulong rangeStart = ulong.MaxValue; + ulong rangeEnd = 0; + for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++) { Block block = blocks[blkIndex]; + if (!block.Exit) + { + if (rangeStart > block.Address) + { + rangeStart = block.Address; + } + + if (rangeEnd < block.EndAddress) + { + rangeEnd = block.EndAddress; + } + } + context.CurrBlock = block; context.MarkLabel(context.GetLabel(block.Address)); @@ -292,6 +337,8 @@ namespace ARMeilleure.Translation } } + range = new Range(rangeStart, rangeEnd); + return context.GetControlFlowGraph(); } @@ -322,5 +369,66 @@ namespace ARMeilleure.Translation context.MarkLabel(lblExit); } + + public void InvalidateJitCacheRegion(ulong address, ulong size) + { + static bool OverlapsWith(ulong funcAddress, ulong funcSize, ulong address, ulong size) + { + return funcAddress < address + size && address < funcAddress + funcSize; + } + + // Make a copy of all overlapping functions, as we can't otherwise + // remove elements from the collection we are iterating. + // Doing that before clearing the rejit queue is fine, even + // if a function is translated after this, it would only replace + // a existing function, as rejit is only triggered on functions + // that were already executed before. + var toDelete = _funcs.Where(x => OverlapsWith(x.Key, x.Value.GuestSize, address, size)).ToArray(); + + if (toDelete.Length != 0) + { + // If rejit is running, stop it as it may be trying to rejit the functions we are + // supposed to remove. + ClearRejitQueue(); + } + + foreach (var kv in toDelete) + { + if (_funcs.TryRemove(kv.Key, out TranslatedFunction func)) + { + EnqueueForDeletion(kv.Key, func); + } + } + } + + private void EnqueueForDeletion(ulong guestAddress, TranslatedFunction func) + { + _oldFuncs.Enqueue(new KeyValuePair(guestAddress, func.FuncPtr)); + } + + private void ClearJitCache() + { + // Ensure no attempt will be made to compile new functions due to rejit. + ClearRejitQueue(); + + foreach (var kv in _funcs) + { + JitCache.Unmap(kv.Value.FuncPtr); + } + + _funcs.Clear(); + + while (_oldFuncs.TryDequeue(out var kv)) + { + JitCache.Unmap(kv.Value); + } + } + + private void ClearRejitQueue() + { + _backgroundTranslatorLock.AcquireWriterLock(Timeout.Infinite); + _backgroundStack.Clear(); + _backgroundTranslatorLock.ReleaseWriterLock(); + } } } diff --git a/Ryujinx.Cpu/CpuContext.cs b/Ryujinx.Cpu/CpuContext.cs index 275fcc68..45040586 100644 --- a/Ryujinx.Cpu/CpuContext.cs +++ b/Ryujinx.Cpu/CpuContext.cs @@ -10,10 +10,22 @@ namespace Ryujinx.Cpu public CpuContext(MemoryManager memory) { _translator = new Translator(new JitMemoryAllocator(), memory); + memory.UnmapEvent += UnmapHandler; } - public static ExecutionContext CreateExecutionContext() => new ExecutionContext(new JitMemoryAllocator()); + private void UnmapHandler(ulong address, ulong size) + { + _translator.InvalidateJitCacheRegion(address, size); + } - public void Execute(ExecutionContext context, ulong address) => _translator.Execute(context, address); + public static ExecutionContext CreateExecutionContext() + { + return new ExecutionContext(new JitMemoryAllocator()); + } + + public void Execute(ExecutionContext context, ulong address) + { + _translator.Execute(context, address); + } } } diff --git a/Ryujinx.Cpu/MemoryManager.cs b/Ryujinx.Cpu/MemoryManager.cs index 36ae9b32..5a778e64 100644 --- a/Ryujinx.Cpu/MemoryManager.cs +++ b/Ryujinx.Cpu/MemoryManager.cs @@ -40,6 +40,8 @@ namespace Ryujinx.Cpu public MemoryTracking Tracking { get; } + internal event Action UnmapEvent; + /// /// Creates a new instance of the memory manager. /// @@ -100,6 +102,14 @@ namespace Ryujinx.Cpu /// Size of the range to be unmapped public void Unmap(ulong va, ulong size) { + // If size is 0, there's nothing to unmap, just exit early. + if (size == 0) + { + return; + } + + UnmapEvent?.Invoke(va, size); + ulong remainingSize = size; ulong oVa = va; while (remainingSize != 0)