From 3208173620a0003d09d8e756729ca905ff14c47f Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 29 Jul 2018 01:39:15 -0300
Subject: [PATCH] Cache changes (#302)

* Skip repeated cache tests for the same key between two syncs

* Skip some checks for regions where just one resource is resident

* Dehardcode residency page size

* Some cleanup
---
 ChocolArm64/Memory/AMemory.cs            | 10 +--
 Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs | 34 ++++++++-
 Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs  | 93 +++++++++++++++++++++++-
 3 files changed, 127 insertions(+), 10 deletions(-)

diff --git a/ChocolArm64/Memory/AMemory.cs b/ChocolArm64/Memory/AMemory.cs
index 054277b29..e969cca5f 100644
--- a/ChocolArm64/Memory/AMemory.cs
+++ b/ChocolArm64/Memory/AMemory.cs
@@ -160,23 +160,23 @@ namespace ChocolArm64.Memory
             return HostPageSize;
         }
 
-        public bool[] IsRegionModified(long Position, long Size)
+        public (bool[], long) IsRegionModified(long Position, long Size)
         {
             if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
             {
-                return null;
+                return (null, 0);
             }
 
             long EndPos = Position + Size;
 
             if ((ulong)EndPos < (ulong)Position)
             {
-                return null;
+                return (null, 0);
             }
 
             if ((ulong)EndPos > AMemoryMgr.RamSize)
             {
-                return null;
+                return (null, 0);
             }
 
             IntPtr MemAddress = new IntPtr(RamPtr + Position);
@@ -201,7 +201,7 @@ namespace ChocolArm64.Memory
                 Modified[(VA - Position) / HostPageSize] = true;
             }
 
-            return Modified;
+            return (Modified, Count);
         }
 
         public IntPtr GetHostAddress(long Position, long Size)
diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs
index c3e7a77fc..3dd0e98e6 100644
--- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs
+++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs
@@ -25,6 +25,8 @@ namespace Ryujinx.HLE.Gpu.Engines
 
         private HashSet<long> FrameBuffers;
 
+        private List<long>[] UploadedKeys;
+
         public NvGpuEngine3d(NvGpu Gpu)
         {
             this.Gpu = Gpu;
@@ -57,6 +59,13 @@ namespace Ryujinx.HLE.Gpu.Engines
             }
 
             FrameBuffers = new HashSet<long>();
+
+            UploadedKeys = new List<long>[(int)NvGpuBufferType.Count];
+
+            for (int i = 0; i < UploadedKeys.Length; i++)
+            {
+                UploadedKeys[i] = new List<long>();
+            }
         }
 
         public void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
@@ -516,7 +525,7 @@ namespace Ryujinx.HLE.Gpu.Engines
 
                 if (Gpu.Renderer.Texture.TryGetCachedTexture(Key, Size, out GalTexture Texture))
                 {
-                    if (NewTexture.Equals(Texture) && !Vmm.IsRegionModified(Key, Size, NvGpuBufferType.Texture))
+                    if (NewTexture.Equals(Texture) && !QueryKeyUpload(Vmm, Key, Size, NvGpuBufferType.Texture))
                     {
                         Gpu.Renderer.Texture.Bind(Key, TexIndex);
 
@@ -593,7 +602,7 @@ namespace Ryujinx.HLE.Gpu.Engines
 
                 bool IboCached = Gpu.Renderer.Rasterizer.IsIboCached(IboKey, (uint)IbSize);
 
-                if (!IboCached || Vmm.IsRegionModified(IboKey, (uint)IbSize, NvGpuBufferType.Index))
+                if (!IboCached || QueryKeyUpload(Vmm, IboKey, (uint)IbSize, NvGpuBufferType.Index))
                 {
                     IntPtr DataAddress = Vmm.GetHostAddress(IndexPosition, IbSize);
 
@@ -657,7 +666,7 @@ namespace Ryujinx.HLE.Gpu.Engines
 
                 bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VboKey, VbSize);
 
-                if (!VboCached || Vmm.IsRegionModified(VboKey, VbSize, NvGpuBufferType.Vertex))
+                if (!VboCached || QueryKeyUpload(Vmm, VboKey, VbSize, NvGpuBufferType.Vertex))
                 {
                     IntPtr DataAddress = Vmm.GetHostAddress(VertexPosition, VbSize);
 
@@ -692,6 +701,11 @@ namespace Ryujinx.HLE.Gpu.Engines
 
             if (Mode == 0)
             {
+                foreach (List<long> Uploaded in UploadedKeys)
+                {
+                    Uploaded.Clear();
+                }
+
                 //Write mode.
                 Vmm.WriteInt32(Position, Seq);
             }
@@ -774,5 +788,19 @@ namespace Ryujinx.HLE.Gpu.Engines
         {
             return FrameBuffers.Contains(Position);
         }
+
+        //Tests a key once per buffer type until UploadedKeys is cleared; repeats yield false.
+        private bool QueryKeyUpload(NvGpuVmm Vmm, long Key, long Size, NvGpuBufferType Type)
+        {
+            List<long> Queried = UploadedKeys[(int)Type];
+
+            if (!Queried.Contains(Key))
+            {
+                Queried.Add(Key);
+                return Vmm.IsRegionModified(Key, Size, Type);
+            }
+
+            return false;
+        }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs b/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs
index ac9bd850e..3c2560443 100644
--- a/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs
+++ b/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs
@@ -19,12 +19,14 @@ namespace Ryujinx.HLE.Gpu.Memory
                 public Range(long Start, long End)
                 {
                     this.Start = Start;
-                    this.End   = End;
+                    this.End = End;
                 }
             }
 
             private List<Range>[] Regions;
 
+            private HashSet<long> ResidencyKeys;
+
             public LinkedListNode<long> Node { get; set; }
 
             public int Timestamp { get; private set; }
@@ -37,6 +39,27 @@ namespace Ryujinx.HLE.Gpu.Memory
                 {
                     Regions[Index] = new List<Range>();
                 }
+
+                ResidencyKeys = new HashSet<long>();
+            }
+
+            //Remembers Key in this entry's ResidencyKeys set, so that
+            //RemoveResidency can later drop it from the residency table.
+            //(Adding a key that is already present leaves the set unchanged.)
+            public void AddResidency(long Key) => ResidencyKeys.Add(Key);
+
+            //Drops every key in ResidencyKeys from the residency set of each page that
+            //holds one of this entry's ranges (page index is Range.Start / PageSize).
+            public void RemoveResidency(HashSet<long>[] Residency, long PageSize)
+            {
+                for (int Index = 0; Index < (int)NvGpuBufferType.Count; Index++)
+                {
+                    foreach (Range Region in Regions[Index])
+                    {
+                        //The page set does not depend on the key, so resolve it once.
+                        Residency[Region.Start / PageSize].ExceptWith(ResidencyKeys);
+                    }
+                }
             }
 
             public bool AddRange(long Start, long End, NvGpuBufferType BufferType)
@@ -89,6 +112,10 @@ namespace Ryujinx.HLE.Gpu.Memory
 
         private LinkedList<long> SortedCache;
 
+        private HashSet<long>[] Residency;
+
+        private long ResidencyPageSize;
+
         private int CpCount;
 
         public NvGpuVmmCache()
@@ -100,7 +127,7 @@ namespace Ryujinx.HLE.Gpu.Memory
 
         public bool IsRegionModified(AMemory Memory, NvGpuBufferType BufferType, long PA, long Size)
         {
-            bool[] Modified = Memory.IsRegionModified(PA, Size);
+            (bool[] Modified, long ModifiedCount) = Memory.IsRegionModified(PA, Size);
 
             if (Modified == null)
             {
@@ -111,8 +138,19 @@ namespace Ryujinx.HLE.Gpu.Memory
 
             long PageSize = Memory.GetHostPageSize();
 
+            EnsureResidencyInitialized(PageSize);
+
+            bool HasResidents = AddResidency(PA, Size);
+
+            if (!HasResidents && ModifiedCount == 0)
+            {
+                return false;
+            }
+
             long Mask = PageSize - 1;
 
+            long ResidencyKey = PA;
+
             long PAEnd = PA + Size;
 
             bool RegMod = false;
@@ -147,6 +185,8 @@ namespace Ryujinx.HLE.Gpu.Memory
                     Cache[Key] = Cp;
                 }
 
+                Cp.AddResidency(ResidencyKey);
+
                 Cp.Node = SortedCache.AddLast(Key);
 
                 RegMod |= Cp.AddRange(PA, PAPgEnd, BufferType);
@@ -159,6 +199,53 @@ namespace Ryujinx.HLE.Gpu.Memory
             return RegMod;
         }
 
+        //Registers PA as a resident key on every residency page in the page-aligned
+        //span that covers PA up to PA + Size. Returns true when at least one of
+        //those pages ends up holding more than one key.
+        private bool AddResidency(long PA, long Size)
+        {
+            long Step = ResidencyPageSize;
+            long AlignMask = Step - 1;
+
+            long First = PA & ~AlignMask;
+            long Limit = (PA + Size + Step - 1) & ~AlignMask;
+
+            bool Shared = false;
+
+            for (long Address = First; Address < Limit; Address += Step)
+            {
+                HashSet<long> Residents = Residency[Address / Step];
+
+                Residents.Add(PA);
+
+                Shared |= Residents.Count > 1;
+            }
+
+            return Shared;
+        }
+
+        //Allocates the per-page residency sets on first use; the page size cannot change later.
+        private void EnsureResidencyInitialized(long PageSize)
+        {
+            if (Residency != null)
+            {
+                if (ResidencyPageSize != PageSize)
+                {
+                    throw new InvalidOperationException("Tried to change residency page size");
+                }
+                return;
+            }
+
+            Residency = new HashSet<long>[AMemoryMgr.RamSize / PageSize];
+
+            for (int Page = 0; Page < Residency.Length; Page++)
+            {
+                Residency[Page] = new HashSet<long>();
+            }
+
+            ResidencyPageSize = PageSize;
+        }
+
         private void ClearCachedPagesIfNeeded()
         {
             if (CpCount <= MaxCpCount)
@@ -179,6 +266,8 @@ namespace Ryujinx.HLE.Gpu.Memory
 
                 CachedPage Cp = Cache[Key];
 
+                Cp.RemoveResidency(Residency, ResidencyPageSize);
+
                 Cache.Remove(Key);
 
                 CpCount -= Cp.GetTotalCount();