From 095db47e132a475e25d128e691ebdae101611cc9 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 8 Jul 2018 16:55:15 -0300 Subject: [PATCH] Query multiple pages at once with GetWriteWatch (#222) * Query multiple pages at once with GetWriteWatch * Allow multiple buffer types to share the same page, always use the physical address as cache key * Remove a variable that is no longer needed --- ChocolArm64/Memory/AMemory.cs | 58 ++++++----- ChocolArm64/Memory/AMemoryWin32.cs | 29 +++++- Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs | 32 +++--- Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs | 3 +- Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs | 6 +- Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs | 120 ++++++++++++---------- 6 files changed, 145 insertions(+), 103 deletions(-) diff --git a/ChocolArm64/Memory/AMemory.cs b/ChocolArm64/Memory/AMemory.cs index c02bf172..da5cf007 100644 --- a/ChocolArm64/Memory/AMemory.cs +++ b/ChocolArm64/Memory/AMemory.cs @@ -33,19 +33,25 @@ namespace ChocolArm64.Memory private byte* RamPtr; + private int HostPageSize; + public AMemory() { Manager = new AMemoryMgr(); Monitors = new Dictionary(); + IntPtr Size = (IntPtr)AMemoryMgr.RamSize + AMemoryMgr.PageSize; + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { - Ram = AMemoryWin32.Allocate((IntPtr)AMemoryMgr.RamSize + AMemoryMgr.PageSize); + Ram = AMemoryWin32.Allocate(Size); + + HostPageSize = AMemoryWin32.GetPageSize(Ram, Size); } else { - Ram = Marshal.AllocHGlobal((IntPtr)AMemoryMgr.RamSize + AMemoryMgr.PageSize); + Ram = Marshal.AllocHGlobal(Size); } RamPtr = (byte*)Ram; @@ -149,49 +155,53 @@ namespace ChocolArm64.Memory } } - public long GetHostPageSize() + public int GetHostPageSize() { - if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) - { - return AMemoryMgr.PageSize; - } - - IntPtr MemAddress = new IntPtr(RamPtr); - IntPtr MemSize = new IntPtr(AMemoryMgr.RamSize); - - long PageSize = AMemoryWin32.IsRegionModified(MemAddress, MemSize, Reset: false); - - if (PageSize < 1) - { - throw new 
InvalidOperationException(); - } - - return PageSize; + return HostPageSize; } - public bool IsRegionModified(long Position, long Size) + public bool[] IsRegionModified(long Position, long Size) { if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { - return true; + return null; } long EndPos = Position + Size; if ((ulong)EndPos < (ulong)Position) { - return false; + return null; } if ((ulong)EndPos > AMemoryMgr.RamSize) { - return false; + return null; } IntPtr MemAddress = new IntPtr(RamPtr + Position); IntPtr MemSize = new IntPtr(Size); - return AMemoryWin32.IsRegionModified(MemAddress, MemSize, Reset: true) != 0; + int HostPageMask = HostPageSize - 1; + + Position &= ~HostPageMask; + + Size = EndPos - Position; + + IntPtr[] Addresses = new IntPtr[(Size + HostPageMask) / HostPageSize]; + + AMemoryWin32.IsRegionModified(MemAddress, MemSize, Addresses, out int Count); + + bool[] Modified = new bool[Addresses.Length]; + + for (int Index = 0; Index < Count; Index++) + { + long VA = Addresses[Index].ToInt64() - Ram.ToInt64(); + + Modified[(VA - Position) / HostPageSize] = true; + } + + return Modified; } public sbyte ReadSByte(long Position) diff --git a/ChocolArm64/Memory/AMemoryWin32.cs b/ChocolArm64/Memory/AMemoryWin32.cs index d097dc87..387ca32c 100644 --- a/ChocolArm64/Memory/AMemoryWin32.cs +++ b/ChocolArm64/Memory/AMemoryWin32.cs @@ -49,7 +49,7 @@ namespace ChocolArm64.Memory VirtualFree(Address, IntPtr.Zero, MEM_RELEASE); } - public unsafe static long IsRegionModified(IntPtr Address, IntPtr Size, bool Reset) + public unsafe static int GetPageSize(IntPtr Address, IntPtr Size) { IntPtr[] Addresses = new IntPtr[1]; @@ -57,17 +57,36 @@ namespace ChocolArm64.Memory long Granularity; - int Flags = Reset ? WRITE_WATCH_FLAG_RESET : 0; - GetWriteWatch( - Flags, + 0, Address, Size, Addresses, &Count, &Granularity); - return Count != 0 ? 
Granularity : 0; + return (int)Granularity; + } + + public unsafe static void IsRegionModified( + IntPtr Address, + IntPtr Size, + IntPtr[] Addresses, + out int AddrCount) + { + long Count = Addresses.Length; + + long Granularity; + + GetWriteWatch( + WRITE_WATCH_FLAG_RESET, + Address, + Size, + Addresses, + &Count, + &Granularity); + + AddrCount = (int)Count; } } } \ No newline at end of file diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs index 10c99494..b9f9cc49 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs @@ -464,19 +464,17 @@ namespace Ryujinx.HLE.Gpu.Engines GalTextureSampler Sampler = TextureFactory.MakeSampler(Gpu, Vmm, TscPosition); - long TextureAddress = Vmm.ReadInt64(TicPosition + 4) & 0xffffffffffff; + long Key = Vmm.ReadInt64(TicPosition + 4) & 0xffffffffffff; - long Key = TextureAddress; + Key = Vmm.GetPhysicalAddress(Key); - TextureAddress = Vmm.GetPhysicalAddress(TextureAddress); - - if (IsFrameBufferPosition(TextureAddress)) + if (IsFrameBufferPosition(Key)) { //This texture is a frame buffer texture, //we shouldn't read anything from memory and bind //the frame buffer texture instead, since we're not //really writing anything to memory. 
- Gpu.Renderer.FrameBuffer.BindTexture(TextureAddress, TexIndex); + Gpu.Renderer.FrameBuffer.BindTexture(Key, TexIndex); } else { @@ -544,6 +542,8 @@ namespace Ryujinx.HLE.Gpu.Engines { long IndexPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.IndexArrayAddress); + long IboKey = Vmm.GetPhysicalAddress(IndexPosition); + int IndexEntryFmt = ReadRegister(NvGpuEngine3dReg.IndexArrayFormat); int IndexFirst = ReadRegister(NvGpuEngine3dReg.IndexBatchFirst); int IndexCount = ReadRegister(NvGpuEngine3dReg.IndexBatchCount); @@ -561,16 +561,16 @@ namespace Ryujinx.HLE.Gpu.Engines { int IbSize = IndexCount * IndexEntrySize; - bool IboCached = Gpu.Renderer.Rasterizer.IsIboCached(IndexPosition, (uint)IbSize); + bool IboCached = Gpu.Renderer.Rasterizer.IsIboCached(IboKey, (uint)IbSize); - if (!IboCached || Vmm.IsRegionModified(IndexPosition, (uint)IbSize, NvGpuBufferType.Index)) + if (!IboCached || Vmm.IsRegionModified(IboKey, (uint)IbSize, NvGpuBufferType.Index)) { byte[] Data = Vmm.ReadBytes(IndexPosition, (uint)IbSize); - Gpu.Renderer.Rasterizer.CreateIbo(IndexPosition, Data); + Gpu.Renderer.Rasterizer.CreateIbo(IboKey, Data); } - Gpu.Renderer.Rasterizer.SetIndexArray(IndexPosition, IbSize, IndexFormat); + Gpu.Renderer.Rasterizer.SetIndexArray(IboKey, IbSize, IndexFormat); } List[] Attribs = new List[32]; @@ -619,20 +619,22 @@ namespace Ryujinx.HLE.Gpu.Engines continue; } + long VboKey = Vmm.GetPhysicalAddress(VertexPosition); + int Stride = Control & 0xfff; long VbSize = (VertexEndPos - VertexPosition) + 1; - bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VertexPosition, VbSize); + bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VboKey, VbSize); - if (!VboCached || Vmm.IsRegionModified(VertexPosition, VbSize, NvGpuBufferType.Vertex)) + if (!VboCached || Vmm.IsRegionModified(VboKey, VbSize, NvGpuBufferType.Vertex)) { byte[] Data = Vmm.ReadBytes(VertexPosition, VbSize); - Gpu.Renderer.Rasterizer.CreateVbo(VertexPosition, Data); + 
Gpu.Renderer.Rasterizer.CreateVbo(VboKey, Data); } - Gpu.Renderer.Rasterizer.SetVertexArray(Index, Stride, VertexPosition, Attribs[Index].ToArray()); + Gpu.Renderer.Rasterizer.SetVertexArray(Index, Stride, VboKey, Attribs[Index].ToArray()); } GalPrimitiveType PrimType = (GalPrimitiveType)(PrimCtrl & 0xffff); @@ -641,7 +643,7 @@ namespace Ryujinx.HLE.Gpu.Engines { int VertexBase = ReadRegister(NvGpuEngine3dReg.VertexArrayElemBase); - Gpu.Renderer.Rasterizer.DrawElements(IndexPosition, IndexFirst, VertexBase, PrimType); + Gpu.Renderer.Rasterizer.DrawElements(IboKey, IndexFirst, VertexBase, PrimType); } else { diff --git a/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs b/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs index 7474aa33..469cd6cd 100644 --- a/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs +++ b/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs @@ -4,6 +4,7 @@ namespace Ryujinx.HLE.Gpu.Memory { Index, Vertex, - Texture + Texture, + Count } } \ No newline at end of file diff --git a/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs b/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs index 36f6406a..0c81dd15 100644 --- a/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs +++ b/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs @@ -274,11 +274,9 @@ namespace Ryujinx.HLE.Gpu.Memory PageTable[L0][L1] = TgtAddr; } - public bool IsRegionModified(long Position, long Size, NvGpuBufferType BufferType) + public bool IsRegionModified(long PA, long Size, NvGpuBufferType BufferType) { - long PA = GetPhysicalAddress(Position); - - return Cache.IsRegionModified(Memory, BufferType, Position, PA, Size); + return Cache.IsRegionModified(Memory, BufferType, PA, Size); } public byte ReadByte(long Position) diff --git a/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs b/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs index c7108f00..ac9bd850 100644 --- a/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs +++ b/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs @@ -11,43 +11,53 @@ namespace Ryujinx.HLE.Gpu.Memory private class CachedPage { - private List<(long Start, long End)> Regions; + private 
struct Range + { + public long Start; + public long End; + + public Range(long Start, long End) + { + this.Start = Start; + this.End = End; + } + } + + private List[] Regions; public LinkedListNode Node { get; set; } - public int Count => Regions.Count; - public int Timestamp { get; private set; } - public long PABase { get; private set; } - - public NvGpuBufferType BufferType { get; private set; } - - public CachedPage(long PABase, NvGpuBufferType BufferType) + public CachedPage() { - this.PABase = PABase; - this.BufferType = BufferType; + Regions = new List[(int)NvGpuBufferType.Count]; - Regions = new List<(long, long)>(); + for (int Index = 0; Index < Regions.Length; Index++) + { + Regions[Index] = new List(); + } } - public bool AddRange(long Start, long End) + public bool AddRange(long Start, long End, NvGpuBufferType BufferType) { - for (int Index = 0; Index < Regions.Count; Index++) - { - (long RgStart, long RgEnd) = Regions[Index]; + List BtRegions = Regions[(int)BufferType]; - if (Start >= RgStart && End <= RgEnd) + for (int Index = 0; Index < BtRegions.Count; Index++) + { + Range Rg = BtRegions[Index]; + + if (Start >= Rg.Start && End <= Rg.End) { return false; } - if (Start <= RgEnd && RgStart <= End) + if (Start <= Rg.End && Rg.Start <= End) { - long MinStart = Math.Min(RgStart, Start); - long MaxEnd = Math.Max(RgEnd, End); + long MinStart = Math.Min(Rg.Start, Start); + long MaxEnd = Math.Max(Rg.End, End); - Regions[Index] = (MinStart, MaxEnd); + BtRegions[Index] = new Range(MinStart, MaxEnd); Timestamp = Environment.TickCount; @@ -55,12 +65,24 @@ namespace Ryujinx.HLE.Gpu.Memory } } - Regions.Add((Start, End)); + BtRegions.Add(new Range(Start, End)); Timestamp = Environment.TickCount; return true; } + + public int GetTotalCount() + { + int Count = 0; + + for (int Index = 0; Index < Regions.Length; Index++) + { + Count += Regions[Index].Count; + } + + return Count; + } } private Dictionary Cache; @@ -76,71 +98,61 @@ namespace Ryujinx.HLE.Gpu.Memory 
SortedCache = new LinkedList(); } - public bool IsRegionModified( - AMemory Memory, - NvGpuBufferType BufferType, - long VA, - long PA, - long Size) + public bool IsRegionModified(AMemory Memory, NvGpuBufferType BufferType, long PA, long Size) { + bool[] Modified = Memory.IsRegionModified(PA, Size); + + if (Modified == null) + { + return true; + } + ClearCachedPagesIfNeeded(); long PageSize = Memory.GetHostPageSize(); long Mask = PageSize - 1; - long VAEnd = VA + Size; long PAEnd = PA + Size; bool RegMod = false; - while (VA < VAEnd) - { - long Key = VA & ~Mask; - long PABase = PA & ~Mask; + int Index = 0; + + while (PA < PAEnd) + { + long Key = PA & ~Mask; - long VAPgEnd = Math.Min((VA + PageSize) & ~Mask, VAEnd); long PAPgEnd = Math.Min((PA + PageSize) & ~Mask, PAEnd); bool IsCached = Cache.TryGetValue(Key, out CachedPage Cp); - bool PgReset = false; - - if (!IsCached) + if (IsCached) { - Cp = new CachedPage(PABase, BufferType); + CpCount -= Cp.GetTotalCount(); - Cache.Add(Key, Cp); + SortedCache.Remove(Cp.Node); } else { - CpCount -= Cp.Count; + Cp = new CachedPage(); - SortedCache.Remove(Cp.Node); - - if (Cp.PABase != PABase || - Cp.BufferType != BufferType) - { - PgReset = true; - } + Cache.Add(Key, Cp); } - PgReset |= Memory.IsRegionModified(PA, PAPgEnd - PA) && IsCached; - - if (PgReset) + if (Modified[Index++] && IsCached) { - Cp = new CachedPage(PABase, BufferType); + Cp = new CachedPage(); Cache[Key] = Cp; } Cp.Node = SortedCache.AddLast(Key); - RegMod |= Cp.AddRange(VA, VAPgEnd); + RegMod |= Cp.AddRange(PA, PAPgEnd, BufferType); - CpCount += Cp.Count; + CpCount += Cp.GetTotalCount(); - VA = VAPgEnd; PA = PAPgEnd; } @@ -169,7 +181,7 @@ namespace Ryujinx.HLE.Gpu.Memory Cache.Remove(Key); - CpCount -= Cp.Count; + CpCount -= Cp.GetTotalCount(); TimeDelta = RingDelta(Cp.Timestamp, Timestamp); }