diff --git a/src/Ryujinx.Ava/AppHost.cs b/src/Ryujinx.Ava/AppHost.cs
index 349b64b4..2502fa41 100644
--- a/src/Ryujinx.Ava/AppHost.cs
+++ b/src/Ryujinx.Ava/AppHost.cs
@@ -92,6 +92,8 @@ namespace Ryujinx.Ava
         private bool _isActive;
         private bool _renderingStarted;
 
+        private ManualResetEvent _gpuDoneEvent;
+
         private IRenderer                        _renderer;
         private readonly Thread                  _renderingThread;
         private readonly CancellationTokenSource _gpuCancellationTokenSource;
@@ -183,6 +185,7 @@ namespace Ryujinx.Ava
             ConfigurationState.Instance.Multiplayer.LanInterfaceId.Event   += UpdateLanInterfaceIdState;
 
             _gpuCancellationTokenSource = new CancellationTokenSource();
+            _gpuDoneEvent = new ManualResetEvent(false);
         }
 
         private void TopLevel_PointerEnterOrMoved(object sender, PointerEventArgs e)
@@ -423,10 +426,10 @@ namespace Ryujinx.Ava
 
             _isActive = false;
 
-            if (_renderingThread.IsAlive)
-            {
-                _renderingThread.Join();
-            }
+            // NOTE: The render loop is allowed to stay alive until the renderer itself is disposed, as it may handle resource disposal.
+            // We only need to wait for all commands submitted during the main GPU loop to be processed.
+            _gpuDoneEvent.WaitOne();
+            _gpuDoneEvent.Dispose();
 
             DisplaySleep.Restore();
 
@@ -917,6 +920,14 @@ namespace Ryujinx.Ava
                         UpdateStatus();
                     }
                 }
+
+                // Make sure all commands in the run loop are fully executed before leaving the loop.
+                if (Device.Gpu.Renderer is ThreadedRenderer threaded)
+                {
+                    threaded.FlushThreadedCommands();
+                }
+
+                _gpuDoneEvent.Set();
             });
 
             (_rendererHost.EmbeddedWindow as EmbeddedWindowOpenGL)?.MakeCurrent(null);
diff --git a/src/Ryujinx.Graphics.GAL/IRenderer.cs b/src/Ryujinx.Graphics.GAL/IRenderer.cs
index d36dd26b..b668d56e 100644
--- a/src/Ryujinx.Graphics.GAL/IRenderer.cs
+++ b/src/Ryujinx.Graphics.GAL/IRenderer.cs
@@ -1,5 +1,6 @@
 using Ryujinx.Common.Configuration;
 using System;
+using System.Threading;
 
 namespace Ryujinx.Graphics.GAL
 {
@@ -52,7 +53,7 @@ namespace Ryujinx.Graphics.GAL
 
         void ResetCounter(CounterType type);
 
-        void RunLoop(Action gpuLoop)
+        void RunLoop(ThreadStart gpuLoop) // Default body: invokes gpuLoop directly on the calling thread.
         {
             gpuLoop();
         }
diff --git a/src/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs b/src/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs
index 3e179621..e6169d89 100644
--- a/src/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs
+++ b/src/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs
@@ -30,7 +30,6 @@ namespace Ryujinx.Graphics.GAL.Multithreading
         private IRenderer _baseRenderer;
         private Thread _gpuThread;
         private Thread _backendThread;
-        private bool _disposed;
         private bool _running;
 
         private AutoResetEvent _frameComplete = new AutoResetEvent(true);
@@ -98,19 +97,17 @@ namespace Ryujinx.Graphics.GAL.Multithreading
             _refQueue = new object[MaxRefsPerCommand * QueueCount];
         }
 
-        public void RunLoop(Action gpuLoop)
+        public void RunLoop(ThreadStart gpuLoop)
         {
             _running = true;
 
             _backendThread = Thread.CurrentThread;
 
-            _gpuThread = new Thread(() => {
-                gpuLoop();
-                _running = false;
-                _galWorkAvailable.Set();
-            });
+            _gpuThread = new Thread(gpuLoop)
+            {
+                Name = "GPU.MainThread"
+            };
 
-            _gpuThread.Name = "GPU.MainThread";
             _gpuThread.Start();
 
             RenderLoop();
@@ -120,7 +117,7 @@ namespace Ryujinx.Graphics.GAL.Multithreading
         {
             // Power through the render queue until the Gpu thread work is done.
 
-            while (_running && !_disposed)
+            while (_running)
             {
                 _galWorkAvailable.Wait();
                 _galWorkAvailable.Reset();
@@ -488,12 +485,23 @@ namespace Ryujinx.Graphics.GAL.Multithreading
             return _baseRenderer.PrepareHostMapping(address, size);
         }
 
+        public void FlushThreadedCommands()
+        {
+            SpinWait wait = new();
+            // Block the caller until _commandCount is no longer positive, polling it with a volatile read.
+            while (Volatile.Read(ref _commandCount) > 0)
+            {
+                wait.SpinOnce(); // Back off between polls rather than re-reading the counter in a tight loop.
+            }
+        }
+
         public void Dispose()
         {
             // Dispose must happen from the render thread, after all commands have completed.
 
             // Stop the GPU thread.
-            _disposed = true;
+            _running = false;
+            _galWorkAvailable.Set();
 
             if (_gpuThread != null && _gpuThread.IsAlive)
             {
diff --git a/src/Ryujinx.Graphics.Gpu/GpuContext.cs b/src/Ryujinx.Graphics.Gpu/GpuContext.cs
index 0fe6a28f..233227b4 100644
--- a/src/Ryujinx.Graphics.Gpu/GpuContext.cs
+++ b/src/Ryujinx.Graphics.Gpu/GpuContext.cs
@@ -390,7 +390,6 @@ namespace Ryujinx.Graphics.Gpu
         /// </summary>
         public void Dispose()
         {
-            Renderer.Dispose();
             GPFifo.Dispose();
             HostInitalized.Dispose();
 
@@ -403,6 +402,8 @@ namespace Ryujinx.Graphics.Gpu
             PhysicalMemoryRegistry.Clear();
 
             RunDeferredActions();
+
+            Renderer.Dispose();
         }
     }
 }
\ No newline at end of file
diff --git a/src/Ryujinx.Headless.SDL2/WindowBase.cs b/src/Ryujinx.Headless.SDL2/WindowBase.cs
index 7c310153..d163da22 100644
--- a/src/Ryujinx.Headless.SDL2/WindowBase.cs
+++ b/src/Ryujinx.Headless.SDL2/WindowBase.cs
@@ -62,6 +62,7 @@ namespace Ryujinx.Headless.SDL2
         private readonly long _ticksPerFrame;
         private readonly CancellationTokenSource _gpuCancellationTokenSource;
         private readonly ManualResetEvent _exitEvent;
+        private readonly ManualResetEvent _gpuDoneEvent;
 
         private long _ticks;
         private bool _isActive;
@@ -91,6 +92,7 @@ namespace Ryujinx.Headless.SDL2
             _ticksPerFrame = Stopwatch.Frequency / TargetFps;
             _gpuCancellationTokenSource = new CancellationTokenSource();
             _exitEvent = new ManualResetEvent(false);
+            _gpuDoneEvent = new ManualResetEvent(false);
             _aspectRatio = aspectRatio;
             _enableMouse = enableMouse;
             HostUiTheme = new HeadlessHostUiTheme();
@@ -275,6 +277,14 @@ namespace Ryujinx.Headless.SDL2
                         _ticks = Math.Min(_ticks - _ticksPerFrame, _ticksPerFrame);
                     }
                 }
+
+                // Make sure all commands in the run loop are fully executed before leaving the loop.
+                if (Device.Gpu.Renderer is ThreadedRenderer threaded)
+                {
+                    threaded.FlushThreadedCommands();
+                }
+
+                _gpuDoneEvent.Set();
             });
 
             FinalizeWindowRenderer();
@@ -404,7 +414,10 @@ namespace Ryujinx.Headless.SDL2
 
             MainLoop();
 
-            renderLoopThread.Join();
+            // NOTE: The render loop is allowed to stay alive until the renderer itself is disposed, as it may handle resource disposal.
+            // We only need to wait for all commands submitted during the main GPU loop to be processed.
+            _gpuDoneEvent.WaitOne();
+            _gpuDoneEvent.Dispose();
             nvStutterWorkaround?.Join();
 
             Exit();
diff --git a/src/Ryujinx/Ui/RendererWidgetBase.cs b/src/Ryujinx/Ui/RendererWidgetBase.cs
index e2cba777..87ff7f6c 100644
--- a/src/Ryujinx/Ui/RendererWidgetBase.cs
+++ b/src/Ryujinx/Ui/RendererWidgetBase.cs
@@ -65,6 +65,7 @@ namespace Ryujinx.Ui
         private KeyboardHotkeyState _prevHotkeyState;
 
         private readonly ManualResetEvent _exitEvent;
+        private readonly ManualResetEvent _gpuDoneEvent;
 
         private readonly CancellationTokenSource _gpuCancellationTokenSource;
 
@@ -110,6 +111,7 @@ namespace Ryujinx.Ui
                           | EventMask.KeyReleaseMask));
 
             _exitEvent = new ManualResetEvent(false);
+            _gpuDoneEvent = new ManualResetEvent(false);
 
             _gpuCancellationTokenSource = new CancellationTokenSource();
 
@@ -499,6 +501,14 @@ namespace Ryujinx.Ui
                         _ticks = Math.Min(_ticks - _ticksPerFrame, _ticksPerFrame);
                     }
                 }
+
+                // Make sure all commands in the run loop are fully executed before leaving the loop.
+                if (Device.Gpu.Renderer is ThreadedRenderer threaded)
+                {
+                    threaded.FlushThreadedCommands();
+                }
+
+                _gpuDoneEvent.Set();
             });
         }
 
@@ -542,7 +552,10 @@ namespace Ryujinx.Ui
 
             MainLoop();
 
-            renderLoopThread.Join();
+            // NOTE: The render loop is allowed to stay alive until the renderer itself is disposed, as it may handle resource disposal.
+            // We only need to wait for all commands submitted during the main GPU loop to be processed.
+            _gpuDoneEvent.WaitOne();
+            _gpuDoneEvent.Dispose();
             nvStutterWorkaround?.Join();
 
             Exit();