mirror of
https://github.com/GreemDev/Ryujinx.git
synced 2025-01-03 21:02:00 +00:00
ec3e848d79
* Initial Implementation
About as fast as nvidia GL multithreading, can be improved with faster command queuing.
* Struct based command list
Speeds up a bit. Still a lot of time lost to resource copy.
* Do shader init while the render thread is active.
* Introduce circular span pool V1
Ideally should be able to use structs instead of references for storing these spans on commands. Will try that next.
* Refactor SpanRef some more
Use a struct to represent SpanRef, rather than a reference.
* Flush buffers on background thread
* Use a span for UpdateRenderScale.
Much faster than copying the array.
* Calculate command size using reflection
* WIP parallel shaders
* Some minor optimisation
* Only 2 max refs per command now.
The command with 3 refs is gone. 😌
* Don't cast on the GPU side
* Remove redundant casts, force sync on window present
* Fix Shader Cache
* Fix host shader save.
* Fixup to work with new renderer stuff
* Make command Run static, use array of delegates as lookup
Profile says this takes less time than the previous way.
* Bring up to date
* Add settings toggle. Fix Multithreading Off mode.
* Fix warning.
* Release tracking lock for flushes
* Fix Conditional Render fast path with threaded gal
* Make handle iteration safe when releasing the lock
This is mostly temporary.
* Attempt to set backend threading on driver
Only really works on nvidia before launching a game.
* Fix race condition with BufferModifiedRangeList, exceptions in tracking actions
* Update buffer set commands
* Some cleanup
* Only use stutter workaround when using the OpenGL renderer non-threaded
* Add host-conditional reservation of counter events
There has always been the possibility that conditional rendering could use a query object just as it is disposed by the counter queue. This change makes it so that when the host decides to use host conditional rendering, the query object is reserved so that it cannot be deleted. Counter events can optionally start reserved, as the threaded implementation can reserve them before the backend creates them, and there would otherwise be a short amount of time where the counter queue could dispose the event before a call to reserve it could be made.
* Address Feedback
* Make counter flush tracked again.
Hopefully does not cause any issues this time.
* Wait for FlushTo on the main queue thread.
Currently assumes only one thread will want to FlushTo (in this case, the GPU thread)
* Add SDL2 headless integration
* Add HLE macro commands.
Co-authored-by: Mary <mary@mary.zone>
234 lines
No EOL
8.7 KiB
C#
234 lines
No EOL
8.7 KiB
C#
using Ryujinx.Graphics.GAL;
|
|
using Ryujinx.Graphics.Gpu.Image;
|
|
using Ryujinx.Graphics.Texture;
|
|
using Ryujinx.Memory.Range;
|
|
using System;
|
|
using System.Collections.Concurrent;
|
|
using System.Threading;
|
|
|
|
namespace Ryujinx.Graphics.Gpu
|
|
{
|
|
using Texture = Image.Texture;
|
|
|
|
/// <summary>
|
|
/// GPU image presentation window.
|
|
/// </summary>
|
|
public class Window
|
|
{
|
|
private readonly GpuContext _context;
|
|
|
|
/// <summary>
/// Describes one frame waiting in the presentation queue.
/// Every value is supplied to the constructor and exposed through a get-only property.
/// </summary>
private struct PresentationTexture
{
    /// <summary>
    /// Texture cache that is searched for the texture when the frame is presented.
    /// </summary>
    public TextureCache Cache { get; }

    /// <summary>
    /// Information describing the texture to present.
    /// </summary>
    public TextureInfo Info { get; }

    /// <summary>
    /// Physical memory locations holding the texture data.
    /// </summary>
    public MultiRange Range { get; }

    /// <summary>
    /// Crop region applied to the texture on presentation.
    /// </summary>
    public ImageCrop Crop { get; }

    /// <summary>
    /// Callback invoked with the GPU context and <see cref="UserObj"/> when the texture is acquired.
    /// </summary>
    public Action<GpuContext, object> AcquireCallback { get; }

    /// <summary>
    /// Callback invoked with <see cref="UserObj"/> when the texture is released.
    /// </summary>
    public Action<object> ReleaseCallback { get; }

    /// <summary>
    /// Caller supplied object that is handed back to the acquire and release callbacks.
    /// </summary>
    public object UserObj { get; }

    /// <summary>
    /// Builds the description of a frame to be presented.
    /// </summary>
    /// <param name="cache">Texture cache used to look for the texture to be presented</param>
    /// <param name="info">Information of the texture to be presented</param>
    /// <param name="range">Physical memory locations where the texture data is located</param>
    /// <param name="crop">Texture crop region</param>
    /// <param name="acquireCallback">Callback invoked when the texture is acquired</param>
    /// <param name="releaseCallback">Callback invoked when the texture is released</param>
    /// <param name="userObj">User defined object passed to the callbacks, can be used to identify the texture</param>
    public PresentationTexture(
        TextureCache cache,
        TextureInfo info,
        MultiRange range,
        ImageCrop crop,
        Action<GpuContext, object> acquireCallback,
        Action<object> releaseCallback,
        object userObj)
    {
        // Assigned in declaration order, exactly one write per property.
        (Cache, Info, Range, Crop) = (cache, info, range, crop);
        (AcquireCallback, ReleaseCallback, UserObj) = (acquireCallback, releaseCallback, userObj);
    }
}
|
|
|
|
// Frames submitted through EnqueueFrameThreadSafe, waiting to be taken by Present.
private readonly ConcurrentQueue<PresentationTexture> _frameQueue = new ConcurrentQueue<PresentationTexture>();

// Raised by SignalFrameReady and lowered by ConsumeFrameAvailable; only accessed through Interlocked.
private int _framesAvailable;

/// <summary>
/// Initializes the GPU presentation window with an empty frame queue.
/// </summary>
/// <param name="context">GPU emulation context</param>
public Window(GpuContext context)
{
    _context = context;
}
|
|
|
|
/// <summary>
/// Adds a frame to the presentation queue.
/// The frame is stored in a concurrent queue, so this can be called from any thread.
/// Once the texture has been presented and is no longer needed, the release callback is invoked.
/// The texture must not be modified between this call and the release callback.
/// </summary>
/// <remarks>
/// NOTE(review): the data size is always computed with the block linear size calculator,
/// even when <paramref name="isLinear"/> is true. Confirm this is intended for linear textures.
/// </remarks>
/// <param name="pid">Process ID of the process that owns the texture pointed to by <paramref name="address"/></param>
/// <param name="address">CPU virtual address of the texture data</param>
/// <param name="width">Texture width</param>
/// <param name="height">Texture height</param>
/// <param name="stride">Texture stride for linear texture, should be zero otherwise</param>
/// <param name="isLinear">Indicates if the texture is linear, normally false</param>
/// <param name="gobBlocksInY">GOB blocks in the Y direction, for block linear textures</param>
/// <param name="format">Texture format</param>
/// <param name="bytesPerPixel">Texture format bytes per pixel (must match the format)</param>
/// <param name="crop">Texture crop region</param>
/// <param name="acquireCallback">Callback invoked when the texture is acquired for presentation</param>
/// <param name="releaseCallback">Callback invoked when the texture is released after presentation</param>
/// <param name="userObj">User defined object passed to the callbacks</param>
/// <exception cref="ArgumentException">Thrown when <paramref name="pid"/> is not found in the physical memory registry</exception>
public void EnqueueFrameThreadSafe(
    long pid,
    ulong address,
    int width,
    int height,
    int stride,
    bool isLinear,
    int gobBlocksInY,
    Format format,
    int bytesPerPixel,
    ImageCrop crop,
    Action<GpuContext, object> acquireCallback,
    Action<object> releaseCallback,
    object userObj)
{
    bool isRegistered = _context.PhysicalMemoryRegistry.TryGetValue(pid, out var physicalMemory);

    if (!isRegistered)
    {
        throw new ArgumentException("The PID is invalid or the process was not registered", nameof(pid));
    }

    var presentFormat = new FormatInfo(format, 1, 1, bytesPerPixel, 4);

    // Single 2D texture located through the memory range below, hence the zero address.
    var textureInfo = new TextureInfo(
        0UL,
        width, height,
        1, 1, 1, 1,
        stride,
        isLinear,
        gobBlocksInY, 1, 1,
        Target.Texture2D,
        presentFormat);

    int sizeInBytes = SizeCalculator.GetBlockLinearTextureSize(
        width, height,
        1, 1, 1, 1, 1,
        bytesPerPixel,
        gobBlocksInY, 1, 1).TotalSize;

    var dataRange = new MultiRange(address, (ulong)sizeInBytes);

    var frame = new PresentationTexture(
        physicalMemory.TextureCache,
        textureInfo,
        dataRange,
        crop,
        acquireCallback,
        releaseCallback,
        userObj);

    _frameQueue.Enqueue(frame);
}
|
|
|
|
/// <summary>
/// Advances the context sequence and presents the next queued frame, if there is one.
/// When the queue is empty nothing is presented and no callback is invoked.
/// </summary>
/// <param name="swapBuffersCallback">Callback forwarded to the host renderer window along with the texture to present</param>
public void Present(Action swapBuffersCallback)
{
    _context.AdvanceSequence();

    if (!_frameQueue.TryDequeue(out PresentationTexture frame))
    {
        // Nothing queued, nothing to show.
        return;
    }

    // The acquire callback runs before the texture is looked up and read.
    frame.AcquireCallback(_context, frame.UserObj);

    Texture presented = frame.Cache.FindOrCreateTexture(
        null,
        TextureSearchFlags.WithUpscale,
        frame.Info,
        0,
        null,
        frame.Range);

    presented.SynchronizeMemory();

    _context.Renderer.Window.Present(presented.HostTexture, frame.Crop, swapBuffersCallback);

    // The frame has been handed to the host renderer; let the owner have it back.
    frame.ReleaseCallback(frame.UserObj);
}
|
|
|
|
/// <summary>
/// Records that one more queued frame is ready to be acquired by atomically
/// incrementing the available frame counter.
/// </summary>
public void SignalFrameReady() => Interlocked.Increment(ref _framesAvailable);
|
|
|
|
/// <summary>
/// Atomically takes one frame from the available frame counter, if any is left.
/// </summary>
/// <remarks>
/// The check and the decrement happen as a single compare-and-swap, so concurrent callers
/// can never both consume the same frame or push the counter below zero.
/// </remarks>
/// <returns>True if a frame was available and has been consumed, false otherwise</returns>
public bool ConsumeFrameAvailable()
{
    int observed = Volatile.Read(ref _framesAvailable);

    while (observed > 0)
    {
        // Only decrement if nobody changed the counter since it was observed.
        int previous = Interlocked.CompareExchange(ref _framesAvailable, observed - 1, observed);

        if (previous == observed)
        {
            return true;
        }

        // Lost the race against another signal/consume; retry with the fresh value.
        observed = previous;
    }

    return false;
}
|
|
}
|
|
} |