Support non-contiguous copies on I2M and DMA engines (#2473)
* Support non-contiguous copies on I2M and DMA engines
* Vector copy should start aligned on I2M
* Nits
* Zero extend the offset
This commit is contained in:
parent
ff8849671a
commit
ff5df5d8a1
2 changed files with 29 additions and 24 deletions
|
@ -152,14 +152,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma
|
||||||
dst.MemoryLayout.UnpackGobBlocksInZ(),
|
dst.MemoryLayout.UnpackGobBlocksInZ(),
|
||||||
dstBpp);
|
dstBpp);
|
||||||
|
|
||||||
ulong srcBaseAddress = memoryManager.Translate(srcGpuVa);
|
|
||||||
ulong dstBaseAddress = memoryManager.Translate(dstGpuVa);
|
|
||||||
|
|
||||||
(int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(src.RegionX, src.RegionY, xCount, yCount);
|
(int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(src.RegionX, src.RegionY, xCount, yCount);
|
||||||
(int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dst.RegionX, dst.RegionY, xCount, yCount);
|
(int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dst.RegionX, dst.RegionY, xCount, yCount);
|
||||||
|
|
||||||
ReadOnlySpan<byte> srcSpan = memoryManager.Physical.GetSpan(srcBaseAddress + (ulong)srcBaseOffset, srcSize, true);
|
ReadOnlySpan<byte> srcSpan = memoryManager.GetSpan(srcGpuVa + (uint)srcBaseOffset, srcSize, true);
|
||||||
Span<byte> dstSpan = memoryManager.Physical.GetSpan(dstBaseAddress + (ulong)dstBaseOffset, dstSize).ToArray();
|
Span<byte> dstSpan = memoryManager.GetSpan(dstGpuVa + (uint)dstBaseOffset, dstSize).ToArray();
|
||||||
|
|
||||||
bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount);
|
bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount);
|
||||||
bool completeDest = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount);
|
bool completeDest = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount);
|
||||||
|
@ -217,7 +214,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma
|
||||||
{
|
{
|
||||||
srcSpan.CopyTo(dstSpan); // No layout conversion has to be performed, just copy the data entirely.
|
srcSpan.CopyTo(dstSpan); // No layout conversion has to be performed, just copy the data entirely.
|
||||||
|
|
||||||
memoryManager.Physical.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan);
|
memoryManager.Write(dstGpuVa + (uint)dstBaseOffset, dstSpan);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -258,7 +255,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma
|
||||||
_ => throw new NotSupportedException($"Unable to copy ${srcBpp} bpp pixel format.")
|
_ => throw new NotSupportedException($"Unable to copy ${srcBpp} bpp pixel format.")
|
||||||
};
|
};
|
||||||
|
|
||||||
memoryManager.Physical.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan);
|
memoryManager.Write(dstGpuVa + (uint)dstBaseOffset, dstSpan);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -4,6 +4,7 @@ using Ryujinx.Graphics.Texture;
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Runtime.InteropServices;
|
using System.Runtime.InteropServices;
|
||||||
|
using System.Runtime.Intrinsics;
|
||||||
|
|
||||||
namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
|
namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
|
||||||
{
|
{
|
||||||
|
@ -169,13 +170,13 @@ namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
|
||||||
/// </summary>
|
/// </summary>
|
||||||
private void FinishTransfer()
|
private void FinishTransfer()
|
||||||
{
|
{
|
||||||
Span<byte> data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);
|
var memoryManager = _channel.MemoryManager;
|
||||||
|
|
||||||
|
var data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);
|
||||||
|
|
||||||
if (_isLinear && _lineCount == 1)
|
if (_isLinear && _lineCount == 1)
|
||||||
{
|
{
|
||||||
ulong address = _channel.MemoryManager.Translate(_dstGpuVa);
|
memoryManager.Write(_dstGpuVa, data);
|
||||||
|
|
||||||
_channel.MemoryManager.Physical.Write(address, data);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -189,36 +190,43 @@ namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
|
||||||
|
|
||||||
int srcOffset = 0;
|
int srcOffset = 0;
|
||||||
|
|
||||||
ulong dstBaseAddress = _channel.MemoryManager.Translate(_dstGpuVa);
|
|
||||||
|
|
||||||
for (int y = _dstY; y < _dstY + _lineCount; y++)
|
for (int y = _dstY; y < _dstY + _lineCount; y++)
|
||||||
{
|
{
|
||||||
int x1 = _dstX;
|
int x1 = _dstX;
|
||||||
int x2 = _dstX + _lineLengthIn;
|
int x2 = _dstX + _lineLengthIn;
|
||||||
int x2Trunc = _dstX + BitUtils.AlignDown(_lineLengthIn, 16);
|
int x1Round = BitUtils.AlignUp(_dstX, 16);
|
||||||
|
int x2Trunc = BitUtils.AlignDown(x2, 16);
|
||||||
|
|
||||||
int x;
|
int x = x1;
|
||||||
|
|
||||||
for (x = x1; x < x2Trunc; x += 16, srcOffset += 16)
|
if (x1Round <= x2)
|
||||||
|
{
|
||||||
|
for (; x < x1Round; x++, srcOffset++)
|
||||||
|
{
|
||||||
|
int dstOffset = dstCalculator.GetOffset(x, y);
|
||||||
|
|
||||||
|
ulong dstAddress = _dstGpuVa + (uint)dstOffset;
|
||||||
|
|
||||||
|
memoryManager.Write(dstAddress, data[srcOffset]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; x < x2Trunc; x += 16, srcOffset += 16)
|
||||||
{
|
{
|
||||||
int dstOffset = dstCalculator.GetOffset(x, y);
|
int dstOffset = dstCalculator.GetOffset(x, y);
|
||||||
|
|
||||||
ulong dstAddress = dstBaseAddress + (ulong)dstOffset;
|
ulong dstAddress = _dstGpuVa + (uint)dstOffset;
|
||||||
|
|
||||||
Span<byte> pixel = data.Slice(srcOffset, 16);
|
memoryManager.Write(dstAddress, MemoryMarshal.Cast<byte, Vector128<byte>>(data.Slice(srcOffset, 16))[0]);
|
||||||
|
|
||||||
_channel.MemoryManager.Physical.Write(dstAddress, pixel);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (; x < x2; x++, srcOffset++)
|
for (; x < x2; x++, srcOffset++)
|
||||||
{
|
{
|
||||||
int dstOffset = dstCalculator.GetOffset(x, y);
|
int dstOffset = dstCalculator.GetOffset(x, y);
|
||||||
|
|
||||||
ulong dstAddress = dstBaseAddress + (ulong)dstOffset;
|
ulong dstAddress = _dstGpuVa + (uint)dstOffset;
|
||||||
|
|
||||||
Span<byte> pixel = data.Slice(srcOffset, 1);
|
memoryManager.Write(dstAddress, data[srcOffset]);
|
||||||
|
|
||||||
_channel.MemoryManager.Physical.Write(dstAddress, pixel);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Reference in a new issue