mirror of
https://github.com/ryujinx-mirror/ryujinx.git
synced 2025-01-11 07:21:58 +00:00
Shader: Bias textureGather instructions on AMD/Intel (#4703)
* Experimental (GLSL, forced)
* SPIR-V attempt
* Add capability
* Fix pCount == 1 on GLSL
* Fix typo
This commit is contained in:
parent
e27f5522e2
commit
8d9d508dc7
9 changed files with 84 additions and 9 deletions
|
@ -48,6 +48,8 @@ namespace Ryujinx.Graphics.GAL
|
||||||
public readonly float MaximumSupportedAnisotropy;
|
public readonly float MaximumSupportedAnisotropy;
|
||||||
public readonly int StorageBufferOffsetAlignment;
|
public readonly int StorageBufferOffsetAlignment;
|
||||||
|
|
||||||
|
public readonly int GatherBiasPrecision;
|
||||||
|
|
||||||
public Capabilities(
|
public Capabilities(
|
||||||
TargetApi api,
|
TargetApi api,
|
||||||
string vendorName,
|
string vendorName,
|
||||||
|
@ -87,7 +89,8 @@ namespace Ryujinx.Graphics.GAL
|
||||||
uint maximumImagesPerStage,
|
uint maximumImagesPerStage,
|
||||||
int maximumComputeSharedMemorySize,
|
int maximumComputeSharedMemorySize,
|
||||||
float maximumSupportedAnisotropy,
|
float maximumSupportedAnisotropy,
|
||||||
int storageBufferOffsetAlignment)
|
int storageBufferOffsetAlignment,
|
||||||
|
int gatherBiasPrecision)
|
||||||
{
|
{
|
||||||
Api = api;
|
Api = api;
|
||||||
VendorName = vendorName;
|
VendorName = vendorName;
|
||||||
|
@ -128,6 +131,7 @@ namespace Ryujinx.Graphics.GAL
|
||||||
MaximumComputeSharedMemorySize = maximumComputeSharedMemorySize;
|
MaximumComputeSharedMemorySize = maximumComputeSharedMemorySize;
|
||||||
MaximumSupportedAnisotropy = maximumSupportedAnisotropy;
|
MaximumSupportedAnisotropy = maximumSupportedAnisotropy;
|
||||||
StorageBufferOffsetAlignment = storageBufferOffsetAlignment;
|
StorageBufferOffsetAlignment = storageBufferOffsetAlignment;
|
||||||
|
GatherBiasPrecision = gatherBiasPrecision;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
|
||||||
private const ushort FileFormatVersionMajor = 1;
|
private const ushort FileFormatVersionMajor = 1;
|
||||||
private const ushort FileFormatVersionMinor = 2;
|
private const ushort FileFormatVersionMinor = 2;
|
||||||
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
|
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
|
||||||
private const uint CodeGenVersion = 4404;
|
private const uint CodeGenVersion = 4703;
|
||||||
|
|
||||||
private const string SharedTocFileName = "shared.toc";
|
private const string SharedTocFileName = "shared.toc";
|
||||||
private const string SharedDataFileName = "shared.data";
|
private const string SharedDataFileName = "shared.data";
|
||||||
|
|
|
@ -112,6 +112,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports the gather bias precision stored in the capabilities of the current context.
public int QueryHostGatherBiasPrecision()
{
    return _context.Capabilities.GatherBiasPrecision;
}
|
||||||
|
|
||||||
public bool QueryHostReducedPrecision() => _context.Capabilities.ReduceShaderPrecision;
|
public bool QueryHostReducedPrecision() => _context.Capabilities.ReduceShaderPrecision;
|
||||||
|
|
||||||
public bool QueryHostHasFrontFacingBug() => _context.Capabilities.HasFrontFacingBug;
|
public bool QueryHostHasFrontFacingBug() => _context.Capabilities.HasFrontFacingBug;
|
||||||
|
|
|
@ -103,11 +103,14 @@ namespace Ryujinx.Graphics.OpenGL
|
||||||
|
|
||||||
public Capabilities GetCapabilities()
|
public Capabilities GetCapabilities()
|
||||||
{
|
{
|
||||||
|
bool intelWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows;
|
||||||
|
bool amdWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows;
|
||||||
|
|
||||||
return new Capabilities(
|
return new Capabilities(
|
||||||
api: TargetApi.OpenGL,
|
api: TargetApi.OpenGL,
|
||||||
vendorName: GpuVendor,
|
vendorName: GpuVendor,
|
||||||
hasFrontFacingBug: HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows,
|
hasFrontFacingBug: intelWindows,
|
||||||
hasVectorIndexingBug: HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows,
|
hasVectorIndexingBug: amdWindows,
|
||||||
needsFragmentOutputSpecialization: false,
|
needsFragmentOutputSpecialization: false,
|
||||||
reduceShaderPrecision: false,
|
reduceShaderPrecision: false,
|
||||||
supportsAstcCompression: HwCapabilities.SupportsAstcCompression,
|
supportsAstcCompression: HwCapabilities.SupportsAstcCompression,
|
||||||
|
@ -142,7 +145,8 @@ namespace Ryujinx.Graphics.OpenGL
|
||||||
maximumImagesPerStage: 8,
|
maximumImagesPerStage: 8,
|
||||||
maximumComputeSharedMemorySize: HwCapabilities.MaximumComputeSharedMemorySize,
|
maximumComputeSharedMemorySize: HwCapabilities.MaximumComputeSharedMemorySize,
|
||||||
maximumSupportedAnisotropy: HwCapabilities.MaximumSupportedAnisotropy,
|
maximumSupportedAnisotropy: HwCapabilities.MaximumSupportedAnisotropy,
|
||||||
storageBufferOffsetAlignment: HwCapabilities.StorageBufferOffsetAlignment);
|
storageBufferOffsetAlignment: HwCapabilities.StorageBufferOffsetAlignment,
|
||||||
|
gatherBiasPrecision: intelWindows || amdWindows ? 8 : 0); // Precision is 8 for these vendors on Vulkan.
|
||||||
}
|
}
|
||||||
|
|
||||||
public void SetBufferData(BufferHandle buffer, int offset, ReadOnlySpan<byte> data)
|
public void SetBufferData(BufferHandle buffer, int offset, ReadOnlySpan<byte> data)
|
||||||
|
|
|
@ -677,7 +677,28 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
|
||||||
return vector;
|
return vector;
|
||||||
}
|
}
|
||||||
|
|
||||||
Append(ApplyScaling(AssemblePVector(pCount)));
|
// Appends a small positive offset to the coordinate expression of a texture gather.
// The offset is 1 / (textureSize * 2^(precisionBits + 1)), i.e. half of the host's gather
// precision step, so that a point lying exactly between two texels rounds the same way as
// it does on NVIDIA. Non-gather operations, and hosts reporting zero precision bits,
// get the expression back untouched.
string ApplyBias(string vector)
{
    int precisionBits = context.Config.GpuAccessor.QueryHostGatherBiasPrecision();

    // A precision of zero means the host needs no bias.
    if (!isGather || precisionBits == 0)
    {
        return vector;
    }

    // 2^(precisionBits + 1), emitted as a GLSL float so the division is done in floating point.
    string scale = $"float({1 << (precisionBits + 1)})";

    // textureSize yields integers: a scalar coordinate needs a plain float conversion, while
    // a vector coordinate takes the first pCount components converted to a float vector.
    string size = pCount == 1
        ? $"float(textureSize({samplerName}, 0))"
        : $"vec{pCount}(textureSize({samplerName}, 0).{"xyz".Substring(0, pCount)})";

    return $"{vector} + (1.0 / ({size} * {scale}))";
}
|
||||||
|
|
||||||
|
Append(ApplyBias(ApplyScaling(AssemblePVector(pCount))));
|
||||||
|
|
||||||
string AssembleDerivativesVector(int count)
|
string AssembleDerivativesVector(int count)
|
||||||
{
|
{
|
||||||
|
|
|
@ -4,6 +4,7 @@ using Ryujinx.Graphics.Shader.Translation;
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Diagnostics;
|
using System.Diagnostics;
|
||||||
|
using System.Linq;
|
||||||
using System.Numerics;
|
using System.Numerics;
|
||||||
using static Spv.Specification;
|
using static Spv.Specification;
|
||||||
|
|
||||||
|
@ -1556,6 +1557,33 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Adds a small positive offset to the coordinates of a texture gather.
// The offset is 1 / (imageSize * 2^(gatherBiasPrecision + 1)), i.e. half of the host's gather
// precision step, so that a point lying exactly between two texels rounds the same way as
// it does on NVIDIA.
//
// vector: coordinate value (a scalar when pCount == 1, otherwise a pCount-component float vector).
// image:  image operand whose size is queried to scale the offset.
// Returns the biased coordinates, or the input unchanged when the operation is not a gather
// or the host reports a gather bias precision of zero.
SpvInstruction ApplyBias(SpvInstruction vector, SpvInstruction image)
{
    int gatherBiasPrecision = context.Config.GpuAccessor.QueryHostGatherBiasPrecision();

    // A precision of zero means the host needs no bias.
    if (!isGather || gatherBiasPrecision == 0)
    {
        return vector;
    }

    bool isScalar = pCount == 1;

    var sizeType = isScalar ? context.TypeS32() : context.TypeVector(context.TypeS32(), pCount);
    var pVectorType = isScalar ? context.TypeFP32() : context.TypeVector(context.TypeFP32(), pCount);

    var bias = context.Constant(context.TypeFP32(), (float)(1 << (gatherBiasPrecision + 1)));
    var one = context.Constant(context.TypeFP32(), 1f);

    // OpCompositeConstruct requires a composite result type. With a single coordinate,
    // pVectorType is a scalar float, so the scalar constants must be used as they are.
    SpvInstruction biasVector;
    SpvInstruction oneVector;

    if (isScalar)
    {
        biasVector = bias;
        oneVector = one;
    }
    else
    {
        biasVector = context.CompositeConstruct(pVectorType, Enumerable.Repeat(bias, pCount).ToArray());
        oneVector = context.CompositeConstruct(pVectorType, Enumerable.Repeat(one, pCount).ToArray());
    }

    // NOTE(review): the SPIR-V spec only allows OpImageQuerySize on 1D/2D/3D/Cube images that
    // are multisampled or have Sampled = 0 or 2 - confirm that `image` satisfies this here, or
    // whether the Lod variant of the size query (at level 0) should be used instead.
    var divisor = context.FMul(
        pVectorType,
        context.ConvertSToF(pVectorType, context.ImageQuerySize(sizeType, image)),
        biasVector);

    return context.FAdd(pVectorType, vector, context.FDiv(pVectorType, oneVector, divisor));
}
|
||||||
|
|
||||||
SpvInstruction pCoords = AssemblePVector(pCount);
|
SpvInstruction pCoords = AssemblePVector(pCount);
|
||||||
pCoords = ScalingHelpers.ApplyScaling(context, texOp, pCoords, intCoords, isBindless, isIndexed, isArray, pCount);
|
pCoords = ScalingHelpers.ApplyScaling(context, texOp, pCoords, intCoords, isBindless, isIndexed, isArray, pCount);
|
||||||
|
|
||||||
|
@ -1716,6 +1744,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
||||||
image = context.Image(imageType, image);
|
image = context.Image(imageType, image);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pCoords = ApplyBias(pCoords, image);
|
||||||
|
|
||||||
var operands = operandsList.ToArray();
|
var operands = operandsList.ToArray();
|
||||||
|
|
||||||
SpvInstruction result;
|
SpvInstruction result;
|
||||||
|
|
|
@ -196,6 +196,15 @@ namespace Ryujinx.Graphics.Shader
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Queries how many bits of precision the host uses for gather operations, so that their coordinates can be biased to match.
/// A value of zero means that no bias is applied.
/// </summary>
/// <returns>Number of gather precision bits to use for the coordinate bias; zero by default</returns>
int QueryHostGatherBiasPrecision()
{
    const int NoBias = 0;

    return NoBias;
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Queries host about whether to reduce precision to improve performance.
|
/// Queries host about whether to reduce precision to improve performance.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
|
|
@ -46,6 +46,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||||
public readonly SampleCountFlags SupportedSampleCounts;
|
public readonly SampleCountFlags SupportedSampleCounts;
|
||||||
public readonly PortabilitySubsetFlags PortabilitySubset;
|
public readonly PortabilitySubsetFlags PortabilitySubset;
|
||||||
public readonly uint VertexBufferAlignment;
|
public readonly uint VertexBufferAlignment;
|
||||||
|
public readonly uint SubTexelPrecisionBits;
|
||||||
|
|
||||||
public HardwareCapabilities(
|
public HardwareCapabilities(
|
||||||
bool supportsIndexTypeUint8,
|
bool supportsIndexTypeUint8,
|
||||||
|
@ -77,7 +78,8 @@ namespace Ryujinx.Graphics.Vulkan
|
||||||
ShaderStageFlags requiredSubgroupSizeStages,
|
ShaderStageFlags requiredSubgroupSizeStages,
|
||||||
SampleCountFlags supportedSampleCounts,
|
SampleCountFlags supportedSampleCounts,
|
||||||
PortabilitySubsetFlags portabilitySubset,
|
PortabilitySubsetFlags portabilitySubset,
|
||||||
uint vertexBufferAlignment)
|
uint vertexBufferAlignment,
|
||||||
|
uint subTexelPrecisionBits)
|
||||||
{
|
{
|
||||||
SupportsIndexTypeUint8 = supportsIndexTypeUint8;
|
SupportsIndexTypeUint8 = supportsIndexTypeUint8;
|
||||||
SupportsCustomBorderColor = supportsCustomBorderColor;
|
SupportsCustomBorderColor = supportsCustomBorderColor;
|
||||||
|
@ -109,6 +111,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||||
SupportedSampleCounts = supportedSampleCounts;
|
SupportedSampleCounts = supportedSampleCounts;
|
||||||
PortabilitySubset = portabilitySubset;
|
PortabilitySubset = portabilitySubset;
|
||||||
VertexBufferAlignment = vertexBufferAlignment;
|
VertexBufferAlignment = vertexBufferAlignment;
|
||||||
|
SubTexelPrecisionBits = subTexelPrecisionBits;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -311,7 +311,8 @@ namespace Ryujinx.Graphics.Vulkan
|
||||||
propertiesSubgroupSizeControl.RequiredSubgroupSizeStages,
|
propertiesSubgroupSizeControl.RequiredSubgroupSizeStages,
|
||||||
supportedSampleCounts,
|
supportedSampleCounts,
|
||||||
portabilityFlags,
|
portabilityFlags,
|
||||||
vertexBufferAlignment);
|
vertexBufferAlignment,
|
||||||
|
properties.Limits.SubTexelPrecisionBits);
|
||||||
|
|
||||||
IsSharedMemory = MemoryAllocator.IsDeviceMemoryShared(_physicalDevice);
|
IsSharedMemory = MemoryAllocator.IsDeviceMemoryShared(_physicalDevice);
|
||||||
|
|
||||||
|
@ -576,7 +577,8 @@ namespace Ryujinx.Graphics.Vulkan
|
||||||
maximumImagesPerStage: Constants.MaxImagesPerStage,
|
maximumImagesPerStage: Constants.MaxImagesPerStage,
|
||||||
maximumComputeSharedMemorySize: (int)limits.MaxComputeSharedMemorySize,
|
maximumComputeSharedMemorySize: (int)limits.MaxComputeSharedMemorySize,
|
||||||
maximumSupportedAnisotropy: (int)limits.MaxSamplerAnisotropy,
|
maximumSupportedAnisotropy: (int)limits.MaxSamplerAnisotropy,
|
||||||
storageBufferOffsetAlignment: (int)limits.MinStorageBufferOffsetAlignment);
|
storageBufferOffsetAlignment: (int)limits.MinStorageBufferOffsetAlignment,
|
||||||
|
gatherBiasPrecision: IsIntelWindows || IsAmdWindows ? (int)Capabilities.SubTexelPrecisionBits : 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
public HardwareInfo GetHardwareInfo()
|
public HardwareInfo GetHardwareInfo()
|
||||||
|
|
Loading…
Reference in a new issue