1
0
Fork 0
mirror of https://github.com/Ryujinx/Ryujinx.git synced 2024-10-01 12:30:00 +02:00
Ryujinx/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
gdkchan f1d1670b0b
Implement HLE macro for DrawElementsIndirect (#3748)
* Implement HLE macro for DrawElementsIndirect

* Shader cache version bump

* Use GL_ARB_shader_draw_parameters extension on OpenGL

* Fix DrawIndexedIndirectCount on Vulkan when extension is not supported

* Implement DrawIndex

* Alignment

* Fix some validation errors

* Rename BaseIds to DrawParameters

* Fix incorrect index buffer and vertex buffer size in some cases

* Add HLE macros for DrawArraysInstanced and DrawElementsInstanced

* Perform a regular draw when indirect data is not modified

* Use non-indirect draw methods if indirect buffer was not GPU modified

* Only check if draw parameters match if the shader actually uses them

* Expose Macro HLE setting on GUI

* Reset FirstVertex and FirstInstance after draw

* Update shader cache version again since some people already tested this

* PR feedback

Co-authored-by: riperiperi <rhy3756547@hotmail.com>
2022-11-16 14:53:04 -03:00

602 lines
No EOL
22 KiB
C#

using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
namespace Ryujinx.Graphics.Shader.Translation
{
static class Rewriter
{
public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
{
bool isVertexShader = config.Stage == ShaderStage.Vertex;
bool hasConstantBufferDrawParameters = config.GpuAccessor.QueryHasConstantBufferDrawParameters();
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
{
BasicBlock block = blocks[blkIndex];
for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
{
if (node.Value is not Operation operation)
{
continue;
}
if (isVertexShader)
{
if (hasConstantBufferDrawParameters)
{
if (ReplaceConstantBufferWithDrawParameters(operation))
{
config.SetUsedFeature(FeatureFlags.DrawParameters);
}
}
else if (HasConstantBufferDrawParameters(operation))
{
config.SetUsedFeature(FeatureFlags.DrawParameters);
}
}
if (UsesGlobalMemory(operation.Inst))
{
node = RewriteGlobalAccess(node, config);
}
if (operation is TextureOperation texOp)
{
if (texOp.Inst == Instruction.TextureSample)
{
node = RewriteTextureSample(node, config);
if (texOp.Type == SamplerType.TextureBuffer)
{
node = InsertSnormNormalization(node, config);
}
}
}
}
}
}
private static LinkedListNode<INode> RewriteGlobalAccess(LinkedListNode<INode> node, ShaderConfig config)
{
Operation operation = (Operation)node.Value;
bool isAtomic = operation.Inst.IsAtomic();
bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8;
bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8;
Operation storageOp;
Operand PrependOperation(Instruction inst, params Operand[] sources)
{
Operand local = Local();
node.List.AddBefore(node, new Operation(inst, local, sources));
return local;
}
Operand addrLow = operation.GetSource(0);
Operand addrHigh = operation.GetSource(1);
Operand sbBaseAddrLow = Const(0);
Operand sbSlot = Const(0);
for (int slot = 0; slot < StorageMaxCount; slot++)
{
config.SetUsedStorageBuffer(slot, isWrite);
int cbOffset = GetStorageCbOffset(config.Stage, slot);
Operand baseAddrLow = Cbuf(0, cbOffset);
Operand baseAddrHigh = Cbuf(0, cbOffset + 1);
Operand size = Cbuf(0, cbOffset + 2);
Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow);
Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow);
Operand inRangeLow = PrependOperation(Instruction.CompareLessU32, offset, size);
Operand addrHighBorrowed = PrependOperation(Instruction.Add, addrHigh, borrow);
Operand inRangeHigh = PrependOperation(Instruction.CompareEqual, addrHighBorrowed, baseAddrHigh);
Operand inRange = PrependOperation(Instruction.BitwiseAnd, inRangeLow, inRangeHigh);
sbBaseAddrLow = PrependOperation(Instruction.ConditionalSelect, inRange, baseAddrLow, sbBaseAddrLow);
sbSlot = PrependOperation(Instruction.ConditionalSelect, inRange, Const(slot), sbSlot);
}
Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd, sbBaseAddrLow, alignMask);
Operand byteOffset = PrependOperation(Instruction.Subtract, addrLow, baseAddrTrunc);
Operand[] sources = new Operand[operation.SourcesCount];
sources[0] = sbSlot;
if (isStg16Or8)
{
sources[1] = byteOffset;
}
else
{
sources[1] = PrependOperation(Instruction.ShiftRightU32, byteOffset, Const(2));
}
for (int index = 2; index < operation.SourcesCount; index++)
{
sources[index] = operation.GetSource(index);
}
if (isAtomic)
{
Instruction inst = (operation.Inst & ~Instruction.MrMask) | Instruction.MrStorage;
storageOp = new Operation(inst, operation.Dest, sources);
}
else if (operation.Inst == Instruction.LoadGlobal)
{
storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources);
}
else
{
Instruction storeInst = operation.Inst switch
{
Instruction.StoreGlobal16 => Instruction.StoreStorage16,
Instruction.StoreGlobal8 => Instruction.StoreStorage8,
_ => Instruction.StoreStorage
};
storageOp = new Operation(storeInst, null, sources);
}
for (int index = 0; index < operation.SourcesCount; index++)
{
operation.SetSource(index, null);
}
LinkedListNode<INode> oldNode = node;
node = node.List.AddBefore(node, storageOp);
node.List.Remove(oldNode);
return node;
}
private static LinkedListNode<INode> RewriteTextureSample(LinkedListNode<INode> node, ShaderConfig config)
{
TextureOperation texOp = (TextureOperation)node.Value;
bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
bool hasInvalidOffset = (hasOffset || hasOffsets) && !config.GpuAccessor.QueryHostSupportsNonConstantTextureOffset();
bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
bool isCoordNormalized = isBindless || config.GpuAccessor.QueryTextureCoordNormalized(texOp.Handle, texOp.CbufSlot);
if (!hasInvalidOffset && isCoordNormalized)
{
return node;
}
bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
bool isArray = (texOp.Type & SamplerType.Array) != 0;
bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
bool isShadow = (texOp.Type & SamplerType.Shadow) != 0;
int coordsCount = texOp.Type.GetDimensions();
int offsetsCount;
if (hasOffsets)
{
offsetsCount = coordsCount * 4;
}
else if (hasOffset)
{
offsetsCount = coordsCount;
}
else
{
offsetsCount = 0;
}
Operand[] offsets = new Operand[offsetsCount];
Operand[] sources = new Operand[texOp.SourcesCount - offsetsCount];
int copyCount = 0;
if (isBindless || isIndexed)
{
copyCount++;
}
Operand[] lodSources = new Operand[copyCount + coordsCount];
for (int index = 0; index < lodSources.Length; index++)
{
lodSources[index] = texOp.GetSource(index);
}
copyCount += coordsCount;
if (isArray)
{
copyCount++;
}
if (isShadow)
{
copyCount++;
}
if (hasDerivatives)
{
copyCount += coordsCount * 2;
}
if (isMultisample)
{
copyCount++;
}
else if (hasLodLevel)
{
copyCount++;
}
int srcIndex = 0;
int dstIndex = 0;
for (int index = 0; index < copyCount; index++)
{
sources[dstIndex++] = texOp.GetSource(srcIndex++);
}
bool areAllOffsetsConstant = true;
for (int index = 0; index < offsetsCount; index++)
{
Operand offset = texOp.GetSource(srcIndex++);
areAllOffsetsConstant &= offset.Type == OperandType.Constant;
offsets[index] = offset;
}
hasInvalidOffset &= !areAllOffsetsConstant;
if (!hasInvalidOffset && isCoordNormalized)
{
return node;
}
if (hasLodBias)
{
sources[dstIndex++] = texOp.GetSource(srcIndex++);
}
if (isGather && !isShadow)
{
sources[dstIndex++] = texOp.GetSource(srcIndex++);
}
int coordsIndex = isBindless || isIndexed ? 1 : 0;
int componentIndex = texOp.Index;
Operand Int(Operand value)
{
Operand res = Local();
node.List.AddBefore(node, new Operation(Instruction.ConvertFP32ToS32, res, value));
return res;
}
Operand Float(Operand value)
{
Operand res = Local();
node.List.AddBefore(node, new Operation(Instruction.ConvertS32ToFP32, res, value));
return res;
}
// Emulate non-normalized coordinates by normalizing the coordinates on the shader.
// Without normalization, the coordinates are expected to the in the [0, W or H] range,
// and otherwise, it is expected to be in the [0, 1] range.
// We normalize by dividing the coords by the texture size.
if (!isCoordNormalized && !intCoords)
{
config.SetUsedFeature(FeatureFlags.IntegerSampling);
int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount;
for (int index = 0; index < normCoordsCount; index++)
{
Operand coordSize = Local();
Operand[] texSizeSources;
if (isBindless || isIndexed)
{
texSizeSources = new Operand[] { sources[0], Const(0) };
}
else
{
texSizeSources = new Operand[] { Const(0) };
}
node.List.AddBefore(node, new TextureOperation(
Instruction.TextureSize,
texOp.Type,
texOp.Format,
texOp.Flags,
texOp.CbufSlot,
texOp.Handle,
index,
coordSize,
texSizeSources));
config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);
Operand source = sources[coordsIndex + index];
Operand coordNormalized = Local();
node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, coordNormalized, source, Float(coordSize)));
sources[coordsIndex + index] = coordNormalized;
}
}
// Technically, non-constant texture offsets are not allowed (according to the spec),
// however some GPUs does support that.
// For GPUs where it is not supported, we can replace the instruction with the following:
// For texture*Offset, we replace it by texture*, and add the offset to the P coords.
// The offset can be calculated as offset / textureSize(lod), where lod = textureQueryLod(coords).
// For texelFetchOffset, we replace it by texelFetch and add the offset to the P coords directly.
// For textureGatherOffset, we take advantage of the fact that the operation is already broken down
// to read the 4 pixels separately, and just replace it with 4 textureGather with a different offset
// for each pixel.
if (hasInvalidOffset)
{
if (intCoords)
{
for (int index = 0; index < coordsCount; index++)
{
Operand source = sources[coordsIndex + index];
Operand coordPlusOffset = Local();
node.List.AddBefore(node, new Operation(Instruction.Add, coordPlusOffset, source, offsets[index]));
sources[coordsIndex + index] = coordPlusOffset;
}
}
else
{
config.SetUsedFeature(FeatureFlags.IntegerSampling);
Operand lod = Local();
node.List.AddBefore(node, new TextureOperation(
Instruction.Lod,
texOp.Type,
texOp.Format,
texOp.Flags,
texOp.CbufSlot,
texOp.Handle,
0,
lod,
lodSources));
for (int index = 0; index < coordsCount; index++)
{
Operand coordSize = Local();
Operand[] texSizeSources;
if (isBindless || isIndexed)
{
texSizeSources = new Operand[] { sources[0], Int(lod) };
}
else
{
texSizeSources = new Operand[] { Int(lod) };
}
node.List.AddBefore(node, new TextureOperation(
Instruction.TextureSize,
texOp.Type,
texOp.Format,
texOp.Flags,
texOp.CbufSlot,
texOp.Handle,
index,
coordSize,
texSizeSources));
config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);
Operand offset = Local();
Operand intOffset = offsets[index + (hasOffsets ? texOp.Index * coordsCount : 0)];
node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, offset, Float(intOffset), Float(coordSize)));
Operand source = sources[coordsIndex + index];
Operand coordPlusOffset = Local();
node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, coordPlusOffset, source, offset));
sources[coordsIndex + index] = coordPlusOffset;
}
}
if (isGather && !isShadow)
{
Operand gatherComponent = sources[dstIndex - 1];
Debug.Assert(gatherComponent.Type == OperandType.Constant);
componentIndex = gatherComponent.Value;
}
}
TextureOperation newTexOp = new TextureOperation(
Instruction.TextureSample,
texOp.Type,
texOp.Format,
texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
texOp.CbufSlot,
texOp.Handle,
componentIndex,
texOp.Dest,
sources);
for (int index = 0; index < texOp.SourcesCount; index++)
{
texOp.SetSource(index, null);
}
LinkedListNode<INode> oldNode = node;
node = node.List.AddBefore(node, newTexOp);
node.List.Remove(oldNode);
return node;
}
private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ShaderConfig config)
{
TextureOperation texOp = (TextureOperation)node.Value;
// We can't query the format of a bindless texture,
// because the handle is unknown, it can have any format.
if (texOp.Flags.HasFlag(TextureFlags.Bindless))
{
return node;
}
TextureFormat format = config.GpuAccessor.QueryTextureFormat(texOp.Handle, texOp.CbufSlot);
int maxPositive = format switch
{
TextureFormat.R8Snorm => sbyte.MaxValue,
TextureFormat.R8G8Snorm => sbyte.MaxValue,
TextureFormat.R8G8B8A8Snorm => sbyte.MaxValue,
TextureFormat.R16Snorm => short.MaxValue,
TextureFormat.R16G16Snorm => short.MaxValue,
TextureFormat.R16G16B16A16Snorm => short.MaxValue,
_ => 0
};
// The value being 0 means that the format is not a SNORM format,
// so there's nothing to do here.
if (maxPositive == 0)
{
return node;
}
// Do normalization. We assume SINT formats are being used
// as replacement for SNORM (which is not supported).
INode[] uses = texOp.Dest.UseOps.ToArray();
Operation convOp = new Operation(Instruction.ConvertS32ToFP32, Local(), texOp.Dest);
Operation normOp = new Operation(Instruction.FP32 | Instruction.Multiply, Local(), convOp.Dest, ConstF(1f / maxPositive));
node = node.List.AddAfter(node, convOp);
node = node.List.AddAfter(node, normOp);
foreach (INode useOp in uses)
{
if (useOp is not Operation op)
{
continue;
}
// Replace all uses of the texture pixel value with the normalized value.
for (int index = 0; index < op.SourcesCount; index++)
{
if (op.GetSource(index) == texOp.Dest)
{
op.SetSource(index, normOp.Dest);
}
}
}
return node;
}
private static bool ReplaceConstantBufferWithDrawParameters(Operation operation)
{
bool modified = false;
for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
{
Operand src = operation.GetSource(srcIndex);
if (src.Type == OperandType.ConstantBuffer && src.GetCbufSlot() == 0)
{
switch (src.GetCbufOffset())
{
case Constants.NvnBaseVertexByteOffset / 4:
operation.SetSource(srcIndex, Attribute(AttributeConsts.BaseVertex));
modified = true;
break;
case Constants.NvnBaseInstanceByteOffset / 4:
operation.SetSource(srcIndex, Attribute(AttributeConsts.BaseInstance));
modified = true;
break;
case Constants.NvnDrawIndexByteOffset / 4:
operation.SetSource(srcIndex, Attribute(AttributeConsts.DrawIndex));
modified = true;
break;
}
}
}
return modified;
}
private static bool HasConstantBufferDrawParameters(Operation operation)
{
for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
{
Operand src = operation.GetSource(srcIndex);
if (src.Type == OperandType.ConstantBuffer && src.GetCbufSlot() == 0)
{
switch (src.GetCbufOffset())
{
case Constants.NvnBaseVertexByteOffset / 4:
case Constants.NvnBaseInstanceByteOffset / 4:
case Constants.NvnDrawIndexByteOffset / 4:
return true;
}
}
}
return false;
}
}
}