1
0
Fork 0
mirror of https://github.com/Ryujinx/Ryujinx.git synced 2024-10-01 12:30:00 +02:00

Extend bindless elimination to work with masked and shifted handles (#2727)

* Extend bindless elimination to work with masked handles

* Extend bindless elimination to catch shifted pattern, refactor handle packing/unpacking
This commit is contained in:
gdkchan 2021-10-17 17:28:18 -03:00 committed by GitHub
parent d05573bfd1
commit 25fd4ef10e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 159 additions and 40 deletions

View file

@ -14,12 +14,6 @@ namespace Ryujinx.Graphics.Gpu.Image
private const int InitialTextureStateSize = 32; private const int InitialTextureStateSize = 32;
private const int InitialImageStateSize = 8; private const int InitialImageStateSize = 8;
private const int HandleHigh = 16;
private const int HandleMask = (1 << HandleHigh) - 1;
private const int SlotHigh = 16;
private const int SlotMask = (1 << SlotHigh) - 1;
private readonly GpuContext _context; private readonly GpuContext _context;
private readonly bool _isCompute; private readonly bool _isCompute;
@ -348,19 +342,7 @@ namespace Ryujinx.Graphics.Gpu.Image
{ {
TextureBindingInfo bindingInfo = _textureBindings[stageIndex][index]; TextureBindingInfo bindingInfo = _textureBindings[stageIndex][index];
int textureBufferIndex; (int textureBufferIndex, int samplerBufferIndex) = TextureHandle.UnpackSlots(bindingInfo.CbufSlot, _textureBufferIndex);
int samplerBufferIndex;
if (bindingInfo.CbufSlot < 0)
{
textureBufferIndex = _textureBufferIndex;
samplerBufferIndex = textureBufferIndex;
}
else
{
textureBufferIndex = bindingInfo.CbufSlot & SlotMask;
samplerBufferIndex = ((bindingInfo.CbufSlot >> SlotHigh) != 0) ? (bindingInfo.CbufSlot >> SlotHigh) - 1 : textureBufferIndex;
}
int packedId = ReadPackedId(stageIndex, bindingInfo.Handle, textureBufferIndex, samplerBufferIndex); int packedId = ReadPackedId(stageIndex, bindingInfo.Handle, textureBufferIndex, samplerBufferIndex);
int textureId = UnpackTextureId(packedId); int textureId = UnpackTextureId(packedId);
@ -440,19 +422,7 @@ namespace Ryujinx.Graphics.Gpu.Image
{ {
TextureBindingInfo bindingInfo = _imageBindings[stageIndex][index]; TextureBindingInfo bindingInfo = _imageBindings[stageIndex][index];
int textureBufferIndex; (int textureBufferIndex, int samplerBufferIndex) = TextureHandle.UnpackSlots(bindingInfo.CbufSlot, _textureBufferIndex);
int samplerBufferIndex;
if (bindingInfo.CbufSlot < 0)
{
textureBufferIndex = _textureBufferIndex;
samplerBufferIndex = textureBufferIndex;
}
else
{
textureBufferIndex = bindingInfo.CbufSlot & SlotMask;
samplerBufferIndex = ((bindingInfo.CbufSlot >> SlotHigh) != 0) ? (bindingInfo.CbufSlot >> SlotHigh) - 1 : textureBufferIndex;
}
int packedId = ReadPackedId(stageIndex, bindingInfo.Handle, textureBufferIndex, samplerBufferIndex); int packedId = ReadPackedId(stageIndex, bindingInfo.Handle, textureBufferIndex, samplerBufferIndex);
int textureId = UnpackTextureId(packedId); int textureId = UnpackTextureId(packedId);
@ -522,8 +492,9 @@ namespace Ryujinx.Graphics.Gpu.Image
int handle, int handle,
int cbufSlot) int cbufSlot)
{ {
int textureBufferIndex = cbufSlot < 0 ? bufferIndex : cbufSlot & SlotMask; (int textureBufferIndex, int samplerBufferIndex) = TextureHandle.UnpackSlots(cbufSlot, bufferIndex);
int packedId = ReadPackedId(stageIndex, handle, textureBufferIndex, textureBufferIndex);
int packedId = ReadPackedId(stageIndex, handle, textureBufferIndex, samplerBufferIndex);
int textureId = UnpackTextureId(packedId); int textureId = UnpackTextureId(packedId);
ulong poolAddress = _channel.MemoryManager.Translate(poolGpuVa); ulong poolAddress = _channel.MemoryManager.Translate(poolGpuVa);
@ -544,11 +515,13 @@ namespace Ryujinx.Graphics.Gpu.Image
/// <returns>The packed texture and sampler ID (the real texture handle)</returns> /// <returns>The packed texture and sampler ID (the real texture handle)</returns>
private int ReadPackedId(int stageIndex, int wordOffset, int textureBufferIndex, int samplerBufferIndex) private int ReadPackedId(int stageIndex, int wordOffset, int textureBufferIndex, int samplerBufferIndex)
{ {
(int textureWordOffset, int samplerWordOffset, TextureHandleType handleType) = TextureHandle.UnpackOffsets(wordOffset);
ulong textureBufferAddress = _isCompute ulong textureBufferAddress = _isCompute
? _channel.BufferManager.GetComputeUniformBufferAddress(textureBufferIndex) ? _channel.BufferManager.GetComputeUniformBufferAddress(textureBufferIndex)
: _channel.BufferManager.GetGraphicsUniformBufferAddress(stageIndex, textureBufferIndex); : _channel.BufferManager.GetGraphicsUniformBufferAddress(stageIndex, textureBufferIndex);
int handle = _channel.MemoryManager.Physical.Read<int>(textureBufferAddress + (ulong)(wordOffset & HandleMask) * 4); int handle = _channel.MemoryManager.Physical.Read<int>(textureBufferAddress + (uint)textureWordOffset * 4);
// The "wordOffset" (which is really the immediate value used on texture instructions on the shader) // The "wordOffset" (which is really the immediate value used on texture instructions on the shader)
// is a 13-bit value. However, in order to also support separate samplers and textures (which uses // is a 13-bit value. However, in order to also support separate samplers and textures (which uses
@ -556,13 +529,20 @@ namespace Ryujinx.Graphics.Gpu.Image
// another offset for the sampler. // another offset for the sampler.
// The shader translator has code to detect separate texture and sampler uses with a bindless texture, // The shader translator has code to detect separate texture and sampler uses with a bindless texture,
// turn that into a regular texture access and produce those special handles with values on the higher 16 bits. // turn that into a regular texture access and produce those special handles with values on the higher 16 bits.
if (wordOffset >> HandleHigh != 0) if (handleType != TextureHandleType.CombinedSampler)
{ {
ulong samplerBufferAddress = _isCompute ulong samplerBufferAddress = _isCompute
? _channel.BufferManager.GetComputeUniformBufferAddress(samplerBufferIndex) ? _channel.BufferManager.GetComputeUniformBufferAddress(samplerBufferIndex)
: _channel.BufferManager.GetGraphicsUniformBufferAddress(stageIndex, samplerBufferIndex); : _channel.BufferManager.GetGraphicsUniformBufferAddress(stageIndex, samplerBufferIndex);
handle |= _channel.MemoryManager.Physical.Read<int>(samplerBufferAddress + (ulong)((wordOffset >> HandleHigh) - 1) * 4); int samplerHandle = _channel.MemoryManager.Physical.Read<int>(samplerBufferAddress + (uint)samplerWordOffset * 4);
if (handleType == TextureHandleType.SeparateSamplerId)
{
samplerHandle <<= 20;
}
handle |= samplerHandle;
} }
return handle; return handle;

View file

@ -0,0 +1,54 @@
using System.Runtime.CompilerServices;
namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// Describes how the two constant buffer values referenced by a packed texture handle
    /// are meant to be combined. The value is stored on the top bits of the packed offsets.
    /// </summary>
    public enum TextureHandleType
    {
        /// <summary>
        /// A single value is used as the handle; no second value is combined with it.
        /// Must remain 0 so that a plain offset (with nothing on the high bits) decodes to this type.
        /// </summary>
        CombinedSampler = 0, // Must be 0.

        /// <summary>
        /// The second value is a sampler handle that is combined with the first value as-is.
        /// </summary>
        SeparateSamplerHandle = 1,

        /// <summary>
        /// The second value is a sampler ID, which consumers shift left by 20 bits before combining.
        /// </summary>
        SeparateSamplerId = 2
    }

    /// <summary>
    /// Helpers to pack and unpack the constant buffer slots and offsets that identify
    /// a texture handle (and optionally a separate sampler handle) into a single integer.
    /// </summary>
    public static class TextureHandle
    {
        // Slot layout: [31:16] = sampler slot + 1 (0 means "same as texture slot"), [15:0] = texture slot.
        private const int SlotShift = 16;
        private const int SlotMask = (1 << SlotShift) - 1;

        // Offset layout: [31:28] = handle type, [27:14] = second offset, [13:0] = first offset.
        private const int OffsetShift = 14;
        private const int OffsetMask = (1 << OffsetShift) - 1;
        private const int TypeShift = 28;
        private const int TypeMask = 0xf;

        /// <summary>
        /// Packs two constant buffer slots into a single value.
        /// </summary>
        /// <param name="cbufSlot0">Slot stored on the low 16 bits</param>
        /// <param name="cbufSlot1">Slot stored, biased by one, on the bits above the low 16</param>
        /// <returns>The packed slots</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static int PackSlots(int cbufSlot0, int cbufSlot1)
        {
            // The second slot is biased by one so that zero on the high half means "not present".
            int biasedSecondSlot = cbufSlot1 + 1;

            return cbufSlot0 | (biasedSecondSlot << SlotShift);
        }

        /// <summary>
        /// Unpacks the texture and sampler constant buffer slots from a packed value.
        /// </summary>
        /// <param name="slots">Packed slots, or a negative value to request the default slot for both</param>
        /// <param name="defaultTextureBufferIndex">Slot returned for both entries when <paramref name="slots"/> is negative</param>
        /// <returns>The texture buffer index, followed by the sampler buffer index</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static (int, int) UnpackSlots(int slots, int defaultTextureBufferIndex)
        {
            if (slots < 0)
            {
                return (defaultTextureBufferIndex, defaultTextureBufferIndex);
            }

            int textureSlot = slots & SlotMask;

            // "slots" is known to be non-negative here, so the shift brings in zeros from the top.
            int biasedSamplerSlot = slots >> SlotShift;

            if (biasedSamplerSlot == 0)
            {
                // Nothing was packed on the high half, the sampler shares the texture slot.
                return (textureSlot, textureSlot);
            }

            return (textureSlot, biasedSamplerSlot - 1);
        }

        /// <summary>
        /// Packs two constant buffer offsets and a handle type into a single value.
        /// </summary>
        /// <remarks>
        /// No masking is performed; each offset is expected to fit in 14 bits.
        /// </remarks>
        /// <param name="cbufOffset0">Offset stored on bits 0 to 13</param>
        /// <param name="cbufOffset1">Offset stored on bits 14 to 27</param>
        /// <param name="type">Handle type stored starting at bit 28</param>
        /// <returns>The packed offsets and type</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static int PackOffsets(int cbufOffset0, int cbufOffset1, TextureHandleType type)
        {
            int packedType = (int)type << TypeShift;
            int packedSecondOffset = cbufOffset1 << OffsetShift;

            return cbufOffset0 | packedSecondOffset | packedType;
        }

        /// <summary>
        /// Unpacks the two constant buffer offsets and the handle type from a packed value.
        /// </summary>
        /// <param name="handle">Value produced by <see cref="PackOffsets"/></param>
        /// <returns>The first offset, the second offset and the handle type, in this order</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static (int, int, TextureHandleType) UnpackOffsets(int handle)
        {
            int firstOffset = handle & OffsetMask;
            int secondOffset = (handle >> OffsetShift) & OffsetMask;

            // Masking after the (arithmetic) shift keeps only the top 4 bits, regardless of sign.
            TextureHandleType type = (TextureHandleType)((handle >> TypeShift) & TypeMask);

            return (firstOffset, secondOffset, type);
        }
    }
}

View file

@ -51,6 +51,60 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
Operand src0 = Utils.FindLastOperation(handleCombineOp.GetSource(0), block); Operand src0 = Utils.FindLastOperation(handleCombineOp.GetSource(0), block);
Operand src1 = Utils.FindLastOperation(handleCombineOp.GetSource(1), block); Operand src1 = Utils.FindLastOperation(handleCombineOp.GetSource(1), block);
TextureHandleType handleType = TextureHandleType.SeparateSamplerHandle;
// Try to match masked pattern:
// - samplerHandle = samplerHandle & 0xFFF00000;
// - textureHandle = textureHandle & 0xFFFFF;
// - combinedHandle = samplerHandle | textureHandle;
// where samplerHandle and textureHandle comes from a constant buffer, and shifted pattern:
// - samplerHandle = samplerId << 20;
// - combinedHandle = samplerHandle | textureHandle;
// where samplerId and textureHandle comes from a constant buffer.
if (src0.AsgOp is Operation src0AsgOp)
{
if (src1.AsgOp is Operation src1AsgOp &&
src0AsgOp.Inst == Instruction.BitwiseAnd &&
src1AsgOp.Inst == Instruction.BitwiseAnd)
{
src0 = GetSourceForMaskedHandle(src0AsgOp, 0xFFFFF);
src1 = GetSourceForMaskedHandle(src1AsgOp, 0xFFF00000);
// The OR operation is commutative, so we can also try to swap the operands to get a match.
if (src0 == null || src1 == null)
{
src0 = GetSourceForMaskedHandle(src1AsgOp, 0xFFFFF);
src1 = GetSourceForMaskedHandle(src0AsgOp, 0xFFF00000);
}
if (src0 == null || src1 == null)
{
continue;
}
}
else if (src0AsgOp.Inst == Instruction.ShiftLeft)
{
Operand shift = src0AsgOp.GetSource(1);
if (shift.Type == OperandType.Constant && shift.Value == 20)
{
src0 = src1;
src1 = src0AsgOp.GetSource(0);
handleType = TextureHandleType.SeparateSamplerId;
}
}
}
else if (src1.AsgOp is Operation src1AsgOp && src1AsgOp.Inst == Instruction.ShiftLeft)
{
Operand shift = src1AsgOp.GetSource(1);
if (shift.Type == OperandType.Constant && shift.Value == 20)
{
src1 = src1AsgOp.GetSource(0);
handleType = TextureHandleType.SeparateSamplerId;
}
}
if (src0.Type != OperandType.ConstantBuffer || src1.Type != OperandType.ConstantBuffer) if (src0.Type != OperandType.ConstantBuffer || src1.Type != OperandType.ConstantBuffer)
{ {
continue; continue;
@ -59,8 +113,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
SetHandle( SetHandle(
config, config,
texOp, texOp,
src0.GetCbufOffset() | ((src1.GetCbufOffset() + 1) << 16), TextureHandle.PackOffsets(src0.GetCbufOffset(), src1.GetCbufOffset(), handleType),
src0.GetCbufSlot() | ((src1.GetCbufSlot() + 1) << 16), TextureHandle.PackSlots(src0.GetCbufSlot(), src1.GetCbufSlot()),
rewriteSamplerType); rewriteSamplerType);
} }
else if (texOp.Inst == Instruction.ImageLoad || else if (texOp.Inst == Instruction.ImageLoad ||
@ -89,10 +143,41 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
} }
} }
/// <summary>
/// Given a bitwise AND operation, finds the constant buffer operand that is being masked,
/// provided that the other operand is compatible with the expected mask.
/// </summary>
/// <param name="asgOp">Operation to inspect; the caller must have already verified it is a bitwise AND</param>
/// <param name="mask">Mask that a constant operand must be equal to for the pattern to match</param>
/// <returns>The constant buffer operand being masked, or null if the pattern does not match</returns>
private static Operand GetSourceForMaskedHandle(Operation asgOp, uint mask)
{
    Operand lhs = asgOp.GetSource(0);
    Operand rhs = asgOp.GetSource(1);

    bool lhsIsCbuf = lhs.Type == OperandType.ConstantBuffer;
    bool rhsIsCbuf = rhs.Type == OperandType.ConstantBuffer;

    if (lhsIsCbuf && rhsIsCbuf)
    {
        // With both operands coming from a constant buffer, the mask value is unknown here,
        // so optimistically assume that it matches. Official drivers use constant buffer 1
        // to store compiler constants, so the operand on that slot is taken to be the mask
        // and the other one is returned as the handle.
        if (lhs.GetCbufSlot() == 1)
        {
            return rhs;
        }

        return lhs;
    }

    if (lhsIsCbuf && rhs.Type == OperandType.Constant)
    {
        // Handle on the left, immediate mask on the right.
        return (uint)rhs.Value == mask ? lhs : null;
    }

    if (rhsIsCbuf && lhs.Type == OperandType.Constant)
    {
        // Immediate mask on the left, handle on the right.
        return (uint)lhs.Value == mask ? rhs : null;
    }

    return null;
}
private static void SetHandle(ShaderConfig config, TextureOperation texOp, int cbufOffset, int cbufSlot, bool rewriteSamplerType) private static void SetHandle(ShaderConfig config, TextureOperation texOp, int cbufOffset, int cbufSlot, bool rewriteSamplerType)
{ {
texOp.SetHandle(cbufOffset, cbufSlot); texOp.SetHandle(cbufOffset, cbufSlot);
if (rewriteSamplerType) if (rewriteSamplerType)
{ {
texOp.Type = config.GpuAccessor.QuerySamplerType(cbufOffset, cbufSlot); texOp.Type = config.GpuAccessor.QuerySamplerType(cbufOffset, cbufSlot);