diff --git a/Ryujinx.Common/GraphicsDriver/DriverUtilities.cs b/Ryujinx.Common/GraphicsDriver/DriverUtilities.cs new file mode 100644 index 00000000..60c176f8 --- /dev/null +++ b/Ryujinx.Common/GraphicsDriver/DriverUtilities.cs @@ -0,0 +1,22 @@ +using System; + +namespace Ryujinx.Common.GraphicsDriver +{ + public static class DriverUtilities + { + public static void ToggleOGLThreading(bool enabled) + { + Environment.SetEnvironmentVariable("mesa_glthread", enabled.ToString()); + Environment.SetEnvironmentVariable("__GL_THREADED_OPTIMIZATIONS", enabled ? "1" : "0"); + + try + { + NVThreadedOptimization.SetThreadedOptimization(enabled); + } + catch + { + // NVAPI is not available, or couldn't change the application profile. + } + } + } +} diff --git a/Ryujinx.Common/GraphicsDriver/NVAPI/Nvapi.cs b/Ryujinx.Common/GraphicsDriver/NVAPI/Nvapi.cs new file mode 100644 index 00000000..99eaa68f --- /dev/null +++ b/Ryujinx.Common/GraphicsDriver/NVAPI/Nvapi.cs @@ -0,0 +1,11 @@ +namespace Ryujinx.Common.GraphicsDriver.NVAPI +{ + enum Nvapi : uint + { + OglThreadControlId = 0x20C1221E, + + OglThreadControlDefault = 0, + OglThreadControlEnable = 1, + OglThreadControlDisable = 2 + } +} diff --git a/Ryujinx.Common/GraphicsDriver/NVAPI/NvapiUnicodeString.cs b/Ryujinx.Common/GraphicsDriver/NVAPI/NvapiUnicodeString.cs new file mode 100644 index 00000000..6bbff2de --- /dev/null +++ b/Ryujinx.Common/GraphicsDriver/NVAPI/NvapiUnicodeString.cs @@ -0,0 +1,42 @@ +using System.Runtime.InteropServices; +using System.Text; + +namespace Ryujinx.Common.GraphicsDriver.NVAPI +{ + [StructLayout(LayoutKind.Sequential, Pack = 4)] + public unsafe struct NvapiUnicodeString + { + private fixed byte _data[4096]; + + public NvapiUnicodeString(string text) + { + Set(text); + } + + public string Get() + { + fixed (byte* data = _data) + { + string text = Encoding.Unicode.GetString(data, 4096); + + int index = text.IndexOf('\0'); + if (index > -1) + { + text = text.Remove(index); + } + + return text; + } + } + + public void Set(string text) + { + text += '\0'; + fixed (char* textPtr = text) + fixed (byte* data = _data) + { + int written = Encoding.Unicode.GetBytes(textPtr, text.Length, data, 4096); + } + } + } +} diff --git a/Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsApplicationV4.cs b/Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsApplicationV4.cs new file mode 100644 index 00000000..8b472cd1 --- /dev/null +++ b/Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsApplicationV4.cs @@ -0,0 +1,17 @@ +using System.Runtime.InteropServices; + +namespace Ryujinx.Common.GraphicsDriver.NVAPI +{ + [StructLayout(LayoutKind.Sequential, Pack = 4)] + unsafe struct NvdrsApplicationV4 + { + public uint Version; + public uint IsPredefined; + public NvapiUnicodeString AppName; + public NvapiUnicodeString UserFriendlyName; + public NvapiUnicodeString Launcher; + public NvapiUnicodeString FileInFolder; + public uint Flags; + public NvapiUnicodeString CommandLine; + } +} diff --git a/Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsProfile.cs b/Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsProfile.cs new file mode 100644 index 00000000..5a325d08 --- /dev/null +++ b/Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsProfile.cs @@ -0,0 +1,16 @@ +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Common.GraphicsDriver.NVAPI +{ + [StructLayout(LayoutKind.Sequential, Pack = 1)] + unsafe struct NvdrsProfile + { + public uint Version; + public NvapiUnicodeString ProfileName; + public uint GpuSupport; + public uint IsPredefined; + public uint NumOfApps; + public uint NumOfSettings; + } +} diff --git a/Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsSetting.cs b/Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsSetting.cs new file mode 100644 index 00000000..ac188b35 --- /dev/null +++ b/Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsSetting.cs @@ -0,0 +1,49 @@ +using System.Runtime.InteropServices; + +namespace Ryujinx.Common.GraphicsDriver.NVAPI +{ + enum NvdrsSettingType : uint + { + NvdrsDwordType, + NvdrsBinaryType, + NvdrsStringType, + NvdrsWstringType, + } + + enum NvdrsSettingLocation : uint + { + NvdrsCurrentProfileLocation, + NvdrsGlobalProfileLocation, + NvdrsBaseProfileLocation, + NvdrsDefaultProfileLocation, + } + + [StructLayout(LayoutKind.Explicit, Size = 0x3020)] + unsafe struct NvdrsSetting + { + [FieldOffset(0x0)] + public uint Version; + [FieldOffset(0x4)] + public NvapiUnicodeString SettingName; + [FieldOffset(0x1004)] + public Nvapi SettingId; + [FieldOffset(0x1008)] + public NvdrsSettingType SettingType; + [FieldOffset(0x100C)] + public NvdrsSettingLocation SettingLocation; + [FieldOffset(0x1010)] + public uint IsCurrentPredefined; + [FieldOffset(0x1014)] + public uint IsPredefinedValid; + + [FieldOffset(0x1018)] + public uint PredefinedValue; + [FieldOffset(0x1018)] + public NvapiUnicodeString PredefinedString; + + [FieldOffset(0x201C)] + public uint CurrentValue; + [FieldOffset(0x201C)] + public NvapiUnicodeString CurrentString; + } +} diff --git a/Ryujinx.Common/GraphicsDriver/NVThreadedOptimization.cs b/Ryujinx.Common/GraphicsDriver/NVThreadedOptimization.cs new file mode 100644 index 00000000..ad223631 --- /dev/null +++ b/Ryujinx.Common/GraphicsDriver/NVThreadedOptimization.cs @@ -0,0 +1,163 @@ +using Ryujinx.Common.GraphicsDriver.NVAPI; +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace Ryujinx.Common.GraphicsDriver +{ + static class NVThreadedOptimization + { + private const string ProfileName = "Ryujinx Nvidia Profile"; + + private const uint NvAPI_Initialize_ID = 0x0150E828; + private const uint NvAPI_DRS_CreateSession_ID = 0x0694D52E; + private const uint NvAPI_DRS_LoadSettings_ID = 0x375DBD6B; + private const uint NvAPI_DRS_FindProfileByName_ID = 0x7E4A9A0B; + private const uint NvAPI_DRS_CreateProfile_ID = 0x0CC176068; + private const uint NvAPI_DRS_CreateApplication_ID = 0x4347A9DE; + private const uint NvAPI_DRS_SetSetting_ID = 0x577DD202; + private const uint NvAPI_DRS_SaveSettings_ID = 0xFCBC7E14; + private const uint NvAPI_DRS_DestroySession_ID = 0x0DAD9CFF8; + + [DllImport("nvapi64")] + private static extern IntPtr nvapi_QueryInterface(uint id); + + private delegate int NvAPI_InitializeDelegate(); + private static NvAPI_InitializeDelegate NvAPI_Initialize; + + private delegate int NvAPI_DRS_CreateSessionDelegate(out IntPtr handle); + private static NvAPI_DRS_CreateSessionDelegate NvAPI_DRS_CreateSession; + + private delegate int NvAPI_DRS_LoadSettingsDelegate(IntPtr handle); + private static NvAPI_DRS_LoadSettingsDelegate NvAPI_DRS_LoadSettings; + + private delegate int NvAPI_DRS_FindProfileByNameDelegate(IntPtr handle, NvapiUnicodeString profileName, out IntPtr profileHandle); + private static NvAPI_DRS_FindProfileByNameDelegate NvAPI_DRS_FindProfileByName; + + private delegate int NvAPI_DRS_CreateProfileDelegate(IntPtr handle, ref NvdrsProfile profileInfo, out IntPtr profileHandle); + private static NvAPI_DRS_CreateProfileDelegate NvAPI_DRS_CreateProfile; + + private delegate int NvAPI_DRS_CreateApplicationDelegate(IntPtr handle, IntPtr profileHandle, ref NvdrsApplicationV4 app); + private static NvAPI_DRS_CreateApplicationDelegate NvAPI_DRS_CreateApplication; + + private delegate int NvAPI_DRS_SetSettingDelegate(IntPtr handle, IntPtr profileHandle, ref NvdrsSetting setting); + private static NvAPI_DRS_SetSettingDelegate NvAPI_DRS_SetSetting; + + private delegate int NvAPI_DRS_SaveSettingsDelegate(IntPtr handle); + private static NvAPI_DRS_SaveSettingsDelegate NvAPI_DRS_SaveSettings; + + private delegate int NvAPI_DRS_DestroySessionDelegate(IntPtr handle); + private static NvAPI_DRS_DestroySessionDelegate NvAPI_DRS_DestroySession; + + private static bool _initialized; + + private static void Check(int status) + { + if (status != 0) + { + throw new Exception($"NVAPI Error: {status}"); + } + } + + private static void Initialize() + { + if (!_initialized) + { + NvAPI_Initialize = NvAPI_Delegate(NvAPI_Initialize_ID); + + Check(NvAPI_Initialize()); + + NvAPI_DRS_CreateSession = NvAPI_Delegate(NvAPI_DRS_CreateSession_ID); + NvAPI_DRS_LoadSettings = NvAPI_Delegate(NvAPI_DRS_LoadSettings_ID); + NvAPI_DRS_FindProfileByName = NvAPI_Delegate(NvAPI_DRS_FindProfileByName_ID); + NvAPI_DRS_CreateProfile = NvAPI_Delegate(NvAPI_DRS_CreateProfile_ID); + NvAPI_DRS_CreateApplication = NvAPI_Delegate(NvAPI_DRS_CreateApplication_ID); + NvAPI_DRS_SetSetting = NvAPI_Delegate(NvAPI_DRS_SetSetting_ID); + NvAPI_DRS_SaveSettings = NvAPI_Delegate(NvAPI_DRS_SaveSettings_ID); + NvAPI_DRS_DestroySession = NvAPI_Delegate(NvAPI_DRS_DestroySession_ID); + + _initialized = true; + } + } + + private static uint MakeVersion(uint version) where T : unmanaged + { + return (uint)Unsafe.SizeOf() | version << 16; + } + + public static void SetThreadedOptimization(bool enabled) + { + Initialize(); + + uint targetValue = (uint)(enabled ? Nvapi.OglThreadControlEnable : Nvapi.OglThreadControlDisable); + + Check(NvAPI_Initialize()); + + Check(NvAPI_DRS_CreateSession(out IntPtr handle)); + + Check(NvAPI_DRS_LoadSettings(handle)); + + IntPtr profileHandle; + + // Check if the profile already exists. + + int status = NvAPI_DRS_FindProfileByName(handle, new NvapiUnicodeString(ProfileName), out profileHandle); + + if (status != 0) + { + NvdrsProfile profile = new NvdrsProfile { + Version = MakeVersion(1), + IsPredefined = 0, + GpuSupport = uint.MaxValue + }; + profile.ProfileName.Set(ProfileName); + Check(NvAPI_DRS_CreateProfile(handle, ref profile, out profileHandle)); + + NvdrsApplicationV4 application = new NvdrsApplicationV4 + { + Version = MakeVersion(4), + IsPredefined = 0, + Flags = 3 // IsMetro, IsCommandLine + }; + application.AppName.Set("Ryujinx.exe"); + application.UserFriendlyName.Set("Ryujinx"); + application.Launcher.Set(""); + application.FileInFolder.Set(""); + + Check(NvAPI_DRS_CreateApplication(handle, profileHandle, ref application)); + } + + NvdrsSetting setting = new NvdrsSetting + { + Version = MakeVersion(1), + SettingId = Nvapi.OglThreadControlId, + SettingType = NvdrsSettingType.NvdrsDwordType, + SettingLocation = NvdrsSettingLocation.NvdrsCurrentProfileLocation, + IsCurrentPredefined = 0, + IsPredefinedValid = 0, + CurrentValue = targetValue, + PredefinedValue = targetValue + }; + + Check(NvAPI_DRS_SetSetting(handle, profileHandle, ref setting)); + + Check(NvAPI_DRS_SaveSettings(handle)); + + NvAPI_DRS_DestroySession(handle); + } + + private static T NvAPI_Delegate(uint id) where T : class + { + IntPtr ptr = nvapi_QueryInterface(id); + + if (ptr != IntPtr.Zero) + { + return Marshal.GetDelegateForFunctionPointer(ptr, typeof(T)) as T; + } + else + { + return null; + } + } + } +} diff --git a/Ryujinx.Common/System/ForceDedicatedGpu.cs b/Ryujinx.Common/System/ForceDedicatedGpu.cs deleted file mode 100644 index 60272f1a..00000000 --- a/Ryujinx.Common/System/ForceDedicatedGpu.cs +++ /dev/null @@ -1,16 +0,0 @@ -using System.Runtime.InteropServices; - -namespace Ryujinx.Common.System -{ - public static class ForceDedicatedGpu - { - public static void Nvidia() - { - if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) - { - // NOTE: If the DLL exists, we can load it to force the usage of the dedicated Nvidia Gpu. - NativeLibrary.TryLoad("nvapi64.dll", out _); - } - } - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.GAL/IProgram.cs b/Ryujinx.Graphics.GAL/IProgram.cs index 5ab8346f..272a2f7d 100644 --- a/Ryujinx.Graphics.GAL/IProgram.cs +++ b/Ryujinx.Graphics.GAL/IProgram.cs @@ -4,6 +4,8 @@ namespace Ryujinx.Graphics.GAL { public interface IProgram : IDisposable { + ProgramLinkStatus CheckProgramLink(bool blocking); + byte[] GetBinary(); } } diff --git a/Ryujinx.Graphics.GAL/ProgramLinkStatus.cs b/Ryujinx.Graphics.GAL/ProgramLinkStatus.cs new file mode 100644 index 00000000..5ca1be8c --- /dev/null +++ b/Ryujinx.Graphics.GAL/ProgramLinkStatus.cs @@ -0,0 +1,9 @@ +namespace Ryujinx.Graphics.GAL +{ + public enum ProgramLinkStatus + { + Incomplete, + Success, + Failure + } +} diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index 164960d0..f6dcf052 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -10,6 +10,7 @@ using System; using System.Collections.Generic; using System.Diagnostics; using System.Threading; +using System.Threading.Tasks; namespace Ryujinx.Graphics.Gpu.Shader { @@ -102,234 +103,323 @@ namespace Ryujinx.Graphics.Gpu.Shader progressReportThread.Start(progressReportEvent); } - for (int programIndex = 0; programIndex < guestProgramList.Length; programIndex++) + // Make sure these are initialized before doing compilation. + Capabilities caps = _context.Capabilities; + + int maxTaskCount = Math.Min(Environment.ProcessorCount, 8); + int programIndex = 0; + List activeTasks = new List(); + + AutoResetEvent taskDoneEvent = new AutoResetEvent(false); + + // This thread dispatches tasks to do shader translation, and creates programs that OpenGL will link in the background. + // The program link status is checked in a non-blocking manner so that multiple shaders can be compiled at once. + + while (programIndex < guestProgramList.Length || activeTasks.Count > 0) { - Hash128 key = guestProgramList[programIndex]; - - byte[] hostProgramBinary = _cacheManager.GetHostProgramByHash(ref key); - bool hasHostCache = hostProgramBinary != null; - - IProgram hostProgram = null; - - // If the program sources aren't in the cache, compile from saved guest program. - byte[] guestProgram = _cacheManager.GetGuestProgramByHash(ref key); - - if (guestProgram == null) + if (activeTasks.Count < maxTaskCount && programIndex < guestProgramList.Length) { - Logger.Error?.Print(LogClass.Gpu, $"Ignoring orphan shader hash {key} in cache (is the cache incomplete?)"); + // Begin a new shader compilation. + Hash128 key = guestProgramList[programIndex]; - // Should not happen, but if someone messed with the cache it's better to catch it. - invalidEntries?.Add(key); + byte[] hostProgramBinary = _cacheManager.GetHostProgramByHash(ref key); + bool hasHostCache = hostProgramBinary != null; - continue; - } + IProgram hostProgram = null; - ReadOnlySpan guestProgramReadOnlySpan = guestProgram; + // If the program sources aren't in the cache, compile from saved guest program. + byte[] guestProgram = _cacheManager.GetGuestProgramByHash(ref key); - ReadOnlySpan cachedShaderEntries = GuestShaderCacheEntry.Parse(ref guestProgramReadOnlySpan, out GuestShaderCacheHeader fileHeader); - - if (cachedShaderEntries[0].Header.Stage == ShaderStage.Compute) - { - Debug.Assert(cachedShaderEntries.Length == 1); - - GuestShaderCacheEntry entry = cachedShaderEntries[0]; - - HostShaderCacheEntry[] hostShaderEntries = null; - - // Try loading host shader binary. - if (hasHostCache) + if (guestProgram == null) { - hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan hostProgramBinarySpan); - hostProgramBinary = hostProgramBinarySpan.ToArray(); - hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary); + Logger.Error?.Print(LogClass.Gpu, $"Ignoring orphan shader hash {key} in cache (is the cache incomplete?)"); + + // Should not happen, but if someone messed with the cache it's better to catch it. + invalidEntries?.Add(key); + + _shaderCount = ++programIndex; + + continue; } - bool isHostProgramValid = hostProgram != null; + ReadOnlySpan guestProgramReadOnlySpan = guestProgram; - ShaderProgram program; - ShaderProgramInfo shaderProgramInfo; + ReadOnlySpan cachedShaderEntries = GuestShaderCacheEntry.Parse(ref guestProgramReadOnlySpan, out GuestShaderCacheHeader fileHeader); - // Reconstruct code holder. - if (isHostProgramValid) + if (cachedShaderEntries[0].Header.Stage == ShaderStage.Compute) { - program = new ShaderProgram(entry.Header.Stage, ""); - shaderProgramInfo = hostShaderEntries[0].ToShaderProgramInfo(); + Debug.Assert(cachedShaderEntries.Length == 1); + + GuestShaderCacheEntry entry = cachedShaderEntries[0]; + + HostShaderCacheEntry[] hostShaderEntries = null; + + // Try loading host shader binary. + if (hasHostCache) + { + hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan hostProgramBinarySpan); + hostProgramBinary = hostProgramBinarySpan.ToArray(); + hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary); + } + + ShaderCompileTask task = new ShaderCompileTask(taskDoneEvent); + activeTasks.Add(task); + + task.OnCompiled(hostProgram, (bool isHostProgramValid, ShaderCompileTask task) => + { + ShaderProgram program = null; + ShaderProgramInfo shaderProgramInfo = null; + + if (isHostProgramValid) + { + // Reconstruct code holder. + + program = new ShaderProgram(entry.Header.Stage, ""); + shaderProgramInfo = hostShaderEntries[0].ToShaderProgramInfo(); + + ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code); + + _cpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shader)); + + return true; + } + else + { + // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again. + + Task compileTask = Task.Run(() => + { + IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors); + + program = Translator.CreateContext(0, gpuAccessor, DefaultFlags | TranslationFlags.Compute).Translate(out shaderProgramInfo); + }); + + task.OnTask(compileTask, (bool _, ShaderCompileTask task) => + { + ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code); + + Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest..."); + + // Compile shader and create program as the shader program binary got invalidated. + shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, shader.Program.Code); + hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, null); + + task.OnCompiled(hostProgram, (bool isNewProgramValid, ShaderCompileTask task) => + { + // As the host program was invalidated, save the new entry in the cache. + hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), new ShaderCodeHolder[] { shader }); + + if (!isReadOnly) + { + if (hasHostCache) + { + _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary); + } + else + { + Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)"); + + _cacheManager.AddHostProgram(ref key, hostProgramBinary); + } + } + + _cpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shader)); + + return true; + }); + + return false; // Not finished: still need to compile the host program. + }); + + return false; // Not finished: translating the program. + } + }); } else { - IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors); + Debug.Assert(cachedShaderEntries.Length == Constants.ShaderStages); - program = Translator.CreateContext(0, gpuAccessor, DefaultFlags | TranslationFlags.Compute).Translate(out shaderProgramInfo); - } + ShaderCodeHolder[] shaders = new ShaderCodeHolder[cachedShaderEntries.Length]; + List shaderPrograms = new List(); - ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code); + TransformFeedbackDescriptor[] tfd = CacheHelper.ReadTransformFeedbackInformation(ref guestProgramReadOnlySpan, fileHeader); - // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again. - if (hostProgram == null) - { - Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest..."); + TranslationFlags flags = DefaultFlags; - // Compile shader and create program as the shader program binary got invalidated. - shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, shader.Program.Code); - hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, null); - - // As the host program was invalidated, save the new entry in the cache. - hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), new ShaderCodeHolder[] { shader }); - - if (!isReadOnly) + if (tfd != null) { - if (hasHostCache) - { - _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary); - } - else - { - Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)"); - - _cacheManager.AddHostProgram(ref key, hostProgramBinary); - } + flags |= TranslationFlags.Feedback; } + + TranslationCounts counts = new TranslationCounts(); + + HostShaderCacheEntry[] hostShaderEntries = null; + + // Try loading host shader binary. + if (hasHostCache) + { + hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan hostProgramBinarySpan); + hostProgramBinary = hostProgramBinarySpan.ToArray(); + hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary); + } + + ShaderCompileTask task = new ShaderCompileTask(taskDoneEvent); + activeTasks.Add(task); + + GuestShaderCacheEntry[] entries = cachedShaderEntries.ToArray(); + + task.OnCompiled(hostProgram, (bool isHostProgramValid, ShaderCompileTask task) => + { + Task compileTask = Task.Run(() => + { + // Reconstruct code holder. + for (int i = 0; i < entries.Length; i++) + { + GuestShaderCacheEntry entry = entries[i]; + + if (entry == null) + { + continue; + } + + ShaderProgram program; + + if (entry.Header.SizeA != 0) + { + ShaderProgramInfo shaderProgramInfo; + + if (isHostProgramValid) + { + program = new ShaderProgram(entry.Header.Stage, ""); + shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo(); + } + else + { + IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors); + + TranslatorContext translatorContext = Translator.CreateContext(0, gpuAccessor, flags, counts); + TranslatorContext translatorContext2 = Translator.CreateContext((ulong)entry.Header.Size, gpuAccessor, flags | TranslationFlags.VertexA, counts); + + program = translatorContext.Translate(out shaderProgramInfo, translatorContext2); + } + + // NOTE: Vertex B comes first in the shader cache. + byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size).ToArray(); + byte[] code2 = entry.Code.AsSpan().Slice(entry.Header.Size, entry.Header.SizeA).ToArray(); + + shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code, code2); + } + else + { + ShaderProgramInfo shaderProgramInfo; + + if (isHostProgramValid) + { + program = new ShaderProgram(entry.Header.Stage, ""); + shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo(); + } + else + { + IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors); + + program = Translator.CreateContext(0, gpuAccessor, flags, counts).Translate(out shaderProgramInfo); + } + + shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code); + } + + shaderPrograms.Add(program); + } + }); + + task.OnTask(compileTask, (bool _, ShaderCompileTask task) => + { + // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again. + if (!isHostProgramValid) + { + Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest..."); + + List hostShaders = new List(); + + // Compile shaders and create program as the shader program binary got invalidated. + for (int stage = 0; stage < Constants.ShaderStages; stage++) + { + ShaderProgram program = shaders[stage]?.Program; + + if (program == null) + { + continue; + } + + IShader hostShader = _context.Renderer.CompileShader(program.Stage, program.Code); + + shaders[stage].HostShader = hostShader; + + hostShaders.Add(hostShader); + } + + hostProgram = _context.Renderer.CreateProgram(hostShaders.ToArray(), tfd); + + task.OnCompiled(hostProgram, (bool isNewProgramValid, ShaderCompileTask task) => + { + // As the host program was invalidated, save the new entry in the cache. + hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), shaders); + + if (!isReadOnly) + { + if (hasHostCache) + { + _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary); + } + else + { + Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)"); + + _cacheManager.AddHostProgram(ref key, hostProgramBinary); + } + } + + _gpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shaders)); + + return true; + }); + + return false; // Not finished: still need to compile the host program. + } + else + { + _gpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shaders)); + + return true; + } + }); + + return false; // Not finished: translating the program. + }); } - _cpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shader)); + _shaderCount = ++programIndex; } - else + + // Process the queue. + for (int i = 0; i < activeTasks.Count; i++) { - Debug.Assert(cachedShaderEntries.Length == Constants.ShaderStages); + ShaderCompileTask task = activeTasks[i]; - ShaderCodeHolder[] shaders = new ShaderCodeHolder[cachedShaderEntries.Length]; - List shaderPrograms = new List(); - - TransformFeedbackDescriptor[] tfd = CacheHelper.ReadTransformFeedbackInformation(ref guestProgramReadOnlySpan, fileHeader); - - TranslationFlags flags = DefaultFlags; - - if (tfd != null) + if (task.IsDone()) { - flags |= TranslationFlags.Feedback; + activeTasks.RemoveAt(i--); } - - TranslationCounts counts = new TranslationCounts(); - - HostShaderCacheEntry[] hostShaderEntries = null; - - // Try loading host shader binary. - if (hasHostCache) - { - hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan hostProgramBinarySpan); - hostProgramBinary = hostProgramBinarySpan.ToArray(); - hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary); - } - - bool isHostProgramValid = hostProgram != null; - - // Reconstruct code holder. - for (int i = 0; i < cachedShaderEntries.Length; i++) - { - GuestShaderCacheEntry entry = cachedShaderEntries[i]; - - if (entry == null) - { - continue; - } - - ShaderProgram program; - - if (entry.Header.SizeA != 0) - { - ShaderProgramInfo shaderProgramInfo; - - if (isHostProgramValid) - { - program = new ShaderProgram(entry.Header.Stage, ""); - shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo(); - } - else - { - IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors); - - TranslatorContext translatorContext = Translator.CreateContext(0, gpuAccessor, flags, counts); - TranslatorContext translatorContext2 = Translator.CreateContext((ulong)entry.Header.Size, gpuAccessor, flags | TranslationFlags.VertexA, counts); - - program = translatorContext.Translate(out shaderProgramInfo, translatorContext2); - } - - // NOTE: Vertex B comes first in the shader cache. - byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size).ToArray(); - byte[] code2 = entry.Code.AsSpan().Slice(entry.Header.Size, entry.Header.SizeA).ToArray(); - - shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code, code2); - } - else - { - ShaderProgramInfo shaderProgramInfo; - - if (isHostProgramValid) - { - program = new ShaderProgram(entry.Header.Stage, ""); - shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo(); - } - else - { - IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors); - - program = Translator.CreateContext(0, gpuAccessor, flags, counts).Translate(out shaderProgramInfo); - } - - shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code); - } - - shaderPrograms.Add(program); - } - - // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again. - if (!isHostProgramValid) - { - Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest..."); - - List hostShaders = new List(); - - // Compile shaders and create program as the shader program binary got invalidated. - for (int stage = 0; stage < Constants.ShaderStages; stage++) - { - ShaderProgram program = shaders[stage]?.Program; - - if (program == null) - { - continue; - } - - IShader hostShader = _context.Renderer.CompileShader(program.Stage, program.Code); - - shaders[stage].HostShader = hostShader; - - hostShaders.Add(hostShader); - } - - hostProgram = _context.Renderer.CreateProgram(hostShaders.ToArray(), tfd); - - // As the host program was invalidated, save the new entry in the cache. - hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), shaders); - - if (!isReadOnly) - { - if (hasHostCache) - { - _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary); - } - else - { - Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)"); - - _cacheManager.AddHostProgram(ref key, hostProgramBinary); - } - } - } - - _gpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shaders)); } - _shaderCount = programIndex + 1; + if (activeTasks.Count == maxTaskCount) + { + // Wait for a task to be done, or for 1ms. + // Host shader compilation cannot signal when it is done, + // so the 1ms timeout is required to poll status. + + taskDoneEvent.WaitOne(1); + } } if (!isReadOnly) @@ -458,6 +548,8 @@ namespace Ryujinx.Graphics.Gpu.Shader IProgram hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, null); + hostProgram.CheckProgramLink(true); + byte[] hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), new ShaderCodeHolder[] { shader }); cpShader = new ShaderBundle(hostProgram, shader); @@ -598,6 +690,8 @@ namespace Ryujinx.Graphics.Gpu.Shader IProgram hostProgram = _context.Renderer.CreateProgram(hostShaders.ToArray(), tfd); + hostProgram.CheckProgramLink(true); + byte[] hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), shaders); gpShaders = new ShaderBundle(hostProgram, shaders); diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs new file mode 100644 index 00000000..ff48fab0 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs @@ -0,0 +1,94 @@ +using Ryujinx.Graphics.GAL; +using System; +using System.Threading; +using System.Threading.Tasks; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + delegate bool ShaderCompileTaskCallback(bool success, ShaderCompileTask task); + + /// + /// A class that represents a shader compilation. + /// + class ShaderCompileTask + { + private bool _compiling; + + private Task _programsTask; + private IProgram _program; + + private ShaderCompileTaskCallback _action; + private AutoResetEvent _taskDoneEvent; + + /// + /// Create a new shader compile task, with an event to signal whenever a subtask completes. + /// + /// Event to signal when a subtask completes + public ShaderCompileTask(AutoResetEvent taskDoneEvent) + { + _taskDoneEvent = taskDoneEvent; + } + + /// + /// Check the completion status of the shader compile task, and run callbacks on step completion. + /// Calling this periodically is required to progress through steps of the compilation. + /// + /// True if the task is complete, false if it is in progress + public bool IsDone() + { + if (_compiling) + { + ProgramLinkStatus status = _program.CheckProgramLink(false); + + if (status != ProgramLinkStatus.Incomplete) + { + return _action(status == ProgramLinkStatus.Success, this); + } + } + else + { + // Waiting on the task. + + if (_programsTask.IsCompleted) + { + return _action(true, this); + } + } + + return false; + } + + /// + /// Run a callback when the specified task has completed. + /// + /// The task object that needs to complete + /// The action to perform when it is complete + public void OnTask(Task task, ShaderCompileTaskCallback action) + { + _compiling = false; + + _programsTask = task; + _action = action; + + task.ContinueWith(task => _taskDoneEvent.Set()); + } + + /// + /// Run a callback when the specified program has been linked. + /// + /// The program that needs to be linked + /// The action to perform when linking is complete + public void OnCompiled(IProgram program, ShaderCompileTaskCallback action) + { + _compiling = true; + + _program = program; + _action = action; + + if (program == null) + { + action(false, this); + } + } + } +} diff --git a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs index 7c2acacd..c994113d 100644 --- a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs +++ b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs @@ -10,6 +10,7 @@ namespace Ryujinx.Graphics.OpenGL private static readonly Lazy _supportsPolygonOffsetClamp = new Lazy(() => HasExtension("GL_EXT_polygon_offset_clamp")); private static readonly Lazy _supportsViewportSwizzle = new Lazy(() => HasExtension("GL_NV_viewport_swizzle")); private static readonly Lazy _supportsSeamlessCubemapPerTexture = new Lazy(() => HasExtension("GL_ARB_seamless_cubemap_per_texture")); + private static readonly Lazy _supportsParallelShaderCompile = new Lazy(() => HasExtension("GL_ARB_parallel_shader_compile")); private static readonly Lazy _maximumComputeSharedMemorySize = new Lazy(() => GetLimit(All.MaxComputeSharedMemorySize)); private static readonly Lazy _storageBufferOffsetAlignment = new Lazy(() => GetLimit(All.ShaderStorageBufferOffsetAlignment)); @@ -34,6 +35,7 @@ namespace Ryujinx.Graphics.OpenGL public static bool SupportsPolygonOffsetClamp => _supportsPolygonOffsetClamp.Value; public static bool SupportsViewportSwizzle => _supportsViewportSwizzle.Value; public static bool SupportsSeamlessCubemapPerTexture => _supportsSeamlessCubemapPerTexture.Value; + public static bool SupportsParallelShaderCompile => _supportsParallelShaderCompile.Value; public static bool SupportsNonConstantTextureOffset => _gpuVendor.Value == GpuVendor.Nvidia; public static bool RequiresSyncFlush => _gpuVendor.Value == GpuVendor.Amd || _gpuVendor.Value == GpuVendor.IntelWindows || _gpuVendor.Value == GpuVendor.IntelUnix; public static bool SupportsMismatchingViewFormat => _gpuVendor.Value != GpuVendor.Amd && _gpuVendor.Value != GpuVendor.IntelWindows; diff --git a/Ryujinx.Graphics.OpenGL/Program.cs b/Ryujinx.Graphics.OpenGL/Program.cs index d39e181d..decc75b1 100644 --- a/Ryujinx.Graphics.OpenGL/Program.cs +++ b/Ryujinx.Graphics.OpenGL/Program.cs @@ -13,11 +13,26 @@ namespace Ryujinx.Graphics.OpenGL { public int Handle { get; private set; } - public int FragmentIsBgraUniform { get; } - public int FragmentRenderScaleUniform { get; } - public int ComputeRenderScaleUniform { get; } + public int FragmentIsBgraUniform { get; private set; } + public int FragmentRenderScaleUniform { get; private set; } + public int ComputeRenderScaleUniform { get; private set; } - public bool IsLinked { get; private set; } + public bool IsLinked + { + get + { + if (_status == ProgramLinkStatus.Incomplete) + { + CheckProgramLink(true); + } + + return _status == ProgramLinkStatus.Success; + } + } + + private bool _initialized; + private ProgramLinkStatus _status = ProgramLinkStatus.Incomplete; + private IShader[] _shaders; public Program(IShader[] shaders, TransformFeedbackDescriptor[] transformFeedbackDescriptors) { @@ -82,18 +97,7 @@ namespace Ryujinx.Graphics.OpenGL GL.LinkProgram(Handle); - for (int index = 0; index < shaders.Length; index++) - { - int shaderHandle = ((Shader)shaders[index]).Handle; - - GL.DetachShader(Handle, shaderHandle); - } - - CheckProgramLink(); - - FragmentIsBgraUniform = GL.GetUniformLocation(Handle, "is_bgra"); - FragmentRenderScaleUniform = GL.GetUniformLocation(Handle, "fp_renderScale"); - ComputeRenderScaleUniform = GL.GetUniformLocation(Handle, "cp_renderScale"); + _shaders = shaders; } public Program(ReadOnlySpan code) @@ -109,32 +113,60 @@ namespace Ryujinx.Graphics.OpenGL GL.ProgramBinary(Handle, binaryFormat, (IntPtr)ptr, code.Length - 4); } } - - CheckProgramLink(); - - FragmentIsBgraUniform = GL.GetUniformLocation(Handle, "is_bgra"); - FragmentRenderScaleUniform = GL.GetUniformLocation(Handle, "fp_renderScale"); - ComputeRenderScaleUniform = GL.GetUniformLocation(Handle, "cp_renderScale"); } public void Bind() { + if (!_initialized) + { + FragmentIsBgraUniform = GL.GetUniformLocation(Handle, "is_bgra"); + FragmentRenderScaleUniform = GL.GetUniformLocation(Handle, "fp_renderScale"); + ComputeRenderScaleUniform = GL.GetUniformLocation(Handle, "cp_renderScale"); + + _initialized = true; + } + GL.UseProgram(Handle); } - private void CheckProgramLink() + public ProgramLinkStatus CheckProgramLink(bool blocking) { + if (!blocking && HwCapabilities.SupportsParallelShaderCompile) + { + GL.GetProgram(Handle, (GetProgramParameterName)ArbParallelShaderCompile.CompletionStatusArb, out int completed); + + if (completed == 0) + { + return ProgramLinkStatus.Incomplete; + } + } + GL.GetProgram(Handle, GetProgramParameterName.LinkStatus, out int status); + if (_shaders != null) + { + for (int index = 0; index < _shaders.Length; index++) + { + int shaderHandle = ((Shader)_shaders[index]).Handle; + + GL.DetachShader(Handle, shaderHandle); + } + + _shaders = null; + } + if (status == 0) { // Use GL.GetProgramInfoLog(Handle), it may be too long to print on the log. + _status = ProgramLinkStatus.Failure; Logger.Debug?.Print(LogClass.Gpu, "Shader linking failed."); } else { - IsLinked = true; + _status = ProgramLinkStatus.Success; } + + return _status; } public byte[] GetBinary() diff --git a/Ryujinx.Graphics.OpenGL/Renderer.cs b/Ryujinx.Graphics.OpenGL/Renderer.cs index b909f792..7416cdd7 100644 --- a/Ryujinx.Graphics.OpenGL/Renderer.cs +++ b/Ryujinx.Graphics.OpenGL/Renderer.cs @@ -131,6 +131,11 @@ namespace Ryujinx.Graphics.OpenGL PrintGpuInformation(); + if (HwCapabilities.SupportsParallelShaderCompile) + { + GL.Arb.MaxShaderCompilerThreads(Math.Min(Environment.ProcessorCount, 8)); + } + _counters.Initialize(); } @@ -178,16 +183,7 @@ namespace Ryujinx.Graphics.OpenGL public IProgram LoadProgramBinary(byte[] programBinary) { - Program program = new Program(programBinary); - - if (program.IsLinked) - { - return program; - } - - program.Dispose(); - - return null; + return new Program(programBinary); } public void CreateSync(ulong id) diff --git a/Ryujinx/Program.cs b/Ryujinx/Program.cs index e6771a43..9600f9dc 100644 --- a/Ryujinx/Program.cs +++ b/Ryujinx/Program.cs @@ -2,6 +2,7 @@ using ARMeilleure.Translation.PTC; using FFmpeg.AutoGen; using Gtk; using Ryujinx.Common.Configuration; +using Ryujinx.Common.GraphicsDriver; using Ryujinx.Common.Logging; using Ryujinx.Common.System; using Ryujinx.Common.SystemInfo; @@ -147,6 +148,9 @@ namespace Ryujinx // Logging system information. PrintSystemInfo(); + // Enable OGL multithreading on the driver, when available. + DriverUtilities.ToggleOGLThreading(true); + // Initialize Gtk. Application.Init(); @@ -158,9 +162,6 @@ namespace Ryujinx UserErrorDialog.CreateUserErrorDialog(UserError.NoKeys); } - // Force dedicated GPU if we can. - ForceDedicatedGpu.Nvidia(); - // Show the main window UI. MainWindow mainWindow = new MainWindow(); mainWindow.Show();