From b5ac9e494615444f5b7144e7c818bee52cacbaee Mon Sep 17 00:00:00 2001 From: SachinVin <26602104+SachinVin@users.noreply.github.com> Date: Sat, 2 Apr 2022 11:57:14 +0530 Subject: [PATCH] Backport non-seperable disk shader from android (#5976) * VideoCore: Backport Shader cache for non separable shaders android: disk_shader_cache: remove redundant lookup * disk_shader_cache: dont compile dumped shaders * cleanup Seperable shader loading * gl_shader_disk_cache.cpp: remove redundant checks * Skip compiling non seperable raws * gl_shader_manager.cpp: Skip `GLAD_GL_ARB_get_program_binary` check on GLES --- src/video_core/renderer_base.cpp | 2 +- .../renderer_opengl/gl_rasterizer.cpp | 11 +- .../renderer_opengl/gl_rasterizer.h | 2 +- .../renderer_opengl/gl_shader_disk_cache.cpp | 114 +++++- .../renderer_opengl/gl_shader_disk_cache.h | 23 +- .../renderer_opengl/gl_shader_manager.cpp | 348 +++++++++++------- .../renderer_opengl/gl_shader_manager.h | 4 +- .../renderer_opengl/gl_shader_util.cpp | 1 + 8 files changed, 351 insertions(+), 154 deletions(-) diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index 353550dee..441126167 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -22,7 +22,7 @@ void RendererBase::RefreshRasterizerSetting() { opengl_rasterizer_active = hw_renderer_enabled; if (hw_renderer_enabled) { - rasterizer = std::make_unique(); + rasterizer = std::make_unique(render_window); } else { rasterizer = std::make_unique(); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 4a331c630..2f04412a4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -48,7 +48,7 @@ static bool IsVendorIntel() { return gpu_vendor == "Intel Inc."; } -RasterizerOpenGL::RasterizerOpenGL() +RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window) : is_amd(IsVendorAmd()), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd), uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE, false), index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE, false), @@ -172,15 +172,16 @@ RasterizerOpenGL::RasterizerOpenGL() #ifdef __APPLE__ if (IsVendorIntel()) { shader_program_manager = std::make_unique( + emu_window, VideoCore::g_separable_shader_enabled ? GLAD_GL_ARB_separate_shader_objects : false, is_amd); } else { - shader_program_manager = - std::make_unique(GLAD_GL_ARB_separate_shader_objects, is_amd); + shader_program_manager = std::make_unique( + emu_window, GLAD_GL_ARB_separate_shader_objects, is_amd); } #else - shader_program_manager = - std::make_unique(GLAD_GL_ARB_separate_shader_objects, is_amd); + shader_program_manager = std::make_unique( + emu_window, GLAD_GL_ARB_separate_shader_objects, is_amd); #endif glEnable(GL_BLEND); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4748655d5..849905ef4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -38,7 +38,7 @@ class ShaderProgramManager; class RasterizerOpenGL : public VideoCore::RasterizerInterface { public: - explicit RasterizerOpenGL(); + explicit RasterizerOpenGL(Frontend::EmuWindow& emu_window); ~RasterizerOpenGL() override; void LoadDiskResources(const std::atomic_bool& stop_loading, diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 974fede66..1a3ea1d73 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -171,7 +171,7 @@ std::optional> ShaderDiskCache::LoadTransferable } std::pair, ShaderDumpsMap> -ShaderDiskCache::LoadPrecompiled() { +ShaderDiskCache::LoadPrecompiled(bool compressed) { if (!IsUsable()) return {}; @@ -182,7 +182,7 @@ ShaderDiskCache::LoadPrecompiled() { return {}; } - const auto result = LoadPrecompiledFile(file); + const auto result = LoadPrecompiledFile(file, compressed); if (!result) { LOG_INFO(Render_OpenGL, "Failed to load precompiled cache for game with title id={} - removing", @@ -195,12 +195,18 @@ ShaderDiskCache::LoadPrecompiled() { } std::optional, ShaderDumpsMap>> -ShaderDiskCache::LoadPrecompiledFile(FileUtil::IOFile& file) { +ShaderDiskCache::LoadPrecompiledFile(FileUtil::IOFile& file, bool compressed) { // Read compressed file from disk and decompress to virtual precompiled cache file - std::vector compressed(file.GetSize()); - file.ReadBytes(compressed.data(), compressed.size()); - const std::vector decompressed = Common::Compression::DecompressDataZSTD(compressed); - SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); + std::vector precompiled_file(file.GetSize()); + file.ReadBytes(precompiled_file.data(), precompiled_file.size()); + if (compressed) { + const std::vector decompressed = + Common::Compression::DecompressDataZSTD(precompiled_file); + SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); + } else { + SaveArrayToPrecompiled(precompiled_file.data(), precompiled_file.size()); + } + decompressed_precompiled_cache_offset = 0; ShaderCacheVersionHash file_hash{}; @@ -293,9 +299,25 @@ std::optional ShaderDiskCache::LoadDecompiledEntry() return entry; } -bool ShaderDiskCache::SaveDecompiledFile(u64 unique_identifier, - const ShaderDecompiler::ProgramResult& result, - bool sanitize_mul) { +void ShaderDiskCache::SaveDecompiledToFile(FileUtil::IOFile& file, u64 unique_identifier, + const ShaderDecompiler::ProgramResult& result, + bool sanitize_mul) { + if (!IsUsable()) + return; + + if (file.WriteObject(static_cast(PrecompiledEntryKind::Decompiled)) != 1 || + file.WriteObject(unique_identifier) != 1 || file.WriteObject(sanitize_mul) != 1 || + file.WriteObject(static_cast(result.code.size())) != 1 || + file.WriteArray(result.code.data(), result.code.size()) != result.code.size()) { + LOG_ERROR(Render_OpenGL, "Failed to save decompiled cache entry - removing"); + file.Close(); + InvalidatePrecompiled(); + } +} + +bool ShaderDiskCache::SaveDecompiledToCache(u64 unique_identifier, + const ShaderDecompiler::ProgramResult& result, + bool sanitize_mul) { if (!SaveObjectToPrecompiled(static_cast(PrecompiledEntryKind::Decompiled)) || !SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(sanitize_mul) || !SaveObjectToPrecompiled(static_cast(result.code.size())) || @@ -315,7 +337,7 @@ void ShaderDiskCache::InvalidateAll() { } void ShaderDiskCache::InvalidatePrecompiled() { - // Clear virtaul precompiled cache file + // Clear virtual precompiled cache file decompressed_precompiled_cache.resize(0); if (!FileUtil::Delete(GetPrecompiledPath())) { @@ -351,11 +373,11 @@ void ShaderDiskCache::SaveDecompiled(u64 unique_identifier, if (!IsUsable()) return; - if (decompressed_precompiled_cache.size() == 0) { + if (decompressed_precompiled_cache.empty()) { SavePrecompiledHeaderToVirtualPrecompiledCache(); } - if (!SaveDecompiledFile(unique_identifier, code, sanitize_mul)) { + if (!SaveDecompiledToCache(unique_identifier, code, sanitize_mul)) { LOG_ERROR(Render_OpenGL, "Failed to save decompiled entry to the precompiled file - removing"); InvalidatePrecompiled(); @@ -385,6 +407,37 @@ void ShaderDiskCache::SaveDump(u64 unique_identifier, GLuint program) { } } +void ShaderDiskCache::SaveDumpToFile(u64 unique_identifier, GLuint program, bool sanitize_mul) { + if (!IsUsable()) + return; + + FileUtil::IOFile file = AppendPrecompiledFile(); + if (!file.IsOpen()) + return; + + GLint binary_length{}; + glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); + + GLenum binary_format{}; + std::vector binary(binary_length); + glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); + + if (file.WriteObject(static_cast(PrecompiledEntryKind::Dump)) != 1 || + file.WriteObject(unique_identifier) != 1 || + file.WriteObject(static_cast(binary_format)) != 1 || + file.WriteObject(static_cast(binary_length)) != 1 || + file.WriteArray(binary.data(), binary.size()) != binary.size()) { + LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing", + unique_identifier); + InvalidatePrecompiled(); + return; + } + + // SaveDecompiled is used only to store the accurate multiplication setting, a better way is to + // probably change the header in SaveDump + SaveDecompiledToFile(file, unique_identifier, {}, sanitize_mul); +} + bool ShaderDiskCache::IsUsable() const { return tried_to_load && Settings::values.use_disk_shader_cache; } @@ -412,6 +465,30 @@ FileUtil::IOFile ShaderDiskCache::AppendTransferableFile() { return file; } +FileUtil::IOFile ShaderDiskCache::AppendPrecompiledFile() { + if (!EnsureDirectories()) + return {}; + + const auto precompiled_path{GetPrecompiledPath()}; + const bool existed = FileUtil::Exists(precompiled_path); + + FileUtil::IOFile file(precompiled_path, "ab"); + if (!file.IsOpen()) { + LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path); + return {}; + } + if (!existed || file.GetSize() == 0) { + // If the file didn't exist, write its version + const auto hash{GetShaderCacheVersionHash()}; + if (file.WriteArray(hash.data(), hash.size()) != hash.size()) { + LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", + precompiled_path); + return {}; + } + } + return file; +} + void ShaderDiskCache::SavePrecompiledHeaderToVirtualPrecompiledCache() { const auto hash{GetShaderCacheVersionHash()}; if (!SaveArrayToPrecompiled(hash.data(), hash.size())) { @@ -451,7 +528,7 @@ bool ShaderDiskCache::EnsureDirectories() const { return CreateDir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) && CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) && - CreateDir(GetPrecompiledDir()); + CreateDir(GetPrecompiledDir()) && CreateDir(GetPrecompiledShaderDir()); } std::string ShaderDiskCache::GetTransferablePath() { @@ -459,7 +536,7 @@ std::string ShaderDiskCache::GetTransferablePath() { } std::string ShaderDiskCache::GetPrecompiledPath() { - return FileUtil::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin"); + return FileUtil::SanitizePath(GetPrecompiledShaderDir() + DIR_SEP_CHR + GetTitleID() + ".bin"); } std::string ShaderDiskCache::GetTransferableDir() const { @@ -470,6 +547,13 @@ std::string ShaderDiskCache::GetPrecompiledDir() const { return GetBaseDir() + DIR_SEP "precompiled"; } +std::string ShaderDiskCache::GetPrecompiledShaderDir() const { + if (separable) { + return GetPrecompiledDir() + DIR_SEP "separable"; + } + return GetPrecompiledDir() + DIR_SEP "conventional"; +} + std::string ShaderDiskCache::GetBaseDir() const { return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "opengl"; } diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index a3743d1a0..5a2faeb9a 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -97,7 +97,7 @@ public: std::optional> LoadTransferable(); /// Loads current game's precompiled cache. Invalidates on failure. - std::pair LoadPrecompiled(); + std::pair LoadPrecompiled(bool compressed); /// Removes the transferable (and precompiled) cache file. void InvalidateAll(); @@ -115,21 +115,28 @@ public: /// Saves a dump entry to the precompiled file. Does not check for collisions. void SaveDump(u64 unique_identifier, GLuint program); + /// Saves a dump entry to the precompiled file. Does not check for collisions. + void SaveDumpToFile(u64 unique_identifier, GLuint program, bool sanitize_mul); + /// Serializes virtual precompiled shader cache file to real file void SaveVirtualPrecompiledFile(); private: /// Loads the transferable cache. Returns empty on failure. std::optional> LoadPrecompiledFile( - FileUtil::IOFile& file); + FileUtil::IOFile& file, bool compressed); /// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on /// failure. std::optional LoadDecompiledEntry(); - /// Saves a decompiled entry to the passed file. Returns true on success. - bool SaveDecompiledFile(u64 unique_identifier, const ShaderDecompiler::ProgramResult& code, - bool sanitize_mul); + /// Saves a decompiled entry to the passed file. Does not check for collisions. + void SaveDecompiledToFile(FileUtil::IOFile& file, u64 unique_identifier, + const ShaderDecompiler::ProgramResult& code, bool sanitize_mul); + + /// Saves a decompiled entry to the virtual precompiled cache. Does not check for collisions. + bool SaveDecompiledToCache(u64 unique_identifier, const ShaderDecompiler::ProgramResult& code, + bool sanitize_mul); /// Returns if the cache can be used bool IsUsable() const; @@ -155,6 +162,8 @@ private: /// Get user's precompiled directory path std::string GetPrecompiledDir() const; + std::string GetPrecompiledShaderDir() const; + /// Get user's shader directory path std::string GetBaseDir() const; @@ -197,7 +206,7 @@ private: return LoadArrayFromPrecompiled(&object, 1); } - // Stores whole precompiled cache which will be read from or saved to the precompiled chache + // Stores whole precompiled cache which will be read from or saved to the precompiled cache // file std::vector decompressed_precompiled_cache; // Stores the current offset of the precompiled cache file for IO purposes @@ -213,6 +222,8 @@ private: u64 program_id{}; std::string title_id; + + FileUtil::IOFile AppendPrecompiledFile(); }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index a5664b1ff..7f49e9b73 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -8,8 +8,10 @@ #include #include #include "core/core.h" +#include "core/frontend/scope_acquire_context.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_vars.h" #include "video_core/video_core.h" namespace OpenGL { @@ -26,7 +28,8 @@ static u64 GetUniqueIdentifier(const Pica::Regs& regs, const ProgramCode& code) } static OGLProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, - const std::set& supported_formats) { + const std::set& supported_formats, + bool separable) { if (supported_formats.find(dump.binary_format) == supported_formats.end()) { LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); @@ -35,7 +38,9 @@ static OGLProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, auto shader = OGLProgram(); shader.handle = glCreateProgram(); - glProgramParameteri(shader.handle, GL_PROGRAM_SEPARABLE, GL_TRUE); + if (separable) { + glProgramParameteri(shader.handle, GL_PROGRAM_SEPARABLE, GL_TRUE); + } glProgramBinary(shader.handle, dump.binary_format, dump.binary.data(), static_cast(dump.binary.size())); @@ -239,6 +244,10 @@ public: shaders.emplace(key, std::move(stage)); } + void Inject(const KeyConfigType& key, OGLShaderStage&& stage) { + shaders.emplace(key, std::move(stage)); + } + private: bool separable; std::unordered_map shaders; @@ -294,6 +303,12 @@ public: shader_map.insert_or_assign(key, &cached_shader); } + void Inject(const KeyConfigType& key, std::string decomp, OGLShaderStage&& stage) { + const auto iter = shader_cache.emplace(std::move(decomp), std::move(stage)).first; + OGLShaderStage& cached_shader = iter->second; + shader_map.insert_or_assign(key, &cached_shader); + } + private: bool separable; std::unordered_map shader_map; @@ -323,6 +338,10 @@ public: GLuint gs = 0; GLuint fs = 0; + std::size_t vs_hash = 0; + std::size_t gs_hash = 0; + std::size_t fs_hash = 0; + bool operator==(const ShaderTuple& rhs) const { return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs); } @@ -331,15 +350,13 @@ public: return std::tie(vs, gs, fs) != std::tie(rhs.vs, rhs.gs, rhs.fs); } - struct Hash { - std::size_t operator()(const ShaderTuple& tuple) const { - std::size_t hash = 0; - boost::hash_combine(hash, tuple.vs); - boost::hash_combine(hash, tuple.gs); - boost::hash_combine(hash, tuple.fs); - return hash; - } - }; + std::size_t GetConfigHash() const { + std::size_t hash = 0; + boost::hash_combine(hash, vs_hash); + boost::hash_combine(hash, gs_hash); + boost::hash_combine(hash, fs_hash); + return hash; + } }; bool is_amd; @@ -353,13 +370,14 @@ public: FixedGeometryShaders fixed_geometry_shaders; FragmentShaders fragment_shaders; - std::unordered_map program_cache; + std::unordered_map program_cache; OGLPipeline pipeline; ShaderDiskCache disk_cache; }; -ShaderProgramManager::ShaderProgramManager(bool separable, bool is_amd) - : impl(std::make_unique(separable, is_amd)) {} +ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable, + bool is_amd) + : impl(std::make_unique(separable, is_amd)), emu_window{emu_window_} {} ShaderProgramManager::~ShaderProgramManager() = default; @@ -370,6 +388,8 @@ bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::Regs& regs, if (handle == 0) return false; impl->current.vs = handle; + impl->current.vs_hash = config.Hash(); + // Save VS to the disk cache if its a new shader if (result) { auto& disk_cache = impl->disk_cache; @@ -380,28 +400,33 @@ bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::Regs& regs, const ShaderDiskCacheRaw raw{unique_identifier, ProgramType::VS, regs, std::move(program_code)}; disk_cache.SaveRaw(raw); + disk_cache.SaveDecompiled(unique_identifier, *result, VideoCore::g_hw_shader_accurate_mul); } return true; } void ShaderProgramManager::UseTrivialVertexShader() { impl->current.vs = impl->trivial_vertex_shader.Get(); + impl->current.vs_hash = 0; } void ShaderProgramManager::UseFixedGeometryShader(const Pica::Regs& regs) { PicaFixedGSConfig gs_config(regs); auto [handle, _] = impl->fixed_geometry_shaders.Get(gs_config); impl->current.gs = handle; + impl->current.gs_hash = gs_config.Hash(); } void ShaderProgramManager::UseTrivialGeometryShader() { impl->current.gs = 0; + impl->current.gs_hash = 0; } void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs) { PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs); auto [handle, result] = impl->fragment_shaders.Get(config); impl->current.fs = handle; + impl->current.fs_hash = config.Hash(); // Save FS to the disk cache if its a new shader if (result) { auto& disk_cache = impl->disk_cache; @@ -429,9 +454,14 @@ void ShaderProgramManager::ApplyTo(OpenGLState& state) { state.draw.shader_program = 0; state.draw.program_pipeline = impl->pipeline.handle; } else { - OGLProgram& cached_program = impl->program_cache[impl->current]; + const u64 unique_identifier = impl->current.GetConfigHash(); + OGLProgram& cached_program = impl->program_cache[unique_identifier]; if (cached_program.handle == 0) { cached_program.Create(false, {impl->current.vs, impl->current.gs, impl->current.fs}); + auto& disk_cache = impl->disk_cache; + disk_cache.SaveDumpToFile(unique_identifier, cached_program.handle, + VideoCore::g_hw_shader_accurate_mul); + SetShaderUniformBlockBindings(cached_program.handle); SetShaderSamplerBindings(cached_program.handle); } @@ -441,12 +471,7 @@ void ShaderProgramManager::ApplyTo(OpenGLState& state) { void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { - if (!impl->separable) { - LOG_ERROR(Render_OpenGL, - "Cannot load disk cache as separate shader programs are unsupported!"); - return; - } - if (!GLAD_GL_ARB_get_program_binary) { + if (!GLAD_GL_ARB_get_program_binary && !GLES) { LOG_ERROR(Render_OpenGL, "Cannot load disk cache as ARB_get_program_binary is not supported!"); return; @@ -459,7 +484,9 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, } const auto& raws = *transferable; - auto [decompiled, dumps] = disk_cache.LoadPrecompiled(); + // Load uncompressed precompiled file for non-separable shaders. + // Precompiled file for separable shaders is compressed. + auto [decompiled, dumps] = disk_cache.LoadPrecompiled(impl->separable); if (stop_loading) { return; @@ -478,125 +505,179 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, } std::vector load_raws_index; // Loads both decompiled and precompiled shaders from the cache. If either one is missing for - const auto LoadPrecompiledWorker = - [&](std::size_t begin, std::size_t end, const std::vector& raw_cache, - const ShaderDecompiledMap& decompiled_map, const ShaderDumpsMap& dump_map) { - for (std::size_t i = 0; i < end; ++i) { - if (stop_loading || compilation_failed) { - return; - } - const auto& raw{raw_cache[i]}; - const u64 unique_identifier{raw.GetUniqueIdentifier()}; - - const u64 calculated_hash = - GetUniqueIdentifier(raw.GetRawShaderConfig(), raw.GetProgramCode()); - if (unique_identifier != calculated_hash) { - LOG_ERROR(Render_OpenGL, - "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing " - "shader cache", - raw.GetUniqueIdentifier(), calculated_hash); - disk_cache.InvalidateAll(); - return; - } - - const auto dump{dump_map.find(unique_identifier)}; - const auto decomp{decompiled_map.find(unique_identifier)}; - - OGLProgram shader; - - if (dump != dump_map.end() && decomp != decompiled_map.end()) { - // Only load this shader if its sanitize_mul setting matches - if (raw.GetProgramType() == ProgramType::VS && - decomp->second.sanitize_mul != VideoCore::g_hw_shader_accurate_mul) { - continue; - } - - // If the shader is dumped, attempt to load it - shader = GeneratePrecompiledProgram(dump->second, supported_formats); - if (shader.handle == 0) { - // If any shader failed, stop trying to compile, delete the cache, and start - // loading from raws - compilation_failed = true; - return; - } - // we have both the binary shader and the decompiled, so inject it into the - // cache - if (raw.GetProgramType() == ProgramType::VS) { - auto [conf, setup] = BuildVSConfigFromRaw(raw); - std::scoped_lock lock(mutex); - - impl->programmable_vertex_shaders.Inject(conf, decomp->second.result.code, - std::move(shader)); - } else if (raw.GetProgramType() == ProgramType::FS) { - PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig()); - std::scoped_lock lock(mutex); - impl->fragment_shaders.Inject(conf, std::move(shader)); - } else { - // Unsupported shader type got stored somehow so nuke the cache - - LOG_CRITICAL(Frontend, "failed to load raw programtype {}", - raw.GetProgramType()); - compilation_failed = true; - return; - } - } else { - // Since precompiled didn't have the dump, we'll load them in the next phase - std::scoped_lock lock(mutex); - load_raws_index.push_back(i); - } - if (callback) { - callback(VideoCore::LoadCallbackStage::Decompile, i, raw_cache.size()); - } - } - }; - - LoadPrecompiledWorker(0, raws.size(), raws, decompiled, dumps); - - if (compilation_failed) { - // Invalidate the precompiled cache if a shader dumped shader was rejected - disk_cache.InvalidatePrecompiled(); - dumps.clear(); - precompiled_cache_altered = true; - } - - if (callback) { - callback(VideoCore::LoadCallbackStage::Build, 0, raws.size()); - } - - compilation_failed = false; - - const auto LoadTransferable = [&](std::size_t begin, std::size_t end, - const std::vector& raw_cache) { - for (std::size_t i = 0; i < end; ++i) { + const auto LoadPrecompiledShader = [&](std::size_t begin, std::size_t end, + const std::vector& raw_cache, + const ShaderDecompiledMap& decompiled_map, + const ShaderDumpsMap& dump_map) { + for (std::size_t i = begin; i < end; ++i) { if (stop_loading || compilation_failed) { return; } const auto& raw{raw_cache[i]}; const u64 unique_identifier{raw.GetUniqueIdentifier()}; + const u64 calculated_hash = + GetUniqueIdentifier(raw.GetRawShaderConfig(), raw.GetProgramCode()); + if (unique_identifier != calculated_hash) { + LOG_ERROR(Render_OpenGL, + "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing " + "shader cache", + raw.GetUniqueIdentifier(), calculated_hash); + disk_cache.InvalidateAll(); + return; + } + + const auto dump{dump_map.find(unique_identifier)}; + const auto decomp{decompiled_map.find(unique_identifier)}; + + OGLProgram shader; + + if (dump != dump_map.end() && decomp != decompiled_map.end()) { + // Only load the vertex shader if its sanitize_mul setting matches + if (raw.GetProgramType() == ProgramType::VS && + decomp->second.sanitize_mul != VideoCore::g_hw_shader_accurate_mul) { + continue; + } + + // If the shader is dumped, attempt to load it + shader = + GeneratePrecompiledProgram(dump->second, supported_formats, impl->separable); + if (shader.handle == 0) { + // If any shader failed, stop trying to compile, delete the cache, and start + // loading from raws + compilation_failed = true; + return; + } + // we have both the binary shader and the decompiled, so inject it into the + // cache + if (raw.GetProgramType() == ProgramType::VS) { + auto [conf, setup] = BuildVSConfigFromRaw(raw); + std::scoped_lock lock(mutex); + impl->programmable_vertex_shaders.Inject(conf, decomp->second.result.code, + std::move(shader)); + } else if (raw.GetProgramType() == ProgramType::FS) { + PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig()); + std::scoped_lock lock(mutex); + impl->fragment_shaders.Inject(conf, std::move(shader)); + } else { + // Unsupported shader type got stored somehow so nuke the cache + + LOG_CRITICAL(Frontend, "failed to load raw ProgramType {}", + raw.GetProgramType()); + compilation_failed = true; + return; + } + } else { + // Since precompiled didn't have the dump, we'll load them in the next phase + std::scoped_lock lock(mutex); + load_raws_index.push_back(i); + } + if (callback) { + callback(VideoCore::LoadCallbackStage::Decompile, i, raw_cache.size()); + } + } + }; + + const auto LoadPrecompiledProgram = [&](const ShaderDecompiledMap& decompiled_map, + const ShaderDumpsMap& dump_map) { + std::size_t i{0}; + for (const auto& dump : dump_map) { + if (stop_loading) { + break; + } + const u64 unique_identifier{dump.first}; + const auto decomp{decompiled_map.find(unique_identifier)}; + + // Only load the program if its sanitize_mul setting matches + if (decomp->second.sanitize_mul != VideoCore::g_hw_shader_accurate_mul) { + continue; + } + + // If the shader program is dumped, attempt to load it + OGLProgram shader = + GeneratePrecompiledProgram(dump.second, supported_formats, impl->separable); + if (shader.handle != 0) { + SetShaderUniformBlockBindings(shader.handle); + SetShaderSamplerBindings(shader.handle); + impl->program_cache.emplace(unique_identifier, std::move(shader)); + } else { + LOG_ERROR(Frontend, "Failed to link Precompiled program!"); + compilation_failed = true; + break; + } + if (callback) { + callback(VideoCore::LoadCallbackStage::Decompile, ++i, dump_map.size()); + } + } + }; + + if (impl->separable) { + LoadPrecompiledShader(0, raws.size(), raws, decompiled, dumps); + } else { + LoadPrecompiledProgram(decompiled, dumps); + } + + bool load_all_raws = false; + if (compilation_failed) { + // Invalidate the precompiled cache if a shader dumped shader was rejected + impl->program_cache.clear(); + disk_cache.InvalidatePrecompiled(); + dumps.clear(); + precompiled_cache_altered = true; + load_all_raws = true; + } + // TODO(SachinV): Skip loading raws until we implement a proper way to link non-seperable + // shaders. + if (!impl->separable) { + return; + } + + const std::size_t load_raws_size = load_all_raws ? raws.size() : load_raws_index.size(); + + if (callback) { + callback(VideoCore::LoadCallbackStage::Build, 0, load_raws_size); + } + + compilation_failed = false; + + std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex + const auto LoadRawSepareble = [&](Frontend::GraphicsContext* context, std::size_t begin, + std::size_t end) { + Frontend::ScopeAcquireContext scope(*context); + for (std::size_t i = begin; i < end; ++i) { + if (stop_loading || compilation_failed) { + return; + } + + const std::size_t raws_index = load_all_raws ? i : load_raws_index[i]; + const auto& raw{raws[raws_index]}; + const u64 unique_identifier{raw.GetUniqueIdentifier()}; + bool sanitize_mul = false; GLuint handle{0}; std::optional result; // Otherwise decompile and build the shader at boot and save the result to the // precompiled file if (raw.GetProgramType() == ProgramType::VS) { - // TODO: This isn't the ideal place to lock, since we really only want to - // lock access to the shared cache auto [conf, setup] = BuildVSConfigFromRaw(raw); - std::scoped_lock lock(mutex); - auto [h, r] = impl->programmable_vertex_shaders.Get(conf, setup); - handle = h; - result = std::move(r); + result = GenerateVertexShader(setup, conf, impl->separable); + OGLShaderStage stage{impl->separable}; + stage.Create(result->code.c_str(), GL_VERTEX_SHADER); + handle = stage.GetHandle(); sanitize_mul = conf.state.sanitize_mul; + std::scoped_lock lock(mutex); + impl->programmable_vertex_shaders.Inject(conf, result->code, std::move(stage)); } else if (raw.GetProgramType() == ProgramType::FS) { PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig()); + result = GenerateFragmentShader(conf, impl->separable); + OGLShaderStage stage{impl->separable}; + stage.Create(result->code.c_str(), GL_FRAGMENT_SHADER); + handle = stage.GetHandle(); std::scoped_lock lock(mutex); - auto [h, r] = impl->fragment_shaders.Get(conf); - handle = h; - result = std::move(r); + impl->fragment_shaders.Inject(conf, std::move(stage)); } else { // Unsupported shader type got stored somehow so nuke the cache - LOG_ERROR(Frontend, "failed to load raw programtype {}", raw.GetProgramType()); + LOG_ERROR(Frontend, "failed to load raw ProgramType {}", raw.GetProgramType()); compilation_failed = true; return; } @@ -606,7 +687,9 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, compilation_failed = true; return; } - // If this is a new shader, add it the precompiled cache + + std::scoped_lock lock(mutex); + // If this is a new separable shader, add it the precompiled cache if (result) { disk_cache.SaveDecompiled(unique_identifier, *result, sanitize_mul); disk_cache.SaveDump(unique_identifier, handle); @@ -614,12 +697,27 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, } if (callback) { - callback(VideoCore::LoadCallbackStage::Build, i, raw_cache.size()); + callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, load_raws_size); } } }; - LoadTransferable(0, raws.size(), raws); + const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())}; + const std::size_t bucket_size{load_raws_size / num_workers}; + std::vector> contexts(num_workers); + std::vector threads(num_workers); + for (std::size_t i = 0; i < num_workers; ++i) { + const bool is_last_worker = i + 1 == num_workers; + const std::size_t start{bucket_size * i}; + const std::size_t end{is_last_worker ? load_raws_size : start + bucket_size}; + + // On some platforms the shared context has to be created from the GUI thread + contexts[i] = emu_window.CreateSharedContext(); + threads[i] = std::thread(LoadRawSepareble, contexts[i].get(), start, end); + } + for (auto& thread : threads) { + thread.join(); + } if (compilation_failed) { disk_cache.InvalidateAll(); diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 94149415c..39c1392f2 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -99,7 +99,7 @@ static_assert(sizeof(VSUniformData) < 16384, /// A class that manage different shader stages and configures them with given config data. class ShaderProgramManager { public: - ShaderProgramManager(bool separable, bool is_amd); + ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable, bool is_amd); ~ShaderProgramManager(); void LoadDiskCache(const std::atomic_bool& stop_loading, @@ -120,5 +120,7 @@ public: private: class Impl; std::unique_ptr impl; + + Frontend::EmuWindow& emu_window; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index a51ad443b..036cd49a3 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -84,6 +84,7 @@ GLuint LoadProgram(bool separable_program, const std::vector& shaders) { glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); } + glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE); glLinkProgram(program_id); // Check the program