# Script-mode helper (invoked with `cmake -P`) that renders src/common/scm_rev.cpp.in into
# scm_rev.cpp in the current working directory.
#
# Inputs (passed with -D on the command line):
#   SRC_DIR           - root of the source tree (required; CMAKE_*_DIR point at the build dir here)
#   GIT_EXECUTABLE    - path to git, used as a hint for find_package(Git)
#   BUILD_REPOSITORY  - repository slug, used when no known CI provider overrides it below
#   BUILD_TAG         - release tag, used when no known CI provider overrides it below

# Gets a UTC timestamp and sets the provided variable to it
function(get_timestamp _var)
    string(TIMESTAMP timestamp UTC)
    set(${_var} "${timestamp}" PARENT_SCOPE)
endfunction()

list(APPEND CMAKE_MODULE_PATH "${SRC_DIR}/externals/cmake-modules")

# Find the package here with the known path so that the GetGit commands can find it as well
find_package(Git QUIET PATHS "${GIT_EXECUTABLE}")

# generate git/build information
include(GetGitRevisionDescription)
get_git_head_revision(GIT_REF_SPEC GIT_REV)
git_describe(GIT_DESC --always --long --dirty)
git_branch_name(GIT_BRANCH)
get_timestamp(BUILD_DATE)

# Generate cpp with Git revision from template
# Also if this is a CI build, add the build name (ie: Nightly, Canary) to the scm_rev file as well
set(REPO_NAME "")
set(BUILD_VERSION "0")
if (DEFINED ENV{CI})
    if (DEFINED ENV{TRAVIS})
        set(BUILD_REPOSITORY "$ENV{TRAVIS_REPO_SLUG}")
        set(BUILD_TAG "$ENV{TRAVIS_TAG}")
    elseif(DEFINED ENV{APPVEYOR})
        set(BUILD_REPOSITORY "$ENV{APPVEYOR_REPO_NAME}")
        set(BUILD_TAG "$ENV{APPVEYOR_REPO_TAG_NAME}")
    elseif(DEFINED ENV{BITRISE_IO})
        set(BUILD_REPOSITORY "$ENV{BITRISEIO_GIT_REPOSITORY_OWNER}/$ENV{BITRISEIO_GIT_REPOSITORY_SLUG}")
        set(BUILD_TAG "$ENV{BITRISE_GIT_TAG}")
    endif()

    # regex capture the string nightly or canary into CMAKE_MATCH_1
    # NOTE: the input must stay quoted. On a CI provider that is not listed above the repository
    # may be empty, and an unquoted empty expansion removes the argument entirely, which makes
    # string(REGEX MATCH) fail with a hard error instead of simply not matching.
    string(REGEX MATCH "citra-emu/citra-?(.*)" OUTVAR "${BUILD_REPOSITORY}")
    if ("${CMAKE_MATCH_COUNT}" GREATER 0)
        # capitalize the first letter of each word in the repo name.
        string(REPLACE "-" ";" REPO_NAME_LIST "${CMAKE_MATCH_1}")
        foreach(WORD ${REPO_NAME_LIST})
            string(SUBSTRING "${WORD}" 0 1 FIRST_LETTER)
            string(SUBSTRING "${WORD}" 1 -1 REMAINDER)
            string(TOUPPER "${FIRST_LETTER}" FIRST_LETTER)
            set(REPO_NAME "${REPO_NAME}${FIRST_LETTER}${REMAINDER}")
        endforeach()
        if (BUILD_TAG)
            # CMAKE_MATCH_1 still holds the repo suffix here: only regex commands overwrite it
            string(REGEX MATCH "${CMAKE_MATCH_1}-([0-9]+)" OUTVAR "${BUILD_TAG}")
            if ("${CMAKE_MATCH_COUNT}" GREATER 0)
                set(BUILD_VERSION "${CMAKE_MATCH_1}")
            endif()
            if (BUILD_VERSION)
                # This leaves a trailing space on the last word, but we actually want that
                # because of how it's styled in the title bar.
                set(BUILD_FULLNAME "${REPO_NAME} ${BUILD_VERSION} ")
            else()
                set(BUILD_FULLNAME "")
            endif()
        endif()
    endif()
endif()

# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
set(VIDEO_CORE "${SRC_DIR}/src/video_core")
set(HASH_FILES
    "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
    "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
    "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
    "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
    "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
    "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
    "${VIDEO_CORE}/shader/shader.cpp"
    "${VIDEO_CORE}/shader/shader.h"
    "${VIDEO_CORE}/pica.cpp"
    "${VIDEO_CORE}/pica.h"
    "${VIDEO_CORE}/regs_framebuffer.h"
    "${VIDEO_CORE}/regs_lighting.h"
    "${VIDEO_CORE}/regs_pipeline.h"
    "${VIDEO_CORE}/regs_rasterizer.h"
    "${VIDEO_CORE}/regs_shader.h"
    "${VIDEO_CORE}/regs_texturing.h"
    "${VIDEO_CORE}/regs.cpp"
    "${VIDEO_CORE}/regs.h"
)

# The shader cache version is the MD5 of the concatenated contents of HASH_FILES, so it changes
# exactly when one of those files changes.
set(COMBINED "")
foreach (F IN LISTS HASH_FILES)
    file(READ "${F}" TMP)
    set(COMBINED "${COMBINED}${TMP}")
endforeach()
string(MD5 SHADER_CACHE_VERSION "${COMBINED}")
configure_file("${SRC_DIR}/src/common/scm_rev.cpp.in" "scm_rev.cpp" @ONLY)
2ae4c33f9..63159a52b 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -62,6 +62,10 @@ if (ARCHITECTURE_x86_64) target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES) endif() +# Zstandard +add_subdirectory(zstd/build/cmake EXCLUDE_FROM_ALL) +target_include_directories(libzstd_static INTERFACE ./zstd/lib) + # ENet add_subdirectory(enet) target_include_directories(enet INTERFACE ./enet/include) diff --git a/externals/zstd b/externals/zstd new file mode 160000 index 000000000..10f0e6993 --- /dev/null +++ b/externals/zstd @@ -0,0 +1 @@ +Subproject commit 10f0e6993f9d2f682da6d04aa2385b7d53cbb4ee diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp index debc54b1c..ce06b31b7 100644 --- a/src/citra/citra.cpp +++ b/src/citra/citra.cpp @@ -43,7 +43,7 @@ #include "core/movie.h" #include "core/settings.h" #include "network/network.h" -#include "video_core/video_core.h" +#include "video_core/renderer_base.h" #undef _UNICODE #include @@ -413,6 +413,14 @@ int main(int argc, char** argv) { } std::thread render_thread([&emu_window] { emu_window->Present(); }); + + std::atomic_bool stop_run; + Core::System::GetInstance().Renderer().Rasterizer()->LoadDiskResources( + stop_run, [](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { + LOG_DEBUG(Frontend, "Loading stage {} progress {} {}", static_cast(stage), value, + total); + }); + while (emu_window->IsOpen()) { system.RunLoop(); } diff --git a/src/citra/config.cpp b/src/citra/config.cpp index 9193dcb5e..072568827 100644 --- a/src/citra/config.cpp +++ b/src/citra/config.cpp @@ -122,6 +122,8 @@ void Config::ReadValues() { Settings::values.resolution_factor = static_cast(sdl2_config->GetInteger("Renderer", "resolution_factor", 1)); Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); + Settings::values.use_disk_shader_cache = + sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", true); Settings::values.frame_limit = 
static_cast(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); Settings::values.use_vsync_new = diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h index 79481dc75..ca154c4a7 100644 --- a/src/citra/default_ini.h +++ b/src/citra/default_ini.h @@ -117,6 +117,10 @@ use_shader_jit = # 0: Off, 1 (default): On use_vsync_new = +# Reduce stuttering by storing and loading generated shaders to disk +# 0: Off, 1 (default. On) +use_disk_shader_cache = + # Resolution scale factor # 0: Auto (scales resolution to window size), 1: Native 3DS screen resolution, Otherwise a scale # factor for the 3DS resolution diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp index 719059dd5..f28fcdf10 100644 --- a/src/citra_qt/bootmanager.cpp +++ b/src/citra_qt/bootmanager.cpp @@ -45,6 +45,13 @@ static GMainWindow* GetMainWindow() { void EmuThread::run() { MicroProfileOnThreadCreate("EmuThread"); Frontend::ScopeAcquireContext scope(core_context); + + Core::System::GetInstance().Renderer().Rasterizer()->LoadDiskResources( + stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { + LOG_DEBUG(Frontend, "Loading stage {} progress {} {}", static_cast(stage), value, + total); + }); + // Holds whether the cpu was running during the last iteration, // so that the DebugModeLeft signal can be emitted before the // next execution step. 
diff --git a/src/citra_qt/configuration/config.cpp b/src/citra_qt/configuration/config.cpp index 6bf3b8171..7601d485b 100644 --- a/src/citra_qt/configuration/config.cpp +++ b/src/citra_qt/configuration/config.cpp @@ -239,6 +239,7 @@ void Config::ReadUtilityValues() { Settings::values.dump_textures = ReadSetting("dump_textures", false).toBool(); Settings::values.custom_textures = ReadSetting("custom_textures", false).toBool(); Settings::values.preload_textures = ReadSetting("preload_textures", false).toBool(); + Settings::values.use_disk_shader_cache = ReadSetting("use_disk_shader_cache", true).toBool(); qt_config->endGroup(); } @@ -713,6 +714,7 @@ void Config::SaveUtilityValues() { WriteSetting("dump_textures", Settings::values.dump_textures, false); WriteSetting("custom_textures", Settings::values.custom_textures, false); WriteSetting("preload_textures", Settings::values.preload_textures, false); + WriteSetting("use_disk_shader_cache", Settings::values.use_disk_shader_cache, true); qt_config->endGroup(); } diff --git a/src/citra_qt/configuration/configure_enhancements.cpp b/src/citra_qt/configuration/configure_enhancements.cpp index 97cc1f53d..4067d64bc 100644 --- a/src/citra_qt/configuration/configure_enhancements.cpp +++ b/src/citra_qt/configuration/configure_enhancements.cpp @@ -52,6 +52,7 @@ void ConfigureEnhancements::SetConfiguration() { ui->toggle_linear_filter->setChecked(Settings::values.filter_mode); ui->layout_combobox->setCurrentIndex(static_cast(Settings::values.layout_option)); ui->swap_screen->setChecked(Settings::values.swap_screen); + ui->toggle_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); ui->toggle_dump_textures->setChecked(Settings::values.dump_textures); ui->toggle_custom_textures->setChecked(Settings::values.custom_textures); ui->toggle_preload_textures->setChecked(Settings::values.preload_textures); @@ -99,6 +100,7 @@ void ConfigureEnhancements::ApplyConfiguration() { Settings::values.layout_option = 
static_cast(ui->layout_combobox->currentIndex()); Settings::values.swap_screen = ui->swap_screen->isChecked(); + Settings::values.use_disk_shader_cache = ui->toggle_disk_shader_cache->isChecked(); Settings::values.dump_textures = ui->toggle_dump_textures->isChecked(); Settings::values.custom_textures = ui->toggle_custom_textures->isChecked(); Settings::values.preload_textures = ui->toggle_preload_textures->isChecked(); diff --git a/src/citra_qt/configuration/configure_enhancements.ui b/src/citra_qt/configuration/configure_enhancements.ui index b268a50fe..26c65ed59 100644 --- a/src/citra_qt/configuration/configure_enhancements.ui +++ b/src/citra_qt/configuration/configure_enhancements.ui @@ -269,6 +269,13 @@ Utility + + + + Use Disk Shader Cache + + + diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 89ee75dd2..af07ac215 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -1,45 +1,55 @@ -# Generate cpp with Git revision from template -# Also if this is a CI build, add the build name (ie: Nightly, Canary) to the scm_rev file as well -set(REPO_NAME "") -set(BUILD_VERSION "0") -if ($ENV{CI}) - if ($ENV{TRAVIS}) +# Add a custom command to generate a new shader_cache_version hash when any of the following files change +# NOTE: This is an approximation of what files affect shader generation, its possible something else +# could affect the result, but much more unlikely than the following files. 
Keeping a list of files +# like this allows for much better caching since it doesn't force the user to recompile binary shaders every update +set(VIDEO_CORE "${CMAKE_SOURCE_DIR}/src/video_core") +if (DEFINED ENV{CI}) + if (DEFINED ENV{TRAVIS}) set(BUILD_REPOSITORY $ENV{TRAVIS_REPO_SLUG}) set(BUILD_TAG $ENV{TRAVIS_TAG}) - elseif($ENV{APPVEYOR}) + elseif(DEFINED ENV{APPVEYOR}) set(BUILD_REPOSITORY $ENV{APPVEYOR_REPO_NAME}) set(BUILD_TAG $ENV{APPVEYOR_REPO_TAG_NAME}) - elseif($ENV{BITRISE_IO}) - set(BUILD_REPOSITORY "$ENV{BITRISEIO_GIT_REPOSITORY_OWNER}/$ENV{BITRISEIO_GIT_REPOSITORY_SLUG}") - set(BUILD_TAG $ENV{BITRISE_GIT_TAG}) - endif() - # regex capture the string nightly or canary into CMAKE_MATCH_1 - string(REGEX MATCH "citra-emu/citra-?(.*)" OUTVAR ${BUILD_REPOSITORY}) - if (${CMAKE_MATCH_COUNT} GREATER 0) - # capitalize the first letter of each word in the repo name. - string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1}) - foreach(WORD ${REPO_NAME_LIST}) - string(SUBSTRING ${WORD} 0 1 FIRST_LETTER) - string(SUBSTRING ${WORD} 1 -1 REMAINDER) - string(TOUPPER ${FIRST_LETTER} FIRST_LETTER) - set(REPO_NAME "${REPO_NAME}${FIRST_LETTER}${REMAINDER}") - endforeach() - if (BUILD_TAG) - string(REGEX MATCH "${CMAKE_MATCH_1}-([0-9]+)" OUTVAR ${BUILD_TAG}) - if (${CMAKE_MATCH_COUNT} GREATER 0) - set(BUILD_VERSION ${CMAKE_MATCH_1}) - endif() - if (BUILD_VERSION) - # This leaves a trailing space on the last word, but we actually want that - # because of how it's styled in the title bar. 
- set(BUILD_FULLNAME "${REPO_NAME} ${BUILD_VERSION} ") - else() - set(BUILD_FULLNAME "") - endif() - endif() endif() endif() -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp" @ONLY) + +# Pass the path to git to the GenerateSCMRev.cmake as well +find_package(Git QUIET) + +add_custom_command(OUTPUT scm_rev.cpp + COMMAND ${CMAKE_COMMAND} + -DSRC_DIR="${CMAKE_SOURCE_DIR}" + -DBUILD_REPOSITORY="${BUILD_REPOSITORY}" + -DBUILD_TAG="${BUILD_TAG}" + -DGIT_EXECUTABLE="${GIT_EXECUTABLE}" + -P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake" + DEPENDS + # WARNING! It was too much work to try and make a common location for this list, + # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well + "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" + "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" + "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" + "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" + "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp" + "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h" + "${VIDEO_CORE}/shader/shader.cpp" + "${VIDEO_CORE}/shader/shader.h" + "${VIDEO_CORE}/pica.cpp" + "${VIDEO_CORE}/pica.h" + "${VIDEO_CORE}/regs_framebuffer.h" + "${VIDEO_CORE}/regs_lighting.h" + "${VIDEO_CORE}/regs_pipeline.h" + "${VIDEO_CORE}/regs_rasterizer.h" + "${VIDEO_CORE}/regs_shader.h" + "${VIDEO_CORE}/regs_texturing.h" + "${VIDEO_CORE}/regs.cpp" + "${VIDEO_CORE}/regs.h" + # and also check that the scm_rev files haven't changed + "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" + "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" + # technically we should regenerate if the git version changed, but its not worth the effort imo + "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake" +) add_library(common STATIC alignment.h @@ -94,6 +104,8 @@ add_library(common STATIC timer.h vector_math.h web_result.h + zstd_compression.cpp + zstd_compression.h ) if(ARCHITECTURE_x86_64) @@ -110,6 
+122,7 @@ endif() create_target_directory_groups(common) target_link_libraries(common PUBLIC fmt microprofile) +target_link_libraries(common PRIVATE libzstd_static) if (ARCHITECTURE_x86_64) target_link_libraries(common PRIVATE xbyak) endif() diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp index 249fbbb6f..d59e34dae 100644 --- a/src/common/file_util.cpp +++ b/src/common/file_util.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include "common/assert.h" #include "common/common_funcs.h" @@ -355,12 +356,12 @@ u64 GetSize(FILE* f) { // can't use off_t here because it can be 32-bit u64 pos = ftello(f); if (fseeko(f, 0, SEEK_END) != 0) { - LOG_ERROR(Common_Filesystem, "GetSize: seek failed {}: {}", (void*)f, GetLastErrorMsg()); + LOG_ERROR(Common_Filesystem, "GetSize: seek failed {}: {}", fmt::ptr(f), GetLastErrorMsg()); return 0; } u64 size = ftello(f); if ((size != pos) && (fseeko(f, pos, SEEK_SET) != 0)) { - LOG_ERROR(Common_Filesystem, "GetSize: seek failed {}: {}", (void*)f, GetLastErrorMsg()); + LOG_ERROR(Common_Filesystem, "GetSize: seek failed {}: {}", fmt::ptr(f), GetLastErrorMsg()); return 0; } return size; @@ -369,7 +370,7 @@ u64 GetSize(FILE* f) { bool CreateEmptyFile(const std::string& filename) { LOG_TRACE(Common_Filesystem, "{}", filename); - if (!FileUtil::IOFile(filename, "wb")) { + if (!FileUtil::IOFile(filename, "wb").IsOpen()) { LOG_ERROR(Common_Filesystem, "failed {}: {}", filename, GetLastErrorMsg()); return false; } @@ -541,12 +542,11 @@ std::optional GetCurrentDir() { // Get the current working directory (getcwd uses malloc) #ifdef _WIN32 wchar_t* dir; - if (!(dir = _wgetcwd(nullptr, 0))) + if (!(dir = _wgetcwd(nullptr, 0))) { #else char* dir; - if (!(dir = getcwd(nullptr, 0))) + if (!(dir = getcwd(nullptr, 0))) { #endif - { LOG_ERROR(Common_Filesystem, "GetCurrentDirectory failed: {}", GetLastErrorMsg()); return {}; } @@ -557,7 +557,7 @@ std::optional GetCurrentDir() { #endif free(dir); return strDir; -} +} // namespace 
// Splits a path into its components, treating both '\\' and '/' as separators.
// Empty components (leading or doubled separators) are preserved; a trailing separator does
// not produce a trailing empty component.
std::vector<std::string> SplitPathComponents(std::string_view filename) {
    // Work on a private copy so a single delimiter covers both separator styles
    std::string normalized(filename);
    std::replace(normalized.begin(), normalized.end(), '\\', '/');

    std::stringstream reader(normalized);
    std::vector<std::string> components;
    for (std::string piece; std::getline(reader, piece, '/');) {
        components.push_back(std::move(piece));
    }
    return components;
}

// Returns everything before the last '\\' or '/' in the path.
// A path without any separator is returned unchanged.
std::string_view GetParentPath(std::string_view path) {
    // find_last_of yields the later of the two separators, or npos when neither is present;
    // substr(0, npos) then keeps the whole view.
    return path.substr(0, path.find_last_of("\\/"));
}

// Returns everything after the first path component, ignoring any leading separators.
// A path that consists of a single component is returned as-is (minus leading separators).
std::string_view GetPathWithoutTop(std::string_view path) {
    // Drop every leading separator; a path made only of separators becomes empty
    const std::size_t top_begin = std::min(path.find_first_not_of("\\/"), path.size());
    path.remove_prefix(top_begin);

    const std::size_t separator = path.find_first_of("\\/");
    if (separator == std::string_view::npos) {
        return path;
    }
    return path.substr(separator + 1);
}
+std::string_view GetFilename(std::string_view path) { + const auto name_index = path.find_last_of("\\/"); + + if (name_index == std::string_view::npos) { + return {}; + } + + return path.substr(name_index + 1); +} + +std::string_view GetExtensionFromFilename(std::string_view name) { + const std::size_t index = name.rfind('.'); + + if (index == std::string_view::npos) { + return {}; + } + + return name.substr(index + 1); +} + +std::string_view RemoveTrailingSlash(std::string_view path) { + if (path.empty()) { + return path; + } + + if (path.back() == '\\' || path.back() == '/') { + path.remove_suffix(1); + return path; + } + + return path; +} + +std::string SanitizePath(std::string_view path_, DirectorySeparator directory_separator) { + std::string path(path_); + char type1 = directory_separator == DirectorySeparator::BackwardSlash ? '/' : '\\'; + char type2 = directory_separator == DirectorySeparator::BackwardSlash ? '\\' : '/'; + + if (directory_separator == DirectorySeparator::PlatformDefault) { +#ifdef _WIN32 + type1 = '/'; + type2 = '\\'; +#endif + } + + std::replace(path.begin(), path.end(), type1, type2); + path.erase(std::unique(path.begin(), path.end(), + [type2](char c1, char c2) { return c1 == type2 && c2 == type2; }), + path.end()); + return std::string(RemoveTrailingSlash(path)); +} + IOFile::IOFile() {} IOFile::IOFile(const std::string& filename, const char openmode[], int flags) { diff --git a/src/common/file_util.h b/src/common/file_util.h index 45917423a..09c3cb6f0 100644 --- a/src/common/file_util.h +++ b/src/common/file_util.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include "common/common_types.h" @@ -166,6 +167,41 @@ std::size_t ReadFileToString(bool text_file, const std::string& filename, std::s void SplitFilename83(const std::string& filename, std::array& short_name, std::array& extension); +// Splits the path on '/' or '\' and put the components into a vector +// i.e. 
// Creates a new vector containing indices [first, last) from the original.
// `last` is clamped to the size of the vector; an empty vector is returned when the clamped
// range is empty (including when `first` lies at or beyond the end of the vector).
template <typename T>
std::vector<T> SliceVector(const std::vector<T>& vector, std::size_t first, std::size_t last) {
    // Clamp before validating so that first > size() can never yield an inverted iterator range
    last = std::min(last, vector.size());
    if (first >= last) {
        return {};
    }
    // The end iterator is begin + last (not begin + first + last): `last` is an index, not a length
    return std::vector<T>(vector.begin() + first, vector.begin() + last);
}
Makes '/' into '\\' +// depending if directory_separator is BackwardSlash or PlatformDefault and running on windows +std::string SanitizePath(std::string_view path, + DirectorySeparator directory_separator = DirectorySeparator::ForwardSlash); + // simple wrapper for cstdlib file functions to // hopefully will make error checking easier // and make forgetting an fclose() harder diff --git a/src/common/scm_rev.cpp.in b/src/common/scm_rev.cpp.in index daf99487f..0e52a6d21 100644 --- a/src/common/scm_rev.cpp.in +++ b/src/common/scm_rev.cpp.in @@ -11,6 +11,7 @@ #define BUILD_DATE "@BUILD_DATE@" #define BUILD_VERSION "@BUILD_VERSION@" #define BUILD_FULLNAME "@BUILD_FULLNAME@" +#define SHADER_CACHE_VERSION "@SHADER_CACHE_VERSION@" namespace Common { @@ -21,6 +22,7 @@ const char g_build_name[] = BUILD_NAME; const char g_build_date[] = BUILD_DATE; const char g_build_fullname[] = BUILD_FULLNAME; const char g_build_version[] = BUILD_VERSION; +const char g_shader_cache_version[] = SHADER_CACHE_VERSION; } // namespace diff --git a/src/common/scm_rev.h b/src/common/scm_rev.h index af9a9daed..666bf0367 100644 --- a/src/common/scm_rev.h +++ b/src/common/scm_rev.h @@ -13,5 +13,6 @@ extern const char g_build_name[]; extern const char g_build_date[]; extern const char g_build_fullname[]; extern const char g_build_version[]; +extern const char g_shader_cache_version[]; } // namespace Common diff --git a/src/common/zstd_compression.cpp b/src/common/zstd_compression.cpp new file mode 100644 index 000000000..64fb18869 --- /dev/null +++ b/src/common/zstd_compression.cpp @@ -0,0 +1,51 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include "common/assert.h" +#include "common/zstd_compression.h" + +namespace Common::Compression { + +std::vector CompressDataZSTD(const u8* source, std::size_t source_size, s32 compression_level) { + compression_level = std::clamp(compression_level, ZSTD_minCLevel(), ZSTD_maxCLevel()); + + const std::size_t max_compressed_size = ZSTD_compressBound(source_size); + std::vector compressed(max_compressed_size); + + const std::size_t compressed_size = + ZSTD_compress(compressed.data(), compressed.size(), source, source_size, compression_level); + + if (ZSTD_isError(compressed_size)) { + // Compression failed + return {}; + } + + compressed.resize(compressed_size); + + return compressed; +} + +std::vector CompressDataZSTDDefault(const u8* source, std::size_t source_size) { + return CompressDataZSTD(source, source_size, ZSTD_CLEVEL_DEFAULT); +} + +std::vector DecompressDataZSTD(const std::vector& compressed) { + const std::size_t decompressed_size = + ZSTD_getDecompressedSize(compressed.data(), compressed.size()); + std::vector decompressed(decompressed_size); + + const std::size_t uncompressed_result_size = ZSTD_decompress( + decompressed.data(), decompressed.size(), compressed.data(), compressed.size()); + + if (decompressed_size != uncompressed_result_size || ZSTD_isError(uncompressed_result_size)) { + // Decompression failed + return {}; + } + return decompressed; +} + +} // namespace Common::Compression diff --git a/src/common/zstd_compression.h b/src/common/zstd_compression.h new file mode 100644 index 000000000..1715bae7a --- /dev/null +++ b/src/common/zstd_compression.h @@ -0,0 +1,44 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" + +namespace Common::Compression { + +/** + * Compresses a source memory region with Zstandard and returns the compressed data in a vector. 
+ * + * @param source the uncompressed source memory region. + * @param source_size the size in bytes of the uncompressed source memory region. + * @param compression_level the used compression level. Should be between 1 and 22. + * + * @return the compressed data. + */ +std::vector CompressDataZSTD(const u8* source, std::size_t source_size, s32 compression_level); + +/** + * Compresses a source memory region with Zstandard with the default compression level and returns + * the compressed data in a vector. + * + * @param source the uncompressed source memory region. + * @param source_size the size in bytes of the uncompressed source memory region. + * + * @return the compressed data. + */ +std::vector CompressDataZSTDDefault(const u8* source, std::size_t source_size); + +/** + * Decompresses a source memory region with Zstandard and returns the uncompressed data in a vector. + * + * @param compressed the compressed source memory region. + * + * @return the decompressed data. + */ +std::vector DecompressDataZSTD(const std::vector& compressed); + +} // namespace Common::Compression diff --git a/src/core/core.cpp b/src/core/core.cpp index c0dc93b55..ebaee4f87 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -237,9 +237,16 @@ System::ResultStatus System::Init(Frontend::EmuWindow& emu_window, u32 system_mo Service::Init(*this); GDBStub::Init(); - ResultStatus result = VideoCore::Init(emu_window, *memory); - if (result != ResultStatus::Success) { - return result; + VideoCore::ResultStatus result = VideoCore::Init(emu_window, *memory); + if (result != VideoCore::ResultStatus::Success) { + switch (result) { + case VideoCore::ResultStatus::ErrorGenericDrivers: + return ResultStatus::ErrorVideoCore_ErrorGenericDrivers; + case VideoCore::ResultStatus::ErrorBelowGL33: + return ResultStatus::ErrorVideoCore_ErrorBelowGL33; + default: + return ResultStatus::ErrorVideoCore; + } } #ifdef ENABLE_FFMPEG_VIDEO_DUMPER @@ -253,6 +260,10 @@ System::ResultStatus 
System::Init(Frontend::EmuWindow& emu_window, u32 system_mo return ResultStatus::Success; } +RendererBase& System::Renderer() { + return *VideoCore::g_renderer; +} + Service::SM::ServiceManager& System::ServiceManager() { return *service_manager; } diff --git a/src/core/core.h b/src/core/core.h index b533c4bc3..5b7965453 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -55,6 +55,8 @@ namespace VideoDumper { class Backend; } +class RendererBase; + namespace Core { class Timing; @@ -170,6 +172,8 @@ public: return *dsp_core; } + RendererBase& Renderer(); + /** * Gets a reference to the service manager. * @returns A reference to the service manager. diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index b1e5f8a01..fcb60e6fe 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -11,6 +11,7 @@ #include "common/logging/log.h" #include "common/microprofile.h" #include "common/vector_math.h" +#include "core/core.h" #include "core/core_timing.h" #include "core/hle/service/gsp/gsp.h" #include "core/hw/gpu.h" diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 81a6d7db6..164ffaf73 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -28,6 +28,7 @@ void Apply() { VideoCore::g_shader_jit_enabled = values.use_shader_jit; VideoCore::g_hw_shader_enabled = values.use_hw_shader; VideoCore::g_hw_shader_accurate_mul = values.shaders_accurate_mul; + VideoCore::g_use_disk_shader_cache = values.use_disk_shader_cache; if (VideoCore::g_renderer) { VideoCore::g_renderer->UpdateCurrentFramebufferLayout(); diff --git a/src/core/settings.h b/src/core/settings.h index a0b0464f2..2d139e1b2 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -141,6 +141,7 @@ struct Values { bool use_gles; bool use_hw_renderer; bool use_hw_shader; + bool use_disk_shader_cache; bool shaders_accurate_mul; bool use_shader_jit; u16 resolution_factor; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3129982ed..4cb976354 100644 --- 
a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -31,6 +31,8 @@ add_library(video_core STATIC renderer_opengl/gl_resource_manager.h renderer_opengl/gl_shader_decompiler.cpp renderer_opengl/gl_shader_decompiler.h + renderer_opengl/gl_shader_disk_cache.cpp + renderer_opengl/gl_shader_disk_cache.h renderer_opengl/gl_shader_gen.cpp renderer_opengl/gl_shader_gen.h renderer_opengl/gl_shader_manager.cpp diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 7222e97ce..468d84084 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -4,6 +4,8 @@ #pragma once +#include +#include #include "common/common_types.h" #include "core/hw/gpu.h" @@ -17,6 +19,14 @@ struct OutputVertex; namespace VideoCore { +enum class LoadCallbackStage { + Prepare, + Decompile, + Build, + Complete, +}; +using DiskResourceLoadCallback = std::function; + class RasterizerInterface { public: virtual ~RasterizerInterface() {} @@ -71,5 +81,8 @@ public: virtual bool AccelerateDrawBatch(bool is_indexed) { return false; } + + virtual void LoadDiskResources(const std::atomic_bool& stop_loading, + const DiskResourceLoadCallback& callback) {} }; } // namespace VideoCore diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index b1ee6a973..7fcaf5370 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -6,8 +6,8 @@ #include #include "common/common_types.h" -#include "core/core.h" #include "video_core/rasterizer_interface.h" +#include "video_core/video_core.h" namespace Frontend { class EmuWindow; @@ -23,7 +23,7 @@ public: virtual ~RendererBase(); /// Initialize the renderer - virtual Core::System::ResultStatus Init() = 0; + virtual VideoCore::ResultStatus Init() = 0; /// Shutdown the renderer virtual void ShutDown() = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 
dde0ac445..862dcac00 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -171,6 +171,11 @@ RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& window) RasterizerOpenGL::~RasterizerOpenGL() {} +void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + shader_program_manager->LoadDiskCache(stop_loading, callback); +} + void RasterizerOpenGL::SyncEntireState() { // Sync fixed function OpenGL state SyncClipEnabled(); @@ -378,16 +383,15 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, bool RasterizerOpenGL::SetupVertexShader() { MICROPROFILE_SCOPE(OpenGL_VS); - PicaVSConfig vs_config(Pica::g_state.regs, Pica::g_state.vs); - return shader_program_manager->UseProgrammableVertexShader(vs_config, Pica::g_state.vs); + return shader_program_manager->UseProgrammableVertexShader(Pica::g_state.regs, + Pica::g_state.vs); } bool RasterizerOpenGL::SetupGeometryShader() { MICROPROFILE_SCOPE(OpenGL_GS); const auto& regs = Pica::g_state.regs; if (regs.pipeline.use_gs == Pica::PipelineRegs::UseGS::No) { - PicaFixedGSConfig gs_config(regs); - shader_program_manager->UseFixedGeometryShader(gs_config); + shader_program_manager->UseFixedGeometryShader(regs); return true; } else { LOG_ERROR(Render_OpenGL, "Accelerate draw doesn't support geometry shader"); @@ -1622,8 +1626,7 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig( } void RasterizerOpenGL::SetShader() { - auto config = PicaFSConfig::BuildFromRegs(Pica::g_state.regs); - shader_program_manager->UseFragmentShader(config); + shader_program_manager->UseFragmentShader(Pica::g_state.regs); } void RasterizerOpenGL::SyncClipEnabled() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 6ba23762d..b3356a69b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ 
b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -42,6 +42,9 @@ public: explicit RasterizerOpenGL(Frontend::EmuWindow& renderer); ~RasterizerOpenGL() override; + void LoadDiskResources(const std::atomic_bool& stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) override; + void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, const Pica::Shader::OutputVertex& v2) override; void DrawTriangles() override; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 19ac13ce2..9e436b0f5 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -56,7 +56,7 @@ struct Subroutine { /// Analyzes shader code and produces a set of subroutines. class ControlFlowAnalyzer { public: - ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset) + ControlFlowAnalyzer(const Pica::Shader::ProgramCode& program_code, u32 main_offset) : program_code(program_code) { // Recursively finds all subroutines. 
@@ -70,7 +70,7 @@ public: } private: - const ProgramCode& program_code; + const Pica::Shader::ProgramCode& program_code; std::set subroutines; std::map, ExitMethod> exit_method_map; @@ -246,8 +246,9 @@ constexpr auto GetSelectorSrc3 = GetSelectorSrc<&SwizzlePattern::GetSelectorSrc3 class GLSLGenerator { public: - GLSLGenerator(const std::set& subroutines, const ProgramCode& program_code, - const SwizzleData& swizzle_data, u32 main_offset, + GLSLGenerator(const std::set& subroutines, + const Pica::Shader::ProgramCode& program_code, + const Pica::Shader::SwizzleData& swizzle_data, u32 main_offset, const RegGetter& inputreg_getter, const RegGetter& outputreg_getter, bool sanitize_mul) : subroutines(subroutines), program_code(program_code), swizzle_data(swizzle_data), @@ -865,8 +866,8 @@ private: private: const std::set& subroutines; - const ProgramCode& program_code; - const SwizzleData& swizzle_data; + const Pica::Shader::ProgramCode& program_code; + const Pica::Shader::SwizzleData& swizzle_data; const u32 main_offset; const RegGetter& inputreg_getter; const RegGetter& outputreg_getter; @@ -888,16 +889,17 @@ bool exec_shader(); )"; } -std::optional DecompileProgram(const ProgramCode& program_code, - const SwizzleData& swizzle_data, u32 main_offset, - const RegGetter& inputreg_getter, - const RegGetter& outputreg_getter, bool sanitize_mul) { +std::optional DecompileProgram(const Pica::Shader::ProgramCode& program_code, + const Pica::Shader::SwizzleData& swizzle_data, + u32 main_offset, const RegGetter& inputreg_getter, + const RegGetter& outputreg_getter, + bool sanitize_mul) { try { auto subroutines = ControlFlowAnalyzer(program_code, main_offset).MoveSubroutines(); GLSLGenerator generator(subroutines, program_code, swizzle_data, main_offset, inputreg_getter, outputreg_getter, sanitize_mul); - return generator.MoveShaderCode(); + return {ProgramResult{generator.MoveShaderCode()}}; } catch (const DecompileFail& exception) { LOG_INFO(HW_GPU, "Shader decompilation 
failed: {}", exception.what()); return {}; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 51befb91d..67499960e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -11,15 +11,17 @@ namespace OpenGL::ShaderDecompiler { -using ProgramCode = std::array; -using SwizzleData = std::array; using RegGetter = std::function; +struct ProgramResult { + std::string code; +}; + std::string GetCommonDeclarations(); -std::optional DecompileProgram(const ProgramCode& program_code, - const SwizzleData& swizzle_data, u32 main_offset, - const RegGetter& inputreg_getter, - const RegGetter& outputreg_getter, bool sanitize_mul); +std::optional DecompileProgram(const Pica::Shader::ProgramCode& program_code, + const Pica::Shader::SwizzleData& swizzle_data, + u32 main_offset, const RegGetter& inputreg_getter, + const RegGetter& outputreg_getter, bool sanitize_mul); } // namespace OpenGL::ShaderDecompiler diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp new file mode 100644 index 000000000..1e8ca1041 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -0,0 +1,495 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include "common/assert.h" +#include "common/common_paths.h" +#include "common/common_types.h" +#include "common/file_util.h" +#include "common/logging/log.h" +#include "common/scm_rev.h" +#include "common/zstd_compression.h" +#include "core/core.h" +#include "core/hle/kernel/process.h" +#include "core/settings.h" +#include "video_core/renderer_opengl/gl_shader_disk_cache.h" + +namespace OpenGL { + +constexpr std::size_t HASH_LENGTH = 64; +using ShaderCacheVersionHash = std::array; + +enum class TransferableEntryKind : u32 { + Raw, +}; + +enum class PrecompiledEntryKind : u32 { + Decompiled, + Dump, +}; + +constexpr u32 NativeVersion = 1; + +ShaderCacheVersionHash GetShaderCacheVersionHash() { + ShaderCacheVersionHash hash{}; + const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size()); + std::memcpy(hash.data(), Common::g_shader_cache_version, length); + return hash; +} + +ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, + RawShaderConfig config, ProgramCode program_code) + : unique_identifier{unique_identifier}, program_type{program_type}, config{config}, + program_code{std::move(program_code)} {} + +bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) { + if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) || + file.ReadBytes(&program_type, sizeof(u32)) != sizeof(u32)) { + return false; + } + + u64 reg_array_len{}; + if (file.ReadBytes(®_array_len, sizeof(u64)) != sizeof(u64)) { + return false; + } + + if (file.ReadArray(config.reg_array.data(), reg_array_len) != reg_array_len) { + return false; + } + + // Read in type specific configuration + if (program_type == ProgramType::VS) { + u64 code_len{}; + if (file.ReadBytes(&code_len, sizeof(u64)) != sizeof(u64)) { + return false; + } + program_code.resize(code_len); + if (file.ReadArray(program_code.data(), code_len) != code_len) { + return false; + } + } + + return true; +} + +bool 
ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { + if (file.WriteObject(unique_identifier) != 1 || + file.WriteObject(static_cast(program_type)) != 1) { + return false; + } + + // Just for future proofing, save the sizes of the array to the file + const std::size_t reg_array_len = Pica::Regs::NUM_REGS; + if (file.WriteObject(static_cast(reg_array_len)) != 1) { + return false; + } + if (file.WriteArray(config.reg_array.data(), reg_array_len) != reg_array_len) { + return false; + } + + if (program_type == ProgramType::VS) { + const std::size_t code_len = program_code.size(); + if (file.WriteObject(static_cast(code_len)) != 1) { + return false; + } + if (file.WriteArray(program_code.data(), code_len) != code_len) { + return false; + } + } + return true; +} + +ShaderDiskCache::ShaderDiskCache(bool separable) : separable{separable} {} + +std::optional> ShaderDiskCache::LoadTransferable() { + const bool has_title_id = GetProgramID() != 0; + if (!Settings::values.use_disk_shader_cache || !has_title_id) + return {}; + tried_to_load = true; + + FileUtil::IOFile file(GetTransferablePath(), "rb"); + if (!file.IsOpen()) { + LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}", + GetTitleID()); + return {}; + } + + u32 version{}; + if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { + LOG_ERROR(Render_OpenGL, + "Failed to get transferable cache version for title id={} - skipping", + GetTitleID()); + return {}; + } + + if (version < NativeVersion) { + LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing"); + file.Close(); + InvalidateAll(); + return {}; + } + if (version > NativeVersion) { + LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " + "of the emulator - skipping"); + return {}; + } + + // Version is valid, load the shaders + std::vector raws; + while (file.Tell() < file.GetSize()) { + TransferableEntryKind kind{}; + if (file.ReadBytes(&kind, sizeof(u32)) != 
sizeof(u32)) { + LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping"); + return {}; + } + + switch (kind) { + case TransferableEntryKind::Raw: { + ShaderDiskCacheRaw entry; + if (!entry.Load(file)) { + LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping"); + return {}; + } + transferable.emplace(entry.GetUniqueIdentifier(), ShaderDiskCacheRaw{}); + raws.push_back(std::move(entry)); + break; + } + default: + LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping", + static_cast(kind)); + return {}; + } + } + + LOG_INFO(Render_OpenGL, "Found a transferable disk cache with {} entries", raws.size()); + return {raws}; +} + +std::pair, ShaderDumpsMap> +ShaderDiskCache::LoadPrecompiled() { + if (!IsUsable()) + return {}; + + FileUtil::IOFile file(GetPrecompiledPath(), "rb"); + if (!file.IsOpen()) { + LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", + GetTitleID()); + return {}; + } + + const auto result = LoadPrecompiledFile(file); + if (!result) { + LOG_INFO(Render_OpenGL, + "Failed to load precompiled cache for game with title id={} - removing", + GetTitleID()); + file.Close(); + InvalidatePrecompiled(); + return {}; + } + return *result; +} + +std::optional, ShaderDumpsMap>> +ShaderDiskCache::LoadPrecompiledFile(FileUtil::IOFile& file) { + // Read compressed file from disk and decompress to virtual precompiled cache file + std::vector compressed(file.GetSize()); + file.ReadBytes(compressed.data(), compressed.size()); + const std::vector decompressed = Common::Compression::DecompressDataZSTD(compressed); + SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); + decompressed_precompiled_cache_offset = 0; + + ShaderCacheVersionHash file_hash{}; + if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) { + return {}; + } + if (GetShaderCacheVersionHash() != file_hash) { + LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of 
the emulator"); + return {}; + } + + std::unordered_map decompiled; + ShaderDumpsMap dumps; + while (decompressed_precompiled_cache_offset < decompressed_precompiled_cache.size()) { + PrecompiledEntryKind kind{}; + if (!LoadObjectFromPrecompiled(kind)) { + return {}; + } + + switch (kind) { + case PrecompiledEntryKind::Decompiled: { + u64 unique_identifier{}; + if (!LoadObjectFromPrecompiled(unique_identifier)) { + return {}; + } + + auto entry = LoadDecompiledEntry(); + if (!entry) { + return {}; + } + decompiled.insert({unique_identifier, std::move(*entry)}); + break; + } + case PrecompiledEntryKind::Dump: { + u64 unique_identifier; + if (!LoadObjectFromPrecompiled(unique_identifier)) { + return {}; + } + + ShaderDiskCacheDump dump; + if (!LoadObjectFromPrecompiled(dump.binary_format)) { + return {}; + } + + u32 binary_length{}; + if (!LoadObjectFromPrecompiled(binary_length)) { + return {}; + } + + dump.binary.resize(binary_length); + if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { + return {}; + } + + dumps.insert({unique_identifier, dump}); + break; + } + default: + return {}; + } + } + + LOG_INFO(Render_OpenGL, + "Found a precompiled disk cache with {} decompiled entries and {} binary entries", + decompiled.size(), dumps.size()); + return {{decompiled, dumps}}; +} + +std::optional ShaderDiskCache::LoadDecompiledEntry() { + + bool sanitize_mul; + if (!LoadObjectFromPrecompiled(sanitize_mul)) { + return {}; + } + + u32 code_size{}; + if (!LoadObjectFromPrecompiled(code_size)) { + return {}; + } + + std::string code(code_size, '\0'); + if (!LoadArrayFromPrecompiled(code.data(), code.size())) { + return {}; + } + + ShaderDiskCacheDecompiled entry; + entry.result.code = std::move(code); + entry.sanitize_mul = sanitize_mul; + + return entry; +} + +bool ShaderDiskCache::SaveDecompiledFile(u64 unique_identifier, + const ShaderDecompiler::ProgramResult& result, + bool sanitize_mul) { + if 
(!SaveObjectToPrecompiled(static_cast(PrecompiledEntryKind::Decompiled)) || + !SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(sanitize_mul) || + !SaveObjectToPrecompiled(static_cast(result.code.size())) || + !SaveArrayToPrecompiled(result.code.data(), result.code.size())) { + return false; + } + + return true; +} + +void ShaderDiskCache::InvalidateAll() { + if (!FileUtil::Delete(GetTransferablePath())) { + LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}", + GetTransferablePath()); + } + InvalidatePrecompiled(); +} + +void ShaderDiskCache::InvalidatePrecompiled() { + // Clear virtaul precompiled cache file + decompressed_precompiled_cache.resize(0); + + if (!FileUtil::Delete(GetPrecompiledPath())) { + LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath()); + } +} + +void ShaderDiskCache::SaveRaw(const ShaderDiskCacheRaw& entry) { + if (!IsUsable()) + return; + + const u64 id = entry.GetUniqueIdentifier(); + if (transferable.find(id) != transferable.end()) { + // The shader already exists + return; + } + + FileUtil::IOFile file = AppendTransferableFile(); + if (!file.IsOpen()) + return; + if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { + LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing"); + file.Close(); + InvalidateAll(); + return; + } + transferable.insert({id, entry}); +} + +void ShaderDiskCache::SaveDecompiled(u64 unique_identifier, + const ShaderDecompiler::ProgramResult& code, + bool sanitize_mul) { + if (!IsUsable()) + return; + + if (decompressed_precompiled_cache.size() == 0) { + SavePrecompiledHeaderToVirtualPrecompiledCache(); + } + + if (!SaveDecompiledFile(unique_identifier, code, sanitize_mul)) { + LOG_ERROR(Render_OpenGL, + "Failed to save decompiled entry to the precompiled file - removing"); + InvalidatePrecompiled(); + } +} + +void ShaderDiskCache::SaveDump(u64 unique_identifier, GLuint program) { + if 
(!IsUsable()) + return; + + GLint binary_length{}; + glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); + + GLenum binary_format{}; + std::vector binary(binary_length); + glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); + + if (!SaveObjectToPrecompiled(static_cast(PrecompiledEntryKind::Dump)) || + !SaveObjectToPrecompiled(unique_identifier) || + !SaveObjectToPrecompiled(static_cast(binary_format)) || + !SaveObjectToPrecompiled(static_cast(binary_length)) || + !SaveArrayToPrecompiled(binary.data(), binary.size())) { + LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing", + unique_identifier); + InvalidatePrecompiled(); + return; + } +} + +bool ShaderDiskCache::IsUsable() const { + return tried_to_load && Settings::values.use_disk_shader_cache; +} + +FileUtil::IOFile ShaderDiskCache::AppendTransferableFile() { + if (!EnsureDirectories()) + return {}; + + const auto transferable_path{GetTransferablePath()}; + const bool existed = FileUtil::Exists(transferable_path); + + FileUtil::IOFile file(transferable_path, "ab"); + if (!file.IsOpen()) { + LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", transferable_path); + return {}; + } + if (!existed || file.GetSize() == 0) { + // If the file didn't exist, write its version + if (file.WriteObject(NativeVersion) != 1) { + LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}", + transferable_path); + return {}; + } + } + return file; +} + +void ShaderDiskCache::SavePrecompiledHeaderToVirtualPrecompiledCache() { + const auto hash{GetShaderCacheVersionHash()}; + if (!SaveArrayToPrecompiled(hash.data(), hash.size())) { + LOG_ERROR( + Render_OpenGL, + "Failed to write precompiled cache version hash to virtual precompiled cache file"); + } +} + +void ShaderDiskCache::SaveVirtualPrecompiledFile() { + decompressed_precompiled_cache_offset = 0; + const std::vector& compressed = 
Common::Compression::CompressDataZSTDDefault( + decompressed_precompiled_cache.data(), decompressed_precompiled_cache.size()); + + const auto precompiled_path{GetPrecompiledPath()}; + FileUtil::IOFile file(precompiled_path, "wb"); + + if (!file.IsOpen()) { + LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path); + return; + } + if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) { + LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", + precompiled_path); + return; + } +} + +bool ShaderDiskCache::EnsureDirectories() const { + const auto CreateDir = [](const std::string& dir) { + if (!FileUtil::CreateDir(dir)) { + LOG_ERROR(Render_OpenGL, "Failed to create directory={}", dir); + return false; + } + return true; + }; + + return CreateDir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) && + CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) && + CreateDir(GetPrecompiledDir()); +} + +std::string ShaderDiskCache::GetTransferablePath() { + return FileUtil::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin"); +} + +std::string ShaderDiskCache::GetPrecompiledPath() { + return FileUtil::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin"); +} + +std::string ShaderDiskCache::GetTransferableDir() const { + return GetBaseDir() + DIR_SEP "transferable"; +} + +std::string ShaderDiskCache::GetPrecompiledDir() const { + return GetBaseDir() + DIR_SEP "precompiled"; +} + +std::string ShaderDiskCache::GetBaseDir() const { + return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "opengl"; +} + +u64 ShaderDiskCache::GetProgramID() { + // Skip games without title id + if (program_id != 0) { + return program_id; + } + if (Core::System::GetInstance().GetAppLoader().ReadProgramId(program_id) != + Loader::ResultStatus::Success) { + return 0; + } + return program_id; +} + +std::string ShaderDiskCache::GetTitleID() { + if 
(!title_id.empty()) { + return title_id; + } + title_id = fmt::format("{:016X}", GetProgramID()); + return title_id; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h new file mode 100644 index 000000000..a3743d1a0 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -0,0 +1,218 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/regs.h" +#include "video_core/renderer_opengl/gl_shader_decompiler.h" +#include "video_core/renderer_opengl/gl_shader_gen.h" + +namespace Core { +class System; +} + +namespace FileUtil { +class IOFile; +} + +namespace OpenGL { + +struct ShaderDiskCacheDecompiled; +struct ShaderDiskCacheDump; + +using RawShaderConfig = Pica::Regs; +using ProgramCode = std::vector; +using ShaderDecompiledMap = std::unordered_map; +using ShaderDumpsMap = std::unordered_map; + +/// Describes a shader how it's used by the guest GPU +class ShaderDiskCacheRaw { +public: + explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, + RawShaderConfig config, ProgramCode program_code); + ShaderDiskCacheRaw() = default; + ~ShaderDiskCacheRaw() = default; + + bool Load(FileUtil::IOFile& file); + + bool Save(FileUtil::IOFile& file) const; + + u64 GetUniqueIdentifier() const { + return unique_identifier; + } + + ProgramType GetProgramType() const { + return program_type; + } + + const ProgramCode& GetProgramCode() const { + return program_code; + } + + const RawShaderConfig& GetRawShaderConfig() const { + return config; + } + +private: + u64 unique_identifier{}; + ProgramType program_type{}; + RawShaderConfig config{}; + ProgramCode 
program_code{}; +}; + +/// Contains decompiled data from a shader +struct ShaderDiskCacheDecompiled { + ShaderDecompiler::ProgramResult result; + bool sanitize_mul; +}; + +/// Contains an OpenGL dumped binary program +struct ShaderDiskCacheDump { + GLenum binary_format; + std::vector binary; +}; + +class ShaderDiskCache { +public: + explicit ShaderDiskCache(bool separable); + ~ShaderDiskCache() = default; + + /// Loads transferable cache. If file has a old version or on failure, it deletes the file. + std::optional> LoadTransferable(); + + /// Loads current game's precompiled cache. Invalidates on failure. + std::pair LoadPrecompiled(); + + /// Removes the transferable (and precompiled) cache file. + void InvalidateAll(); + + /// Removes the precompiled cache file and clears virtual precompiled cache file. + void InvalidatePrecompiled(); + + /// Saves a raw dump to the transferable file. Checks for collisions. + void SaveRaw(const ShaderDiskCacheRaw& entry); + + /// Saves a decompiled entry to the precompiled file. Does not check for collisions. + void SaveDecompiled(u64 unique_identifier, const ShaderDecompiler::ProgramResult& code, + bool sanitize_mul); + + /// Saves a dump entry to the precompiled file. Does not check for collisions. + void SaveDump(u64 unique_identifier, GLuint program); + + /// Serializes virtual precompiled shader cache file to real file + void SaveVirtualPrecompiledFile(); + +private: + /// Loads the transferable cache. Returns empty on failure. + std::optional> LoadPrecompiledFile( + FileUtil::IOFile& file); + + /// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on + /// failure. + std::optional LoadDecompiledEntry(); + + /// Saves a decompiled entry to the passed file. Returns true on success. 
+ bool SaveDecompiledFile(u64 unique_identifier, const ShaderDecompiler::ProgramResult& code, + bool sanitize_mul); + + /// Returns if the cache can be used + bool IsUsable() const; + + /// Opens current game's transferable file and write it's header if it doesn't exist + FileUtil::IOFile AppendTransferableFile(); + + /// Save precompiled header to precompiled_cache_in_memory + void SavePrecompiledHeaderToVirtualPrecompiledCache(); + + /// Create shader disk cache directories. Returns true on success. + bool EnsureDirectories() const; + + /// Gets current game's transferable file path + std::string GetTransferablePath(); + + /// Gets current game's precompiled file path + std::string GetPrecompiledPath(); + + /// Get user's transferable directory path + std::string GetTransferableDir() const; + + /// Get user's precompiled directory path + std::string GetPrecompiledDir() const; + + /// Get user's shader directory path + std::string GetBaseDir() const; + + /// Get current game's title id as u64 + u64 GetProgramID(); + + /// Get current game's title id + std::string GetTitleID(); + + template + bool SaveArrayToPrecompiled(const T* data, std::size_t length) { + const u8* data_view = reinterpret_cast(data); + decompressed_precompiled_cache.insert(decompressed_precompiled_cache.end(), &data_view[0], + &data_view[length * sizeof(T)]); + decompressed_precompiled_cache_offset += length * sizeof(T); + return true; + } + + template + bool LoadArrayFromPrecompiled(T* data, std::size_t length) { + u8* data_view = reinterpret_cast(data); + std::copy_n(decompressed_precompiled_cache.data() + decompressed_precompiled_cache_offset, + length * sizeof(T), data_view); + decompressed_precompiled_cache_offset += length * sizeof(T); + return true; + } + + template + bool SaveObjectToPrecompiled(const T& object) { + return SaveArrayToPrecompiled(&object, 1); + } + + bool SaveObjectToPrecompiled(bool object) { + const auto value = static_cast(object); + return 
SaveArrayToPrecompiled(&value, 1); + } + + template + bool LoadObjectFromPrecompiled(T& object) { + return LoadArrayFromPrecompiled(&object, 1); + } + + // Stores whole precompiled cache which will be read from or saved to the precompiled chache + // file + std::vector decompressed_precompiled_cache; + // Stores the current offset of the precompiled cache file for IO purposes + std::size_t decompressed_precompiled_cache_offset = 0; + + // Stored transferable shaders + std::unordered_map transferable; + + // The cache has been loaded at boot + bool tried_to_load{}; + + bool separable{}; + + u64 program_id{}; + std::string title_id; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 377b9915b..08015d5fc 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -1231,7 +1231,8 @@ float ProcTexNoiseCoef(vec2 x) { } } -std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) { +ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& config, + bool separable_shader) { const auto& state = config.state; std::string out = R"( @@ -1482,7 +1483,7 @@ vec4 secondary_fragment_color = vec4(0.0); // Do not do any sort of processing if it's obvious we're not going to pass the alpha test if (state.alpha_test_func == FramebufferRegs::CompareFunc::Never) { out += "discard; }"; - return out; + return {out}; } // Append the scissor test @@ -1546,7 +1547,7 @@ vec4 secondary_fragment_color = vec4(0.0); "VideoCore_Pica_UseGasMode", true); LOG_CRITICAL(Render_OpenGL, "Unimplemented gas mode"); out += "discard; }"; - return out; + return {out}; } if (state.shadow_rendering) { @@ -1584,10 +1585,10 @@ do { out += "}"; - return out; + return {out}; } -std::string GenerateTrivialVertexShader(bool separable_shader) { +ShaderDecompiler::ProgramResult GenerateTrivialVertexShader(bool 
separable_shader) { std::string out = ""; if (separable_shader) { out += "#extension GL_ARB_separate_shader_objects : enable\n"; @@ -1630,11 +1631,11 @@ void main() { } )"; - return out; + return {out}; } -std::optional GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, - const PicaVSConfig& config, bool separable_shader) { +std::optional GenerateVertexShader( + const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, bool separable_shader) { std::string out = ""; if (separable_shader) { out += "#extension GL_ARB_separate_shader_objects : enable\n"; @@ -1664,7 +1665,7 @@ std::optional GenerateVertexShader(const Pica::Shader::ShaderSetup& if (!program_source_opt) return {}; - std::string& program_source = *program_source_opt; + std::string& program_source = program_source_opt->code; out += R"( #define uniforms vs_uniforms @@ -1696,7 +1697,7 @@ layout (std140) uniform vs_config { out += program_source; - return out; + return {{out}}; } static std::string GetGSCommonSource(const PicaGSConfigCommonRaw& config, bool separable_shader) { @@ -1784,7 +1785,8 @@ void EmitPrim(Vertex vtx0, Vertex vtx1, Vertex vtx2) { return out; }; -std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) { +ShaderDecompiler::ProgramResult GenerateFixedGeometryShader(const PicaFixedGSConfig& config, + bool separable_shader) { std::string out = ""; if (separable_shader) { out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; @@ -1814,6 +1816,6 @@ void main() { out += " EmitPrim(prim_buffer[0], prim_buffer[1], prim_buffer[2]);\n"; out += "}\n"; - return out; + return {out}; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 4f798dd07..ebf54fe9f 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -16,6 +16,12 @@ namespace OpenGL { +namespace ShaderDecompiler { +struct 
ProgramResult; +} + +enum class ProgramType : u32 { VS, GS, FS }; + enum Attributes { ATTRIBUTE_POSITION, ATTRIBUTE_COLOR, @@ -161,8 +167,11 @@ struct PicaShaderConfigCommon { * shader. */ struct PicaVSConfig : Common::HashableStruct { - explicit PicaVSConfig(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup) { - state.Init(regs.vs, setup); + explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) { + state.Init(regs, setup); + } + explicit PicaVSConfig(PicaShaderConfigCommon& conf) { + state = conf; } }; @@ -197,20 +206,21 @@ struct PicaFixedGSConfig : Common::HashableStruct { * @param separable_shader generates shader that can be used for separate shader object * @returns String of the shader source code */ -std::string GenerateTrivialVertexShader(bool separable_shader); +ShaderDecompiler::ProgramResult GenerateTrivialVertexShader(bool separable_shader); /** * Generates the GLSL vertex shader program source code for the given VS program * @returns String of the shader source code; boost::none on failure */ -std::optional GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, - const PicaVSConfig& config, bool separable_shader); +std::optional GenerateVertexShader( + const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, bool separable_shader); /* * Generates the GLSL fixed geometry shader program source code for non-GS PICA pipeline * @returns String of the shader source code */ -std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader); +ShaderDecompiler::ProgramResult GenerateFixedGeometryShader(const PicaFixedGSConfig& config, + bool separable_shader); /** * Generates the GLSL fragment shader program source code for the current Pica state @@ -219,7 +229,8 @@ std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool se * @param separable_shader generates shader that can be used for separate shader object * @returns String of the shader source code 
*/ -std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader); +ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& config, + bool separable_shader); } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index c0172c900..d51e4be56 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -3,13 +3,80 @@ // Refer to the license.txt file included. #include +#include #include #include #include +#include "core/core.h" +#include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/video_core.h" namespace OpenGL { +static u64 GetUniqueIdentifier(const Pica::Regs& regs, const ProgramCode& code) { + std::size_t hash = 0; + u64 regs_uid = Common::ComputeHash64(regs.reg_array.data(), Pica::Regs::NUM_REGS * sizeof(u32)); + boost::hash_combine(hash, regs_uid); + if (code.size() > 0) { + u64 code_uid = Common::ComputeHash64(code.data(), code.size() * sizeof(u32)); + boost::hash_combine(hash, code_uid); + } + return static_cast(hash); +} + +static OGLProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, + const std::set& supported_formats) { + + if (supported_formats.find(dump.binary_format) == supported_formats.end()) { + LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); + return {}; + } + + auto shader = OGLProgram(); + shader.handle = glCreateProgram(); + glProgramParameteri(shader.handle, GL_PROGRAM_SEPARABLE, GL_TRUE); + glProgramBinary(shader.handle, dump.binary_format, dump.binary.data(), + static_cast(dump.binary.size())); + + GLint link_status{}; + glGetProgramiv(shader.handle, GL_LINK_STATUS, &link_status); + if (link_status == GL_FALSE) { + LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing"); + return {}; + } 
+ + return shader; +} + +static std::set GetSupportedFormats() { + std::set supported_formats; + + GLint num_formats{}; + glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); + + std::vector formats(num_formats); + glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); + + for (const GLint format : formats) + supported_formats.insert(static_cast(format)); + return supported_formats; +} + +static std::tuple BuildVSConfigFromRaw( + const ShaderDiskCacheRaw& raw) { + Pica::Shader::ProgramCode program_code{}; + Pica::Shader::SwizzleData swizzle_data{}; + std::copy_n(raw.GetProgramCode().begin(), Pica::Shader::MAX_PROGRAM_CODE_LENGTH, + program_code.begin()); + std::copy_n(raw.GetProgramCode().begin() + Pica::Shader::MAX_PROGRAM_CODE_LENGTH, + Pica::Shader::MAX_SWIZZLE_DATA_LENGTH, swizzle_data.begin()); + Pica::Shader::ShaderSetup setup; + setup.program_code = program_code; + setup.swizzle_data = swizzle_data; + return {PicaVSConfig{raw.GetRawShaderConfig().vs, setup}, setup}; +} + static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding, std::size_t expected_size) { const GLuint ub_index = glGetUniformBlockIndex(shader, name); @@ -121,6 +188,12 @@ public: } } + void Inject(OGLProgram&& program) { + SetShaderUniformBlockBindings(program.handle); + SetShaderSamplerBindings(program.handle); + shader_or_program = std::move(program); + } + private: boost::variant shader_or_program; }; @@ -128,7 +201,7 @@ private: class TrivialVertexShader { public: explicit TrivialVertexShader(bool separable) : program(separable) { - program.Create(GenerateTrivialVertexShader(separable).c_str(), GL_VERTEX_SHADER); + program.Create(GenerateTrivialVertexShader(separable).code.c_str(), GL_VERTEX_SHADER); } GLuint Get() const { return program.GetHandle(); @@ -138,18 +211,28 @@ private: OGLShaderStage program; }; -template class ShaderCache { public: explicit ShaderCache(bool separable) : separable(separable) {} - GLuint Get(const 
KeyConfigType& config) { + std::tuple> Get( + const KeyConfigType& config) { auto [iter, new_shader] = shaders.emplace(config, OGLShaderStage{separable}); OGLShaderStage& cached_shader = iter->second; + std::optional result{}; if (new_shader) { - cached_shader.Create(CodeGenerator(config, separable).c_str(), ShaderType); + result = CodeGenerator(config, separable); + cached_shader.Create(result->code.c_str(), ShaderType); } - return cached_shader.GetHandle(); + return {cached_shader.GetHandle(), result}; + } + + void Inject(const KeyConfigType& key, std::string decomp, OGLProgram&& program) { + OGLShaderStage stage{separable}; + stage.Inject(std::move(program)); + shaders.emplace(key, std::move(stage)); } private: @@ -163,36 +246,47 @@ private: // program buffer from the previous shader, which is hashed into the config, resulting several // different config values from the same shader program. template (*CodeGenerator)(const Pica::Shader::ShaderSetup&, - const KeyConfigType&, bool), + std::optional (*CodeGenerator)( + const Pica::Shader::ShaderSetup&, const KeyConfigType&, bool), GLenum ShaderType> class ShaderDoubleCache { public: explicit ShaderDoubleCache(bool separable) : separable(separable) {} - GLuint Get(const KeyConfigType& key, const Pica::Shader::ShaderSetup& setup) { + std::tuple> Get( + const KeyConfigType& key, const Pica::Shader::ShaderSetup& setup) { + std::optional result{}; auto map_it = shader_map.find(key); if (map_it == shader_map.end()) { auto program_opt = CodeGenerator(setup, key, separable); if (!program_opt) { shader_map[key] = nullptr; - return 0; + return {0, {}}; } - std::string& program = *program_opt; + std::string& program = program_opt->code; auto [iter, new_shader] = shader_cache.emplace(program, OGLShaderStage{separable}); OGLShaderStage& cached_shader = iter->second; if (new_shader) { + result->code = program; cached_shader.Create(program.c_str(), ShaderType); } shader_map[key] = &cached_shader; - return 
cached_shader.GetHandle(); + return {cached_shader.GetHandle(), result}; } if (map_it->second == nullptr) { - return 0; + return {0, {}}; } - return map_it->second->GetHandle(); + return {map_it->second->GetHandle(), {}}; + } + + void Inject(const KeyConfigType& key, std::string decomp, OGLProgram&& program) { + OGLShaderStage stage{separable}; + stage.Inject(std::move(program)); + auto [iter, new_shader] = shader_cache.emplace(decomp, std::move(stage)); + OGLShaderStage& cached_shader = iter->second; + shader_map[key] = &cached_shader; } private: @@ -214,7 +308,7 @@ public: explicit Impl(bool separable, bool is_amd) : is_amd(is_amd), separable(separable), programmable_vertex_shaders(separable), trivial_vertex_shader(separable), fixed_geometry_shaders(separable), - fragment_shaders(separable) { + fragment_shaders(separable), disk_cache(separable) { if (separable) pipeline.Create(); } @@ -244,6 +338,7 @@ public: }; bool is_amd; + bool separable; ShaderTuple current; @@ -253,10 +348,9 @@ public: FixedGeometryShaders fixed_geometry_shaders; FragmentShaders fragment_shaders; - - bool separable; std::unordered_map program_cache; OGLPipeline pipeline; + ShaderDiskCache disk_cache; }; ShaderProgramManager::ShaderProgramManager(bool separable, bool is_amd) @@ -264,12 +358,23 @@ ShaderProgramManager::ShaderProgramManager(bool separable, bool is_amd) ShaderProgramManager::~ShaderProgramManager() = default; -bool ShaderProgramManager::UseProgrammableVertexShader(const PicaVSConfig& config, - const Pica::Shader::ShaderSetup setup) { - GLuint handle = impl->programmable_vertex_shaders.Get(config, setup); +bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::Regs& regs, + Pica::Shader::ShaderSetup& setup) { + PicaVSConfig config{regs.vs, setup}; + auto [handle, result] = impl->programmable_vertex_shaders.Get(config, setup); if (handle == 0) return false; impl->current.vs = handle; + // Save VS to the disk cache if it's a new shader + if (result) { + auto& disk_cache 
= impl->disk_cache; + ProgramCode program_code{setup.program_code.begin(), setup.program_code.end()}; + program_code.insert(program_code.end(), setup.swizzle_data.begin(), + setup.swizzle_data.end()); + u64 unique_identifier = GetUniqueIdentifier(regs, program_code); + ShaderDiskCacheRaw raw{unique_identifier, ProgramType::VS, regs, program_code}; + disk_cache.SaveRaw(raw); + } return true; } @@ -277,25 +382,36 @@ void ShaderProgramManager::UseTrivialVertexShader() { impl->current.vs = impl->trivial_vertex_shader.Get(); } -void ShaderProgramManager::UseFixedGeometryShader(const PicaFixedGSConfig& config) { - impl->current.gs = impl->fixed_geometry_shaders.Get(config); +void ShaderProgramManager::UseFixedGeometryShader(const Pica::Regs& regs) { + PicaFixedGSConfig gs_config(regs); + auto [handle, _] = impl->fixed_geometry_shaders.Get(gs_config); + impl->current.gs = handle; } void ShaderProgramManager::UseTrivialGeometryShader() { impl->current.gs = 0; } -void ShaderProgramManager::UseFragmentShader(const PicaFSConfig& config) { - impl->current.fs = impl->fragment_shaders.Get(config); +void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs) { + PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs); + auto [handle, result] = impl->fragment_shaders.Get(config); + impl->current.fs = handle; + // Save FS to the disk cache if it's a new shader + if (result) { + auto& disk_cache = impl->disk_cache; + u64 unique_identifier = GetUniqueIdentifier(regs, {}); + ShaderDiskCacheRaw raw{unique_identifier, ProgramType::FS, regs, {}}; + disk_cache.SaveRaw(raw); + disk_cache.SaveDecompiled(unique_identifier, *result, false); + } } void ShaderProgramManager::ApplyTo(OpenGLState& state) { if (impl->separable) { if (impl->is_amd) { - // Without this reseting, AMD sometimes freezes when one stage is changed but not for - // the others. - // On the other hand, including this reset seems to introduce memory leak in Intel - // Graphics. 
+ // Without this resetting, AMD sometimes freezes when one stage is changed but not + // for the others. On the other hand, including this reset seems to introduce memory + // leak in Intel Graphics. glUseProgramStages( impl->pipeline.handle, GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, 0); @@ -316,4 +432,192 @@ void ShaderProgramManager::ApplyTo(OpenGLState& state) { state.draw.shader_program = cached_program.handle; } } + +void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (!impl->separable) { + LOG_ERROR(Render_OpenGL, + "Cannot load disk cache as separate shader programs are unsupported!"); + return; + } + auto& disk_cache = impl->disk_cache; + const auto transferable = disk_cache.LoadTransferable(); + if (!transferable) { + return; + } + const auto raws = *transferable; + + auto [decompiled, dumps] = disk_cache.LoadPrecompiled(); + + if (stop_loading) { + return; + } + + std::set supported_formats = GetSupportedFormats(); + + // Track if precompiled cache was altered during loading to know if we have to serialize the + // virtual precompiled cache file back to the hard drive + bool precompiled_cache_altered = false; + + std::mutex mutex; + std::size_t built_shaders = 0; // It doesn't have to be atomic since it's used behind a mutex + std::atomic_bool compilation_failed = false; + if (callback) { + callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); + } + std::vector load_raws_index; + // Loads both decompiled and precompiled shaders from the cache. 
If either one is missing for + const auto LoadPrecompiledWorker = + [&](std::size_t begin, std::size_t end, const std::vector& raws, + const ShaderDecompiledMap& decompiled, const ShaderDumpsMap& dumps) { + for (std::size_t i = 0; i < end; ++i) { + if (stop_loading || compilation_failed) { + return; + } + const auto& raw{raws[i]}; + const u64 unique_identifier{raw.GetUniqueIdentifier()}; + + const u64 calculated_hash = + GetUniqueIdentifier(raw.GetRawShaderConfig(), raw.GetProgramCode()); + if (unique_identifier != calculated_hash) { + LOG_ERROR(Render_OpenGL, + "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing " + "shader cache", + raw.GetUniqueIdentifier(), calculated_hash); + disk_cache.InvalidateAll(); + return; + } + + const auto dump{dumps.find(unique_identifier)}; + const auto decomp{decompiled.find(unique_identifier)}; + + OGLProgram shader; + + if (dump != dumps.end() && decomp != decompiled.end()) { + // Only load this shader if its sanitize_mul setting matches + if (decomp->second.sanitize_mul == VideoCore::g_hw_shader_accurate_mul) { + continue; + } + + // If the shader is dumped, attempt to load it + shader = GeneratePrecompiledProgram(dump->second, supported_formats); + if (shader.handle == 0) { + // If any shader failed, stop trying to compile, delete the cache, and start + // loading from raws + compilation_failed = true; + return; + } + // we have both the binary shader and the decompiled, so inject it into the + // cache + if (raw.GetProgramType() == ProgramType::VS) { + auto [conf, setup] = BuildVSConfigFromRaw(raw); + std::scoped_lock lock(mutex); + + impl->programmable_vertex_shaders.Inject(conf, decomp->second.result.code, + std::move(shader)); + } else if (raw.GetProgramType() == ProgramType::FS) { + PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig()); + std::scoped_lock lock(mutex); + impl->fragment_shaders.Inject(conf, decomp->second.result.code, + std::move(shader)); + } else { + // Unsupported shader 
type got stored somehow so nuke the cache + + LOG_CRITICAL(Frontend, "failed to load raw programtype {}", + static_cast(raw.GetProgramType())); + compilation_failed = true; + return; + } + } else { + // Since precompiled didn't have the dump, we'll load them in the next phase + std::scoped_lock lock(mutex); + load_raws_index.push_back(i); + } + if (callback) { + callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); + } + } + }; + + LoadPrecompiledWorker(0, raws.size(), raws, decompiled, dumps); + + if (compilation_failed) { + // Invalidate the precompiled cache if a dumped shader was rejected + disk_cache.InvalidatePrecompiled(); + dumps.clear(); + precompiled_cache_altered = true; + } + + if (callback) { + callback(VideoCore::LoadCallbackStage::Build, 0, raws.size()); + } + + compilation_failed = false; + + const auto LoadTransferable = [&](std::size_t begin, std::size_t end, + const std::vector& raws) { + for (std::size_t i = 0; i < end; ++i) { + if (stop_loading || compilation_failed) { + return; + } + const auto& raw{raws[i]}; + const u64 unique_identifier{raw.GetUniqueIdentifier()}; + + bool sanitize_mul = false; + GLuint handle{0}; + std::optional result; + // Otherwise decompile and build the shader at boot and save the result to the + // precompiled file + if (raw.GetProgramType() == ProgramType::VS) { + // TODO: This isn't the ideal place to lock, since we really only want to + // lock access to the shared cache + auto [conf, setup] = BuildVSConfigFromRaw(raw); + std::scoped_lock lock(mutex); + auto [h, r] = impl->programmable_vertex_shaders.Get(conf, setup); + handle = h; + result = r; + sanitize_mul = conf.state.sanitize_mul; + } else if (raw.GetProgramType() == ProgramType::FS) { + PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig()); + std::scoped_lock lock(mutex); + auto [h, r] = impl->fragment_shaders.Get(conf); + handle = h; + result = r; + } else { + // Unsupported shader type got stored somehow so nuke the 
cache + LOG_ERROR(Frontend, "failed to load raw programtype {}", + static_cast(raw.GetProgramType())); + compilation_failed = true; + return; + } + if (handle == 0) { + LOG_ERROR(Frontend, "compilation from raw failed {:x} {:x}", + raw.GetProgramCode().at(0), raw.GetProgramCode().at(1)); + compilation_failed = true; + return; + } + // If this is a new shader, add it to the precompiled cache + if (result) { + disk_cache.SaveDecompiled(unique_identifier, *result, sanitize_mul); + disk_cache.SaveDump(unique_identifier, handle); + precompiled_cache_altered = true; + } + + if (callback) { + callback(VideoCore::LoadCallbackStage::Build, i, raws.size()); + } + } + }; + + LoadTransferable(0, raws.size(), raws); + + if (compilation_failed) { + disk_cache.InvalidateAll(); + } + + if (precompiled_cache_altered) { + disk_cache.SaveVirtualPrecompiledFile(); + } +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index a31fbc4e6..94149415c 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -6,12 +6,17 @@ #include #include +#include "video_core/rasterizer_interface.h" #include "video_core/regs_lighting.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/pica_to_gl.h" +namespace Core { +class System; +} + namespace OpenGL { enum class UniformBindings : GLuint { Common, VS, GS }; @@ -97,16 +102,18 @@ public: ShaderProgramManager(bool separable, bool is_amd); ~ShaderProgramManager(); - bool UseProgrammableVertexShader(const PicaVSConfig& config, - const Pica::Shader::ShaderSetup setup); + void LoadDiskCache(const std::atomic_bool& stop_loading, + const VideoCore::DiskResourceLoadCallback& callback); + + bool UseProgrammableVertexShader(const Pica::Regs& 
config, Pica::Shader::ShaderSetup& setup); void UseTrivialVertexShader(); - void UseFixedGeometryShader(const PicaFixedGSConfig& config); + void UseFixedGeometryShader(const Pica::Regs& regs); void UseTrivialGeometryShader(); - void UseFragmentShader(const PicaFSConfig& config); + void UseFragmentShader(const Pica::Regs& config); void ApplyTo(OpenGLState& state); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index ecf0d7a68..b1fe38a7e 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -1039,9 +1039,9 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum } /// Initialize the renderer -Core::System::ResultStatus RendererOpenGL::Init() { +VideoCore::ResultStatus RendererOpenGL::Init() { if (!gladLoadGL()) { - return Core::System::ResultStatus::ErrorVideoCore_ErrorBelowGL33; + return VideoCore::ResultStatus::ErrorBelowGL33; } if (GLAD_GL_KHR_debug) { @@ -1063,18 +1063,18 @@ Core::System::ResultStatus RendererOpenGL::Init() { telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version); if (!strcmp(gpu_vendor, "GDI Generic")) { - return Core::System::ResultStatus::ErrorVideoCore_ErrorGenericDrivers; + return VideoCore::ResultStatus::ErrorGenericDrivers; } if (!(GLAD_GL_VERSION_3_3 || GLAD_GL_ES_VERSION_3_1)) { - return Core::System::ResultStatus::ErrorVideoCore_ErrorBelowGL33; + return VideoCore::ResultStatus::ErrorBelowGL33; } InitOpenGLObjects(); RefreshRasterizerSetting(); - return Core::System::ResultStatus::Success; + return VideoCore::ResultStatus::Success; } /// Shutdown the renderer diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index a9417853b..465ebfb6e 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -48,7 +48,7 @@ 
public: ~RendererOpenGL() override; /// Initialize the renderer - Core::System::ResultStatus Init() override; + VideoCore::ResultStatus Init() override; /// Shutdown the renderer void ShutDown() override; diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 68b7542fe..bb6a5fae7 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -26,6 +26,8 @@ namespace Pica::Shader { constexpr unsigned MAX_PROGRAM_CODE_LENGTH = 4096; constexpr unsigned MAX_SWIZZLE_DATA_LENGTH = 4096; +using ProgramCode = std::array; +using SwizzleData = std::array; struct AttributeBuffer { alignas(16) Common::Vec4 attr[16]; @@ -196,8 +198,8 @@ struct Uniforms { struct ShaderSetup { Uniforms uniforms; - std::array program_code; - std::array swizzle_data; + ProgramCode program_code; + SwizzleData swizzle_data; /// Data private to ShaderEngines struct EngineData { diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 2b2dffcd3..b7e1d4885 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -22,6 +22,7 @@ std::atomic g_hw_renderer_enabled; std::atomic g_shader_jit_enabled; std::atomic g_hw_shader_enabled; std::atomic g_hw_shader_accurate_mul; +std::atomic g_use_disk_shader_cache; std::atomic g_renderer_bg_color_update_requested; std::atomic g_renderer_sampler_update_requested; std::atomic g_renderer_shader_update_requested; @@ -34,16 +35,16 @@ Layout::FramebufferLayout g_screenshot_framebuffer_layout; Memory::MemorySystem* g_memory; /// Initialize the video core -Core::System::ResultStatus Init(Frontend::EmuWindow& emu_window, Memory::MemorySystem& memory) { +ResultStatus Init(Frontend::EmuWindow& emu_window, Memory::MemorySystem& memory) { g_memory = &memory; Pica::Init(); OpenGL::GLES = Settings::values.use_gles; g_renderer = std::make_unique(emu_window); - Core::System::ResultStatus result = g_renderer->Init(); + ResultStatus result = g_renderer->Init(); - if (result != 
Core::System::ResultStatus::Success) { + if (result != ResultStatus::Success) { LOG_ERROR(Render, "initialization failed !"); } else { LOG_DEBUG(Render, "initialized OK"); diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index fdf99add8..f11b67839 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -6,7 +6,6 @@ #include #include -#include "core/core.h" #include "core/frontend/emu_window.h" namespace Frontend { @@ -32,6 +31,7 @@ extern std::atomic g_hw_renderer_enabled; extern std::atomic g_shader_jit_enabled; extern std::atomic g_hw_shader_enabled; extern std::atomic g_hw_shader_accurate_mul; +extern std::atomic g_use_disk_shader_cache; extern std::atomic g_renderer_bg_color_update_requested; extern std::atomic g_renderer_sampler_update_requested; extern std::atomic g_renderer_shader_update_requested; @@ -43,8 +43,14 @@ extern Layout::FramebufferLayout g_screenshot_framebuffer_layout; extern Memory::MemorySystem* g_memory; +enum class ResultStatus { + Success, + ErrorGenericDrivers, + ErrorBelowGL33, +}; + /// Initialize the video core -Core::System::ResultStatus Init(Frontend::EmuWindow& emu_window, Memory::MemorySystem& memory); +ResultStatus Init(Frontend::EmuWindow& emu_window, Memory::MemorySystem& memory); /// Shutdown the video core void Shutdown();