Backport non-separable disk shader from android (#5976)

* VideoCore: Backport Shader cache for non separable shaders

android: disk_shader_cache: remove redundant lookup

* disk_shader_cache: don't compile dumped shaders

* cleanup separable shader loading

* gl_shader_disk_cache.cpp: remove redundant checks

* Skip compiling non-separable raws

* gl_shader_manager.cpp: Skip `GLAD_GL_ARB_get_program_binary` check on GLES
This commit is contained in:
SachinVin 2022-04-02 11:57:14 +05:30 committed by GitHub
parent e31f5727f7
commit b5ac9e4946
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 351 additions and 154 deletions

View file

@ -22,7 +22,7 @@ void RendererBase::RefreshRasterizerSetting() {
opengl_rasterizer_active = hw_renderer_enabled; opengl_rasterizer_active = hw_renderer_enabled;
if (hw_renderer_enabled) { if (hw_renderer_enabled) {
rasterizer = std::make_unique<OpenGL::RasterizerOpenGL>(); rasterizer = std::make_unique<OpenGL::RasterizerOpenGL>(render_window);
} else { } else {
rasterizer = std::make_unique<VideoCore::SWRasterizer>(); rasterizer = std::make_unique<VideoCore::SWRasterizer>();
} }

View file

@ -48,7 +48,7 @@ static bool IsVendorIntel() {
return gpu_vendor == "Intel Inc."; return gpu_vendor == "Intel Inc.";
} }
RasterizerOpenGL::RasterizerOpenGL() RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window)
: is_amd(IsVendorAmd()), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd), : is_amd(IsVendorAmd()), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd),
uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE, false), uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE, false),
index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE, false), index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE, false),
@ -172,15 +172,16 @@ RasterizerOpenGL::RasterizerOpenGL()
#ifdef __APPLE__ #ifdef __APPLE__
if (IsVendorIntel()) { if (IsVendorIntel()) {
shader_program_manager = std::make_unique<ShaderProgramManager>( shader_program_manager = std::make_unique<ShaderProgramManager>(
emu_window,
VideoCore::g_separable_shader_enabled ? GLAD_GL_ARB_separate_shader_objects : false, VideoCore::g_separable_shader_enabled ? GLAD_GL_ARB_separate_shader_objects : false,
is_amd); is_amd);
} else { } else {
shader_program_manager = shader_program_manager = std::make_unique<ShaderProgramManager>(
std::make_unique<ShaderProgramManager>(GLAD_GL_ARB_separate_shader_objects, is_amd); emu_window, GLAD_GL_ARB_separate_shader_objects, is_amd);
} }
#else #else
shader_program_manager = shader_program_manager = std::make_unique<ShaderProgramManager>(
std::make_unique<ShaderProgramManager>(GLAD_GL_ARB_separate_shader_objects, is_amd); emu_window, GLAD_GL_ARB_separate_shader_objects, is_amd);
#endif #endif
glEnable(GL_BLEND); glEnable(GL_BLEND);

View file

@ -38,7 +38,7 @@ class ShaderProgramManager;
class RasterizerOpenGL : public VideoCore::RasterizerInterface { class RasterizerOpenGL : public VideoCore::RasterizerInterface {
public: public:
explicit RasterizerOpenGL(); explicit RasterizerOpenGL(Frontend::EmuWindow& emu_window);
~RasterizerOpenGL() override; ~RasterizerOpenGL() override;
void LoadDiskResources(const std::atomic_bool& stop_loading, void LoadDiskResources(const std::atomic_bool& stop_loading,

View file

@ -171,7 +171,7 @@ std::optional<std::vector<ShaderDiskCacheRaw>> ShaderDiskCache::LoadTransferable
} }
std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap> std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>
ShaderDiskCache::LoadPrecompiled() { ShaderDiskCache::LoadPrecompiled(bool compressed) {
if (!IsUsable()) if (!IsUsable())
return {}; return {};
@ -182,7 +182,7 @@ ShaderDiskCache::LoadPrecompiled() {
return {}; return {};
} }
const auto result = LoadPrecompiledFile(file); const auto result = LoadPrecompiledFile(file, compressed);
if (!result) { if (!result) {
LOG_INFO(Render_OpenGL, LOG_INFO(Render_OpenGL,
"Failed to load precompiled cache for game with title id={} - removing", "Failed to load precompiled cache for game with title id={} - removing",
@ -195,12 +195,18 @@ ShaderDiskCache::LoadPrecompiled() {
} }
std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>> std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>>
ShaderDiskCache::LoadPrecompiledFile(FileUtil::IOFile& file) { ShaderDiskCache::LoadPrecompiledFile(FileUtil::IOFile& file, bool compressed) {
// Read compressed file from disk and decompress to virtual precompiled cache file // Read compressed file from disk and decompress to virtual precompiled cache file
std::vector<u8> compressed(file.GetSize()); std::vector<u8> precompiled_file(file.GetSize());
file.ReadBytes(compressed.data(), compressed.size()); file.ReadBytes(precompiled_file.data(), precompiled_file.size());
const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed); if (compressed) {
SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); const std::vector<u8> decompressed =
Common::Compression::DecompressDataZSTD(precompiled_file);
SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
} else {
SaveArrayToPrecompiled(precompiled_file.data(), precompiled_file.size());
}
decompressed_precompiled_cache_offset = 0; decompressed_precompiled_cache_offset = 0;
ShaderCacheVersionHash file_hash{}; ShaderCacheVersionHash file_hash{};
@ -293,9 +299,25 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCache::LoadDecompiledEntry()
return entry; return entry;
} }
bool ShaderDiskCache::SaveDecompiledFile(u64 unique_identifier, void ShaderDiskCache::SaveDecompiledToFile(FileUtil::IOFile& file, u64 unique_identifier,
const ShaderDecompiler::ProgramResult& result, const ShaderDecompiler::ProgramResult& result,
bool sanitize_mul) { bool sanitize_mul) {
if (!IsUsable())
return;
if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Decompiled)) != 1 ||
file.WriteObject(unique_identifier) != 1 || file.WriteObject(sanitize_mul) != 1 ||
file.WriteObject(static_cast<u32>(result.code.size())) != 1 ||
file.WriteArray(result.code.data(), result.code.size()) != result.code.size()) {
LOG_ERROR(Render_OpenGL, "Failed to save decompiled cache entry - removing");
file.Close();
InvalidatePrecompiled();
}
}
bool ShaderDiskCache::SaveDecompiledToCache(u64 unique_identifier,
const ShaderDecompiler::ProgramResult& result,
bool sanitize_mul) {
if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) || if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) ||
!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(sanitize_mul) || !SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(sanitize_mul) ||
!SaveObjectToPrecompiled(static_cast<u32>(result.code.size())) || !SaveObjectToPrecompiled(static_cast<u32>(result.code.size())) ||
@ -315,7 +337,7 @@ void ShaderDiskCache::InvalidateAll() {
} }
void ShaderDiskCache::InvalidatePrecompiled() { void ShaderDiskCache::InvalidatePrecompiled() {
// Clear virtaul precompiled cache file // Clear virtual precompiled cache file
decompressed_precompiled_cache.resize(0); decompressed_precompiled_cache.resize(0);
if (!FileUtil::Delete(GetPrecompiledPath())) { if (!FileUtil::Delete(GetPrecompiledPath())) {
@ -351,11 +373,11 @@ void ShaderDiskCache::SaveDecompiled(u64 unique_identifier,
if (!IsUsable()) if (!IsUsable())
return; return;
if (decompressed_precompiled_cache.size() == 0) { if (decompressed_precompiled_cache.empty()) {
SavePrecompiledHeaderToVirtualPrecompiledCache(); SavePrecompiledHeaderToVirtualPrecompiledCache();
} }
if (!SaveDecompiledFile(unique_identifier, code, sanitize_mul)) { if (!SaveDecompiledToCache(unique_identifier, code, sanitize_mul)) {
LOG_ERROR(Render_OpenGL, LOG_ERROR(Render_OpenGL,
"Failed to save decompiled entry to the precompiled file - removing"); "Failed to save decompiled entry to the precompiled file - removing");
InvalidatePrecompiled(); InvalidatePrecompiled();
@ -385,6 +407,37 @@ void ShaderDiskCache::SaveDump(u64 unique_identifier, GLuint program) {
} }
} }
/// Appends a binary dump of `program` straight to the on-disk precompiled file, followed by a
/// decompiled entry that only carries the `sanitize_mul` flag for that program.
/// Does nothing when the cache is not usable, when the driver reports no binary for the program,
/// or when the precompiled file cannot be opened. If a write fails, the file is closed and the
/// precompiled cache is invalidated.
void ShaderDiskCache::SaveDumpToFile(u64 unique_identifier, GLuint program, bool sanitize_mul) {
    if (!IsUsable())
        return;

    // Query the binary size first: a non-positive length means there is nothing to dump, and
    // sizing the buffer from a negative GLint would request an enormous allocation.
    GLint binary_length{};
    glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
    if (binary_length <= 0) {
        LOG_ERROR(Render_OpenGL, "No program binary available for shader={:016x} - skipping",
                  unique_identifier);
        return;
    }

    FileUtil::IOFile file = AppendPrecompiledFile();
    if (!file.IsOpen())
        return;

    GLenum binary_format{};
    std::vector<u8> binary(binary_length);
    glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());

    if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Dump)) != 1 ||
        file.WriteObject(unique_identifier) != 1 ||
        file.WriteObject(static_cast<u32>(binary_format)) != 1 ||
        file.WriteObject(static_cast<u32>(binary_length)) != 1 ||
        file.WriteArray(binary.data(), binary.size()) != binary.size()) {
        LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
                  unique_identifier);
        // Release the handle before the cache file is deleted, as SaveDecompiledToFile does.
        file.Close();
        InvalidatePrecompiled();
        return;
    }

    // SaveDecompiled is used only to store the accurate multiplication setting, a better way is to
    // probably change the header in SaveDump
    SaveDecompiledToFile(file, unique_identifier, {}, sanitize_mul);
}
bool ShaderDiskCache::IsUsable() const { bool ShaderDiskCache::IsUsable() const {
return tried_to_load && Settings::values.use_disk_shader_cache; return tried_to_load && Settings::values.use_disk_shader_cache;
} }
@ -412,6 +465,30 @@ FileUtil::IOFile ShaderDiskCache::AppendTransferableFile() {
return file; return file;
} }
/// Opens the current title's precompiled cache file in append mode.
/// When the file did not exist beforehand, or is empty, the shader cache version hash is written
/// first. Returns a default-constructed (closed) IOFile if the directories cannot be ensured,
/// the file cannot be opened, or the version hash cannot be written.
FileUtil::IOFile ShaderDiskCache::AppendPrecompiledFile() {
    if (!EnsureDirectories()) {
        return {};
    }

    const std::string path = GetPrecompiledPath();
    // Existence is sampled before the open; presumably "ab" creates a missing file.
    const bool was_present = FileUtil::Exists(path);

    FileUtil::IOFile cache_file(path, "ab");
    if (!cache_file.IsOpen()) {
        LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", path);
        return {};
    }

    const bool needs_version_header = !was_present || cache_file.GetSize() == 0;
    if (!needs_version_header) {
        return cache_file;
    }

    // Fresh or empty file: start it with the version hash
    const auto version_hash = GetShaderCacheVersionHash();
    const auto written = cache_file.WriteArray(version_hash.data(), version_hash.size());
    if (written != version_hash.size()) {
        LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", path);
        return {};
    }
    return cache_file;
}
void ShaderDiskCache::SavePrecompiledHeaderToVirtualPrecompiledCache() { void ShaderDiskCache::SavePrecompiledHeaderToVirtualPrecompiledCache() {
const auto hash{GetShaderCacheVersionHash()}; const auto hash{GetShaderCacheVersionHash()};
if (!SaveArrayToPrecompiled(hash.data(), hash.size())) { if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
@ -451,7 +528,7 @@ bool ShaderDiskCache::EnsureDirectories() const {
return CreateDir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) && return CreateDir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) &&
CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) && CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
CreateDir(GetPrecompiledDir()); CreateDir(GetPrecompiledDir()) && CreateDir(GetPrecompiledShaderDir());
} }
std::string ShaderDiskCache::GetTransferablePath() { std::string ShaderDiskCache::GetTransferablePath() {
@ -459,7 +536,7 @@ std::string ShaderDiskCache::GetTransferablePath() {
} }
std::string ShaderDiskCache::GetPrecompiledPath() { std::string ShaderDiskCache::GetPrecompiledPath() {
return FileUtil::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin"); return FileUtil::SanitizePath(GetPrecompiledShaderDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
} }
std::string ShaderDiskCache::GetTransferableDir() const { std::string ShaderDiskCache::GetTransferableDir() const {
@ -470,6 +547,13 @@ std::string ShaderDiskCache::GetPrecompiledDir() const {
return GetBaseDir() + DIR_SEP "precompiled"; return GetBaseDir() + DIR_SEP "precompiled";
} }
std::string ShaderDiskCache::GetPrecompiledShaderDir() const {
if (separable) {
return GetPrecompiledDir() + DIR_SEP "separable";
}
return GetPrecompiledDir() + DIR_SEP "conventional";
}
std::string ShaderDiskCache::GetBaseDir() const { std::string ShaderDiskCache::GetBaseDir() const {
return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "opengl"; return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "opengl";
} }

View file

@ -97,7 +97,7 @@ public:
std::optional<std::vector<ShaderDiskCacheRaw>> LoadTransferable(); std::optional<std::vector<ShaderDiskCacheRaw>> LoadTransferable();
/// Loads current game's precompiled cache. Invalidates on failure. /// Loads current game's precompiled cache. Invalidates on failure.
std::pair<ShaderDecompiledMap, ShaderDumpsMap> LoadPrecompiled(); std::pair<ShaderDecompiledMap, ShaderDumpsMap> LoadPrecompiled(bool compressed);
/// Removes the transferable (and precompiled) cache file. /// Removes the transferable (and precompiled) cache file.
void InvalidateAll(); void InvalidateAll();
@ -115,21 +115,28 @@ public:
/// Saves a dump entry to the precompiled file. Does not check for collisions. /// Saves a dump entry to the precompiled file. Does not check for collisions.
void SaveDump(u64 unique_identifier, GLuint program); void SaveDump(u64 unique_identifier, GLuint program);
/// Saves a dump entry to the precompiled file. Does not check for collisions.
void SaveDumpToFile(u64 unique_identifier, GLuint program, bool sanitize_mul);
/// Serializes virtual precompiled shader cache file to real file /// Serializes virtual precompiled shader cache file to real file
void SaveVirtualPrecompiledFile(); void SaveVirtualPrecompiledFile();
private: private:
/// Loads the transferable cache. Returns empty on failure. /// Loads the transferable cache. Returns empty on failure.
std::optional<std::pair<ShaderDecompiledMap, ShaderDumpsMap>> LoadPrecompiledFile( std::optional<std::pair<ShaderDecompiledMap, ShaderDumpsMap>> LoadPrecompiledFile(
FileUtil::IOFile& file); FileUtil::IOFile& file, bool compressed);
/// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on /// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on
/// failure. /// failure.
std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(); std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry();
/// Saves a decompiled entry to the passed file. Returns true on success. /// Saves a decompiled entry to the passed file. Does not check for collisions.
bool SaveDecompiledFile(u64 unique_identifier, const ShaderDecompiler::ProgramResult& code, void SaveDecompiledToFile(FileUtil::IOFile& file, u64 unique_identifier,
bool sanitize_mul); const ShaderDecompiler::ProgramResult& code, bool sanitize_mul);
/// Saves a decompiled entry to the virtual precompiled cache. Does not check for collisions.
bool SaveDecompiledToCache(u64 unique_identifier, const ShaderDecompiler::ProgramResult& code,
bool sanitize_mul);
/// Returns if the cache can be used /// Returns if the cache can be used
bool IsUsable() const; bool IsUsable() const;
@ -155,6 +162,8 @@ private:
/// Get user's precompiled directory path /// Get user's precompiled directory path
std::string GetPrecompiledDir() const; std::string GetPrecompiledDir() const;
std::string GetPrecompiledShaderDir() const;
/// Get user's shader directory path /// Get user's shader directory path
std::string GetBaseDir() const; std::string GetBaseDir() const;
@ -197,7 +206,7 @@ private:
return LoadArrayFromPrecompiled(&object, 1); return LoadArrayFromPrecompiled(&object, 1);
} }
// Stores whole precompiled cache which will be read from or saved to the precompiled chache // Stores whole precompiled cache which will be read from or saved to the precompiled cache
// file // file
std::vector<u8> decompressed_precompiled_cache; std::vector<u8> decompressed_precompiled_cache;
// Stores the current offset of the precompiled cache file for IO purposes // Stores the current offset of the precompiled cache file for IO purposes
@ -213,6 +222,8 @@ private:
u64 program_id{}; u64 program_id{};
std::string title_id; std::string title_id;
FileUtil::IOFile AppendPrecompiledFile();
}; };
} // namespace OpenGL } // namespace OpenGL

View file

@ -8,8 +8,10 @@
#include <boost/functional/hash.hpp> #include <boost/functional/hash.hpp>
#include <boost/variant.hpp> #include <boost/variant.hpp>
#include "core/core.h" #include "core/core.h"
#include "core/frontend/scope_acquire_context.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_vars.h"
#include "video_core/video_core.h" #include "video_core/video_core.h"
namespace OpenGL { namespace OpenGL {
@ -26,7 +28,8 @@ static u64 GetUniqueIdentifier(const Pica::Regs& regs, const ProgramCode& code)
} }
static OGLProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, static OGLProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
const std::set<GLenum>& supported_formats) { const std::set<GLenum>& supported_formats,
bool separable) {
if (supported_formats.find(dump.binary_format) == supported_formats.end()) { if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
@ -35,7 +38,9 @@ static OGLProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
auto shader = OGLProgram(); auto shader = OGLProgram();
shader.handle = glCreateProgram(); shader.handle = glCreateProgram();
glProgramParameteri(shader.handle, GL_PROGRAM_SEPARABLE, GL_TRUE); if (separable) {
glProgramParameteri(shader.handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
}
glProgramBinary(shader.handle, dump.binary_format, dump.binary.data(), glProgramBinary(shader.handle, dump.binary_format, dump.binary.data(),
static_cast<GLsizei>(dump.binary.size())); static_cast<GLsizei>(dump.binary.size()));
@ -239,6 +244,10 @@ public:
shaders.emplace(key, std::move(stage)); shaders.emplace(key, std::move(stage));
} }
/// Stores an already-built shader stage in `shaders` under `key`, taking ownership of `stage`.
/// Uses `emplace`, so an entry already stored for `key` is not replaced.
void Inject(const KeyConfigType& key, OGLShaderStage&& stage) {
shaders.emplace(key, std::move(stage));
}
private: private:
bool separable; bool separable;
std::unordered_map<KeyConfigType, OGLShaderStage> shaders; std::unordered_map<KeyConfigType, OGLShaderStage> shaders;
@ -294,6 +303,12 @@ public:
shader_map.insert_or_assign(key, &cached_shader); shader_map.insert_or_assign(key, &cached_shader);
} }
/// Registers an already-built shader stage under its decompiled source `decomp` and points `key`
/// at the stage held in `shader_cache`. If `shader_cache` already has an entry for the same
/// source, `emplace` keeps it and `key` is mapped to that existing stage.
void Inject(const KeyConfigType& key, std::string decomp, OGLShaderStage&& stage) {
    const auto emplaced = shader_cache.emplace(std::move(decomp), std::move(stage));
    shader_map.insert_or_assign(key, &emplaced.first->second);
}
private: private:
bool separable; bool separable;
std::unordered_map<KeyConfigType, OGLShaderStage*> shader_map; std::unordered_map<KeyConfigType, OGLShaderStage*> shader_map;
@ -323,6 +338,10 @@ public:
GLuint gs = 0; GLuint gs = 0;
GLuint fs = 0; GLuint fs = 0;
std::size_t vs_hash = 0;
std::size_t gs_hash = 0;
std::size_t fs_hash = 0;
bool operator==(const ShaderTuple& rhs) const { bool operator==(const ShaderTuple& rhs) const {
return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs); return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs);
} }
@ -331,15 +350,13 @@ public:
return std::tie(vs, gs, fs) != std::tie(rhs.vs, rhs.gs, rhs.fs); return std::tie(vs, gs, fs) != std::tie(rhs.vs, rhs.gs, rhs.fs);
} }
struct Hash { std::size_t GetConfigHash() const {
std::size_t operator()(const ShaderTuple& tuple) const { std::size_t hash = 0;
std::size_t hash = 0; boost::hash_combine(hash, vs_hash);
boost::hash_combine(hash, tuple.vs); boost::hash_combine(hash, gs_hash);
boost::hash_combine(hash, tuple.gs); boost::hash_combine(hash, fs_hash);
boost::hash_combine(hash, tuple.fs); return hash;
return hash; }
}
};
}; };
bool is_amd; bool is_amd;
@ -353,13 +370,14 @@ public:
FixedGeometryShaders fixed_geometry_shaders; FixedGeometryShaders fixed_geometry_shaders;
FragmentShaders fragment_shaders; FragmentShaders fragment_shaders;
std::unordered_map<ShaderTuple, OGLProgram, ShaderTuple::Hash> program_cache; std::unordered_map<u64, OGLProgram> program_cache;
OGLPipeline pipeline; OGLPipeline pipeline;
ShaderDiskCache disk_cache; ShaderDiskCache disk_cache;
}; };
ShaderProgramManager::ShaderProgramManager(bool separable, bool is_amd) ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable,
: impl(std::make_unique<Impl>(separable, is_amd)) {} bool is_amd)
: impl(std::make_unique<Impl>(separable, is_amd)), emu_window{emu_window_} {}
ShaderProgramManager::~ShaderProgramManager() = default; ShaderProgramManager::~ShaderProgramManager() = default;
@ -370,6 +388,8 @@ bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::Regs& regs,
if (handle == 0) if (handle == 0)
return false; return false;
impl->current.vs = handle; impl->current.vs = handle;
impl->current.vs_hash = config.Hash();
// Save VS to the disk cache if its a new shader // Save VS to the disk cache if its a new shader
if (result) { if (result) {
auto& disk_cache = impl->disk_cache; auto& disk_cache = impl->disk_cache;
@ -380,28 +400,33 @@ bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::Regs& regs,
const ShaderDiskCacheRaw raw{unique_identifier, ProgramType::VS, regs, const ShaderDiskCacheRaw raw{unique_identifier, ProgramType::VS, regs,
std::move(program_code)}; std::move(program_code)};
disk_cache.SaveRaw(raw); disk_cache.SaveRaw(raw);
disk_cache.SaveDecompiled(unique_identifier, *result, VideoCore::g_hw_shader_accurate_mul);
} }
return true; return true;
} }
void ShaderProgramManager::UseTrivialVertexShader() { void ShaderProgramManager::UseTrivialVertexShader() {
impl->current.vs = impl->trivial_vertex_shader.Get(); impl->current.vs = impl->trivial_vertex_shader.Get();
impl->current.vs_hash = 0;
} }
void ShaderProgramManager::UseFixedGeometryShader(const Pica::Regs& regs) { void ShaderProgramManager::UseFixedGeometryShader(const Pica::Regs& regs) {
PicaFixedGSConfig gs_config(regs); PicaFixedGSConfig gs_config(regs);
auto [handle, _] = impl->fixed_geometry_shaders.Get(gs_config); auto [handle, _] = impl->fixed_geometry_shaders.Get(gs_config);
impl->current.gs = handle; impl->current.gs = handle;
impl->current.gs_hash = gs_config.Hash();
} }
void ShaderProgramManager::UseTrivialGeometryShader() { void ShaderProgramManager::UseTrivialGeometryShader() {
impl->current.gs = 0; impl->current.gs = 0;
impl->current.gs_hash = 0;
} }
void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs) { void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs) {
PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs); PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs);
auto [handle, result] = impl->fragment_shaders.Get(config); auto [handle, result] = impl->fragment_shaders.Get(config);
impl->current.fs = handle; impl->current.fs = handle;
impl->current.fs_hash = config.Hash();
// Save FS to the disk cache if its a new shader // Save FS to the disk cache if its a new shader
if (result) { if (result) {
auto& disk_cache = impl->disk_cache; auto& disk_cache = impl->disk_cache;
@ -429,9 +454,14 @@ void ShaderProgramManager::ApplyTo(OpenGLState& state) {
state.draw.shader_program = 0; state.draw.shader_program = 0;
state.draw.program_pipeline = impl->pipeline.handle; state.draw.program_pipeline = impl->pipeline.handle;
} else { } else {
OGLProgram& cached_program = impl->program_cache[impl->current]; const u64 unique_identifier = impl->current.GetConfigHash();
OGLProgram& cached_program = impl->program_cache[unique_identifier];
if (cached_program.handle == 0) { if (cached_program.handle == 0) {
cached_program.Create(false, {impl->current.vs, impl->current.gs, impl->current.fs}); cached_program.Create(false, {impl->current.vs, impl->current.gs, impl->current.fs});
auto& disk_cache = impl->disk_cache;
disk_cache.SaveDumpToFile(unique_identifier, cached_program.handle,
VideoCore::g_hw_shader_accurate_mul);
SetShaderUniformBlockBindings(cached_program.handle); SetShaderUniformBlockBindings(cached_program.handle);
SetShaderSamplerBindings(cached_program.handle); SetShaderSamplerBindings(cached_program.handle);
} }
@ -441,12 +471,7 @@ void ShaderProgramManager::ApplyTo(OpenGLState& state) {
void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) { const VideoCore::DiskResourceLoadCallback& callback) {
if (!impl->separable) { if (!GLAD_GL_ARB_get_program_binary && !GLES) {
LOG_ERROR(Render_OpenGL,
"Cannot load disk cache as separate shader programs are unsupported!");
return;
}
if (!GLAD_GL_ARB_get_program_binary) {
LOG_ERROR(Render_OpenGL, LOG_ERROR(Render_OpenGL,
"Cannot load disk cache as ARB_get_program_binary is not supported!"); "Cannot load disk cache as ARB_get_program_binary is not supported!");
return; return;
@ -459,7 +484,9 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading,
} }
const auto& raws = *transferable; const auto& raws = *transferable;
auto [decompiled, dumps] = disk_cache.LoadPrecompiled(); // Load uncompressed precompiled file for non-separable shaders.
// Precompiled file for separable shaders is compressed.
auto [decompiled, dumps] = disk_cache.LoadPrecompiled(impl->separable);
if (stop_loading) { if (stop_loading) {
return; return;
@ -478,125 +505,179 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading,
} }
std::vector<std::size_t> load_raws_index; std::vector<std::size_t> load_raws_index;
// Loads both decompiled and precompiled shaders from the cache. If either one is missing for // Loads both decompiled and precompiled shaders from the cache. If either one is missing for
const auto LoadPrecompiledWorker = const auto LoadPrecompiledShader = [&](std::size_t begin, std::size_t end,
[&](std::size_t begin, std::size_t end, const std::vector<ShaderDiskCacheRaw>& raw_cache, const std::vector<ShaderDiskCacheRaw>& raw_cache,
const ShaderDecompiledMap& decompiled_map, const ShaderDumpsMap& dump_map) { const ShaderDecompiledMap& decompiled_map,
for (std::size_t i = 0; i < end; ++i) { const ShaderDumpsMap& dump_map) {
if (stop_loading || compilation_failed) { for (std::size_t i = begin; i < end; ++i) {
return;
}
const auto& raw{raw_cache[i]};
const u64 unique_identifier{raw.GetUniqueIdentifier()};
const u64 calculated_hash =
GetUniqueIdentifier(raw.GetRawShaderConfig(), raw.GetProgramCode());
if (unique_identifier != calculated_hash) {
LOG_ERROR(Render_OpenGL,
"Invalid hash in entry={:016x} (obtained hash={:016x}) - removing "
"shader cache",
raw.GetUniqueIdentifier(), calculated_hash);
disk_cache.InvalidateAll();
return;
}
const auto dump{dump_map.find(unique_identifier)};
const auto decomp{decompiled_map.find(unique_identifier)};
OGLProgram shader;
if (dump != dump_map.end() && decomp != decompiled_map.end()) {
// Only load this shader if its sanitize_mul setting matches
if (raw.GetProgramType() == ProgramType::VS &&
decomp->second.sanitize_mul != VideoCore::g_hw_shader_accurate_mul) {
continue;
}
// If the shader is dumped, attempt to load it
shader = GeneratePrecompiledProgram(dump->second, supported_formats);
if (shader.handle == 0) {
// If any shader failed, stop trying to compile, delete the cache, and start
// loading from raws
compilation_failed = true;
return;
}
// we have both the binary shader and the decompiled, so inject it into the
// cache
if (raw.GetProgramType() == ProgramType::VS) {
auto [conf, setup] = BuildVSConfigFromRaw(raw);
std::scoped_lock lock(mutex);
impl->programmable_vertex_shaders.Inject(conf, decomp->second.result.code,
std::move(shader));
} else if (raw.GetProgramType() == ProgramType::FS) {
PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig());
std::scoped_lock lock(mutex);
impl->fragment_shaders.Inject(conf, std::move(shader));
} else {
// Unsupported shader type got stored somehow so nuke the cache
LOG_CRITICAL(Frontend, "failed to load raw programtype {}",
raw.GetProgramType());
compilation_failed = true;
return;
}
} else {
// Since precompiled didn't have the dump, we'll load them in the next phase
std::scoped_lock lock(mutex);
load_raws_index.push_back(i);
}
if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, i, raw_cache.size());
}
}
};
LoadPrecompiledWorker(0, raws.size(), raws, decompiled, dumps);
if (compilation_failed) {
// Invalidate the precompiled cache if a shader dumped shader was rejected
disk_cache.InvalidatePrecompiled();
dumps.clear();
precompiled_cache_altered = true;
}
if (callback) {
callback(VideoCore::LoadCallbackStage::Build, 0, raws.size());
}
compilation_failed = false;
const auto LoadTransferable = [&](std::size_t begin, std::size_t end,
const std::vector<ShaderDiskCacheRaw>& raw_cache) {
for (std::size_t i = 0; i < end; ++i) {
if (stop_loading || compilation_failed) { if (stop_loading || compilation_failed) {
return; return;
} }
const auto& raw{raw_cache[i]}; const auto& raw{raw_cache[i]};
const u64 unique_identifier{raw.GetUniqueIdentifier()}; const u64 unique_identifier{raw.GetUniqueIdentifier()};
const u64 calculated_hash =
GetUniqueIdentifier(raw.GetRawShaderConfig(), raw.GetProgramCode());
if (unique_identifier != calculated_hash) {
LOG_ERROR(Render_OpenGL,
"Invalid hash in entry={:016x} (obtained hash={:016x}) - removing "
"shader cache",
raw.GetUniqueIdentifier(), calculated_hash);
disk_cache.InvalidateAll();
return;
}
const auto dump{dump_map.find(unique_identifier)};
const auto decomp{decompiled_map.find(unique_identifier)};
OGLProgram shader;
if (dump != dump_map.end() && decomp != decompiled_map.end()) {
// Only load the vertex shader if its sanitize_mul setting matches
if (raw.GetProgramType() == ProgramType::VS &&
decomp->second.sanitize_mul != VideoCore::g_hw_shader_accurate_mul) {
continue;
}
// If the shader is dumped, attempt to load it
shader =
GeneratePrecompiledProgram(dump->second, supported_formats, impl->separable);
if (shader.handle == 0) {
// If any shader failed, stop trying to compile, delete the cache, and start
// loading from raws
compilation_failed = true;
return;
}
// we have both the binary shader and the decompiled, so inject it into the
// cache
if (raw.GetProgramType() == ProgramType::VS) {
auto [conf, setup] = BuildVSConfigFromRaw(raw);
std::scoped_lock lock(mutex);
impl->programmable_vertex_shaders.Inject(conf, decomp->second.result.code,
std::move(shader));
} else if (raw.GetProgramType() == ProgramType::FS) {
PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig());
std::scoped_lock lock(mutex);
impl->fragment_shaders.Inject(conf, std::move(shader));
} else {
// Unsupported shader type got stored somehow so nuke the cache
LOG_CRITICAL(Frontend, "failed to load raw ProgramType {}",
raw.GetProgramType());
compilation_failed = true;
return;
}
} else {
// Since precompiled didn't have the dump, we'll load them in the next phase
std::scoped_lock lock(mutex);
load_raws_index.push_back(i);
}
if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, i, raw_cache.size());
}
}
};
// Loads whole precompiled programs (non-separable path) from the dump map into the program
// cache. A dump is only used when a decompiled entry with the same identifier exists and its
// sanitize_mul flag matches the current setting. Sets compilation_failed and stops at the first
// dump the driver rejects.
const auto LoadPrecompiledProgram = [&](const ShaderDecompiledMap& decompiled_map,
                                        const ShaderDumpsMap& dump_map) {
    std::size_t i{0};
    for (const auto& dump : dump_map) {
        if (stop_loading) {
            break;
        }
        const u64 unique_identifier{dump.first};
        const auto decomp{decompiled_map.find(unique_identifier)};

        // The decompiled entry is written after the dump, so an interrupted write can leave a
        // dump without one. Without it the sanitize_mul setting is unknown, so skip the dump
        // instead of dereferencing the end iterator.
        if (decomp == decompiled_map.end()) {
            LOG_ERROR(Render_OpenGL,
                      "Precompiled program {:016x} has no decompiled entry - skipping",
                      unique_identifier);
            continue;
        }

        // Only load the program if its sanitize_mul setting matches
        if (decomp->second.sanitize_mul != VideoCore::g_hw_shader_accurate_mul) {
            continue;
        }

        // If the shader program is dumped, attempt to load it
        OGLProgram shader =
            GeneratePrecompiledProgram(dump.second, supported_formats, impl->separable);
        if (shader.handle != 0) {
            SetShaderUniformBlockBindings(shader.handle);
            SetShaderSamplerBindings(shader.handle);
            impl->program_cache.emplace(unique_identifier, std::move(shader));
        } else {
            LOG_ERROR(Frontend, "Failed to link Precompiled program!");
            compilation_failed = true;
            break;
        }
        if (callback) {
            callback(VideoCore::LoadCallbackStage::Decompile, ++i, dump_map.size());
        }
    }
};
// Load the precompiled binaries first: per-stage shaders when the pipeline is separable,
// whole programs otherwise.
if (impl->separable) {
LoadPrecompiledShader(0, raws.size(), raws, decompiled, dumps);
} else {
LoadPrecompiledProgram(decompiled, dumps);
}
// When set, the build phase below walks every raw instead of only those in load_raws_index.
bool load_all_raws = false;
if (compilation_failed) {
// Invalidate the precompiled cache if a dumped shader was rejected
impl->program_cache.clear();
disk_cache.InvalidatePrecompiled();
dumps.clear();
precompiled_cache_altered = true;
load_all_raws = true;
}
// TODO(SachinV): Skip loading raws until we implement a proper way to link non-separable
// shaders.
if (!impl->separable) {
return;
}
// Number of raws the build phase will process.
const std::size_t load_raws_size = load_all_raws ? raws.size() : load_raws_index.size();
if (callback) {
// Announce the start of the build stage with zero progress.
callback(VideoCore::LoadCallbackStage::Build, 0, load_raws_size);
}
// Reset the flag so that from here on it reflects failures of the raw build phase only.
compilation_failed = false;
std::size_t built_shaders = 0; // It doesn't have to be atomic since it's used behind a mutex
const auto LoadRawSepareble = [&](Frontend::GraphicsContext* context, std::size_t begin,
std::size_t end) {
Frontend::ScopeAcquireContext scope(*context);
for (std::size_t i = begin; i < end; ++i) {
if (stop_loading || compilation_failed) {
return;
}
const std::size_t raws_index = load_all_raws ? i : load_raws_index[i];
const auto& raw{raws[raws_index]};
const u64 unique_identifier{raw.GetUniqueIdentifier()};
bool sanitize_mul = false; bool sanitize_mul = false;
GLuint handle{0}; GLuint handle{0};
std::optional<ShaderDecompiler::ProgramResult> result; std::optional<ShaderDecompiler::ProgramResult> result;
// Otherwise decompile and build the shader at boot and save the result to the // Otherwise decompile and build the shader at boot and save the result to the
// precompiled file // precompiled file
if (raw.GetProgramType() == ProgramType::VS) { if (raw.GetProgramType() == ProgramType::VS) {
// TODO: This isn't the ideal place to lock, since we really only want to
// lock access to the shared cache
auto [conf, setup] = BuildVSConfigFromRaw(raw); auto [conf, setup] = BuildVSConfigFromRaw(raw);
std::scoped_lock lock(mutex); result = GenerateVertexShader(setup, conf, impl->separable);
auto [h, r] = impl->programmable_vertex_shaders.Get(conf, setup); OGLShaderStage stage{impl->separable};
handle = h; stage.Create(result->code.c_str(), GL_VERTEX_SHADER);
result = std::move(r); handle = stage.GetHandle();
sanitize_mul = conf.state.sanitize_mul; sanitize_mul = conf.state.sanitize_mul;
std::scoped_lock lock(mutex);
impl->programmable_vertex_shaders.Inject(conf, result->code, std::move(stage));
} else if (raw.GetProgramType() == ProgramType::FS) { } else if (raw.GetProgramType() == ProgramType::FS) {
PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig()); PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig());
result = GenerateFragmentShader(conf, impl->separable);
OGLShaderStage stage{impl->separable};
stage.Create(result->code.c_str(), GL_FRAGMENT_SHADER);
handle = stage.GetHandle();
std::scoped_lock lock(mutex); std::scoped_lock lock(mutex);
auto [h, r] = impl->fragment_shaders.Get(conf); impl->fragment_shaders.Inject(conf, std::move(stage));
handle = h;
result = std::move(r);
} else { } else {
// Unsupported shader type got stored somehow so nuke the cache // Unsupported shader type got stored somehow so nuke the cache
LOG_ERROR(Frontend, "failed to load raw programtype {}", raw.GetProgramType()); LOG_ERROR(Frontend, "failed to load raw ProgramType {}", raw.GetProgramType());
compilation_failed = true; compilation_failed = true;
return; return;
} }
@ -606,7 +687,9 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading,
compilation_failed = true; compilation_failed = true;
return; return;
} }
// If this is a new shader, add it the precompiled cache
std::scoped_lock lock(mutex);
// If this is a new separable shader, add it the precompiled cache
if (result) { if (result) {
disk_cache.SaveDecompiled(unique_identifier, *result, sanitize_mul); disk_cache.SaveDecompiled(unique_identifier, *result, sanitize_mul);
disk_cache.SaveDump(unique_identifier, handle); disk_cache.SaveDump(unique_identifier, handle);
@ -614,12 +697,27 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading,
} }
if (callback) { if (callback) {
callback(VideoCore::LoadCallbackStage::Build, i, raw_cache.size()); callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, load_raws_size);
} }
} }
}; };
LoadTransferable(0, raws.size(), raws); const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
const std::size_t bucket_size{load_raws_size / num_workers};
std::vector<std::unique_ptr<Frontend::GraphicsContext>> contexts(num_workers);
std::vector<std::thread> threads(num_workers);
for (std::size_t i = 0; i < num_workers; ++i) {
const bool is_last_worker = i + 1 == num_workers;
const std::size_t start{bucket_size * i};
const std::size_t end{is_last_worker ? load_raws_size : start + bucket_size};
// On some platforms the shared context has to be created from the GUI thread
contexts[i] = emu_window.CreateSharedContext();
threads[i] = std::thread(LoadRawSepareble, contexts[i].get(), start, end);
}
for (auto& thread : threads) {
thread.join();
}
if (compilation_failed) { if (compilation_failed) {
disk_cache.InvalidateAll(); disk_cache.InvalidateAll();

View file

@ -99,7 +99,7 @@ static_assert(sizeof(VSUniformData) < 16384,
/// A class that manage different shader stages and configures them with given config data. /// A class that manage different shader stages and configures them with given config data.
class ShaderProgramManager { class ShaderProgramManager {
public: public:
ShaderProgramManager(bool separable, bool is_amd); ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable, bool is_amd);
~ShaderProgramManager(); ~ShaderProgramManager();
void LoadDiskCache(const std::atomic_bool& stop_loading, void LoadDiskCache(const std::atomic_bool& stop_loading,
@ -120,5 +120,7 @@ public:
private: private:
class Impl; class Impl;
std::unique_ptr<Impl> impl; std::unique_ptr<Impl> impl;
Frontend::EmuWindow& emu_window;
}; };
} // namespace OpenGL } // namespace OpenGL

View file

@ -84,6 +84,7 @@ GLuint LoadProgram(bool separable_program, const std::vector<GLuint>& shaders) {
glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
} }
glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
glLinkProgram(program_id); glLinkProgram(program_id);
// Check the program // Check the program