From 2ef696c85a37917102a9f869775180ab225f0d56 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 23 Sep 2019 11:15:09 -0400 Subject: [PATCH 01/13] Shader_IR: Implement BRX tracking. --- src/video_core/shader/control_flow.cpp | 113 +++++++++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 9d21f45ded..70f758642a 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -124,6 +124,111 @@ enum class ParseResult : u32 { AbnormalFlow, }; +struct BranchIndirectInfo { + u32 buffer{}; + u32 offset{}; + u32 entries{}; + s32 relative_position{}; +}; + +std::optional TrackBranchIndirectInfo(const CFGRebuildState& state, + u32 start_address, u32 current_position) { + const u32 shader_start = state.start; + u32 pos = current_position; + BranchIndirectInfo result{}; + u64 track_register = 0; + + // Step 0 Get BRX Info + const Instruction instr = {state.program_code[pos]}; + const auto opcode = OpCode::Decode(instr); + if (opcode->get().GetId() != OpCode::Id::BRX) { + return {}; + } + if (instr.brx.constant_buffer != 0) { + return {}; + } + track_register = instr.gpr8.Value(); + result.relative_position = instr.brx.GetBranchExtend(); + pos--; + bool found_track = false; + + // Step 1 Track LDC + while (pos >= shader_start) { + if (IsSchedInstruction(pos, shader_start)) { + pos--; + continue; + } + const Instruction instr = {state.program_code[pos]}; + const auto opcode = OpCode::Decode(instr); + if (opcode->get().GetId() == OpCode::Id::LD_C) { + if (instr.gpr0.Value() == track_register && + instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single) { + result.buffer = instr.cbuf36.index; + result.offset = instr.cbuf36.GetOffset(); + track_register = instr.gpr8.Value(); + pos--; + found_track = true; + break; + } + } + pos--; + } + + if (!found_track) { + return {}; + } + found_track = false; + + // Step 2 Track SHL + while (pos >= shader_start) { + if (IsSchedInstruction(pos, shader_start)) { + pos--; + continue; + } + const Instruction instr = {state.program_code[pos]}; + const auto opcode = OpCode::Decode(instr); + if (opcode->get().GetId() == OpCode::Id::SHL_IMM) { + if (instr.gpr0.Value() == track_register) { + track_register = instr.gpr8.Value(); + pos--; + found_track = true; + break; + } + } + pos--; + } + + if (!found_track) { + return {}; + } + found_track = false; + + // Step 3 Track IMNMX + while (pos >= shader_start) { + if (IsSchedInstruction(pos, shader_start)) { + pos--; + continue; + } + const Instruction instr = {state.program_code[pos]}; + const auto opcode = OpCode::Decode(instr); + if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) { + if (instr.gpr0.Value() == track_register) { + track_register = instr.gpr8.Value(); + result.entries = instr.alu.GetSignedImm20_20(); + pos--; + found_track = true; + break; + } + } + pos--; + } + + if (!found_track) { + return {}; + } + return {result}; +} + std::pair ParseCode(CFGRebuildState& state, u32 address) { u32 offset = static_cast(address); const u32 end_address = static_cast(state.program_size / sizeof(Instruction)); @@ -298,6 +403,14 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) break; } case OpCode::Id::BRX: { + auto tmp = TrackBranchIndirectInfo(state, address, offset); + if (tmp) { + auto result = *tmp; + LOG_CRITICAL(HW_GPU, "Track Successful, BRX: buffer:{}, offset:{}, entries:{}", + result.buffer, result.offset, result.entries); + } else { + LOG_CRITICAL(HW_GPU, "Track Unsuccesful"); + } return {ParseResult::AbnormalFlow, parse_info}; } default: From 1a58f45d76fe7756dd365e099d1536da769c1eab Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 23 Sep 2019 14:02:02 -0400 Subject: [PATCH 02/13] VideoCore: Unify const buffer accessing along engines and provide ConstBufferLocker class to shaders. --- CMakeModules/GenerateSCMRev.cmake | 6 +- src/common/CMakeLists.txt | 6 +- src/common/hash.h | 11 +++ src/video_core/CMakeLists.txt | 7 +- .../engines/const_buffer_engine_interface.h | 26 +++++++ src/video_core/engines/kepler_compute.cpp | 3 +- src/video_core/engines/kepler_compute.h | 5 +- src/video_core/engines/maxwell_3d.cpp | 3 +- src/video_core/engines/maxwell_3d.h | 5 +- .../renderer_opengl/gl_rasterizer.cpp | 7 +- src/video_core/shader/const_buffer_locker.cpp | 72 +++++++++++++++++++ src/video_core/shader/const_buffer_locker.h | 50 +++++++++++++ src/video_core/shader/shader_ir.h | 1 + 13 files changed, 187 insertions(+), 15 deletions(-) create mode 100644 src/video_core/engines/const_buffer_engine_interface.h create mode 100644 src/video_core/shader/const_buffer_locker.cpp create mode 100644 src/video_core/shader/const_buffer_locker.h diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 09eabe2c73..21e03ae98f 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -85,10 +85,12 @@ set(HASH_FILES "${VIDEO_CORE}/shader/decode/xmad.cpp" "${VIDEO_CORE}/shader/ast.cpp" "${VIDEO_CORE}/shader/ast.h" - "${VIDEO_CORE}/shader/control_flow.cpp" - "${VIDEO_CORE}/shader/control_flow.h" "${VIDEO_CORE}/shader/compiler_settings.cpp" "${VIDEO_CORE}/shader/compiler_settings.h" + "${VIDEO_CORE}/shader/const_buffer_locker.cpp" + "${VIDEO_CORE}/shader/const_buffer_locker.h" + "${VIDEO_CORE}/shader/control_flow.cpp" + "${VIDEO_CORE}/shader/control_flow.h" "${VIDEO_CORE}/shader/decode.cpp" "${VIDEO_CORE}/shader/expr.cpp" "${VIDEO_CORE}/shader/expr.h" diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 5b51fcafa9..9c6f1c07c2 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -74,10 +74,12 @@ add_custom_command(OUTPUT scm_rev.cpp "${VIDEO_CORE}/shader/decode/xmad.cpp" "${VIDEO_CORE}/shader/ast.cpp" "${VIDEO_CORE}/shader/ast.h" - "${VIDEO_CORE}/shader/control_flow.cpp" - "${VIDEO_CORE}/shader/control_flow.h" "${VIDEO_CORE}/shader/compiler_settings.cpp" "${VIDEO_CORE}/shader/compiler_settings.h" + "${VIDEO_CORE}/shader/const_buffer_locker.cpp" + "${VIDEO_CORE}/shader/const_buffer_locker.h" + "${VIDEO_CORE}/shader/control_flow.cpp" + "${VIDEO_CORE}/shader/control_flow.h" "${VIDEO_CORE}/shader/decode.cpp" "${VIDEO_CORE}/shader/expr.cpp" "${VIDEO_CORE}/shader/expr.h" diff --git a/src/common/hash.h b/src/common/hash.h index 40194d1ee4..c939709bc9 100644 --- a/src/common/hash.h +++ b/src/common/hash.h @@ -6,6 +6,8 @@ #include #include +#include +#include #include "common/cityhash.h" #include "common/common_types.h" @@ -68,4 +70,13 @@ struct HashableStruct { } }; +struct PairHash { + template + std::size_t operator()(const std::pair& pair) const { + std::size_t seed = std::hash()(pair.first); + boost::hash_combine(seed, std::hash()(pair.second)); + return seed; + } +}; + } // namespace Common diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index eaa694ff8d..cb6eda1b86 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -6,6 +6,7 @@ add_library(video_core STATIC dma_pusher.h debug_utils/debug_utils.cpp debug_utils/debug_utils.h + engines/const_buffer_engine_interface.h engines/const_buffer_info.h engines/engine_upload.cpp engines/engine_upload.h @@ -107,10 +108,12 @@ add_library(video_core STATIC shader/decode/other.cpp shader/ast.cpp shader/ast.h - shader/control_flow.cpp - shader/control_flow.h shader/compiler_settings.cpp shader/compiler_settings.h + shader/const_buffer_locker.cpp + shader/const_buffer_locker.h + shader/control_flow.cpp + shader/control_flow.h shader/decode.cpp shader/expr.cpp shader/expr.h diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h new file mode 100644 index 0000000000..cc41a9cacb --- /dev/null +++ b/src/video_core/engines/const_buffer_engine_interface.h @@ -0,0 +1,26 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Tegra::Engines { + +enum class ShaderType : u32 { + Vertex = 0, + TesselationControl = 1, + TesselationEval = 2, + Geometry = 3, + Fragment = 4, + Compute = 5, +}; + +class ConstBufferEngineInterface { +public: + virtual ~ConstBufferEngineInterface() {} + virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; +}; + +} diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 63d4491352..ba97c28949 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -70,7 +70,8 @@ Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const Texture::TextureHan GetTSCEntry(tex_handle.tsc_id)}; } -u32 KeplerCompute::AccessConstBuffer32(u64 const_buffer, u64 offset) const { +u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { + ASSERT(stage == ShaderType::Compute); const auto& buffer = launch_description.const_buffer_config[const_buffer]; u32 result; std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 90cf650d2f..d7e0dfcd6f 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -11,6 +11,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/engines/engine_upload.h" +#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/gpu.h" #include "video_core/textures/texture.h" @@ -37,7 +38,7 @@ namespace Tegra::Engines { #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) -class KeplerCompute final { +class KeplerCompute final : public ConstBufferEngineInterface { public: explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); @@ -201,7 +202,7 @@ public: Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, std::size_t offset) const; - u32 AccessConstBuffer32(u64 const_buffer, u64 offset) const; + u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; private: Core::System& system; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 59976943a2..92e38b0713 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -847,7 +847,8 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer.Clear(); } -u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const { +u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { + ASSERT(stage != ShaderType::Compute); const auto& shader_stage = state.shader_stages[static_cast(stage)]; const auto& buffer = shader_stage.const_buffers[const_buffer]; u32 result; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index e3f1047d5c..04d02d2086 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -16,6 +16,7 @@ #include "common/common_types.h" #include "common/math_util.h" #include "video_core/engines/const_buffer_info.h" +#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" #include "video_core/macro_interpreter.h" @@ -44,7 +45,7 @@ namespace Tegra::Engines { #define MAXWELL3D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) -class Maxwell3D final { +class Maxwell3D final : public ConstBufferEngineInterface { public: explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); @@ -1257,7 +1258,7 @@ public: /// Returns the texture information for a specific texture in a specific shader stage. Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; - u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const; + u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than /// we've seen used. diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index cbcf81414b..10114909b1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -975,7 +975,8 @@ TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stag } const auto cbuf = entry.GetBindlessCBuf(); Tegra::Texture::TextureHandle tex_handle; - tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); + Tegra::Engines::ShaderType shader_type = static_cast(stage); + tex_handle.raw = maxwell3d.AccessConstBuffer32(shader_type, cbuf.first, cbuf.second); return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); }(); @@ -1005,7 +1006,7 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) } const auto cbuf = entry.GetBindlessCBuf(); Tegra::Texture::TextureHandle tex_handle; - tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); + tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, cbuf.first, cbuf.second); return compute.GetTextureInfo(tex_handle, entry.GetOffset()); }(); @@ -1050,7 +1051,7 @@ void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { } const auto cbuf = entry.GetBindlessCBuf(); Tegra::Texture::TextureHandle tex_handle; - tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); + tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, cbuf.first, cbuf.second); return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic; }(); SetupImage(bindpoint, tic, entry); diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp new file mode 100644 index 0000000000..6a9e0ed5e3 --- /dev/null +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -0,0 +1,72 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/shader/const_buffer_locker.h" + +namespace VideoCommon::Shader { + +ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) + : engine{nullptr}, shader_stage{shader_stage} {} + +ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, + Tegra::Engines::ConstBufferEngineInterface* engine) + : engine{engine}, shader_stage{shader_stage} {} + +bool ConstBufferLocker::IsEngineSet() const { + return engine != nullptr; +} + +void ConstBufferLocker::SetEngine(Tegra::Engines::ConstBufferEngineInterface* engine_) { + engine = engine_; +} + +std::optional ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { + const std::pair key = {buffer, offset}; + const auto iter = keys.find(key); + if (iter != keys.end()) { + return {iter->second}; + } + if (!IsEngineSet()) { + return {}; + } + const u32 value = engine->AccessConstBuffer32(shader_stage, buffer, offset); + keys.emplace(key, value); + return {value}; +} + +void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { + const std::pair key = {buffer, offset}; + keys[key] = value; +} + +u32 ConstBufferLocker::NumKeys() const { + return keys.size(); +} + +const std::unordered_map, u32, Common::PairHash>& +ConstBufferLocker::AccessKeys() const { + return keys; +} + +bool ConstBufferLocker::AreKeysConsistant() const { + if (!IsEngineSet()) { + return false; + } + for (const auto& key_val : keys) { + const std::pair key = key_val.first; + const u32 value = key_val.second; + const u32 other_value = engine->AccessConstBuffer32(shader_stage, key.first, key.second); + if (other_value != value) { + return false; + } + } + return true; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h new file mode 100644 index 0000000000..39e62584da --- /dev/null +++ b/src/video_core/shader/const_buffer_locker.h @@ -0,0 +1,50 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include "common/common_types.h" +#include "common/hash.h" +#include "video_core/engines/const_buffer_engine_interface.h" + +namespace VideoCommon::Shader { + +class ConstBufferLocker { +public: + explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); + + explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, + Tegra::Engines::ConstBufferEngineInterface* engine); + + // Checks if an engine is setup, it may be possible that during disk shader + // cache run, the engines have not been created yet. + bool IsEngineSet() const; + + // Use this to set/change the engine used for this shader. + void SetEngine(Tegra::Engines::ConstBufferEngineInterface* engine); + + // Retrieves a key from the locker, if it's registered, it will give the + // registered value, if not it will obtain it from maxwell3d and register it. + std::optional ObtainKey(u32 buffer, u32 offset); + + // Manually inserts a key. + void InsertKey(u32 buffer, u32 offset, u32 value); + + // Retrieves the number of keys registered. + u32 NumKeys() const; + + // Gives an accessor to the key's database. + const std::unordered_map, u32, Common::PairHash>& AccessKeys() const; + + // Checks keys against maxwell3d's current const buffers. Returns true if they + // are the same value, false otherwise; + bool AreKeysConsistant() const; + +private: + Tegra::Engines::ConstBufferEngineInterface* engine; + Tegra::Engines::ShaderType shader_stage; + std::unordered_map, u32, Common::PairHash> keys{}; +}; +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 91cd0a5349..68818643c1 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -17,6 +17,7 @@ #include "video_core/engines/shader_header.h" #include "video_core/shader/ast.h" #include "video_core/shader/compiler_settings.h" +#include "video_core/shader/const_buffer_locker.h" #include "video_core/shader/node.h" namespace VideoCommon::Shader { From acd64411342e70bd7e9f7156f62c3b1a609ac3c4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 23 Sep 2019 15:40:58 -0400 Subject: [PATCH 03/13] Shader_Cache: setup connection of ConstBufferLocker --- .../renderer_opengl/gl_rasterizer.cpp | 6 ++- .../renderer_opengl/gl_shader_cache.cpp | 45 ++++++++++++------- .../renderer_opengl/gl_shader_cache.h | 1 + .../renderer_opengl/gl_shader_gen.cpp | 26 +++++++---- .../renderer_opengl/gl_shader_gen.h | 13 ++++-- src/video_core/shader/control_flow.cpp | 22 ++++++--- src/video_core/shader/control_flow.h | 3 +- src/video_core/shader/decode.cpp | 2 +- src/video_core/shader/shader_ir.cpp | 4 +- src/video_core/shader/shader_ir.h | 3 +- 10 files changed, 82 insertions(+), 43 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 10114909b1..9431d64aca 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1006,7 +1006,8 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) } const auto cbuf = entry.GetBindlessCBuf(); Tegra::Texture::TextureHandle tex_handle; - tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, cbuf.first, cbuf.second); + tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, + cbuf.first, cbuf.second); return compute.GetTextureInfo(tex_handle, entry.GetOffset()); }(); @@ -1051,7 +1052,8 @@ void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { } const auto cbuf = entry.GetBindlessCBuf(); Tegra::Texture::TextureHandle tex_handle; - tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, cbuf.first, cbuf.second); + tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, + cbuf.first, cbuf.second); return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic; }(); SetupImage(bindpoint, tic, entry); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 42ca3b1bd2..9e27998769 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -10,6 +10,7 @@ #include "common/scope_exit.h" #include "core/core.h" #include "core/frontend/emu_window.h" +#include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_rasterizer.h" @@ -173,8 +174,9 @@ u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, } /// Creates an unspecialized program from code streams -GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type, - ProgramCode program_code, ProgramCode program_code_b) { +GLShader::ProgramResult CreateProgram(Core::System& system, const Device& device, + ProgramType program_type, ProgramCode program_code, + ProgramCode program_code_b) { GLShader::ShaderSetup setup(program_code); setup.program.size_a = CalculateProgramSize(program_code); setup.program.size_b = 0; @@ -190,14 +192,25 @@ GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_ switch (program_type) { case ProgramType::VertexA: - case ProgramType::VertexB: - return GLShader::GenerateVertexShader(device, setup); - case ProgramType::Geometry: - return GLShader::GenerateGeometryShader(device, setup); - case ProgramType::Fragment: - return GLShader::GenerateFragmentShader(device, setup); - case ProgramType::Compute: - return GLShader::GenerateComputeShader(device, setup); + case ProgramType::VertexB: { + VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Vertex, + &(system.GPU().Maxwell3D())}; + return GLShader::GenerateVertexShader(locker, device, setup); + } + case ProgramType::Geometry: { + VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Geometry, + &(system.GPU().Maxwell3D())}; + return GLShader::GenerateGeometryShader(locker, device, setup); + } + case ProgramType::Fragment: { + VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Fragment, + &(system.GPU().Maxwell3D())}; + return GLShader::GenerateFragmentShader(locker, device, setup); + } + case ProgramType::Compute: { + VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Compute, &(system.GPU().KeplerCompute())}; + return GLShader::GenerateComputeShader(locker, device, setup); + } default: UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast(program_type)); return {}; @@ -307,8 +320,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, ProgramCode&& program_code_b) { const auto code_size{CalculateProgramSize(program_code)}; const auto code_size_b{CalculateProgramSize(program_code_b)}; - auto result{ - CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)}; + auto result{CreateProgram(params.system, params.device, GetProgramType(program_type), + program_code, program_code_b)}; if (result.first.empty()) { // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now return {}; @@ -331,7 +344,7 @@ Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, } Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) { - auto result{CreateProgram(params.device, ProgramType::Compute, code, {})}; + auto result{CreateProgram(params.system, params.device, ProgramType::Compute, code, {})}; const auto code_size{CalculateProgramSize(code)}; params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, @@ -566,7 +579,7 @@ std::unordered_map ShaderCacheOpenGL::GenerateUnspecia result = {stored_decompiled.code, stored_decompiled.entries}; } else { // Otherwise decompile the shader at boot and save the result to the decompiled file - result = CreateProgram(device, raw.GetProgramType(), raw.GetProgramCode(), + result = CreateProgram(system, device, raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB()); disk_cache.SaveDecompiled(unique_identifier, result.first, result.second); } @@ -612,7 +625,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { const auto unique_identifier = GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; - const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, + const ShaderParameters params{disk_cache, precompiled_programs, system, device, cpu_addr, host_ptr, unique_identifier}; const auto found = precompiled_shaders.find(unique_identifier); @@ -639,7 +652,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; - const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, + const ShaderParameters params{disk_cache, precompiled_programs, system, device, cpu_addr, host_ptr, unique_identifier}; const auto found = precompiled_shaders.find(unique_identifier); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index de195cc5de..6ff78f0053 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -45,6 +45,7 @@ using PrecompiledShaders = std::unordered_map; struct ShaderParameters { ShaderDiskCacheOpenGL& disk_cache; const PrecompiledPrograms& precompiled_programs; + Core::System& system; const Device& device; VAddr cpu_addr; u8* host_ptr; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index b5a43e79e7..817c6e12c7 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -21,7 +21,8 @@ static constexpr u32 COMPUTE_OFFSET = 0; static constexpr CompilerSettings settings{CompileDepth::NoFlowStack, true}; -ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { +ProgramResult GenerateVertexShader(ConstBufferLocker& locker, const Device& device, + const ShaderSetup& setup) { const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); std::string out = "// Shader Unique Id: VS" + id + "\n\n"; @@ -35,14 +36,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { )"; - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); + const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings, + locker); const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; ProgramResult program = Decompile(device, program_ir, stage, "vertex"); out += program.first; if (setup.IsDualProgram()) { const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b, - settings); + settings, locker); ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b"); out += program_b.first; } @@ -71,7 +73,8 @@ void main() { return {std::move(out), std::move(program.second)}; } -ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) { +ProgramResult GenerateGeometryShader(ConstBufferLocker& locker, const Device& device, + const ShaderSetup& setup) { const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); std::string out = "// Shader Unique Id: GS" + id + "\n\n"; @@ -85,7 +88,8 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { )"; - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); + const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings, + locker); ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry"); out += program.first; @@ -97,7 +101,8 @@ void main() { return {std::move(out), std::move(program.second)}; } -ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) { +ProgramResult GenerateFragmentShader(ConstBufferLocker& locker, const Device& device, + const ShaderSetup& setup) { const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); std::string out = "// Shader Unique Id: FS" + id + "\n\n"; @@ -120,7 +125,8 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { )"; - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); + const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings, + locker); ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment"); out += program.first; @@ -133,13 +139,15 @@ void main() { return {std::move(out), std::move(program.second)}; } -ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) { +ProgramResult GenerateComputeShader(ConstBufferLocker& locker, const Device& device, + const ShaderSetup& setup) { const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); std::string out = "// Shader Unique Id: CS" + id + "\n\n"; out += GetCommonDeclarations(); - const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings); + const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings, + locker); ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); out += program.first; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 3833e88ab8..05f157298d 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -16,6 +16,7 @@ class Device; namespace OpenGL::GLShader { +using VideoCommon::Shader::ConstBufferLocker; using VideoCommon::Shader::ProgramCode; struct ShaderSetup { @@ -46,15 +47,19 @@ private: }; /// Generates the GLSL vertex shader program source code for the given VS program -ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup); +ProgramResult GenerateVertexShader(ConstBufferLocker& locker, const Device& device, + const ShaderSetup& setup); /// Generates the GLSL geometry shader program source code for the given GS program -ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup); +ProgramResult GenerateGeometryShader(ConstBufferLocker& locker, const Device& device, + const ShaderSetup& setup); /// Generates the GLSL fragment shader program source code for the given FS program -ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); +ProgramResult GenerateFragmentShader(ConstBufferLocker& locker, const Device& device, + const ShaderSetup& setup); /// Generates the GLSL compute shader program source code for the given CS program -ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup); +ProgramResult GenerateComputeShader(ConstBufferLocker& locker, const Device& device, + const ShaderSetup& setup); } // namespace OpenGL::GLShader diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 70f758642a..dac2e42728 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -57,8 +57,8 @@ struct BlockInfo { struct CFGRebuildState { explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, - const u32 start) - : start{start}, program_code{program_code}, program_size{program_size} {} + const u32 start, ConstBufferLocker& locker) + : start{start}, program_code{program_code}, program_size{program_size}, locker{locker} {} u32 start{}; std::vector block_info{}; @@ -72,6 +72,7 @@ struct CFGRebuildState { const ProgramCode& program_code; const std::size_t program_size; ASTManager* manager; + ConstBufferLocker& locker; }; enum class BlockCollision : u32 { None, Found, Inside }; @@ -214,7 +215,7 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState& if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) { if (instr.gpr0.Value() == track_register) { track_register = instr.gpr8.Value(); - result.entries = instr.alu.GetSignedImm20_20(); + result.entries = instr.alu.GetSignedImm20_20() + 1; pos--; found_track = true; break; @@ -406,8 +407,14 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) auto tmp = TrackBranchIndirectInfo(state, address, offset); if (tmp) { auto result = *tmp; - LOG_CRITICAL(HW_GPU, "Track Successful, BRX: buffer:{}, offset:{}, entries:{}", - result.buffer, result.offset, result.entries); + std::string entries{}; + for (u32 i = 0; i < result.entries; i++) { + auto k = locker.ObtainKey(result.buffer, result.offset + i * 4); + entries = entries + std::to_string(*k) + '\n'; + } + LOG_CRITICAL(HW_GPU, + "Track Successful, BRX: buffer:{}, offset:{}, entries:{}, inner:\n{}", + result.buffer, result.offset, result.entries, entries); } else { LOG_CRITICAL(HW_GPU, "Track Unsuccesful"); } @@ -588,14 +595,15 @@ void DecompileShader(CFGRebuildState& state) { std::unique_ptr ScanFlow(const ProgramCode& program_code, std::size_t program_size, u32 start_address, - const CompilerSettings& settings) { + const CompilerSettings& settings, + ConstBufferLocker& locker) { auto result_out = std::make_unique(); if (settings.depth == CompileDepth::BruteForce) { result_out->settings.depth = CompileDepth::BruteForce; return result_out; } - CFGRebuildState state{program_code, program_size, start_address}; + CFGRebuildState state{program_code, program_size, start_address, locker}; // Inspect Code and generate blocks state.labels.clear(); state.labels.emplace(start_address); diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 37e987d629..6d0e50d7c8 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -78,6 +78,7 @@ struct ShaderCharacteristics { std::unique_ptr ScanFlow(const ProgramCode& program_code, std::size_t program_size, u32 start_address, - const CompilerSettings& settings); + const CompilerSettings& settings, + ConstBufferLocker& locker); } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 2626b16160..3f87b87ca2 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -102,7 +102,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); decompiled = false; - auto info = ScanFlow(program_code, program_size, main_offset, settings); + auto info = ScanFlow(program_code, program_size, main_offset, settings, locker); auto& shader_info = *info; coverage_begin = shader_info.start; coverage_end = shader_info.end; diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index c1f2b88c80..6430575ecd 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -23,9 +23,9 @@ using Tegra::Shader::PredOperation; using Tegra::Shader::Register; ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size, - CompilerSettings settings) + CompilerSettings settings, ConstBufferLocker& locker) : program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{}, - program_manager{true, true}, settings{settings} { + program_manager{true, true}, settings{settings}, locker{locker} { Decode(); } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 68818643c1..e3b568d3e9 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -68,7 +68,7 @@ struct GlobalMemoryUsage { class ShaderIR final { public: explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size, - CompilerSettings settings); + CompilerSettings settings, ConstBufferLocker& locker); ~ShaderIR(); const std::map& GetBasicBlocks() const { @@ -389,6 +389,7 @@ private: NodeBlock global_code; ASTManager program_manager; CompilerSettings settings{}; + ConstBufferLocker& locker; std::set used_registers; std::set used_predicates; From 8909f52166bf9c27d52b5a722efbd46d1a11e876 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 23 Sep 2019 22:55:25 -0400 Subject: [PATCH 04/13] Shader_IR: Implement Fast BRX and allow multi-branches in the CFG. --- .../renderer_opengl/gl_shader_decompiler.cpp | 5 + .../renderer_vulkan/vk_shader_decompiler.cpp | 7 + src/video_core/shader/ast.cpp | 4 + src/video_core/shader/control_flow.cpp | 262 +++++++++++------- src/video_core/shader/control_flow.h | 63 +++-- src/video_core/shader/decode.cpp | 34 ++- src/video_core/shader/expr.h | 17 +- 7 files changed, 260 insertions(+), 132 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index baec66ff0d..71d7389cb9 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2338,6 +2338,11 @@ public: inner += expr.value ? "true" : "false"; } + void operator()(VideoCommon::Shader::ExprGprEqual& expr) { + inner += + "( ftou(" + decomp.GetRegister(expr.gpr) + ") == " + std::to_string(expr.value) + ')'; + } + const std::string& GetResult() const { return inner; } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 0d943a826b..42cf068b6a 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1704,6 +1704,13 @@ public: return expr.value ? decomp.v_true : decomp.v_false; } + Id operator()(const ExprGprEqual& expr) { + const Id target = decomp.Constant(decomp.t_uint, expr.value); + const Id gpr = decomp.BitcastTo( + decomp.Emit(decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr)))); + return decomp.Emit(decomp.OpLogicalEqual(decomp.t_uint, gpr, target)); + } + Id Visit(const Expr& node) { return std::visit(*this, *node); } diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp index e43aecc18d..2fa3a3f7dc 100644 --- a/src/video_core/shader/ast.cpp +++ b/src/video_core/shader/ast.cpp @@ -228,6 +228,10 @@ public: inner += expr.value ? "true" : "false"; } + void operator()(ExprGprEqual const& expr) { + inner += "( gpr_" + std::to_string(expr.gpr) + " == " + std::to_string(expr.value) + ')'; + } + const std::string& GetResult() const { return inner; } diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index dac2e42728..d1c269ea72 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -35,14 +35,24 @@ struct BlockStack { std::stack pbk_stack{}; }; -struct BlockBranchInfo { - Condition condition{}; - s32 address{exit_branch}; - bool kill{}; - bool is_sync{}; - bool is_brk{}; - bool ignore{}; -}; +template +BlockBranchInfo MakeBranchInfo(Args&&... args) { + static_assert(std::is_convertible_v); + return std::make_shared(T(std::forward(args)...)); +} + +bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second) { + return false; //(*first) == (*second); +} + +bool BlockBranchIsIgnored(BlockBranchInfo first) { + bool ignore = false; + if (std::holds_alternative(*first)) { + auto branch = std::get_if(first.get()); + ignore = branch->ignore; + } + return ignore; +} struct BlockInfo { u32 start{}; @@ -234,6 +244,7 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) u32 offset = static_cast(address); const u32 end_address = static_cast(state.program_size / sizeof(Instruction)); ParseInfo parse_info{}; + SingleBranch single_branch{}; const auto insert_label = [](CFGRebuildState& state, u32 address) { const auto pair = state.labels.emplace(address); @@ -246,13 +257,14 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) if (offset >= end_address) { // ASSERT_OR_EXECUTE can't be used, as it ignores the break ASSERT_MSG(false, "Shader passed the current limit!"); - parse_info.branch_info.address = exit_branch; - parse_info.branch_info.ignore = false; + + single_branch.address = exit_branch; + single_branch.ignore = false; break; } if (state.registered.count(offset) != 0) { - parse_info.branch_info.address = offset; - parse_info.branch_info.ignore = true; + single_branch.address = offset; + single_branch.ignore = true; break; } if (IsSchedInstruction(offset, state.start)) { @@ -269,24 +281,26 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) switch (opcode->get().GetId()) { case OpCode::Id::EXIT: { const auto pred_index = static_cast(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = exit_branch; - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.address = exit_branch; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } @@ -295,99 +309,107 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) return {ParseResult::AbnormalFlow, parse_info}; } const auto pred_index = static_cast(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } const u32 branch_offset = offset + instr.bra.GetBranchTarget(); if (branch_offset == 0) { - parse_info.branch_info.address = exit_branch; + single_branch.address = exit_branch; } else { - parse_info.branch_info.address = branch_offset; + single_branch.address = branch_offset; } insert_label(state, branch_offset); - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::SYNC: { const auto pred_index = static_cast(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = unassigned_branch; - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = true; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.address = unassigned_branch; + single_branch.kill = false; + single_branch.is_sync = true; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::BRK: { const auto pred_index = static_cast(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = unassigned_branch; - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = true; - parse_info.branch_info.ignore = false; + single_branch.address = unassigned_branch; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = true; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::KIL: { const auto pred_index = static_cast(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = exit_branch; - parse_info.branch_info.kill = true; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.address = exit_branch; + single_branch.kill = true; + single_branch.is_sync = false; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } @@ -407,16 +429,25 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) auto tmp = TrackBranchIndirectInfo(state, address, offset); if (tmp) { auto result = *tmp; - std::string entries{}; + std::vector branches{}; + s32 pc_target = offset + result.relative_position; for (u32 i = 0; i < result.entries; i++) { - auto k = locker.ObtainKey(result.buffer, result.offset + i * 4); - entries = entries + std::to_string(*k) + '\n'; + auto k = state.locker.ObtainKey(result.buffer, result.offset + i * 4); + if (!k) { + return {ParseResult::AbnormalFlow, parse_info}; + } + u32 value = *k; + u32 target = static_cast((value >> 3) + pc_target); + insert_label(state, target); + branches.emplace_back(value, target); } - LOG_CRITICAL(HW_GPU, - "Track Successful, BRX: buffer:{}, offset:{}, entries:{}, inner:\n{}", - result.buffer, result.offset, result.entries, entries); + parse_info.end_address = offset; + parse_info.branch_info = + MakeBranchInfo(static_cast(instr.gpr8.Value()), branches); + + return {ParseResult::ControlCaught, parse_info}; } else { - LOG_CRITICAL(HW_GPU, "Track Unsuccesful"); + LOG_WARNING(HW_GPU, "BRX Track Unsuccesful"); } return {ParseResult::AbnormalFlow, parse_info}; } @@ -426,10 +457,13 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) offset++; } - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = false; parse_info.end_address = offset - 1; + parse_info.branch_info = MakeBranchInfo( + single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync, + single_branch.is_brk, single_branch.ignore); return {ParseResult::BlockEnd, parse_info}; } @@ -453,9 +487,10 @@ bool TryInspectAddress(CFGRebuildState& state) { BlockInfo& current_block = state.block_info[block_index]; current_block.end = address - 1; new_block.branch = current_block.branch; - BlockBranchInfo forward_branch{}; - forward_branch.address = address; - forward_branch.ignore = true; + BlockBranchInfo forward_branch = MakeBranchInfo(); + auto branch = std::get_if(forward_branch.get()); + branch->address = address; + branch->ignore = true; current_block.branch = forward_branch; return true; } @@ -470,12 +505,15 @@ bool TryInspectAddress(CFGRebuildState& state) { BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); block_info.branch = parse_info.branch_info; - if (parse_info.branch_info.condition.IsUnconditional()) { + if (std::holds_alternative(*block_info.branch)) { + auto branch = std::get_if(block_info.branch.get()); + if (branch->condition.IsUnconditional()) { + return true; + } + const u32 fallthrough_address = parse_info.end_address + 1; + state.inspect_queries.push_front(fallthrough_address); return true; } - - const u32 fallthrough_address = parse_info.end_address + 1; - state.inspect_queries.push_front(fallthrough_address); return true; } @@ -513,31 +551,41 @@ bool TryQuery(CFGRebuildState& state) { state.queries.pop_front(); gather_labels(q2.ssy_stack, state.ssy_labels, block); gather_labels(q2.pbk_stack, state.pbk_labels, block); - if (!block.branch.condition.IsUnconditional()) { - q2.address = block.end + 1; - state.queries.push_back(q2); - } + if (std::holds_alternative(*block.branch)) { + auto branch = std::get_if(block.branch.get()); + if (!branch->condition.IsUnconditional()) { + q2.address = block.end + 1; + state.queries.push_back(q2); + } - Query conditional_query{q2}; - if (block.branch.is_sync) { - if (block.branch.address == unassigned_branch) { - block.branch.address = conditional_query.ssy_stack.top(); + Query conditional_query{q2}; + if (branch->is_sync) { + if (branch->address == unassigned_branch) { + branch->address = conditional_query.ssy_stack.top(); + } + conditional_query.ssy_stack.pop(); } - conditional_query.ssy_stack.pop(); - } - if (block.branch.is_brk) { - if (block.branch.address == unassigned_branch) { - block.branch.address = conditional_query.pbk_stack.top(); + if (branch->is_brk) { + if (branch->address == unassigned_branch) { + branch->address = conditional_query.pbk_stack.top(); + } + conditional_query.pbk_stack.pop(); } - conditional_query.pbk_stack.pop(); + conditional_query.address = branch->address; + state.queries.push_back(std::move(conditional_query)); + return true; + } + auto multi_branch = std::get_if(block.branch.get()); + for (auto& branch_case : multi_branch->branches) { + Query conditional_query{q2}; + conditional_query.address = branch_case.address; + state.queries.push_back(std::move(conditional_query)); } - conditional_query.address = block.branch.address; - state.queries.push_back(std::move(conditional_query)); return true; } } // Anonymous namespace -void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) { +void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { const auto get_expr = ([&](const Condition& cond) -> Expr { Expr result{}; if (cond.cc != ConditionCode::T) { @@ -564,15 +612,24 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) { } return MakeExpr(true); }); - if (branch.address < 0) { - if (branch.kill) { - mm.InsertReturn(get_expr(branch.condition), true); + if (std::holds_alternative(*branch_info)) { + auto branch = std::get_if(branch_info.get()); + if (branch->address < 0) { + if (branch->kill) { + mm.InsertReturn(get_expr(branch->condition), true); + return; + } + mm.InsertReturn(get_expr(branch->condition), false); return; } - mm.InsertReturn(get_expr(branch.condition), false); + mm.InsertGoto(get_expr(branch->condition), branch->address); return; } - mm.InsertGoto(get_expr(branch.condition), branch.address); + auto multi_branch = std::get_if(branch_info.get()); + for (auto& branch_case : multi_branch->branches) { + mm.InsertGoto(MakeExpr(multi_branch->gpr, branch_case.cmp_value), + branch_case.address); + } } void DecompileShader(CFGRebuildState& state) { @@ -584,9 +641,10 @@ void DecompileShader(CFGRebuildState& state) { if (state.labels.count(block.start) != 0) { state.manager->InsertLabel(block.start); } - u32 end = block.branch.ignore ? block.end + 1 : block.end; + const bool ignore = BlockBranchIsIgnored(block.branch); + u32 end = ignore ? block.end + 1 : block.end; state.manager->InsertBlock(block.start, end); - if (!block.branch.ignore) { + if (!ignore) { InsertBranch(*state.manager, block.branch); } } @@ -668,11 +726,9 @@ std::unique_ptr ScanFlow(const ProgramCode& program_code, ShaderBlock new_block{}; new_block.start = block.start; new_block.end = block.end; - new_block.ignore_branch = block.branch.ignore; + new_block.ignore_branch = BlockBranchIsIgnored(block.branch); if (!new_block.ignore_branch) { - new_block.branch.cond = block.branch.condition; - new_block.branch.kills = block.branch.kill; - new_block.branch.address = block.branch.address; + new_block.branch = block.branch; } result_out->end = std::max(result_out->end, block.end); result_out->blocks.push_back(new_block); diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 6d0e50d7c8..369ca255b9 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -7,6 +7,7 @@ #include #include #include +#include #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/ast.h" @@ -37,29 +38,57 @@ struct Condition { } }; +class SingleBranch { +public: + SingleBranch() = default; + SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk, + bool ignore) + : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk}, + ignore{ignore} {} + + bool operator==(const SingleBranch& b) const { + return std::tie(condition, address, kill, is_sync, is_brk, ignore) == + std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); + } + + Condition condition{}; + s32 address{exit_branch}; + bool kill{}; + bool is_sync{}; + bool is_brk{}; + bool ignore{}; +}; + +struct CaseBranch { + CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {} + u32 cmp_value; + u32 address; +}; + +class MultiBranch { +public: + MultiBranch(u32 gpr, std::vector& branches) + : gpr{gpr}, branches{std::move(branches)} {} + + u32 gpr{}; + std::vector branches{}; +}; + +using BranchData = std::variant; +using BlockBranchInfo = std::shared_ptr; + +bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second); + struct ShaderBlock { - struct Branch { - Condition cond{}; - bool kills{}; - s32 address{}; - - bool operator==(const Branch& b) const { - return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); - } - - bool operator!=(const Branch& b) const { - return !operator==(b); - } - }; - u32 start{}; u32 end{}; bool ignore_branch{}; - Branch branch{}; + BlockBranchInfo branch{}; bool operator==(const ShaderBlock& sb) const { - return std::tie(start, end, ignore_branch, branch) == - std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); + return std::tie(start, end, ignore_branch) == + std::tie(sb.start, sb.end, sb.ignore_branch) && + BlockBranchInfoAreEqual(branch, sb.branch); } bool operator!=(const ShaderBlock& sb) const { diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 3f87b87ca2..053241128b 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -198,24 +198,38 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { } return result; }; - if (block.branch.address < 0) { - if (block.branch.kills) { - Node n = Operation(OperationCode::Discard); - n = apply_conditions(block.branch.cond, n); + if (std::holds_alternative(*block.branch)) { + auto branch = std::get_if(block.branch.get()); + if (branch->address < 0) { + if (branch->kill) { + Node n = Operation(OperationCode::Discard); + n = apply_conditions(branch->condition, n); + bb.push_back(n); + global_code.push_back(n); + return; + } + Node n = Operation(OperationCode::Exit); + n = apply_conditions(branch->condition, n); bb.push_back(n); global_code.push_back(n); return; } - Node n = Operation(OperationCode::Exit); - n = apply_conditions(block.branch.cond, n); + Node n = Operation(OperationCode::Branch, Immediate(branch->address)); + n = apply_conditions(branch->condition, n); bb.push_back(n); global_code.push_back(n); return; } - Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); - n = apply_conditions(block.branch.cond, n); - bb.push_back(n); - global_code.push_back(n); + auto multi_branch = std::get_if(block.branch.get()); + Node op_a = GetRegister(multi_branch->gpr); + for (auto& branch_case : multi_branch->branches) { + Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); + Node op_b = Immediate(branch_case.cmp_value); + Node condition = GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); + auto result = Conditional(condition, {n}); + bb.push_back(result); + global_code.push_back(result); + } } u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h index d3dcd00ec9..e41d23e936 100644 --- a/src/video_core/shader/expr.h +++ b/src/video_core/shader/expr.h @@ -17,13 +17,14 @@ using Tegra::Shader::Pred; class ExprAnd; class ExprBoolean; class ExprCondCode; +class ExprGprEqual; class ExprNot; class ExprOr; class ExprPredicate; class ExprVar; -using ExprData = - std::variant; +using ExprData = std::variant; using Expr = std::shared_ptr; class ExprAnd final { @@ -118,6 +119,18 @@ public: bool value; }; +class ExprGprEqual final { +public: + ExprGprEqual(u32 gpr, u32 value) : gpr{gpr}, value{value} {} + + bool operator==(const ExprGprEqual& b) const { + return gpr == b.gpr && value == b.value; + } + + u32 gpr; + u32 value; +}; + template Expr MakeExpr(Args&&... args) { static_assert(std::is_convertible_v); From 33fcec3502f5dd5a99b7a8337128b7c99bfba908 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 25 Sep 2019 09:53:18 -0400 Subject: [PATCH 05/13] Shader_IR: allow lookup of texture samplers within the shader_ir for instructions that don't provide it --- .../engines/const_buffer_engine_interface.h | 95 ++++++++++++++- src/video_core/engines/kepler_compute.cpp | 18 +++ src/video_core/engines/kepler_compute.h | 13 ++- src/video_core/engines/maxwell_3d.cpp | 18 +++ src/video_core/engines/maxwell_3d.h | 11 +- src/video_core/shader/const_buffer_locker.cpp | 110 +++++++++++++++--- src/video_core/shader/const_buffer_locker.h | 60 +++++++++- src/video_core/shader/decode/texture.cpp | 72 +++++++++--- src/video_core/shader/shader_ir.h | 12 +- 9 files changed, 363 insertions(+), 46 deletions(-) diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index cc41a9cacb..c0e3a3a173 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h @@ -4,7 +4,10 @@ #pragma once +#include "common/bit_field.h" #include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/textures/texture.h" namespace Tegra::Engines { @@ -17,10 +20,100 @@ enum class ShaderType : u32 { Compute = 5, }; +struct SamplerDescriptor { + union { + BitField<0, 20, Tegra::Shader::TextureType> texture_type; + BitField<20, 1, u32> is_array; + BitField<21, 1, u32> is_buffer; + BitField<22, 1, u32> is_shadow; + u32 raw{}; + }; + + static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) { + SamplerDescriptor result{}; + switch (tic_texture_type) { + case Tegra::Texture::TextureType::Texture1D: { + result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + } + case Tegra::Texture::TextureType::Texture2D: { + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + } + case Tegra::Texture::TextureType::Texture3D: { + result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + } + case Tegra::Texture::TextureType::TextureCubemap: { + result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + } + case Tegra::Texture::TextureType::Texture1DArray: { + result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); + result.is_array.Assign(1); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + } + case Tegra::Texture::TextureType::Texture2DArray: { + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(1); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + } + case Tegra::Texture::TextureType::Texture1DBuffer: { + result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); + result.is_array.Assign(0); + result.is_buffer.Assign(1); + result.is_shadow.Assign(0); + return result; + } + case Tegra::Texture::TextureType::Texture2DNoMipmap: { + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + } + case Tegra::Texture::TextureType::TextureCubeArray: { + result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); + result.is_array.Assign(1); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + } + default: { + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + } + } + } +}; + class ConstBufferEngineInterface { public: virtual ~ConstBufferEngineInterface() {} virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; + virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; + virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const = 0; + virtual u32 GetBoundBuffer() const = 0; }; -} +} // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index ba97c28949..6f00db1c16 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -78,6 +78,24 @@ u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 o return result; } +SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const { + return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); +} + +SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const { + ASSERT(stage == ShaderType::Compute); + const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; + const GPUVAddr tex_info_address = + tex_info_buffer.Address() + offset; + + const Texture::TextureHandle tex_handle{memory_manager.Read(tex_info_address)}; + const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset); + SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); + result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); + return result; +} + void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index d7e0dfcd6f..8e71827276 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -10,8 +10,8 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" -#include "video_core/engines/engine_upload.h" #include "video_core/engines/const_buffer_engine_interface.h" +#include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" #include "video_core/textures/texture.h" @@ -38,7 +38,7 @@ namespace Tegra::Engines { #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) -class KeplerCompute final : public ConstBufferEngineInterface { +class KeplerCompute final : public ConstBufferEngineInterface { public: explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); @@ -204,6 +204,15 @@ public: u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; + SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; + + SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const override; + + u32 GetBoundBuffer() const override { + return regs.tex_cb_index; + } + private: Core::System& system; VideoCore::RasterizerInterface& rasterizer; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 92e38b0713..5589554517 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -856,4 +856,22 @@ u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offse return result; } +SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { + return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); +} + +SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const { + ASSERT(stage != ShaderType::Compute); + const auto& shader = state.shader_stages[static_cast(stage)]; + const auto& tex_info_buffer = shader.const_buffers[const_buffer]; + const GPUVAddr tex_info_address = tex_info_buffer.address + offset; + + const Texture::TextureHandle tex_handle{memory_manager.Read(tex_info_address)}; + const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset); + SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); + result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); + return result; +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 04d02d2086..fa846a6217 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -15,8 +15,8 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" -#include "video_core/engines/const_buffer_info.h" #include "video_core/engines/const_buffer_engine_interface.h" +#include "video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" #include "video_core/macro_interpreter.h" @@ -1260,6 +1260,15 @@ public: u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; + SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; + + SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const override; + + u32 GetBoundBuffer() const override { + return regs.tex_cb_index; + } + /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than /// we've seen used. using MacroMemory = std::array; diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index 6a9e0ed5e3..4f5de8ae9f 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -27,43 +27,121 @@ void ConstBufferLocker::SetEngine(Tegra::Engines::ConstBufferEngineInterface* en } std::optional ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { + if (!keys) { + keys = std::make_shared(); + } + auto& key_map = *keys; const std::pair key = {buffer, offset}; - const auto iter = keys.find(key); - if (iter != keys.end()) { + const auto iter = key_map.find(key); + if (iter != key_map.end()) { return {iter->second}; } if (!IsEngineSet()) { return {}; } const u32 value = engine->AccessConstBuffer32(shader_stage, buffer, offset); - keys.emplace(key, value); + key_map.emplace(key, value); + return {value}; +} + +std::optional ConstBufferLocker::ObtainBoundSampler(u32 offset) { + if (!bound_samplers) { + bound_samplers = std::make_shared(); + } + auto& key_map = *bound_samplers; + const u32 key = offset; + const auto iter = key_map.find(key); + if (iter != key_map.end()) { + return {iter->second}; + } + if (!IsEngineSet()) { + return {}; + } + const Tegra::Engines::SamplerDescriptor value = + engine->AccessBoundSampler(shader_stage, offset); + key_map.emplace(key, value); + return {value}; +} + +std::optional ConstBufferLocker::ObtainBindlessSampler( + u32 buffer, u32 offset) { + if (!bindless_samplers) { + bindless_samplers = std::make_shared(); + } + auto& key_map = *bindless_samplers; + const std::pair key = {buffer, offset}; + const auto iter = key_map.find(key); + if (iter != key_map.end()) { + return {iter->second}; + } + if (!IsEngineSet()) { + return {}; + } + const Tegra::Engines::SamplerDescriptor value = + engine->AccessBindlessSampler(shader_stage, buffer, offset); + key_map.emplace(key, value); return {value}; } void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { + if (!keys) { + keys = std::make_shared(); + } const std::pair key = {buffer, offset}; - keys[key] = value; + (*keys)[key] = value; } -u32 ConstBufferLocker::NumKeys() const { - return keys.size(); +void ConstBufferLocker::InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler) { + if (!bound_samplers) { + bound_samplers = std::make_shared(); + } + (*bound_samplers)[offset] = sampler; } -const std::unordered_map, u32, Common::PairHash>& -ConstBufferLocker::AccessKeys() const { - return keys; +void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, + Tegra::Engines::SamplerDescriptor sampler) { + if (!bindless_samplers) { + bindless_samplers = std::make_shared(); + } + const std::pair key = {buffer, offset}; + (*bindless_samplers)[key] = sampler; } -bool ConstBufferLocker::AreKeysConsistant() const { +bool ConstBufferLocker::IsConsistant() const { if (!IsEngineSet()) { return false; } - for (const auto& key_val : keys) { - const std::pair key = key_val.first; - const u32 value = key_val.second; - const u32 other_value = engine->AccessConstBuffer32(shader_stage, key.first, key.second); - if (other_value != value) { - return false; + if (keys) { + for (const auto& key_val : *keys) { + const std::pair key = key_val.first; + const u32 value = key_val.second; + const u32 other_value = + engine->AccessConstBuffer32(shader_stage, key.first, key.second); + if (other_value != value) { + return false; + } + } + } + if (bound_samplers) { + for (const auto& sampler_val : *bound_samplers) { + const u32 key = sampler_val.first; + const Tegra::Engines::SamplerDescriptor value = sampler_val.second; + const Tegra::Engines::SamplerDescriptor other_value = + engine->AccessBoundSampler(shader_stage, key); + if (other_value.raw != value.raw) { + return false; + } + } + } + if (bindless_samplers) { + for (const auto& sampler_val : *bindless_samplers) { + const std::pair key = sampler_val.first; + const Tegra::Engines::SamplerDescriptor value = sampler_val.second; + const Tegra::Engines::SamplerDescriptor other_value = + engine->AccessBindlessSampler(shader_stage, key.first, key.second); + if (other_value.raw != value.raw) { + return false; + } } } return true; diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 39e62584da..0bc2577811 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -11,6 +11,11 @@ namespace VideoCommon::Shader { +using KeyMap = std::unordered_map, u32, Common::PairHash>; +using BoundSamplerMap = std::unordered_map; +using BindlessSamplerMap = + std::unordered_map, Tegra::Engines::SamplerDescriptor, Common::PairHash>; + class ConstBufferLocker { public: explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); @@ -29,22 +34,67 @@ public: // registered value, if not it will obtain it from maxwell3d and register it. std::optional ObtainKey(u32 buffer, u32 offset); + std::optional ObtainBoundSampler(u32 offset); + + std::optional ObtainBindlessSampler(u32 buffer, u32 offset); + // Manually inserts a key. void InsertKey(u32 buffer, u32 offset, u32 value); + void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); + + void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); + // Retrieves the number of keys registered. - u32 NumKeys() const; + std::size_t NumKeys() const { + if (!keys) { + return 0; + } + return keys->size(); + } + + std::size_t NumBoundSamplers() const { + if (!bound_samplers) { + return 0; + } + return bound_samplers->size(); + } + + std::size_t NumBindlessSamplers() const { + if (!bindless_samplers) { + return 0; + } + return bindless_samplers->size(); + } // Gives an accessor to the key's database. - const std::unordered_map, u32, Common::PairHash>& AccessKeys() const; + // Pre: NumKeys > 0 + const KeyMap& AccessKeys() const { + return *keys; + } - // Checks keys against maxwell3d's current const buffers. Returns true if they + // Gives an accessor to the sampler's database. + // Pre: NumBindlessSamplers > 0 + const BoundSamplerMap& AccessBoundSamplers() const { + return *bound_samplers; + } + + // Gives an accessor to the sampler's database. + // Pre: NumBindlessSamplers > 0 + const BindlessSamplerMap& AccessBindlessSamplers() const { + return *bindless_samplers; + } + + // Checks keys & samplers against engine's current const buffers. Returns true if they // are the same value, false otherwise; - bool AreKeysConsistant() const; + bool IsConsistant() const; private: Tegra::Engines::ConstBufferEngineInterface* engine; Tegra::Engines::ShaderType shader_stage; - std::unordered_map, u32, Common::PairHash> keys{}; + // All containers are lazy initialized as most shaders don't use them. + std::shared_ptr keys{}; + std::shared_ptr bound_samplers{}; + std::shared_ptr bindless_samplers{}; }; } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 0b934a0696..c369e23adf 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const Node component = Immediate(static_cast(instr.tld4s.component)); const auto& sampler = - GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); + GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -165,10 +165,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. const auto& sampler = - is_bindless - ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false, - false) - : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); + is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {}); u32 indexer = 0; switch (instr.txq.query_type) { @@ -207,9 +204,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; - const auto& sampler = is_bindless - ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false) - : GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = + is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}}) + : GetSampler(instr.sampler, {{texture_type, is_array, false}}); std::vector coords; @@ -285,10 +282,30 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { return pc; } -const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, - bool is_array, bool is_shadow) { +const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, + std::optional sampler_info) { const auto offset = static_cast(sampler.index.Value()); + Tegra::Shader::TextureType type; + bool is_array; + bool is_shadow; + if (sampler_info) { + type = sampler_info->type; + is_array = sampler_info->is_array; + is_shadow = sampler_info->is_shadow; + } else { + auto sampler = locker.ObtainBoundSampler(offset); + if (sampler) { + type = sampler->texture_type.Value(); + is_array = sampler->is_array.Value() != 0; + is_shadow = sampler->is_shadow.Value() != 0; + } else { + type = Tegra::Shader::TextureType::Texture2D; + is_array = false; + is_shadow = false; + } + } + // If this sampler has already been used, return the existing mapping. const auto itr = std::find_if(used_samplers.begin(), used_samplers.end(), @@ -305,13 +322,32 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu return *used_samplers.emplace(entry).first; } -const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, - bool is_array, bool is_shadow) { +const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, + std::optional sampler_info) { const Node sampler_register = GetRegister(reg); const auto [base_sampler, cbuf_index, cbuf_offset] = TrackCbuf(sampler_register, global_code, static_cast(global_code.size())); ASSERT(base_sampler != nullptr); const auto cbuf_key = (static_cast(cbuf_index) << 32) | static_cast(cbuf_offset); + Tegra::Shader::TextureType type; + bool is_array; + bool is_shadow; + if (sampler_info) { + type = sampler_info->type; + is_array = sampler_info->is_array; + is_shadow = sampler_info->is_shadow; + } else { + auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); + if (sampler) { + type = sampler->texture_type.Value(); + is_array = sampler->is_array.Value() != 0; + is_shadow = sampler->is_shadow.Value() != 0; + } else { + type = Tegra::Shader::TextureType::Texture2D; + is_array = false; + is_shadow = false; + } + } // If this sampler has already been used, return the existing mapping. const auto itr = @@ -411,9 +447,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, (texture_type == TextureType::TextureCube && is_array && is_shadow), "This method is not supported."); - const auto& sampler = is_bindless - ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow) - : GetSampler(instr.sampler, texture_type, is_array, is_shadow); + const auto& sampler = + is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}}) + : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}}); const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || @@ -577,7 +613,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de dc = GetRegister(parameter_register++); } - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); + const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -610,7 +646,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -646,7 +682,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is // When lod is used always is in gpr20 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index e3b568d3e9..3a3e381d25 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -173,6 +173,13 @@ public: private: friend class ASTDecoder; + + struct SamplerInfo { + Tegra::Shader::TextureType type; + bool is_array; + bool is_shadow; + }; + void Decode(); NodeBlock DecodeRange(u32 begin, u32 end); @@ -297,12 +304,11 @@ private: /// Accesses a texture sampler const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, - Tegra::Shader::TextureType type, bool is_array, bool is_shadow); + std::optional sampler_info); // Accesses a texture sampler for a bindless texture. const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, - Tegra::Shader::TextureType type, bool is_array, - bool is_shadow); + std::optional sampler_info); /// Accesses an image. Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); From a05120ec0b8b1827ebeffd4e78a553f7886fa178 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 25 Sep 2019 15:03:13 -0400 Subject: [PATCH 06/13] Shader_IR: Correct typo in Consistent method. --- src/video_core/shader/const_buffer_locker.cpp | 2 +- src/video_core/shader/const_buffer_locker.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index 4f5de8ae9f..9d23bcecfb 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -107,7 +107,7 @@ void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, (*bindless_samplers)[key] = sampler; } -bool ConstBufferLocker::IsConsistant() const { +bool ConstBufferLocker::IsConsistent() const { if (!IsEngineSet()) { return false; } diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 0bc2577811..13eeba3204 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -87,7 +87,7 @@ public: // Checks keys & samplers against engine's current const buffers. Returns true if they // are the same value, false otherwise; - bool IsConsistant() const; + bool IsConsistent() const; private: Tegra::Engines::ConstBufferEngineInterface* engine; From 1244f2d368076aec61327ee1440c5efd9ae046d6 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 23 Sep 2019 22:55:25 -0400 Subject: [PATCH 07/13] Shader_IR: Implement Fast BRX and allow multi-branches in the CFG. --- src/video_core/shader/ast.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp index 2fa3a3f7dc..3f96d9076c 100644 --- a/src/video_core/shader/ast.cpp +++ b/src/video_core/shader/ast.cpp @@ -228,7 +228,7 @@ public: inner += expr.value ? "true" : "false"; } - void operator()(ExprGprEqual const& expr) { + void operator()(const ExprGprEqual& expr) { inner += "( gpr_" + std::to_string(expr.gpr) + " == " + std::to_string(expr.value) + ')'; } From 7b81ba4d8a9805f808fcc60a0905ac74d293b2ee Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 24 Sep 2019 23:34:18 -0300 Subject: [PATCH 08/13] gl_shader_decompiler: Move entries to a separate function --- .../renderer_opengl/gl_shader_cache.cpp | 481 +++++++++--------- .../renderer_opengl/gl_shader_cache.h | 66 ++- .../renderer_opengl/gl_shader_decompiler.cpp | 72 +-- .../renderer_opengl/gl_shader_decompiler.h | 9 +- .../renderer_opengl/gl_shader_disk_cache.cpp | 286 ++--------- .../renderer_opengl/gl_shader_disk_cache.h | 32 +- .../renderer_opengl/gl_shader_gen.cpp | 94 +--- .../renderer_opengl/gl_shader_gen.h | 41 +- src/video_core/shader/const_buffer_locker.cpp | 8 +- src/video_core/shader/const_buffer_locker.h | 4 +- src/video_core/shader/control_flow.cpp | 18 +- src/video_core/shader/control_flow.h | 3 +- src/video_core/shader/decode.cpp | 9 +- src/video_core/shader/shader_ir.cpp | 7 +- src/video_core/shader/shader_ir.h | 12 +- 15 files changed, 420 insertions(+), 722 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9e27998769..6402d67632 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -3,10 +3,12 @@ // Refer to the license.txt file included. #include +#include +#include #include +#include #include #include "common/assert.h" -#include "common/hash.h" #include "common/scope_exit.h" #include "core/core.h" #include "core/frontend/emu_window.h" @@ -22,18 +24,20 @@ namespace OpenGL { +using Tegra::Engines::ShaderType; +using VideoCommon::Shader::ConstBufferLocker; using VideoCommon::Shader::ProgramCode; +using VideoCommon::Shader::ShaderIR; + +namespace { // One UBO is always reserved for emulation values on staged shaders constexpr u32 STAGE_RESERVED_UBOS = 1; -struct UnspecializedShader { - std::string code; - GLShader::ShaderEntries entries; - ProgramType program_type; -}; +constexpr u32 STAGE_MAIN_OFFSET = 10; +constexpr u32 KERNEL_MAIN_OFFSET = 0; -namespace { +constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; /// Gets the address for the specified shader stage program GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { @@ -42,6 +46,39 @@ GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) return gpu.regs.code_address.CodeAddress() + shader_config.offset; } +/// Gets if the current instruction offset is a scheduler instruction +constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { + // Sched instructions appear once every 4 instructions. + constexpr std::size_t SchedPeriod = 4; + const std::size_t absolute_offset = offset - main_offset; + return (absolute_offset % SchedPeriod) == 0; +} + +/// Calculates the size of a program stream +std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { + constexpr std::size_t start_offset = 10; + // This is the encoded version of BRA that jumps to itself. All Nvidia + // shaders end with one. + constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; + constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; + std::size_t offset = start_offset; + while (offset < program.size()) { + const u64 instruction = program[offset]; + if (!IsSchedInstruction(offset, start_offset)) { + if ((instruction & mask) == self_jumping_branch) { + // End on Maxwell's "nop" instruction + break; + } + if (instruction == 0) { + break; + } + } + offset++; + } + // The last instruction is included in the program size + return std::min(offset + 1, program.size()); +} + /// Gets the shader program code from memory for the specified address ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, const u8* host_ptr) { @@ -52,6 +89,7 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g }); memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), program_code.size() * sizeof(u64)); + program_code.resize(CalculateProgramSize(program_code)); return program_code; } @@ -72,14 +110,6 @@ constexpr GLenum GetShaderType(ProgramType program_type) { } } -/// Gets if the current instruction offset is a scheduler instruction -constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { - // Sched instructions appear once every 4 instructions. - constexpr std::size_t SchedPeriod = 4; - const std::size_t absolute_offset = offset - main_offset; - return (absolute_offset % SchedPeriod) == 0; -} - /// Describes primitive behavior on geometry shaders constexpr std::tuple GetPrimitiveDescription(GLenum primitive_mode) { switch (primitive_mode) { @@ -122,122 +152,114 @@ ProgramType GetProgramType(Maxwell::ShaderProgram program) { return {}; } -/// Calculates the size of a program stream -std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { - constexpr std::size_t start_offset = 10; - // This is the encoded version of BRA that jumps to itself. All Nvidia - // shaders end with one. - constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; - constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; - std::size_t offset = start_offset; - std::size_t size = start_offset * sizeof(u64); - while (offset < program.size()) { - const u64 instruction = program[offset]; - if (!IsSchedInstruction(offset, start_offset)) { - if ((instruction & mask) == self_jumping_branch) { - // End on Maxwell's "nop" instruction - break; - } - if (instruction == 0) { - break; - } - } - size += sizeof(u64); - offset++; - } - // The last instruction is included in the program size - return std::min(size + sizeof(u64), program.size() * sizeof(u64)); -} - /// Hashes one (or two) program streams u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, - const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { - if (size_a == 0) { - size_a = CalculateProgramSize(code); + const ProgramCode& code_b) { + u64 unique_identifier = boost::hash_value(code); + if (program_type == ProgramType::VertexA) { + // VertexA programs include two programs + boost::hash_combine(unique_identifier, boost::hash_value(code_b)); } - u64 unique_identifier = Common::CityHash64(reinterpret_cast(code.data()), size_a); - if (program_type != ProgramType::VertexA) { - return unique_identifier; - } - // VertexA programs include two programs - - std::size_t seed = 0; - boost::hash_combine(seed, unique_identifier); - - if (size_b == 0) { - size_b = CalculateProgramSize(code_b); - } - const u64 identifier_b = - Common::CityHash64(reinterpret_cast(code_b.data()), size_b); - boost::hash_combine(seed, identifier_b); - return static_cast(seed); + return unique_identifier; } /// Creates an unspecialized program from code streams -GLShader::ProgramResult CreateProgram(Core::System& system, const Device& device, - ProgramType program_type, ProgramCode program_code, - ProgramCode program_code_b) { - GLShader::ShaderSetup setup(program_code); - setup.program.size_a = CalculateProgramSize(program_code); - setup.program.size_b = 0; - if (program_type == ProgramType::VertexA) { - // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. - // Conventional HW does not support this, so we combine VertexA and VertexB into one - // stage here. - setup.SetProgramB(program_code_b); - setup.program.size_b = CalculateProgramSize(program_code_b); - } - setup.program.unique_identifier = GetUniqueIdentifier( - program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); - +std::string GenerateGLSL(const Device& device, ProgramType program_type, const ShaderIR& ir, + const std::optional& ir_b) { switch (program_type) { case ProgramType::VertexA: - case ProgramType::VertexB: { - VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Vertex, - &(system.GPU().Maxwell3D())}; - return GLShader::GenerateVertexShader(locker, device, setup); - } - case ProgramType::Geometry: { - VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Geometry, - &(system.GPU().Maxwell3D())}; - return GLShader::GenerateGeometryShader(locker, device, setup); - } - case ProgramType::Fragment: { - VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Fragment, - &(system.GPU().Maxwell3D())}; - return GLShader::GenerateFragmentShader(locker, device, setup); - } - case ProgramType::Compute: { - VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Compute, &(system.GPU().KeplerCompute())}; - return GLShader::GenerateComputeShader(locker, device, setup); - } + case ProgramType::VertexB: + return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr); + case ProgramType::Geometry: + return GLShader::GenerateGeometryShader(device, ir); + case ProgramType::Fragment: + return GLShader::GenerateFragmentShader(device, ir); + case ProgramType::Compute: + return GLShader::GenerateComputeShader(device, ir); default: UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast(program_type)); return {}; } } -CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, - ProgramType program_type, const ProgramVariant& variant, - bool hint_retrievable = false) { +constexpr const char* GetProgramTypeName(ProgramType program_type) { + switch (program_type) { + case ProgramType::VertexA: + case ProgramType::VertexB: + return "VS"; + case ProgramType::TessellationControl: + return "TCS"; + case ProgramType::TessellationEval: + return "TES"; + case ProgramType::Geometry: + return "GS"; + case ProgramType::Fragment: + return "FS"; + case ProgramType::Compute: + return "CS"; + } + return "UNK"; +} + +Tegra::Engines::ShaderType GetEnginesShaderType(ProgramType program_type) { + switch (program_type) { + case ProgramType::VertexA: + case ProgramType::VertexB: + return Tegra::Engines::ShaderType::Vertex; + case ProgramType::TessellationControl: + return Tegra::Engines::ShaderType::TesselationControl; + case ProgramType::TessellationEval: + return Tegra::Engines::ShaderType::TesselationEval; + case ProgramType::Geometry: + return Tegra::Engines::ShaderType::Geometry; + case ProgramType::Fragment: + return Tegra::Engines::ShaderType::Fragment; + case ProgramType::Compute: + return Tegra::Engines::ShaderType::Compute; + } + UNREACHABLE(); + return {}; +} + +std::string GetShaderId(u64 unique_identifier, ProgramType program_type) { + return fmt::format("{}{:016X}", GetProgramTypeName(program_type), unique_identifier); +} + +CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type, + const ProgramCode& program_code, const ProgramCode& program_code_b, + const ProgramVariant& variant, ConstBufferLocker& locker, + bool hint_retrievable = false) { + LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type)); + + const bool is_compute = program_type == ProgramType::Compute; + const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; + const ShaderIR ir(program_code, main_offset, COMPILER_SETTINGS, locker); + std::optional ir_b; + if (!program_code_b.empty()) { + ir_b.emplace(program_code_b, main_offset, COMPILER_SETTINGS, locker); + } + const auto entries = GLShader::GetEntries(ir); + auto base_bindings{variant.base_bindings}; const auto primitive_mode{variant.primitive_mode}; const auto texture_buffer_usage{variant.texture_buffer_usage}; - std::string source = R"(#version 430 core + std::string source = fmt::format(R"(// {} +#version 430 core #extension GL_ARB_separate_shader_objects : enable #extension GL_ARB_shader_viewport_layer_array : enable #extension GL_EXT_shader_image_load_formatted : enable #extension GL_NV_gpu_shader5 : enable #extension GL_NV_shader_thread_group : enable #extension GL_NV_shader_thread_shuffle : enable -)"; - if (program_type == ProgramType::Compute) { +)", + GetShaderId(unique_identifier, program_type)); + if (is_compute) { source += "#extension GL_ARB_compute_variable_group_size : require\n"; } source += '\n'; - if (program_type != ProgramType::Compute) { + if (!is_compute) { source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); } @@ -281,7 +303,7 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn } source += '\n'; - source += code; + source += GenerateGLSL(device, program_type, ir, ir_b); OGLShader shader; shader.Create(source.c_str(), GetShaderType(program_type)); @@ -291,85 +313,86 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn return program; } -std::set GetSupportedFormats() { - std::set supported_formats; - +std::unordered_set GetSupportedFormats() { GLint num_formats{}; glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); std::vector formats(num_formats); glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); - for (const GLint format : formats) + std::unordered_set supported_formats; + for (const GLint format : formats) { supported_formats.insert(static_cast(format)); + } return supported_formats; } } // Anonymous namespace CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, - GLShader::ProgramResult result) - : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr}, - unique_identifier{params.unique_identifier}, program_type{program_type}, - disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs}, - entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {} + GLShader::ShaderEntries entries, ProgramCode program_code, + ProgramCode program_code_b) + : RasterizerCacheObject{params.host_ptr}, system{params.system}, + disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, + unique_identifier{params.unique_identifier}, program_type{program_type}, entries{entries}, + program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} { + if (params.precompiled_variants) { + for (const auto& pair : *params.precompiled_variants) { + const auto& variant = pair->first.variant; + programs.emplace(variant, pair->second); + } + } +} Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, Maxwell::ShaderProgram program_type, - ProgramCode&& program_code, - ProgramCode&& program_code_b) { - const auto code_size{CalculateProgramSize(program_code)}; - const auto code_size_b{CalculateProgramSize(program_code_b)}; - auto result{CreateProgram(params.system, params.device, GetProgramType(program_type), - program_code, program_code_b)}; - if (result.first.empty()) { - // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now - return {}; - } - + ProgramCode program_code, ProgramCode program_code_b) { params.disk_cache.SaveRaw(ShaderDiskCacheRaw( - params.unique_identifier, GetProgramType(program_type), - static_cast(code_size / sizeof(u64)), static_cast(code_size_b / sizeof(u64)), - std::move(program_code), std::move(program_code_b))); + params.unique_identifier, GetProgramType(program_type), program_code, program_code_b)); + ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type))); + const ShaderIR ir(program_code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); + // TODO(Rodrigo): Handle VertexA shaders + // std::optional ir_b; + // if (!program_code_b.empty()) { + // ir_b.emplace(program_code_b, STAGE_MAIN_OFFSET); + // } return std::shared_ptr( - new CachedShader(params, GetProgramType(program_type), std::move(result))); + new CachedShader(params, GetProgramType(program_type), GLShader::GetEntries(ir), + std::move(program_code), std::move(program_code_b))); } -Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - GLShader::ProgramResult result) { - return std::shared_ptr( - new CachedShader(params, GetProgramType(program_type), std::move(result))); +Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { + params.disk_cache.SaveRaw( + ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, code)); + + ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute); + const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); + return std::shared_ptr(new CachedShader( + params, ProgramType::Compute, GLShader::GetEntries(ir), std::move(code), {})); } -Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) { - auto result{CreateProgram(params.system, params.device, ProgramType::Compute, code, {})}; - - const auto code_size{CalculateProgramSize(code)}; - params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, - static_cast(code_size / sizeof(u64)), 0, - std::move(code), {})); - - return std::shared_ptr( - new CachedShader(params, ProgramType::Compute, std::move(result))); -} - -Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, - GLShader::ProgramResult result) { - return std::shared_ptr( - new CachedShader(params, ProgramType::Compute, std::move(result))); +Shader CachedShader::CreateFromCache(const ShaderParameters& params, + const UnspecializedShader& unspecialized) { + return std::shared_ptr(new CachedShader(params, unspecialized.program_type, + unspecialized.entries, unspecialized.code, + unspecialized.code_b)); } std::tuple CachedShader::GetProgramHandle(const ProgramVariant& variant) { const auto [entry, is_cache_miss] = programs.try_emplace(variant); auto& program = entry->second; if (is_cache_miss) { - program = TryLoadProgram(variant); - if (!program) { - program = SpecializeShader(code, entries, program_type, variant); - disk_cache.SaveUsage(GetUsage(variant)); + Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; + if (program_type == ProgramType::Compute) { + engine = &system.GPU().KeplerCompute(); + } else { + engine = &system.GPU().Maxwell3D(); } + ConstBufferLocker locker(GetEnginesShaderType(program_type), *engine); + program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b, + variant, locker); + disk_cache.SaveUsage(GetUsage(variant)); LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); } @@ -385,14 +408,6 @@ std::tuple CachedShader::GetProgramHandle(const ProgramVar return {program->handle, base_bindings}; } -CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const { - const auto found = precompiled_programs.find(GetUsage(variant)); - if (found == precompiled_programs.end()) { - return {}; - } - return found->second; -} - ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const { ShaderDiskCacheUsage usage; usage.unique_identifier = unique_identifier; @@ -412,18 +427,15 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, return; } const auto [raws, shader_usages] = *transferable; - - auto [decompiled, dumps] = disk_cache.LoadPrecompiled(); - - const auto supported_formats{GetSupportedFormats()}; - const auto unspecialized_shaders{ - GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)}; - if (stop_loading) { + if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) { return; } - // Track if precompiled cache was altered during loading to know if we have to serialize the - // virtual precompiled cache file back to the hard drive + const auto dumps = disk_cache.LoadPrecompiled(); + const auto supported_formats = GetSupportedFormats(); + + // Track if precompiled cache was altered during loading to know if we have to + // serialize the virtual precompiled cache file back to the hard drive bool precompiled_cache_altered = false; // Inform the frontend about shader build initialization @@ -446,9 +458,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, return; } const auto& usage{shader_usages[i]}; - LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})", - usage.unique_identifier, i, shader_usages.size()); - const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; const auto dump{dumps.find(usage)}; @@ -462,21 +471,27 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } } if (!shader) { - shader = SpecializeShader(unspecialized.code, unspecialized.entries, - unspecialized.program_type, usage.variant, true); + ConstBufferLocker locker(GetEnginesShaderType(unspecialized.program_type)); + shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type, + unspecialized.code, unspecialized.code_b, usage.variant, + locker, true); } - std::scoped_lock lock(mutex); + std::scoped_lock lock{mutex}; if (callback) { callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, shader_usages.size()); } precompiled_programs.emplace(usage, std::move(shader)); + + // TODO(Rodrigo): Is there a better way to do this? + precompiled_variants[usage.unique_identifier].push_back( + precompiled_programs.find(usage)); } }; - const auto num_workers{static_cast(std::thread::hardware_concurrency() + 1)}; + const auto num_workers{static_cast(std::thread::hardware_concurrency() + 1ULL)}; const std::size_t bucket_size{shader_usages.size() / num_workers}; std::vector> contexts(num_workers); std::vector threads(num_workers); @@ -496,7 +511,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, if (compilation_failed) { // Invalidate the precompiled cache if a shader dumped shader was rejected disk_cache.InvalidatePrecompiled(); - dumps.clear(); precompiled_cache_altered = true; return; } @@ -504,8 +518,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, return; } - // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before - // precompiling them + // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw + // before precompiling them for (std::size_t i = 0; i < shader_usages.size(); ++i) { const auto& usage{shader_usages[i]}; @@ -521,9 +535,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } } -CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( - const ShaderDiskCacheDump& dump, const std::set& supported_formats) { +const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const { + const auto it = precompiled_variants.find(unique_identifier); + return it == precompiled_variants.end() ? nullptr : &it->second; +} +CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( + const ShaderDiskCacheDump& dump, const std::unordered_set& supported_formats) { if (supported_formats.find(dump.binary_format) == supported_formats.end()) { LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); return {}; @@ -545,56 +563,52 @@ CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( return shader; } -std::unordered_map ShaderCacheOpenGL::GenerateUnspecializedShaders( +bool ShaderCacheOpenGL::GenerateUnspecializedShaders( const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, - const std::vector& raws, - const std::unordered_map& decompiled) { - std::unordered_map unspecialized; - + const std::vector& raws) { if (callback) { callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); } for (std::size_t i = 0; i < raws.size(); ++i) { if (stop_loading) { - return {}; + return false; } const auto& raw{raws[i]}; const u64 unique_identifier{raw.GetUniqueIdentifier()}; const u64 calculated_hash{ GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())}; if (unique_identifier != calculated_hash) { - LOG_ERROR( - Render_OpenGL, - "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache", - raw.GetUniqueIdentifier(), calculated_hash); + LOG_ERROR(Render_OpenGL, + "Invalid hash in entry={:016x} (obtained hash={:016x}) - " + "removing shader cache", + raw.GetUniqueIdentifier(), calculated_hash); disk_cache.InvalidateTransferable(); - return {}; + return false; } - GLShader::ProgramResult result; - if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) { - // If it's stored in the precompiled file, avoid decompiling it here - const auto& stored_decompiled{it->second}; - result = {stored_decompiled.code, stored_decompiled.entries}; - } else { - // Otherwise decompile the shader at boot and save the result to the decompiled file - result = CreateProgram(system, device, raw.GetProgramType(), raw.GetProgramCode(), - raw.GetProgramCodeB()); - disk_cache.SaveDecompiled(unique_identifier, result.first, result.second); - } + const u32 main_offset = + raw.GetProgramType() == ProgramType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; + ConstBufferLocker locker(GetEnginesShaderType(raw.GetProgramType())); + const ShaderIR ir(raw.GetProgramCode(), main_offset, COMPILER_SETTINGS, locker); + // TODO(Rodrigo): Handle VertexA shaders + // std::optional ir_b; + // if (raw.HasProgramA()) { + // ir_b.emplace(raw.GetProgramCodeB(), main_offset); + // } - precompiled_shaders.insert({unique_identifier, result}); - - unspecialized.insert( - {raw.GetUniqueIdentifier(), - {std::move(result.first), std::move(result.second), raw.GetProgramType()}}); + UnspecializedShader unspecialized; + unspecialized.entries = GLShader::GetEntries(ir); + unspecialized.program_type = raw.GetProgramType(); + unspecialized.code = raw.GetProgramCode(); + unspecialized.code_b = raw.GetProgramCodeB(); + unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized); if (callback) { callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); } } - return unspecialized; + return true; } Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { @@ -603,37 +617,35 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { } auto& memory_manager{system.GPU().MemoryManager()}; - const GPUVAddr program_addr{GetShaderAddress(system, program)}; + const GPUVAddr address{GetShaderAddress(system, program)}; // Look up shader in the cache based on address - const auto host_ptr{memory_manager.GetPointer(program_addr)}; + const auto host_ptr{memory_manager.GetPointer(address)}; Shader shader{TryGet(host_ptr)}; if (shader) { return last_shaders[static_cast(program)] = shader; } // No shader found - create a new one - ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; - ProgramCode program_code_b; - const bool is_program_a{program == Maxwell::ShaderProgram::VertexA}; - if (is_program_a) { - const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; - program_code_b = GetShaderCode(memory_manager, program_addr_b, - memory_manager.GetPointer(program_addr_b)); + ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; + ProgramCode code_b; + if (program == Maxwell::ShaderProgram::VertexA) { + const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; + code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b)); } - const auto unique_identifier = - GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); - const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; - const ShaderParameters params{disk_cache, precompiled_programs, system, device, cpu_addr, - host_ptr, unique_identifier}; + const auto unique_identifier = GetUniqueIdentifier(GetProgramType(program), code, code_b); + const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); + const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; + const ShaderParameters params{system, disk_cache, precompiled_variants, device, + cpu_addr, host_ptr, unique_identifier}; - const auto found = precompiled_shaders.find(unique_identifier); - if (found == precompiled_shaders.end()) { - shader = CachedShader::CreateStageFromMemory(params, program, std::move(program_code), - std::move(program_code_b)); + const auto found = unspecialized_shaders.find(unique_identifier); + if (found == unspecialized_shaders.end()) { + shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), + std::move(code_b)); } else { - shader = CachedShader::CreateStageFromCache(params, program, found->second); + shader = CachedShader::CreateFromCache(params, found->second); } Register(shader); @@ -651,15 +663,16 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { // No kernel found - create a new one auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; + const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; - const ShaderParameters params{disk_cache, precompiled_programs, system, device, cpu_addr, - host_ptr, unique_identifier}; + const ShaderParameters params{system, disk_cache, precompiled_variants, device, + cpu_addr, host_ptr, unique_identifier}; - const auto found = precompiled_shaders.find(unique_identifier); - if (found == precompiled_shaders.end()) { + const auto found = unspecialized_shaders.find(unique_identifier); + if (found == unspecialized_shaders.end()) { kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); } else { - kernel = CachedShader::CreateKernelFromCache(params, found->second); + kernel = CachedShader::CreateFromCache(params, found->second); } Register(kernel); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6ff78f0053..700a838533 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -8,9 +8,10 @@ #include #include #include -#include +#include #include #include +#include #include #include @@ -20,6 +21,7 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" +#include "video_core/shader/shader_ir.h" namespace Core { class System; @@ -40,12 +42,19 @@ using Shader = std::shared_ptr; using CachedProgram = std::shared_ptr; using Maxwell = Tegra::Engines::Maxwell3D::Regs; using PrecompiledPrograms = std::unordered_map; -using PrecompiledShaders = std::unordered_map; +using PrecompiledVariants = std::vector; + +struct UnspecializedShader { + GLShader::ShaderEntries entries; + ProgramType program_type; + ProgramCode code; + ProgramCode code_b; +}; struct ShaderParameters { - ShaderDiskCacheOpenGL& disk_cache; - const PrecompiledPrograms& precompiled_programs; Core::System& system; + ShaderDiskCacheOpenGL& disk_cache; + const PrecompiledVariants* precompiled_variants; const Device& device; VAddr cpu_addr; u8* host_ptr; @@ -56,23 +65,18 @@ class CachedShader final : public RasterizerCacheObject { public: static Shader CreateStageFromMemory(const ShaderParameters& params, Maxwell::ShaderProgram program_type, - ProgramCode&& program_code, ProgramCode&& program_code_b); + ProgramCode program_code, ProgramCode program_code_b); + static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); - static Shader CreateStageFromCache(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - GLShader::ProgramResult result); - - static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code); - - static Shader CreateKernelFromCache(const ShaderParameters& params, - GLShader::ProgramResult result); + static Shader CreateFromCache(const ShaderParameters& params, + const UnspecializedShader& unspecialized); VAddr GetCpuAddr() const override { return cpu_addr; } std::size_t GetSizeInBytes() const override { - return shader_length; + return program_code.size() * sizeof(u64); } /// Gets the shader entries for the shader @@ -85,21 +89,24 @@ public: private: explicit CachedShader(const ShaderParameters& params, ProgramType program_type, - GLShader::ProgramResult result); - - CachedProgram TryLoadProgram(const ProgramVariant& variant) const; + GLShader::ShaderEntries entries, ProgramCode program_code, + ProgramCode program_code_b); ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; + Core::System& system; + ShaderDiskCacheOpenGL& disk_cache; + const Device& device; + VAddr cpu_addr{}; + u64 unique_identifier{}; ProgramType program_type{}; - ShaderDiskCacheOpenGL& disk_cache; - const PrecompiledPrograms& precompiled_programs; GLShader::ShaderEntries entries; - std::string code; - std::size_t shader_length{}; + + ProgramCode program_code; + ProgramCode program_code_b; std::unordered_map programs; }; @@ -124,21 +131,26 @@ protected: void FlushObjectInner(const Shader& object) override {} private: - std::unordered_map GenerateUnspecializedShaders( - const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, - const std::vector& raws, - const std::unordered_map& decompiled); + bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading, + const VideoCore::DiskResourceLoadCallback& callback, + const std::vector& raws); CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, - const std::set& supported_formats); + const std::unordered_set& supported_formats); + + const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const; Core::System& system; Core::Frontend::EmuWindow& emu_window; const Device& device; + ShaderDiskCacheOpenGL disk_cache; - PrecompiledShaders precompiled_shaders; PrecompiledPrograms precompiled_programs; + std::unordered_map precompiled_variants; + + std::unordered_map unspecialized_shaders; + std::array last_shaders; }; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 71d7389cb9..030550c533 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -415,27 +415,6 @@ public: return code.GetResult(); } - ShaderEntries GetShaderEntries() const { - ShaderEntries entries; - for (const auto& cbuf : ir.GetConstantBuffers()) { - entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), - cbuf.first); - } - for (const auto& sampler : ir.GetSamplers()) { - entries.samplers.emplace_back(sampler); - } - for (const auto& [offset, image] : ir.GetImages()) { - entries.images.emplace_back(image); - } - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, - usage.is_read, usage.is_written); - } - entries.clip_distances = ir.GetClipDistances(); - entries.shader_length = ir.GetLength(); - return entries; - } - private: friend class ASTDecompiler; friend class ExprDecompiler; @@ -2481,25 +2460,46 @@ void GLSLDecompiler::DecompileAST() { } // Anonymous namespace -std::string GetCommonDeclarations() { - return fmt::format( - "#define ftoi floatBitsToInt\n" - "#define ftou floatBitsToUint\n" - "#define itof intBitsToFloat\n" - "#define utof uintBitsToFloat\n\n" - "bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n" - " bvec2 is_nan1 = isnan(pair1);\n" - " bvec2 is_nan2 = isnan(pair2);\n" - " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " - "is_nan2.y);\n" - "}}\n\n"); +ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { + ShaderEntries entries; + for (const auto& cbuf : ir.GetConstantBuffers()) { + entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), + cbuf.first); + } + for (const auto& sampler : ir.GetSamplers()) { + entries.samplers.emplace_back(sampler); + } + for (const auto& [offset, image] : ir.GetImages()) { + entries.images.emplace_back(image); + } + for (const auto& [base, usage] : ir.GetGlobalMemory()) { + entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, + usage.is_written); + } + entries.clip_distances = ir.GetClipDistances(); + entries.shader_length = ir.GetLength(); + return entries; } -ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, - const std::string& suffix) { +std::string GetCommonDeclarations() { + return R"(#define ftoi floatBitsToInt +#define ftou floatBitsToUint +#define itof intBitsToFloat +#define utof uintBitsToFloat + +bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) { + bvec2 is_nan1 = isnan(pair1); + bvec2 is_nan2 = isnan(pair2); + return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); +} +)"; +} + +std::string Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, + const std::string& suffix) { GLSLDecompiler decompiler(device, ir, stage, suffix); decompiler.Decompile(); - return {decompiler.GetResult(), decompiler.GetShaderEntries()}; + return decompiler.GetResult(); } } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index e538dc001b..fead2a51ef 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -34,10 +34,7 @@ enum class ProgramType : u32 { namespace OpenGL::GLShader { -struct ShaderEntries; - using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using ProgramResult = std::pair; using SamplerEntry = VideoCommon::Shader::Sampler; using ImageEntry = VideoCommon::Shader::Image; @@ -93,9 +90,11 @@ struct ShaderEntries { std::size_t shader_length{}; }; +ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir); + std::string GetCommonDeclarations(); -ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - ProgramType stage, const std::string& suffix); +std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, + ProgramType stage, const std::string& suffix); } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 74cc334763..ddc19dccde 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -29,12 +29,7 @@ enum class TransferableEntryKind : u32 { Usage, }; -enum class PrecompiledEntryKind : u32 { - Decompiled, - Dump, -}; - -constexpr u32 NativeVersion = 4; +constexpr u32 NativeVersion = 5; // Making sure sizes doesn't change by accident static_assert(sizeof(BaseBindings) == 16); @@ -49,13 +44,11 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { return hash; } -} // namespace +} // Anonymous namespace ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, - u32 program_code_size, u32 program_code_size_b, ProgramCode program_code, ProgramCode program_code_b) : unique_identifier{unique_identifier}, program_type{program_type}, - program_code_size{program_code_size}, program_code_size_b{program_code_size_b}, program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {} ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; @@ -90,15 +83,16 @@ bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) { bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast(program_type)) != 1 || - file.WriteObject(program_code_size) != 1 || file.WriteObject(program_code_size_b) != 1) { + file.WriteObject(static_cast(program_code.size())) != 1 || + file.WriteObject(static_cast(program_code_b.size())) != 1) { return false; } - if (file.WriteArray(program_code.data(), program_code_size) != program_code_size) + if (file.WriteArray(program_code.data(), program_code.size()) != program_code.size()) return false; if (HasProgramA() && - file.WriteArray(program_code_b.data(), program_code_size_b) != program_code_size_b) { + file.WriteArray(program_code_b.data(), program_code_b.size()) != program_code_b.size()) { return false; } return true; @@ -186,13 +180,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() { return {{std::move(raws), std::move(usages)}}; } -std::pair, ShaderDumpsMap> +std::unordered_map ShaderDiskCacheOpenGL::LoadPrecompiled() { if (!is_usable) { return {}; } - FileUtil::IOFile file(GetPrecompiledPath(), "rb"); + std::string path = GetPrecompiledPath(); + FileUtil::IOFile file(path, "rb"); if (!file.IsOpen()) { LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", GetTitleID()); @@ -211,7 +206,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() { return *result; } -std::optional, ShaderDumpsMap>> +std::optional> ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { // Read compressed file from disk and decompress to virtual precompiled cache file std::vector compressed(file.GetSize()); @@ -231,238 +226,31 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { return {}; } - std::unordered_map decompiled; ShaderDumpsMap dumps; while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { - PrecompiledEntryKind kind{}; - if (!LoadObjectFromPrecompiled(kind)) { + ShaderDiskCacheUsage usage; + if (!LoadObjectFromPrecompiled(usage)) { return {}; } - switch (kind) { - case PrecompiledEntryKind::Decompiled: { - u64 unique_identifier{}; - if (!LoadObjectFromPrecompiled(unique_identifier)) { - return {}; - } - - auto entry = LoadDecompiledEntry(); - if (!entry) { - return {}; - } - decompiled.insert({unique_identifier, std::move(*entry)}); - break; - } - case PrecompiledEntryKind::Dump: { - ShaderDiskCacheUsage usage; - if (!LoadObjectFromPrecompiled(usage)) { - return {}; - } - - ShaderDiskCacheDump dump; - if (!LoadObjectFromPrecompiled(dump.binary_format)) { - return {}; - } - - u32 binary_length{}; - if (!LoadObjectFromPrecompiled(binary_length)) { - return {}; - } - - dump.binary.resize(binary_length); - if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { - return {}; - } - - dumps.insert({usage, dump}); - break; - } - default: + ShaderDiskCacheDump dump; + if (!LoadObjectFromPrecompiled(dump.binary_format)) { return {}; } - } - return {{decompiled, dumps}}; -} -std::optional ShaderDiskCacheOpenGL::LoadDecompiledEntry() { - u32 code_size{}; - if (!LoadObjectFromPrecompiled(code_size)) { - return {}; - } - - std::string code(code_size, '\0'); - if (!LoadArrayFromPrecompiled(code.data(), code.size())) { - return {}; - } - - ShaderDiskCacheDecompiled entry; - entry.code = std::move(code); - - u32 const_buffers_count{}; - if (!LoadObjectFromPrecompiled(const_buffers_count)) { - return {}; - } - - for (u32 i = 0; i < const_buffers_count; ++i) { - u32 max_offset{}; - u32 index{}; - bool is_indirect{}; - if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) || - !LoadObjectFromPrecompiled(is_indirect)) { + u32 binary_length{}; + if (!LoadObjectFromPrecompiled(binary_length)) { return {}; } - entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index); - } - u32 samplers_count{}; - if (!LoadObjectFromPrecompiled(samplers_count)) { - return {}; - } - - for (u32 i = 0; i < samplers_count; ++i) { - u64 offset{}; - u64 index{}; - u32 type{}; - bool is_array{}; - bool is_shadow{}; - bool is_bindless{}; - if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || - !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) || - !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) { + dump.binary.resize(binary_length); + if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { return {}; } - entry.entries.samplers.emplace_back( - static_cast(offset), static_cast(index), - static_cast(type), is_array, is_shadow, is_bindless); - } - u32 images_count{}; - if (!LoadObjectFromPrecompiled(images_count)) { - return {}; + dumps.emplace(usage, dump); } - for (u32 i = 0; i < images_count; ++i) { - u64 offset{}; - u64 index{}; - u32 type{}; - u8 is_bindless{}; - u8 is_written{}; - u8 is_read{}; - u8 is_atomic{}; - if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || - !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) || - !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) || - !LoadObjectFromPrecompiled(is_atomic)) { - return {}; - } - entry.entries.images.emplace_back( - static_cast(offset), static_cast(index), - static_cast(type), is_bindless != 0, is_written != 0, - is_read != 0, is_atomic != 0); - } - - u32 global_memory_count{}; - if (!LoadObjectFromPrecompiled(global_memory_count)) { - return {}; - } - for (u32 i = 0; i < global_memory_count; ++i) { - u32 cbuf_index{}; - u32 cbuf_offset{}; - bool is_read{}; - bool is_written{}; - if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) || - !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) { - return {}; - } - entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read, - is_written); - } - - for (auto& clip_distance : entry.entries.clip_distances) { - if (!LoadObjectFromPrecompiled(clip_distance)) { - return {}; - } - } - - u64 shader_length{}; - if (!LoadObjectFromPrecompiled(shader_length)) { - return {}; - } - entry.entries.shader_length = static_cast(shader_length); - - return entry; -} - -bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code, - const GLShader::ShaderEntries& entries) { - if (!SaveObjectToPrecompiled(static_cast(PrecompiledEntryKind::Decompiled)) || - !SaveObjectToPrecompiled(unique_identifier) || - !SaveObjectToPrecompiled(static_cast(code.size())) || - !SaveArrayToPrecompiled(code.data(), code.size())) { - return false; - } - - if (!SaveObjectToPrecompiled(static_cast(entries.const_buffers.size()))) { - return false; - } - for (const auto& cbuf : entries.const_buffers) { - if (!SaveObjectToPrecompiled(static_cast(cbuf.GetMaxOffset())) || - !SaveObjectToPrecompiled(static_cast(cbuf.GetIndex())) || - !SaveObjectToPrecompiled(cbuf.IsIndirect())) { - return false; - } - } - - if (!SaveObjectToPrecompiled(static_cast(entries.samplers.size()))) { - return false; - } - for (const auto& sampler : entries.samplers) { - if (!SaveObjectToPrecompiled(static_cast(sampler.GetOffset())) || - !SaveObjectToPrecompiled(static_cast(sampler.GetIndex())) || - !SaveObjectToPrecompiled(static_cast(sampler.GetType())) || - !SaveObjectToPrecompiled(sampler.IsArray()) || - !SaveObjectToPrecompiled(sampler.IsShadow()) || - !SaveObjectToPrecompiled(sampler.IsBindless())) { - return false; - } - } - - if (!SaveObjectToPrecompiled(static_cast(entries.images.size()))) { - return false; - } - for (const auto& image : entries.images) { - if (!SaveObjectToPrecompiled(static_cast(image.GetOffset())) || - !SaveObjectToPrecompiled(static_cast(image.GetIndex())) || - !SaveObjectToPrecompiled(static_cast(image.GetType())) || - !SaveObjectToPrecompiled(static_cast(image.IsBindless() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast(image.IsWritten() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast(image.IsRead() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast(image.IsAtomic() ? 1 : 0))) { - return false; - } - } - - if (!SaveObjectToPrecompiled(static_cast(entries.global_memory_entries.size()))) { - return false; - } - for (const auto& gmem : entries.global_memory_entries) { - if (!SaveObjectToPrecompiled(static_cast(gmem.GetCbufIndex())) || - !SaveObjectToPrecompiled(static_cast(gmem.GetCbufOffset())) || - !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) { - return false; - } - } - - for (const bool clip_distance : entries.clip_distances) { - if (!SaveObjectToPrecompiled(clip_distance)) { - return false; - } - } - - if (!SaveObjectToPrecompiled(static_cast(entries.shader_length))) { - return false; - } - - return true; + return dumps; } void ShaderDiskCacheOpenGL::InvalidateTransferable() { @@ -532,28 +320,18 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { } } -void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code, - const GLShader::ShaderEntries& entries) { - if (!is_usable) { - return; - } - - if (precompiled_cache_virtual_file.GetSize() == 0) { - SavePrecompiledHeaderToVirtualPrecompiledCache(); - } - - if (!SaveDecompiledFile(unique_identifier, code, entries)) { - LOG_ERROR(Render_OpenGL, - "Failed to save decompiled entry to the precompiled file - removing"); - InvalidatePrecompiled(); - } -} - void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) { if (!is_usable) { return; } + // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header + // when writing the dump. This should be done the moment I get access to write to the virtual + // file. + if (precompiled_cache_virtual_file.GetSize() == 0) { + SavePrecompiledHeaderToVirtualPrecompiledCache(); + } + GLint binary_length{}; glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); @@ -561,8 +339,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p std::vector binary(binary_length); glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - if (!SaveObjectToPrecompiled(static_cast(PrecompiledEntryKind::Dump)) || - !SaveObjectToPrecompiled(usage) || + if (!SaveObjectToPrecompiled(usage) || !SaveObjectToPrecompiled(static_cast(binary_format)) || !SaveObjectToPrecompiled(static_cast(binary_length)) || !SaveArrayToPrecompiled(binary.data(), binary.size())) { @@ -574,8 +351,9 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p } FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { - if (!EnsureDirectories()) + if (!EnsureDirectories()) { return {}; + } const auto transferable_path{GetTransferablePath()}; const bool existed = FileUtil::Exists(transferable_path); @@ -607,8 +385,8 @@ void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { precompiled_cache_virtual_file_offset = 0; - const std::vector& uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); - const std::vector& compressed = + const std::vector uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); + const std::vector compressed = Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); const auto precompiled_path{GetPrecompiledPath()}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 9595bd71ba..61b46d728d 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -123,8 +123,7 @@ namespace OpenGL { class ShaderDiskCacheRaw { public: explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, - u32 program_code_size, u32 program_code_size_b, - ProgramCode program_code, ProgramCode program_code_b); + ProgramCode program_code, ProgramCode program_code_b = {}); ShaderDiskCacheRaw(); ~ShaderDiskCacheRaw(); @@ -155,22 +154,14 @@ public: private: u64 unique_identifier{}; ProgramType program_type{}; - u32 program_code_size{}; - u32 program_code_size_b{}; ProgramCode program_code; ProgramCode program_code_b; }; -/// Contains decompiled data from a shader -struct ShaderDiskCacheDecompiled { - std::string code; - GLShader::ShaderEntries entries; -}; - /// Contains an OpenGL dumped binary program struct ShaderDiskCacheDump { - GLenum binary_format; + GLenum binary_format{}; std::vector binary; }; @@ -184,9 +175,7 @@ public: LoadTransferable(); /// Loads current game's precompiled cache. Invalidates on failure. - std::pair, - std::unordered_map> - LoadPrecompiled(); + std::unordered_map LoadPrecompiled(); /// Removes the transferable (and precompiled) cache file. void InvalidateTransferable(); @@ -200,10 +189,6 @@ public: /// Saves shader usage to the transferable file. Does not check for collisions. void SaveUsage(const ShaderDiskCacheUsage& usage); - /// Saves a decompiled entry to the precompiled file. Does not check for collisions. - void SaveDecompiled(u64 unique_identifier, const std::string& code, - const GLShader::ShaderEntries& entries); - /// Saves a dump entry to the precompiled file. Does not check for collisions. void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); @@ -212,18 +197,9 @@ public: private: /// Loads the transferable cache. Returns empty on failure. - std::optional, - std::unordered_map>> + std::optional> LoadPrecompiledFile(FileUtil::IOFile& file); - /// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on - /// failure. - std::optional LoadDecompiledEntry(); - - /// Saves a decompiled entry to the passed file. Returns true on success. - bool SaveDecompiledFile(u64 unique_identifier, const std::string& code, - const GLShader::ShaderEntries& entries); - /// Opens current game's transferable file and write it's header if it doesn't exist FileUtil::IOFile AppendTransferableFile() const; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 817c6e12c7..0e22eede90 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -16,18 +16,8 @@ using VideoCommon::Shader::CompilerSettings; using VideoCommon::Shader::ProgramCode; using VideoCommon::Shader::ShaderIR; -static constexpr u32 PROGRAM_OFFSET = 10; -static constexpr u32 COMPUTE_OFFSET = 0; - -static constexpr CompilerSettings settings{CompileDepth::NoFlowStack, true}; - -ProgramResult GenerateVertexShader(ConstBufferLocker& locker, const Device& device, - const ShaderSetup& setup) { - const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - - std::string out = "// Shader Unique Id: VS" + id + "\n\n"; - out += GetCommonDeclarations(); - +std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { + std::string out = GetCommonDeclarations(); out += R"( layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { vec4 viewport_flip; @@ -35,18 +25,10 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { }; )"; - - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings, - locker); - const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; - ProgramResult program = Decompile(device, program_ir, stage, "vertex"); - out += program.first; - - if (setup.IsDualProgram()) { - const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b, - settings, locker); - ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b"); - out += program_b.first; + const auto stage = ir_b ? ProgramType::VertexA : ProgramType::VertexB; + out += Decompile(device, ir, stage, "vertex"); + if (ir_b) { + out += Decompile(device, *ir_b, ProgramType::VertexB, "vertex_b"); } out += R"( @@ -54,7 +36,7 @@ void main() { execute_vertex(); )"; - if (setup.IsDualProgram()) { + if (ir_b) { out += " execute_vertex_b();"; } @@ -68,18 +50,13 @@ void main() { // Viewport can be flipped, which is unsupported by glViewport gl_Position.xy *= viewport_flip.xy; } -})"; - - return {std::move(out), std::move(program.second)}; +} +)"; + return out; } -ProgramResult GenerateGeometryShader(ConstBufferLocker& locker, const Device& device, - const ShaderSetup& setup) { - const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - - std::string out = "// Shader Unique Id: GS" + id + "\n\n"; - out += GetCommonDeclarations(); - +std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { + std::string out = GetCommonDeclarations(); out += R"( layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { vec4 viewport_flip; @@ -87,27 +64,18 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { }; )"; - - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings, - locker); - ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry"); - out += program.first; + out += Decompile(device, ir, ProgramType::Geometry, "geometry"); out += R"( void main() { execute_geometry(); -};)"; - - return {std::move(out), std::move(program.second)}; +} +)"; + return out; } -ProgramResult GenerateFragmentShader(ConstBufferLocker& locker, const Device& device, - const ShaderSetup& setup) { - const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - - std::string out = "// Shader Unique Id: FS" + id + "\n\n"; - out += GetCommonDeclarations(); - +std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { + std::string out = GetCommonDeclarations(); out += R"( layout (location = 0) out vec4 FragColor0; layout (location = 1) out vec4 FragColor1; @@ -124,39 +92,25 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { }; )"; - - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings, - locker); - ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment"); - out += program.first; + out += Decompile(device, ir, ProgramType::Fragment, "fragment"); out += R"( void main() { execute_fragment(); } - )"; - return {std::move(out), std::move(program.second)}; + return out; } -ProgramResult GenerateComputeShader(ConstBufferLocker& locker, const Device& device, - const ShaderSetup& setup) { - const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - - std::string out = "// Shader Unique Id: CS" + id + "\n\n"; - out += GetCommonDeclarations(); - - const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings, - locker); - ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); - out += program.first; - +std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) { + std::string out = GetCommonDeclarations(); + out += Decompile(device, ir, ProgramType::Compute, "compute"); out += R"( void main() { execute_compute(); } )"; - return {std::move(out), std::move(program.second)}; + return out; } } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 05f157298d..cba2be9f93 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -16,50 +16,19 @@ class Device; namespace OpenGL::GLShader { -using VideoCommon::Shader::ConstBufferLocker; using VideoCommon::Shader::ProgramCode; - -struct ShaderSetup { - explicit ShaderSetup(ProgramCode program_code) { - program.code = std::move(program_code); - } - - struct { - ProgramCode code; - ProgramCode code_b; // Used for dual vertex shaders - u64 unique_identifier; - std::size_t size_a; - std::size_t size_b; - } program; - - /// Used in scenarios where we have a dual vertex shaders - void SetProgramB(ProgramCode program_b) { - program.code_b = std::move(program_b); - has_program_b = true; - } - - bool IsDualProgram() const { - return has_program_b; - } - -private: - bool has_program_b{}; -}; +using VideoCommon::Shader::ShaderIR; /// Generates the GLSL vertex shader program source code for the given VS program -ProgramResult GenerateVertexShader(ConstBufferLocker& locker, const Device& device, - const ShaderSetup& setup); +std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b); /// Generates the GLSL geometry shader program source code for the given GS program -ProgramResult GenerateGeometryShader(ConstBufferLocker& locker, const Device& device, - const ShaderSetup& setup); +std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir); /// Generates the GLSL fragment shader program source code for the given FS program -ProgramResult GenerateFragmentShader(ConstBufferLocker& locker, const Device& device, - const ShaderSetup& setup); +std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir); /// Generates the GLSL compute shader program source code for the given CS program -ProgramResult GenerateComputeShader(ConstBufferLocker& locker, const Device& device, - const ShaderSetup& setup); +std::string GenerateComputeShader(const Device& device, const ShaderIR& ir); } // namespace OpenGL::GLShader diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index 9d23bcecfb..37a0968a17 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -15,15 +15,15 @@ ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) : engine{nullptr}, shader_stage{shader_stage} {} ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface* engine) - : engine{engine}, shader_stage{shader_stage} {} + Tegra::Engines::ConstBufferEngineInterface& engine) + : engine{&engine}, shader_stage{shader_stage} {} bool ConstBufferLocker::IsEngineSet() const { return engine != nullptr; } -void ConstBufferLocker::SetEngine(Tegra::Engines::ConstBufferEngineInterface* engine_) { - engine = engine_; +void ConstBufferLocker::SetEngine(Tegra::Engines::ConstBufferEngineInterface& engine_) { + engine = &engine_; } std::optional ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 13eeba3204..54459977f2 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -21,14 +21,14 @@ public: explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface* engine); + Tegra::Engines::ConstBufferEngineInterface& engine); // Checks if an engine is setup, it may be possible that during disk shader // cache run, the engines have not been created yet. bool IsEngineSet() const; // Use this to set/change the engine used for this shader. - void SetEngine(Tegra::Engines::ConstBufferEngineInterface* engine); + void SetEngine(Tegra::Engines::ConstBufferEngineInterface& engine); // Retrieves a key from the locker, if it's registered, it will give the // registered value, if not it will obtain it from maxwell3d and register it. diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index d1c269ea72..6c698bcff2 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -66,10 +66,11 @@ struct BlockInfo { }; struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, - const u32 start, ConstBufferLocker& locker) - : start{start}, program_code{program_code}, program_size{program_size}, locker{locker} {} + explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) + : program_code{program_code}, start{start}, locker{locker} {} + const ProgramCode& program_code; + ConstBufferLocker& locker; u32 start{}; std::vector block_info{}; std::list inspect_queries{}; @@ -79,10 +80,7 @@ struct CFGRebuildState { std::map ssy_labels{}; std::map pbk_labels{}; std::unordered_map stacks{}; - const ProgramCode& program_code; - const std::size_t program_size; ASTManager* manager; - ConstBufferLocker& locker; }; enum class BlockCollision : u32 { None, Found, Inside }; @@ -242,7 +240,7 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState& std::pair ParseCode(CFGRebuildState& state, u32 address) { u32 offset = static_cast(address); - const u32 end_address = static_cast(state.program_size / sizeof(Instruction)); + const u32 end_address = static_cast(state.program_code.size()); ParseInfo parse_info{}; SingleBranch single_branch{}; @@ -583,6 +581,7 @@ bool TryQuery(CFGRebuildState& state) { } return true; } + } // Anonymous namespace void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { @@ -651,8 +650,7 @@ void DecompileShader(CFGRebuildState& state) { state.manager->Decompile(); } -std::unique_ptr ScanFlow(const ProgramCode& program_code, - std::size_t program_size, u32 start_address, +std::unique_ptr ScanFlow(const ProgramCode& program_code, u32 start_address, const CompilerSettings& settings, ConstBufferLocker& locker) { auto result_out = std::make_unique(); @@ -661,7 +659,7 @@ std::unique_ptr ScanFlow(const ProgramCode& program_code, return result_out; } - CFGRebuildState state{program_code, program_size, start_address, locker}; + CFGRebuildState state{program_code, start_address, locker}; // Inspect Code and generate blocks state.labels.clear(); state.labels.emplace(start_address); diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 369ca255b9..288ee68afe 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -105,8 +105,7 @@ struct ShaderCharacteristics { CompilerSettings settings{}; }; -std::unique_ptr ScanFlow(const ProgramCode& program_code, - std::size_t program_size, u32 start_address, +std::unique_ptr ScanFlow(const ProgramCode& program_code, u32 start_address, const CompilerSettings& settings, ConstBufferLocker& locker); diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 053241128b..e1afa45820 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -33,7 +33,7 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { return (absolute_offset % SchedPeriod) == 0; } -} // namespace +} // Anonymous namespace class ASTDecoder { public: @@ -102,7 +102,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); decompiled = false; - auto info = ScanFlow(program_code, program_size, main_offset, settings, locker); + auto info = ScanFlow(program_code, main_offset, settings, locker); auto& shader_info = *info; coverage_begin = shader_info.start; coverage_end = shader_info.end; @@ -155,7 +155,7 @@ void ShaderIR::Decode() { [[fallthrough]]; case CompileDepth::BruteForce: { coverage_begin = main_offset; - const u32 shader_end = static_cast(program_size / sizeof(u64)); + const u32 shader_end = program_code.size(); coverage_end = shader_end; for (u32 label = main_offset; label < shader_end; label++) { basic_blocks.insert({label, DecodeRange(label, label + 1)}); @@ -225,7 +225,8 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { for (auto& branch_case : multi_branch->branches) { Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); Node op_b = Immediate(branch_case.cmp_value); - Node condition = GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); + Node condition = + GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); auto result = Conditional(condition, {n}); bb.push_back(result); global_code.push_back(result); diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 6430575ecd..1d718ccc6e 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -22,10 +22,9 @@ using Tegra::Shader::PredCondition; using Tegra::Shader::PredOperation; using Tegra::Shader::Register; -ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size, - CompilerSettings settings, ConstBufferLocker& locker) - : program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{}, - program_manager{true, true}, settings{settings}, locker{locker} { +ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, + ConstBufferLocker& locker) + : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { Decode(); } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 3a3e381d25..3ebea91b97 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -67,8 +67,8 @@ struct GlobalMemoryUsage { class ShaderIR final { public: - explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size, - CompilerSettings settings, ConstBufferLocker& locker); + explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, + ConstBufferLocker& locker); ~ShaderIR(); const std::map& GetBasicBlocks() const { @@ -384,7 +384,9 @@ private: const ProgramCode& program_code; const u32 main_offset; - const std::size_t program_size; + const CompilerSettings settings; + ConstBufferLocker& locker; + bool decompiled{}; bool disable_flow_stack{}; @@ -393,9 +395,7 @@ private: std::map basic_blocks; NodeBlock global_code; - ASTManager program_manager; - CompilerSettings settings{}; - ConstBufferLocker& locker; + ASTManager program_manager{true, true}; std::set used_registers; std::set used_predicates; From fa2c297f3eddc718123767142dbc296270f58f7a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 25 Sep 2019 19:19:41 -0300 Subject: [PATCH 09/13] const_buffer_locker: Minor style changes --- src/video_core/shader/const_buffer_locker.cpp | 152 ++++++------------ src/video_core/shader/const_buffer_locker.h | 86 ++++------ 2 files changed, 81 insertions(+), 157 deletions(-) diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index 37a0968a17..ebeba102d0 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -4,6 +4,8 @@ #pragma once +#include +#include #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" @@ -11,140 +13,92 @@ namespace VideoCommon::Shader { +using Tegra::Engines::SamplerDescriptor; + ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) - : engine{nullptr}, shader_stage{shader_stage} {} + : stage{shader_stage} {} ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, Tegra::Engines::ConstBufferEngineInterface& engine) - : engine{&engine}, shader_stage{shader_stage} {} - -bool ConstBufferLocker::IsEngineSet() const { - return engine != nullptr; -} - -void ConstBufferLocker::SetEngine(Tegra::Engines::ConstBufferEngineInterface& engine_) { - engine = &engine_; -} + : stage{shader_stage}, engine{&engine} {} std::optional ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { - if (!keys) { - keys = std::make_shared(); - } - auto& key_map = *keys; const std::pair key = {buffer, offset}; - const auto iter = key_map.find(key); - if (iter != key_map.end()) { - return {iter->second}; + const auto iter = keys.find(key); + if (iter != keys.end()) { + return iter->second; } - if (!IsEngineSet()) { + if (!engine) { return {}; } - const u32 value = engine->AccessConstBuffer32(shader_stage, buffer, offset); - key_map.emplace(key, value); - return {value}; + const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); + keys.emplace(key, value); + return value; } -std::optional ConstBufferLocker::ObtainBoundSampler(u32 offset) { - if (!bound_samplers) { - bound_samplers = std::make_shared(); - } - auto& key_map = *bound_samplers; +std::optional ConstBufferLocker::ObtainBoundSampler(u32 offset) { const u32 key = offset; - const auto iter = key_map.find(key); - if (iter != key_map.end()) { - return {iter->second}; + const auto iter = bound_samplers.find(key); + if (iter != bound_samplers.end()) { + return iter->second; } - if (!IsEngineSet()) { + if (!engine) { return {}; } - const Tegra::Engines::SamplerDescriptor value = - engine->AccessBoundSampler(shader_stage, offset); - key_map.emplace(key, value); - return {value}; + const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); + bound_samplers.emplace(key, value); + return value; } std::optional ConstBufferLocker::ObtainBindlessSampler( u32 buffer, u32 offset) { - if (!bindless_samplers) { - bindless_samplers = std::make_shared(); + const std::pair key = {buffer, offset}; + const auto iter = bindless_samplers.find(key); + if (iter != bindless_samplers.end()) { + return iter->second; } - auto& key_map = *bindless_samplers; - const std::pair key = {buffer, offset}; - const auto iter = key_map.find(key); - if (iter != key_map.end()) { - return {iter->second}; - } - if (!IsEngineSet()) { + if (!engine) { return {}; } - const Tegra::Engines::SamplerDescriptor value = - engine->AccessBindlessSampler(shader_stage, buffer, offset); - key_map.emplace(key, value); - return {value}; + const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); + bindless_samplers.emplace(key, value); + return value; } void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { - if (!keys) { - keys = std::make_shared(); - } - const std::pair key = {buffer, offset}; - (*keys)[key] = value; + keys.insert_or_assign({buffer, offset}, value); } -void ConstBufferLocker::InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler) { - if (!bound_samplers) { - bound_samplers = std::make_shared(); - } - (*bound_samplers)[offset] = sampler; +void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { + bound_samplers.insert_or_assign(offset, sampler); } -void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, - Tegra::Engines::SamplerDescriptor sampler) { - if (!bindless_samplers) { - bindless_samplers = std::make_shared(); - } - const std::pair key = {buffer, offset}; - (*bindless_samplers)[key] = sampler; +void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { + bindless_samplers.insert_or_assign({buffer, offset}, sampler); } bool ConstBufferLocker::IsConsistent() const { - if (!IsEngineSet()) { + if (!engine) { return false; } - if (keys) { - for (const auto& key_val : *keys) { - const std::pair key = key_val.first; - const u32 value = key_val.second; - const u32 other_value = - engine->AccessConstBuffer32(shader_stage, key.first, key.second); - if (other_value != value) { - return false; - } - } - } - if (bound_samplers) { - for (const auto& sampler_val : *bound_samplers) { - const u32 key = sampler_val.first; - const Tegra::Engines::SamplerDescriptor value = sampler_val.second; - const Tegra::Engines::SamplerDescriptor other_value = - engine->AccessBoundSampler(shader_stage, key); - if (other_value.raw != value.raw) { - return false; - } - } - } - if (bindless_samplers) { - for (const auto& sampler_val : *bindless_samplers) { - const std::pair key = sampler_val.first; - const Tegra::Engines::SamplerDescriptor value = sampler_val.second; - const Tegra::Engines::SamplerDescriptor other_value = - engine->AccessBindlessSampler(shader_stage, key.first, key.second); - if (other_value.raw != value.raw) { - return false; - } - } - } - return true; + return std::all_of(keys.begin(), keys.end(), + [](const auto& key) { + const auto [value, other_value] = key.first; + return value == other_value; + }) && + std::all_of(bound_samplers.begin(), bound_samplers.end(), + [this](const auto& sampler) { + const auto [key, value] = sampler; + const auto other_value = engine->AccessBoundSampler(stage, key); + return value.raw == other_value.raw; + }) && + std::all_of( + bindless_samplers.begin(), bindless_samplers.end(), [this](const auto& sampler) { + const auto [cbuf, offset] = sampler.first; + const auto value = sampler.second; + const auto other_value = engine->AccessBindlessSampler(stage, cbuf, offset); + return value.raw == other_value.raw; + }); } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 54459977f2..417d5a16f0 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -23,78 +23,48 @@ public: explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, Tegra::Engines::ConstBufferEngineInterface& engine); - // Checks if an engine is setup, it may be possible that during disk shader - // cache run, the engines have not been created yet. - bool IsEngineSet() const; - - // Use this to set/change the engine used for this shader. - void SetEngine(Tegra::Engines::ConstBufferEngineInterface& engine); - - // Retrieves a key from the locker, if it's registered, it will give the - // registered value, if not it will obtain it from maxwell3d and register it. + /// Retrieves a key from the locker, if it's registered, it will give the registered value, if + /// not it will obtain it from maxwell3d and register it. std::optional ObtainKey(u32 buffer, u32 offset); std::optional ObtainBoundSampler(u32 offset); std::optional ObtainBindlessSampler(u32 buffer, u32 offset); - // Manually inserts a key. + /// Inserts a key. void InsertKey(u32 buffer, u32 offset, u32 value); + /// Inserts a bound sampler key. void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); + /// Inserts a bindless sampler key. void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); - // Retrieves the number of keys registered. - std::size_t NumKeys() const { - if (!keys) { - return 0; - } - return keys->size(); - } - - std::size_t NumBoundSamplers() const { - if (!bound_samplers) { - return 0; - } - return bound_samplers->size(); - } - - std::size_t NumBindlessSamplers() const { - if (!bindless_samplers) { - return 0; - } - return bindless_samplers->size(); - } - - // Gives an accessor to the key's database. - // Pre: NumKeys > 0 - const KeyMap& AccessKeys() const { - return *keys; - } - - // Gives an accessor to the sampler's database. - // Pre: NumBindlessSamplers > 0 - const BoundSamplerMap& AccessBoundSamplers() const { - return *bound_samplers; - } - - // Gives an accessor to the sampler's database. - // Pre: NumBindlessSamplers > 0 - const BindlessSamplerMap& AccessBindlessSamplers() const { - return *bindless_samplers; - } - - // Checks keys & samplers against engine's current const buffers. Returns true if they - // are the same value, false otherwise; + /// Checks keys and samplers against engine's current const buffers. Returns true if they are + /// the same value, false otherwise; bool IsConsistent() const; + /// Gives an getter to the const buffer keys in the database. + const KeyMap& GetKeys() const { + return keys; + } + + /// Gets samplers database. + const BoundSamplerMap& GetBoundSamplers() const { + return bound_samplers; + } + + /// Gets bindless samplers database. + const BindlessSamplerMap& GetBindlessSamplers() const { + return bindless_samplers; + } + private: - Tegra::Engines::ConstBufferEngineInterface* engine; - Tegra::Engines::ShaderType shader_stage; - // All containers are lazy initialized as most shaders don't use them. - std::shared_ptr keys{}; - std::shared_ptr bound_samplers{}; - std::shared_ptr bindless_samplers{}; + const Tegra::Engines::ShaderType stage; + Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; + KeyMap keys; + BoundSamplerMap bound_samplers; + BindlessSamplerMap bindless_samplers; }; + } // namespace VideoCommon::Shader From ec85648af3316d5e43c7b57fca55d0dad3d03f96 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 25 Sep 2019 21:46:34 -0300 Subject: [PATCH 10/13] gl_shader_disk_cache: Store and load fast BRX --- .../engines/const_buffer_engine_interface.h | 35 ++-- .../renderer_opengl/gl_shader_cache.cpp | 19 +- .../renderer_opengl/gl_shader_cache.h | 7 +- .../renderer_opengl/gl_shader_disk_cache.cpp | 183 +++++++++++++++--- .../renderer_opengl/gl_shader_disk_cache.h | 12 +- src/video_core/shader/const_buffer_locker.cpp | 4 +- 6 files changed, 210 insertions(+), 50 deletions(-) diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index c0e3a3a173..80f470777d 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h @@ -4,6 +4,7 @@ #pragma once +#include #include "common/bit_field.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" @@ -29,51 +30,49 @@ struct SamplerDescriptor { u32 raw{}; }; + bool operator==(const SamplerDescriptor& rhs) const noexcept { + return raw == rhs.raw; + } + static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) { - SamplerDescriptor result{}; + SamplerDescriptor result; switch (tic_texture_type) { - case Tegra::Texture::TextureType::Texture1D: { + case Tegra::Texture::TextureType::Texture1D: result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); result.is_array.Assign(0); result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; - } - case Tegra::Texture::TextureType::Texture2D: { + case Tegra::Texture::TextureType::Texture2D: result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); result.is_array.Assign(0); result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; - } - case Tegra::Texture::TextureType::Texture3D: { + case Tegra::Texture::TextureType::Texture3D: result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D); result.is_array.Assign(0); result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; - } - case Tegra::Texture::TextureType::TextureCubemap: { + case Tegra::Texture::TextureType::TextureCubemap: result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); result.is_array.Assign(0); result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; - } - case Tegra::Texture::TextureType::Texture1DArray: { + case Tegra::Texture::TextureType::Texture1DArray: result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); result.is_array.Assign(1); result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; - } - case Tegra::Texture::TextureType::Texture2DArray: { + case Tegra::Texture::TextureType::Texture2DArray: result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); result.is_array.Assign(1); result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; - } case Tegra::Texture::TextureType::Texture1DBuffer: { result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); result.is_array.Assign(0); @@ -81,30 +80,28 @@ struct SamplerDescriptor { result.is_shadow.Assign(0); return result; } - case Tegra::Texture::TextureType::Texture2DNoMipmap: { + case Tegra::Texture::TextureType::Texture2DNoMipmap: result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); result.is_array.Assign(0); result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; - } - case Tegra::Texture::TextureType::TextureCubeArray: { + case Tegra::Texture::TextureType::TextureCubeArray: result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); result.is_array.Assign(1); result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; - } - default: { + default: result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); result.is_array.Assign(0); result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; } - } } }; +static_assert(std::is_trivially_copyable_v); class ConstBufferEngineInterface { public: diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 6402d67632..7e7aea15f3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -392,7 +392,7 @@ std::tuple CachedShader::GetProgramHandle(const ProgramVar ConstBufferLocker locker(GetEnginesShaderType(program_type), *engine); program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b, variant, locker); - disk_cache.SaveUsage(GetUsage(variant)); + disk_cache.SaveUsage(GetUsage(variant, locker)); LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); } @@ -408,10 +408,14 @@ std::tuple CachedShader::GetProgramHandle(const ProgramVar return {program->handle, base_bindings}; } -ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const { +ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, + const ConstBufferLocker& locker) const { ShaderDiskCacheUsage usage; usage.unique_identifier = unique_identifier; usage.variant = variant; + usage.keys = locker.GetKeys(); + usage.bound_samplers = locker.GetBoundSamplers(); + usage.bindless_samplers = locker.GetBindlessSamplers(); return usage; } @@ -472,6 +476,17 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } if (!shader) { ConstBufferLocker locker(GetEnginesShaderType(unspecialized.program_type)); + for (const auto& key : usage.keys) { + const auto [buffer, offset] = key.first; + locker.InsertKey(buffer, offset, key.second); + } + for (const auto& [offset, sampler] : usage.bound_samplers) { + locker.InsertBoundSampler(offset, sampler); + } + for (const auto& [key, sampler] : usage.bindless_samplers) { + const auto [buffer, offset] = key; + locker.InsertBindlessSampler(buffer, offset, sampler); + } shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type, unspecialized.code, unspecialized.code_b, usage.variant, locker, true); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 700a838533..2935e68312 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -31,6 +31,10 @@ namespace Core::Frontend { class EmuWindow; } +namespace VideoCommon::Shader { +class ConstBufferLocker; +} + namespace OpenGL { class CachedShader; @@ -92,7 +96,8 @@ private: GLShader::ShaderEntries entries, ProgramCode program_code, ProgramCode program_code_b); - ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; + ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant, + const VideoCommon::Shader::ConstBufferLocker& locker) const; Core::System& system; ShaderDiskCacheOpenGL& disk_cache; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index ddc19dccde..184a565e60 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -22,6 +22,29 @@ namespace OpenGL { +using VideoCommon::Shader::BindlessSamplerMap; +using VideoCommon::Shader::BoundSamplerMap; +using VideoCommon::Shader::KeyMap; + +namespace { + +struct ConstBufferKey { + u32 cbuf; + u32 offset; + u32 value; +}; + +struct BoundSamplerKey { + u32 offset; + Tegra::Engines::SamplerDescriptor sampler; +}; + +struct BindlessSamplerKey { + u32 cbuf; + u32 offset; + Tegra::Engines::SamplerDescriptor sampler; +}; + using ShaderCacheVersionHash = std::array; enum class TransferableEntryKind : u32 { @@ -33,9 +56,6 @@ constexpr u32 NativeVersion = 5; // Making sure sizes doesn't change by accident static_assert(sizeof(BaseBindings) == 16); -static_assert(sizeof(ShaderDiskCacheUsage) == 40); - -namespace { ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; @@ -121,13 +141,13 @@ ShaderDiskCacheOpenGL::LoadTransferable() { u32 version{}; if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { LOG_ERROR(Render_OpenGL, - "Failed to get transferable cache version for title id={} - skipping", + "Failed to get transferable cache version for title id={}, skipping", GetTitleID()); return {}; } if (version < NativeVersion) { - LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing"); + LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing"); file.Close(); InvalidateTransferable(); is_usable = true; @@ -135,17 +155,18 @@ ShaderDiskCacheOpenGL::LoadTransferable() { } if (version > NativeVersion) { LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " - "of the emulator - skipping"); + "of the emulator, skipping"); return {}; } // Version is valid, load the shaders + constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping"; std::vector raws; std::vector usages; while (file.Tell() < file.GetSize()) { TransferableEntryKind kind{}; if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { - LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping"); + LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping"); return {}; } @@ -153,7 +174,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() { case TransferableEntryKind::Raw: { ShaderDiskCacheRaw entry; if (!entry.Load(file)) { - LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping"); + LOG_ERROR(Render_OpenGL, error_loading); return {}; } transferable.insert({entry.GetUniqueIdentifier(), {}}); @@ -161,16 +182,45 @@ ShaderDiskCacheOpenGL::LoadTransferable() { break; } case TransferableEntryKind::Usage: { - ShaderDiskCacheUsage usage{}; - if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) { - LOG_ERROR(Render_OpenGL, "Failed to load transferable usage entry - skipping"); + ShaderDiskCacheUsage usage; + + u32 num_keys{}; + u32 num_bound_samplers{}; + u32 num_bindless_samplers{}; + if (file.ReadArray(&usage.unique_identifier, 1) != 1 || + file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || + file.ReadArray(&num_bound_samplers, 1) != 1 || + file.ReadArray(&num_bindless_samplers, 1) != 1) { + LOG_ERROR(Render_OpenGL, error_loading); return {}; } + + std::vector keys(num_keys); + std::vector bound_samplers(num_bound_samplers); + std::vector bindless_samplers(num_bindless_samplers); + if (file.ReadArray(keys.data(), keys.size()) != keys.size() || + file.ReadArray(bound_samplers.data(), bound_samplers.size()) != + bound_samplers.size() || + file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) != + bindless_samplers.size()) { + LOG_ERROR(Render_OpenGL, error_loading); + return {}; + } + for (const auto& key : keys) { + usage.keys.insert({{key.cbuf, key.offset}, key.value}); + } + for (const auto& key : bound_samplers) { + usage.bound_samplers.emplace(key.offset, key.sampler); + } + for (const auto& key : bindless_samplers) { + usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); + } + usages.push_back(std::move(usage)); break; } default: - LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping", + LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping", static_cast(kind)); return {}; } @@ -197,7 +247,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() { const auto result = LoadPrecompiledFile(file); if (!result) { LOG_INFO(Render_OpenGL, - "Failed to load precompiled cache for game with title id={} - removing", + "Failed to load precompiled cache for game with title id={}, removing", GetTitleID()); file.Close(); InvalidatePrecompiled(); @@ -228,10 +278,35 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { ShaderDumpsMap dumps; while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { + u32 num_keys{}; + u32 num_bound_samplers{}; + u32 num_bindless_samplers{}; ShaderDiskCacheUsage usage; - if (!LoadObjectFromPrecompiled(usage)) { + if (!LoadObjectFromPrecompiled(usage.unique_identifier) || + !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) || + !LoadObjectFromPrecompiled(num_bound_samplers) || + !LoadObjectFromPrecompiled(num_bindless_samplers)) { return {}; } + std::vector keys(num_keys); + std::vector bound_samplers(num_bound_samplers); + std::vector bindless_samplers(num_bindless_samplers); + if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) || + !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) != + bound_samplers.size() || + !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) != + bindless_samplers.size()) { + return {}; + } + for (const auto& key : keys) { + usage.keys.insert({{key.cbuf, key.offset}, key.value}); + } + for (const auto& key : bound_samplers) { + usage.bound_samplers.emplace(key.offset, key.sampler); + } + for (const auto& key : bindless_samplers) { + usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); + } ShaderDiskCacheDump dump; if (!LoadObjectFromPrecompiled(dump.binary_format)) { @@ -248,7 +323,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { return {}; } - dumps.emplace(usage, dump); + dumps.emplace(std::move(usage), dump); } return dumps; } @@ -282,10 +357,11 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { } FileUtil::IOFile file = AppendTransferableFile(); - if (!file.IsOpen()) + if (!file.IsOpen()) { return; + } if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { - LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing"); + LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); file.Close(); InvalidateTransferable(); return; @@ -311,13 +387,40 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { FileUtil::IOFile file = AppendTransferableFile(); if (!file.IsOpen()) return; - - if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage) != 1) { - LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry - removing"); + const auto Close = [&] { + LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing"); file.Close(); InvalidateTransferable(); + }; + + if (file.WriteObject(TransferableEntryKind::Usage) != 1 || + file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || + file.WriteObject(static_cast(usage.keys.size())) != 1 || + file.WriteObject(static_cast(usage.bound_samplers.size())) != 1 || + file.WriteObject(static_cast(usage.bindless_samplers.size())) != 1) { + Close(); return; } + for (const auto& [pair, value] : usage.keys) { + const auto [cbuf, offset] = pair; + if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) { + Close(); + return; + } + } + for (const auto& [offset, sampler] : usage.bound_samplers) { + if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) { + Close(); + return; + } + } + for (const auto& [pair, sampler] : usage.bindless_samplers) { + const auto [cbuf, offset] = pair; + if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { + Close(); + return; + } + } } void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) { @@ -339,15 +442,45 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p std::vector binary(binary_length); glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - if (!SaveObjectToPrecompiled(usage) || - !SaveObjectToPrecompiled(static_cast(binary_format)) || - !SaveObjectToPrecompiled(static_cast(binary_length)) || - !SaveArrayToPrecompiled(binary.data(), binary.size())) { - LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing", + const auto Close = [&] { + LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", usage.unique_identifier); InvalidatePrecompiled(); + }; + + if (!SaveObjectToPrecompiled(usage.unique_identifier) || + !SaveObjectToPrecompiled(usage.variant) || + !SaveObjectToPrecompiled(static_cast(usage.keys.size())) || + !SaveObjectToPrecompiled(static_cast(usage.bound_samplers.size())) || + !SaveObjectToPrecompiled(static_cast(usage.bindless_samplers.size()))) { + Close(); return; } + for (const auto& [pair, value] : usage.keys) { + const auto [cbuf, offset] = pair; + if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) { + Close(); + return; + } + } + for (const auto& [offset, sampler] : usage.bound_samplers) { + if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) { + Close(); + return; + } + } + for (const auto& [pair, sampler] : usage.bindless_samplers) { + const auto [cbuf, offset] = pair; + if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { + Close(); + return; + } + } + if (!SaveObjectToPrecompiled(static_cast(binary_format)) || + !SaveObjectToPrecompiled(static_cast(binary_length)) || + !SaveArrayToPrecompiled(binary.data(), binary.size())) { + Close(); + } } FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 61b46d728d..db23ada93a 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -19,6 +20,7 @@ #include "common/common_types.h" #include "core/file_sys/vfs_vector.h" #include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/shader/const_buffer_locker.h" namespace Core { class System; @@ -53,6 +55,7 @@ struct BaseBindings { return !operator==(rhs); } }; +static_assert(std::is_trivially_copyable_v); /// Describes the different variants a single program can be compiled. struct ProgramVariant { @@ -70,13 +73,20 @@ struct ProgramVariant { } }; +static_assert(std::is_trivially_copyable_v); + /// Describes how a shader is used. struct ShaderDiskCacheUsage { u64 unique_identifier{}; ProgramVariant variant; + VideoCommon::Shader::KeyMap keys; + VideoCommon::Shader::BoundSamplerMap bound_samplers; + VideoCommon::Shader::BindlessSamplerMap bindless_samplers; bool operator==(const ShaderDiskCacheUsage& rhs) const { - return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant); + return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) == + std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers, + rhs.bindless_samplers); } bool operator!=(const ShaderDiskCacheUsage& rhs) const { diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index ebeba102d0..fda9e3c384 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -90,14 +90,14 @@ bool ConstBufferLocker::IsConsistent() const { [this](const auto& sampler) { const auto [key, value] = sampler; const auto other_value = engine->AccessBoundSampler(stage, key); - return value.raw == other_value.raw; + return value == other_value; }) && std::all_of( bindless_samplers.begin(), bindless_samplers.end(), [this](const auto& sampler) { const auto [cbuf, offset] = sampler.first; const auto value = sampler.second; const auto other_value = engine->AccessBindlessSampler(stage, cbuf, offset); - return value.raw == other_value.raw; + return value == other_value; }); } From 78f3e8a75792c976eb5bfa6df4c020d898642684 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 26 Sep 2019 00:23:08 -0300 Subject: [PATCH 11/13] gl_shader_cache: Implement locker variants invalidation --- .../renderer_opengl/gl_shader_cache.cpp | 102 +++++++++++++----- .../renderer_opengl/gl_shader_cache.h | 15 ++- src/video_core/shader/const_buffer_locker.cpp | 28 ++--- src/video_core/shader/const_buffer_locker.h | 3 + 4 files changed, 104 insertions(+), 44 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 7e7aea15f3..f1b89165d3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -225,6 +225,34 @@ std::string GetShaderId(u64 unique_identifier, ProgramType program_type) { return fmt::format("{}{:016X}", GetProgramTypeName(program_type), unique_identifier); } +Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface( + Core::System& system, ProgramType program_type) { + if (program_type == ProgramType::Compute) { + return system.GPU().KeplerCompute(); + } else { + return system.GPU().Maxwell3D(); + } +} + +std::unique_ptr MakeLocker(Core::System& system, ProgramType program_type) { + return std::make_unique(GetEnginesShaderType(program_type), + GetConstBufferEngineInterface(system, program_type)); +} + +void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { + for (const auto& key : usage.keys) { + const auto [buffer, offset] = key.first; + locker.InsertKey(buffer, offset, key.second); + } + for (const auto& [offset, sampler] : usage.bound_samplers) { + locker.InsertBoundSampler(offset, sampler); + } + for (const auto& [key, sampler] : usage.bindless_samplers) { + const auto [buffer, offset] = key; + locker.InsertBindlessSampler(buffer, offset, sampler); + } +} + CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type, const ProgramCode& program_code, const ProgramCode& program_code_b, const ProgramVariant& variant, ConstBufferLocker& locker, @@ -336,11 +364,27 @@ CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_t disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, unique_identifier{params.unique_identifier}, program_type{program_type}, entries{entries}, program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} { - if (params.precompiled_variants) { - for (const auto& pair : *params.precompiled_variants) { - const auto& variant = pair->first.variant; - programs.emplace(variant, pair->second); + if (!params.precompiled_variants) { + return; + } + for (const auto& pair : *params.precompiled_variants) { + auto locker = MakeLocker(system, program_type); + const auto& usage = pair->first; + FillLocker(*locker, usage); + + std::unique_ptr* locker_variant = nullptr; + const auto it = + std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) { + return variant->locker->HasEqualKeys(*locker); + }); + if (it == locker_variants.end()) { + locker_variant = &locker_variants.emplace_back(); + *locker_variant = std::make_unique(); + locker_variant->get()->locker = std::move(locker); + } else { + locker_variant = &*it; } + locker_variant->get()->programs.emplace(usage.variant, pair->second); } } @@ -380,19 +424,14 @@ Shader CachedShader::CreateFromCache(const ShaderParameters& params, } std::tuple CachedShader::GetProgramHandle(const ProgramVariant& variant) { - const auto [entry, is_cache_miss] = programs.try_emplace(variant); + UpdateVariant(); + + const auto [entry, is_cache_miss] = curr_variant->programs.try_emplace(variant); auto& program = entry->second; if (is_cache_miss) { - Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; - if (program_type == ProgramType::Compute) { - engine = &system.GPU().KeplerCompute(); - } else { - engine = &system.GPU().Maxwell3D(); - } - ConstBufferLocker locker(GetEnginesShaderType(program_type), *engine); program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b, - variant, locker); - disk_cache.SaveUsage(GetUsage(variant, locker)); + variant, *curr_variant->locker); + disk_cache.SaveUsage(GetUsage(variant, *curr_variant->locker)); LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); } @@ -408,6 +447,25 @@ std::tuple CachedShader::GetProgramHandle(const ProgramVar return {program->handle, base_bindings}; } +void CachedShader::UpdateVariant() { + if (curr_variant && !curr_variant->locker->IsConsistent()) { + curr_variant = nullptr; + } + if (!curr_variant) { + for (auto& variant : locker_variants) { + if (variant->locker->IsConsistent()) { + curr_variant = variant.get(); + } + } + } + if (!curr_variant) { + auto& new_variant = locker_variants.emplace_back(); + new_variant = std::make_unique(); + new_variant->locker = MakeLocker(system, program_type); + curr_variant = new_variant.get(); + } +} + ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, const ConstBufferLocker& locker) const { ShaderDiskCacheUsage usage; @@ -475,21 +533,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } } if (!shader) { - ConstBufferLocker locker(GetEnginesShaderType(unspecialized.program_type)); - for (const auto& key : usage.keys) { - const auto [buffer, offset] = key.first; - locker.InsertKey(buffer, offset, key.second); - } - for (const auto& [offset, sampler] : usage.bound_samplers) { - locker.InsertBoundSampler(offset, sampler); - } - for (const auto& [key, sampler] : usage.bindless_samplers) { - const auto [buffer, offset] = key; - locker.InsertBindlessSampler(buffer, offset, sampler); - } + auto locker{MakeLocker(system, unspecialized.program_type)}; + FillLocker(*locker, usage); shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type, unspecialized.code, unspecialized.code_b, usage.variant, - locker, true); + *locker, true); } std::scoped_lock lock{mutex}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 2935e68312..6bd7c9cf15 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -21,6 +21,7 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" +#include "video_core/shader/const_buffer_locker.h" #include "video_core/shader/shader_ir.h" namespace Core { @@ -31,10 +32,6 @@ namespace Core::Frontend { class EmuWindow; } -namespace VideoCommon::Shader { -class ConstBufferLocker; -} - namespace OpenGL { class CachedShader; @@ -92,10 +89,17 @@ public: std::tuple GetProgramHandle(const ProgramVariant& variant); private: + struct LockerVariant { + std::unique_ptr locker; + std::unordered_map programs; + }; + explicit CachedShader(const ShaderParameters& params, ProgramType program_type, GLShader::ShaderEntries entries, ProgramCode program_code, ProgramCode program_code_b); + void UpdateVariant(); + ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant, const VideoCommon::Shader::ConstBufferLocker& locker) const; @@ -113,7 +117,8 @@ private: ProgramCode program_code; ProgramCode program_code_b; - std::unordered_map programs; + LockerVariant* curr_variant = nullptr; + std::vector> locker_variants; }; class ShaderCacheOpenGL final : public RasterizerCache { diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index fda9e3c384..592bbf6576 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -82,23 +82,27 @@ bool ConstBufferLocker::IsConsistent() const { return false; } return std::all_of(keys.begin(), keys.end(), - [](const auto& key) { - const auto [value, other_value] = key.first; - return value == other_value; + [this](const auto& pair) { + const auto [cbuf, offset] = pair.first; + const auto value = pair.second; + return value == engine->AccessConstBuffer32(stage, cbuf, offset); }) && std::all_of(bound_samplers.begin(), bound_samplers.end(), [this](const auto& sampler) { const auto [key, value] = sampler; - const auto other_value = engine->AccessBoundSampler(stage, key); - return value == other_value; + return value == engine->AccessBoundSampler(stage, key); }) && - std::all_of( - bindless_samplers.begin(), bindless_samplers.end(), [this](const auto& sampler) { - const auto [cbuf, offset] = sampler.first; - const auto value = sampler.second; - const auto other_value = engine->AccessBindlessSampler(stage, cbuf, offset); - return value == other_value; - }); + std::all_of(bindless_samplers.begin(), bindless_samplers.end(), + [this](const auto& sampler) { + const auto [cbuf, offset] = sampler.first; + const auto value = sampler.second; + return value == engine->AccessBindlessSampler(stage, cbuf, offset); + }); +} + +bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const { + return keys == rhs.keys && bound_samplers == rhs.bound_samplers && + bindless_samplers == rhs.bindless_samplers; } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 417d5a16f0..966537fd6e 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -44,6 +44,9 @@ public: /// the same value, false otherwise; bool IsConsistent() const; + /// Returns true if the keys are equal to the other ones in the locker. + bool HasEqualKeys(const ConstBufferLocker& rhs) const; + /// Gives an getter to the const buffer keys in the database. const KeyMap& GetKeys() const { return keys; From e3afd6595a3b7d67e554aaa21f929b64c9fe4b8b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 30 Sep 2019 20:55:25 -0400 Subject: [PATCH 12/13] Shader_IR: Clang format --- src/video_core/engines/kepler_compute.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 6f00db1c16..91adef360d 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -86,8 +86,7 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con u64 offset) const { ASSERT(stage == ShaderType::Compute); const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; - const GPUVAddr tex_info_address = - tex_info_buffer.Address() + offset; + const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; const Texture::TextureHandle tex_handle{memory_manager.Read(tex_info_address)}; const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset); From be856a38d6b0c7c90c861baf3204ac48a108f3d2 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 17 Oct 2019 10:35:16 -0400 Subject: [PATCH 13/13] Shader_IR: Address Feedback. --- src/common/hash.h | 2 +- .../engines/const_buffer_engine_interface.h | 9 ++-- src/video_core/shader/const_buffer_locker.cpp | 8 ++-- src/video_core/shader/const_buffer_locker.h | 7 +++ src/video_core/shader/control_flow.cpp | 46 +++++++++---------- src/video_core/shader/control_flow.h | 6 ++- src/video_core/shader/decode.cpp | 2 +- src/video_core/shader/decode/texture.cpp | 38 +++++++-------- src/video_core/shader/expr.h | 4 ++ 9 files changed, 66 insertions(+), 56 deletions(-) diff --git a/src/common/hash.h b/src/common/hash.h index c939709bc9..ebd4125e23 100644 --- a/src/common/hash.h +++ b/src/common/hash.h @@ -72,7 +72,7 @@ struct HashableStruct { struct PairHash { template - std::size_t operator()(const std::pair& pair) const { + std::size_t operator()(const std::pair& pair) const noexcept { std::size_t seed = std::hash()(pair.first); boost::hash_combine(seed, std::hash()(pair.second)); return seed; diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index 80f470777d..ac27b6cbe1 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h @@ -34,6 +34,10 @@ struct SamplerDescriptor { return raw == rhs.raw; } + bool operator!=(const SamplerDescriptor& rhs) const noexcept { + return !operator==(rhs); + } + static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) { SamplerDescriptor result; switch (tic_texture_type) { @@ -73,13 +77,12 @@ struct SamplerDescriptor { result.is_buffer.Assign(0); result.is_shadow.Assign(0); return result; - case Tegra::Texture::TextureType::Texture1DBuffer: { + case Tegra::Texture::TextureType::Texture1DBuffer: result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); result.is_array.Assign(0); result.is_buffer.Assign(1); result.is_shadow.Assign(0); return result; - } case Tegra::Texture::TextureType::Texture2DNoMipmap: result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); result.is_array.Assign(0); @@ -105,7 +108,7 @@ static_assert(std::is_trivially_copyable_v); class ConstBufferEngineInterface { public: - virtual ~ConstBufferEngineInterface() {} + virtual ~ConstBufferEngineInterface() = default; virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index 592bbf6576..fe467608e1 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -22,6 +22,8 @@ ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, Tegra::Engines::ConstBufferEngineInterface& engine) : stage{shader_stage}, engine{&engine} {} +ConstBufferLocker::~ConstBufferLocker() = default; + std::optional ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { const std::pair key = {buffer, offset}; const auto iter = keys.find(key); @@ -29,7 +31,7 @@ std::optional ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { return iter->second; } if (!engine) { - return {}; + return std::nullopt; } const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); keys.emplace(key, value); @@ -43,7 +45,7 @@ std::optional ConstBufferLocker::ObtainBoundSampler(u32 offse return iter->second; } if (!engine) { - return {}; + return std::nullopt; } const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); bound_samplers.emplace(key, value); @@ -58,7 +60,7 @@ std::optional ConstBufferLocker::ObtainBindle return iter->second; } if (!engine) { - return {}; + return std::nullopt; } const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); bindless_samplers.emplace(key, value); diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 966537fd6e..600e2f3c39 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -16,6 +16,11 @@ using BoundSamplerMap = std::unordered_map, Tegra::Engines::SamplerDescriptor, Common::PairHash>; +/** + * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader + * compiler. with it, the shader can obtain required data from GPU state and store it for disk + * shader compilation. + **/ class ConstBufferLocker { public: explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); @@ -23,6 +28,8 @@ public: explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, Tegra::Engines::ConstBufferEngineInterface& engine); + ~ConstBufferLocker(); + /// Retrieves a key from the locker, if it's registered, it will give the registered value, if /// not it will obtain it from maxwell3d and register it. std::optional ObtainKey(u32 buffer, u32 offset); diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 6c698bcff2..d47c63d9fb 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -41,14 +41,10 @@ BlockBranchInfo MakeBranchInfo(Args&&... args) { return std::make_shared(T(std::forward(args)...)); } -bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second) { - return false; //(*first) == (*second); -} - bool BlockBranchIsIgnored(BlockBranchInfo first) { bool ignore = false; if (std::holds_alternative(*first)) { - auto branch = std::get_if(first.get()); + const auto branch = std::get_if(first.get()); ignore = branch->ignore; } return ignore; @@ -151,10 +147,10 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState& const Instruction instr = {state.program_code[pos]}; const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() != OpCode::Id::BRX) { - return {}; + return std::nullopt; } if (instr.brx.constant_buffer != 0) { - return {}; + return std::nullopt; } track_register = instr.gpr8.Value(); result.relative_position = instr.brx.GetBranchExtend(); @@ -172,8 +168,8 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState& if (opcode->get().GetId() == OpCode::Id::LD_C) { if (instr.gpr0.Value() == track_register && instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single) { - result.buffer = instr.cbuf36.index; - result.offset = instr.cbuf36.GetOffset(); + result.buffer = instr.cbuf36.index.Value(); + result.offset = static_cast(instr.cbuf36.GetOffset()); track_register = instr.gpr8.Value(); pos--; found_track = true; @@ -184,7 +180,7 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState& } if (!found_track) { - return {}; + return std::nullopt; } found_track = false; @@ -194,7 +190,7 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState& pos--; continue; } - const Instruction instr = {state.program_code[pos]}; + const Instruction instr = state.program_code[pos]; const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() == OpCode::Id::SHL_IMM) { if (instr.gpr0.Value() == track_register) { @@ -208,7 +204,7 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState& } if (!found_track) { - return {}; + return std::nullopt; } found_track = false; @@ -218,7 +214,7 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState& pos--; continue; } - const Instruction instr = {state.program_code[pos]}; + const Instruction instr = state.program_code[pos]; const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) { if (instr.gpr0.Value() == track_register) { @@ -233,9 +229,9 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState& } if (!found_track) { - return {}; + return std::nullopt; } - return {result}; + return result; } std::pair ParseCode(CFGRebuildState& state, u32 address) { @@ -440,8 +436,8 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) branches.emplace_back(value, target); } parse_info.end_address = offset; - parse_info.branch_info = - MakeBranchInfo(static_cast(instr.gpr8.Value()), branches); + parse_info.branch_info = MakeBranchInfo( + static_cast(instr.gpr8.Value()), std::move(branches)); return {ParseResult::ControlCaught, parse_info}; } else { @@ -486,7 +482,7 @@ bool TryInspectAddress(CFGRebuildState& state) { current_block.end = address - 1; new_block.branch = current_block.branch; BlockBranchInfo forward_branch = MakeBranchInfo(); - auto branch = std::get_if(forward_branch.get()); + const auto branch = std::get_if(forward_branch.get()); branch->address = address; branch->ignore = true; current_block.branch = forward_branch; @@ -504,7 +500,7 @@ bool TryInspectAddress(CFGRebuildState& state) { BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); block_info.branch = parse_info.branch_info; if (std::holds_alternative(*block_info.branch)) { - auto branch = std::get_if(block_info.branch.get()); + const auto branch = std::get_if(block_info.branch.get()); if (branch->condition.IsUnconditional()) { return true; } @@ -550,7 +546,7 @@ bool TryQuery(CFGRebuildState& state) { gather_labels(q2.ssy_stack, state.ssy_labels, block); gather_labels(q2.pbk_stack, state.pbk_labels, block); if (std::holds_alternative(*block.branch)) { - auto branch = std::get_if(block.branch.get()); + const auto branch = std::get_if(block.branch.get()); if (!branch->condition.IsUnconditional()) { q2.address = block.end + 1; state.queries.push_back(q2); @@ -573,8 +569,8 @@ bool TryQuery(CFGRebuildState& state) { state.queries.push_back(std::move(conditional_query)); return true; } - auto multi_branch = std::get_if(block.branch.get()); - for (auto& branch_case : multi_branch->branches) { + const auto multi_branch = std::get_if(block.branch.get()); + for (const auto& branch_case : multi_branch->branches) { Query conditional_query{q2}; conditional_query.address = branch_case.address; state.queries.push_back(std::move(conditional_query)); @@ -612,7 +608,7 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { return MakeExpr(true); }); if (std::holds_alternative(*branch_info)) { - auto branch = std::get_if(branch_info.get()); + const auto branch = std::get_if(branch_info.get()); if (branch->address < 0) { if (branch->kill) { mm.InsertReturn(get_expr(branch->condition), true); @@ -624,8 +620,8 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { mm.InsertGoto(get_expr(branch->condition), branch->address); return; } - auto multi_branch = std::get_if(branch_info.get()); - for (auto& branch_case : multi_branch->branches) { + const auto multi_branch = std::get_if(branch_info.get()); + for (const auto& branch_case : multi_branch->branches) { mm.InsertGoto(MakeExpr(multi_branch->gpr, branch_case.cmp_value), branch_case.address); } diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 288ee68afe..5304998b95 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -51,6 +51,10 @@ public: std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); } + bool operator!=(const SingleBranch& b) const { + return !operator==(b); + } + Condition condition{}; s32 address{exit_branch}; bool kill{}; @@ -67,7 +71,7 @@ struct CaseBranch { class MultiBranch { public: - MultiBranch(u32 gpr, std::vector& branches) + MultiBranch(u32 gpr, std::vector&& branches) : gpr{gpr}, branches{std::move(branches)} {} u32 gpr{}; diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index e1afa45820..21fb9cb83f 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -155,7 +155,7 @@ void ShaderIR::Decode() { [[fallthrough]]; case CompileDepth::BruteForce: { coverage_begin = main_offset; - const u32 shader_end = program_code.size(); + const std::size_t shader_end = program_code.size(); coverage_end = shader_end; for (u32 label = main_offset; label < shader_end; label++) { basic_blocks.insert({label, DecodeRange(label, label + 1)}); diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index c369e23adf..f33e9c67c3 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -284,7 +284,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, std::optional sampler_info) { - const auto offset = static_cast(sampler.index.Value()); + const auto offset = static_cast(sampler.index.Value()); Tegra::Shader::TextureType type; bool is_array; @@ -293,17 +293,14 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, type = sampler_info->type; is_array = sampler_info->is_array; is_shadow = sampler_info->is_shadow; + } else if (auto sampler = locker.ObtainBoundSampler(offset); sampler) { + type = sampler->texture_type.Value(); + is_array = sampler->is_array.Value() != 0; + is_shadow = sampler->is_shadow.Value() != 0; } else { - auto sampler = locker.ObtainBoundSampler(offset); - if (sampler) { - type = sampler->texture_type.Value(); - is_array = sampler->is_array.Value() != 0; - is_shadow = sampler->is_shadow.Value() != 0; - } else { - type = Tegra::Shader::TextureType::Texture2D; - is_array = false; - is_shadow = false; - } + type = Tegra::Shader::TextureType::Texture2D; + is_array = false; + is_shadow = false; } // If this sampler has already been used, return the existing mapping. @@ -320,7 +317,7 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, const std::size_t next_index = used_samplers.size(); const Sampler entry{offset, next_index, type, is_array, is_shadow}; return *used_samplers.emplace(entry).first; -} +} // namespace VideoCommon::Shader const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, std::optional sampler_info) { @@ -336,17 +333,14 @@ const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, type = sampler_info->type; is_array = sampler_info->is_array; is_shadow = sampler_info->is_shadow; + } else if (auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); sampler) { + type = sampler->texture_type.Value(); + is_array = sampler->is_array.Value() != 0; + is_shadow = sampler->is_shadow.Value() != 0; } else { - auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); - if (sampler) { - type = sampler->texture_type.Value(); - is_array = sampler->is_array.Value() != 0; - is_shadow = sampler->is_shadow.Value() != 0; - } else { - type = Tegra::Shader::TextureType::Texture2D; - is_array = false; - is_shadow = false; - } + type = Tegra::Shader::TextureType::Texture2D; + is_array = false; + is_shadow = false; } // If this sampler has already been used, return the existing mapping. diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h index e41d23e936..4e82643674 100644 --- a/src/video_core/shader/expr.h +++ b/src/video_core/shader/expr.h @@ -127,6 +127,10 @@ public: return gpr == b.gpr && value == b.value; } + bool operator!=(const ExprGprEqual& b) const { + return !operator==(b); + } + u32 gpr; u32 value; };