diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp index c556d3b15..7adc3ad14 100644 --- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp @@ -518,7 +518,9 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d info.labels.insert({entry_point, "main"}); // Generate debug information - debug_data = shader_setup.ProduceDebugInfo(input_vertex, num_attributes, entry_point); + auto* shader_engine = Pica::Shader::GetEngine(); + shader_engine->SetupBatch(&shader_setup); + debug_data = shader_engine->ProduceDebugInfo(input_vertex, num_attributes, entry_point); // Reload widget state for (int attr = 0; attr < num_attributes; ++attr) { diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index fc224c6f2..694c9f169 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -142,15 +142,16 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { MICROPROFILE_SCOPE(GPU_Drawing); immediate_attribute_id = 0; - Shader::UnitState shader_unit; - g_state.vs.Setup(); + auto* shader_engine = Shader::GetEngine(); + shader_engine->SetupBatch(&g_state.vs); // Send to vertex shader if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast(&immediate_input)); + Shader::UnitState shader_unit; shader_unit.LoadInputVertex(immediate_input, regs.vs.num_input_attributes + 1); - g_state.vs.Run(shader_unit, regs.vs.main_offset); + shader_engine->Run(shader_unit, regs.vs.main_offset); Shader::OutputVertex output_vertex = shader_unit.output_registers.ToVertex(regs.vs); @@ -244,8 +245,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { unsigned int vertex_cache_pos = 0; vertex_cache_ids.fill(-1); + auto* shader_engine = Shader::GetEngine(); Shader::UnitState shader_unit; - g_state.vs.Setup(); + + shader_engine->SetupBatch(&g_state.vs); for (unsigned int index = 0; index < regs.num_vertices; ++index) { // Indexed rendering doesn't use the start offset @@ -285,7 +288,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); shader_unit.LoadInputVertex(input, loader.GetNumTotalAttributes()); - g_state.vs.Run(shader_unit, regs.vs.main_offset); + shader_engine->Run(shader_unit, regs.vs.main_offset); // Retrieve vertex from register data output_vertex = shader_unit.output_registers.ToVertex(regs.vs); diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index ae696533f..d276a1221 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -87,6 +87,17 @@ void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) { conditional_code[1] = false; } +class MergedShaderEngine : public ShaderEngine { +public: + void SetupBatch(const ShaderSetup* setup) override; + void Run(UnitState& state, unsigned int entry_point) const override; + DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, + unsigned int entry_point) const override; + +private: + const ShaderSetup* setup = nullptr; +}; + #ifdef ARCHITECTURE_x86_64 static std::unordered_map> shader_map; static const JitShader* jit_shader; @@ -98,13 +109,17 @@ void ClearCache() { #endif // ARCHITECTURE_x86_64 } -void ShaderSetup::Setup() { +void MergedShaderEngine::SetupBatch(const ShaderSetup* setup_) { + setup = setup_; + if (setup == nullptr) + return; + #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) { - u64 cache_key = - Common::ComputeHash64(&program_code, sizeof(program_code)) ^ - Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data)); + u64 code_hash = Common::ComputeHash64(&setup->program_code, sizeof(setup->program_code)); + u64 swizzle_hash = Common::ComputeHash64(&setup->swizzle_data, sizeof(setup->swizzle_data)); + u64 cache_key = code_hash ^ swizzle_hash; auto iter = shader_map.find(cache_key); if (iter != shader_map.end()) { jit_shader = iter->second.get(); @@ -120,26 +135,28 @@ void ShaderSetup::Setup() { MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); -void ShaderSetup::Run(UnitState& state, unsigned int entry_point) const { +void MergedShaderEngine::Run(UnitState& state, unsigned int entry_point) const { + ASSERT(setup != nullptr); ASSERT(entry_point < 1024); MICROPROFILE_SCOPE(GPU_Shader); #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) { - jit_shader->Run(*this, state, entry_point); + jit_shader->Run(*setup, state, entry_point); } else { DebugData dummy_debug_data; - RunInterpreter(*this, state, dummy_debug_data, entry_point); + RunInterpreter(*setup, state, dummy_debug_data, entry_point); } #else DebugData dummy_debug_data; - RunInterpreter(*this, state, dummy_debug_data, entry_point); + RunInterpreter(*setup, state, dummy_debug_data, entry_point); #endif // ARCHITECTURE_x86_64 } -DebugData ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, - unsigned int entry_point) const { +DebugData MergedShaderEngine::ProduceDebugInfo(const InputVertex& input, int num_attributes, + unsigned int entry_point) const { + ASSERT(setup != nullptr); ASSERT(entry_point < 1024); UnitState state; @@ -148,10 +165,15 @@ DebugData ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_ // Setup input register table boost::fill(state.registers.input, Math::Vec4::AssignToAll(float24::Zero())); state.LoadInputVertex(input, num_attributes); - RunInterpreter(*this, state, debug_data, entry_point); + RunInterpreter(*setup, state, debug_data, entry_point); return debug_data; } +ShaderEngine* GetEngine() { + static MergedShaderEngine merged_engine; + return &merged_engine; +} + } // namespace Shader } // namespace Pica diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 44b9861e9..899fb2607 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -156,7 +156,6 @@ struct UnitState { void ClearCache(); struct ShaderSetup { - struct { // The float uniforms are accessed by the shader JIT using SSE instructions, and are // therefore required to be 16-byte aligned. @@ -180,18 +179,23 @@ struct ShaderSetup { std::array program_code; std::array swizzle_data; +}; + +class ShaderEngine { +public: + virtual ~ShaderEngine() = default; /** * Performs any shader unit setup that only needs to happen once per shader (as opposed to once * per vertex, which would happen within the `Run` function). */ - void Setup(); + virtual void SetupBatch(const ShaderSetup* setup) = 0; /** * Runs the currently setup shader * @param state Shader unit state, must be setup per shader and per shader unit */ - void Run(UnitState& state, unsigned int entry_point) const; + virtual void Run(UnitState& state, unsigned int entry_point) const = 0; /** * Produce debug information based on the given shader and input vertex @@ -200,10 +204,13 @@ struct ShaderSetup { * @param config Configuration object for the shader pipeline * @return Debug information for this shader with regards to the given vertex */ - DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, - unsigned int entry_point) const; + virtual DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, + unsigned int entry_point) const = 0; }; +// TODO(yuriks): Remove and make it non-global state somewhere +ShaderEngine* GetEngine(); + } // namespace Shader } // namespace Pica diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index d31dcd7a6..3237b50b3 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h @@ -8,6 +8,7 @@ namespace Pica { namespace Shader { +struct ShaderSetup; struct UnitState; template