From bbc7844021dc34e26285a495ed86bad088b87279 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sun, 18 Dec 2016 15:39:56 -0800 Subject: [PATCH 1/9] VideoCore: Change misleading register names A few registers had names such as "count" or "number" when they actually contained the maximum (that is, count - 1). This can easily lead to hard to notice off by one errors. --- src/video_core/command_processor.cpp | 5 +++-- src/video_core/pica.h | 8 ++++---- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- src/video_core/renderer_opengl/gl_rasterizer.h | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index eb79974a8a..9c0ed79c73 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -138,7 +138,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { immediate_input.attr[immediate_attribute_id++] = attribute; - if (immediate_attribute_id >= regs.vs.num_input_attributes + 1) { + if (immediate_attribute_id >= regs.vs.max_input_attribute_index + 1) { MICROPROFILE_SCOPE(GPU_Drawing); immediate_attribute_id = 0; @@ -150,7 +150,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast(&immediate_input)); Shader::UnitState shader_unit; - shader_unit.LoadInputVertex(immediate_input, regs.vs.num_input_attributes + 1); + shader_unit.LoadInputVertex(immediate_input, + regs.vs.max_input_attribute_index + 1); shader_engine->Run(g_state.vs, shader_unit); auto output_vertex = Shader::OutputVertex::FromRegisters( shader_unit.registers.output, regs, regs.vs.output_mask); diff --git a/src/video_core/pica.h b/src/video_core/pica.h index b2db609ec4..5afc9d5dde 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -868,7 +868,7 @@ struct Regs { LightSrc light[8]; LightColor global_ambient; // Emission + (material.ambient * lighting.ambient) 
INSERT_PADDING_WORDS(0x1); - BitField<0, 3, u32> num_lights; // Number of enabled lights - 1 + BitField<0, 3, u32> max_light_index; // Number of enabled lights - 1 union { BitField<2, 2, LightingFresnelSelector> fresnel_selector; @@ -1045,7 +1045,7 @@ struct Regs { BitField<48, 12, u64> attribute_mask; // number of total attributes minus 1 - BitField<60, 4, u64> num_extra_attributes; + BitField<60, 4, u64> max_attribute_index; }; inline VertexAttributeFormat GetFormat(int n) const { @@ -1076,7 +1076,7 @@ struct Regs { } inline int GetNumTotalAttributes() const { - return (int)num_extra_attributes + 1; + return (int)max_attribute_index + 1; } // Attribute loaders map the source vertex data to input attributes @@ -1214,7 +1214,7 @@ struct Regs { union { // Number of input attributes to shader unit - 1 - BitField<0, 4, u32> num_input_attributes; + BitField<0, 4, u32> max_input_attribute_index; }; // Offset to shader program entry point (in words) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2d2f4edc12..9dd9ae0fba 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -467,7 +467,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { // Fragment lighting switches case PICA_REG_INDEX(lighting.disable): - case PICA_REG_INDEX(lighting.num_lights): + case PICA_REG_INDEX(lighting.max_light_index): case PICA_REG_INDEX(lighting.config0): case PICA_REG_INDEX(lighting.config1): case PICA_REG_INDEX(lighting.abs_lut_input): diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index cc3e4bed59..a1aa07074e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -84,7 +84,7 @@ union PicaShaderConfig { // Fragment lighting state.lighting.enable = !regs.lighting.disable; - state.lighting.src_num = regs.lighting.num_lights + 
1; + state.lighting.src_num = regs.lighting.max_light_index + 1; for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) { unsigned num = regs.lighting.light_enable.GetNum(light_index); From ab6954e942654fb003964fc95c0846aa8b89ac91 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sun, 18 Dec 2016 16:42:19 -0800 Subject: [PATCH 2/9] VideoCore: Rename some types to more accurate names --- src/citra_qt/debugger/graphics/graphics_tracing.cpp | 4 ++-- .../debugger/graphics/graphics_vertex_shader.h | 2 +- src/video_core/command_processor.cpp | 11 +++++------ src/video_core/pica_state.h | 4 ++-- src/video_core/shader/shader.cpp | 2 +- src/video_core/shader/shader.h | 4 ++-- src/video_core/shader/shader_interpreter.cpp | 4 ++-- src/video_core/shader/shader_interpreter.h | 2 +- src/video_core/vertex_loader.cpp | 5 +++-- src/video_core/vertex_loader.h | 4 ++-- 10 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/citra_qt/debugger/graphics/graphics_tracing.cpp b/src/citra_qt/debugger/graphics/graphics_tracing.cpp index 716ed50b84..17f1c5ce25 100644 --- a/src/citra_qt/debugger/graphics/graphics_tracing.cpp +++ b/src/citra_qt/debugger/graphics/graphics_tracing.cpp @@ -71,8 +71,8 @@ void GraphicsTracingWidget::StartRecording() { std::array default_attributes; for (unsigned i = 0; i < 16; ++i) { for (unsigned comp = 0; comp < 3; ++comp) { - default_attributes[4 * i + comp] = - nihstro::to_float24(Pica::g_state.vs_default_attributes[i][comp].ToFloat32()); + default_attributes[4 * i + comp] = nihstro::to_float24( + Pica::g_state.input_default_attributes.attr[i][comp].ToFloat32()); } } diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.h b/src/citra_qt/debugger/graphics/graphics_vertex_shader.h index 3292573f3b..c249a2ff87 100644 --- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.h +++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.h @@ -82,7 +82,7 @@ private: nihstro::ShaderInfo info; 
Pica::Shader::DebugData debug_data; - Pica::Shader::InputVertex input_vertex; + Pica::Shader::AttributeBuffer input_vertex; friend class GraphicsVertexShaderModel; }; diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 9c0ed79c73..45b994b463 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -125,7 +125,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // TODO: Verify that this actually modifies the register! if (setup.index < 15) { - g_state.vs_default_attributes[setup.index] = attribute; + g_state.input_default_attributes.attr[setup.index] = attribute; setup.index++; } else { // Put each attribute into an immediate input buffer. @@ -138,7 +138,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { immediate_input.attr[immediate_attribute_id++] = attribute; - if (immediate_attribute_id >= regs.vs.max_input_attribute_index + 1) { + if (immediate_attribute_id > regs.vs.max_input_attribute_index) { MICROPROFILE_SCOPE(GPU_Drawing); immediate_attribute_id = 0; @@ -150,8 +150,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast(&immediate_input)); Shader::UnitState shader_unit; - shader_unit.LoadInputVertex(immediate_input, - regs.vs.max_input_attribute_index + 1); + shader_unit.LoadInput(immediate_input, regs.vs.max_input_attribute_index + 1); shader_engine->Run(g_state.vs, shader_unit); auto output_vertex = Shader::OutputVertex::FromRegisters( shader_unit.registers.output, regs, regs.vs.output_mask); @@ -281,14 +280,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { if (!vertex_cache_hit) { // Initialize data for the current vertex - Shader::InputVertex input; + Shader::AttributeBuffer input; loader.LoadVertex(base_address, index, vertex, input, memory_accesses); // Send to vertex shader if (g_debug_context) 
g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); - shader_unit.LoadInputVertex(input, loader.GetNumTotalAttributes()); + shader_unit.LoadInput(input, loader.GetNumTotalAttributes()); shader_engine->Run(g_state.vs, shader_unit); // Retrieve vertex from register data diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index e4f2e6d5d8..785d056504 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h @@ -23,7 +23,7 @@ struct State { Shader::ShaderSetup vs; Shader::ShaderSetup gs; - std::array, 16> vs_default_attributes; + Shader::AttributeBuffer input_default_attributes; struct { union LutEntry { @@ -66,7 +66,7 @@ struct State { /// Struct used to describe immediate mode rendering state struct ImmediateModeState { // Used to buffer partial vertices for immediate-mode rendering. - Shader::InputVertex input_vertex; + Shader::AttributeBuffer input_vertex; // Index of the next attribute to be loaded into `input_vertex`. 
u32 current_attribute = 0; } immediate; diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 2da50bd620..971ce5b7a9 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -71,7 +71,7 @@ OutputVertex OutputVertex::FromRegisters(Math::Vec4 output_regs[16], co return ret; } -void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) { +void UnitState::LoadInput(const AttributeBuffer& input, int num_attributes) { // Setup input register table const auto& attribute_register_map = g_state.regs.vs.input_register_map; diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 44d9f76c3a..cb38ec0a6c 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -23,7 +23,7 @@ namespace Pica { namespace Shader { -struct InputVertex { +struct AttributeBuffer { alignas(16) Math::Vec4 attr[16]; }; @@ -140,7 +140,7 @@ struct UnitState { * @param input Input vertex into the shader * @param num_attributes The number of vertex shader attributes to load */ - void LoadInputVertex(const InputVertex& input, int num_attributes); + void LoadInput(const AttributeBuffer& input, int num_attributes); }; struct ShaderSetup { diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index c0c89b8578..d803aebbf6 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -668,14 +668,14 @@ void InterpreterEngine::Run(const ShaderSetup& setup, UnitState& state) const { } DebugData InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup, - const InputVertex& input, + const AttributeBuffer& input, int num_attributes) const { UnitState state; DebugData debug_data; // Setup input register table boost::fill(state.registers.input, Math::Vec4::AssignToAll(float24::Zero())); - state.LoadInputVertex(input, num_attributes); + state.LoadInput(input, num_attributes); 
RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point); return debug_data; } diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index d6c0e2d8c1..593e021576 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h @@ -23,7 +23,7 @@ public: * @param config Configuration object for the shader pipeline * @return Debug information for this shader with regards to the given vertex */ - DebugData ProduceDebugInfo(const ShaderSetup& setup, const InputVertex& input, + DebugData ProduceDebugInfo(const ShaderSetup& setup, const AttributeBuffer& input, int num_attributes) const; }; diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp index 2b8ef70184..bf83b61ca2 100644 --- a/src/video_core/vertex_loader.cpp +++ b/src/video_core/vertex_loader.cpp @@ -70,7 +70,8 @@ void VertexLoader::Setup(const Pica::Regs& regs) { is_setup = true; } -void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, +void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, + Shader::AttributeBuffer& input, DebugUtils::MemoryAccessTracker& memory_accesses) { ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices."); @@ -142,7 +143,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); } else if (vertex_attribute_is_default[i]) { // Load the default attribute if we're configured to do so - input.attr[i] = g_state.vs_default_attributes[i]; + input.attr[i] = g_state.input_default_attributes.attr[i]; LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", i, vertex, index, input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h index 9f2098bb25..51f3d45b49 100644 --- 
a/src/video_core/vertex_loader.h +++ b/src/video_core/vertex_loader.h @@ -11,7 +11,7 @@ class MemoryAccessTracker; } namespace Shader { -struct InputVertex; +struct AttributeBuffer; } class VertexLoader { @@ -22,7 +22,7 @@ public: } void Setup(const Pica::Regs& regs); - void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, + void LoadVertex(u32 base_address, int index, int vertex, Shader::AttributeBuffer& input, DebugUtils::MemoryAccessTracker& memory_accesses); int GetNumTotalAttributes() const { From fccb28d2e9f2f813230912e5cf1fea7f352797c7 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sun, 18 Dec 2016 16:50:04 -0800 Subject: [PATCH 3/9] VideoCore: Use correct register for immediate mode attribute count --- src/video_core/command_processor.cpp | 13 +++++++------ src/video_core/pica.h | 7 ++++++- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 45b994b463..27b7a023f0 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -128,17 +128,18 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { g_state.input_default_attributes.attr[setup.index] = attribute; setup.index++; } else { - // Put each attribute into an immediate input buffer. - // When all specified immediate attributes are present, the Vertex Shader is invoked - // and everything is - // sent to the primitive assembler. + // Put each attribute into an immediate input buffer. When all specified immediate + // attributes are present, the Vertex Shader is invoked and everything is sent to + // the primitive assembler. 
auto& immediate_input = g_state.immediate.input_vertex; auto& immediate_attribute_id = g_state.immediate.current_attribute; - immediate_input.attr[immediate_attribute_id++] = attribute; + immediate_input.attr[immediate_attribute_id] = attribute; - if (immediate_attribute_id > regs.vs.max_input_attribute_index) { + if (immediate_attribute_id < regs.max_input_attrib_index) { + immediate_attribute_id += 1; + } else { MICROPROFILE_SCOPE(GPU_Drawing); immediate_attribute_id = 0; diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 5afc9d5dde..c772896e03 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -1176,7 +1176,12 @@ struct Regs { } } command_buffer; - INSERT_PADDING_WORDS(0x07); + INSERT_PADDING_WORDS(4); + + /// Number of input attributes to the vertex shader minus 1 + BitField<0, 4, u32> max_input_attrib_index; + + INSERT_PADDING_WORDS(2); enum class GPUMode : u32 { Drawing = 0, From 335df895b9f9e9760ed5cd0d6dfaea8befb94dac Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sun, 18 Dec 2016 17:25:03 -0800 Subject: [PATCH 4/9] VideoCore: Consistently use shader configuration to load attributes --- .../graphics/graphics_vertex_shader.cpp | 6 +-- src/video_core/command_processor.cpp | 4 +- src/video_core/pica.h | 37 ++++--------------- src/video_core/shader/shader.cpp | 11 +++--- src/video_core/shader/shader.h | 6 +-- src/video_core/shader/shader_interpreter.cpp | 4 +- src/video_core/shader/shader_interpreter.h | 3 +- 7 files changed, 25 insertions(+), 46 deletions(-) diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp index f375241908..489ec5f215 100644 --- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp @@ -511,7 +511,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d auto& shader_config = Pica::g_state.regs.vs; for (auto instr : 
shader_setup.program_code) info.code.push_back({instr}); - int num_attributes = Pica::g_state.regs.vertex_attributes.GetNumTotalAttributes(); + int num_attributes = shader_config.max_input_attribute_index + 1; for (auto pattern : shader_setup.swizzle_data) info.swizzle_info.push_back({pattern}); @@ -522,11 +522,11 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d // Generate debug information Pica::Shader::InterpreterEngine shader_engine; shader_engine.SetupBatch(shader_setup, entry_point); - debug_data = shader_engine.ProduceDebugInfo(shader_setup, input_vertex, num_attributes); + debug_data = shader_engine.ProduceDebugInfo(shader_setup, input_vertex, shader_config); // Reload widget state for (int attr = 0; attr < num_attributes; ++attr) { - unsigned source_attr = shader_config.input_register_map.GetRegisterForAttribute(attr); + unsigned source_attr = shader_config.GetRegisterForAttribute(attr); input_data_mapping[attr]->setText(QString("-> v%1").arg(source_attr)); input_data_container[attr]->setVisible(true); } diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 27b7a023f0..fef0b4ceb0 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -151,7 +151,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast(&immediate_input)); Shader::UnitState shader_unit; - shader_unit.LoadInput(immediate_input, regs.vs.max_input_attribute_index + 1); + shader_unit.LoadInput(regs.vs, immediate_input); shader_engine->Run(g_state.vs, shader_unit); auto output_vertex = Shader::OutputVertex::FromRegisters( shader_unit.registers.output, regs, regs.vs.output_mask); @@ -288,7 +288,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); - shader_unit.LoadInput(input, 
loader.GetNumTotalAttributes()); + shader_unit.LoadInput(regs.vs, input); shader_engine->Run(g_state.vs, shader_unit); // Retrieve vertex from register data diff --git a/src/video_core/pica.h b/src/video_core/pica.h index c772896e03..ac81a3d0f9 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -1225,36 +1225,15 @@ struct Regs { // Offset to shader program entry point (in words) BitField<0, 16, u32> main_offset; - union { - BitField<0, 4, u64> attribute0_register; - BitField<4, 4, u64> attribute1_register; - BitField<8, 4, u64> attribute2_register; - BitField<12, 4, u64> attribute3_register; - BitField<16, 4, u64> attribute4_register; - BitField<20, 4, u64> attribute5_register; - BitField<24, 4, u64> attribute6_register; - BitField<28, 4, u64> attribute7_register; - BitField<32, 4, u64> attribute8_register; - BitField<36, 4, u64> attribute9_register; - BitField<40, 4, u64> attribute10_register; - BitField<44, 4, u64> attribute11_register; - BitField<48, 4, u64> attribute12_register; - BitField<52, 4, u64> attribute13_register; - BitField<56, 4, u64> attribute14_register; - BitField<60, 4, u64> attribute15_register; + /// Maps input attributes to registers. 
4-bits per attribute, specifying a register index + u32 input_attribute_to_register_map_low; + u32 input_attribute_to_register_map_high; - int GetRegisterForAttribute(int attribute_index) const { - u64 fields[] = { - attribute0_register, attribute1_register, attribute2_register, - attribute3_register, attribute4_register, attribute5_register, - attribute6_register, attribute7_register, attribute8_register, - attribute9_register, attribute10_register, attribute11_register, - attribute12_register, attribute13_register, attribute14_register, - attribute15_register, - }; - return (int)fields[attribute_index]; - } - } input_register_map; + unsigned int GetRegisterForAttribute(unsigned int attribute_index) const { + u64 map = ((u64)input_attribute_to_register_map_high << 32) | + (u64)input_attribute_to_register_map_low; + return (map >> (attribute_index * 4)) & 0b1111; + } BitField<0, 16, u32> output_mask; diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 971ce5b7a9..dbad167e92 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -71,12 +71,13 @@ OutputVertex OutputVertex::FromRegisters(Math::Vec4 output_regs[16], co return ret; } -void UnitState::LoadInput(const AttributeBuffer& input, int num_attributes) { - // Setup input register table - const auto& attribute_register_map = g_state.regs.vs.input_register_map; +void UnitState::LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input) { + const unsigned max_attribute = config.max_input_attribute_index; - for (int i = 0; i < num_attributes; i++) - registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; + for (unsigned attr = 0; attr <= max_attribute; ++attr) { + unsigned reg = config.GetRegisterForAttribute(attr); + registers.input[reg] = input.attr[attr]; + } } MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); diff --git a/src/video_core/shader/shader.h 
b/src/video_core/shader/shader.h index cb38ec0a6c..43a8b848c8 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -137,10 +137,10 @@ struct UnitState { /** * Loads the unit state with an input vertex. * - * @param input Input vertex into the shader - * @param num_attributes The number of vertex shader attributes to load + * @param config Shader configuration registers corresponding to the unit. + * @param input Attribute buffer to load into the input registers. */ - void LoadInput(const AttributeBuffer& input, int num_attributes); + void LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input); }; struct ShaderSetup { diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index d803aebbf6..81522b8f58 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -669,13 +669,13 @@ void InterpreterEngine::Run(const ShaderSetup& setup, UnitState& state) const { DebugData InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup, const AttributeBuffer& input, - int num_attributes) const { + const Regs::ShaderConfig& config) const { UnitState state; DebugData debug_data; // Setup input register table boost::fill(state.registers.input, Math::Vec4::AssignToAll(float24::Zero())); - state.LoadInput(input, num_attributes); + state.LoadInput(config, input); RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point); return debug_data; } diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index 593e021576..d7a61e1228 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h @@ -19,12 +19,11 @@ public: /** * Produce debug information based on the given shader and input vertex * @param input Input vertex into the shader - * @param num_attributes The number of vertex shader attributes * @param config Configuration object 
for the shader pipeline * @return Debug information for this shader with regards to the given vertex */ DebugData ProduceDebugInfo(const ShaderSetup& setup, const AttributeBuffer& input, - int num_attributes) const; + const Regs::ShaderConfig& config) const; }; } // namespace From 92bf5c88e6f85ebeef161a0056c86c66bc25c6e7 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sun, 18 Dec 2016 17:58:30 -0800 Subject: [PATCH 5/9] VideoCore: Split shader output writing from semantic loading --- src/video_core/command_processor.cpp | 14 ++++++++------ src/video_core/shader/shader.cpp | 29 +++++++++++++--------------- src/video_core/shader/shader.h | 5 +++-- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index fef0b4ceb0..4955ff9f96 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -151,10 +151,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast(&immediate_input)); Shader::UnitState shader_unit; + Shader::AttributeBuffer output{}; + shader_unit.LoadInput(regs.vs, immediate_input); shader_engine->Run(g_state.vs, shader_unit); - auto output_vertex = Shader::OutputVertex::FromRegisters( - shader_unit.registers.output, regs, regs.vs.output_mask); + shader_unit.WriteOutput(regs.vs, output); // Send to renderer using Pica::Shader::OutputVertex; @@ -163,7 +164,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); }; - g_state.primitive_assembler.SubmitVertex(output_vertex, AddTriangle); + g_state.primitive_assembler.SubmitVertex( + Shader::OutputVertex::FromAttributeBuffer(regs, output), AddTriangle); } } } @@ -281,7 +283,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { if (!vertex_cache_hit) { // Initialize data for the current vertex - Shader::AttributeBuffer input; 
+ Shader::AttributeBuffer input, output{}; loader.LoadVertex(base_address, index, vertex, input, memory_accesses); // Send to vertex shader @@ -290,10 +292,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { (void*)&input); shader_unit.LoadInput(regs.vs, input); shader_engine->Run(g_state.vs, shader_unit); + shader_unit.WriteOutput(regs.vs, output); // Retrieve vertex from register data - output_vertex = Shader::OutputVertex::FromRegisters(shader_unit.registers.output, - regs, regs.vs.output_mask); + output_vertex = Shader::OutputVertex::FromAttributeBuffer(regs, output); if (is_indexed) { vertex_cache[vertex_cache_pos] = output_vertex; diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index dbad167e92..99a22c2dd7 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -4,6 +4,7 @@ #include #include +#include "common/bit_set.h" #include "common/logging/log.h" #include "common/microprofile.h" #include "video_core/pica.h" @@ -19,22 +20,13 @@ namespace Pica { namespace Shader { -OutputVertex OutputVertex::FromRegisters(Math::Vec4 output_regs[16], const Regs& regs, - u32 output_mask) { +OutputVertex OutputVertex::FromAttributeBuffer(const Regs& regs, AttributeBuffer& input) { // Setup output data OutputVertex ret; - // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to - // figure out what those circumstances are and enable the remaining outputs then. 
- unsigned index = 0; - for (unsigned i = 0; i < 7; ++i) { - if (index >= regs.vs_output_total) - break; - - if ((output_mask & (1 << i)) == 0) - continue; - - const auto& output_register_map = regs.vs_output_attributes[index]; + unsigned int num_attributes = regs.vs_output_total; + for (unsigned int i = 0; i < num_attributes; ++i) { + const auto& output_register_map = regs.vs_output_attributes[i]; u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, output_register_map.map_z, output_register_map.map_w}; @@ -42,15 +34,13 @@ OutputVertex OutputVertex::FromRegisters(Math::Vec4 output_regs[16], co for (unsigned comp = 0; comp < 4; ++comp) { float24* out = ((float24*)&ret) + semantics[comp]; if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { - *out = output_regs[i][comp]; + *out = input.attr[i][comp]; } else { // Zero output so that attributes which aren't output won't have denormals in them, // which would slow us down later. memset(out, 0, sizeof(*out)); } } - - index++; } // The hardware takes the absolute and saturates vertex colors like this, *before* doing @@ -80,6 +70,13 @@ void UnitState::LoadInput(const Regs::ShaderConfig& config, const AttributeBuffe } } +void UnitState::WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output) { + unsigned int output_i = 0; + for (unsigned int reg : Common::BitSet(config.output_mask)) { + output.attr[output_i++] = registers.output[reg]; + } +} + MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); #ifdef ARCHITECTURE_x86_64 diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 43a8b848c8..00bd723cfb 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -74,8 +74,7 @@ struct OutputVertex { return ret; } - static OutputVertex FromRegisters(Math::Vec4 output_regs[16], const Regs& regs, - u32 output_mask); + static OutputVertex FromAttributeBuffer(const Regs& regs, AttributeBuffer& output); }; 
static_assert(std::is_pod::value, "Structure is not POD"); static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); @@ -141,6 +140,8 @@ struct UnitState { * @param input Attribute buffer to load into the input registers. */ void LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input); + + void WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output); }; struct ShaderSetup { From d36ec905b1d9536198e584915024ed65f0342ab2 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sun, 18 Dec 2016 21:48:12 -0800 Subject: [PATCH 6/9] Common: Optimize BitSet iterator --- src/common/bit_set.h | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/common/bit_set.h b/src/common/bit_set.h index 3059d0cb0d..9c2e6b28c0 100644 --- a/src/common/bit_set.h +++ b/src/common/bit_set.h @@ -121,22 +121,19 @@ public: class Iterator { public: Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {} - Iterator(IntTy val, int bit) : m_val(val), m_bit(bit) {} + Iterator(IntTy val) : m_val(val), m_bit(0) {} Iterator& operator=(Iterator other) { new (this) Iterator(other); return *this; } int operator*() { - return m_bit; + return m_bit + ComputeLsb(); } Iterator& operator++() { - if (m_val == 0) { - m_bit = -1; - } else { - int bit = LeastSignificantSetBit(m_val); - m_val &= ~(1 << bit); - m_bit = bit; - } + int lsb = ComputeLsb(); + m_val >>= lsb + 1; + m_bit += lsb + 1; + m_has_lsb = false; return *this; } Iterator operator++(int _) { @@ -145,15 +142,24 @@ public: return other; } bool operator==(Iterator other) const { - return m_bit == other.m_bit; + return m_val == other.m_val; } bool operator!=(Iterator other) const { - return m_bit != other.m_bit; + return m_val != other.m_val; } private: + int ComputeLsb() { + if (!m_has_lsb) { + m_lsb = LeastSignificantSetBit(m_val); + m_has_lsb = true; + } + return m_lsb; + } IntTy m_val; int m_bit; + int m_lsb = 
-1; bool m_has_lsb = false; }; BitSet() : m_val(0) {} @@ -221,11 +227,10 @@ public: } Iterator begin() const { - Iterator it(m_val, 0); - return ++it; + return Iterator(m_val); } Iterator end() const { - return Iterator(m_val, -1); + return Iterator(0); } IntTy m_val; From 8ed9f9d49f716487f14736c48a7850129a5910ba Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sun, 18 Dec 2016 23:42:29 -0800 Subject: [PATCH 7/9] VideoCore/Shader: Clean up OutputVertex::FromAttributeBuffer This also fixes a long-standing but nevertheless harmless memory corruption bug, where the padding of the OutputVertex struct would get corrupted by unused attributes. --- src/video_core/pica.h | 3 ++- src/video_core/shader/shader.cpp | 23 ++++++++++++++--------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index ac81a3d0f9..e326f77274 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -99,7 +99,8 @@ struct Regs { TEXCOORD1_U = 14, TEXCOORD1_V = 15, - // TODO: Not verified + TEXCOORD0_W = 16, + VIEW_X = 18, VIEW_Y = 19, VIEW_Z = 20, diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 99a22c2dd7..2c6e45ac4b 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -22,23 +22,28 @@ namespace Shader { OutputVertex OutputVertex::FromAttributeBuffer(const Regs& regs, AttributeBuffer& input) { // Setup output data - OutputVertex ret; + union { + OutputVertex ret{}; + std::array vertex_slots; + }; + static_assert(sizeof(vertex_slots) <= sizeof(ret), "Struct and array have different sizes."); unsigned int num_attributes = regs.vs_output_total; + ASSERT(num_attributes <= 7); for (unsigned int i = 0; i < num_attributes; ++i) { const auto& output_register_map = regs.vs_output_attributes[i]; - u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, - output_register_map.map_z, output_register_map.map_w}; + 
Regs::VSOutputAttributes::Semantic semantics[4] = { + output_register_map.map_x, output_register_map.map_y, output_register_map.map_z, + output_register_map.map_w}; for (unsigned comp = 0; comp < 4; ++comp) { - float24* out = ((float24*)&ret) + semantics[comp]; - if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { + Regs::VSOutputAttributes::Semantic semantic = semantics[comp]; + float24* out = &vertex_slots[semantic]; + if (semantic < vertex_slots.size()) { *out = input.attr[i][comp]; - } else { - // Zero output so that attributes which aren't output won't have denormals in them, - // which would slow us down later. - memset(out, 0, sizeof(*out)); + } else if (semantic != Regs::VSOutputAttributes::INVALID) { + LOG_ERROR(HW_GPU, "Invalid/unknown semantic id: %u", (unsigned int)semantic); } } } From dcdffabfe69d0cecd2d8c0c1f217b884b20af643 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sun, 18 Dec 2016 23:43:37 -0800 Subject: [PATCH 8/9] VideoCore: Extract swrast-specific data from OutputVertex --- src/video_core/clipper.cpp | 24 +++++++++------- src/video_core/rasterizer.cpp | 7 ++--- src/video_core/rasterizer.h | 40 ++++++++++++++++++++++---- src/video_core/shader/shader.cpp | 2 +- src/video_core/shader/shader.h | 49 +++++++++----------------------- 5 files changed, 64 insertions(+), 58 deletions(-) diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index 05b5cea73a..0774ffc535 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp @@ -18,6 +18,8 @@ #include "video_core/rasterizer.h" #include "video_core/shader/shader.h" +using Pica::Rasterizer::Vertex; + namespace Pica { namespace Clipper { @@ -29,20 +31,20 @@ public: float24::FromFloat32(0), float24::FromFloat32(0))) : coeffs(coeffs), bias(bias) {} - bool IsInside(const OutputVertex& vertex) const { + bool IsInside(const Vertex& vertex) const { return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0); } - bool IsOutSide(const OutputVertex& 
vertex) const { + bool IsOutSide(const Vertex& vertex) const { return !IsInside(vertex); } - OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const { + Vertex GetIntersection(const Vertex& v0, const Vertex& v1) const { float24 dp = Math::Dot(v0.pos + bias, coeffs); float24 dp_prev = Math::Dot(v1.pos + bias, coeffs); float24 factor = dp_prev / (dp_prev - dp); - return OutputVertex::Lerp(factor, v0, v1); + return Vertex::Lerp(factor, v0, v1); } private: @@ -51,7 +53,7 @@ private: Math::Vec4 bias; }; -static void InitScreenCoordinates(OutputVertex& vtx) { +static void InitScreenCoordinates(Vertex& vtx) { struct { float24 halfsize_x; float24 offset_x; @@ -91,8 +93,8 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9. static const size_t MAX_VERTICES = 9; - static_vector buffer_a = {v0, v1, v2}; - static_vector buffer_b; + static_vector buffer_a = {v0, v1, v2}; + static_vector buffer_b; auto* output_list = &buffer_a; auto* input_list = &buffer_b; @@ -123,7 +125,7 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu std::swap(input_list, output_list); output_list->clear(); - const OutputVertex* reference_vertex = &input_list->back(); + const Vertex* reference_vertex = &input_list->back(); for (const auto& vertex : *input_list) { // NOTE: This algorithm changes vertex order in some cases! 
@@ -148,9 +150,9 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu InitScreenCoordinates((*output_list)[1]); for (size_t i = 0; i < output_list->size() - 2; i++) { - OutputVertex& vtx0 = (*output_list)[0]; - OutputVertex& vtx1 = (*output_list)[i + 1]; - OutputVertex& vtx2 = (*output_list)[i + 2]; + Vertex& vtx0 = (*output_list)[0]; + Vertex& vtx1 = (*output_list)[i + 1]; + Vertex& vtx2 = (*output_list)[i + 2]; InitScreenCoordinates(vtx2); diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index b9f5d45333..0674eb85e9 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -307,8 +307,8 @@ MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 24 * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing * culling via recursion. */ -static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, - const Shader::OutputVertex& v2, bool reversed = false) { +static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Vertex& v2, + bool reversed = false) { const auto& regs = g_state.regs; MICROPROFILE_SCOPE(GPU_Rasterization); @@ -1276,8 +1276,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader } } -void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, - const Shader::OutputVertex& v2) { +void ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2) { ProcessTriangleInternal(v0, v1, v2); } diff --git a/src/video_core/rasterizer.h b/src/video_core/rasterizer.h index 6cbda30677..3a72ac3430 100644 --- a/src/video_core/rasterizer.h +++ b/src/video_core/rasterizer.h @@ -4,16 +4,44 @@ #pragma once -namespace Pica { +#include "video_core/shader/shader.h" -namespace Shader { -struct OutputVertex; -} +namespace Pica { namespace Rasterizer { -void ProcessTriangle(const Shader::OutputVertex& v0, const 
Shader::OutputVertex& v1, - const Shader::OutputVertex& v2); +struct Vertex : Shader::OutputVertex { + Vertex(const OutputVertex& v) : OutputVertex(v) {} + + // Attributes used to store intermediate results + // position after perspective divide + Math::Vec3 screenpos; + + // Linear interpolation + // factor: 0=this, 1=vtx + void Lerp(float24 factor, const Vertex& vtx) { + pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); + + // TODO: Should perform perspective correct interpolation here... + tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); + tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor); + tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor); + + screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); + + color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); + } + + // Linear interpolation + // factor: 0=v0, 1=v1 + static Vertex Lerp(float24 factor, const Vertex& v0, const Vertex& v1) { + Vertex ret = v0; + ret.Lerp(factor, v1); + return ret; + } +}; + +void ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2); } // namespace Rasterizer diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 2c6e45ac4b..f5f7ea61da 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -26,7 +26,7 @@ OutputVertex OutputVertex::FromAttributeBuffer(const Regs& regs, AttributeBuffer OutputVertex ret{}; std::array vertex_slots; }; - static_assert(sizeof(vertex_slots) <= sizeof(ret), "Struct and array have different sizes."); + static_assert(sizeof(vertex_slots) == sizeof(ret), "Struct and array have different sizes."); unsigned int num_attributes = regs.vs_output_total; ASSERT(num_attributes <= 7); diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 00bd723cfb..b188d3edf7 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -28,9 
+28,6 @@ struct AttributeBuffer { }; struct OutputVertex { - OutputVertex() = default; - - // VS output attributes Math::Vec4 pos; Math::Vec4 quat; Math::Vec4 color; @@ -42,42 +39,22 @@ struct OutputVertex { INSERT_PADDING_WORDS(1); Math::Vec2 tc2; - // Padding for optimal alignment - INSERT_PADDING_WORDS(4); - - // Attributes used to store intermediate results - - // position after perspective divide - Math::Vec3 screenpos; - INSERT_PADDING_WORDS(1); - - // Linear interpolation - // factor: 0=this, 1=vtx - void Lerp(float24 factor, const OutputVertex& vtx) { - pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); - - // TODO: Should perform perspective correct interpolation here... - tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); - tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor); - tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor); - - screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); - - color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); - } - - // Linear interpolation - // factor: 0=v0, 1=v1 - static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) { - OutputVertex ret = v0; - ret.Lerp(factor, v1); - return ret; - } - static OutputVertex FromAttributeBuffer(const Regs& regs, AttributeBuffer& output); }; +#define ASSERT_POS(var, pos) \ + static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \ + "offset.") +ASSERT_POS(pos, Regs::VSOutputAttributes::POSITION_X); +ASSERT_POS(quat, Regs::VSOutputAttributes::QUATERNION_X); +ASSERT_POS(color, Regs::VSOutputAttributes::COLOR_R); +ASSERT_POS(tc0, Regs::VSOutputAttributes::TEXCOORD0_U); +ASSERT_POS(tc1, Regs::VSOutputAttributes::TEXCOORD1_U); +ASSERT_POS(tc0_w, Regs::VSOutputAttributes::TEXCOORD0_W); +ASSERT_POS(view, Regs::VSOutputAttributes::VIEW_X); +ASSERT_POS(tc2, Regs::VSOutputAttributes::TEXCOORD2_U); +#undef ASSERT_POS 
static_assert(std::is_pod::value, "Structure is not POD"); -static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); +static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size"); /** * This structure contains the state information that needs to be unique for a shader unit. The 3DS From 37a4ea046d80973d59ddb7735a0ffbf0bfd93ad0 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Fri, 27 Jan 2017 18:10:54 -0800 Subject: [PATCH 9/9] VideoCore: Make PrimitiveAssembler const-correct --- src/video_core/primitive_assembly.cpp | 2 +- src/video_core/primitive_assembly.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index be73772901..e71ff57198 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp @@ -14,7 +14,7 @@ PrimitiveAssembler::PrimitiveAssembler(Regs::TriangleTopology topolo : topology(topology), buffer_index(0) {} template -void PrimitiveAssembler::SubmitVertex(VertexType& vtx, +void PrimitiveAssembler::SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler) { switch (topology) { // TODO: Figure out what's different with TriangleTopology::Shader. diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h index 0384d5984f..24da47382b 100644 --- a/src/video_core/primitive_assembly.h +++ b/src/video_core/primitive_assembly.h @@ -15,7 +15,8 @@ namespace Pica { */ template struct PrimitiveAssembler { - using TriangleHandler = std::function; + using TriangleHandler = + std::function; PrimitiveAssembler(Regs::TriangleTopology topology = Regs::TriangleTopology::List); @@ -25,7 +26,7 @@ struct PrimitiveAssembler { * NOTE: We could specify the triangle handler in the constructor, but this way we can * keep event and handler code next to each other. 
*/ - void SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler); + void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler); /** * Resets the internal state of the PrimitiveAssembler.