From 251f29dd7fa530997cd6d27a8db28c4a39efd127 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 28 Apr 2016 21:43:49 +0200 Subject: [PATCH] Optimize the vertex loader, nearly doubling its speed. --- src/video_core/vertex_loader.cpp | 85 ++++++++++++++++++++------------ src/video_core/vertex_loader.h | 1 - 2 files changed, 54 insertions(+), 32 deletions(-) diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp index c2630d9eb..38682d2be 100644 --- a/src/video_core/vertex_loader.cpp +++ b/src/video_core/vertex_loader.cpp @@ -46,13 +46,11 @@ void VertexLoader::Setup(const Pica::Regs& regs) { u32 attribute_index = loader_config.GetComponent(component); if (attribute_index < 12) { - int element_size = attribute_config.GetElementSizeInBytes(attribute_index); - offset = Common::AlignUp(offset, element_size); + offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index)); vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset; vertex_attribute_strides[attribute_index] = static_cast(loader_config.byte_count); vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); - vertex_attribute_element_size[attribute_index] = element_size; offset += attribute_config.GetStride(attribute_index); } else if (attribute_index < 16) { // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively @@ -68,38 +66,63 @@ void VertexLoader::Setup(const Pica::Regs& regs) { void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, MemoryAccesses& memory_accesses) { for (int i = 0; i < num_total_attributes; ++i) { if (vertex_attribute_elements[i] != 0) { + // Load per-vertex data from the loader arrays + u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex; + + if (g_debug_context && Pica::g_debug_context->recorder) { + memory_accesses.AddAccess(source_addr, + (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 + : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); + } + + switch (vertex_attribute_formats[i]) { + case Regs::VertexAttributeFormat::BYTE: + { + const s8* srcdata = reinterpret_cast(Memory::GetPhysicalPointer(source_addr)); + for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { + input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); + } + break; + } + case Regs::VertexAttributeFormat::UBYTE: + { + const u8* srcdata = reinterpret_cast(Memory::GetPhysicalPointer(source_addr)); + for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { + input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); + } + break; + } + case Regs::VertexAttributeFormat::SHORT: + { + const s16* srcdata = reinterpret_cast(Memory::GetPhysicalPointer(source_addr)); + for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { + input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); + } + break; + } + case Regs::VertexAttributeFormat::FLOAT: + { + const float* srcdata = reinterpret_cast(Memory::GetPhysicalPointer(source_addr)); + for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { + input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); + } + break; + } + } + // Default attribute values set if array elements have < 4 components. This // is *not* carried over from the default attribute settings even if they're // enabled for this attribute. - static const float24 zero = float24::FromFloat32(0.0f); - static const float24 one = float24::FromFloat32(1.0f); - input.attr[i] = Math::Vec4(zero, zero, zero, one); - - // Load per-vertex data from the loader arrays - for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { - u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; - const u8* srcdata = Memory::GetPhysicalPointer(source_addr); - - if (g_debug_context && Pica::g_debug_context->recorder) { - memory_accesses.AddAccess(source_addr, - (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 - : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); - } - - const float srcval = - (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast(srcdata) : - (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast(srcdata) : - (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast(srcdata) : - *reinterpret_cast(srcdata); - - input.attr[i][comp] = float24::FromFloat32(srcval); - LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f", - comp, i, vertex, index, - base_address, - vertex_attribute_sources[i], - vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], - input.attr[i][comp].ToFloat32()); + for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) { + input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); } + + LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f", + vertex_attribute_elements[i], i, vertex, index, + base_address, + vertex_attribute_sources[i], + vertex_attribute_strides[i] * vertex, + input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); } else if (vertex_attribute_is_default[i]) { // Load the default attribute if we're configured to do so input.attr[i] = g_state.vs.default_attributes[i]; diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h index 40c5e83c9..7267ea9c6 100644 --- a/src/video_core/vertex_loader.h +++ b/src/video_core/vertex_loader.h @@ -47,7 +47,6 @@ private: u32 vertex_attribute_strides[16] = {}; Regs::VertexAttributeFormat vertex_attribute_formats[16] = {}; u32 vertex_attribute_elements[16] = {}; - u32 vertex_attribute_element_size[16] = {}; bool vertex_attribute_is_default[16]; int num_total_attributes; };