mirror of
https://git.suyu.dev/suyu/suyu.git
synced 2024-11-30 02:32:46 +01:00
gl_rasterizer: Implement quads topology
This commit is contained in:
parent
393042c09c
commit
3e2380327a
8 changed files with 236 additions and 46 deletions
|
@ -27,6 +27,8 @@ add_library(video_core STATIC
|
|||
renderer_base.h
|
||||
renderer_opengl/gl_buffer_cache.cpp
|
||||
renderer_opengl/gl_buffer_cache.h
|
||||
renderer_opengl/gl_primitive_assembler.cpp
|
||||
renderer_opengl/gl_primitive_assembler.h
|
||||
renderer_opengl/gl_rasterizer.cpp
|
||||
renderer_opengl/gl_rasterizer.h
|
||||
renderer_opengl/gl_rasterizer_cache.cpp
|
||||
|
|
|
@ -744,6 +744,12 @@ public:
|
|||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(end_addr_high) << 32) |
|
||||
end_addr_low);
|
||||
}
|
||||
|
||||
/// Adjust the index buffer offset so it points to the first desired index.
|
||||
GPUVAddr IndexStart() const {
|
||||
return StartAddress() + static_cast<size_t>(first) *
|
||||
static_cast<size_t>(FormatSizeInBytes());
|
||||
}
|
||||
} index_array;
|
||||
|
||||
INSERT_PADDING_WORDS(0x7);
|
||||
|
|
|
@ -34,7 +34,7 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
|
|||
}
|
||||
|
||||
AlignBuffer(alignment);
|
||||
GLintptr uploaded_offset = buffer_offset;
|
||||
const GLintptr uploaded_offset = buffer_offset;
|
||||
|
||||
Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
|
||||
|
||||
|
@ -57,13 +57,23 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t s
|
|||
std::size_t alignment) {
|
||||
AlignBuffer(alignment);
|
||||
std::memcpy(buffer_ptr, raw_pointer, size);
|
||||
GLintptr uploaded_offset = buffer_offset;
|
||||
const GLintptr uploaded_offset = buffer_offset;
|
||||
|
||||
buffer_ptr += size;
|
||||
buffer_offset += size;
|
||||
return uploaded_offset;
|
||||
}
|
||||
|
||||
std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::size_t alignment) {
|
||||
AlignBuffer(alignment);
|
||||
u8* const uploaded_ptr = buffer_ptr;
|
||||
const GLintptr uploaded_offset = buffer_offset;
|
||||
|
||||
buffer_ptr += size;
|
||||
buffer_offset += size;
|
||||
return std::make_tuple(uploaded_ptr, uploaded_offset);
|
||||
}
|
||||
|
||||
void OGLBufferCache::Map(std::size_t max_size) {
|
||||
bool invalidate;
|
||||
std::tie(buffer_ptr, buffer_offset_base, invalidate) =
|
||||
|
@ -74,6 +84,7 @@ void OGLBufferCache::Map(std::size_t max_size) {
|
|||
InvalidateAll();
|
||||
}
|
||||
}
|
||||
|
||||
/// Unmaps the stream buffer, passing it the distance between the current buffer offset and the
/// base offset.
void OGLBufferCache::Unmap() {
    const auto consumed = buffer_offset - buffer_offset_base;
    stream_buffer.Unmap(consumed);
}
|
||||
|
@ -84,7 +95,7 @@ GLuint OGLBufferCache::GetHandle() const {
|
|||
|
||||
void OGLBufferCache::AlignBuffer(std::size_t alignment) {
|
||||
// Align the offset, not the mapped pointer
|
||||
GLintptr offset_aligned =
|
||||
const GLintptr offset_aligned =
|
||||
static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment));
|
||||
buffer_ptr += offset_aligned - buffer_offset;
|
||||
buffer_offset = offset_aligned;
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/rasterizer_cache.h"
|
||||
|
@ -33,11 +34,17 @@ class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBuffer
|
|||
public:
|
||||
explicit OGLBufferCache(std::size_t size);
|
||||
|
||||
/// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
|
||||
/// allocated.
|
||||
GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
|
||||
bool cache = true);
|
||||
|
||||
/// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
|
||||
GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
|
||||
|
||||
/// Reserves memory to be used by host's CPU. Returns mapped address and offset.
|
||||
std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4);
|
||||
|
||||
void Map(std::size_t max_size);
|
||||
void Unmap();
|
||||
|
||||
|
|
64
src/video_core/renderer_opengl/gl_primitive_assembler.cpp
Normal file
64
src/video_core/renderer_opengl/gl_primitive_assembler.cpp
Normal file
|
@ -0,0 +1,64 @@
|
|||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
// Number of indices emitted per quad (two triangles of three indices each). Despite the name,
// this is an index count, not a triangle count: it sizes QUAD_MAP and bounds the per-quad
// index loops.
constexpr u32 TRIANGLES_PER_QUAD = 6;
|
||||
// Corner picks, relative to a quad's first vertex, that split the quad into the two triangles
// (0, 1, 2) and (0, 2, 3).
constexpr std::array<u32, TRIANGLES_PER_QUAD> QUAD_MAP = {0, 1, 2, 0, 2, 3};
|
||||
|
||||
/// Binds the assembler to the buffer cache it reserves index memory from. Only a reference is
/// stored.
PrimitiveAssembler::PrimitiveAssembler(OGLBufferCache& buffer_cache) : buffer_cache(buffer_cache) {}
|
||||
|
||||
/// Defaulted destructor, defined out of line in this translation unit.
PrimitiveAssembler::~PrimitiveAssembler() = default;
|
||||
|
||||
/// Returns the number of bytes needed to hold the triangle index list for `count` quad
/// vertices, assuming every index is stored as a GLuint.
/// @param count Number of quad vertices; asserted to be a multiple of 4.
std::size_t PrimitiveAssembler::CalculateQuadSize(u32 count) const {
    ASSERT_MSG(count % 4 == 0, "Quad count is expected to be a multiple of 4");

    const u32 quad_total = count / 4;
    const u32 index_total = quad_total * TRIANGLES_PER_QUAD;
    return index_total * sizeof(GLuint);
}
|
||||
|
||||
/// Generates a u32 index list that draws sequential quad vertices as triangles.
/// @param first Vertex number of the first quad's first corner.
/// @param count Number of quad vertices to cover (four per quad).
/// @return Offset of the generated index list, as returned by the buffer cache reservation.
GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) {
    const std::size_t bytes_needed{CalculateQuadSize(count)};
    auto [write_cursor, list_offset] = buffer_cache.ReserveMemory(bytes_needed);

    const u32 quad_total = count / 4;
    for (u32 quad = 0; quad < quad_total; ++quad) {
        const u32 quad_base = first + quad * 4;
        for (const u32 corner : QUAD_MAP) {
            const u32 vertex_index = quad_base + corner;
            // Copy bytewise: the reserved pointer is a raw u8* cursor.
            std::memcpy(write_cursor, &vertex_index, sizeof(vertex_index));
            write_cursor += sizeof(vertex_index);
        }
    }

    return list_offset;
}
|
||||
|
||||
/// Re-orders an existing quad index buffer into a triangle index list.
///
/// For every group of four source indices, six indices are copied (in QUAD_MAP order) into
/// memory reserved from the buffer cache. Indices keep their original width of `index_size`.
///
/// @param gpu_addr   GPU virtual address of the first source index.
/// @param index_size Size in bytes of one source index; must not exceed sizeof(GLuint) because
///                   the reservation is sized by CalculateQuadSize, which assumes GLuint indices.
/// @param count      Number of source indices (four per quad).
/// @return Offset of the generated index list, as returned by the buffer cache reservation.
GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size,
                                             u32 count) {
    // The reservation below holds sizeof(GLuint) bytes per index; wider indices would overrun it.
    ASSERT_MSG(index_size <= sizeof(GLuint), "Index size exceeds the reserved size per index");

    const std::size_t map_size{CalculateQuadSize(count)};
    auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);

    auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
    const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
    // Dereferencing an empty optional is undefined behaviour; fail loudly instead.
    ASSERT_MSG(cpu_addr, "Quad index buffer GPU address is not mapped");
    const u8* source{Memory::GetPointer(*cpu_addr)};

    for (u32 primitive = 0; primitive < count / 4; ++primitive) {
        for (const u32 corner : QUAD_MAP) {
            const u32 index = primitive * 4 + corner;
            const u8* src_offset = source + (index * index_size);

            std::memcpy(dst_pointer, src_offset, index_size);
            dst_pointer += index_size;
        }
    }

    return index_offset;
}
|
||||
|
||||
} // namespace OpenGL
|
33
src/video_core/renderer_opengl/gl_primitive_assembler.h
Normal file
33
src/video_core/renderer_opengl/gl_primitive_assembler.h
Normal file
|
@ -0,0 +1,33 @@
|
|||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class OGLBufferCache;
|
||||
|
||||
class PrimitiveAssembler {
|
||||
public:
|
||||
explicit PrimitiveAssembler(OGLBufferCache& buffer_cache);
|
||||
~PrimitiveAssembler();
|
||||
|
||||
/// Calculates the size required by MakeQuadArray and MakeQuadIndexed.
|
||||
std::size_t CalculateQuadSize(u32 count) const;
|
||||
|
||||
GLintptr MakeQuadArray(u32 first, u32 count);
|
||||
|
||||
GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count);
|
||||
|
||||
private:
|
||||
OGLBufferCache& buffer_cache;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
|
@ -42,6 +42,41 @@ MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(12
|
|||
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
|
||||
MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
|
||||
|
||||
struct DrawParameters {
|
||||
GLenum primitive_mode;
|
||||
GLsizei count;
|
||||
GLint current_instance;
|
||||
bool use_indexed;
|
||||
|
||||
GLint vertex_first;
|
||||
|
||||
GLenum index_format;
|
||||
GLint base_vertex;
|
||||
GLintptr index_buffer_offset;
|
||||
|
||||
void DispatchDraw() const {
|
||||
if (use_indexed) {
|
||||
const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset);
|
||||
if (current_instance > 0) {
|
||||
glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format,
|
||||
index_buffer_ptr, 1, base_vertex,
|
||||
current_instance);
|
||||
} else {
|
||||
glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr,
|
||||
base_vertex);
|
||||
}
|
||||
} else {
|
||||
if (current_instance > 0) {
|
||||
glDrawArraysInstancedBaseInstance(primitive_mode, vertex_first, count, 1,
|
||||
current_instance);
|
||||
} else {
|
||||
glDrawArrays(primitive_mode, vertex_first, count);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
|
||||
: emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) {
|
||||
|
@ -172,6 +207,53 @@ void RasterizerOpenGL::SetupVertexArrays() {
|
|||
}
|
||||
}
|
||||
|
||||
/// Fills in the parameters for the current draw and uploads/generates its index data.
///
/// Quads are drawn as GL_TRIANGLES through an index list produced by the primitive assembler
/// (six indices for every four quad vertices). Any other topology is translated with
/// MaxwellToGL::PrimitiveTopology; for indexed draws the guest index buffer is uploaded through
/// the buffer cache.
///
/// @return Parameters ready to be dispatched with DrawParameters::DispatchDraw.
DrawParameters RasterizerOpenGL::SetupDraw() {
    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
    const auto& regs = gpu.regs;
    const bool is_indexed = accelerate_draw == AccelDraw::Indexed;

    DrawParameters params{};
    params.current_instance = gpu.state.current_instance;

    if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
        MICROPROFILE_SCOPE(OpenGL_PrimitiveAssembly);

        // Quads always go through a generated index list, even for non-indexed draws.
        params.use_indexed = true;
        params.primitive_mode = GL_TRIANGLES;

        if (is_indexed) {
            params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
            params.count = (regs.index_array.count / 4) * 6;
            params.index_buffer_offset = primitive_assembler.MakeQuadIndexed(
                regs.index_array.IndexStart(), regs.index_array.FormatSizeInBytes(),
                regs.index_array.count);
            params.base_vertex = static_cast<GLint>(regs.vb_element_base);
        } else {
            // MakeQuadArray always generates u32 indexes
            params.index_format = GL_UNSIGNED_INT;
            params.count = (regs.vertex_buffer.count / 4) * 6;
            // MakeQuadArray takes the number of quad vertices, not the number of generated
            // indices; passing params.count would over-generate indices and trip the
            // multiple-of-4 assertion for odd quad counts.
            params.index_buffer_offset = primitive_assembler.MakeQuadArray(
                regs.vertex_buffer.first, regs.vertex_buffer.count);
        }
        return params;
    }

    params.use_indexed = is_indexed;
    params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);

    if (is_indexed) {
        MICROPROFILE_SCOPE(OpenGL_Index);
        params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
        params.count = regs.index_array.count;
        params.index_buffer_offset =
            buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize());
        params.base_vertex = static_cast<GLint>(regs.vb_element_base);
    } else {
        params.count = regs.vertex_buffer.count;
        params.vertex_first = regs.vertex_buffer.first;
    }

    // Without this return the function would flow off its end, which is undefined behaviour.
    return params;
}
|
||||
|
||||
void RasterizerOpenGL::SetupShaders() {
|
||||
MICROPROFILE_SCOPE(OpenGL_Shader);
|
||||
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
|
||||
|
@ -256,6 +338,13 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
|
|||
return size;
|
||||
}
|
||||
|
||||
std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
|
||||
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
|
||||
|
||||
return static_cast<std::size_t>(regs.index_array.count) *
|
||||
static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
|
||||
}
|
||||
|
||||
bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
|
||||
accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
|
||||
DrawArrays();
|
||||
|
@ -459,16 +548,23 @@ void RasterizerOpenGL::DrawArrays() {
|
|||
|
||||
// Draw the vertex batch
|
||||
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
|
||||
const u64 index_buffer_size{static_cast<u64>(regs.index_array.count) *
|
||||
static_cast<u64>(regs.index_array.FormatSizeInBytes())};
|
||||
|
||||
state.draw.vertex_buffer = buffer_cache.GetHandle();
|
||||
state.Apply();
|
||||
|
||||
std::size_t buffer_size = CalculateVertexArraysSize();
|
||||
|
||||
// Add space for index buffer (keeping in mind non-core primitives)
|
||||
switch (regs.draw.topology) {
|
||||
case Maxwell::PrimitiveTopology::Quads:
|
||||
buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) +
|
||||
primitive_assembler.CalculateQuadSize(regs.vertex_buffer.count);
|
||||
break;
|
||||
default:
|
||||
if (is_indexed) {
|
||||
buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + index_buffer_size;
|
||||
buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + CalculateIndexBufferSize();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Uniform space for the 5 shader stages
|
||||
|
@ -482,20 +578,7 @@ void RasterizerOpenGL::DrawArrays() {
|
|||
buffer_cache.Map(buffer_size);
|
||||
|
||||
SetupVertexArrays();
|
||||
|
||||
// If indexed mode, copy the index buffer
|
||||
GLintptr index_buffer_offset = 0;
|
||||
if (is_indexed) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Index);
|
||||
|
||||
// Adjust the index buffer offset so it points to the first desired index.
|
||||
auto index_start = regs.index_array.StartAddress();
|
||||
index_start += static_cast<size_t>(regs.index_array.first) *
|
||||
static_cast<size_t>(regs.index_array.FormatSizeInBytes());
|
||||
|
||||
index_buffer_offset = buffer_cache.UploadMemory(index_start, index_buffer_size);
|
||||
}
|
||||
|
||||
DrawParameters params = SetupDraw();
|
||||
SetupShaders();
|
||||
|
||||
buffer_cache.Unmap();
|
||||
|
@ -503,31 +586,8 @@ void RasterizerOpenGL::DrawArrays() {
|
|||
shader_program_manager->ApplyTo(state);
|
||||
state.Apply();
|
||||
|
||||
const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)};
|
||||
if (is_indexed) {
|
||||
const GLint base_vertex{static_cast<GLint>(regs.vb_element_base)};
|
||||
|
||||
if (gpu.state.current_instance > 0) {
|
||||
glDrawElementsInstancedBaseVertexBaseInstance(
|
||||
primitive_mode, regs.index_array.count,
|
||||
MaxwellToGL::IndexFormat(regs.index_array.format),
|
||||
reinterpret_cast<const void*>(index_buffer_offset), 1, base_vertex,
|
||||
gpu.state.current_instance);
|
||||
} else {
|
||||
glDrawElementsBaseVertex(primitive_mode, regs.index_array.count,
|
||||
MaxwellToGL::IndexFormat(regs.index_array.format),
|
||||
reinterpret_cast<const void*>(index_buffer_offset),
|
||||
base_vertex);
|
||||
}
|
||||
} else {
|
||||
if (gpu.state.current_instance > 0) {
|
||||
glDrawArraysInstancedBaseInstance(primitive_mode, regs.vertex_buffer.first,
|
||||
regs.vertex_buffer.count, 1,
|
||||
gpu.state.current_instance);
|
||||
} else {
|
||||
glDrawArrays(primitive_mode, regs.vertex_buffer.first, regs.vertex_buffer.count);
|
||||
}
|
||||
}
|
||||
// Execute draw call
|
||||
params.DispatchDraw();
|
||||
|
||||
// Disable scissor test
|
||||
state.scissor.enabled = false;
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "video_core/rasterizer_cache.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
|
@ -38,6 +39,7 @@ class EmuWindow;
|
|||
namespace OpenGL {
|
||||
|
||||
struct ScreenInfo;
|
||||
struct DrawParameters;
|
||||
|
||||
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
|
||||
public:
|
||||
|
@ -192,12 +194,17 @@ private:
|
|||
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
|
||||
OGLBufferCache buffer_cache;
|
||||
OGLFramebuffer framebuffer;
|
||||
PrimitiveAssembler primitive_assembler{buffer_cache};
|
||||
GLint uniform_buffer_alignment;
|
||||
|
||||
std::size_t CalculateVertexArraysSize() const;
|
||||
|
||||
std::size_t CalculateIndexBufferSize() const;
|
||||
|
||||
void SetupVertexArrays();
|
||||
|
||||
DrawParameters SetupDraw();
|
||||
|
||||
void SetupShaders();
|
||||
|
||||
enum class AccelDraw { Disabled, Arrays, Indexed };
|
||||
|
|
Loading…
Reference in a new issue