gl_rasterizer: implement AccelerateDrawBatch to emulate PICA shader on hardware
This commit is contained in:
parent
15d14be3cc
commit
9b448a0739
3 changed files with 376 additions and 36 deletions
|
@ -66,5 +66,10 @@ public:
|
||||||
ScreenInfo& screen_info) {
|
ScreenInfo& screen_info) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Attempt to draw using hardware shaders
|
||||||
|
virtual bool AccelerateDrawBatch(bool is_indexed) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
} // namespace VideoCore
|
} // namespace VideoCore
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "common/math_util.h"
|
#include "common/math_util.h"
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
|
#include "common/scope_exit.h"
|
||||||
#include "common/vector_math.h"
|
#include "common/vector_math.h"
|
||||||
#include "core/hw/gpu.h"
|
#include "core/hw/gpu.h"
|
||||||
#include "video_core/pica_state.h"
|
#include "video_core/pica_state.h"
|
||||||
|
@ -26,13 +27,17 @@
|
||||||
using PixelFormat = SurfaceParams::PixelFormat;
|
using PixelFormat = SurfaceParams::PixelFormat;
|
||||||
using SurfaceType = SurfaceParams::SurfaceType;
|
using SurfaceType = SurfaceParams::SurfaceType;
|
||||||
|
|
||||||
|
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128, 0));
|
||||||
|
MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128));
|
||||||
|
MICROPROFILE_DEFINE(OpenGL_GS, "OpenGL", "Geometry Shader Setup", MP_RGB(128, 192, 128));
|
||||||
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
|
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
|
||||||
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
|
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
|
||||||
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
|
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
|
||||||
|
|
||||||
RasterizerOpenGL::RasterizerOpenGL()
|
RasterizerOpenGL::RasterizerOpenGL()
|
||||||
: shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE),
|
: shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE),
|
||||||
uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE) {
|
uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE),
|
||||||
|
index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE) {
|
||||||
// Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
|
// Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
|
||||||
state.clip_distance[0] = true;
|
state.clip_distance[0] = true;
|
||||||
|
|
||||||
|
@ -46,13 +51,9 @@ RasterizerOpenGL::RasterizerOpenGL()
|
||||||
texture_cube_sampler.Create();
|
texture_cube_sampler.Create();
|
||||||
state.texture_cube_unit.sampler = texture_cube_sampler.sampler.handle;
|
state.texture_cube_unit.sampler = texture_cube_sampler.sampler.handle;
|
||||||
|
|
||||||
// Generate VBO, VAO and UBO
|
// Generate VAO
|
||||||
vertex_array.Create();
|
sw_vao.Create();
|
||||||
|
hw_vao.Create();
|
||||||
state.draw.vertex_array = vertex_array.handle;
|
|
||||||
state.draw.vertex_buffer = vertex_buffer.GetHandle();
|
|
||||||
state.draw.uniform_buffer = uniform_buffer.GetHandle();
|
|
||||||
state.Apply();
|
|
||||||
|
|
||||||
uniform_block_data.dirty = true;
|
uniform_block_data.dirty = true;
|
||||||
|
|
||||||
|
@ -67,10 +68,18 @@ RasterizerOpenGL::RasterizerOpenGL()
|
||||||
uniform_block_data.proctex_diff_lut_dirty = true;
|
uniform_block_data.proctex_diff_lut_dirty = true;
|
||||||
|
|
||||||
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
|
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
|
||||||
|
uniform_size_aligned_vs =
|
||||||
|
Common::AlignUp<size_t>(sizeof(VSUniformData), uniform_buffer_alignment);
|
||||||
|
uniform_size_aligned_gs =
|
||||||
|
Common::AlignUp<size_t>(sizeof(GSUniformData), uniform_buffer_alignment);
|
||||||
uniform_size_aligned_fs =
|
uniform_size_aligned_fs =
|
||||||
Common::AlignUp<size_t>(sizeof(UniformData), uniform_buffer_alignment);
|
Common::AlignUp<size_t>(sizeof(UniformData), uniform_buffer_alignment);
|
||||||
|
|
||||||
// Set vertex attributes
|
// Set vertex attributes for software shader path
|
||||||
|
state.draw.vertex_array = sw_vao.handle;
|
||||||
|
state.draw.vertex_buffer = vertex_buffer.GetHandle();
|
||||||
|
state.Apply();
|
||||||
|
|
||||||
glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE,
|
glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE,
|
||||||
sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position));
|
sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position));
|
||||||
glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION);
|
glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION);
|
||||||
|
@ -176,6 +185,11 @@ RasterizerOpenGL::RasterizerOpenGL()
|
||||||
glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum());
|
glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum());
|
||||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle);
|
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle);
|
||||||
|
|
||||||
|
// Bind index buffer for hardware shader path
|
||||||
|
state.draw.vertex_array = hw_vao.handle;
|
||||||
|
state.Apply();
|
||||||
|
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer.GetHandle());
|
||||||
|
|
||||||
shader_program_manager =
|
shader_program_manager =
|
||||||
std::make_unique<ShaderProgramManager>(GLAD_GL_ARB_separate_shader_objects);
|
std::make_unique<ShaderProgramManager>(GLAD_GL_ARB_separate_shader_objects);
|
||||||
|
|
||||||
|
@ -258,10 +272,253 @@ void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0,
|
||||||
vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
|
vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static constexpr std::array<GLenum, 4> vs_attrib_types{
|
||||||
|
GL_BYTE, // VertexAttributeFormat::BYTE
|
||||||
|
GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE
|
||||||
|
GL_SHORT, // VertexAttributeFormat::SHORT
|
||||||
|
GL_FLOAT // VertexAttributeFormat::FLOAT
|
||||||
|
};
|
||||||
|
|
||||||
|
struct VertexArrayInfo {
|
||||||
|
u32 vs_input_index_min;
|
||||||
|
u32 vs_input_index_max;
|
||||||
|
u32 vs_input_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
RasterizerOpenGL::VertexArrayInfo RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) {
|
||||||
|
const auto& regs = Pica::g_state.regs;
|
||||||
|
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
|
||||||
|
|
||||||
|
u32 vertex_min;
|
||||||
|
u32 vertex_max;
|
||||||
|
if (is_indexed) {
|
||||||
|
const auto& index_info = regs.pipeline.index_array;
|
||||||
|
PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset;
|
||||||
|
const u8* index_address_8 = Memory::GetPhysicalPointer(address);
|
||||||
|
const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
|
||||||
|
bool index_u16 = index_info.format != 0;
|
||||||
|
|
||||||
|
vertex_min = 0xFFFF;
|
||||||
|
vertex_max = 0;
|
||||||
|
std::size_t size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
|
||||||
|
res_cache.FlushRegion(address, size, nullptr);
|
||||||
|
for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) {
|
||||||
|
u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index];
|
||||||
|
vertex_min = std::min(vertex_min, vertex);
|
||||||
|
vertex_max = std::max(vertex_max, vertex);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
vertex_min = regs.pipeline.vertex_offset;
|
||||||
|
vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 vertex_num = vertex_max - vertex_min + 1;
|
||||||
|
u32 vs_input_size = 0;
|
||||||
|
for (auto& loader : vertex_attributes.attribute_loaders) {
|
||||||
|
if (loader.component_count != 0) {
|
||||||
|
vs_input_size += loader.byte_count * vertex_num;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {vertex_min, vertex_max, vs_input_size};
|
||||||
|
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset,
|
||||||
|
GLuint vs_input_index_min, GLuint vs_input_index_max) {
|
||||||
|
MICROPROFILE_SCOPE(OpenGL_VAO);
|
||||||
|
const auto& regs = Pica::g_state.regs;
|
||||||
|
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
|
||||||
|
PAddr base_address = vertex_attributes.GetPhysicalBaseAddress();
|
||||||
|
|
||||||
|
state.draw.vertex_array = hw_vao.handle;
|
||||||
|
state.draw.vertex_buffer = vertex_buffer.GetHandle();
|
||||||
|
state.Apply();
|
||||||
|
|
||||||
|
std::array<bool, 16> enable_attributes{};
|
||||||
|
|
||||||
|
for (const auto& loader : vertex_attributes.attribute_loaders) {
|
||||||
|
if (loader.component_count == 0 || loader.byte_count == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 offset = 0;
|
||||||
|
for (u32 comp = 0; comp < loader.component_count && comp < 12; ++comp) {
|
||||||
|
u32 attribute_index = loader.GetComponent(comp);
|
||||||
|
if (attribute_index < 12) {
|
||||||
|
if (vertex_attributes.GetNumElements(attribute_index) != 0) {
|
||||||
|
offset = Common::AlignUp(
|
||||||
|
offset, vertex_attributes.GetElementSizeInBytes(attribute_index));
|
||||||
|
|
||||||
|
u32 input_reg = regs.vs.GetRegisterForAttribute(attribute_index);
|
||||||
|
GLint size = vertex_attributes.GetNumElements(attribute_index);
|
||||||
|
GLenum type = vs_attrib_types[static_cast<u32>(
|
||||||
|
vertex_attributes.GetFormat(attribute_index))];
|
||||||
|
GLsizei stride = loader.byte_count;
|
||||||
|
glVertexAttribPointer(input_reg, size, type, GL_FALSE, stride,
|
||||||
|
reinterpret_cast<GLvoid*>(buffer_offset + offset));
|
||||||
|
enable_attributes[input_reg] = true;
|
||||||
|
|
||||||
|
offset += vertex_attributes.GetStride(attribute_index);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings,
|
||||||
|
// respectively
|
||||||
|
offset = Common::AlignUp(offset, 4);
|
||||||
|
offset += (attribute_index - 11) * 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PAddr data_addr =
|
||||||
|
base_address + loader.data_offset + (vs_input_index_min * loader.byte_count);
|
||||||
|
|
||||||
|
u32 vertex_num = vs_input_index_max - vs_input_index_min + 1;
|
||||||
|
u32 data_size = loader.byte_count * vertex_num;
|
||||||
|
|
||||||
|
res_cache.FlushRegion(data_addr, data_size, nullptr);
|
||||||
|
std::memcpy(array_ptr, Memory::GetPhysicalPointer(data_addr), data_size);
|
||||||
|
|
||||||
|
array_ptr += data_size;
|
||||||
|
buffer_offset += data_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (std::size_t i = 0; i < enable_attributes.size(); ++i) {
|
||||||
|
if (enable_attributes[i] != hw_vao_enabled_attributes[i]) {
|
||||||
|
if (enable_attributes[i]) {
|
||||||
|
glEnableVertexAttribArray(i);
|
||||||
|
} else {
|
||||||
|
glDisableVertexAttribArray(i);
|
||||||
|
}
|
||||||
|
hw_vao_enabled_attributes[i] = enable_attributes[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (vertex_attributes.IsDefaultAttribute(i)) {
|
||||||
|
u32 reg = regs.vs.GetRegisterForAttribute(i);
|
||||||
|
if (!enable_attributes[reg]) {
|
||||||
|
const auto& attr = Pica::g_state.input_default_attributes.attr[i];
|
||||||
|
glVertexAttrib4f(reg, attr.x.ToFloat32(), attr.y.ToFloat32(), attr.z.ToFloat32(),
|
||||||
|
attr.w.ToFloat32());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool RasterizerOpenGL::SetupVertexShader() {
|
||||||
|
MICROPROFILE_SCOPE(OpenGL_VS);
|
||||||
|
GLShader::PicaVSConfig vs_config(Pica::g_state.regs, Pica::g_state.vs);
|
||||||
|
return shader_program_manager->UseProgrammableVertexShader(vs_config, Pica::g_state.vs);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool RasterizerOpenGL::SetupGeometryShader() {
|
||||||
|
MICROPROFILE_SCOPE(OpenGL_GS);
|
||||||
|
const auto& regs = Pica::g_state.regs;
|
||||||
|
if (regs.pipeline.use_gs == Pica::PipelineRegs::UseGS::No) {
|
||||||
|
GLShader::PicaFixedGSConfig gs_config(regs);
|
||||||
|
shader_program_manager->UseFixedGeometryShader(gs_config);
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
GLShader::PicaGSConfig gs_config(regs, Pica::g_state.gs);
|
||||||
|
return shader_program_manager->UseProgrammableGeometryShader(gs_config, Pica::g_state.gs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
|
||||||
|
const auto& regs = Pica::g_state.regs;
|
||||||
|
if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) {
|
||||||
|
if (regs.pipeline.gs_config.mode != Pica::PipelineRegs::GSMode::Point) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (regs.pipeline.triangle_topology != Pica::PipelineRegs::TriangleTopology::Shader) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!SetupVertexShader())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (!SetupGeometryShader())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
Draw(true, is_indexed);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static GLenum GetCurrentPrimitiveMode(bool use_gs) {
|
||||||
|
const auto& regs = Pica::g_state.regs;
|
||||||
|
if (use_gs) {
|
||||||
|
switch ((regs.gs.max_input_attribute_index + 1) /
|
||||||
|
(regs.pipeline.vs_outmap_total_minus_1_a + 1)) {
|
||||||
|
case 1:
|
||||||
|
return GL_POINTS;
|
||||||
|
case 2:
|
||||||
|
return GL_LINES;
|
||||||
|
case 4:
|
||||||
|
return GL_LINES_ADJACENCY;
|
||||||
|
case 3:
|
||||||
|
return GL_TRIANGLES;
|
||||||
|
case 6:
|
||||||
|
return GL_TRIANGLES_ADJACENCY;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
switch (regs.pipeline.triangle_topology) {
|
||||||
|
case Pica::PipelineRegs::TriangleTopology::Shader:
|
||||||
|
case Pica::PipelineRegs::TriangleTopology::List:
|
||||||
|
return GL_TRIANGLES;
|
||||||
|
case Pica::PipelineRegs::TriangleTopology::Fan:
|
||||||
|
return GL_TRIANGLE_FAN;
|
||||||
|
case Pica::PipelineRegs::TriangleTopology::Strip:
|
||||||
|
return GL_TRIANGLE_STRIP;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::AccelerateDrawBatchInternal(bool is_indexed, bool use_gs) {
|
||||||
|
const auto& regs = Pica::g_state.regs;
|
||||||
|
GLenum primitive_mode = GetCurrentPrimitiveMode(use_gs);
|
||||||
|
|
||||||
|
auto [vs_input_index_min, vs_input_index_max, vs_input_size] = AnalyzeVertexArray(is_indexed);
|
||||||
|
|
||||||
|
state.draw.vertex_buffer = vertex_buffer.GetHandle();
|
||||||
|
state.Apply();
|
||||||
|
|
||||||
|
u8* buffer_ptr;
|
||||||
|
GLintptr buffer_offset;
|
||||||
|
std::tie(buffer_ptr, buffer_offset, std::ignore) = vertex_buffer.Map(vs_input_size, 4);
|
||||||
|
SetupVertexArray(buffer_ptr, buffer_offset, vs_input_index_min, vs_input_index_max);
|
||||||
|
vertex_buffer.Unmap(vs_input_size);
|
||||||
|
|
||||||
|
shader_program_manager->ApplyTo(state);
|
||||||
|
state.Apply();
|
||||||
|
|
||||||
|
if (is_indexed) {
|
||||||
|
bool index_u16 = regs.pipeline.index_array.format != 0;
|
||||||
|
std::size_t index_buffer_size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
|
||||||
|
const u8* index_data =
|
||||||
|
Memory::GetPhysicalPointer(regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() +
|
||||||
|
regs.pipeline.index_array.offset);
|
||||||
|
std::tie(buffer_ptr, buffer_offset, std::ignore) = index_buffer.Map(index_buffer_size, 4);
|
||||||
|
std::memcpy(buffer_ptr, index_data, index_buffer_size);
|
||||||
|
index_buffer.Unmap(index_buffer_size);
|
||||||
|
|
||||||
|
glDrawRangeElementsBaseVertex(
|
||||||
|
primitive_mode, vs_input_index_min, vs_input_index_max, regs.pipeline.num_vertices,
|
||||||
|
index_u16 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE,
|
||||||
|
reinterpret_cast<const void*>(buffer_offset), -static_cast<GLint>(vs_input_index_min));
|
||||||
|
} else {
|
||||||
|
glDrawArrays(primitive_mode, 0, regs.pipeline.num_vertices);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::DrawTriangles() {
|
void RasterizerOpenGL::DrawTriangles() {
|
||||||
if (vertex_batch.empty())
|
if (vertex_batch.empty())
|
||||||
return;
|
return;
|
||||||
|
Draw(false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
||||||
const auto& regs = Pica::g_state.regs;
|
const auto& regs = Pica::g_state.regs;
|
||||||
|
|
||||||
|
@ -474,7 +731,8 @@ void RasterizerOpenGL::DrawTriangles() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sync the uniform data
|
// Sync the uniform data
|
||||||
UploadUniforms();
|
const bool use_gs = regs.pipeline.use_gs == Pica::PipelineRegs::UseGS::Yes;
|
||||||
|
UploadUniforms(accelerate, use_gs);
|
||||||
|
|
||||||
// Viewport can have negative offsets or larger
|
// Viewport can have negative offsets or larger
|
||||||
// dimensions than our framebuffer sub-rect.
|
// dimensions than our framebuffer sub-rect.
|
||||||
|
@ -487,22 +745,30 @@ void RasterizerOpenGL::DrawTriangles() {
|
||||||
state.scissor.height = draw_rect.GetHeight();
|
state.scissor.height = draw_rect.GetHeight();
|
||||||
state.Apply();
|
state.Apply();
|
||||||
|
|
||||||
shader_program_manager->UseTrivialVertexShader();
|
|
||||||
shader_program_manager->UseTrivialGeometryShader();
|
|
||||||
shader_program_manager->ApplyTo(state);
|
|
||||||
state.Apply();
|
|
||||||
|
|
||||||
// Draw the vertex batch
|
// Draw the vertex batch
|
||||||
size_t max_vertices = 3 * (vertex_buffer.GetSize() / (3 * sizeof(HardwareVertex)));
|
if (accelerate) {
|
||||||
for (size_t base_vertex = 0; base_vertex < vertex_batch.size(); base_vertex += max_vertices) {
|
AccelerateDrawBatchInternal(is_indexed, use_gs);
|
||||||
size_t vertices = std::min(max_vertices, vertex_batch.size() - base_vertex);
|
} else {
|
||||||
size_t vertex_size = vertices * sizeof(HardwareVertex);
|
state.draw.vertex_array = sw_vao.handle;
|
||||||
u8* vbo;
|
state.draw.vertex_buffer = vertex_buffer.GetHandle();
|
||||||
GLintptr offset;
|
shader_program_manager->UseTrivialVertexShader();
|
||||||
std::tie(vbo, offset, std::ignore) = vertex_buffer.Map(vertex_size, sizeof(HardwareVertex));
|
shader_program_manager->UseTrivialGeometryShader();
|
||||||
memcpy(vbo, vertex_batch.data() + base_vertex, vertex_size);
|
shader_program_manager->ApplyTo(state);
|
||||||
vertex_buffer.Unmap(vertex_size);
|
state.Apply();
|
||||||
glDrawArrays(GL_TRIANGLES, offset / sizeof(HardwareVertex), (GLsizei)vertices);
|
|
||||||
|
std::size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex)));
|
||||||
|
for (std::size_t base_vertex = 0; base_vertex < vertex_batch.size();
|
||||||
|
base_vertex += max_vertices) {
|
||||||
|
std::size_t vertices = std::min(max_vertices, vertex_batch.size() - base_vertex);
|
||||||
|
std::size_t vertex_size = vertices * sizeof(HardwareVertex);
|
||||||
|
u8* vbo;
|
||||||
|
GLintptr offset;
|
||||||
|
std::tie(vbo, offset, std::ignore) =
|
||||||
|
vertex_buffer.Map(vertex_size, sizeof(HardwareVertex));
|
||||||
|
std::memcpy(vbo, vertex_batch.data() + base_vertex, vertex_size);
|
||||||
|
vertex_buffer.Unmap(vertex_size);
|
||||||
|
glDrawArrays(GL_TRIANGLES, offset / sizeof(HardwareVertex), (GLsizei)vertices);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Disable scissor test
|
// Disable scissor test
|
||||||
|
@ -1648,18 +1914,53 @@ void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::UploadUniforms() {
|
void RasterizerOpenGL::UploadUniforms(bool accelerate_draw, bool use_gs) {
|
||||||
if (!uniform_block_data.dirty)
|
// glBindBufferRange below also changes the generic buffer binding point, so we sync the state
|
||||||
|
// first
|
||||||
|
state.draw.uniform_buffer = uniform_buffer.GetHandle();
|
||||||
|
state.Apply();
|
||||||
|
|
||||||
|
bool sync_vs = accelerate_draw;
|
||||||
|
bool sync_gs = accelerate_draw && use_gs;
|
||||||
|
bool sync_fs = uniform_block_data.dirty;
|
||||||
|
|
||||||
|
if (!sync_vs && !sync_gs && !sync_fs)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
size_t uniform_size = uniform_size_aligned_fs;
|
size_t uniform_size =
|
||||||
|
uniform_size_aligned_vs + uniform_size_aligned_gs + uniform_size_aligned_fs;
|
||||||
|
size_t used_bytes = 0;
|
||||||
u8* uniforms;
|
u8* uniforms;
|
||||||
GLintptr offset;
|
GLintptr offset;
|
||||||
std::tie(uniforms, offset, std::ignore) =
|
bool invalidate;
|
||||||
|
std::tie(uniforms, offset, invalidate) =
|
||||||
uniform_buffer.Map(uniform_size, uniform_buffer_alignment);
|
uniform_buffer.Map(uniform_size, uniform_buffer_alignment);
|
||||||
std::memcpy(uniforms, &uniform_block_data.data, sizeof(UniformData));
|
|
||||||
uniform_buffer.Unmap(uniform_size);
|
if (sync_vs) {
|
||||||
glBindBufferRange(GL_UNIFORM_BUFFER, 0, uniform_buffer.GetHandle(), offset,
|
VSUniformData vs_uniforms;
|
||||||
sizeof(UniformData));
|
vs_uniforms.uniforms.SetFromRegs(Pica::g_state.regs.vs, Pica::g_state.vs);
|
||||||
uniform_block_data.dirty = false;
|
std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms));
|
||||||
|
glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(UniformBindings::VS),
|
||||||
|
uniform_buffer.GetHandle(), offset + used_bytes, sizeof(VSUniformData));
|
||||||
|
used_bytes += uniform_size_aligned_vs;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sync_gs) {
|
||||||
|
GSUniformData gs_uniforms;
|
||||||
|
gs_uniforms.uniforms.SetFromRegs(Pica::g_state.regs.gs, Pica::g_state.gs);
|
||||||
|
std::memcpy(uniforms + used_bytes, &gs_uniforms, sizeof(gs_uniforms));
|
||||||
|
glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(UniformBindings::GS),
|
||||||
|
uniform_buffer.GetHandle(), offset + used_bytes, sizeof(GSUniformData));
|
||||||
|
used_bytes += uniform_size_aligned_gs;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sync_fs || invalidate) {
|
||||||
|
std::memcpy(uniforms + used_bytes, &uniform_block_data.data, sizeof(UniformData));
|
||||||
|
glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(UniformBindings::Common),
|
||||||
|
uniform_buffer.GetHandle(), offset + used_bytes, sizeof(UniformData));
|
||||||
|
uniform_block_data.dirty = false;
|
||||||
|
used_bytes += uniform_size_aligned_fs;
|
||||||
|
}
|
||||||
|
|
||||||
|
uniform_buffer.Unmap(used_bytes);
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,6 +50,7 @@ public:
|
||||||
bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
|
bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
|
||||||
bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr,
|
bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr,
|
||||||
u32 pixel_stride, ScreenInfo& screen_info) override;
|
u32 pixel_stride, ScreenInfo& screen_info) override;
|
||||||
|
bool AccelerateDrawBatch(bool is_indexed) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct SamplerInfo {
|
struct SamplerInfo {
|
||||||
|
@ -73,6 +74,7 @@ private:
|
||||||
|
|
||||||
/// Structure that the hardware rendered vertices are composed of
|
/// Structure that the hardware rendered vertices are composed of
|
||||||
struct HardwareVertex {
|
struct HardwareVertex {
|
||||||
|
HardwareVertex() = default;
|
||||||
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
|
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
|
||||||
position[0] = v.pos.x.ToFloat32();
|
position[0] = v.pos.x.ToFloat32();
|
||||||
position[1] = v.pos.y.ToFloat32();
|
position[1] = v.pos.y.ToFloat32();
|
||||||
|
@ -216,7 +218,32 @@ private:
|
||||||
void SyncLightDistanceAttenuationScale(int light_index);
|
void SyncLightDistanceAttenuationScale(int light_index);
|
||||||
|
|
||||||
/// Upload the uniform blocks to the uniform buffer object
|
/// Upload the uniform blocks to the uniform buffer object
|
||||||
void UploadUniforms();
|
void UploadUniforms(bool accelerate_draw, bool use_gs);
|
||||||
|
|
||||||
|
/// Generic draw function for DrawTriangles and AccelerateDrawBatch
|
||||||
|
void Draw(bool accelerate, bool is_indexed);
|
||||||
|
|
||||||
|
/// Internal implementation for AccelerateDrawBatch
|
||||||
|
void AccelerateDrawBatchInternal(bool is_indexed, bool use_gs);
|
||||||
|
|
||||||
|
struct VertexArrayInfo {
|
||||||
|
u32 vs_input_index_min;
|
||||||
|
u32 vs_input_index_max;
|
||||||
|
u32 vs_input_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Retrieve the range and the size of the input vertex
|
||||||
|
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
|
||||||
|
|
||||||
|
/// Setup vertex array for AccelerateDrawBatch
|
||||||
|
void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, GLuint vs_input_index_min,
|
||||||
|
GLuint vs_input_index_max);
|
||||||
|
|
||||||
|
/// Setup vertex shader for AccelerateDrawBatch
|
||||||
|
bool SetupVertexShader();
|
||||||
|
|
||||||
|
/// Setup geometry shader for AccelerateDrawBatch
|
||||||
|
bool SetupGeometryShader();
|
||||||
|
|
||||||
OpenGLState state;
|
OpenGLState state;
|
||||||
|
|
||||||
|
@ -242,14 +269,21 @@ private:
|
||||||
|
|
||||||
// They shall be big enough for about one frame.
|
// They shall be big enough for about one frame.
|
||||||
static constexpr size_t VERTEX_BUFFER_SIZE = 32 * 1024 * 1024;
|
static constexpr size_t VERTEX_BUFFER_SIZE = 32 * 1024 * 1024;
|
||||||
|
static constexpr size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024;
|
||||||
static constexpr size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
|
static constexpr size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
|
||||||
|
|
||||||
|
OGLVertexArray sw_vao; // VAO for software shader draw
|
||||||
|
OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw
|
||||||
|
std::array<bool, 16> hw_vao_enabled_attributes{};
|
||||||
|
|
||||||
std::array<SamplerInfo, 3> texture_samplers;
|
std::array<SamplerInfo, 3> texture_samplers;
|
||||||
OGLVertexArray vertex_array;
|
|
||||||
OGLStreamBuffer vertex_buffer;
|
OGLStreamBuffer vertex_buffer;
|
||||||
OGLStreamBuffer uniform_buffer;
|
OGLStreamBuffer uniform_buffer;
|
||||||
|
OGLStreamBuffer index_buffer;
|
||||||
OGLFramebuffer framebuffer;
|
OGLFramebuffer framebuffer;
|
||||||
GLint uniform_buffer_alignment;
|
GLint uniform_buffer_alignment;
|
||||||
|
size_t uniform_size_aligned_vs;
|
||||||
|
size_t uniform_size_aligned_gs;
|
||||||
size_t uniform_size_aligned_fs;
|
size_t uniform_size_aligned_fs;
|
||||||
|
|
||||||
SamplerInfo texture_cube_sampler;
|
SamplerInfo texture_cube_sampler;
|
||||||
|
|
Loading…
Reference in a new issue