From 46177901b8d76f16223ac2716abcd6393faae25a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 23 May 2019 20:19:56 -0300 Subject: [PATCH 1/2] gl_device: Add test to detect broken component indexing Component indexing on AMD's proprietary driver is broken. This commit adds a test to detect when we are on a driver that can't successfully manage component indexing. It dispatches a dummy draw with just one vertex shader that writes to an indexed SSBO from the GPU with data sent through uniforms, it then reads that data from the CPU and compares the expected output. --- src/video_core/renderer_opengl/gl_device.cpp | 54 ++++++++++++++++++++ src/video_core/renderer_opengl/gl_device.h | 6 +++ 2 files changed, 60 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 38497678a3..5f86574cd3 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -2,11 +2,14 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include #include "common/logging/log.h" +#include "common/scope_exit.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { @@ -24,6 +27,7 @@ Device::Device() { max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger(GL_MAX_VARYING_VECTORS); has_variable_aoffi = TestVariableAoffi(); + has_component_indexing_bug = TestComponentIndexingBug(); } Device::Device(std::nullptr_t) { @@ -31,6 +35,7 @@ Device::Device(std::nullptr_t) { max_vertex_attributes = 16; max_varyings = 15; has_variable_aoffi = true; + has_component_indexing_bug = false; } bool Device::TestVariableAoffi() { @@ -51,4 +56,53 @@ void main() { return supported; } +bool Device::TestComponentIndexingBug() { + constexpr char log_message[] = "Renderer_ComponentIndexingBug: {}"; + const GLchar* COMPONENT_TEST = R"(#version 430 core +layout (std430, binding = 0) buffer OutputBuffer { + uint output_value; +}; +layout (std140, binding = 0) uniform InputBuffer { + uvec4 input_value[4096]; +}; +layout (location = 0) uniform uint idx; +void main() { + output_value = input_value[idx >> 2][idx & 3]; +})"; + const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &COMPONENT_TEST)}; + SCOPE_EXIT({ glDeleteProgram(shader); }); + glUseProgram(shader); + + OGLVertexArray vao; + vao.Create(); + glBindVertexArray(vao.handle); + + constexpr std::array values{0, 0, 0, 0, 0x1236327, 0x985482, 0x872753, 0x2378432}; + OGLBuffer ubo; + ubo.Create(); + glNamedBufferData(ubo.handle, sizeof(values), values.data(), GL_STATIC_DRAW); + glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo.handle); + + OGLBuffer ssbo; + ssbo.Create(); + glNamedBufferStorage(ssbo.handle, sizeof(GLuint), nullptr, GL_CLIENT_STORAGE_BIT); + + for (GLuint index = 4; index < 8; ++index) { + glInvalidateBufferData(ssbo.handle); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo.handle); + + glProgramUniform1ui(shader, 0, index); + glDrawArrays(GL_POINTS, 0, 1); + + GLuint result; + glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result); + if (result != values.at(index)) { + LOG_INFO(Render_OpenGL, log_message, true); + return true; + } + } + LOG_INFO(Render_OpenGL, log_message, false); + return false; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index de8490682c..8c8c937600 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -30,13 +30,19 @@ public: return has_variable_aoffi; } + bool HasComponentIndexingBug() const { + return has_component_indexing_bug; + } + private: static bool TestVariableAoffi(); + static bool TestComponentIndexingBug(); std::size_t uniform_buffer_alignment{}; u32 max_vertex_attributes{}; u32 max_varyings{}; bool has_variable_aoffi{}; + bool has_component_indexing_bug{}; }; } // namespace OpenGL From d8827b07b5c823903609e00b8f0e1c27fa34fb91 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 23 May 2019 20:27:54 -0300 Subject: [PATCH 2/2] gl_shader_decompiler: Use an if based cbuf indexing for broken drivers The following code is broken on AMD's proprietary GLSL compiler: ```glsl uint idx = ...; vec4 values = ...; float some_value = values[idx & 3]; ``` It index the wrong components, to fix this the following pessimized code is emitted when that bug is present: ```glsl uint idx = ...; vec4 values = ...; float some_value; if ((idx & 3) == 0) some_value = values.x; if ((idx & 3) == 1) some_value = values.y; if ((idx & 3) == 2) some_value = values.z; if ((idx & 3) == 3) some_value = values.w; ``` --- .../renderer_opengl/gl_shader_decompiler.cpp | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 6d4658c8bf..4a40e4843c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -577,9 +577,26 @@ private: if (std::holds_alternative(*offset)) { // Indirect access const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint {} = (ftou({}) / 4);", final_offset, Visit(offset)); - return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()), - final_offset, final_offset); + code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset)); + + if (!device.HasComponentIndexingBug()) { + return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), + final_offset, final_offset); + } + + // AMD's proprietary GLSL compiler emits ill code for variable component access. + // To bypass this driver bug generate 4 ifs, one per each component. + const std::string pack = code.GenerateTemporary(); + code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), + final_offset); + + const std::string result = code.GenerateTemporary(); + code.AddLine("float {};", result); + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, + pack, GetSwizzle(swizzle)); + } + return result; } UNREACHABLE_MSG("Unmanaged offset node type");