From 186873420f9f10c65814db8d3a3388cbd21f8444 Mon Sep 17 00:00:00 2001 From: Subv Date: Thu, 20 Aug 2015 10:10:35 -0500 Subject: [PATCH 01/10] GPU/Rasterizer: Corrected the stencil implementation. Verified the behavior with hardware tests. --- src/video_core/pica.h | 20 ++++++++++++++----- src/video_core/rasterizer.cpp | 37 +++++++++++++++++++++++------------ 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 58b924f9e..6383e5d56 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -441,8 +441,12 @@ struct Regs { }; enum class StencilAction : u32 { - Keep = 0, - Xor = 5, + Keep = 0, + + Replace = 2, + Increment = 3, + Decrement = 4, + Invert = 5 }; struct { @@ -481,23 +485,29 @@ struct Regs { struct { union { + // Raw value of this register + u32 raw_func; + // If true, enable stencil testing BitField< 0, 1, u32> enable; // Comparison operation for stencil testing BitField< 4, 3, CompareFunc> func; - // Value to calculate the new stencil value from - BitField< 8, 8, u32> replacement_value; + // Mask used to control writing to the stencil buffer + BitField< 8, 8, u32> write_mask; // Value to compare against for stencil testing BitField<16, 8, u32> reference_value; // Mask to apply on stencil test inputs - BitField<24, 8, u32> mask; + BitField<24, 8, u32> input_mask; }; union { + // Raw value of this register + u32 raw_op; + // Action to perform when the stencil test fails BitField< 0, 3, StencilAction> action_stencil_fail; diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index b83798b0f..000b4542b 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -215,14 +215,25 @@ static void SetStencil(int x, int y, u8 value) { } } -// TODO: Should the stencil mask be applied to the "dest" or "ref" operands? Most likely not! -static u8 PerformStencilAction(Regs::StencilAction action, u8 dest, u8 ref) { +// TODO: Should the stencil mask be applied to the "old_stencil" or "ref" operands? Most likely not! +static u8 PerformStencilAction(Regs::StencilAction action, u8 old_stencil, u8 ref) { switch (action) { case Regs::StencilAction::Keep: - return dest; + return old_stencil; - case Regs::StencilAction::Xor: - return dest ^ ref; + case Regs::StencilAction::Replace: + return ref; + + case Regs::StencilAction::Increment: + // Saturated increment + return std::min(old_stencil, 254) + 1; + + case Regs::StencilAction::Decrement: + // Saturated decrement + return std::max(old_stencil, 1) - 1; + + case Regs::StencilAction::Invert: + return ~old_stencil; default: LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action); @@ -782,8 +793,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, u8 old_stencil = 0; if (stencil_action_enable) { old_stencil = GetStencil(x >> 4, y >> 4); - u8 dest = old_stencil & stencil_test.mask; - u8 ref = stencil_test.reference_value & stencil_test.mask; + u8 dest = old_stencil & stencil_test.input_mask; + u8 ref = stencil_test.reference_value & stencil_test.input_mask; bool pass = false; switch (stencil_test.func) { @@ -821,8 +832,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } if (!pass) { - u8 new_stencil = PerformStencilAction(stencil_test.action_stencil_fail, old_stencil, stencil_test.replacement_value); - SetStencil(x >> 4, y >> 4, new_stencil); + u8 new_stencil = PerformStencilAction(stencil_test.action_stencil_fail, old_stencil, stencil_test.reference_value); + SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask)); continue; } } @@ -873,8 +884,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, if (!pass) { if (stencil_action_enable) { - u8 new_stencil = PerformStencilAction(stencil_test.action_depth_fail, old_stencil, stencil_test.replacement_value); - SetStencil(x >> 4, y >> 4, new_stencil); + u8 new_stencil = PerformStencilAction(stencil_test.action_depth_fail, old_stencil, stencil_test.reference_value); + SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask)); } continue; } @@ -884,8 +895,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, if (stencil_action_enable) { // TODO: What happens if stencil testing is enabled, but depth testing is not? Will stencil get updated anyway? - u8 new_stencil = PerformStencilAction(stencil_test.action_depth_pass, old_stencil, stencil_test.replacement_value); - SetStencil(x >> 4, y >> 4, new_stencil); + u8 new_stencil = PerformStencilAction(stencil_test.action_depth_pass, old_stencil, stencil_test.reference_value); + SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask)); } } From 46f660a789ff02f9cd86600ed82e0936b049b176 Mon Sep 17 00:00:00 2001 From: Subv Date: Thu, 20 Aug 2015 10:11:09 -0500 Subject: [PATCH 02/10] GLRasterizer: Implemented stencil testing in the hw renderer. --- .../renderer_opengl/gl_rasterizer.cpp | 13 ++++++++++-- src/video_core/renderer_opengl/gl_state.cpp | 9 ++++++++ src/video_core/renderer_opengl/gl_state.h | 3 +++ src/video_core/renderer_opengl/pica_to_gl.h | 21 +++++++++++++++++++ 4 files changed, 44 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 9f1552adf..962c659e0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -268,7 +268,8 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { break; // Stencil test - case PICA_REG_INDEX(output_merger.stencil_test): + case PICA_REG_INDEX(output_merger.stencil_test.raw_func): + case PICA_REG_INDEX(output_merger.stencil_test.raw_op): SyncStencilTest(); break; @@ -675,7 +676,15 @@ void RasterizerOpenGL::SyncLogicOp() { } void RasterizerOpenGL::SyncStencilTest() { - // TODO: Implement stencil test, mask, and op + const auto& regs = Pica::g_state.regs; + state.stencil.test_enabled = regs.output_merger.stencil_test.enable && regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; + state.stencil.test_func = PicaToGL::CompareFunc(regs.output_merger.stencil_test.func); + state.stencil.test_ref = regs.output_merger.stencil_test.reference_value; + state.stencil.test_mask = regs.output_merger.stencil_test.input_mask; + state.stencil.write_mask = regs.output_merger.stencil_test.write_mask; + state.stencil.action_stencil_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_stencil_fail); + state.stencil.action_depth_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_fail); + state.stencil.action_depth_pass = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_pass); } void RasterizerOpenGL::SyncDepthTest() { diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 871324014..ba47ce8b8 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -26,6 +26,9 @@ OpenGLState::OpenGLState() { stencil.test_ref = 0; stencil.test_mask = -1; stencil.write_mask = -1; + stencil.action_depth_fail = GL_KEEP; + stencil.action_depth_pass = GL_KEEP; + stencil.action_stencil_fail = GL_KEEP; blend.enabled = false; blend.src_rgb_func = GL_ONE; @@ -105,6 +108,12 @@ void OpenGLState::Apply() { glStencilFunc(stencil.test_func, stencil.test_ref, stencil.test_mask); } + if (stencil.action_depth_fail != cur_state.stencil.action_depth_fail || + stencil.action_depth_pass != cur_state.stencil.action_depth_pass || + stencil.action_stencil_fail != cur_state.stencil.action_stencil_fail) { + glStencilOp(stencil.action_stencil_fail, stencil.action_depth_fail, stencil.action_depth_pass); + } + // Stencil mask if (stencil.write_mask != cur_state.stencil.write_mask) { glStencilMask(stencil.write_mask); diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 3e2379021..81e7e0877 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -32,6 +32,9 @@ public: GLint test_ref; // GL_STENCIL_REF GLuint test_mask; // GL_STENCIL_VALUE_MASK GLuint write_mask; // GL_STENCIL_WRITEMASK + GLenum action_stencil_fail; // GL_STENCIL_FAIL + GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL + GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS } stencil; struct { diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index 3b562da86..a8f84a411 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -152,6 +152,27 @@ inline GLenum CompareFunc(Pica::Regs::CompareFunc func) { return compare_func_table[(unsigned)func]; } +inline GLenum StencilOp(Pica::Regs::StencilAction action) { + static const GLenum stencil_op_table[] = { + GL_KEEP, // StencilAction::Keep + GL_KEEP, + GL_REPLACE, // StencilAction::Replace + GL_INCR, // StencilAction::Increment + GL_DECR, // StencilAction::Decrement + GL_INVERT // StencilAction::Invert + }; + + // Range check table for input + if ((unsigned)action >= ARRAY_SIZE(stencil_op_table)) { + LOG_CRITICAL(Render_OpenGL, "Unknown stencil op %d", action); + UNREACHABLE(); + + return GL_KEEP; + } + + return stencil_op_table[(unsigned)action]; +} + inline std::array ColorRGBA8(const u8* bytes) { return { { bytes[0] / 255.0f, bytes[1] / 255.0f, From e74825e3d09c924f03ff63408bccefbccab19bae Mon Sep 17 00:00:00 2001 From: Subv Date: Thu, 20 Aug 2015 10:31:39 -0500 Subject: [PATCH 03/10] Rasterizer: Abstract duplicated stencil code into a lambda. --- src/video_core/rasterizer.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 000b4542b..c768a5eea 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -791,6 +791,12 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } u8 old_stencil = 0; + + auto UpdateStencil = [stencil_test, x, y, &old_stencil](Pica::Regs::StencilAction action) { + u8 new_stencil = PerformStencilAction(action, old_stencil, stencil_test.reference_value); + SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask)); + }; + if (stencil_action_enable) { old_stencil = GetStencil(x >> 4, y >> 4); u8 dest = old_stencil & stencil_test.input_mask; @@ -832,8 +838,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } if (!pass) { - u8 new_stencil = PerformStencilAction(stencil_test.action_stencil_fail, old_stencil, stencil_test.reference_value); - SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask)); + UpdateStencil(stencil_test.action_stencil_fail); continue; } } @@ -884,8 +889,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, if (!pass) { if (stencil_action_enable) { - u8 new_stencil = PerformStencilAction(stencil_test.action_depth_fail, old_stencil, stencil_test.reference_value); - SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask)); + UpdateStencil(stencil_test.action_depth_fail); } continue; } @@ -895,8 +899,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, if (stencil_action_enable) { // TODO: What happens if stencil testing is enabled, but depth testing is not? Will stencil get updated anyway? - u8 new_stencil = PerformStencilAction(stencil_test.action_depth_pass, old_stencil, stencil_test.reference_value); - SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask)); + UpdateStencil(stencil_test.action_depth_pass); } } From 8e6336d96bf7ee0e31ca51064c85f1b65913fe7a Mon Sep 17 00:00:00 2001 From: Subv Date: Fri, 21 Aug 2015 09:48:43 -0500 Subject: [PATCH 04/10] SWRenderer: The stencil depth_pass action is executed even if depth testing is disabled. The HW renderer already did this. --- src/video_core/rasterizer.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index c768a5eea..696839da6 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -888,21 +888,19 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } if (!pass) { - if (stencil_action_enable) { + if (stencil_action_enable) UpdateStencil(stencil_test.action_depth_fail); - } continue; } if (output_merger.depth_write_enable) SetDepth(x >> 4, y >> 4, z); - - if (stencil_action_enable) { - // TODO: What happens if stencil testing is enabled, but depth testing is not? Will stencil get updated anyway? - UpdateStencil(stencil_test.action_depth_pass); - } } + // The stencil depth_pass action is executed even if depth testing is disabled + if (stencil_action_enable) + UpdateStencil(stencil_test.action_depth_pass); + auto dest = GetPixel(x >> 4, y >> 4); Math::Vec4 blend_output = combiner_output; From b3e530d005324d915974a4d36a2588656364ab17 Mon Sep 17 00:00:00 2001 From: Subv Date: Fri, 21 Aug 2015 10:09:15 -0500 Subject: [PATCH 05/10] SWRasterizer: Removed a todo. Verified with hwtests. --- src/video_core/rasterizer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 696839da6..c6557bf1a 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -215,7 +215,6 @@ static void SetStencil(int x, int y, u8 value) { } } -// TODO: Should the stencil mask be applied to the "old_stencil" or "ref" operands? Most likely not! static u8 PerformStencilAction(Regs::StencilAction action, u8 old_stencil, u8 ref) { switch (action) { case Regs::StencilAction::Keep: From fef1462371b51fb5540817b76d30630868b2a961 Mon Sep 17 00:00:00 2001 From: Subv Date: Fri, 21 Aug 2015 10:35:25 -0500 Subject: [PATCH 06/10] SWRasterizer: Implemented stencil action 1 (GL_ZERO). Verified with hwtests. --- src/video_core/pica.h | 2 +- src/video_core/rasterizer.cpp | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 6383e5d56..a47dddd75 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -442,7 +442,7 @@ struct Regs { enum class StencilAction : u32 { Keep = 0, - + Zero = 1, Replace = 2, Increment = 3, Decrement = 4, diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index c6557bf1a..25911e0a2 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -220,6 +220,9 @@ static u8 PerformStencilAction(Regs::StencilAction action, u8 old_stencil, u8 re case Regs::StencilAction::Keep: return old_stencil; + case Regs::StencilAction::Zero: + return 0; + case Regs::StencilAction::Replace: return ref; From e43eb130d4852e396f77480b48a49811c6889bbd Mon Sep 17 00:00:00 2001 From: Subv Date: Fri, 21 Aug 2015 10:59:49 -0500 Subject: [PATCH 07/10] HWRasterizer: Implemented stencil op 1 (GL_ZERO) --- src/video_core/renderer_opengl/pica_to_gl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index a8f84a411..af5e66e88 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -155,7 +155,7 @@ inline GLenum CompareFunc(Pica::Regs::CompareFunc func) { inline GLenum StencilOp(Pica::Regs::StencilAction action) { static const GLenum stencil_op_table[] = { GL_KEEP, // StencilAction::Keep - GL_KEEP, + GL_ZERO, // StencilAction::Zero GL_REPLACE, // StencilAction::Replace GL_INCR, // StencilAction::Increment GL_DECR, // StencilAction::Decrement From 7c1f84a92b9727765a755c312e90b09b0cf6ed1d Mon Sep 17 00:00:00 2001 From: Subv Date: Fri, 21 Aug 2015 11:01:42 -0500 Subject: [PATCH 08/10] SWRasterizer: Implemented stencil ops 6 and 7. IncrementWrap and DecrementWrap, verified with hwtests. --- src/video_core/pica.h | 14 ++++++++------ src/video_core/rasterizer.cpp | 6 ++++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index a47dddd75..87cf705e7 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -441,12 +441,14 @@ struct Regs { }; enum class StencilAction : u32 { - Keep = 0, - Zero = 1, - Replace = 2, - Increment = 3, - Decrement = 4, - Invert = 5 + Keep = 0, + Zero = 1, + Replace = 2, + Increment = 3, + Decrement = 4, + Invert = 5, + IncrementWrap = 6, + DecrementWrap = 7 }; struct { diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 25911e0a2..b6c0e1bff 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -237,6 +237,12 @@ static u8 PerformStencilAction(Regs::StencilAction action, u8 old_stencil, u8 re case Regs::StencilAction::Invert: return ~old_stencil; + case Regs::StencilAction::IncrementWrap: + return old_stencil + 1; + + case Regs::StencilAction::DecrementWrap: + return old_stencil - 1; + default: LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action); UNIMPLEMENTED(); From 0c7da9b81530f399b082677899e9bffc3fcd8e2a Mon Sep 17 00:00:00 2001 From: Subv Date: Fri, 21 Aug 2015 11:05:56 -0500 Subject: [PATCH 09/10] HWRasterizer: Implemented stencil ops 6 and 7. --- src/video_core/renderer_opengl/pica_to_gl.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index af5e66e88..12806fad5 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -159,7 +159,9 @@ inline GLenum StencilOp(Pica::Regs::StencilAction action) { GL_REPLACE, // StencilAction::Replace GL_INCR, // StencilAction::Increment GL_DECR, // StencilAction::Decrement - GL_INVERT // StencilAction::Invert + GL_INVERT, // StencilAction::Invert + GL_INCR_WRAP, // StencilAction::IncrementWrap + GL_DECR_WRAP // StencilAction::DecrementWrap }; // Range check table for input From 583d777b1a844259e6a389c420281388fce68dc4 Mon Sep 17 00:00:00 2001 From: Subv Date: Mon, 24 Aug 2015 11:08:01 -0500 Subject: [PATCH 10/10] HWRenderer: Added a workaround for the Intel Windows driver bug that causes glTexSubImage2D to not change the stencil buffer. Reported here https://communities.intel.com/message/324464 --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 962c659e0..c3829d5c6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -869,8 +869,15 @@ void RasterizerOpenGL::ReloadDepthBuffer() { state.Apply(); glActiveTexture(GL_TEXTURE0); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height, - fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get()); + if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) { + // TODO(Subv): There is a bug with Intel Windows drivers that makes glTexSubImage2D not change the stencil buffer. + // The bug has been reported to Intel (https://communities.intel.com/message/324464) + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, fb_depth_texture.width, fb_depth_texture.height, 0, + GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, temp_fb_depth_buffer.get()); + } else { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height, + fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get()); + } state.texture_units[0].texture_2d = 0; state.Apply();