From 781912e854863d6f565eab8b53b5b0869ab27ab9 Mon Sep 17 00:00:00 2001 From: wwylele Date: Wed, 11 Apr 2018 15:47:02 +0300 Subject: [PATCH] gl_rasterize: implement shadow mapping using image load/store --- .../renderer_opengl/gl_rasterizer.cpp | 174 +++++++++++++-- .../renderer_opengl/gl_rasterizer.h | 5 + .../renderer_opengl/gl_rasterizer_cache.cpp | 5 + .../renderer_opengl/gl_shader_gen.cpp | 207 +++++++++++++++++- .../renderer_opengl/gl_shader_gen.h | 4 + .../renderer_opengl/gl_shader_manager.cpp | 15 ++ .../renderer_opengl/gl_shader_manager.h | 4 +- src/video_core/renderer_opengl/gl_state.cpp | 58 +++++ src/video_core/renderer_opengl/gl_state.h | 19 ++ 9 files changed, 464 insertions(+), 27 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index b19c04f6f..1318a332e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -38,6 +38,15 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE), uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE), index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE) { + + allow_shadow = GLAD_GL_ARB_shader_image_load_store && GLAD_GL_ARB_shader_image_size && + GLAD_GL_ARB_framebuffer_no_attachments; + if (!allow_shadow) { + NGLOG_WARNING( + Render_OpenGL, + "Shadow might not be able to render because of unsupported OpenGL extensions."); + } + // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 state.clip_distance[0] = true; @@ -237,6 +246,7 @@ void RasterizerOpenGL::SyncEntireState() { SyncFogColor(); SyncProcTexNoise(); + SyncShadowBias(); } /** @@ -533,12 +543,16 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { MICROPROFILE_SCOPE(OpenGL_Drawing); const auto& regs = Pica::g_state.regs; + bool shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode == + 
Pica::FramebufferRegs::FragmentOperationMode::Shadow; + const bool has_stencil = regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; - const bool write_color_fb = - state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || - state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; + const bool write_color_fb = shadow_rendering || state.color_mask.red_enabled == GL_TRUE || + state.color_mask.green_enabled == GL_TRUE || + state.color_mask.blue_enabled == GL_TRUE || + state.color_mask.alpha_enabled == GL_TRUE; const bool write_depth_fb = (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) || @@ -547,7 +561,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { const bool using_color_fb = regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb; const bool using_depth_fb = - regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 && + !shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 && (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 || (has_stencil && state.stencil.test_enabled)); @@ -591,24 +605,39 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { state.draw.draw_framebuffer = framebuffer.handle; state.Apply(); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - color_surface != nullptr ? 
color_surface->texture.handle : 0, 0); - if (depth_surface != nullptr) { - if (has_stencil) { - // attach both depth and stencil - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->texture.handle, 0); - } else { - // attach depth - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->texture.handle, 0); - // clear stencil attachment - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + if (shadow_rendering) { + if (!allow_shadow || color_surface == nullptr) { + return true; } - } else { - // clear both depth and stencil attachment + glFramebufferParameteri(GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER_DEFAULT_WIDTH, + color_surface->width * color_surface->res_scale); + glFramebufferParameteri(GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER_DEFAULT_HEIGHT, + color_surface->height * color_surface->res_scale); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + state.image_shadow_buffer = color_surface->texture.handle; + } else { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + color_surface != nullptr ? 
color_surface->texture.handle : 0, 0); + if (depth_surface != nullptr) { + if (has_stencil) { + // attach both depth and stencil + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, depth_surface->texture.handle, 0); + } else { + // attach depth + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + depth_surface->texture.handle, 0); + // clear stencil attachment + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + } + } else { + // clear both depth and stencil attachment + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + } } // Sync the viewport @@ -658,6 +687,82 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { if (texture_index == 0) { using TextureType = Pica::TexturingRegs::TextureConfig::TextureType; switch (texture.config.type.Value()) { + case TextureType::Shadow2D: { + if (!allow_shadow) + continue; + + Surface surface = res_cache.GetTextureSurface(texture); + if (surface != nullptr) { + state.image_shadow_texture_px = surface->texture.handle; + } else { + state.image_shadow_texture_px = 0; + } + continue; + } + case TextureType::ShadowCube: { + if (!allow_shadow) + continue; + Pica::Texture::TextureInfo info = Pica::Texture::TextureInfo::FromPicaRegister( + texture.config, texture.format); + Surface surface; + + using CubeFace = Pica::TexturingRegs::CubeFace; + info.physical_address = + regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX); + surface = res_cache.GetTextureSurface(info); + if (surface != nullptr) { + state.image_shadow_texture_px = surface->texture.handle; + } else { + state.image_shadow_texture_px = 0; + } + + info.physical_address = + regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX); + surface = res_cache.GetTextureSurface(info); + if (surface != nullptr) { + state.image_shadow_texture_nx = surface->texture.handle; + } else { + 
state.image_shadow_texture_nx = 0; + } + + info.physical_address = + regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY); + surface = res_cache.GetTextureSurface(info); + if (surface != nullptr) { + state.image_shadow_texture_py = surface->texture.handle; + } else { + state.image_shadow_texture_py = 0; + } + + info.physical_address = + regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY); + surface = res_cache.GetTextureSurface(info); + if (surface != nullptr) { + state.image_shadow_texture_ny = surface->texture.handle; + } else { + state.image_shadow_texture_ny = 0; + } + + info.physical_address = + regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ); + surface = res_cache.GetTextureSurface(info); + if (surface != nullptr) { + state.image_shadow_texture_pz = surface->texture.handle; + } else { + state.image_shadow_texture_pz = 0; + } + + info.physical_address = + regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ); + surface = res_cache.GetTextureSurface(info); + if (surface != nullptr) { + state.image_shadow_texture_nz = surface->texture.handle; + } else { + state.image_shadow_texture_nz = 0; + } + + continue; + } case TextureType::TextureCube: using CubeFace = Pica::TexturingRegs::CubeFace; TextureCubeConfig config; @@ -791,8 +896,22 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { state.texture_units[texture_index].texture_2d = 0; } state.texture_cube_unit.texture_cube = 0; + if (allow_shadow) { + state.image_shadow_texture_px = 0; + state.image_shadow_texture_nx = 0; + state.image_shadow_texture_py = 0; + state.image_shadow_texture_ny = 0; + state.image_shadow_texture_pz = 0; + state.image_shadow_texture_nz = 0; + state.image_shadow_buffer = 0; + } state.Apply(); + if (shadow_rendering) { + glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | + GL_TEXTURE_UPDATE_BARRIER_BIT | GL_FRAMEBUFFER_BARRIER_BIT); + } + // Mark framebuffer surfaces as dirty MathUtil::Rectangle 
draw_rect_unscaled{ draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale, @@ -951,6 +1070,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncColorWriteMask(); break; + case PICA_REG_INDEX(framebuffer.shadow): + SyncShadowBias(); + break; + // Scissor test case PICA_REG_INDEX(rasterizer.scissor_test.mode): shader_dirty = true; @@ -1926,6 +2049,19 @@ void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) { } } +void RasterizerOpenGL::SyncShadowBias() { + const auto& shadow = Pica::g_state.regs.framebuffer.shadow; + GLfloat constant = Pica::float16::FromRaw(shadow.constant).ToFloat32(); + GLfloat linear = Pica::float16::FromRaw(shadow.linear).ToFloat32(); + + if (constant != uniform_block_data.data.shadow_bias_constant || + linear != uniform_block_data.data.shadow_bias_linear) { + uniform_block_data.data.shadow_bias_constant = constant; + uniform_block_data.data.shadow_bias_linear = linear; + uniform_block_data.dirty = true; + } +} + void RasterizerOpenGL::UploadUniforms(bool accelerate_draw, bool use_gs) { // glBindBufferRange below also changes the generic buffer binding point, so we sync the state // first diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 547b73aae..34058796b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -217,6 +217,9 @@ private: /// Syncs the specified light's distance attenuation scale to match the PICA register void SyncLightDistanceAttenuationScale(int light_index); + /// Syncs the shadow rendering bias to match the PICA register + void SyncShadowBias(); + /// Upload the uniform blocks to the uniform buffer object void UploadUniforms(bool accelerate_draw, bool use_gs); @@ -315,4 +318,6 @@ private: OGLBuffer proctex_diff_lut_buffer; OGLTexture proctex_diff_lut; std::array proctex_diff_lut_data{}; + + bool allow_shadow; }; diff --git 
a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 8c6558813..f2f1d73c5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -303,6 +303,11 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rec buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; } + // TODO (wwylele): use GL_NEAREST for shadow map texture + // Note: shadow map is treated as RGBA8 format in PICA, as well as in the rasterizer cache, but + // doing linear interpolation componentwise would cause incorrect value. However, for a + // well-programmed game this code path should be rarely executed for shadow map with + // inconsistent scale. glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left, dst_rect.bottom, dst_rect.right, dst_rect.top, buffers, buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index cea74a36f..0ae9794de 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -49,6 +49,8 @@ layout (std140) uniform shader_data { int alphatest_ref; float depth_scale; float depth_offset; + float shadow_bias_constant; + float shadow_bias_linear; int scissor_x1; int scissor_y1; int scissor_x2; @@ -220,6 +222,12 @@ PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) { state.proctex.lut_filter = regs.texturing.proctex_lut.filter; } + state.shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode == + Pica::FramebufferRegs::FragmentOperationMode::Shadow; + + state.shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0; + state.shadow_texture_bias = regs.texturing.shadow.bias << 1; + return res; } @@ -300,10 +308,9 @@ static std::string SampleTexture(const PicaFSConfig& 
config, unsigned texture_un case TexturingRegs::TextureConfig::TextureCube: return "texture(tex_cube, vec3(texcoord0, texcoord0_w))"; case TexturingRegs::TextureConfig::Shadow2D: + return "shadowTexture(texcoord0, texcoord0_w)"; case TexturingRegs::TextureConfig::ShadowCube: - NGLOG_CRITICAL(HW_GPU, "Unhandled shadow texture"); - UNIMPLEMENTED(); - return "vec4(1.0)"; // stubbed to avoid rendering with wrong shadow + return "shadowTextureCube(texcoord0, texcoord0_w)"; default: LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", static_cast(state.texture0_type)); @@ -1181,7 +1188,13 @@ float ProcTexNoiseCoef(vec2 x) { std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) { const auto& state = config.state; - std::string out = "#version 330 core\n"; + std::string out = R"( +#version 330 core +#extension GL_ARB_shader_image_load_store : enable +#extension GL_ARB_shader_image_size : enable +#define ALLOW_SHADOW (defined(GL_ARB_shader_image_load_store) && defined(GL_ARB_shader_image_size)) +)"; + if (separable_shader) { out += "#extension GL_ARB_separate_shader_objects : enable\n"; } @@ -1204,6 +1217,16 @@ uniform samplerBuffer proctex_color_map; uniform samplerBuffer proctex_alpha_map; uniform samplerBuffer proctex_lut; uniform samplerBuffer proctex_diff_lut; + +#if ALLOW_SHADOW +layout(r32ui) uniform readonly uimage2D shadow_texture_px; +layout(r32ui) uniform readonly uimage2D shadow_texture_nx; +layout(r32ui) uniform readonly uimage2D shadow_texture_py; +layout(r32ui) uniform readonly uimage2D shadow_texture_ny; +layout(r32ui) uniform readonly uimage2D shadow_texture_pz; +layout(r32ui) uniform readonly uimage2D shadow_texture_nz; +layout(r32ui) uniform uimage2D shadow_buffer; +#endif )"; out += UniformBlockDef; @@ -1248,6 +1271,147 @@ vec4 byteround(vec4 x) { return round(x * 255.0) * (1.0 / 255.0); } +#if ALLOW_SHADOW + +uvec2 DecodeShadow(uint pixel) { + return uvec2(pixel >> 8, pixel & 0xFFu); +} + +uint EncodeShadow(uvec2 pixel) { + 
return (pixel.x << 8) | pixel.y; +} + +float CompareShadow(uint pixel, uint z) { + uvec2 p = DecodeShadow(pixel); + return mix(float(p.y) * (1.0 / 255.0), 0.0, p.x <= z); +} + +float SampleShadow2D(ivec2 uv, uint z) { + if (any(bvec4( lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px)) ))) + return 1.0; + return CompareShadow(imageLoad(shadow_texture_px, uv).x, z); +} + +float mix2(vec4 s, vec2 a) { + vec2 t = mix(s.xy, s.zw, a.yy); + return mix(t.x, t.y, a.x); +} + +vec4 shadowTexture(vec2 uv, float w) { +)"; + if (!config.state.shadow_texture_orthographic) { + out += "uv /= w;"; + } + out += "uint z = uint(max(0, int(min(abs(w), 1.0) * 0xFFFFFF) - " + + std::to_string(state.shadow_texture_bias) + "));"; + out += R"( + vec2 coord = vec2(imageSize(shadow_texture_px)) * uv - vec2(0.5); + vec2 coord_floor = floor(coord); + vec2 f = coord - coord_floor; + ivec2 i = ivec2(coord_floor); + vec4 s = vec4( + SampleShadow2D(i , z), + SampleShadow2D(i + ivec2(1, 0), z), + SampleShadow2D(i + ivec2(0, 1), z), + SampleShadow2D(i + ivec2(1, 1), z)); + return vec4(mix2(s, f)); +} + +vec4 shadowTextureCube(vec2 uv, float w) { + ivec2 size = imageSize(shadow_texture_px); + vec3 c = vec3(uv, w); + vec3 a = abs(c); + if (a.x > a.y && a.x > a.z) { + w = a.x; + uv = -c.zy; + if (c.x < 0.0) uv.x = -uv.x; + } else if (a.y > a.z) { + w = a.y; + uv = c.xz; + if (c.y < 0.0) uv.y = -uv.y; + } else { + w = a.z; + uv = -c.xy; + if (c.z > 0.0) uv.x = -uv.x; + } +)"; + out += "uint z = uint(max(0, int(min(w, 1.0) * 0xFFFFFF) - " + + std::to_string(state.shadow_texture_bias) + "));"; + out += R"( + vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5); + vec2 coord_floor = floor(coord); + vec2 f = coord - coord_floor; + ivec2 i00 = ivec2(coord_floor); + ivec2 i10 = i00 + ivec2(1, 0); + ivec2 i01 = i00 + ivec2(0, 1); + ivec2 i11 = i00 + ivec2(1, 1); + ivec2 cmin = ivec2(0), cmax = size - ivec2(1, 1); + i00 = clamp(i00, cmin, cmax); + i10 = clamp(i10, cmin, 
cmax); + i01 = clamp(i01, cmin, cmax); + i11 = clamp(i11, cmin, cmax); + uvec4 pixels; + // This part should have been refactored into functions, + // but many drivers don't like passing uimage2D as parameters + if (a.x > a.y && a.x > a.z) { + if (c.x > 0.0) + pixels = uvec4( + imageLoad(shadow_texture_px, i00).r, + imageLoad(shadow_texture_px, i10).r, + imageLoad(shadow_texture_px, i01).r, + imageLoad(shadow_texture_px, i11).r); + else + pixels = uvec4( + imageLoad(shadow_texture_nx, i00).r, + imageLoad(shadow_texture_nx, i10).r, + imageLoad(shadow_texture_nx, i01).r, + imageLoad(shadow_texture_nx, i11).r); + } else if (a.y > a.z) { + if (c.y > 0.0) + pixels = uvec4( + imageLoad(shadow_texture_py, i00).r, + imageLoad(shadow_texture_py, i10).r, + imageLoad(shadow_texture_py, i01).r, + imageLoad(shadow_texture_py, i11).r); + else + pixels = uvec4( + imageLoad(shadow_texture_ny, i00).r, + imageLoad(shadow_texture_ny, i10).r, + imageLoad(shadow_texture_ny, i01).r, + imageLoad(shadow_texture_ny, i11).r); + } else { + if (c.z > 0.0) + pixels = uvec4( + imageLoad(shadow_texture_pz, i00).r, + imageLoad(shadow_texture_pz, i10).r, + imageLoad(shadow_texture_pz, i01).r, + imageLoad(shadow_texture_pz, i11).r); + else + pixels = uvec4( + imageLoad(shadow_texture_nz, i00).r, + imageLoad(shadow_texture_nz, i10).r, + imageLoad(shadow_texture_nz, i01).r, + imageLoad(shadow_texture_nz, i11).r); + } + vec4 s = vec4( + CompareShadow(pixels.x, z), + CompareShadow(pixels.y, z), + CompareShadow(pixels.z, z), + CompareShadow(pixels.w, z)); + return vec4(mix2(s, f)); +} + +#else + +vec4 shadowTexture(vec2 uv, float w) { + return vec4(1.0); +} + +vec4 shadowTextureCube(vec2 uv, float w) { + return vec4(1.0); +} + +#endif )"; if (config.state.proctex.enable) @@ -1331,9 +1495,38 @@ vec4 secondary_fragment_color = vec4(0.0); return out; } - out += "gl_FragDepth = depth;\n"; - // Round the final fragment color to maintain the PICA's 8 bits of precision - out += "color = 
byteround(last_tex_env_out);\n"; + if (state.shadow_rendering) { + out += R"( +#if ALLOW_SHADOW +uint d = uint(clamp(depth, 0.0, 1.0) * 0xFFFFFF); +uint s = uint(last_tex_env_out.g * 0xFF); +ivec2 image_coord = ivec2(gl_FragCoord.xy); + +uint old = imageLoad(shadow_buffer, image_coord).x; +uint new; +uint old2; +do { + old2 = old; + + uvec2 ref = DecodeShadow(old); + if (d < ref.x) { + if (s == 0u) { + ref.x = d; + } else { + s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x))); + ref.y = min(s, ref.y); + } + } + new = EncodeShadow(ref); + +} while ((old = imageAtomicCompSwap(shadow_buffer, image_coord, old, new)) != old2); +#endif // ALLOW_SHADOW +)"; + } else { + out += "gl_FragDepth = depth;\n"; + // Round the final fragment color to maintain the PICA's 8 bits of precision + out += "color = byteround(last_tex_env_out);\n"; + } out += "}"; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index ada4060e2..e3f3fd5bd 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -110,6 +110,10 @@ struct PicaFSConfigState { u32 lut_offset; Pica::TexturingRegs::ProcTexFilter lut_filter; } proctex; + + bool shadow_rendering; + bool shadow_texture_orthographic; + u32 shadow_texture_bias; }; /** diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 71acdc5ff..e70b5d87a 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -36,6 +36,13 @@ static void SetShaderSamplerBinding(GLuint shader, const char* name, } } +static void SetShaderImageBinding(GLuint shader, const char* name, GLuint binding) { + GLint uniform_tex = glGetUniformLocation(shader, name); + if (uniform_tex != -1) { + glUniform1i(uniform_tex, static_cast(binding)); + } +} + static void SetShaderSamplerBindings(GLuint 
shader) { OpenGLState cur_state = OpenGLState::GetCurState(); GLuint old_program = std::exchange(cur_state.draw.shader_program, shader); @@ -56,6 +63,14 @@ static void SetShaderSamplerBindings(GLuint shader) { SetShaderSamplerBinding(shader, "proctex_lut", TextureUnits::ProcTexLUT); SetShaderSamplerBinding(shader, "proctex_diff_lut", TextureUnits::ProcTexDiffLUT); + SetShaderImageBinding(shader, "shadow_buffer", ImageUnits::ShadowBuffer); + SetShaderImageBinding(shader, "shadow_texture_px", ImageUnits::ShadowTexturePX); + SetShaderImageBinding(shader, "shadow_texture_nx", ImageUnits::ShadowTextureNX); + SetShaderImageBinding(shader, "shadow_texture_py", ImageUnits::ShadowTexturePY); + SetShaderImageBinding(shader, "shadow_texture_ny", ImageUnits::ShadowTextureNY); + SetShaderImageBinding(shader, "shadow_texture_pz", ImageUnits::ShadowTexturePZ); + SetShaderImageBinding(shader, "shadow_texture_nz", ImageUnits::ShadowTextureNZ); + cur_state.draw.shader_program = old_program; cur_state.Apply(); } diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 364b8090c..73acc3297 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -32,6 +32,8 @@ struct UniformData { GLint alphatest_ref; GLfloat depth_scale; GLfloat depth_offset; + GLfloat shadow_bias_constant; + GLfloat shadow_bias_linear; GLint scissor_x1; GLint scissor_y1; GLint scissor_x2; @@ -48,7 +50,7 @@ struct UniformData { }; static_assert( - sizeof(UniformData) == 0x460, + sizeof(UniformData) == 0x470, "The size of the UniformData structure has changed, update the structure in the shader"); static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 124a41cd9..95dbd591b 100644 --- 
a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -65,6 +65,14 @@ OpenGLState::OpenGLState() { proctex_alpha_map.texture_buffer = 0; proctex_noise_lut.texture_buffer = 0; + image_shadow_buffer = 0; + image_shadow_texture_px = 0; + image_shadow_texture_nx = 0; + image_shadow_texture_py = 0; + image_shadow_texture_ny = 0; + image_shadow_texture_pz = 0; + image_shadow_texture_nz = 0; + draw.read_framebuffer = 0; draw.draw_framebuffer = 0; draw.vertex_array = 0; @@ -255,6 +263,42 @@ void OpenGLState::Apply() const { glBindTexture(GL_TEXTURE_BUFFER, proctex_diff_lut.texture_buffer); } + // Shadow Images + if (image_shadow_buffer != cur_state.image_shadow_buffer) { + glBindImageTexture(ImageUnits::ShadowBuffer, image_shadow_buffer, 0, GL_FALSE, 0, + GL_READ_WRITE, GL_R32UI); + } + + if (image_shadow_texture_px != cur_state.image_shadow_texture_px) { + glBindImageTexture(ImageUnits::ShadowTexturePX, image_shadow_texture_px, 0, GL_FALSE, 0, + GL_READ_ONLY, GL_R32UI); + } + + if (image_shadow_texture_nx != cur_state.image_shadow_texture_nx) { + glBindImageTexture(ImageUnits::ShadowTextureNX, image_shadow_texture_nx, 0, GL_FALSE, 0, + GL_READ_ONLY, GL_R32UI); + } + + if (image_shadow_texture_py != cur_state.image_shadow_texture_py) { + glBindImageTexture(ImageUnits::ShadowTexturePY, image_shadow_texture_py, 0, GL_FALSE, 0, + GL_READ_ONLY, GL_R32UI); + } + + if (image_shadow_texture_ny != cur_state.image_shadow_texture_ny) { + glBindImageTexture(ImageUnits::ShadowTextureNY, image_shadow_texture_ny, 0, GL_FALSE, 0, + GL_READ_ONLY, GL_R32UI); + } + + if (image_shadow_texture_pz != cur_state.image_shadow_texture_pz) { + glBindImageTexture(ImageUnits::ShadowTexturePZ, image_shadow_texture_pz, 0, GL_FALSE, 0, + GL_READ_ONLY, GL_R32UI); + } + + if (image_shadow_texture_nz != cur_state.image_shadow_texture_nz) { + glBindImageTexture(ImageUnits::ShadowTextureNZ, image_shadow_texture_nz, 0, GL_FALSE, 0, + GL_READ_ONLY, GL_R32UI); + } + 
// Framebuffer if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); @@ -344,6 +388,20 @@ OpenGLState& OpenGLState::ResetTexture(GLuint handle) { proctex_lut.texture_buffer = 0; if (proctex_diff_lut.texture_buffer == handle) proctex_diff_lut.texture_buffer = 0; + if (image_shadow_buffer == handle) + image_shadow_buffer = 0; + if (image_shadow_texture_px == handle) + image_shadow_texture_px = 0; + if (image_shadow_texture_nx == handle) + image_shadow_texture_nx = 0; + if (image_shadow_texture_py == handle) + image_shadow_texture_py = 0; + if (image_shadow_texture_ny == handle) + image_shadow_texture_ny = 0; + if (image_shadow_texture_pz == handle) + image_shadow_texture_pz = 0; + if (image_shadow_texture_nz == handle) + image_shadow_texture_nz = 0; return *this; } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 29a0aabb5..ebc217349 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -31,6 +31,16 @@ constexpr TextureUnit TextureCube{10}; } // namespace TextureUnits +namespace ImageUnits { +constexpr GLuint ShadowBuffer = 0; +constexpr GLuint ShadowTexturePX = 1; +constexpr GLuint ShadowTextureNX = 2; +constexpr GLuint ShadowTexturePY = 3; +constexpr GLuint ShadowTextureNY = 4; +constexpr GLuint ShadowTexturePZ = 5; +constexpr GLuint ShadowTextureNZ = 6; +} // namespace ImageUnits + class OpenGLState { public: struct { @@ -121,6 +131,15 @@ public: GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER } proctex_diff_lut; + // GL_IMAGE_BINDING_NAME + GLuint image_shadow_buffer; + GLuint image_shadow_texture_px; + GLuint image_shadow_texture_nx; + GLuint image_shadow_texture_py; + GLuint image_shadow_texture_ny; + GLuint image_shadow_texture_pz; + GLuint image_shadow_texture_nz; + struct { GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING GLuint draw_framebuffer; // 
GL_DRAW_FRAMEBUFFER_BINDING