From a955f02771643b733e719087bc7653ec9286a00b Mon Sep 17 00:00:00 2001 From: GPUCode <47210458+GPUCode@users.noreply.github.com> Date: Tue, 1 Aug 2023 03:35:41 +0300 Subject: [PATCH] rasterizer_cache: Remove runtime allocation caching (#6705) * rasterizer_cache: Sentence surfaces * gl_texture_runtime: Remove runtime side allocation cache * rasterizer_cache: Adjust surface scale during reinterpretation * Fixes pixelated outlines. Also allows removing the d24s8-specific hack and is more generic in general * rasterizer_cache: Remove Expand flag * Begone! * rasterizer_cache: Cache framebuffers with surface id * rasterizer_cache: Sentence texture cubes * renderer_opengl: Move texture mailbox to separate file * Makes renderer_opengl cleaner overall and allows reporting the removal threshold from runtime instead of hardcoding. Vulkan requires this * rasterizer_cache: Don't flush cache on layout change * rasterizer_cache: Overhaul framebuffer management * video_core: Remove duplicate * rasterizer_cache: Sentence custom surfaces * Vulkan cannot destroy images immediately so this ensures we use our garbage collector for that purpose --- src/common/slot_vector.h | 19 +- src/core/core.cpp | 2 - src/video_core/CMakeLists.txt | 3 +- .../rasterizer_cache/framebuffer_base.cpp | 73 ----- .../rasterizer_cache/framebuffer_base.h | 154 +++++++--- .../rasterizer_cache/rasterizer_cache.h | 254 ++++++++--------- .../rasterizer_cache/rasterizer_cache_base.h | 35 ++- src/video_core/rasterizer_cache/slot_id.h | 1 + .../rasterizer_cache/surface_base.h | 4 + .../rasterizer_cache/surface_params.cpp | 9 - .../rasterizer_cache/surface_params.h | 7 +- src/video_core/rasterizer_cache/utils.h | 1 + src/video_core/renderer_base.h | 2 - .../renderer_opengl/frame_dumper_opengl.cpp | 2 +- .../renderer_opengl/gl_rasterizer.cpp | 44 +-- .../renderer_opengl/gl_rasterizer.h | 4 +- src/video_core/renderer_opengl/gl_state.cpp | 2 - .../renderer_opengl/gl_texture_mailbox.cpp | 194 +++++++++++++ 
.../renderer_opengl/gl_texture_mailbox.h | 92 ++++++ .../renderer_opengl/gl_texture_runtime.cpp | 255 +++++++---------- .../renderer_opengl/gl_texture_runtime.h | 100 +++---- .../renderer_opengl/renderer_opengl.cpp | 267 ++---------------- .../renderer_opengl/renderer_opengl.h | 19 +- 23 files changed, 734 insertions(+), 809 deletions(-) delete mode 100644 src/video_core/rasterizer_cache/framebuffer_base.cpp create mode 100644 src/video_core/renderer_opengl/gl_texture_mailbox.cpp create mode 100644 src/video_core/renderer_opengl/gl_texture_mailbox.h diff --git a/src/common/slot_vector.h b/src/common/slot_vector.h index 9a383f2a0..1969afe64 100644 --- a/src/common/slot_vector.h +++ b/src/common/slot_vector.h @@ -62,12 +62,29 @@ public: return SlotId{index}; } + template + [[nodiscard]] SlotId swap_and_insert(SlotId existing_id, Args&&... args) noexcept { + const u32 index = FreeValueIndex(); + T& existing_value = values[existing_id.index].object; + + new (&values[index].object) T(std::move(existing_value)); + existing_value.~T(); + new (&values[existing_id.index].object) T(std::forward(args)...); + SetStorageBit(index); + + return SlotId{index}; + } + void erase(SlotId id) noexcept { values[id.index].object.~T(); free_list.push_back(id.index); ResetStorageBit(id.index); } + size_t size() const noexcept { + return values_capacity - free_list.size(); + } + private: struct NonTrivialDummy { NonTrivialDummy() noexcept {} @@ -93,7 +110,7 @@ private: return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0; } - void ValidateIndex(SlotId id) const noexcept { + void ValidateIndex([[maybe_unused]] SlotId id) const noexcept { DEBUG_ASSERT(id); DEBUG_ASSERT(id.index / 64 < stored_bitset.size()); DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0); diff --git a/src/core/core.cpp b/src/core/core.cpp index 019c55f20..c11d8b151 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -617,9 +617,7 @@ void System::ApplySettings() { if 
(VideoCore::g_renderer) { auto& settings = VideoCore::g_renderer->Settings(); settings.bg_color_update_requested = true; - settings.sampler_update_requested = true; settings.shader_update_requested = true; - settings.texture_filter_update_requested = true; } if (IsPoweredOn()) { diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index b2cc2884f..824a6b310 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -34,7 +34,6 @@ add_library(video_core STATIC regs_texturing.h renderer_base.cpp renderer_base.h - rasterizer_cache/framebuffer_base.cpp rasterizer_cache/framebuffer_base.h rasterizer_cache/pixel_format.cpp rasterizer_cache/pixel_format.h @@ -76,6 +75,8 @@ add_library(video_core STATIC renderer_opengl/gl_state.h renderer_opengl/gl_stream_buffer.cpp renderer_opengl/gl_stream_buffer.h + renderer_opengl/gl_texture_mailbox.cpp + renderer_opengl/gl_texture_mailbox.h renderer_opengl/gl_texture_runtime.cpp renderer_opengl/gl_texture_runtime.h renderer_opengl/gl_vars.cpp diff --git a/src/video_core/rasterizer_cache/framebuffer_base.cpp b/src/video_core/rasterizer_cache/framebuffer_base.cpp deleted file mode 100644 index 8f7a5f7c2..000000000 --- a/src/video_core/rasterizer_cache/framebuffer_base.cpp +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2023 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "video_core/rasterizer_cache/framebuffer_base.h" -#include "video_core/rasterizer_cache/surface_base.h" -#include "video_core/regs.h" - -namespace VideoCore { - -FramebufferBase::FramebufferBase() = default; - -FramebufferBase::FramebufferBase(const Pica::Regs& regs, const SurfaceBase* color, u32 color_level, - const SurfaceBase* depth_stencil, u32 depth_level, - Common::Rectangle surfaces_rect) { - res_scale = color ? color->res_scale : (depth_stencil ? 
depth_stencil->res_scale : 1u); - - // Determine the draw rectangle (render area + scissor) - const Common::Rectangle viewport_rect = regs.rasterizer.GetViewportRect(); - draw_rect.left = - std::clamp(static_cast(surfaces_rect.left) + viewport_rect.left * res_scale, - surfaces_rect.left, surfaces_rect.right); - draw_rect.top = - std::clamp(static_cast(surfaces_rect.bottom) + viewport_rect.top * res_scale, - surfaces_rect.bottom, surfaces_rect.top); - draw_rect.right = - std::clamp(static_cast(surfaces_rect.left) + viewport_rect.right * res_scale, - surfaces_rect.left, surfaces_rect.right); - draw_rect.bottom = - std::clamp(static_cast(surfaces_rect.bottom) + viewport_rect.bottom * res_scale, - surfaces_rect.bottom, surfaces_rect.top); - - // Update viewport - viewport.x = static_cast(surfaces_rect.left) + viewport_rect.left * res_scale; - viewport.y = static_cast(surfaces_rect.bottom) + viewport_rect.bottom * res_scale; - viewport.width = static_cast(viewport_rect.GetWidth() * res_scale); - viewport.height = static_cast(viewport_rect.GetHeight() * res_scale); - - // Scissor checks are window-, not viewport-relative, which means that if the cached texture - // sub-rect changes, the scissor bounds also need to be updated. - scissor_rect.left = - static_cast(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale); - scissor_rect.bottom = - static_cast(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale); - - // x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when - // scaling or doing multisampling. - scissor_rect.right = - static_cast(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale); - scissor_rect.top = - static_cast(surfaces_rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * res_scale); - - // Rendering to mipmaps is something quite rare so log it when it occurs. 
- if (color_level != 0) { - LOG_WARNING(HW_GPU, "Game is rendering to color mipmap {}", color_level); - } - if (depth_level != 0) { - LOG_WARNING(HW_GPU, "Game is rendering to depth mipmap {}", depth_level); - } - - // Query surface invalidation intervals - const Common::Rectangle draw_rect_unscaled{draw_rect / res_scale}; - if (color) { - color_params = *color; - intervals[0] = color->GetSubRectInterval(draw_rect_unscaled, color_level); - } - if (depth_stencil) { - depth_params = *depth_stencil; - intervals[1] = depth_stencil->GetSubRectInterval(draw_rect_unscaled, depth_level); - } -} - -} // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/framebuffer_base.h b/src/video_core/rasterizer_cache/framebuffer_base.h index 932bf73a4..afd25cc21 100644 --- a/src/video_core/rasterizer_cache/framebuffer_base.h +++ b/src/video_core/rasterizer_cache/framebuffer_base.h @@ -4,12 +4,11 @@ #pragma once +#include "common/hash.h" #include "common/math_util.h" +#include "video_core/rasterizer_cache/slot_id.h" #include "video_core/rasterizer_cache/surface_params.h" - -namespace Pica { -struct Regs; -} +#include "video_core/regs_rasterizer.h" namespace VideoCore { @@ -22,31 +21,109 @@ struct ViewportInfo { s32 height; }; +struct FramebufferParams { + SurfaceId color_id; + SurfaceId depth_id; + u32 color_level; + u32 depth_level; + bool shadow_rendering; + INSERT_PADDING_BYTES(3); + + bool operator==(const FramebufferParams& params) const noexcept { + return std::memcmp(this, ¶ms, sizeof(FramebufferParams)) == 0; + } + + u64 Hash() const noexcept { + return Common::ComputeHash64(this, sizeof(FramebufferParams)); + } + + u32 Index(VideoCore::SurfaceType type) const noexcept { + switch (type) { + case VideoCore::SurfaceType::Color: + return 0; + case VideoCore::SurfaceType::Depth: + case VideoCore::SurfaceType::DepthStencil: + return 1; + default: + LOG_CRITICAL(HW_GPU, "Unknown surface type in framebuffer"); + return 0; + } + } +}; 
+static_assert(std::has_unique_object_representations_v, + "FramebufferParams is not suitable for hashing"); + +template +class RasterizerCache; + /** - * A framebuffer is a lightweight abstraction over a pair of surfaces and provides - * metadata about them. + * @brief FramebufferHelper is a RAII wrapper over backend specific framebuffer handle that + * provides the viewport/scissor/draw rectanges and performs automatic rasterizer cache invalidation + * when out of scope. */ -class FramebufferBase { +template +class FramebufferHelper { public: - FramebufferBase(); - FramebufferBase(const Pica::Regs& regs, const SurfaceBase* color, u32 color_level, - const SurfaceBase* depth_stencil, u32 depth_level, - Common::Rectangle surfaces_rect); + explicit FramebufferHelper(RasterizerCache* res_cache_, typename T::Framebuffer* fb_, + const Pica::RasterizerRegs& regs, + Common::Rectangle surfaces_rect) + : res_cache{res_cache_}, fb{fb_} { + const u32 res_scale = fb->Scale(); - SurfaceParams ColorParams() const noexcept { - return color_params; + // Determine the draw rectangle (render area + scissor) + const Common::Rectangle viewport_rect = regs.GetViewportRect(); + draw_rect.left = + std::clamp(static_cast(surfaces_rect.left) + viewport_rect.left * res_scale, + surfaces_rect.left, surfaces_rect.right); + draw_rect.top = + std::clamp(static_cast(surfaces_rect.bottom) + viewport_rect.top * res_scale, + surfaces_rect.bottom, surfaces_rect.top); + draw_rect.right = + std::clamp(static_cast(surfaces_rect.left) + viewport_rect.right * res_scale, + surfaces_rect.left, surfaces_rect.right); + draw_rect.bottom = std::clamp(static_cast(surfaces_rect.bottom) + + viewport_rect.bottom * res_scale, + surfaces_rect.bottom, surfaces_rect.top); + + // Update viewport + viewport.x = static_cast(surfaces_rect.left) + viewport_rect.left * res_scale; + viewport.y = static_cast(surfaces_rect.bottom) + viewport_rect.bottom * res_scale; + viewport.width = static_cast(viewport_rect.GetWidth() * 
res_scale); + viewport.height = static_cast(viewport_rect.GetHeight() * res_scale); + + // Scissor checks are window-, not viewport-relative, which means that if the cached texture + // sub-rect changes, the scissor bounds also need to be updated. + scissor_rect.left = static_cast(surfaces_rect.left + regs.scissor_test.x1 * res_scale); + scissor_rect.bottom = + static_cast(surfaces_rect.bottom + regs.scissor_test.y1 * res_scale); + + // x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when + // scaling or doing multisampling. + scissor_rect.right = + static_cast(surfaces_rect.left + (regs.scissor_test.x2 + 1) * res_scale); + scissor_rect.top = + static_cast(surfaces_rect.bottom + (regs.scissor_test.y2 + 1) * res_scale); } - SurfaceParams DepthParams() const noexcept { - return depth_params; + ~FramebufferHelper() { + const Common::Rectangle draw_rect_unscaled{draw_rect / fb->Scale()}; + const auto invalidate = [&](SurfaceId surface_id, u32 level) { + const auto& surface = res_cache->GetSurface(surface_id); + const SurfaceInterval interval = surface.GetSubRectInterval(draw_rect_unscaled, level); + const PAddr addr = boost::icl::first(interval); + const u32 size = boost::icl::length(interval); + res_cache->InvalidateRegion(addr, size, surface_id); + }; + if (fb->color_id) { + invalidate(fb->color_id, fb->color_level); + } + if (fb->depth_id) { + invalidate(fb->depth_id, fb->depth_level); + } } - SurfaceInterval Interval(SurfaceType type) const noexcept { - return intervals[Index(type)]; - } - - u32 ResolutionScale() const noexcept { - return res_scale; + typename T::Framebuffer* Framebuffer() const noexcept { + return fb; } Common::Rectangle DrawRect() const noexcept { @@ -61,28 +138,21 @@ public: return viewport; } -protected: - u32 Index(VideoCore::SurfaceType type) const noexcept { - switch (type) { - case VideoCore::SurfaceType::Color: - return 0; - case VideoCore::SurfaceType::Depth: - case VideoCore::SurfaceType::DepthStencil: 
- return 1; - default: - LOG_CRITICAL(HW_GPU, "Unknown surface type in framebuffer"); - return 0; - } - } - -protected: - SurfaceParams color_params{}; - SurfaceParams depth_params{}; - std::array intervals{}; - Common::Rectangle scissor_rect{}; - Common::Rectangle draw_rect{}; +private: + RasterizerCache* res_cache; + typename T::Framebuffer* fb; + Common::Rectangle scissor_rect; + Common::Rectangle draw_rect; ViewportInfo viewport; - u32 res_scale{1}; }; } // namespace VideoCore + +namespace std { +template <> +struct hash { + std::size_t operator()(const VideoCore::FramebufferParams& params) const noexcept { + return params.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index 512f216c0..8eda92275 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -37,7 +37,7 @@ RasterizerCache::RasterizerCache(Memory::MemorySystem& memory_, Pica::Regs& regs_, RendererBase& renderer_) : memory{memory_}, custom_tex_manager{custom_tex_manager_}, runtime{runtime_}, regs{regs_}, renderer{renderer_}, resolution_scale_factor{renderer.GetResolutionScaleFactor()}, - use_filter{Settings::values.texture_filter.GetValue() != Settings::TextureFilter::None}, + filter{Settings::values.texture_filter.GetValue()}, dump_textures{Settings::values.dump_textures.GetValue()}, use_custom_textures{Settings::values.custom_textures.GetValue()} { using TextureConfig = Pica::TexturingRegs::TextureConfig; @@ -76,17 +76,21 @@ RasterizerCache::~RasterizerCache() { template void RasterizerCache::TickFrame() { custom_tex_manager.TickFrame(); + RunGarbageCollector(); + + const auto new_filter = Settings::values.texture_filter.GetValue(); + if (filter != new_filter) [[unlikely]] { + filter = new_filter; + UnregisterAll(); + } const u32 scale_factor = renderer.GetResolutionScaleFactor(); const bool resolution_scale_changed = 
resolution_scale_factor != scale_factor; const bool use_custom_texture_changed = Settings::values.custom_textures.GetValue() != use_custom_textures; - const bool texture_filter_changed = - renderer.Settings().texture_filter_update_requested.exchange(false); - if (resolution_scale_changed || texture_filter_changed || use_custom_texture_changed) { + if (resolution_scale_changed || use_custom_texture_changed) { resolution_scale_factor = scale_factor; - use_filter = Settings::values.texture_filter.GetValue() != Settings::TextureFilter::None; use_custom_textures = Settings::values.custom_textures.GetValue(); if (use_custom_textures) { custom_tex_manager.FindCustomTextures(); @@ -95,6 +99,34 @@ void RasterizerCache::TickFrame() { } } +template +void RasterizerCache::RunGarbageCollector() { + frame_tick++; + for (auto it = sentenced.begin(); it != sentenced.end();) { + const auto [surface_id, tick] = *it; + if (frame_tick - tick <= runtime.RemoveThreshold()) { + it++; + continue; + } + RemoveFramebuffers(surface_id); + slot_surfaces.erase(surface_id); + it = sentenced.erase(it); + } +} + +template +void RasterizerCache::RemoveFramebuffers(SurfaceId surface_id) { + for (auto it = framebuffers.begin(); it != framebuffers.end();) { + const auto& params = it->first; + if (params.color_id == surface_id || params.depth_id == surface_id) { + slot_framebuffers.erase(it->second); + it = framebuffers.erase(it); + } else { + it++; + } + } +} + template bool RasterizerCache::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { const DebugScope scope{runtime, Common::Vec4f{0.f, 0.f, 1.f, 1.f}, @@ -322,29 +354,46 @@ template void RasterizerCache::CopySurface(Surface& src_surface, Surface& dst_surface, SurfaceInterval copy_interval) { MICROPROFILE_SCOPE(RasterizerCache_CopySurface); - const PAddr copy_addr = copy_interval.lower(); const SurfaceParams subrect_params = dst_surface.FromInterval(copy_interval); - const auto dst_rect = 
dst_surface.GetScaledSubRect(subrect_params); ASSERT(subrect_params.GetInterval() == copy_interval); if (src_surface.type == SurfaceType::Fill) { const TextureClear clear = { .texture_level = dst_surface.LevelOf(copy_addr), - .texture_rect = dst_rect, + .texture_rect = dst_surface.GetScaledSubRect(subrect_params), .value = src_surface.MakeClearValue(copy_addr, dst_surface.pixel_format), }; runtime.ClearTexture(dst_surface, clear); return; } - const TextureBlit blit = { - .src_level = src_surface.LevelOf(copy_addr), - .dst_level = dst_surface.LevelOf(copy_addr), - .src_rect = src_surface.GetScaledSubRect(subrect_params), - .dst_rect = dst_rect, - }; - runtime.BlitTextures(src_surface, dst_surface, blit); + const u32 src_scale = src_surface.res_scale; + const u32 dst_scale = dst_surface.res_scale; + if (src_scale > dst_scale) { + dst_surface.ScaleUp(src_scale); + } + + const auto src_rect = src_surface.GetScaledSubRect(subrect_params); + const auto dst_rect = dst_surface.GetScaledSubRect(subrect_params); + if (src_scale == dst_scale) { + const TextureCopy copy = { + .src_level = src_surface.LevelOf(copy_addr), + .dst_level = dst_surface.LevelOf(copy_addr), + .src_offset = {src_rect.left, src_rect.bottom}, + .dst_offset = {dst_rect.left, dst_rect.bottom}, + .extent = {src_rect.GetWidth(), src_rect.GetHeight()}, + }; + runtime.CopyTextures(src_surface, dst_surface, copy); + } else { + const TextureBlit blit = { + .src_level = src_surface.LevelOf(copy_addr), + .dst_level = dst_surface.LevelOf(copy_addr), + .src_rect = src_rect, + .dst_rect = dst_rect, + }; + runtime.BlitTextures(src_surface, dst_surface, blit); + } } template @@ -361,33 +410,7 @@ SurfaceId RasterizerCache::GetSurface(const SurfaceParams& params, ScaleMatch SurfaceId surface_id = FindMatch(params, match_res_scale); if (!surface_id) { - u16 target_res_scale = params.res_scale; - if (match_res_scale != ScaleMatch::Exact) { - // This surface may have a subrect of another surface with a higher res_scale, 
find - // it to adjust our params - SurfaceParams find_params = params; - SurfaceId expandable_id = FindMatch(find_params, match_res_scale); - if (expandable_id) { - Surface& expandable = slot_surfaces[expandable_id]; - if (expandable.res_scale > target_res_scale) { - target_res_scale = expandable.res_scale; - } - } - // Keep res_scale when reinterpreting d24s8 -> rgba8 - if (params.pixel_format == PixelFormat::RGBA8) { - find_params.pixel_format = PixelFormat::D24S8; - expandable_id = FindMatch(find_params, match_res_scale); - if (expandable_id) { - Surface& expandable = slot_surfaces[expandable_id]; - if (expandable.res_scale > target_res_scale) { - target_res_scale = expandable.res_scale; - } - } - } - } - SurfaceParams new_params = params; - new_params.res_scale = target_res_scale; - surface_id = CreateSurface(new_params); + surface_id = CreateSurface(params); RegisterSurface(surface_id); } @@ -429,31 +452,6 @@ typename RasterizerCache::SurfaceRect_Tuple RasterizerCache::GetSurfaceSub aligned_params.UpdateParams(); } - // Check for a surface we can expand before creating a new one - if (!surface_id) { - surface_id = FindMatch(aligned_params, match_res_scale); - if (surface_id) { - Surface& surface = slot_surfaces[surface_id]; - aligned_params.width = aligned_params.stride; - aligned_params.UpdateParams(); - - SurfaceParams new_params = surface; - new_params.addr = std::min(aligned_params.addr, surface.addr); - new_params.end = std::max(aligned_params.end, surface.end); - new_params.size = new_params.end - new_params.addr; - new_params.height = - new_params.size / aligned_params.BytesInPixels(aligned_params.stride); - new_params.UpdateParams(); - ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); - - SurfaceId new_surface_id = CreateSurface(new_params); - DuplicateSurface(surface_id, new_surface_id); - UnregisterSurface(surface_id); - RegisterSurface(new_surface_id); - surface_id = new_surface_id; - } - } - // No subrect found - 
create and return a new surface if (!surface_id) { SurfaceParams new_params = aligned_params; @@ -499,7 +497,7 @@ SurfaceId RasterizerCache::GetTextureSurface(const Pica::Texture::TextureInfo params.levels = max_level + 1; params.is_tiled = true; params.pixel_format = PixelFormatFromTextureFormat(info.format); - params.res_scale = use_filter ? resolution_scale_factor : 1; + params.res_scale = filter != Settings::TextureFilter::None ? resolution_scale_factor : 1; params.UpdateParams(); const u32 min_width = info.width >> max_level; @@ -552,7 +550,7 @@ typename T::Surface& RasterizerCache::GetTextureCube(const TextureCubeConfig& .height = config.width, .stride = config.width, .levels = config.levels, - .res_scale = use_filter ? resolution_scale_factor : 1, + .res_scale = filter != Settings::TextureFilter::None ? resolution_scale_factor : 1, .texture_type = TextureType::CubeMap, .pixel_format = PixelFormatFromTextureFormat(config.format), .type = SurfaceType::Texture, @@ -609,8 +607,8 @@ typename T::Surface& RasterizerCache::GetTextureCube(const TextureCubeConfig& } template -typename T::Framebuffer RasterizerCache::GetFramebufferSurfaces(bool using_color_fb, - bool using_depth_fb) { +FramebufferHelper RasterizerCache::GetFramebufferSurfaces(bool using_color_fb, + bool using_depth_fb) { const auto& config = regs.framebuffer.framebuffer; const s32 framebuffer_width = config.GetWidth(); @@ -692,35 +690,20 @@ typename T::Framebuffer RasterizerCache::GetFramebufferSurfaces(bool using_co boost::icl::length(depth_vp_interval)); } - render_targets = RenderTargets{ + fb_params = FramebufferParams{ .color_id = color_id, .depth_id = depth_id, + .color_level = color_level, + .depth_level = depth_level, + .shadow_rendering = regs.framebuffer.IsShadowRendering(), }; - return Framebuffer{runtime, color_surface, color_level, depth_surface, - depth_level, regs, fb_rect}; -} + auto [it, new_framebuffer] = framebuffers.try_emplace(fb_params); + if (new_framebuffer) { + it->second = 
slot_framebuffers.insert(runtime, fb_params, color_surface, depth_surface); + } -template -void RasterizerCache::InvalidateFramebuffer(const Framebuffer& framebuffer) { - const auto invalidate = [&](SurfaceId surface_id) { - if (!surface_id) { - return; - } - Surface& surface = slot_surfaces[surface_id]; - const SurfaceInterval interval = framebuffer.Interval(surface.type); - const PAddr addr = boost::icl::first(interval); - const u32 size = boost::icl::length(interval); - InvalidateRegion(addr, size, surface_id); - }; - const bool has_color = framebuffer.HasAttachment(SurfaceType::Color); - const bool has_depth = framebuffer.HasAttachment(SurfaceType::DepthStencil); - if (has_color) { - invalidate(render_targets.color_id); - } - if (has_depth) { - invalidate(render_targets.depth_id); - } + return FramebufferHelper{this, &slot_framebuffers[it->second], regs.rasterizer, fb_rect}; } template @@ -875,9 +858,6 @@ SurfaceId RasterizerCache::FindMatch(const SurfaceParams& params, ScaleMatch surface.CanReinterpret(params); return std::make_pair(matched, copy_interval); }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface.CanExpand(params), surface.GetInterval()); - }); IsMatch_Helper(std::integral_constant{}, [&] { return std::make_pair(surface.CanTexCopy(params), surface.GetInterval()); }); @@ -1068,14 +1048,12 @@ bool RasterizerCache::UploadCustomSurface(SurfaceId surface_id, SurfaceInterv const auto upload = [this, level, surface_id, material]() -> bool { Surface& surface = slot_surfaces[surface_id]; - if (False(surface.flags & SurfaceFlagBits::Custom)) { - LOG_ERROR(HW_GPU, "Surface is not suitable for custom upload, aborting!"); - return false; - } - if (!surface.IsCustom() && !surface.Swap(material)) { - LOG_ERROR(HW_GPU, "Custom compressed format {} unsupported by host GPU", - material->format); - return false; + ASSERT_MSG(True(surface.flags & SurfaceFlagBits::Custom), + "Surface is not suitable for custom upload, aborting!"); + if 
(!surface.IsCustom()) { + const SurfaceId old_id = + slot_surfaces.swap_and_insert(surface_id, runtime, surface, material); + sentenced.emplace_back(old_id, frame_tick); } surface.UploadCustom(material, level); if (custom_tex_manager.SkipMipmaps()) { @@ -1159,6 +1137,10 @@ bool RasterizerCache::ValidateByReinterpretation(Surface& surface, SurfacePar if (boost::icl::is_empty(copy_interval & interval)) { return false; } + const u32 res_scale = src_surface.res_scale; + if (res_scale > surface.res_scale) { + surface.ScaleUp(res_scale); + } const PAddr addr = boost::icl::lower(interval); const SurfaceParams copy_params = surface.FromInterval(copy_interval); const TextureBlit reinterpret = { @@ -1229,25 +1211,24 @@ void RasterizerCache::FlushRegion(PAddr addr, u32 size, SurfaceId flush_surfa SurfaceRegions flushed_intervals; for (const auto& [region, surface_id] : RangeFromInterval(dirty_regions, flush_interval)) { - // Small sizes imply that this most likely comes from the cpu, flush the entire region - // the point is to avoid thousands of small writes every frame if the cpu decides to - // access that region, anything higher than 8 you're guaranteed it comes from a service - auto interval = size <= 8 ? region : region & flush_interval; if (flush_surface_id && surface_id != flush_surface_id) { continue; } + // Small sizes imply that this most likely comes from the cpu, flush the entire region + // the point is to avoid thousands of small writes every frame if the cpu decides to + // access that region, anything higher than 8 you're guaranteed it comes from a service + const auto interval = size <= 8 ? 
region : region & flush_interval; + Surface& surface = slot_surfaces[surface_id]; + ASSERT_MSG(surface.IsRegionValid(interval), "Region owner has invalid regions"); + const DebugScope scope{runtime, Common::Vec4f{0.f, 0.f, 0.f, 1.f}, "RasterizerCache::FlushRegion (from {:#x} to {:#x})", interval.lower(), interval.upper()}; - // Sanity check, this surface is the last one that marked this region dirty - Surface& surface = slot_surfaces[surface_id]; - ASSERT(surface.IsRegionValid(interval)); - - if (surface.type == SurfaceType::Fill) { + SCOPE_EXIT({ flushed_intervals += interval; }); + if (surface.IsFill()) { DownloadFillSurface(surface, interval); - flushed_intervals += interval; continue; } @@ -1261,8 +1242,6 @@ void RasterizerCache::FlushRegion(PAddr addr, u32 size, SurfaceId flush_surfa } DownloadSurface(surface, download_interval); } - - flushed_intervals += interval; } // Reset dirty regions @@ -1294,7 +1273,6 @@ void RasterizerCache::InvalidateRegion(PAddr addr, u32 size, SurfaceId region if (surface_id == region_owner_id) { return; } - // If the CPU is invalidating this region we want to remove it // to (likely) mark the memory pages as uncached if (!region_owner_id && size <= 8) { @@ -1302,14 +1280,12 @@ void RasterizerCache::InvalidateRegion(PAddr addr, u32 size, SurfaceId region remove_surfaces.push_back(surface_id); return; } - - surface.MarkInvalid(surface.GetInterval() & invalid_interval); - - // If the surface has no salvageable data it should be removed - // from the cache to avoid clogging the data structure. 
- if (surface.IsFullyInvalid()) { - remove_surfaces.push_back(surface_id); + const auto interval = surface.GetInterval() & invalid_interval; + surface.MarkInvalid(interval); + if (!surface.IsFullyInvalid()) { + return; } + remove_surfaces.push_back(surface_id); }); if (region_owner_id) { @@ -1318,15 +1294,30 @@ void RasterizerCache::InvalidateRegion(PAddr addr, u32 size, SurfaceId region dirty_regions.erase(invalid_interval); } - for (const SurfaceId remove_surface_id : remove_surfaces) { - UnregisterSurface(remove_surface_id); + for (const SurfaceId surface_id : remove_surfaces) { + UnregisterSurface(surface_id); + if (!slot_surfaces[surface_id].IsFill()) { + sentenced.emplace_back(surface_id, frame_tick); + } else { + slot_surfaces.erase(surface_id); + } } remove_surfaces.clear(); } template SurfaceId RasterizerCache::CreateSurface(const SurfaceParams& params) { - SurfaceId surface_id = slot_surfaces.insert(runtime, params); + const SurfaceId surface_id = [&] { + const auto it = std::find_if(sentenced.begin(), sentenced.end(), [&](const auto& pair) { + return slot_surfaces[pair.first] == params; + }); + if (it != sentenced.end()) { + const SurfaceId surface_id = it->first; + sentenced.erase(it); + return surface_id; + } + return slot_surfaces.insert(runtime, params); + }(); Surface& surface = slot_surfaces[surface_id]; surface.MarkInvalid(surface.GetInterval()); return surface_id; @@ -1368,8 +1359,6 @@ void RasterizerCache::UnregisterSurface(SurfaceId surface_id) { surfaces.erase(vector_it); }); - SCOPE_EXIT({ slot_surfaces.erase(surface_id); }); - if (False(surface.flags & SurfaceFlagBits::Tracked)) { return; } @@ -1383,7 +1372,7 @@ void RasterizerCache::UnregisterSurface(SurfaceId surface_id) { } if (std::none_of(cube.face_ids.begin(), cube.face_ids.end(), [](SurfaceId id) { return id; })) { - slot_surfaces.erase(cube.surface_id); + sentenced.emplace_back(cube.surface_id, frame_tick); return true; } return false; @@ -1400,7 +1389,6 @@ void 
RasterizerCache::UnregisterAll() { } texture_cube_cache.clear(); remove_surfaces.clear(); - runtime.Reset(); } template diff --git a/src/video_core/rasterizer_cache/rasterizer_cache_base.h b/src/video_core/rasterizer_cache/rasterizer_cache_base.h index 2b76f4076..acb8f52ab 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache_base.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache_base.h @@ -5,11 +5,13 @@ #pragma once #include +#include #include #include #include #include #include +#include "video_core/rasterizer_cache/framebuffer_base.h" #include "video_core/rasterizer_cache/sampler_params.h" #include "video_core/rasterizer_cache/surface_params.h" #include "video_core/rasterizer_cache/texture_cube.h" @@ -26,6 +28,10 @@ namespace Pica::Texture { struct TextureInfo; } +namespace Settings { +enum class TextureFilter : u32; +} + namespace VideoCore { enum class ScaleMatch { @@ -38,9 +44,8 @@ enum class MatchFlags { Exact = 1 << 0, ///< Surface perfectly matches params SubRect = 1 << 1, ///< Surface encompasses params Copy = 1 << 2, ///< Surface that can be used as a copy source - Expand = 1 << 3, ///< Surface that can expand params - TexCopy = 1 << 4, ///< Surface that will match a display transfer "texture copy" parameters - Reinterpret = 1 << 5, ///< Surface might have different pixel format. + TexCopy = 1 << 3, ///< Surface that will match a display transfer "texture copy" parameters + Reinterpret = 1 << 4, ///< Surface might have different pixel format. 
}; DECLARE_ENUM_FLAG_OPERATORS(MatchFlags); @@ -66,11 +71,6 @@ class RasterizerCache { using SurfaceRect_Tuple = std::pair>; using PageMap = boost::icl::interval_map; - struct RenderTargets { - SurfaceId color_id; - SurfaceId depth_id; - }; - public: explicit RasterizerCache(Memory::MemorySystem& memory, CustomTexManager& custom_tex_manager, Runtime& runtime, Pica::Regs& regs, RendererBase& renderer); @@ -115,10 +115,7 @@ public: Surface& GetTextureCube(const TextureCubeConfig& config); /// Get the color and depth surfaces based on the framebuffer configuration - Framebuffer GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb); - - /// Marks the draw rectangle defined in framebuffer as invalid - void InvalidateFramebuffer(const Framebuffer& framebuffer); + FramebufferHelper GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb); /// Get a surface that matches a "texture copy" display transfer config SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); @@ -161,6 +158,12 @@ private: SurfaceId FindMatch(const SurfaceParams& params, ScaleMatch match_scale_type, std::optional validate_interval = std::nullopt); + /// Unregisters sentenced surfaces that have surpassed the destruction threshold. + void RunGarbageCollector(); + + /// Removes any framebuffers that reference the provided surface_id. 
+ void RemoveFramebuffers(SurfaceId surface_id); + /// Transfers ownership of a memory region from src_surface to dest_surface void DuplicateSurface(SurfaceId src_id, SurfaceId dst_id); @@ -209,15 +212,19 @@ private: RendererBase& renderer; std::unordered_map texture_cube_cache; tsl::robin_pg_map, Common::IdentityHash> page_table; + std::unordered_map framebuffers; std::unordered_map samplers; + std::list> sentenced; Common::SlotVector slot_surfaces; Common::SlotVector slot_samplers; + Common::SlotVector slot_framebuffers; SurfaceMap dirty_regions; PageMap cached_pages; std::vector remove_surfaces; u32 resolution_scale_factor; - RenderTargets render_targets; - bool use_filter; + u64 frame_tick{}; + FramebufferParams fb_params; + Settings::TextureFilter filter; bool dump_textures; bool use_custom_textures; }; diff --git a/src/video_core/rasterizer_cache/slot_id.h b/src/video_core/rasterizer_cache/slot_id.h index b76805be9..84f3396e1 100644 --- a/src/video_core/rasterizer_cache/slot_id.h +++ b/src/video_core/rasterizer_cache/slot_id.h @@ -10,6 +10,7 @@ namespace VideoCore { using SurfaceId = Common::SlotId; using SamplerId = Common::SlotId; +using FramebufferId = Common::SlotId; /// Fake surface ID for null surfaces constexpr SurfaceId NULL_SURFACE_ID{0}; diff --git a/src/video_core/rasterizer_cache/surface_base.h b/src/video_core/rasterizer_cache/surface_base.h index 9be06e0df..7e44dcc85 100644 --- a/src/video_core/rasterizer_cache/surface_base.h +++ b/src/video_core/rasterizer_cache/surface_base.h @@ -46,6 +46,10 @@ public: /// Returns true if the surface contains a custom material with a normal map. 
bool HasNormalMap() const noexcept; + bool IsFill() const noexcept { + return type == SurfaceType::Fill; + } + bool Overlaps(PAddr overlap_addr, size_t overlap_size) const noexcept { const PAddr overlap_end = overlap_addr + static_cast(overlap_size); return addr < overlap_end && overlap_addr < end; diff --git a/src/video_core/rasterizer_cache/surface_params.cpp b/src/video_core/rasterizer_cache/surface_params.cpp index 87b6a7274..2f1496193 100644 --- a/src/video_core/rasterizer_cache/surface_params.cpp +++ b/src/video_core/rasterizer_cache/surface_params.cpp @@ -34,15 +34,6 @@ bool SurfaceParams::CanReinterpret(const SurfaceParams& other_surface) { GetSubRect(other_surface).right <= stride; } -bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { - return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && - addr <= expanded_surface.end && expanded_surface.addr <= end && - is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride && - (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % - BytesInPixels(stride * (is_tiled ? 8 : 1)) == - 0; -} - bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { const SurfaceInterval copy_interval = texcopy_params.GetInterval(); if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr || diff --git a/src/video_core/rasterizer_cache/surface_params.h b/src/video_core/rasterizer_cache/surface_params.h index 74f880d2a..77a8ea107 100644 --- a/src/video_core/rasterizer_cache/surface_params.h +++ b/src/video_core/rasterizer_cache/surface_params.h @@ -26,9 +26,6 @@ public: /// Returns true if other_surface can be used for reinterpretion. 
bool CanReinterpret(const SurfaceParams& other_surface); - /// Returns true if params can be expanded to match expanded_surface - bool CanExpand(const SurfaceParams& expanded_surface) const; - /// Returns true if params can be used for texcopy bool CanTexCopy(const SurfaceParams& texcopy_params) const; @@ -56,6 +53,10 @@ public: /// Returns a string identifier of the params object std::string DebugName(bool scaled, bool custom = false) const noexcept; + bool operator==(const SurfaceParams& other) const noexcept { + return std::memcmp(this, &other, sizeof(SurfaceParams)) == 0; + } + [[nodiscard]] SurfaceInterval GetInterval() const noexcept { return SurfaceInterval{addr, end}; } diff --git a/src/video_core/rasterizer_cache/utils.h b/src/video_core/rasterizer_cache/utils.h index aeb7d7451..3971bc4be 100644 --- a/src/video_core/rasterizer_cache/utils.h +++ b/src/video_core/rasterizer_cache/utils.h @@ -67,6 +67,7 @@ struct StagingData { }; class SurfaceParams; +struct FramebufferParams; u32 MipLevels(u32 width, u32 height, u32 max_level); diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 038d5705f..d7fba9597 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -31,9 +31,7 @@ struct RendererSettings { std::function screenshot_complete_callback; Layout::FramebufferLayout screenshot_framebuffer_layout; // Renderer - std::atomic_bool texture_filter_update_requested{false}; std::atomic_bool bg_color_update_requested{false}; - std::atomic_bool sampler_update_requested{false}; std::atomic_bool shader_update_requested{false}; }; diff --git a/src/video_core/renderer_opengl/frame_dumper_opengl.cpp b/src/video_core/renderer_opengl/frame_dumper_opengl.cpp index 1d4ac8c32..dd8720b10 100644 --- a/src/video_core/renderer_opengl/frame_dumper_opengl.cpp +++ b/src/video_core/renderer_opengl/frame_dumper_opengl.cpp @@ -7,7 +7,7 @@ #include #include "core/frontend/emu_window.h" #include 
"video_core/renderer_opengl/frame_dumper_opengl.h" -#include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/renderer_opengl/gl_texture_mailbox.h" namespace OpenGL { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 037767385..e09f4e8f6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -386,21 +386,20 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 || (has_stencil && state.stencil.test_enabled)); - const Framebuffer framebuffer = - res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb); - const bool has_color = framebuffer.HasAttachment(SurfaceType::Color); - if (!has_color && shadow_rendering) { + const auto fb_helper = res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb); + const Framebuffer* framebuffer = fb_helper.Framebuffer(); + if (!framebuffer->color_id && framebuffer->shadow_rendering) { return true; } // Bind the framebuffer surfaces if (shadow_rendering) { - state.image_shadow_buffer = framebuffer.Attachment(SurfaceType::Color); + state.image_shadow_buffer = framebuffer->Attachment(SurfaceType::Color); } - state.draw.draw_framebuffer = framebuffer.Handle(); + state.draw.draw_framebuffer = framebuffer->Handle(); // Sync the viewport - const auto viewport = framebuffer.Viewport(); + const auto viewport = fb_helper.Viewport(); state.viewport.x = static_cast(viewport.x); state.viewport.y = static_cast(viewport.y); state.viewport.width = static_cast(viewport.width); @@ -408,21 +407,15 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. 
// Enable scissor test to prevent drawing outside of the framebuffer region - const auto draw_rect = framebuffer.DrawRect(); + const auto draw_rect = fb_helper.DrawRect(); state.scissor.enabled = true; state.scissor.x = draw_rect.left; state.scissor.y = draw_rect.bottom; state.scissor.width = draw_rect.GetWidth(); state.scissor.height = draw_rect.GetHeight(); - const int res_scale = static_cast(framebuffer.ResolutionScale()); - if (uniform_block_data.data.framebuffer_scale != res_scale) { - uniform_block_data.data.framebuffer_scale = res_scale; - uniform_block_data.dirty = true; - } - // Update scissor uniforms - const auto [scissor_x1, scissor_y2, scissor_x2, scissor_y1] = framebuffer.Scissor(); + const auto [scissor_x1, scissor_y2, scissor_x2, scissor_y1] = fb_helper.Scissor(); if (uniform_block_data.data.scissor_x1 != scissor_x1 || uniform_block_data.data.scissor_x2 != scissor_x2 || uniform_block_data.data.scissor_y1 != scissor_y1 || @@ -486,13 +479,12 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { GL_TEXTURE_UPDATE_BARRIER_BIT | GL_FRAMEBUFFER_BARRIER_BIT); } - res_cache.InvalidateFramebuffer(framebuffer); use_custom_normal = false; return succeeded; } -void RasterizerOpenGL::SyncTextureUnits(const Framebuffer& framebuffer) { +void RasterizerOpenGL::SyncTextureUnits(const Framebuffer* framebuffer) { using TextureType = Pica::TexturingRegs::TextureConfig::TextureType; const auto pica_textures = regs.texturing.GetTextures(); @@ -603,27 +595,15 @@ void RasterizerOpenGL::BindMaterial(u32 texture_index, Surface& surface) { } } -bool RasterizerOpenGL::IsFeedbackLoop(u32 texture_index, const Framebuffer& framebuffer, +bool RasterizerOpenGL::IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer, Surface& surface) { - const GLuint color_attachment = framebuffer.Attachment(SurfaceType::Color); + const GLuint color_attachment = framebuffer->Attachment(SurfaceType::Color); const bool is_feedback_loop = color_attachment == surface.Handle(); if 
(!is_feedback_loop) { return false; } - // Make a temporary copy of the framebuffer to sample from - Surface temp_surface{runtime, framebuffer.ColorParams()}; - const VideoCore::TextureCopy copy = { - .src_level = 0, - .dst_level = 0, - .src_layer = 0, - .dst_layer = 0, - .src_offset = {0, 0}, - .dst_offset = {0, 0}, - .extent = {temp_surface.GetScaledWidth(), temp_surface.GetScaledHeight()}, - }; - runtime.CopyTextures(surface, temp_surface, copy); - state.texture_units[texture_index].texture_2d = temp_surface.Handle(); + state.texture_units[texture_index].texture_2d = surface.CopyHandle(); return true; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 51f064868..54f5e0016 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -93,7 +93,7 @@ private: void SyncAndUploadLUTsLF(); /// Syncs all enabled PICA texture units - void SyncTextureUnits(const Framebuffer& framebuffer); + void SyncTextureUnits(const Framebuffer* framebuffer); /// Binds the PICA shadow cube required for shadow mapping void BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture); @@ -102,7 +102,7 @@ private: void BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture); /// Makes a temporary copy of the framebuffer if a feedback loop is detected - bool IsFeedbackLoop(u32 texture_index, const Framebuffer& framebuffer, Surface& surface); + bool IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer, Surface& surface); /// Unbinds all special texture unit 0 texture configurations void UnbindSpecial(); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 89944f80e..6e2d15380 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -3,8 +3,6 @@ // Refer to the license.txt file included. 
#include -#include "common/common_funcs.h" -#include "common/logging/log.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_vars.h" diff --git a/src/video_core/renderer_opengl/gl_texture_mailbox.cpp b/src/video_core/renderer_opengl/gl_texture_mailbox.cpp new file mode 100644 index 000000000..11a8b6eb9 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_texture_mailbox.cpp @@ -0,0 +1,194 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_texture_mailbox.h" + +namespace OpenGL { + +OGLTextureMailbox::OGLTextureMailbox(bool has_debug_tool_) : has_debug_tool{has_debug_tool_} { + for (auto& frame : swap_chain) { + free_queue.push(&frame); + } +} + +OGLTextureMailbox::~OGLTextureMailbox() { + // Lock the mutex and clear out the present and free_queues and notify any people who are + // blocked to prevent deadlock on shutdown + std::scoped_lock lock(swap_chain_lock); + free_queue = {}; + present_queue.clear(); + present_cv.notify_all(); + free_cv.notify_all(); +} + +void OGLTextureMailbox::ReloadPresentFrame(Frontend::Frame* frame, u32 height, u32 width) { + frame->present.Release(); + frame->present.Create(); + GLint previous_draw_fbo{}; + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo); + glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, + frame->color.handle); + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!"); + } + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo); + frame->color_reloaded = false; +} + +void OGLTextureMailbox::ReloadRenderFrame(Frontend::Frame* frame, u32 width, u32 height) { + OpenGLState prev_state = OpenGLState::GetCurState(); + 
OpenGLState state = OpenGLState::GetCurState(); + + // Recreate the color texture attachment + frame->color.Release(); + frame->color.Create(); + state.renderbuffer = frame->color.handle; + state.Apply(); + glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, width, height); + + // Recreate the FBO for the render target + frame->render.Release(); + frame->render.Create(); + state.draw.read_framebuffer = frame->render.handle; + state.draw.draw_framebuffer = frame->render.handle; + state.Apply(); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, + frame->color.handle); + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!"); + } + prev_state.Apply(); + frame->width = width; + frame->height = height; + frame->color_reloaded = true; +} + +Frontend::Frame* OGLTextureMailbox::GetRenderFrame() { + std::unique_lock lock{swap_chain_lock}; + + // If theres no free frames, we will reuse the oldest render frame + if (free_queue.empty()) { + auto frame = present_queue.back(); + present_queue.pop_back(); + return frame; + } + + Frontend::Frame* frame = free_queue.front(); + free_queue.pop(); + return frame; +} + +void OGLTextureMailbox::ReleaseRenderFrame(Frontend::Frame* frame) { + std::unique_lock lock{swap_chain_lock}; + present_queue.push_front(frame); + present_cv.notify_one(); + + DebugNotifyNextFrame(); +} + +void OGLTextureMailbox::LoadPresentFrame() { + // Free the previous frame and add it back to the free queue + if (previous_frame) { + free_queue.push(previous_frame); + free_cv.notify_one(); + } + + // The newest entries are pushed to the front of the queue + Frontend::Frame* frame = present_queue.front(); + present_queue.pop_front(); + // Remove all old entries from the present queue and move them back to the free_queue + for (auto f : present_queue) { + free_queue.push(f); + } + present_queue.clear(); + previous_frame = frame; +} + +Frontend::Frame* 
OGLTextureMailbox::TryGetPresentFrame(int timeout_ms) { + DebugWaitForNextFrame(); + + std::unique_lock lock{swap_chain_lock}; + // Wait for new entries in the present_queue + present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), + [&] { return !present_queue.empty(); }); + if (present_queue.empty()) { + // Timed out waiting for a frame to draw so return the previous frame + return previous_frame; + } + + LoadPresentFrame(); + return previous_frame; +} + +void OGLTextureMailbox::DebugNotifyNextFrame() { + if (!has_debug_tool) { + return; + } + frame_for_debug++; + std::scoped_lock lock{debug_synch_mutex}; + debug_synch_condition.notify_one(); +} + +void OGLTextureMailbox::DebugWaitForNextFrame() { + if (!has_debug_tool) { + return; + } + const int last_frame = frame_for_debug; + std::unique_lock lock{debug_synch_mutex}; + debug_synch_condition.wait(lock, [this, last_frame] { return frame_for_debug > last_frame; }); +} + +Frontend::Frame* OGLVideoDumpingMailbox::GetRenderFrame() { + std::unique_lock lock{swap_chain_lock}; + + // If theres no free frames, we will wait until one shows up + if (free_queue.empty()) { + free_cv.wait(lock, [&] { return (!free_queue.empty() || quit); }); + if (quit) { + throw OGLTextureMailboxException("VideoDumpingMailbox quitting"); + } + + if (free_queue.empty()) { + LOG_CRITICAL(Render_OpenGL, "Could not get free frame"); + return nullptr; + } + } + + Frontend::Frame* frame = free_queue.front(); + free_queue.pop(); + return frame; +} + +void OGLVideoDumpingMailbox::LoadPresentFrame() { + // Free the previous frame and add it back to the free queue + if (previous_frame) { + free_queue.push(previous_frame); + free_cv.notify_one(); + } + + Frontend::Frame* frame = present_queue.back(); + present_queue.pop_back(); + previous_frame = frame; + + // Do not remove entries from the present_queue, as video dumping would require + // that we preserve all frames +} + +Frontend::Frame* OGLVideoDumpingMailbox::TryGetPresentFrame(int 
timeout_ms) { + std::unique_lock lock{swap_chain_lock}; + // Wait for new entries in the present_queue + present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), + [&] { return !present_queue.empty(); }); + if (present_queue.empty()) { + // Timed out waiting for a frame + return nullptr; + } + + LoadPresentFrame(); + return previous_frame; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_mailbox.h b/src/video_core/renderer_opengl/gl_texture_mailbox.h new file mode 100644 index 000000000..67646a3ed --- /dev/null +++ b/src/video_core/renderer_opengl/gl_texture_mailbox.h @@ -0,0 +1,92 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include + +#include "core/frontend/emu_window.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace Frontend { +struct Frame { + u32 width{}; ///< Width of the frame (to detect resize) + u32 height{}; ///< Height of the frame + bool color_reloaded = false; ///< Texture attachment was recreated (ie: resized) + OpenGL::OGLRenderbuffer color{}; ///< Buffer shared between the render/present FBO + OpenGL::OGLFramebuffer render{}; ///< FBO created on the render thread + OpenGL::OGLFramebuffer present{}; ///< FBO created on the present thread + GLsync render_fence{}; ///< Fence created on the render thread + GLsync present_fence{}; ///< Fence created on the presentation thread +}; +} // namespace Frontend + +namespace OpenGL { + +// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have +// to wait on available presentation frames. There doesn't seem to be much of a downside to a larger +// number but 9 swap textures at 60FPS presentation allows for 800% speed so thats probably fine +#ifdef ANDROID +// Reduce the size of swap_chain, since the UI only allows upto 200% speed. 
+constexpr std::size_t SWAP_CHAIN_SIZE = 6; +#else +constexpr std::size_t SWAP_CHAIN_SIZE = 9; +#endif + +class OGLTextureMailbox : public Frontend::TextureMailbox { +public: + explicit OGLTextureMailbox(bool has_debug_tool = false); + ~OGLTextureMailbox() override; + + void ReloadPresentFrame(Frontend::Frame* frame, u32 height, u32 width) override; + void ReloadRenderFrame(Frontend::Frame* frame, u32 width, u32 height) override; + void ReleaseRenderFrame(Frontend::Frame* frame) override; + + Frontend::Frame* GetRenderFrame() override; + Frontend::Frame* TryGetPresentFrame(int timeout_ms) override; + + /// This is virtual as it is to be overriden in OGLVideoDumpingMailbox below. + virtual void LoadPresentFrame(); + +private: + /// Signal that a new frame is available (called from GPU thread) + void DebugNotifyNextFrame(); + + /// Wait for a new frame to be available (called from presentation thread) + void DebugWaitForNextFrame(); + +public: + std::mutex swap_chain_lock; + std::condition_variable free_cv; + std::condition_variable present_cv; + std::array swap_chain{}; + std::queue free_queue{}; + std::deque present_queue{}; + Frontend::Frame* previous_frame = nullptr; + std::mutex debug_synch_mutex; + std::condition_variable debug_synch_condition; + std::atomic_int frame_for_debug{}; + const bool has_debug_tool; ///< When true, using a GPU debugger, so keep frames in lock-step +}; + +class OGLTextureMailboxException : public std::runtime_error { +public: + using std::runtime_error::runtime_error; +}; + +/// This mailbox is different in that it will never discard rendered frames +class OGLVideoDumpingMailbox : public OGLTextureMailbox { +public: + void LoadPresentFrame() override; + Frontend::Frame* GetRenderFrame() override; + Frontend::Frame* TryGetPresentFrame(int timeout_ms) override; + +public: + bool quit = false; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp 
b/src/video_core/renderer_opengl/gl_texture_runtime.cpp index 2786b9d67..586236e73 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.cpp +++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -5,10 +5,10 @@ #include "common/scope_exit.h" #include "common/settings.h" #include "video_core/custom_textures/material.h" -#include "video_core/regs.h" #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/gl_driver.h" #include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_texture_mailbox.h" #include "video_core/renderer_opengl/gl_texture_runtime.h" #include "video_core/renderer_opengl/pica_to_gl.h" @@ -22,6 +22,8 @@ using VideoCore::SurfaceFlagBits; using VideoCore::SurfaceType; using VideoCore::TextureType; +constexpr GLenum TEMP_UNIT = GL_TEXTURE15; + constexpr FormatTuple DEFAULT_TUPLE = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; static constexpr std::array DEPTH_TUPLES = {{ @@ -58,13 +60,6 @@ static constexpr std::array CUSTOM_TUPLES = {{ {GL_COMPRESSED_RGBA_ASTC_8x6, GL_COMPRESSED_RGBA_ASTC_8x6, GL_UNSIGNED_BYTE}, }}; -struct FramebufferInfo { - GLuint color; - GLuint depth; - u32 color_level; - u32 depth_level; -}; - [[nodiscard]] GLbitfield MakeBufferMask(SurfaceType type) { switch (type) { case SurfaceType::Color: @@ -128,9 +123,8 @@ TextureRuntime::TextureRuntime(const Driver& driver_, VideoCore::RendererBase& r TextureRuntime::~TextureRuntime() = default; -void TextureRuntime::Reset() { - alloc_cache.clear(); - framebuffer_cache.clear(); +u32 TextureRuntime::RemoveThreshold() { + return SWAP_CHAIN_SIZE; } bool TextureRuntime::NeedsConversion(VideoCore::PixelFormat pixel_format) const { @@ -151,6 +145,10 @@ VideoCore::StagingData TextureRuntime::FindStaging(u32 size, bool upload) { } const FormatTuple& TextureRuntime::GetFormatTuple(PixelFormat pixel_format) const { + if (pixel_format == PixelFormat::Invalid) { + return DEFAULT_TUPLE; + } + const auto type = GetFormatType(pixel_format); const 
std::size_t format_index = static_cast(pixel_format); @@ -171,74 +169,6 @@ const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::CustomPixelFormat p return CUSTOM_TUPLES[format_index]; } -void TextureRuntime::Recycle(const HostTextureTag tag, Allocation&& alloc) { - alloc_cache.emplace(tag, std::move(alloc)); -} - -Allocation TextureRuntime::Allocate(const VideoCore::SurfaceParams& params, - const VideoCore::Material* material) { - const GLenum target = params.texture_type == VideoCore::TextureType::CubeMap - ? GL_TEXTURE_CUBE_MAP - : GL_TEXTURE_2D; - const bool is_custom = material != nullptr; - const bool has_normal = material && material->Map(MapType::Normal); - const auto& tuple = - is_custom ? GetFormatTuple(params.custom_format) : GetFormatTuple(params.pixel_format); - const HostTextureTag key = { - .width = params.width, - .height = params.height, - .levels = params.levels, - .res_scale = params.res_scale, - .tuple = tuple, - .type = params.texture_type, - .is_custom = is_custom, - .has_normal = has_normal, - }; - - if (auto it = alloc_cache.find(key); it != alloc_cache.end()) { - auto alloc{std::move(it->second)}; - alloc_cache.erase(it); - return alloc; - } - - const GLuint old_tex = OpenGLState::GetCurState().texture_units[0].texture_2d; - glActiveTexture(GL_TEXTURE0); - - std::array textures{}; - std::array handles{}; - - textures[0] = MakeHandle(target, params.width, params.height, params.levels, tuple, - params.DebugName(false)); - handles.fill(textures[0].handle); - - if (params.res_scale != 1) { - const u32 scaled_width = is_custom ? params.width : params.GetScaledWidth(); - const u32 scaled_height = is_custom ? params.height : params.GetScaledHeight(); - const auto& scaled_tuple = is_custom ? 
GetFormatTuple(PixelFormat::RGBA8) : tuple; - textures[1] = MakeHandle(target, scaled_width, scaled_height, params.levels, scaled_tuple, - params.DebugName(true, is_custom)); - handles[1] = textures[1].handle; - } - if (has_normal) { - textures[2] = MakeHandle(target, params.width, params.height, params.levels, tuple, - params.DebugName(true, is_custom)); - handles[2] = textures[2].handle; - } - - glBindTexture(GL_TEXTURE_2D, old_tex); - - return Allocation{ - .textures = std::move(textures), - .handles = std::move(handles), - .tuple = tuple, - .width = params.width, - .height = params.height, - .levels = params.levels, - .res_scale = params.res_scale, - .is_custom = is_custom, - }; -} - bool TextureRuntime::Reinterpret(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit) { const PixelFormat src_format = source.pixel_format; @@ -353,40 +283,90 @@ void TextureRuntime::GenerateMipmaps(Surface& surface) { } Surface::Surface(TextureRuntime& runtime_, const VideoCore::SurfaceParams& params) - : SurfaceBase{params}, driver{&runtime_.GetDriver()}, runtime{&runtime_} { + : SurfaceBase{params}, driver{&runtime_.GetDriver()}, runtime{&runtime_}, + tuple{runtime->GetFormatTuple(pixel_format)} { if (pixel_format == PixelFormat::Invalid) { return; } - alloc = runtime->Allocate(params); + glActiveTexture(TEMP_UNIT); + const GLenum target = + texture_type == VideoCore::TextureType::CubeMap ? 
GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D; + + textures[0] = MakeHandle(target, width, height, levels, tuple, DebugName(false)); + if (res_scale != 1) { + textures[1] = MakeHandle(target, GetScaledWidth(), GetScaledHeight(), levels, tuple, + DebugName(true, false)); + } } -Surface::~Surface() { - if (pixel_format == PixelFormat::Invalid || !alloc) { +/// Builds a surface backed by a custom material. `mat` must be non-null: its format selects the +/// format tuple before the constructor body runs. +Surface::Surface(TextureRuntime& runtime_, const VideoCore::SurfaceBase& surface, + const VideoCore::Material* mat) + : SurfaceBase{surface}, driver{&runtime_.GetDriver()}, runtime{&runtime_}, + tuple{runtime->GetFormatTuple(mat->format)} { + // No textures are created when the driver reports the material format as unsupported. + if (!driver->IsCustomFormatSupported(mat->format)) { return; } - runtime->Recycle(MakeTag(), std::move(alloc)); + + glActiveTexture(TEMP_UNIT); + const GLenum target = + texture_type == VideoCore::TextureType::CubeMap ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D; + + custom_format = mat->format; + material = mat; + + textures[0] = MakeHandle(target, mat->width, mat->height, levels, tuple, DebugName(false)); + if (res_scale != 1) { + textures[1] = MakeHandle(target, mat->width, mat->height, levels, DEFAULT_TUPLE, + DebugName(true, true)); + } + const bool has_normal = mat->Map(MapType::Normal); + if (has_normal) { + textures[2] = + MakeHandle(target, mat->width, mat->height, levels, tuple, DebugName(true, true)); + } +} + +Surface::~Surface() = default; + +GLuint Surface::Handle(u32 index) const noexcept { + if (!textures[index].handle) { + return textures[0].handle; + } + return textures[index].handle; +} + +GLuint Surface::CopyHandle() noexcept { + if (!copy_texture.handle) { + copy_texture = MakeHandle(GL_TEXTURE_2D, GetScaledWidth(), GetScaledHeight(), levels, tuple, + DebugName(true)); + } + + for (u32 level = 0; level < levels; level++) { + const u32 width = GetScaledWidth() >> level; + const u32 height = GetScaledHeight() >> level; + glCopyImageSubData(Handle(1), GL_TEXTURE_2D, level, 0, 0, 0, copy_texture.handle, + GL_TEXTURE_2D, level, 0, 0, 0, width, height, 1); + } + + return copy_texture.handle; } void Surface::Upload(const
VideoCore::BufferTextureCopy& upload, const VideoCore::StagingData& staging) { ASSERT(stride * GetFormatBytesPerPixel(pixel_format) % 4 == 0); - const GLuint old_tex = OpenGLState::GetCurState().texture_units[0].texture_2d; const u32 unscaled_width = upload.texture_rect.GetWidth(); const u32 unscaled_height = upload.texture_rect.GetHeight(); glPixelStorei(GL_UNPACK_ROW_LENGTH, unscaled_width); - glActiveTexture(GL_TEXTURE0); + glActiveTexture(TEMP_UNIT); glBindTexture(GL_TEXTURE_2D, Handle(0)); - const auto& tuple = alloc.tuple; glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, upload.texture_rect.left, upload.texture_rect.bottom, unscaled_width, unscaled_height, tuple.format, tuple.type, staging.mapped.data()); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - glBindTexture(GL_TEXTURE_2D, old_tex); const VideoCore::TextureBlit blit = { .src_level = upload.texture_level, @@ -400,14 +380,12 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, } void Surface::UploadCustom(const VideoCore::Material* material, u32 level) { - const GLuint old_tex = OpenGLState::GetCurState().texture_units[0].texture_2d; - const auto& tuple = alloc.tuple; const u32 width = material->width; const u32 height = material->height; const auto color = material->textures[0]; const Common::Rectangle filter_rect{0U, height, width, 0U}; - glActiveTexture(GL_TEXTURE0); + glActiveTexture(TEMP_UNIT); glPixelStorei(GL_UNPACK_ROW_LENGTH, width); const auto upload = [&](u32 index, VideoCore::CustomTexture* texture) { @@ -440,7 +418,6 @@ void Surface::UploadCustom(const VideoCore::Material* material, u32 level) { } glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - glBindTexture(GL_TEXTURE_2D, old_tex); } void Surface::Download(const VideoCore::BufferTextureCopy& download, @@ -491,6 +468,7 @@ bool Surface::DownloadWithoutFbo(const VideoCore::BufferTextureCopy& download, const auto& tuple = runtime->GetFormatTuple(pixel_format); const u32 unscaled_width = download.texture_rect.GetWidth(); + 
glActiveTexture(TEMP_UNIT); glPixelStorei(GL_PACK_ROW_LENGTH, unscaled_width); SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); @@ -541,27 +519,24 @@ void Surface::Attach(GLenum target, u32 level, u32 layer, bool scaled) { } } -bool Surface::Swap(const VideoCore::Material* mat) { - const VideoCore::CustomPixelFormat format{mat->format}; - if (!driver->IsCustomFormatSupported(format)) { - return false; +void Surface::ScaleUp(u32 new_scale) { + if (res_scale == new_scale || new_scale == 1) { + return; } - runtime->Recycle(MakeTag(), std::move(alloc)); - SurfaceParams params = *this; - params.width = mat->width; - params.height = mat->height; - params.custom_format = mat->format; - alloc = runtime->Allocate(params, mat); + res_scale = new_scale; + textures[1] = MakeHandle(GL_TEXTURE_2D, GetScaledWidth(), GetScaledHeight(), levels, tuple, + DebugName(true)); - LOG_DEBUG(Render_OpenGL, "Swapped {}x{} {} surface at address {:#x} to {}x{} {}", - GetScaledWidth(), GetScaledHeight(), VideoCore::PixelFormatAsString(pixel_format), - addr, width, height, VideoCore::CustomPixelFormatAsString(format)); - - custom_format = format; - material = mat; - - return true; + VideoCore::TextureBlit blit = { + .src_rect = GetRect(), + .dst_rect = GetScaledRect(), + }; + for (u32 level = 0; level < levels; level++) { + blit.src_level = level; + blit.dst_level = level; + BlitScale(blit, true); + } } u32 Surface::GetInternalBytesPerPixel() const { @@ -591,27 +566,11 @@ void Surface::BlitScale(const VideoCore::TextureBlit& blit, bool up_scale) { blit.dst_rect.right, blit.dst_rect.top, buffer_mask, filter); } -HostTextureTag Surface::MakeTag() const noexcept { - return HostTextureTag{ - .width = alloc.width, - .height = alloc.height, - .levels = alloc.levels, - .res_scale = alloc.res_scale, - .tuple = alloc.tuple, - .type = texture_type, - .is_custom = alloc.is_custom, - .has_normal = HasNormalMap(), - }; -} +Framebuffer::Framebuffer(TextureRuntime& runtime, const 
VideoCore::FramebufferParams& params, + const Surface* color, const Surface* depth) + : VideoCore::FramebufferParams{params}, res_scale{color ? color->res_scale + : (depth ? depth->res_scale : 1u)} { -Framebuffer::Framebuffer(TextureRuntime& runtime, const Surface* color, u32 color_level, - const Surface* depth_stencil, u32 depth_level, const Pica::Regs& regs, - Common::Rectangle surfaces_rect) - : VideoCore::FramebufferBase{regs, color, color_level, - depth_stencil, depth_level, surfaces_rect} { - - const bool shadow_rendering = regs.framebuffer.IsShadowRendering(); - const bool has_stencil = regs.framebuffer.HasStencil(); if (shadow_rendering && !color) { return; } @@ -619,33 +578,15 @@ Framebuffer::Framebuffer(TextureRuntime& runtime, const Surface* color, u32 colo if (color) { attachments[0] = color->Handle(); } - if (depth_stencil) { - attachments[1] = depth_stencil->Handle(); + if (depth) { + attachments[1] = depth->Handle(); } - const FramebufferInfo info = { - .color = attachments[0], - .depth = attachments[1], - .color_level = color_level, - .depth_level = depth_level, - }; - - const u64 hash = Common::ComputeHash64(&info, sizeof(FramebufferInfo)); - auto [it, new_framebuffer] = runtime.framebuffer_cache.try_emplace(hash); - - if (!new_framebuffer) { - handle = it->second.handle; - return; - } - - const GLuint old_fbo = OpenGLState::GetCurState().draw.draw_framebuffer; - - OGLFramebuffer& framebuffer = it->second; framebuffer.Create(); - handle = it->second.handle; - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle); - SCOPE_EXIT({ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_fbo); }); + OpenGLState state = OpenGLState::GetCurState(); + state.draw.draw_framebuffer = framebuffer.handle; + state.Apply(); if (shadow_rendering) { glFramebufferParameteri(GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER_DEFAULT_WIDTH, @@ -658,13 +599,13 @@ Framebuffer::Framebuffer(TextureRuntime& runtime, const Surface* color, u32 colo } else { 
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color ? color->Handle() : 0, color_level); - if (depth_stencil) { - if (has_stencil) { + if (depth) { + if (depth->pixel_format == PixelFormat::D24S8) { glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - GL_TEXTURE_2D, depth_stencil->Handle(), depth_level); + GL_TEXTURE_2D, depth->Handle(), depth_level); } else { glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - depth_stencil->Handle(), depth_level); + depth->Handle(), depth_level); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); } diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h index 654e321eb..02914624b 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -27,46 +27,6 @@ struct FormatTuple { } }; -struct HostTextureTag { - u32 width; - u32 height; - u32 levels; - u32 res_scale; - FormatTuple tuple; - VideoCore::TextureType type; - bool is_custom; - bool has_normal; - - bool operator==(const HostTextureTag& other) const noexcept { - return std::tie(tuple, type, width, height, levels, res_scale, is_custom, has_normal) == - std::tie(other.tuple, other.type, other.width, other.height, other.levels, - other.res_scale, other.is_custom, other.has_normal); - } - - struct Hash { - const u64 operator()(const HostTextureTag& tag) const { - return Common::ComputeHash64(&tag, sizeof(HostTextureTag)); - } - }; -}; -static_assert(std::has_unique_object_representations_v, - "HostTextureTag is not suitable for hashing!"); - -struct Allocation { - std::array textures; - std::array handles; - FormatTuple tuple; - u32 width; - u32 height; - u32 levels; - u32 res_scale; - bool is_custom; - - operator bool() const noexcept { - return textures[0].handle; - } -}; - class Surface; class Driver; @@ -82,8 +42,8 @@ public: 
explicit TextureRuntime(const Driver& driver, VideoCore::RendererBase& renderer); ~TextureRuntime(); - /// Clears all cached runtime resources - void Reset(); + /// Returns the removal threshold ticks for the garbage collector + u32 RemoveThreshold(); /// Returns true if the provided pixel format cannot be used natively by the runtime. bool NeedsConversion(VideoCore::PixelFormat pixel_format) const; @@ -111,13 +71,6 @@ public: void GenerateMipmaps(Surface& surface); private: - /// Takes back ownership of the allocation for recycling - void Recycle(const HostTextureTag tag, Allocation&& alloc); - - /// Allocates a texture with the specified dimentions and format - Allocation Allocate(const VideoCore::SurfaceParams& params, - const VideoCore::Material* material = nullptr); - /// Returns the OpenGL driver class const Driver& GetDriver() const { return driver; @@ -127,8 +80,6 @@ private: const Driver& driver; BlitHelper blit_helper; std::vector staging_buffer; - std::unordered_multimap alloc_cache; - std::unordered_map> framebuffer_cache; std::array draw_fbos; std::array read_fbos; }; @@ -136,6 +87,8 @@ private: class Surface : public VideoCore::SurfaceBase { public: explicit Surface(TextureRuntime& runtime, const VideoCore::SurfaceParams& params); + explicit Surface(TextureRuntime& runtime, const VideoCore::SurfaceBase& surface, + const VideoCore::Material* material); ~Surface(); Surface(const Surface&) = delete; @@ -144,13 +97,15 @@ public: Surface(Surface&& o) noexcept = default; Surface& operator=(Surface&& o) noexcept = default; - [[nodiscard]] GLuint Handle(u32 index = 1) const noexcept { - return alloc.handles[index]; + [[nodiscard]] const FormatTuple& Tuple() const noexcept { + return tuple; } - [[nodiscard]] const FormatTuple& Tuple() const noexcept { - return alloc.tuple; - } + /// Returns the texture handle at index, otherwise the first one if not valid. 
+ GLuint Handle(u32 index = 1) const noexcept; + + /// Returns a copy of the upscaled texture handle, used for feedback loops. + GLuint CopyHandle() noexcept; /// Uploads pixel data in staging to a rectangle region of the surface texture void Upload(const VideoCore::BufferTextureCopy& upload, const VideoCore::StagingData& staging); @@ -165,8 +120,8 @@ public: /// Attaches a handle of surface to the specified framebuffer target void Attach(GLenum target, u32 level, u32 layer, bool scaled = true); - /// Swaps the internal allocation to match the provided material - bool Swap(const VideoCore::Material* material); + /// Scales up the surface to match the new resolution scale. + void ScaleUp(u32 new_scale); /// Returns the bpp of the internal surface format u32 GetInternalBytesPerPixel() const; @@ -179,24 +134,32 @@ private: bool DownloadWithoutFbo(const VideoCore::BufferTextureCopy& download, const VideoCore::StagingData& staging); - /// Returns the texture tag of the current allocation - HostTextureTag MakeTag() const noexcept; - private: const Driver* driver; TextureRuntime* runtime; - Allocation alloc{}; + std::array textures; + OGLTexture copy_texture; + FormatTuple tuple; }; -class Framebuffer : public VideoCore::FramebufferBase { +class Framebuffer : public VideoCore::FramebufferParams { public: - explicit Framebuffer(TextureRuntime& runtime, const Surface* color, u32 color_level, - const Surface* depth_stencil, u32 depth_level, const Pica::Regs& regs, - Common::Rectangle surfaces_rect); + explicit Framebuffer(TextureRuntime& runtime, const VideoCore::FramebufferParams& params, + const Surface* color, const Surface* depth_stencil); ~Framebuffer(); + Framebuffer(const Framebuffer&) = delete; + Framebuffer& operator=(const Framebuffer&) = delete; + + Framebuffer(Framebuffer&& o) noexcept = default; + Framebuffer& operator=(Framebuffer&& o) noexcept = default; + + [[nodiscard]] u32 Scale() const noexcept { + return res_scale; + } + [[nodiscard]] GLuint Handle() 
const noexcept { - return handle; + return framebuffer.handle; } [[nodiscard]] GLuint Attachment(VideoCore::SurfaceType type) const noexcept { @@ -208,8 +171,9 @@ public: } private: + u32 res_scale{1}; std::array attachments{}; - GLuint handle{}; + OGLFramebuffer framebuffer; }; class Sampler { diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 9d48e23ca..26f3808ac 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -2,20 +2,18 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include #include "common/logging/log.h" #include "common/microprofile.h" #include "common/settings.h" #include "core/core.h" -#include "core/dumping/backend.h" #include "core/frontend/emu_window.h" #include "core/frontend/framebuffer_layout.h" #include "core/hw/hw.h" #include "core/hw/lcd.h" #include "core/memory.h" -#include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_texture_mailbox.h" #include "video_core/renderer_opengl/gl_vars.h" #include "video_core/renderer_opengl/post_processing_opengl.h" #include "video_core/renderer_opengl/renderer_opengl.h" @@ -31,232 +29,6 @@ namespace OpenGL { MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 128, 64)); MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128)); -// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have -// to wait on available presentation frames. There doesn't seem to be much of a downside to a larger -// number but 9 swap textures at 60FPS presentation allows for 800% speed so thats probably fine -#ifdef ANDROID -// Reduce the size of swap_chain, since the UI only allows upto 200% speed. 
-constexpr std::size_t SWAP_CHAIN_SIZE = 6; -#else -constexpr std::size_t SWAP_CHAIN_SIZE = 9; -#endif - -class OGLTextureMailboxException : public std::runtime_error { -public: - using std::runtime_error::runtime_error; -}; - -class OGLTextureMailbox : public Frontend::TextureMailbox { -public: - std::mutex swap_chain_lock; - std::condition_variable free_cv; - std::condition_variable present_cv; - std::array swap_chain{}; - std::queue free_queue{}; - std::deque present_queue{}; - Frontend::Frame* previous_frame = nullptr; - - OGLTextureMailbox(bool has_debug_tool_ = false) : has_debug_tool{has_debug_tool_} { - for (auto& frame : swap_chain) { - free_queue.push(&frame); - } - } - - ~OGLTextureMailbox() override { - // lock the mutex and clear out the present and free_queues and notify any people who are - // blocked to prevent deadlock on shutdown - std::scoped_lock lock(swap_chain_lock); - std::queue().swap(free_queue); - present_queue.clear(); - present_cv.notify_all(); - free_cv.notify_all(); - } - - void ReloadPresentFrame(Frontend::Frame* frame, u32 height, u32 width) override { - frame->present.Release(); - frame->present.Create(); - GLint previous_draw_fbo{}; - glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo); - glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, - frame->color.handle); - if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { - LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!"); - } - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo); - frame->color_reloaded = false; - } - - void ReloadRenderFrame(Frontend::Frame* frame, u32 width, u32 height) override { - OpenGLState prev_state = OpenGLState::GetCurState(); - OpenGLState state = OpenGLState::GetCurState(); - - // Recreate the color texture attachment - frame->color.Release(); - frame->color.Create(); - state.renderbuffer = frame->color.handle; - 
state.Apply(); - glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, width, height); - - // Recreate the FBO for the render target - frame->render.Release(); - frame->render.Create(); - state.draw.read_framebuffer = frame->render.handle; - state.draw.draw_framebuffer = frame->render.handle; - state.Apply(); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, - frame->color.handle); - if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { - LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!"); - } - prev_state.Apply(); - frame->width = width; - frame->height = height; - frame->color_reloaded = true; - } - - Frontend::Frame* GetRenderFrame() override { - std::unique_lock lock(swap_chain_lock); - - // If theres no free frames, we will reuse the oldest render frame - if (free_queue.empty()) { - auto frame = present_queue.back(); - present_queue.pop_back(); - return frame; - } - - Frontend::Frame* frame = free_queue.front(); - free_queue.pop(); - return frame; - } - - void ReleaseRenderFrame(Frontend::Frame* frame) override { - std::unique_lock lock(swap_chain_lock); - present_queue.push_front(frame); - present_cv.notify_one(); - - DebugNotifyNextFrame(); - } - - // This is virtual as it is to be overriden in OGLVideoDumpingMailbox below. 
- virtual void LoadPresentFrame() { - // free the previous frame and add it back to the free queue - if (previous_frame) { - free_queue.push(previous_frame); - free_cv.notify_one(); - } - - // the newest entries are pushed to the front of the queue - Frontend::Frame* frame = present_queue.front(); - present_queue.pop_front(); - // remove all old entries from the present queue and move them back to the free_queue - for (auto f : present_queue) { - free_queue.push(f); - } - present_queue.clear(); - previous_frame = frame; - } - - Frontend::Frame* TryGetPresentFrame(int timeout_ms) override { - DebugWaitForNextFrame(); - - std::unique_lock lock(swap_chain_lock); - // wait for new entries in the present_queue - present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), - [&] { return !present_queue.empty(); }); - if (present_queue.empty()) { - // timed out waiting for a frame to draw so return the previous frame - return previous_frame; - } - - LoadPresentFrame(); - return previous_frame; - } - -private: - std::mutex debug_synch_mutex; - std::condition_variable debug_synch_condition; - std::atomic_int frame_for_debug{}; - const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step - - /// Signal that a new frame is available (called from GPU thread) - void DebugNotifyNextFrame() { - if (!has_debug_tool) { - return; - } - frame_for_debug++; - std::lock_guard lock{debug_synch_mutex}; - debug_synch_condition.notify_one(); - } - - /// Wait for a new frame to be available (called from presentation thread) - void DebugWaitForNextFrame() { - if (!has_debug_tool) { - return; - } - const int last_frame = frame_for_debug; - std::unique_lock lock{debug_synch_mutex}; - debug_synch_condition.wait(lock, - [this, last_frame] { return frame_for_debug > last_frame; }); - } -}; - -/// This mailbox is different in that it will never discard rendered frames -class OGLVideoDumpingMailbox : public OGLTextureMailbox { -public: - bool quit = false; - - 
Frontend::Frame* GetRenderFrame() override { - std::unique_lock lock(swap_chain_lock); - - // If theres no free frames, we will wait until one shows up - if (free_queue.empty()) { - free_cv.wait(lock, [&] { return (!free_queue.empty() || quit); }); - if (quit) { - throw OGLTextureMailboxException("VideoDumpingMailbox quitting"); - } - - if (free_queue.empty()) { - LOG_CRITICAL(Render_OpenGL, "Could not get free frame"); - return nullptr; - } - } - - Frontend::Frame* frame = free_queue.front(); - free_queue.pop(); - return frame; - } - - void LoadPresentFrame() override { - // free the previous frame and add it back to the free queue - if (previous_frame) { - free_queue.push(previous_frame); - free_cv.notify_one(); - } - - Frontend::Frame* frame = present_queue.back(); - present_queue.pop_back(); - previous_frame = frame; - - // Do not remove entries from the present_queue, as video dumping would require - // that we preserve all frames - } - - Frontend::Frame* TryGetPresentFrame(int timeout_ms) override { - std::unique_lock lock(swap_chain_lock); - // wait for new entries in the present_queue - present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), - [&] { return !present_queue.empty(); }); - if (present_queue.empty()) { - // timed out waiting for a frame - return nullptr; - } - - LoadPresentFrame(); - return previous_frame; - } -}; - /** * Vertex structure that the drawn screen rectangles are composed of. */ @@ -559,8 +331,15 @@ void RendererOpenGL::InitOpenGLObjects() { glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(), Settings::values.bg_blue.GetValue(), 0.0f); - filter_sampler.Create(); - ReloadSampler(); + for (size_t i = 0; i < samplers.size(); i++) { + samplers[i].Create(); + glSamplerParameteri(samplers[i].handle, GL_TEXTURE_MIN_FILTER, + i == 0 ? GL_NEAREST : GL_LINEAR); + glSamplerParameteri(samplers[i].handle, GL_TEXTURE_MAG_FILTER, + i == 0 ? 
GL_NEAREST : GL_LINEAR); + glSamplerParameteri(samplers[i].handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glSamplerParameteri(samplers[i].handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + } ReloadShader(); @@ -608,15 +387,6 @@ void RendererOpenGL::InitOpenGLObjects() { state.Apply(); } -void RendererOpenGL::ReloadSampler() { - glSamplerParameteri(filter_sampler.handle, GL_TEXTURE_MIN_FILTER, - Settings::values.filter_mode ? GL_LINEAR : GL_NEAREST); - glSamplerParameteri(filter_sampler.handle, GL_TEXTURE_MAG_FILTER, - Settings::values.filter_mode ? GL_LINEAR : GL_NEAREST); - glSamplerParameteri(filter_sampler.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glSamplerParameteri(filter_sampler.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); -} - void RendererOpenGL::ReloadShader() { // Link shaders and get variable locations std::string shader_data; @@ -793,13 +563,14 @@ void RendererOpenGL::DrawSingleScreen(const ScreenInfo& screen_info, float x, fl } const u32 scale_factor = GetResolutionScaleFactor(); + const GLuint sampler = samplers[Settings::values.filter_mode.GetValue()].handle; glUniform4f(uniform_i_resolution, static_cast(screen_info.texture.width * scale_factor), static_cast(screen_info.texture.height * scale_factor), 1.0f / static_cast(screen_info.texture.width * scale_factor), 1.0f / static_cast(screen_info.texture.height * scale_factor)); glUniform4f(uniform_o_resolution, h, w, 1.0f / h, 1.0f / w); state.texture_units[0].texture_2d = screen_info.display_texture; - state.texture_units[0].sampler = filter_sampler.handle; + state.texture_units[0].sampler = sampler; state.Apply(); glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data()); @@ -862,6 +633,7 @@ void RendererOpenGL::DrawSingleScreenStereo(const ScreenInfo& screen_info_l, } const u32 scale_factor = GetResolutionScaleFactor(); + const GLuint sampler = samplers[Settings::values.filter_mode.GetValue()].handle; glUniform4f(uniform_i_resolution, static_cast(screen_info_l.texture.width * 
scale_factor), static_cast(screen_info_l.texture.height * scale_factor), @@ -870,8 +642,8 @@ void RendererOpenGL::DrawSingleScreenStereo(const ScreenInfo& screen_info_l, glUniform4f(uniform_o_resolution, h, w, 1.0f / h, 1.0f / w); state.texture_units[0].texture_2d = screen_info_l.display_texture; state.texture_units[1].texture_2d = screen_info_r.display_texture; - state.texture_units[0].sampler = filter_sampler.handle; - state.texture_units[1].sampler = filter_sampler.handle; + state.texture_units[0].sampler = sampler; + state.texture_units[1].sampler = sampler; state.Apply(); glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data()); @@ -894,11 +666,6 @@ void RendererOpenGL::DrawScreens(const Layout::FramebufferLayout& layout, bool f Settings::values.bg_blue.GetValue(), 0.0f); } - if (settings.sampler_update_requested.exchange(false)) { - // Set the new filtering mode for the sampler - ReloadSampler(); - } - if (settings.shader_update_requested.exchange(false)) { // Update fragment shader before drawing shader.Release(); @@ -1119,7 +886,7 @@ void RendererOpenGL::TryPresent(int timeout_ms, bool is_secondary) { void RendererOpenGL::PrepareVideoDumping() { auto* mailbox = static_cast(frame_dumper.mailbox.get()); { - std::unique_lock lock(mailbox->swap_chain_lock); + std::scoped_lock lock{mailbox->swap_chain_lock}; mailbox->quit = false; } frame_dumper.StartDumping(); @@ -1129,7 +896,7 @@ void RendererOpenGL::CleanupVideoDumping() { frame_dumper.StopDumping(); auto* mailbox = static_cast(frame_dumper.mailbox.get()); { - std::unique_lock lock(mailbox->swap_chain_lock); + std::scoped_lock lock{mailbox->swap_chain_lock}; mailbox->quit = true; } mailbox->free_cv.notify_one(); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 088a817f2..be7c39e8b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -21,20 +21,6 @@ namespace 
Core { class System; } -namespace Frontend { - -struct Frame { - u32 width{}; /// Width of the frame (to detect resize) - u32 height{}; /// Height of the frame - bool color_reloaded = false; /// Texture attachment was recreated (ie: resized) - OpenGL::OGLRenderbuffer color{}; /// Buffer shared between the render/present FBO - OpenGL::OGLFramebuffer render{}; /// FBO created on the render thread - OpenGL::OGLFramebuffer present{}; /// FBO created on the present thread - GLsync render_fence{}; /// Fence created on the render thread - GLsync present_fence{}; /// Fence created on the presentation thread -}; -} // namespace Frontend - namespace OpenGL { /// Structure used for storing information about the textures for each 3DS screen @@ -72,7 +58,6 @@ public: private: void InitOpenGLObjects(); - void ReloadSampler(); void ReloadShader(); void PrepareRendertarget(); void RenderScreenshot(); @@ -109,9 +94,9 @@ private: OGLBuffer vertex_buffer; OGLProgram shader; OGLFramebuffer screenshot_framebuffer; - OGLSampler filter_sampler; + std::array samplers; - /// Display information for top and bottom screens respectively + // Display information for top and bottom screens respectively std::array screen_infos; // Shader uniform location indices