From e783b0d4a97a67eb3380c64f3fc6d65d41a51872 Mon Sep 17 00:00:00 2001 From: GPUCode <47210458+GPUCode@users.noreply.github.com> Date: Tue, 18 Jul 2023 17:31:31 +0300 Subject: [PATCH] rasterizer_cache: Fixes to (unaligned) texture downloads (#6697) * rasterizer_cache: Header cleanup * gl_texture_runtime: Fix incorrect stride in single scanline downloads * texture_codec: Fix unaligned texture downloads --- src/video_core/CMakeLists.txt | 2 + .../rasterizer_cache/rasterizer_cache.h | 3 +- .../rasterizer_cache/rasterizer_cache_base.h | 9 +--- src/video_core/rasterizer_cache/slot_id.h | 21 ++++++++ .../rasterizer_cache/texture_codec.h | 14 ++--- .../rasterizer_cache/texture_cube.h | 52 +++++++++++++++++++ src/video_core/rasterizer_cache/utils.h | 46 ---------------- .../renderer_opengl/gl_texture_runtime.cpp | 32 ++++++------ .../renderer_opengl/gl_texture_runtime.h | 1 + 9 files changed, 105 insertions(+), 75 deletions(-) create mode 100644 src/video_core/rasterizer_cache/slot_id.h create mode 100644 src/video_core/rasterizer_cache/texture_cube.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 581bbba46..b2cc2884f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -42,11 +42,13 @@ add_library(video_core STATIC rasterizer_cache/rasterizer_cache.h rasterizer_cache/rasterizer_cache_base.h rasterizer_cache/sampler_params.h + rasterizer_cache/slot_id.h rasterizer_cache/surface_base.cpp rasterizer_cache/surface_base.h rasterizer_cache/surface_params.cpp rasterizer_cache/surface_params.h rasterizer_cache/texture_codec.h + rasterizer_cache/texture_cube.h rasterizer_cache/utils.cpp rasterizer_cache/utils.h renderer_opengl/frame_dumper_opengl.cpp diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index 31771d552..1133e90b0 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -14,6 +14,7 @@ #include "core/memory.h" #include "video_core/custom_textures/custom_tex_manager.h" #include "video_core/rasterizer_cache/rasterizer_cache_base.h" +#include "video_core/rasterizer_cache/surface_base.h" #include "video_core/regs.h" #include "video_core/renderer_base.h" #include "video_core/texture/texture_decode.h" @@ -1212,7 +1213,7 @@ void RasterizerCache::ClearAll(bool flush) { // Remove the whole cache without really looking at it. cached_pages -= flush_interval; - dirty_regions -= SurfaceInterval(0x0, 0xFFFFFFFF); + dirty_regions.clear(); page_table.clear(); remove_surfaces.clear(); } diff --git a/src/video_core/rasterizer_cache/rasterizer_cache_base.h b/src/video_core/rasterizer_cache/rasterizer_cache_base.h index 57c7473bc..2b76f4076 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache_base.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache_base.h @@ -11,7 +11,8 @@ #include #include #include "video_core/rasterizer_cache/sampler_params.h" -#include "video_core/rasterizer_cache/surface_base.h" +#include "video_core/rasterizer_cache/surface_params.h" +#include "video_core/rasterizer_cache/texture_cube.h" namespace Memory { class MemorySystem; @@ -70,12 +71,6 @@ class RasterizerCache { SurfaceId depth_id; }; - struct TextureCube { - SurfaceId surface_id; - std::array face_ids; - std::array ticks; - }; - public: explicit RasterizerCache(Memory::MemorySystem& memory, CustomTexManager& custom_tex_manager, Runtime& runtime, Pica::Regs& regs, RendererBase& renderer); diff --git a/src/video_core/rasterizer_cache/slot_id.h b/src/video_core/rasterizer_cache/slot_id.h new file mode 100644 index 000000000..b76805be9 --- /dev/null +++ b/src/video_core/rasterizer_cache/slot_id.h @@ -0,0 +1,21 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/slot_vector.h" + +#pragma once + +namespace VideoCore { + +using SurfaceId = Common::SlotId; +using SamplerId = Common::SlotId; + +/// Fake surface ID for null surfaces +constexpr SurfaceId NULL_SURFACE_ID{0}; +/// Fake surface ID for null cube surfaces +constexpr SurfaceId NULL_SURFACE_CUBE_ID{1}; +/// Fake sampler ID for null samplers +constexpr SamplerId NULL_SAMPLER_ID{0}; + +} // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/texture_codec.h b/src/video_core/rasterizer_cache/texture_codec.h index a2a4c5425..01e90f461 100644 --- a/src/video_core/rasterizer_cache/texture_codec.h +++ b/src/video_core/rasterizer_cache/texture_codec.h @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #pragma once + #include #include #include @@ -264,6 +265,7 @@ static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 en const u32 aligned_down_start_offset = Common::AlignDown(start_offset, tile_size); const u32 aligned_start_offset = Common::AlignUp(start_offset, tile_size); const u32 aligned_end_offset = Common::AlignDown(end_offset, tile_size); + const u32 begin_pixel_index = aligned_down_start_offset * 8 / GetFormatBpp(format); ASSERT(!morton_to_linear || (aligned_start_offset == start_offset && aligned_end_offset == end_offset)); @@ -271,12 +273,12 @@ static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 en // In OpenGL the texture origin is in the bottom left corner as opposed to other // APIs that have it at the top left. To avoid flipping texture coordinates in // the shader we read/write the linear buffer from the bottom up - u32 linear_offset = ((height - 8) * width) * aligned_bytes_per_pixel; + u32 x = (begin_pixel_index % (width * 8)) / 8; + u32 y = (begin_pixel_index / (width * 8)) * 8; + u32 linear_offset = ((height - 8 - y) * width + x) * aligned_bytes_per_pixel; u32 tiled_offset = 0; - u32 x = 0; - u32 y = 0; - const auto LinearNextTile = [&] { + const auto linear_next_tile = [&] { x = (x + 8) % width; linear_offset += 8 * aligned_bytes_per_pixel; if (!x) { @@ -300,7 +302,7 @@ static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 en std::min(aligned_start_offset, end_offset) - start_offset); tiled_offset += aligned_start_offset - start_offset; - LinearNextTile(); + linear_next_tile(); } // If the copy spans multiple tiles, copy the fully aligned tiles in between. @@ -313,7 +315,7 @@ static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 en auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size); MortonCopyTile(width, tiled_data, linear_data); tiled_offset += tile_size; - LinearNextTile(); + linear_next_tile(); } } diff --git a/src/video_core/rasterizer_cache/texture_cube.h b/src/video_core/rasterizer_cache/texture_cube.h new file mode 100644 index 000000000..204dcb7f8 --- /dev/null +++ b/src/video_core/rasterizer_cache/texture_cube.h @@ -0,0 +1,52 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/hash.h" +#include "video_core/rasterizer_cache/slot_id.h" +#include "video_core/regs_texturing.h" + +namespace VideoCore { + +struct TextureCube { + SurfaceId surface_id; + std::array face_ids; + std::array ticks; +}; + +struct TextureCubeConfig { + PAddr px; + PAddr nx; + PAddr py; + PAddr ny; + PAddr pz; + PAddr nz; + u32 width; + u32 levels; + Pica::TexturingRegs::TextureFormat format; + + bool operator==(const TextureCubeConfig& rhs) const { + return std::memcmp(this, &rhs, sizeof(TextureCubeConfig)) == 0; + } + + bool operator!=(const TextureCubeConfig& rhs) const { + return std::memcmp(this, &rhs, sizeof(TextureCubeConfig)) != 0; + } + + const u64 Hash() const { + return Common::ComputeHash64(this, sizeof(TextureCubeConfig)); + } +}; + +} // namespace VideoCore + +namespace std { +template <> +struct hash { + std::size_t operator()(const VideoCore::TextureCubeConfig& config) const noexcept { + return config.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/rasterizer_cache/utils.h b/src/video_core/rasterizer_cache/utils.h index 3f6ee1f97..aeb7d7451 100644 --- a/src/video_core/rasterizer_cache/utils.h +++ b/src/video_core/rasterizer_cache/utils.h @@ -5,24 +5,11 @@ #pragma once #include -#include "common/hash.h" #include "common/math_util.h" -#include "common/slot_vector.h" #include "common/vector_math.h" -#include "video_core/regs_texturing.h" namespace VideoCore { -using SurfaceId = Common::SlotId; -using SamplerId = Common::SlotId; - -/// Fake surface ID for null surfaces -constexpr SurfaceId NULL_SURFACE_ID{0}; -/// Fake surface ID for null cube surfaces -constexpr SurfaceId NULL_SURFACE_CUBE_ID{1}; -/// Fake sampler ID for null samplers -constexpr SamplerId NULL_SAMPLER_ID{0}; - struct Offset { u32 x = 0; u32 y = 0; @@ -79,30 +66,6 @@ struct StagingData { std::span mapped; }; -struct TextureCubeConfig { - PAddr px; - PAddr nx; - PAddr py; - PAddr ny; - PAddr pz; - PAddr nz; - u32 width; - u32 levels; - Pica::TexturingRegs::TextureFormat format; - - bool operator==(const TextureCubeConfig& rhs) const { - return std::memcmp(this, &rhs, sizeof(TextureCubeConfig)) == 0; - } - - bool operator!=(const TextureCubeConfig& rhs) const { - return std::memcmp(this, &rhs, sizeof(TextureCubeConfig)) != 0; - } - - const u64 Hash() const { - return Common::ComputeHash64(this, sizeof(TextureCubeConfig)); - } -}; - class SurfaceParams; u32 MipLevels(u32 width, u32 height, u32 max_level); @@ -134,12 +97,3 @@ void DecodeTexture(const SurfaceParams& surface_info, PAddr start_addr, PAddr en std::span source, std::span dest, bool convert = false); } // namespace VideoCore - -namespace std { -template <> -struct hash { - std::size_t operator()(const VideoCore::TextureCubeConfig& config) const noexcept { - return config.Hash(); - } -}; -} // namespace std diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp index 6f09de276..2786b9d67 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.cpp +++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -484,20 +484,19 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, bool Surface::DownloadWithoutFbo(const VideoCore::BufferTextureCopy& download, const VideoCore::StagingData& staging) { - const bool is_full_download = download.texture_rect == GetRect(); - const bool has_sub_image = driver->HasArbGetTextureSubImage(); - if (driver->IsOpenGLES() || (!is_full_download && !has_sub_image)) { + if (driver->IsOpenGLES()) { return false; } - const GLuint old_tex = OpenGLState::GetCurState().texture_units[0].texture_2d; const auto& tuple = runtime->GetFormatTuple(pixel_format); + const u32 unscaled_width = download.texture_rect.GetWidth(); - glActiveTexture(GL_TEXTURE0); - glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(stride)); + glPixelStorei(GL_PACK_ROW_LENGTH, unscaled_width); SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); // Prefer glGetTextureSubImage in most cases since it's the fastest and most convenient option + const bool is_full_download = download.texture_rect == GetRect(); + const bool has_sub_image = driver->HasArbGetTextureSubImage(); if (has_sub_image) { const GLsizei buf_size = static_cast(staging.mapped.size()); glGetTextureSubImage(Handle(0), download.texture_level, download.texture_rect.left, @@ -505,16 +504,19 @@ bool Surface::DownloadWithoutFbo(const VideoCore::BufferTextureCopy& download, download.texture_rect.GetHeight(), 1, tuple.format, tuple.type, buf_size, staging.mapped.data()); return true; + } else if (is_full_download) { + // This should only trigger for full texture downloads in oldish intel drivers + // that only support up to 4.3 + OpenGLState state = OpenGLState::GetCurState(); + state.texture_units[0].texture_2d = Handle(0); + state.Apply(); + + glGetTexImage(GL_TEXTURE_2D, download.texture_level, tuple.format, tuple.type, + staging.mapped.data()); + + return true; } - - // This should only trigger for full texture downloads in oldish intel drivers - // that only support up to 4.3 - glBindTexture(GL_TEXTURE_2D, Handle(0)); - glGetTexImage(GL_TEXTURE_2D, download.texture_level, tuple.format, tuple.type, - staging.mapped.data()); - glBindTexture(GL_TEXTURE_2D, old_tex); - - return true; + return false; } void Surface::Attach(GLenum target, u32 level, u32 layer, bool scaled) { diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h index 0551394df..654e321eb 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -6,6 +6,7 @@ #include "video_core/rasterizer_cache/framebuffer_base.h" #include "video_core/rasterizer_cache/rasterizer_cache_base.h" +#include "video_core/rasterizer_cache/surface_base.h" #include "video_core/renderer_opengl/gl_blit_helper.h" namespace VideoCore {