From 2e655f73b84ac656d6d5da564cc4be88c33722c8 Mon Sep 17 00:00:00 2001 From: GPUCode <47210458+GPUCode@users.noreply.github.com> Date: Sun, 7 May 2023 02:34:28 +0300 Subject: [PATCH] Rasterizer cache refactor v2 (#6479) * rasterizer_cache: Switch to template * Eliminates all opengl references in the rasterizer cache headers thus completing the backend abstraction * rasterizer_cache: Switch to page table * Surface storage isn't particularly interval sensitive so we can use a page table to make it faster * rasterizer_cache: Move sampler management out of rasterizer cache * rasterizer_cache: Remove shared_ptr usage * Switches to yuzu's slot vector for improved memory locality. * rasterizer_cache: Rework reinterpretation lookup * citra_qt: Per game texture filter * rasterizer_cache: Log additional settings * gl_texture_runtime: Resolve shadow map comment * rasterizer_cache: Don't use float for viewport * gl_texture_runtime: Fix custom allocation recycling * rasterizer_cache: Minor cleanups * Cleanup texture cubes when all the faces have been unregistered from the cache * custom_tex_manager: Allow multiple hash mappings per texture * code: Move slot vector to common * rasterizer_cache: Prevent texture cube crashes * rasterizer_cache: Improve mipmap validation * CanSubRect now works properly when validating multi-level surfaces, for example Dark Moon validates a 4 level surface from a 3 level one and it works * gl_blit_handler: Unbind sampler on reinterpretation --- .../configuration/configure_enhancements.cpp | 9 +- src/common/CMakeLists.txt | 1 + src/common/settings.cpp | 2 + src/common/slot_vector.h | 154 ++ src/video_core/CMakeLists.txt | 7 +- .../custom_textures/custom_tex_manager.cpp | 60 +- .../custom_textures/custom_tex_manager.h | 2 +- src/video_core/custom_textures/material.cpp | 11 +- src/video_core/custom_textures/material.h | 7 +- .../format_reinterpreter/d24s8_to_rgba8.frag | 21 +- .../format_reinterpreter/fullscreen_quad.vert | 2 +- 
.../format_reinterpreter/rgba4_to_rgb5a1.frag | 19 +- .../rasterizer_cache/framebuffer_base.cpp | 14 +- .../rasterizer_cache/framebuffer_base.h | 13 +- .../rasterizer_cache/rasterizer_cache.cpp | 1206 ------------- .../rasterizer_cache/rasterizer_cache.h | 1498 +++++++++++++++-- .../rasterizer_cache/rasterizer_cache_base.h | 229 +++ .../rasterizer_cache/sampler_params.h | 43 + .../rasterizer_cache/surface_base.cpp | 28 +- .../rasterizer_cache/surface_base.h | 25 +- .../rasterizer_cache/surface_params.cpp | 17 +- .../rasterizer_cache/surface_params.h | 9 +- src/video_core/rasterizer_cache/utils.h | 25 +- .../renderer_opengl/gl_blit_helper.cpp | 137 +- .../renderer_opengl/gl_blit_helper.h | 18 +- .../gl_format_reinterpreter.cpp | 134 -- .../renderer_opengl/gl_format_reinterpreter.h | 76 - .../renderer_opengl/gl_rasterizer.cpp | 155 +- .../renderer_opengl/gl_rasterizer.h | 27 +- .../renderer_opengl/gl_rasterizer_cache.cpp | 10 + .../renderer_opengl/gl_texture_runtime.cpp | 136 +- .../renderer_opengl/gl_texture_runtime.h | 70 +- 32 files changed, 2238 insertions(+), 1927 deletions(-) create mode 100644 src/common/slot_vector.h create mode 100644 src/video_core/rasterizer_cache/rasterizer_cache_base.h create mode 100644 src/video_core/rasterizer_cache/sampler_params.h delete mode 100644 src/video_core/renderer_opengl/gl_format_reinterpreter.cpp delete mode 100644 src/video_core/renderer_opengl/gl_format_reinterpreter.h create mode 100644 src/video_core/renderer_opengl/gl_rasterizer_cache.cpp diff --git a/src/citra_qt/configuration/configure_enhancements.cpp b/src/citra_qt/configuration/configure_enhancements.cpp index 91e45caee..90dfbf44d 100644 --- a/src/citra_qt/configuration/configure_enhancements.cpp +++ b/src/citra_qt/configuration/configure_enhancements.cpp @@ -208,12 +208,13 @@ void ConfigureEnhancements::SetupPerGameUI() { ConfigurationShared::SetColoredComboBox( ui->resolution_factor_combobox, ui->widget_resolution, - 
static_cast(Settings::values.resolution_factor.GetValue(true))); + static_cast(Settings::values.resolution_factor.GetValue(true))); - ConfigurationShared::SetColoredComboBox(ui->texture_filter_combobox, ui->widget_texture_filter, - 0); + ConfigurationShared::SetColoredComboBox( + ui->texture_filter_combobox, ui->widget_texture_filter, + static_cast(Settings::values.texture_filter.GetValue(true))); ConfigurationShared::SetColoredComboBox( ui->layout_combobox, ui->widget_layout, - static_cast(Settings::values.layout_option.GetValue(true))); + static_cast(Settings::values.layout_option.GetValue(true))); } diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 4784d60db..9efb1eac1 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -100,6 +100,7 @@ add_library(citra_common STATIC scope_exit.h settings.cpp settings.h + slot_vector.h serialization/atomic.h serialization/boost_discrete_interval.hpp serialization/boost_flat_set.h diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 994655cb9..eea0a6f71 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -163,6 +163,8 @@ void LogSettings() { log_setting("Layout_LargeScreenProportion", values.large_screen_proportion.GetValue()); log_setting("Utility_DumpTextures", values.dump_textures.GetValue()); log_setting("Utility_CustomTextures", values.custom_textures.GetValue()); + log_setting("Utility_PreloadTextures", values.preload_textures.GetValue()); + log_setting("Utility_AsyncCustomLoading", values.async_custom_loading.GetValue()); log_setting("Utility_UseDiskShaderCache", values.use_disk_shader_cache.GetValue()); log_setting("Audio_Emulation", GetAudioEmulationName(values.audio_emulation.GetValue())); log_setting("Audio_OutputType", values.output_type.GetValue()); diff --git a/src/common/slot_vector.h b/src/common/slot_vector.h new file mode 100644 index 000000000..9a383f2a0 --- /dev/null +++ b/src/common/slot_vector.h @@ -0,0 +1,154 @@ +// Copyright 
2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include "common/assert.h" +#include "common/common_types.h" + +namespace Common { + +struct SlotId { + static constexpr u32 INVALID_INDEX = std::numeric_limits::max(); + + constexpr auto operator<=>(const SlotId&) const noexcept = default; + + constexpr explicit operator bool() const noexcept { + return index != INVALID_INDEX; + } + + u32 index = INVALID_INDEX; +}; + +template +class SlotVector { +public: + ~SlotVector() noexcept { + size_t index = 0; + for (u64 bits : stored_bitset) { + for (size_t bit = 0; bits; ++bit, bits >>= 1) { + if ((bits & 1) != 0) { + values[index + bit].object.~T(); + } + } + index += 64; + } + delete[] values; + } + + [[nodiscard]] T& operator[](SlotId id) noexcept { + ValidateIndex(id); + return values[id.index].object; + } + + [[nodiscard]] const T& operator[](SlotId id) const noexcept { + ValidateIndex(id); + return values[id.index].object; + } + + template + [[nodiscard]] SlotId insert(Args&&... 
args) noexcept { + const u32 index = FreeValueIndex(); + new (&values[index].object) T(std::forward(args)...); + SetStorageBit(index); + + return SlotId{index}; + } + + void erase(SlotId id) noexcept { + values[id.index].object.~T(); + free_list.push_back(id.index); + ResetStorageBit(id.index); + } + +private: + struct NonTrivialDummy { + NonTrivialDummy() noexcept {} + }; + + union Entry { + Entry() noexcept : dummy{} {} + ~Entry() noexcept {} + + NonTrivialDummy dummy; + T object; + }; + + void SetStorageBit(u32 index) noexcept { + stored_bitset[index / 64] |= u64(1) << (index % 64); + } + + void ResetStorageBit(u32 index) noexcept { + stored_bitset[index / 64] &= ~(u64(1) << (index % 64)); + } + + bool ReadStorageBit(u32 index) noexcept { + return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0; + } + + void ValidateIndex(SlotId id) const noexcept { + DEBUG_ASSERT(id); + DEBUG_ASSERT(id.index / 64 < stored_bitset.size()); + DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0); + } + + [[nodiscard]] u32 FreeValueIndex() noexcept { + if (free_list.empty()) { + Reserve(values_capacity ? 
(values_capacity << 1) : 1); + } + + const u32 free_index = free_list.back(); + free_list.pop_back(); + return free_index; + } + + void Reserve(size_t new_capacity) noexcept { + Entry* const new_values = new Entry[new_capacity]; + size_t index = 0; + for (u64 bits : stored_bitset) { + for (size_t bit = 0; bits; ++bit, bits >>= 1) { + const size_t i = index + bit; + if ((bits & 1) == 0) { + continue; + } + T& old_value = values[i].object; + new (&new_values[i].object) T(std::move(old_value)); + old_value.~T(); + } + index += 64; + } + + stored_bitset.resize((new_capacity + 63) / 64); + + const size_t old_free_size = free_list.size(); + free_list.resize(old_free_size + (new_capacity - values_capacity)); + std::iota(free_list.begin() + old_free_size, free_list.end(), + static_cast(values_capacity)); + + delete[] values; + values = new_values; + values_capacity = new_capacity; + } + + Entry* values = nullptr; + size_t values_capacity = 0; + + std::vector stored_bitset; + std::vector free_list; +}; + +} // namespace Common + +template <> +struct std::hash { + size_t operator()(const Common::SlotId& id) const noexcept { + return std::hash{}(id.index); + } +}; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 154fe02e2..547ed2d22 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -40,6 +40,8 @@ add_library(video_core STATIC rasterizer_cache/pixel_format.h rasterizer_cache/rasterizer_cache.cpp rasterizer_cache/rasterizer_cache.h + rasterizer_cache/rasterizer_cache_base.h + rasterizer_cache/sampler_params.h rasterizer_cache/surface_base.cpp rasterizer_cache/surface_base.h rasterizer_cache/surface_params.cpp @@ -53,10 +55,9 @@ add_library(video_core STATIC renderer_opengl/gl_blit_helper.h renderer_opengl/gl_driver.cpp renderer_opengl/gl_driver.h - renderer_opengl/gl_format_reinterpreter.cpp - renderer_opengl/gl_format_reinterpreter.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h + 
renderer_opengl/gl_rasterizer_cache.cpp renderer_opengl/gl_resource_manager.cpp renderer_opengl/gl_resource_manager.h renderer_opengl/gl_shader_decompiler.cpp @@ -126,7 +127,7 @@ target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC citra_common citra_core) -target_link_libraries(video_core PRIVATE glad json-headers dds-ktx nihstro-headers Boost::serialization) +target_link_libraries(video_core PRIVATE Boost::serialization dds-ktx glad json-headers nihstro-headers tsl::robin_map) set_target_properties(video_core PROPERTIES INTERPROCEDURAL_OPTIMIZATION ${ENABLE_LTO}) if ("x86_64" IN_LIST ARCHITECTURE) diff --git a/src/video_core/custom_textures/custom_tex_manager.cpp b/src/video_core/custom_textures/custom_tex_manager.cpp index 305ef782f..8bb2adb42 100644 --- a/src/video_core/custom_textures/custom_tex_manager.cpp +++ b/src/video_core/custom_textures/custom_tex_manager.cpp @@ -13,6 +13,7 @@ #include "core/frontend/image_interface.h" #include "video_core/custom_textures/custom_tex_manager.h" #include "video_core/rasterizer_cache/surface_params.h" +#include "video_core/rasterizer_cache/utils.h" namespace VideoCore { @@ -21,7 +22,7 @@ namespace { MICROPROFILE_DEFINE(CustomTexManager_TickFrame, "CustomTexManager", "TickFrame", MP_RGB(54, 16, 32)); -constexpr std::size_t MAX_UPLOADS_PER_TICK = 16; +constexpr std::size_t MAX_UPLOADS_PER_TICK = 8; bool IsPow2(u32 value) { return value != 0 && (value & (value - 1)) == 0; @@ -111,11 +112,14 @@ void CustomTexManager::FindCustomTextures() { if (!ParseFilename(file, texture)) { continue; } - auto& material = material_map[texture->hash]; - if (!material) { - material = std::make_unique(); + for (const u64 hash : texture->hashes) { + auto& material = material_map[hash]; + if (!material) { + material = std::make_unique(); + } + material->hash = hash; + material->AddMapTexture(texture); } - material->AddMapTexture(texture); } 
textures_loaded = true; } @@ -145,21 +149,25 @@ bool CustomTexManager::ParseFilename(const FileUtil::FSTEntry& file, CustomTextu parts.pop_back(); } - // First check if the path is mapped directly to a hash - // before trying to parse the texture filename. + // First look if this file is mapped to any number of hashes. + std::vector& hashes = texture->hashes; const auto it = path_to_hash_map.find(file.virtualName); if (it != path_to_hash_map.end()) { - texture->hash = it->second; - } else { - u32 width; - u32 height; - u32 format; - unsigned long long hash{}; - if (std::sscanf(parts.back().c_str(), "tex1_%ux%u_%llX_%u", &width, &height, &hash, - &format) != 4) { - return false; - } - texture->hash = hash; + hashes = it->second; + } + + // It's also possible for pack creators to retain the default texture name + // still map the texture to another hash. Support that as well. + u32 width; + u32 height; + u32 format; + unsigned long long hash{}; + const bool is_parsed = std::sscanf(parts.back().c_str(), "tex1_%ux%u_%llX_%u", &width, &height, + &hash, &format) == 4; + const bool is_mapped = + !hashes.empty() && std::find(hashes.begin(), hashes.end(), hash) != hashes.end(); + if (is_parsed && !is_mapped) { + hashes.push_back(hash); } texture->path = file.physicalName; @@ -181,9 +189,9 @@ void CustomTexManager::WriteConfig() { json["description"] = "A graphics pack"; auto& options = json["options"]; - options["skip_mipmap"] = skip_mipmap; - options["flip_png_files"] = flip_png_files; - options["use_new_hash"] = use_new_hash; + options["skip_mipmap"] = false; + options["flip_png_files"] = true; + options["use_new_hash"] = true; FileUtil::IOFile file{pack_config, "w"}; const std::string output = json.dump(4); @@ -311,7 +319,7 @@ void CustomTexManager::ReadConfig(const std::string& load_path) { return; } - nlohmann::json json = nlohmann::json::parse(config); + nlohmann::json json = nlohmann::json::parse(config, nullptr, false, true); const auto& options = json["options"]; 
skip_mipmap = options["skip_mipmap"].get(); @@ -330,13 +338,7 @@ void CustomTexManager::ReadConfig(const std::string& load_path) { const auto parse = [&](const std::string& file) { const std::string filename{FileUtil::GetFilename(file)}; auto [it, new_hash] = path_to_hash_map.try_emplace(filename); - if (!new_hash) { - LOG_ERROR(Render, - "File {} with key {} already exists and is mapped to {:#016X}, skipping", - file, material.key(), path_to_hash_map[filename]); - return; - } - it->second = hash; + it->second.push_back(hash); }; const auto value = material.value(); if (value.is_string()) { diff --git a/src/video_core/custom_textures/custom_tex_manager.h b/src/video_core/custom_textures/custom_tex_manager.h index 007218857..1e7a0d4d8 100644 --- a/src/video_core/custom_textures/custom_tex_manager.h +++ b/src/video_core/custom_textures/custom_tex_manager.h @@ -81,7 +81,7 @@ private: Frontend::ImageInterface& image_interface; std::unordered_set dumped_textures; std::unordered_map> material_map; - std::unordered_map path_to_hash_map; + std::unordered_map> path_to_hash_map; std::vector> custom_textures; std::list async_uploads; std::unique_ptr workers; diff --git a/src/video_core/custom_textures/material.cpp b/src/video_core/custom_textures/material.cpp index 9d76c20e7..d7bee5da4 100644 --- a/src/video_core/custom_textures/material.cpp +++ b/src/video_core/custom_textures/material.cpp @@ -55,6 +55,11 @@ CustomTexture::CustomTexture(Frontend::ImageInterface& image_interface_) CustomTexture::~CustomTexture() = default; void CustomTexture::LoadFromDisk(bool flip_png) { + std::scoped_lock lock{decode_mutex}; + if (IsLoaded()) { + return; + } + FileUtil::IOFile file{path, "rb"}; std::vector input(file.GetSize()); if (file.ReadBytes(input.data(), input.size()) != input.size()) { @@ -71,7 +76,6 @@ void CustomTexture::LoadFromDisk(bool flip_png) { break; default: LOG_ERROR(Render, "Unknown file format {}", file_format); - return; } } @@ -102,8 +106,7 @@ void 
Material::LoadFromDisk(bool flip_png) noexcept { } texture->LoadFromDisk(flip_png); size += texture->data.size(); - LOG_DEBUG(Render, "Loading {} map {} with hash {:#016X}", MapTypeName(texture->type), - texture->path, texture->hash); + LOG_DEBUG(Render, "Loading {} map {}", MapTypeName(texture->type), texture->path); } if (!textures[0]) { LOG_ERROR(Render, "Unable to create material without color texture!"); @@ -121,7 +124,7 @@ void Material::LoadFromDisk(bool flip_png) noexcept { LOG_ERROR(Render, "{} map {} of material with hash {:#016X} has dimentions {}x{} " "which do not match the color texture dimentions {}x{}", - MapTypeName(texture->type), texture->path, texture->hash, texture->width, + MapTypeName(texture->type), texture->path, hash, texture->width, texture->height, width, height); state = DecodeState::Failed; return; diff --git a/src/video_core/custom_textures/material.h b/src/video_core/custom_textures/material.h index 6c43695d0..69d6a838c 100644 --- a/src/video_core/custom_textures/material.h +++ b/src/video_core/custom_textures/material.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -39,7 +40,7 @@ public: void LoadFromDisk(bool flip_png); [[nodiscard]] bool IsParsed() const noexcept { - return file_format != CustomFileFormat::None && hash != 0; + return file_format != CustomFileFormat::None && !hashes.empty(); } [[nodiscard]] bool IsLoaded() const noexcept { @@ -56,7 +57,8 @@ public: std::string path; u32 width; u32 height; - u64 hash; + std::vector hashes; + std::mutex decode_mutex; CustomPixelFormat format; CustomFileFormat file_format; std::vector data; @@ -67,6 +69,7 @@ struct Material { u32 width; u32 height; u64 size; + u64 hash; CustomPixelFormat format; std::array textures; std::atomic state{}; diff --git a/src/video_core/host_shaders/format_reinterpreter/d24s8_to_rgba8.frag b/src/video_core/host_shaders/format_reinterpreter/d24s8_to_rgba8.frag index ae0f5a36d..17c8e77f7 100644 --- 
a/src/video_core/host_shaders/format_reinterpreter/d24s8_to_rgba8.frag +++ b/src/video_core/host_shaders/format_reinterpreter/d24s8_to_rgba8.frag @@ -4,29 +4,18 @@ //? #version 430 core -layout(location = 0) in mediump vec2 dst_coord; +layout(location = 0) in mediump vec2 tex_coord; layout(location = 0) out lowp vec4 frag_color; layout(binding = 0) uniform highp sampler2D depth; layout(binding = 1) uniform lowp usampler2D stencil; -uniform mediump ivec2 dst_size; -uniform mediump ivec2 src_size; -uniform mediump ivec2 src_offset; void main() { - mediump ivec2 tex_coord; - if (src_size == dst_size) { - tex_coord = ivec2(dst_coord); - } else { - highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x); - mediump int y = tex_index / src_size.x; - tex_coord = ivec2(tex_index - y * src_size.x, y); - } - tex_coord -= src_offset; - + mediump vec2 coord = tex_coord * vec2(textureSize(depth, 0)); + mediump ivec2 tex_icoord = ivec2(coord); highp uint depth_val = - uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0)); - lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x; + uint(texelFetch(depth, tex_icoord, 0).x * (exp2(32.0) - 1.0)); + lowp uint stencil_val = texelFetch(stencil, tex_icoord, 0).x; highp uvec4 components = uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu); frag_color = vec4(components) / (exp2(8.0) - 1.0); diff --git a/src/video_core/host_shaders/format_reinterpreter/fullscreen_quad.vert b/src/video_core/host_shaders/format_reinterpreter/fullscreen_quad.vert index 62928913c..3c5e771bc 100644 --- a/src/video_core/host_shaders/format_reinterpreter/fullscreen_quad.vert +++ b/src/video_core/host_shaders/format_reinterpreter/fullscreen_quad.vert @@ -6,7 +6,7 @@ layout(location = 0) out vec2 dst_coord; -uniform mediump ivec2 dst_size; +layout(location = 0) uniform mediump ivec2 dst_size; const vec2 vertices[4] = vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); diff --git 
a/src/video_core/host_shaders/format_reinterpreter/rgba4_to_rgb5a1.frag b/src/video_core/host_shaders/format_reinterpreter/rgba4_to_rgb5a1.frag index fb1a874f9..99ca820da 100644 --- a/src/video_core/host_shaders/format_reinterpreter/rgba4_to_rgb5a1.frag +++ b/src/video_core/host_shaders/format_reinterpreter/rgba4_to_rgb5a1.frag @@ -4,26 +4,15 @@ //? #version 430 core -layout(location = 0) in mediump vec2 dst_coord; +layout(location = 0) in mediump vec2 tex_coord; layout(location = 0) out lowp vec4 frag_color; layout(binding = 0) uniform lowp sampler2D source; -uniform mediump ivec2 dst_size; -uniform mediump ivec2 src_size; -uniform mediump ivec2 src_offset; void main() { - mediump ivec2 tex_coord; - if (src_size == dst_size) { - tex_coord = ivec2(dst_coord); - } else { - highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x); - mediump int y = tex_index / src_size.x; - tex_coord = ivec2(tex_index - y * src_size.x, y); - } - tex_coord -= src_offset; - - lowp ivec4 rgba4 = ivec4(texelFetch(source, tex_coord, 0) * (exp2(4.0) - 1.0)); + mediump vec2 coord = tex_coord * vec2(textureSize(source, 0)); + mediump ivec2 tex_icoord = ivec2(coord); + lowp ivec4 rgba4 = ivec4(texelFetch(source, tex_icoord, 0) * (exp2(4.0) - 1.0)); lowp ivec3 rgb5 = ((rgba4.rgb << ivec3(1, 2, 3)) | (rgba4.gba >> ivec3(3, 2, 1))) & 0x1F; frag_color = vec4(vec3(rgb5) / (exp2(5.0) - 1.0), rgba4.a & 0x01); diff --git a/src/video_core/rasterizer_cache/framebuffer_base.cpp b/src/video_core/rasterizer_cache/framebuffer_base.cpp index 7573254af..8f7a5f7c2 100644 --- a/src/video_core/rasterizer_cache/framebuffer_base.cpp +++ b/src/video_core/rasterizer_cache/framebuffer_base.cpp @@ -10,9 +10,9 @@ namespace VideoCore { FramebufferBase::FramebufferBase() = default; -FramebufferBase::FramebufferBase(const Pica::Regs& regs, const SurfaceBase* const color, - u32 color_level, const SurfaceBase* const depth_stencil, - u32 depth_level, Common::Rectangle surfaces_rect) { 
+FramebufferBase::FramebufferBase(const Pica::Regs& regs, const SurfaceBase* color, u32 color_level, + const SurfaceBase* depth_stencil, u32 depth_level, + Common::Rectangle surfaces_rect) { res_scale = color ? color->res_scale : (depth_stencil ? depth_stencil->res_scale : 1u); // Determine the draw rectangle (render area + scissor) @@ -31,10 +31,10 @@ FramebufferBase::FramebufferBase(const Pica::Regs& regs, const SurfaceBase* cons surfaces_rect.bottom, surfaces_rect.top); // Update viewport - viewport.x = static_cast(surfaces_rect.left + viewport_rect.left * res_scale); - viewport.y = static_cast(surfaces_rect.bottom + viewport_rect.bottom * res_scale); - viewport.width = static_cast(viewport_rect.GetWidth() * res_scale); - viewport.height = static_cast(viewport_rect.GetHeight() * res_scale); + viewport.x = static_cast(surfaces_rect.left) + viewport_rect.left * res_scale; + viewport.y = static_cast(surfaces_rect.bottom) + viewport_rect.bottom * res_scale; + viewport.width = static_cast(viewport_rect.GetWidth() * res_scale); + viewport.height = static_cast(viewport_rect.GetHeight() * res_scale); // Scissor checks are window-, not viewport-relative, which means that if the cached texture // sub-rect changes, the scissor bounds also need to be updated. 
diff --git a/src/video_core/rasterizer_cache/framebuffer_base.h b/src/video_core/rasterizer_cache/framebuffer_base.h index 23efebb8b..932bf73a4 100644 --- a/src/video_core/rasterizer_cache/framebuffer_base.h +++ b/src/video_core/rasterizer_cache/framebuffer_base.h @@ -16,10 +16,10 @@ namespace VideoCore { class SurfaceBase; struct ViewportInfo { - f32 x; - f32 y; - f32 width; - f32 height; + s32 x; + s32 y; + s32 width; + s32 height; }; /** @@ -29,8 +29,8 @@ struct ViewportInfo { class FramebufferBase { public: FramebufferBase(); - FramebufferBase(const Pica::Regs& regs, const SurfaceBase* const color, u32 color_level, - const SurfaceBase* const depth_stencil, u32 depth_level, + FramebufferBase(const Pica::Regs& regs, const SurfaceBase* color, u32 color_level, + const SurfaceBase* depth_stencil, u32 depth_level, Common::Rectangle surfaces_rect); SurfaceParams ColorParams() const noexcept { @@ -66,6 +66,7 @@ protected: switch (type) { case VideoCore::SurfaceType::Color: return 0; + case VideoCore::SurfaceType::Depth: case VideoCore::SurfaceType::DepthStencil: return 1; default: diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.cpp b/src/video_core/rasterizer_cache/rasterizer_cache.cpp index f40aeb885..3f83d1a3a 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.cpp +++ b/src/video_core/rasterizer_cache/rasterizer_cache.cpp @@ -2,1223 +2,17 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include -#include -#include "common/alignment.h" -#include "common/logging/log.h" #include "common/microprofile.h" -#include "common/settings.h" -#include "core/memory.h" -#include "video_core/custom_textures/custom_tex_manager.h" -#include "video_core/rasterizer_cache/rasterizer_cache.h" -#include "video_core/regs.h" -#include "video_core/renderer_base.h" -#include "video_core/renderer_opengl/gl_texture_runtime.h" namespace VideoCore { -namespace { - MICROPROFILE_DEFINE(RasterizerCache_CopySurface, "RasterizerCache", "CopySurface", MP_RGB(128, 192, 64)); MICROPROFILE_DEFINE(RasterizerCache_UploadSurface, "RasterizerCache", "UploadSurface", MP_RGB(128, 192, 64)); -MICROPROFILE_DEFINE(RasterizerCache_ComputeHash, "RasterizerCache", "ComputeHash", - MP_RGB(32, 64, 192)); MICROPROFILE_DEFINE(RasterizerCache_DownloadSurface, "RasterizerCache", "DownloadSurface", MP_RGB(128, 192, 64)); MICROPROFILE_DEFINE(RasterizerCache_Invalidation, "RasterizerCache", "Invalidation", MP_RGB(128, 64, 192)); -MICROPROFILE_DEFINE(RasterizerCache_Flush, "RasterizerCache", "Flush", MP_RGB(128, 64, 192)); - -constexpr auto RangeFromInterval(const auto& map, const auto& interval) { - return boost::make_iterator_range(map.equal_range(interval)); -} - -enum MatchFlags { - Exact = 1 << 0, ///< Surfaces perfectly match - SubRect = 1 << 1, ///< Surface encompasses params - Copy = 1 << 2, ///< Surface we can copy from - Expand = 1 << 3, ///< Surface that can expand params - TexCopy = 1 << 4, ///< Surface that will match a display transfer "texture copy" parameters -}; - -/// Get the best surface match (and its match type) for the given flags -template -auto FindMatch(const auto& surface_cache, const SurfaceParams& params, ScaleMatch match_scale_type, - std::optional validate_interval = std::nullopt) { - RasterizerCache::SurfaceRef match_surface = nullptr; - bool match_valid = false; - u32 match_scale = 0; - SurfaceInterval match_interval{}; - - for (const auto& pair : 
RangeFromInterval(surface_cache, params.GetInterval())) { - for (const auto& surface : pair.second) { - const bool res_scale_matched = match_scale_type == ScaleMatch::Exact - ? (params.res_scale == surface->res_scale) - : (params.res_scale <= surface->res_scale); - // Validity will be checked in GetCopyableInterval - const bool is_valid = - find_flags & MatchFlags::Copy - ? true - : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); - - auto IsMatch_Helper = [&](auto check_type, auto match_fn) { - if (!(find_flags & check_type)) - return; - - bool matched; - SurfaceInterval surface_interval; - std::tie(matched, surface_interval) = match_fn(); - if (!matched) - return; - - if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && - surface->type != SurfaceType::Fill) - return; - - // Found a match, update only if this is better than the previous one - auto UpdateMatch = [&] { - match_surface = surface; - match_valid = is_valid; - match_scale = surface->res_scale; - match_interval = surface_interval; - }; - - if (surface->res_scale > match_scale) { - UpdateMatch(); - return; - } else if (surface->res_scale < match_scale) { - return; - } - - if (is_valid && !match_valid) { - UpdateMatch(); - return; - } else if (is_valid != match_valid) { - return; - } - - if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) { - UpdateMatch(); - } - }; - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - ASSERT(validate_interval); - auto copy_interval = - surface->GetCopyableInterval(params.FromInterval(*validate_interval)); - bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && - surface->CanCopy(params, copy_interval); - return 
std::make_pair(matched, copy_interval); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->CanExpand(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); - }); - } - } - return match_surface; -} - -} // Anonymous namespace - -RasterizerCache::RasterizerCache(Memory::MemorySystem& memory_, - CustomTexManager& custom_tex_manager_, - OpenGL::TextureRuntime& runtime_, Pica::Regs& regs_, - RendererBase& renderer_) - : memory{memory_}, custom_tex_manager{custom_tex_manager_}, runtime{runtime_}, regs{regs_}, - renderer{renderer_}, resolution_scale_factor{renderer.GetResolutionScaleFactor()}, - use_filter{Settings::values.texture_filter.GetValue() != Settings::TextureFilter::None}, - dump_textures{Settings::values.dump_textures.GetValue()}, - use_custom_textures{Settings::values.custom_textures.GetValue()} {} - -RasterizerCache::~RasterizerCache() { -#ifndef ANDROID - // This is for switching renderers, which is unsupported on Android, and costly on shutdown - ClearAll(false); -#endif -} - -void RasterizerCache::TickFrame() { - custom_tex_manager.TickFrame(); - - const u32 scale_factor = renderer.GetResolutionScaleFactor(); - const bool resolution_scale_changed = resolution_scale_factor != scale_factor; - const bool use_custom_texture_changed = - Settings::values.custom_textures.GetValue() != use_custom_textures; - const bool texture_filter_changed = - renderer.Settings().texture_filter_update_requested.exchange(false); - - if (resolution_scale_changed || texture_filter_changed || use_custom_texture_changed) { - resolution_scale_factor = scale_factor; - use_filter = Settings::values.texture_filter.GetValue() != Settings::TextureFilter::None; - use_custom_textures = Settings::values.custom_textures.GetValue(); - if (use_custom_textures) { - custom_tex_manager.FindCustomTextures(); - } - FlushAll(); - while 
(!surface_cache.empty()) { - UnregisterSurface(*surface_cache.begin()->second.begin()); - } - texture_cube_cache.clear(); - runtime.Reset(); - } -} - -bool RasterizerCache::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { - // Texture copy size is aligned to 16 byte units - const u32 copy_size = Common::AlignDown(config.texture_copy.size, 16); - if (copy_size == 0) { - return false; - } - - u32 input_gap = config.texture_copy.input_gap * 16; - u32 input_width = config.texture_copy.input_width * 16; - if (input_width == 0 && input_gap != 0) { - return false; - } - if (input_gap == 0 || input_width >= copy_size) { - input_width = copy_size; - input_gap = 0; - } - if (copy_size % input_width != 0) { - return false; - } - - u32 output_gap = config.texture_copy.output_gap * 16; - u32 output_width = config.texture_copy.output_width * 16; - if (output_width == 0 && output_gap != 0) { - return false; - } - if (output_gap == 0 || output_width >= copy_size) { - output_width = copy_size; - output_gap = 0; - } - if (copy_size % output_width != 0) { - return false; - } - - SurfaceParams src_params; - src_params.addr = config.GetPhysicalInputAddress(); - src_params.stride = input_width + input_gap; // stride in bytes - src_params.width = input_width; // width in bytes - src_params.height = copy_size / input_width; - src_params.size = ((src_params.height - 1) * src_params.stride) + src_params.width; - src_params.end = src_params.addr + src_params.size; - - const auto [src_surface, src_rect] = GetTexCopySurface(src_params); - if (!src_surface) { - return false; - } - - // If the output gap is nonzero ensure the output width matches the source rectangle width, - // otherwise we cannot use hardware accelerated texture copy. The former is in terms of bytes - // not pixels so first get the unscaled copy width and calculate the bytes this corresponds to. 
- // Note that tiled textures are laid out sequentially in memory, so we multiply that by eight - // to get the correct byte count. - if (output_gap != 0 && - (output_width != src_surface->BytesInPixels(src_rect.GetWidth() / src_surface->res_scale) * - (src_surface->is_tiled ? 8 : 1) || - output_gap % src_surface->BytesInPixels(src_surface->is_tiled ? 64 : 1) != 0)) { - return false; - } - - SurfaceParams dst_params = *src_surface; - dst_params.addr = config.GetPhysicalOutputAddress(); - dst_params.width = src_rect.GetWidth() / src_surface->res_scale; - dst_params.stride = dst_params.width + src_surface->PixelsInBytes( - src_surface->is_tiled ? output_gap / 8 : output_gap); - dst_params.height = src_rect.GetHeight() / src_surface->res_scale; - dst_params.res_scale = src_surface->res_scale; - dst_params.UpdateParams(); - - // Since we are going to invalidate the gap if there is one, we will have to load it first - const bool load_gap = output_gap != 0; - const auto [dst_surface, dst_rect] = - GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, load_gap); - - if (!dst_surface || dst_surface->type == SurfaceType::Texture || - !CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) { - return false; - } - - ASSERT(src_rect.GetWidth() == dst_rect.GetWidth()); - - const TextureCopy texture_copy = { - .src_level = src_surface->LevelOf(src_params.addr), - .dst_level = dst_surface->LevelOf(dst_params.addr), - .src_offset = {src_rect.left, src_rect.bottom}, - .dst_offset = {dst_rect.left, dst_rect.bottom}, - .extent = {src_rect.GetWidth(), src_rect.GetHeight()}, - }; - runtime.CopyTextures(*src_surface, *dst_surface, texture_copy); - - InvalidateRegion(dst_params.addr, dst_params.size, dst_surface); - return true; -} - -bool RasterizerCache::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { - SurfaceParams src_params; - src_params.addr = config.GetPhysicalInputAddress(); - src_params.width = config.output_width; - 
src_params.stride = config.input_width; - src_params.height = config.output_height; - src_params.is_tiled = !config.input_linear; - src_params.pixel_format = PixelFormatFromGPUPixelFormat(config.input_format); - src_params.UpdateParams(); - - SurfaceParams dst_params; - dst_params.addr = config.GetPhysicalOutputAddress(); - dst_params.width = config.scaling != config.NoScale ? config.output_width.Value() / 2 - : config.output_width.Value(); - dst_params.height = config.scaling == config.ScaleXY ? config.output_height.Value() / 2 - : config.output_height.Value(); - dst_params.is_tiled = config.input_linear != config.dont_swizzle; - dst_params.pixel_format = PixelFormatFromGPUPixelFormat(config.output_format); - dst_params.UpdateParams(); - - auto [src_surface, src_rect] = GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); - if (!src_surface) { - return false; - } - - dst_params.res_scale = src_surface->res_scale; - - const auto [dst_surface, dst_rect] = GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, false); - if (!dst_surface) { - return false; - } - - if (src_surface->is_tiled != dst_surface->is_tiled) { - std::swap(src_rect.top, src_rect.bottom); - } - if (config.flip_vertically) { - std::swap(src_rect.top, src_rect.bottom); - } - - if (!CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) { - return false; - } - - const TextureBlit texture_blit = { - .src_level = src_surface->LevelOf(src_params.addr), - .dst_level = dst_surface->LevelOf(dst_params.addr), - .src_rect = src_rect, - .dst_rect = dst_rect, - }; - runtime.BlitTextures(*src_surface, *dst_surface, texture_blit); - - InvalidateRegion(dst_params.addr, dst_params.size, dst_surface); - return true; -} - -bool RasterizerCache::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { - SurfaceParams params; - params.addr = config.GetStartAddress(); - params.end = config.GetEndAddress(); - params.size = params.end - params.addr; - params.type = SurfaceType::Fill; - 
params.res_scale = std::numeric_limits::max(); - - SurfaceRef fill_surface = std::make_shared(runtime, params); - - std::memcpy(&fill_surface->fill_data[0], &config.value_32bit, sizeof(u32)); - if (config.fill_32bit) { - fill_surface->fill_size = 4; - } else if (config.fill_24bit) { - fill_surface->fill_size = 3; - } else { - fill_surface->fill_size = 2; - } - - RegisterSurface(fill_surface); - InvalidateRegion(fill_surface->addr, fill_surface->size, fill_surface); - return true; -} - -void RasterizerCache::CopySurface(const SurfaceRef& src_surface, const SurfaceRef& dst_surface, - SurfaceInterval copy_interval) { - MICROPROFILE_SCOPE(RasterizerCache_CopySurface); - - const PAddr copy_addr = copy_interval.lower(); - const SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval); - const auto dst_rect = dst_surface->GetScaledSubRect(subrect_params); - ASSERT(subrect_params.GetInterval() == copy_interval && src_surface != dst_surface); - - if (src_surface->type == SurfaceType::Fill) { - const TextureClear clear = { - .texture_level = dst_surface->LevelOf(copy_addr), - .texture_rect = dst_rect, - .value = src_surface->MakeClearValue(copy_addr, dst_surface->pixel_format), - }; - runtime.ClearTexture(*dst_surface, clear); - return; - } - - const TextureBlit blit = { - .src_level = src_surface->LevelOf(copy_addr), - .dst_level = dst_surface->LevelOf(copy_addr), - .src_rect = src_surface->GetScaledSubRect(subrect_params), - .dst_rect = dst_rect, - }; - runtime.BlitTextures(*src_surface, *dst_surface, blit); -} - -RasterizerCache::SurfaceRef RasterizerCache::GetSurface(const SurfaceParams& params, - ScaleMatch match_res_scale, - bool load_if_create) { - if (params.addr == 0 || params.height * params.width == 0) { - return nullptr; - } - // Use GetSurfaceSubRect instead - ASSERT(params.width == params.stride); - - ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0)); - - // Check for an exact match in existing surfaces - SurfaceRef 
surface = FindMatch(surface_cache, params, match_res_scale); - - if (!surface) { - u16 target_res_scale = params.res_scale; - if (match_res_scale != ScaleMatch::Exact) { - // This surface may have a subrect of another surface with a higher res_scale, find - // it to adjust our params - SurfaceParams find_params = params; - SurfaceRef expandable = - FindMatch(surface_cache, find_params, match_res_scale); - if (expandable && expandable->res_scale > target_res_scale) { - target_res_scale = expandable->res_scale; - } - // Keep res_scale when reinterpreting d24s8 -> rgba8 - if (params.pixel_format == PixelFormat::RGBA8) { - find_params.pixel_format = PixelFormat::D24S8; - expandable = - FindMatch(surface_cache, find_params, match_res_scale); - if (expandable && expandable->res_scale > target_res_scale) { - target_res_scale = expandable->res_scale; - } - } - } - SurfaceParams new_params = params; - new_params.res_scale = target_res_scale; - surface = CreateSurface(new_params); - RegisterSurface(surface); - } - - if (load_if_create) { - ValidateSurface(surface, params.addr, params.size); - } - - return surface; -} - -RasterizerCache::SurfaceRect_Tuple RasterizerCache::GetSurfaceSubRect(const SurfaceParams& params, - ScaleMatch match_res_scale, - bool load_if_create) { - if (params.addr == 0 || params.height * params.width == 0) { - return std::make_tuple(nullptr, Common::Rectangle{}); - } - - // Attempt to find encompassing surface - SurfaceRef surface = FindMatch(surface_cache, params, match_res_scale); - - // Check if FindMatch failed because of res scaling - // If that's the case create a new surface with - // the dimensions of the lower res_scale surface - // to suggest it should not be used again - if (!surface && match_res_scale != ScaleMatch::Ignore) { - surface = FindMatch(surface_cache, params, ScaleMatch::Ignore); - if (surface) { - SurfaceParams new_params = *surface; - new_params.res_scale = params.res_scale; - - surface = CreateSurface(new_params); - 
RegisterSurface(surface); - } - } - - SurfaceParams aligned_params = params; - if (params.is_tiled) { - aligned_params.height = Common::AlignUp(params.height, 8); - aligned_params.width = Common::AlignUp(params.width, 8); - aligned_params.stride = Common::AlignUp(params.stride, 8); - aligned_params.UpdateParams(); - } - - // Check for a surface we can expand before creating a new one - if (!surface) { - surface = FindMatch(surface_cache, aligned_params, match_res_scale); - if (surface) { - aligned_params.width = aligned_params.stride; - aligned_params.UpdateParams(); - - SurfaceParams new_params = *surface; - new_params.addr = std::min(aligned_params.addr, surface->addr); - new_params.end = std::max(aligned_params.end, surface->end); - new_params.size = new_params.end - new_params.addr; - new_params.height = - new_params.size / aligned_params.BytesInPixels(aligned_params.stride); - new_params.UpdateParams(); - ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); - - SurfaceRef new_surface = CreateSurface(new_params); - DuplicateSurface(surface, new_surface); - UnregisterSurface(surface); - - surface = new_surface; - RegisterSurface(new_surface); - } - } - - // No subrect found - create and return a new surface - if (!surface) { - SurfaceParams new_params = aligned_params; - // Can't have gaps in a surface - new_params.width = aligned_params.stride; - new_params.UpdateParams(); - // GetSurface will create the new surface and possibly adjust res_scale if necessary - surface = GetSurface(new_params, match_res_scale, load_if_create); - } else if (load_if_create) { - ValidateSurface(surface, aligned_params.addr, aligned_params.size); - } - - return std::make_tuple(surface, surface->GetScaledSubRect(params)); -} - -RasterizerCache::SurfaceRef RasterizerCache::GetTextureSurface( - const Pica::TexturingRegs::FullTextureConfig& config) { - const auto info = Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); - const u32 
max_level = MipLevels(info.width, info.height, config.config.lod.max_level) - 1; - return GetTextureSurface(info, max_level); -} - -RasterizerCache::SurfaceRef RasterizerCache::GetTextureSurface( - const Pica::Texture::TextureInfo& info, u32 max_level) { - if (info.physical_address == 0) [[unlikely]] { - return nullptr; - } - - SurfaceParams params; - params.addr = info.physical_address; - params.width = info.width; - params.height = info.height; - params.levels = max_level + 1; - params.is_tiled = true; - params.pixel_format = PixelFormatFromTextureFormat(info.format); - params.res_scale = use_filter ? resolution_scale_factor : 1; - params.UpdateParams(); - - u32 min_width = info.width >> max_level; - u32 min_height = info.height >> max_level; - if (min_width % 8 != 0 || min_height % 8 != 0) { - // This code is for 8x4 and 4x4 textures (commonly used by games for health bar) - // The implementation might not be accurate and needs further testing. - if (min_width % 4 == 0 && min_height % 4 == 0 && min_width * min_height <= 32) { - const auto [src_surface, rect] = GetSurfaceSubRect(params, ScaleMatch::Ignore, true); - params.res_scale = src_surface->res_scale; - SurfaceRef tmp_surface = CreateSurface(params); - - const TextureBlit blit = { - .src_level = src_surface->LevelOf(params.addr), - .dst_level = 0, - .src_rect = rect, - .dst_rect = tmp_surface->GetScaledRect(), - }; - runtime.BlitTextures(*src_surface, *tmp_surface, blit); - return tmp_surface; - } - - LOG_CRITICAL(HW_GPU, "Texture size ({}x{}) is not multiple of 4", min_width, min_height); - return nullptr; - } - if (info.width != (min_width << max_level) || info.height != (min_height << max_level)) { - LOG_CRITICAL(HW_GPU, "Texture size ({}x{}) does not support required mipmap level ({})", - params.width, params.height, max_level); - return nullptr; - } - - return GetSurface(params, ScaleMatch::Ignore, true); -} - -RasterizerCache::SurfaceRef RasterizerCache::GetTextureCube(const TextureCubeConfig& config) 
{ - auto [it, new_surface] = texture_cube_cache.try_emplace(config); - TextureCube& cube = it->second; - - if (new_surface) { - SurfaceParams cube_params = { - .addr = config.px, - .width = config.width, - .height = config.width, - .stride = config.width, - .levels = config.levels, - .res_scale = use_filter ? resolution_scale_factor : 1, - .texture_type = TextureType::CubeMap, - .pixel_format = PixelFormatFromTextureFormat(config.format), - .type = SurfaceType::Texture, - }; - cube_params.UpdateParams(); - cube.surface = CreateSurface(cube_params); - } - - const u32 scaled_size = cube.surface->GetScaledWidth(); - const std::array addresses = {config.px, config.nx, config.py, config.ny, config.pz, config.nz}; - - for (u32 i = 0; i < addresses.size(); i++) { - if (!addresses[i]) { - continue; - } - - Pica::Texture::TextureInfo info = { - .physical_address = addresses[i], - .width = config.width, - .height = config.width, - .format = config.format, - }; - info.SetDefaultStride(); - - SurfaceRef& face_surface = cube.faces[i]; - if (!face_surface || !face_surface->registered) { - face_surface = GetTextureSurface(info, config.levels - 1); - ASSERT(face_surface->levels == config.levels); - } - if (cube.ticks[i] != face_surface->ModificationTick()) { - for (u32 level = 0; level < face_surface->levels; level++) { - const TextureCopy texture_copy = { - .src_level = level, - .dst_level = level, - .src_layer = 0, - .dst_layer = i, - .src_offset = {0, 0}, - .dst_offset = {0, 0}, - .extent = {scaled_size >> level, scaled_size >> level}, - }; - runtime.CopyTextures(*face_surface, *cube.surface, texture_copy); - } - cube.ticks[i] = face_surface->ModificationTick(); - } - } - - return cube.surface; -} - -OpenGL::Framebuffer RasterizerCache::GetFramebufferSurfaces(bool using_color_fb, - bool using_depth_fb) { - const auto& config = regs.framebuffer.framebuffer; - - const s32 framebuffer_width = config.GetWidth(); - const s32 framebuffer_height = config.GetHeight(); - const auto 
viewport_rect = regs.rasterizer.GetViewportRect(); - const Common::Rectangle viewport_clamped = { - static_cast(std::clamp(viewport_rect.left, 0, framebuffer_width)), - static_cast(std::clamp(viewport_rect.top, 0, framebuffer_height)), - static_cast(std::clamp(viewport_rect.right, 0, framebuffer_width)), - static_cast(std::clamp(viewport_rect.bottom, 0, framebuffer_height)), - }; - - // get color and depth surfaces - SurfaceParams color_params; - color_params.is_tiled = true; - color_params.res_scale = resolution_scale_factor; - color_params.width = config.GetWidth(); - color_params.height = config.GetHeight(); - SurfaceParams depth_params = color_params; - - color_params.addr = config.GetColorBufferPhysicalAddress(); - color_params.pixel_format = PixelFormatFromColorFormat(config.color_format); - color_params.UpdateParams(); - - depth_params.addr = config.GetDepthBufferPhysicalAddress(); - depth_params.pixel_format = PixelFormatFromDepthFormat(config.depth_format); - depth_params.UpdateParams(); - - auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); - auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); - - // Make sure that framebuffers don't overlap if both color and depth are being used - if (using_color_fb && using_depth_fb && - boost::icl::length(color_vp_interval & depth_vp_interval)) { - LOG_CRITICAL(HW_GPU, "Color and depth framebuffer memory regions overlap; " - "overlapping framebuffers not supported!"); - using_depth_fb = false; - } - - Common::Rectangle color_rect{}; - SurfaceRef color_surface = nullptr; - u32 color_level{}; - if (using_color_fb) - std::tie(color_surface, color_rect) = - GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); - - Common::Rectangle depth_rect{}; - SurfaceRef depth_surface = nullptr; - u32 depth_level{}; - if (using_depth_fb) - std::tie(depth_surface, depth_rect) = - GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); - - Common::Rectangle fb_rect{}; - if 
(color_surface && depth_surface) { - fb_rect = color_rect; - // Color and Depth surfaces must have the same dimensions and offsets - if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || - color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { - color_surface = GetSurface(color_params, ScaleMatch::Exact, false); - depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); - fb_rect = color_surface->GetScaledRect(); - } - } else if (color_surface) { - fb_rect = color_rect; - } else if (depth_surface) { - fb_rect = depth_rect; - } - - if (color_surface) { - color_level = color_surface->LevelOf(color_params.addr); - ValidateSurface(color_surface, boost::icl::first(color_vp_interval), - boost::icl::length(color_vp_interval)); - } - if (depth_surface) { - depth_level = depth_surface->LevelOf(depth_params.addr); - ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), - boost::icl::length(depth_vp_interval)); - } - - render_targets = RenderTargets{ - .color_surface = color_surface, - .depth_surface = depth_surface, - }; - - return OpenGL::Framebuffer{ - runtime, color_surface.get(), color_level, depth_surface.get(), depth_level, regs, fb_rect}; -} - -void RasterizerCache::InvalidateFramebuffer(const OpenGL::Framebuffer& framebuffer) { - if (framebuffer.HasAttachment(SurfaceType::Color)) { - const auto interval = framebuffer.Interval(SurfaceType::Color); - InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), - render_targets.color_surface); - } - if (framebuffer.HasAttachment(SurfaceType::DepthStencil)) { - const auto interval = framebuffer.Interval(SurfaceType::DepthStencil); - InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), - render_targets.depth_surface); - } -} - -RasterizerCache::SurfaceRect_Tuple RasterizerCache::GetTexCopySurface(const SurfaceParams& params) { - Common::Rectangle rect{}; - - SurfaceRef match_surface = - 
FindMatch(surface_cache, params, ScaleMatch::Ignore); - - if (match_surface) { - ValidateSurface(match_surface, params.addr, params.size); - - SurfaceParams match_subrect; - if (params.width != params.stride) { - const u32 tiled_size = match_surface->is_tiled ? 8 : 1; - match_subrect = params; - match_subrect.width = match_surface->PixelsInBytes(params.width) / tiled_size; - match_subrect.stride = match_surface->PixelsInBytes(params.stride) / tiled_size; - match_subrect.height *= tiled_size; - } else { - match_subrect = match_surface->FromInterval(params.GetInterval()); - ASSERT(match_subrect.GetInterval() == params.GetInterval()); - } - - rect = match_surface->GetScaledSubRect(match_subrect); - } - - return std::make_tuple(match_surface, rect); -} - -void RasterizerCache::DuplicateSurface(const SurfaceRef& src_surface, - const SurfaceRef& dest_surface) { - ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); - - const auto src_rect = src_surface->GetScaledRect(); - const auto dst_rect = dest_surface->GetScaledSubRect(*src_surface); - ASSERT(src_rect.GetWidth() == dst_rect.GetWidth()); - - const TextureCopy copy = { - .src_level = 0, - .dst_level = 0, - .src_offset = {src_rect.left, src_rect.bottom}, - .dst_offset = {dst_rect.left, dst_rect.bottom}, - .extent = {src_rect.GetWidth(), src_rect.GetHeight()}, - }; - runtime.CopyTextures(*src_surface, *dest_surface, copy); - - dest_surface->invalid_regions -= src_surface->GetInterval(); - dest_surface->invalid_regions += src_surface->invalid_regions; - - SurfaceRegions regions; - for (const auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { - if (pair.second == src_surface) { - regions += pair.first; - } - } - for (const auto& interval : regions) { - dirty_regions.set({interval, dest_surface}); - } -} - -void RasterizerCache::ValidateSurface(const SurfaceRef& surface, PAddr addr, u32 size) { - if (size == 0) [[unlikely]] { - return; - } - - const 
SurfaceInterval validate_interval(addr, addr + size); - if (surface->type == SurfaceType::Fill) { - ASSERT(surface->IsRegionValid(validate_interval)); - return; - } - - SurfaceRegions validate_regions = surface->invalid_regions & validate_interval; - - auto notify_validated = [&](SurfaceInterval interval) { - surface->MarkValid(interval); - validate_regions.erase(interval); - }; - - u32 level = surface->LevelOf(addr); - SurfaceInterval level_interval = surface->LevelInterval(level); - while (!validate_regions.empty()) { - // Take an invalid interval from the validation regions and clamp it - // to the current level interval since FromInterval cannot process - // intervals that span multiple levels. If the interval is empty - // then we have validated the entire level so move to the next. - const auto interval = *validate_regions.begin() & level_interval; - if (boost::icl::is_empty(interval)) { - level_interval = surface->LevelInterval(++level); - continue; - } - - // Look for a valid surface to copy from. - const SurfaceParams params = surface->FromInterval(interval); - const SurfaceRef copy_surface = - FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); - - if (copy_surface) { - const SurfaceInterval copy_interval = copy_surface->GetCopyableInterval(params); - CopySurface(copy_surface, surface, copy_interval); - notify_validated(copy_interval); - continue; - } - - // Try to find surface in cache with different format - // that can can be reinterpreted to the requested format. - if (ValidateByReinterpretation(surface, params, interval)) { - notify_validated(interval); - continue; - } - // Could not find a matching reinterpreter, check if we need to implement a - // reinterpreter - if (NoUnimplementedReinterpretations(surface, params, interval) && - !IntervalHasInvalidPixelFormat(params, interval)) { - // No surfaces were found in the cache that had a matching bit-width. 
- // If the region was created entirely on the GPU, - // assume it was a developer mistake and skip flushing. - if (boost::icl::contains(dirty_regions, interval)) { - LOG_DEBUG(HW_GPU, "Region created fully on GPU and reinterpretation is " - "invalid. Skipping validation"); - validate_regions.erase(interval); - continue; - } - } - - // Load data from 3DS memory - FlushRegion(params.addr, params.size); - UploadSurface(surface, interval); - notify_validated(params.GetInterval()); - } - - // Filtered mipmaps often look really bad. We can achieve better quality by - // generating them from the base level. - if (surface->res_scale != 1 && level != 0) { - runtime.GenerateMipmaps(*surface); - } -} - -void RasterizerCache::UploadSurface(const SurfaceRef& surface, SurfaceInterval interval) { - MICROPROFILE_SCOPE(RasterizerCache_UploadSurface); - - const SurfaceParams load_info = surface->FromInterval(interval); - ASSERT(load_info.addr >= surface->addr && load_info.end <= surface->end); - - const auto staging = runtime.FindStaging( - load_info.width * load_info.height * surface->GetInternalBytesPerPixel(), true); - - MemoryRef source_ptr = memory.GetPhysicalRef(load_info.addr); - if (!source_ptr) [[unlikely]] { - return; - } - - const auto upload_data = source_ptr.GetWriteBytes(load_info.end - load_info.addr); - DecodeTexture(load_info, load_info.addr, load_info.end, upload_data, staging.mapped, - runtime.NeedsConversion(surface->pixel_format)); - - if (use_custom_textures) { - const u64 hash = ComputeCustomHash(load_info, staging.mapped, upload_data); - if (UploadCustomSurface(surface, load_info, hash)) { - return; - } - } - if (dump_textures && !surface->is_custom) { - const u64 hash = Common::ComputeHash64(upload_data.data(), upload_data.size()); - const u32 level = surface->LevelOf(load_info.addr); - custom_tex_manager.DumpTexture(load_info, level, upload_data, hash); - } - - const BufferTextureCopy upload = { - .buffer_offset = 0, - .buffer_size = staging.size, - 
.texture_rect = surface->GetSubRect(load_info), - .texture_level = surface->LevelOf(load_info.addr), - }; - surface->Upload(upload, staging); -} - -bool RasterizerCache::UploadCustomSurface(const SurfaceRef& surface, const SurfaceParams& load_info, - u64 hash) { - const u32 level = surface->LevelOf(load_info.addr); - const bool is_base_level = level == 0; - Material* material = custom_tex_manager.GetMaterial(hash); - - if (!material) { - return surface->IsCustom(); - } - if (!is_base_level && custom_tex_manager.SkipMipmaps()) { - return true; - } - - surface->is_custom = true; - - const auto upload = [this, level, surface, material]() -> bool { - if (!surface->IsCustom() && !surface->Swap(material)) { - LOG_ERROR(HW_GPU, "Custom compressed format {} unsupported by host GPU", - material->format); - return false; - } - surface->UploadCustom(material, level); - if (custom_tex_manager.SkipMipmaps()) { - runtime.GenerateMipmaps(*surface); - } - return true; - }; - return custom_tex_manager.Decode(material, std::move(upload)); -} - -u64 RasterizerCache::ComputeCustomHash(const SurfaceParams& load_info, std::span decoded, - std::span upload_data) { - MICROPROFILE_SCOPE(RasterizerCache_ComputeHash); - - if (custom_tex_manager.UseNewHash()) { - return Common::ComputeHash64(upload_data.data(), upload_data.size()); - } - return Common::ComputeHash64(decoded.data(), decoded.size()); -} - -void RasterizerCache::DownloadSurface(const SurfaceRef& surface, SurfaceInterval interval) { - MICROPROFILE_SCOPE(RasterizerCache_DownloadSurface); - - const SurfaceParams flush_info = surface->FromInterval(interval); - const u32 flush_start = boost::icl::first(interval); - const u32 flush_end = boost::icl::last_next(interval); - ASSERT(flush_start >= surface->addr && flush_end <= surface->end); - - const auto staging = runtime.FindStaging( - flush_info.width * flush_info.height * surface->GetInternalBytesPerPixel(), false); - - const BufferTextureCopy download = { - .buffer_offset = 0, - 
.buffer_size = staging.size, - .texture_rect = surface->GetSubRect(flush_info), - .texture_level = surface->LevelOf(flush_start), - }; - surface->Download(download, staging); - - MemoryRef dest_ptr = memory.GetPhysicalRef(flush_start); - if (!dest_ptr) [[unlikely]] { - return; - } - - const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start); - EncodeTexture(flush_info, flush_start, flush_end, staging.mapped, download_dest, - runtime.NeedsConversion(surface->pixel_format)); -} - -void RasterizerCache::DownloadFillSurface(const SurfaceRef& surface, SurfaceInterval interval) { - const u32 flush_start = boost::icl::first(interval); - const u32 flush_end = boost::icl::last_next(interval); - ASSERT(flush_start >= surface->addr && flush_end <= surface->end); - - MemoryRef dest_ptr = memory.GetPhysicalRef(flush_start); - if (!dest_ptr) [[unlikely]] { - return; - } - - const u32 start_offset = flush_start - surface->addr; - const u32 download_size = - std::clamp(flush_end - flush_start, 0u, static_cast(dest_ptr.GetSize())); - const u32 coarse_start_offset = start_offset - (start_offset % surface->fill_size); - const u32 backup_bytes = start_offset % surface->fill_size; - - std::array backup_data; - if (backup_bytes) { - std::memcpy(backup_data.data(), &dest_ptr[coarse_start_offset], backup_bytes); - } - - for (u32 offset = coarse_start_offset; offset < download_size; offset += surface->fill_size) { - std::memcpy(&dest_ptr[offset], &surface->fill_data[0], - std::min(surface->fill_size, download_size - offset)); - } - - if (backup_bytes) { - std::memcpy(&dest_ptr[coarse_start_offset], &backup_data[0], backup_bytes); - } -} - -bool RasterizerCache::NoUnimplementedReinterpretations(const SurfaceRef& surface, - SurfaceParams params, - const SurfaceInterval& interval) { - static constexpr std::array all_formats{ - PixelFormat::RGBA8, PixelFormat::RGB8, PixelFormat::RGB5A1, PixelFormat::RGB565, - PixelFormat::RGBA4, PixelFormat::IA8, PixelFormat::RG8, 
PixelFormat::I8, - PixelFormat::A8, PixelFormat::IA4, PixelFormat::I4, PixelFormat::A4, - PixelFormat::ETC1, PixelFormat::ETC1A4, PixelFormat::D16, PixelFormat::D24, - PixelFormat::D24S8, - }; - bool implemented = true; - for (PixelFormat format : all_formats) { - if (GetFormatBpp(format) == surface->GetFormatBpp()) { - params.pixel_format = format; - // This could potentially be expensive, - // although experimentally it hasn't been too bad - SurfaceRef test_surface = - FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); - if (test_surface) { - LOG_WARNING(HW_GPU, "Missing pixel_format reinterpreter: {} -> {}", - PixelFormatAsString(format), - PixelFormatAsString(surface->pixel_format)); - implemented = false; - } - } - } - return implemented; -} - -bool RasterizerCache::IntervalHasInvalidPixelFormat(const SurfaceParams& params, - const SurfaceInterval& interval) { - for (const auto& set : RangeFromInterval(surface_cache, interval)) { - for (const auto& surface : set.second) { - if (surface->pixel_format == PixelFormat::Invalid) { - LOG_DEBUG(HW_GPU, "Surface {:#x} found with invalid pixel format", surface->addr); - return true; - } - } - } - return false; -} - -bool RasterizerCache::ValidateByReinterpretation(const SurfaceRef& surface, SurfaceParams params, - const SurfaceInterval& interval) { - const PixelFormat dest_format = surface->pixel_format; - for (const auto& reinterpreter : runtime.GetPossibleReinterpretations(dest_format)) { - params.pixel_format = reinterpreter->GetSourceFormat(); - SurfaceRef reinterpret_surface = - FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); - - if (reinterpret_surface) { - auto reinterpret_interval = reinterpret_surface->GetCopyableInterval(params); - auto reinterpret_params = surface->FromInterval(reinterpret_interval); - auto src_rect = reinterpret_surface->GetScaledSubRect(reinterpret_params); - auto dest_rect = surface->GetScaledSubRect(reinterpret_params); - 
reinterpreter->Reinterpret(*reinterpret_surface, src_rect, *surface, dest_rect); - - return true; - } - } - - return false; -} - -void RasterizerCache::ClearAll(bool flush) { - const auto flush_interval = PageMap::interval_type::right_open(0x0, 0xFFFFFFFF); - // Force flush all surfaces from the cache - if (flush) { - FlushRegion(0x0, 0xFFFFFFFF); - } - // Unmark all of the marked pages - for (auto& pair : RangeFromInterval(cached_pages, flush_interval)) { - const auto interval = pair.first & flush_interval; - - const PAddr interval_start_addr = boost::icl::first(interval) << Memory::CITRA_PAGE_BITS; - const PAddr interval_end_addr = boost::icl::last_next(interval) << Memory::CITRA_PAGE_BITS; - const u32 interval_size = interval_end_addr - interval_start_addr; - - memory.RasterizerMarkRegionCached(interval_start_addr, interval_size, false); - } - - // Remove the whole cache without really looking at it. - cached_pages -= flush_interval; - dirty_regions -= SurfaceInterval(0x0, 0xFFFFFFFF); - surface_cache -= SurfaceInterval(0x0, 0xFFFFFFFF); - remove_surfaces.clear(); -} - -void RasterizerCache::FlushRegion(PAddr addr, u32 size, SurfaceRef flush_surface) { - if (size == 0) - return; - - const SurfaceInterval flush_interval(addr, addr + size); - SurfaceRegions flushed_intervals; - - for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { - // small sizes imply that this most likely comes from the cpu, flush the entire region - // the point is to avoid thousands of small writes every frame if the cpu decides to - // access that region, anything higher than 8 you're guaranteed it comes from a service - const auto interval = size <= 8 ? 
pair.first : pair.first & flush_interval; - auto& surface = pair.second; - - if (flush_surface && surface != flush_surface) - continue; - - // Sanity check, this surface is the last one that marked this region dirty - ASSERT(surface->IsRegionValid(interval)); - - if (surface->type == SurfaceType::Fill) { - DownloadFillSurface(surface, interval); - } else { - DownloadSurface(surface, interval); - } - - flushed_intervals += interval; - } - - // Reset dirty regions - dirty_regions -= flushed_intervals; -} - -void RasterizerCache::FlushAll() { - FlushRegion(0, 0xFFFFFFFF); -} - -void RasterizerCache::InvalidateRegion(PAddr addr, u32 size, const SurfaceRef& region_owner) { - if (size == 0) - return; - - const SurfaceInterval invalid_interval(addr, addr + size); - - if (region_owner) { - ASSERT(region_owner->type != SurfaceType::Texture); - ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); - // Surfaces can't have a gap - ASSERT(region_owner->width == region_owner->stride); - region_owner->MarkValid(invalid_interval); - } - - for (const auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { - for (const auto& cached_surface : pair.second) { - if (cached_surface == region_owner) - continue; - - // If cpu is invalidating this region we want to remove it - // to (likely) mark the memory pages as uncached - if (!region_owner && size <= 8) { - FlushRegion(cached_surface->addr, cached_surface->size, cached_surface); - remove_surfaces.push_back(cached_surface); - continue; - } - - const auto interval = cached_surface->GetInterval() & invalid_interval; - cached_surface->MarkInvalid(interval); - - // If the surface has no salvageable data it should be removed from the cache to avoid - // clogging the data structure - if (cached_surface->IsFullyInvalid()) { - remove_surfaces.push_back(cached_surface); - } - } - } - - if (region_owner) { - dirty_regions.set({invalid_interval, region_owner}); - } else { - dirty_regions.erase(invalid_interval); - } - 
- for (const SurfaceRef& remove_surface : remove_surfaces) { - UnregisterSurface(remove_surface); - } - remove_surfaces.clear(); -} - -RasterizerCache::SurfaceRef RasterizerCache::CreateSurface(const SurfaceParams& params) { - SurfaceRef surface = std::make_shared(runtime, params); - surface->MarkInvalid(surface->GetInterval()); - return surface; -} - -void RasterizerCache::RegisterSurface(const SurfaceRef& surface) { - if (surface->registered) { - return; - } - surface->registered = true; - surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); - UpdatePagesCachedCount(surface->addr, surface->size, 1); -} - -void RasterizerCache::UnregisterSurface(const SurfaceRef& surface) { - if (!surface->registered) { - return; - } - surface->registered = false; - UpdatePagesCachedCount(surface->addr, surface->size, -1); - surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); -} - -void RasterizerCache::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) { - const u32 num_pages = - ((addr + size - 1) >> Memory::CITRA_PAGE_BITS) - (addr >> Memory::CITRA_PAGE_BITS) + 1; - const u32 page_start = addr >> Memory::CITRA_PAGE_BITS; - const u32 page_end = page_start + num_pages; - - // Interval maps will erase segments if count reaches 0, so if delta is negative we have to - // subtract after iterating - const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); - if (delta > 0) { - cached_pages.add({pages_interval, delta}); - } - - for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { - const auto interval = pair.first & pages_interval; - const int count = pair.second; - - const PAddr interval_start_addr = boost::icl::first(interval) << Memory::CITRA_PAGE_BITS; - const PAddr interval_end_addr = boost::icl::last_next(interval) << Memory::CITRA_PAGE_BITS; - const u32 interval_size = interval_end_addr - interval_start_addr; - - if (delta > 0 && count == delta) { - 
memory.RasterizerMarkRegionCached(interval_start_addr, interval_size, true); - } else if (delta < 0 && count == -delta) { - memory.RasterizerMarkRegionCached(interval_start_addr, interval_size, false); - } else { - ASSERT(count >= 0); - } - } - - if (delta < 0) { - cached_pages.add({pages_interval, delta}); - } -} } // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index c2653a66b..cf387aa43 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -4,188 +4,1410 @@ #pragma once -#include -#include -#include -#include -#include -#include "video_core/rasterizer_cache/surface_base.h" -#include "video_core/rasterizer_cache/surface_params.h" -#include "video_core/renderer_opengl/gl_texture_runtime.h" +#include +#include +#include +#include "common/alignment.h" +#include "common/logging/log.h" +#include "common/microprofile.h" +#include "common/settings.h" +#include "core/memory.h" +#include "video_core/custom_textures/custom_tex_manager.h" +#include "video_core/rasterizer_cache/rasterizer_cache_base.h" +#include "video_core/regs.h" +#include "video_core/renderer_base.h" #include "video_core/texture/texture_decode.h" -namespace Memory { -class MemorySystem; -} - -namespace Pica { -struct Regs; -} - namespace VideoCore { -enum class ScaleMatch { - Exact, // only accept same res scale - Upscale, // only allow higher scale than params - Ignore // accept every scaled res -}; +MICROPROFILE_DECLARE(RasterizerCache_CopySurface); +MICROPROFILE_DECLARE(RasterizerCache_UploadSurface); +MICROPROFILE_DECLARE(RasterizerCache_DownloadSurface); +MICROPROFILE_DECLARE(RasterizerCache_Invalidation); -class CustomTexManager; -class CustomTexture; -class RendererBase; +constexpr auto RangeFromInterval(const auto& map, const auto& interval) { + return boost::make_iterator_range(map.equal_range(interval)); +} -class RasterizerCache : 
NonCopyable { -public: - using SurfaceRef = std::shared_ptr; +template +RasterizerCache::RasterizerCache(Memory::MemorySystem& memory_, + CustomTexManager& custom_tex_manager_, Runtime& runtime_, + Pica::Regs& regs_, RendererBase& renderer_) + : memory{memory_}, custom_tex_manager{custom_tex_manager_}, runtime{runtime_}, regs{regs_}, + renderer{renderer_}, resolution_scale_factor{renderer.GetResolutionScaleFactor()}, + use_filter{Settings::values.texture_filter.GetValue() != Settings::TextureFilter::None}, + dump_textures{Settings::values.dump_textures.GetValue()}, + use_custom_textures{Settings::values.custom_textures.GetValue()} { + using TextureConfig = Pica::TexturingRegs::TextureConfig; - // Declare rasterizer interval types - using SurfaceSet = std::set; - using SurfaceMap = boost::icl::interval_map; - using SurfaceCache = boost::icl::interval_map; + // Create null handles for all cached resources + void(slot_surfaces.insert(runtime, SurfaceParams{ + .width = 1, + .height = 1, + .stride = 1, + .texture_type = VideoCore::TextureType::Texture2D, + .pixel_format = VideoCore::PixelFormat::RGBA8, + .type = VideoCore::SurfaceType::Color, + })); + void(slot_surfaces.insert(runtime, SurfaceParams{ + .width = 1, + .height = 1, + .stride = 1, + .texture_type = TextureType::CubeMap, + .pixel_format = PixelFormat::RGBA8, + .type = SurfaceType::Color, + })); + void(slot_samplers.insert(runtime, SamplerParams{ + .mag_filter = TextureConfig::TextureFilter::Linear, + .min_filter = TextureConfig::TextureFilter::Linear, + .mip_filter = TextureConfig::TextureFilter::Linear, + .wrap_s = TextureConfig::WrapMode::ClampToBorder, + .wrap_t = TextureConfig::WrapMode::ClampToBorder, + })); +} - static_assert(std::is_same() && - std::is_same(), - "Incorrect interval types"); +template +RasterizerCache::~RasterizerCache() { + ClearAll(false); +} - using SurfaceRect_Tuple = std::tuple>; - using PageMap = boost::icl::interval_map; +template +void RasterizerCache::TickFrame() { + 
custom_tex_manager.TickFrame(); - struct RenderTargets { - SurfaceRef color_surface; - SurfaceRef depth_surface; + const u32 scale_factor = renderer.GetResolutionScaleFactor(); + const bool resolution_scale_changed = resolution_scale_factor != scale_factor; + const bool use_custom_texture_changed = + Settings::values.custom_textures.GetValue() != use_custom_textures; + const bool texture_filter_changed = + renderer.Settings().texture_filter_update_requested.exchange(false); + + if (resolution_scale_changed || texture_filter_changed || use_custom_texture_changed) { + resolution_scale_factor = scale_factor; + use_filter = Settings::values.texture_filter.GetValue() != Settings::TextureFilter::None; + use_custom_textures = Settings::values.custom_textures.GetValue(); + if (use_custom_textures) { + custom_tex_manager.FindCustomTextures(); + } + UnregisterAll(); + } +} + +template +bool RasterizerCache::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { + // Texture copy size is aligned to 16 byte units + const u32 copy_size = Common::AlignDown(config.texture_copy.size, 16); + if (copy_size == 0) { + return false; + } + + u32 input_gap = config.texture_copy.input_gap * 16; + u32 input_width = config.texture_copy.input_width * 16; + if (input_width == 0 && input_gap != 0) { + return false; + } + if (input_gap == 0 || input_width >= copy_size) { + input_width = copy_size; + input_gap = 0; + } + if (copy_size % input_width != 0) { + return false; + } + + u32 output_gap = config.texture_copy.output_gap * 16; + u32 output_width = config.texture_copy.output_width * 16; + if (output_width == 0 && output_gap != 0) { + return false; + } + if (output_gap == 0 || output_width >= copy_size) { + output_width = copy_size; + output_gap = 0; + } + if (copy_size % output_width != 0) { + return false; + } + + SurfaceParams src_params; + src_params.addr = config.GetPhysicalInputAddress(); + src_params.stride = input_width + input_gap; // stride in bytes + 
src_params.width = input_width; // width in bytes + src_params.height = copy_size / input_width; + src_params.size = ((src_params.height - 1) * src_params.stride) + src_params.width; + src_params.end = src_params.addr + src_params.size; + + const auto [src_surface_id, src_rect] = GetTexCopySurface(src_params); + if (!src_surface_id) { + return false; + } + + const SurfaceParams src_info = slot_surfaces[src_surface_id]; + if (output_gap != 0 && + (output_width != src_info.BytesInPixels(src_rect.GetWidth() / src_info.res_scale) * + (src_info.is_tiled ? 8 : 1) || + output_gap % src_info.BytesInPixels(src_info.is_tiled ? 64 : 1) != 0)) { + return false; + } + + SurfaceParams dst_params = src_info; + dst_params.addr = config.GetPhysicalOutputAddress(); + dst_params.width = src_rect.GetWidth() / src_info.res_scale; + dst_params.stride = + dst_params.width + src_info.PixelsInBytes(src_info.is_tiled ? output_gap / 8 : output_gap); + dst_params.height = src_rect.GetHeight() / src_info.res_scale; + dst_params.res_scale = src_info.res_scale; + dst_params.UpdateParams(); + + // Since we are going to invalidate the gap if there is one, we will have to load it first + const bool load_gap = output_gap != 0; + const auto [dst_surface_id, dst_rect] = + GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, load_gap); + if (!dst_surface_id) { + return false; + } + + Surface& src_surface = slot_surfaces[src_surface_id]; + Surface& dst_surface = slot_surfaces[dst_surface_id]; + + if (dst_surface.type == SurfaceType::Texture || + !CheckFormatsBlittable(src_surface.pixel_format, dst_surface.pixel_format)) { + return false; + } + + ASSERT(src_rect.GetWidth() == dst_rect.GetWidth()); + + const TextureCopy texture_copy = { + .src_level = src_surface.LevelOf(src_params.addr), + .dst_level = dst_surface.LevelOf(dst_params.addr), + .src_offset = {src_rect.left, src_rect.bottom}, + .dst_offset = {dst_rect.left, dst_rect.bottom}, + .extent = {src_rect.GetWidth(), src_rect.GetHeight()}, + }; + 
runtime.CopyTextures(src_surface, dst_surface, texture_copy); + + InvalidateRegion(dst_params.addr, dst_params.size, dst_surface_id); + return true; +} + +template +bool RasterizerCache::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { + SurfaceParams src_params; + src_params.addr = config.GetPhysicalInputAddress(); + src_params.width = config.output_width; + src_params.stride = config.input_width; + src_params.height = config.output_height; + src_params.is_tiled = !config.input_linear; + src_params.pixel_format = PixelFormatFromGPUPixelFormat(config.input_format); + src_params.UpdateParams(); + + SurfaceParams dst_params; + dst_params.addr = config.GetPhysicalOutputAddress(); + dst_params.width = config.scaling != config.NoScale ? config.output_width.Value() / 2 + : config.output_width.Value(); + dst_params.height = config.scaling == config.ScaleXY ? config.output_height.Value() / 2 + : config.output_height.Value(); + dst_params.is_tiled = config.input_linear != config.dont_swizzle; + dst_params.pixel_format = PixelFormatFromGPUPixelFormat(config.output_format); + dst_params.UpdateParams(); + + auto [src_surface_id, src_rect] = GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); + if (!src_surface_id) { + return false; + } + + dst_params.res_scale = slot_surfaces[src_surface_id].res_scale; + + const auto [dst_surface_id, dst_rect] = + GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, false); + if (!dst_surface_id) { + return false; + } + + Surface& src_surface = slot_surfaces[src_surface_id]; + Surface& dst_surface = slot_surfaces[dst_surface_id]; + + if (src_surface.is_tiled != dst_surface.is_tiled) { + std::swap(src_rect.top, src_rect.bottom); + } + if (config.flip_vertically) { + std::swap(src_rect.top, src_rect.bottom); + } + + if (!CheckFormatsBlittable(src_surface.pixel_format, dst_surface.pixel_format)) { + return false; + } + + const TextureBlit texture_blit = { + .src_level = src_surface.LevelOf(src_params.addr), + 
.dst_level = dst_surface.LevelOf(dst_params.addr), + .src_rect = src_rect, + .dst_rect = dst_rect, + }; + runtime.BlitTextures(src_surface, dst_surface, texture_blit); + + InvalidateRegion(dst_params.addr, dst_params.size, dst_surface_id); + return true; +} + +template +bool RasterizerCache::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { + SurfaceParams params; + params.addr = config.GetStartAddress(); + params.end = config.GetEndAddress(); + params.size = params.end - params.addr; + params.type = SurfaceType::Fill; + params.res_scale = std::numeric_limits::max(); + + SurfaceId fill_surface_id = slot_surfaces.insert(runtime, params); + Surface& fill_surface = slot_surfaces[fill_surface_id]; + + std::memcpy(&fill_surface.fill_data[0], &config.value_32bit, sizeof(u32)); + if (config.fill_32bit) { + fill_surface.fill_size = 4; + } else if (config.fill_24bit) { + fill_surface.fill_size = 3; + } else { + fill_surface.fill_size = 2; + } + + RegisterSurface(fill_surface_id); + InvalidateRegion(fill_surface.addr, fill_surface.size, fill_surface_id); + return true; +} + +template +typename T::Surface& RasterizerCache::GetSurface(SurfaceId surface_id) { + return slot_surfaces[surface_id]; +} + +template +typename T::Sampler& RasterizerCache::GetSampler(SamplerId sampler_id) { + return slot_samplers[sampler_id]; +} + +template +typename T::Sampler& RasterizerCache::GetSampler( + const Pica::TexturingRegs::TextureConfig& config) { + const SamplerParams params = { + .mag_filter = config.mag_filter, + .min_filter = config.min_filter, + .mip_filter = config.mip_filter, + .wrap_s = config.wrap_s, + .wrap_t = config.wrap_t, + .border_color = config.border_color.raw, + .lod_min = config.lod.min_level, + .lod_max = config.lod.max_level, + .lod_bias = config.lod.bias, }; - struct TextureCube { - SurfaceRef surface{}; - std::array faces{}; - std::array ticks{}; + auto [it, is_new] = samplers.try_emplace(params); + if (is_new) { + it->second = slot_samplers.insert(runtime, 
params); + } + + return slot_samplers[it->second]; +} + +template +void RasterizerCache::CopySurface(Surface& src_surface, Surface& dst_surface, + SurfaceInterval copy_interval) { + MICROPROFILE_SCOPE(RasterizerCache_CopySurface); + + const PAddr copy_addr = copy_interval.lower(); + const SurfaceParams subrect_params = dst_surface.FromInterval(copy_interval); + const auto dst_rect = dst_surface.GetScaledSubRect(subrect_params); + ASSERT(subrect_params.GetInterval() == copy_interval); + + if (src_surface.type == SurfaceType::Fill) { + const TextureClear clear = { + .texture_level = dst_surface.LevelOf(copy_addr), + .texture_rect = dst_rect, + .value = src_surface.MakeClearValue(copy_addr, dst_surface.pixel_format), + }; + runtime.ClearTexture(dst_surface, clear); + return; + } + + const TextureBlit blit = { + .src_level = src_surface.LevelOf(copy_addr), + .dst_level = dst_surface.LevelOf(copy_addr), + .src_rect = src_surface.GetScaledSubRect(subrect_params), + .dst_rect = dst_rect, + }; + runtime.BlitTextures(src_surface, dst_surface, blit); +} + +template +SurfaceId RasterizerCache::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create) { + if (params.addr == 0 || params.height * params.width == 0) { + return {}; + } + // Use GetSurfaceSubRect instead + ASSERT(params.width == params.stride); + ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0)); + + // Check for an exact match in existing surfaces + SurfaceId surface_id = FindMatch(params, match_res_scale); + + if (!surface_id) { + u16 target_res_scale = params.res_scale; + if (match_res_scale != ScaleMatch::Exact) { + // This surface may have a subrect of another surface with a higher res_scale, find + // it to adjust our params + SurfaceParams find_params = params; + SurfaceId expandable_id = FindMatch(find_params, match_res_scale); + if (expandable_id) { + Surface& expandable = slot_surfaces[expandable_id]; + if (expandable.res_scale > 
target_res_scale) { + target_res_scale = expandable.res_scale; + } + } + // Keep res_scale when reinterpreting d24s8 -> rgba8 + if (params.pixel_format == PixelFormat::RGBA8) { + find_params.pixel_format = PixelFormat::D24S8; + expandable_id = FindMatch(find_params, match_res_scale); + if (expandable_id) { + Surface& expandable = slot_surfaces[expandable_id]; + if (expandable.res_scale > target_res_scale) { + target_res_scale = expandable.res_scale; + } + } + } + } + SurfaceParams new_params = params; + new_params.res_scale = target_res_scale; + surface_id = CreateSurface(new_params); + RegisterSurface(surface_id); + } + + if (load_if_create) { + ValidateSurface(surface_id, params.addr, params.size); + } + + return surface_id; +} + +template +typename RasterizerCache::SurfaceRect_Tuple RasterizerCache::GetSurfaceSubRect( + const SurfaceParams& params, ScaleMatch match_res_scale, bool load_if_create) { + if (params.addr == 0 || params.height * params.width == 0) { + return std::make_pair(SurfaceId{}, Common::Rectangle{}); + } + + // Attempt to find encompassing surface + SurfaceId surface_id = FindMatch(params, match_res_scale); + + // Check if FindMatch failed because of res scaling. If that's the case create a new surface + // with the dimensions of the lower res_scale surface to suggest it should not be used again. 
+ if (!surface_id && match_res_scale != ScaleMatch::Ignore) { + surface_id = FindMatch(params, ScaleMatch::Ignore); + if (surface_id) { + SurfaceParams new_params = slot_surfaces[surface_id]; + new_params.res_scale = params.res_scale; + + surface_id = CreateSurface(new_params); + RegisterSurface(surface_id); + } + } + + SurfaceParams aligned_params = params; + if (params.is_tiled) { + aligned_params.height = Common::AlignUp(params.height, 8); + aligned_params.width = Common::AlignUp(params.width, 8); + aligned_params.stride = Common::AlignUp(params.stride, 8); + aligned_params.UpdateParams(); + } + + // Check for a surface we can expand before creating a new one + if (!surface_id) { + surface_id = FindMatch(aligned_params, match_res_scale); + if (surface_id) { + Surface& surface = slot_surfaces[surface_id]; + aligned_params.width = aligned_params.stride; + aligned_params.UpdateParams(); + + SurfaceParams new_params = surface; + new_params.addr = std::min(aligned_params.addr, surface.addr); + new_params.end = std::max(aligned_params.end, surface.end); + new_params.size = new_params.end - new_params.addr; + new_params.height = + new_params.size / aligned_params.BytesInPixels(aligned_params.stride); + new_params.UpdateParams(); + ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); + + SurfaceId new_surface_id = CreateSurface(new_params); + DuplicateSurface(surface_id, new_surface_id); + UnregisterSurface(surface_id); + RegisterSurface(new_surface_id); + surface_id = new_surface_id; + } + } + + // No subrect found - create and return a new surface + if (!surface_id) { + SurfaceParams new_params = aligned_params; + // Can't have gaps in a surface + new_params.width = aligned_params.stride; + new_params.UpdateParams(); + // GetSurface will create the new surface and possibly adjust res_scale if necessary + surface_id = GetSurface(new_params, match_res_scale, load_if_create); + } else if (load_if_create) { + ValidateSurface(surface_id, 
aligned_params.addr, aligned_params.size); + } + + return std::make_pair(surface_id, slot_surfaces[surface_id].GetScaledSubRect(params)); +} + +template +typename T::Surface& RasterizerCache::GetTextureSurface( + const Pica::TexturingRegs::FullTextureConfig& config) { + const auto info = Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); + const u32 max_level = MipLevels(info.width, info.height, config.config.lod.max_level) - 1; + const SurfaceId surface_id = GetTextureSurface(info, max_level); + return slot_surfaces[surface_id]; +} + +template +SurfaceId RasterizerCache::GetTextureSurface(const Pica::Texture::TextureInfo& info, + u32 max_level) { + if (info.physical_address == 0) [[unlikely]] { + // Can occur when texture addr is null or its memory is unmapped/invalid + // HACK: In this case, the correct behaviour for the PICA is to use the last + // rendered colour. But because this would be impractical to implement, the + // next best alternative is to use a clear texture, essentially skipping + // the geometry in question. + // For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn + // on the male character's face, which in the OpenGL default appear black. + return NULL_SURFACE_ID; + } + + SurfaceParams params; + params.addr = info.physical_address; + params.width = info.width; + params.height = info.height; + params.levels = max_level + 1; + params.is_tiled = true; + params.pixel_format = PixelFormatFromTextureFormat(info.format); + params.res_scale = use_filter ? 
resolution_scale_factor : 1; + params.UpdateParams(); + + const u32 min_width = info.width >> max_level; + const u32 min_height = info.height >> max_level; + if (min_width % 8 != 0 || min_height % 8 != 0) { + if (min_width % 4 == 0 && min_height % 4 == 0) { + const auto [src_surface_id, rect] = GetSurfaceSubRect(params, ScaleMatch::Ignore, true); + Surface& src_surface = slot_surfaces[src_surface_id]; + + params.res_scale = src_surface.res_scale; + SurfaceId tmp_surface_id = CreateSurface(params); + Surface& tmp_surface = slot_surfaces[tmp_surface_id]; + + const TextureBlit blit = { + .src_level = src_surface.LevelOf(params.addr), + .dst_level = 0, + .src_rect = rect, + .dst_rect = tmp_surface.GetScaledRect(), + }; + runtime.BlitTextures(src_surface, tmp_surface, blit); + return tmp_surface_id; + } + + LOG_CRITICAL(HW_GPU, "Texture size ({}x{}) is not multiple of 4", min_width, min_height); + return NULL_SURFACE_ID; + } + if (info.width != (min_width << max_level) || info.height != (min_height << max_level)) { + LOG_CRITICAL(HW_GPU, "Texture size ({}x{}) does not support required mipmap level ({})", + params.width, params.height, max_level); + return NULL_SURFACE_ID; + } + + SurfaceId surface_id = GetSurface(params, ScaleMatch::Ignore, true); + return surface_id ? surface_id : NULL_SURFACE_ID; +} + +template +typename T::Surface& RasterizerCache::GetTextureCube(const TextureCubeConfig& config) { + if (config.width == 0) [[unlikely]] { + return slot_surfaces[NULL_SURFACE_CUBE_ID]; + } + + auto [it, new_surface] = texture_cube_cache.try_emplace(config); + TextureCube& cube = it->second; + + if (new_surface) { + SurfaceParams cube_params = { + .addr = config.px, + .width = config.width, + .height = config.width, + .stride = config.width, + .levels = config.levels, + .res_scale = use_filter ? 
resolution_scale_factor : 1, + .texture_type = TextureType::CubeMap, + .pixel_format = PixelFormatFromTextureFormat(config.format), + .type = SurfaceType::Texture, + }; + cube_params.UpdateParams(); + cube.surface_id = CreateSurface(cube_params); + } + + const u32 scaled_size = slot_surfaces[cube.surface_id].GetScaledWidth(); + const std::array addresses = {config.px, config.nx, config.py, config.ny, config.pz, config.nz}; + + Pica::Texture::TextureInfo info = { + .width = config.width, + .height = config.width, + .format = config.format, + }; + info.SetDefaultStride(); + + for (u32 i = 0; i < addresses.size(); i++) { + if (!addresses[i]) { + continue; + } + + SurfaceId& face_id = cube.face_ids[i]; + if (!face_id) { + info.physical_address = addresses[i]; + face_id = GetTextureSurface(info, config.levels - 1); + ASSERT_MSG(slot_surfaces[face_id].levels >= config.levels, + "Texture cube face levels are not enough to validate the levels requested"); + } + Surface& surface = slot_surfaces[face_id]; + surface.flags |= SurfaceFlagBits::Tracked; + if (cube.ticks[i] == surface.modification_tick) { + continue; + } + cube.ticks[i] = surface.modification_tick; + Surface& cube_surface = slot_surfaces[cube.surface_id]; + for (u32 level = 0; level < config.levels; level++) { + const u32 width_lod = scaled_size >> level; + const TextureCopy texture_copy = { + .src_level = level, + .dst_level = level, + .src_layer = 0, + .dst_layer = i, + .src_offset = {0, 0}, + .dst_offset = {0, 0}, + .extent = {width_lod, width_lod}, + }; + runtime.CopyTextures(surface, cube_surface, texture_copy); + } + } + + return slot_surfaces[cube.surface_id]; +} + +template +typename T::Framebuffer RasterizerCache::GetFramebufferSurfaces(bool using_color_fb, + bool using_depth_fb) { + const auto& config = regs.framebuffer.framebuffer; + + const s32 framebuffer_width = config.GetWidth(); + const s32 framebuffer_height = config.GetHeight(); + const auto viewport_rect = regs.rasterizer.GetViewportRect(); + 
const Common::Rectangle viewport_clamped = { + static_cast(std::clamp(viewport_rect.left, 0, framebuffer_width)), + static_cast(std::clamp(viewport_rect.top, 0, framebuffer_height)), + static_cast(std::clamp(viewport_rect.right, 0, framebuffer_width)), + static_cast(std::clamp(viewport_rect.bottom, 0, framebuffer_height)), }; -public: - RasterizerCache(Memory::MemorySystem& memory, CustomTexManager& custom_tex_manager, - OpenGL::TextureRuntime& runtime, Pica::Regs& regs, RendererBase& renderer); - ~RasterizerCache(); + // get color and depth surfaces + SurfaceParams color_params; + color_params.is_tiled = true; + color_params.res_scale = resolution_scale_factor; + color_params.width = config.GetWidth(); + color_params.height = config.GetHeight(); + SurfaceParams depth_params = color_params; - /// Notify the cache that a new frame has been queued - void TickFrame(); + color_params.addr = config.GetColorBufferPhysicalAddress(); + color_params.pixel_format = PixelFormatFromColorFormat(config.color_format); + color_params.UpdateParams(); - /// Perform hardware accelerated texture copy according to the provided configuration - bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config); + depth_params.addr = config.GetDepthBufferPhysicalAddress(); + depth_params.pixel_format = PixelFormatFromDepthFormat(config.depth_format); + depth_params.UpdateParams(); - /// Perform hardware accelerated display transfer according to the provided configuration - bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config); + auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); + auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); - /// Perform hardware accelerated memory fill according to the provided configuration - bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config); + // Make sure that framebuffers don't overlap if both color and depth are being used + if (using_color_fb && using_depth_fb && 
+ boost::icl::length(color_vp_interval & depth_vp_interval)) { + LOG_CRITICAL(HW_GPU, "Color and depth framebuffer memory regions overlap; " + "overlapping framebuffers not supported!"); + using_depth_fb = false; + } - /// Copy one surface's region to another - void CopySurface(const SurfaceRef& src_surface, const SurfaceRef& dst_surface, - SurfaceInterval copy_interval); + Common::Rectangle color_rect{}; + SurfaceId color_id{}; + u32 color_level{}; + if (using_color_fb) + std::tie(color_id, color_rect) = GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); - /// Load a texture from 3DS memory to OpenGL and cache it (if not already cached) - SurfaceRef GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create); + Common::Rectangle depth_rect{}; + SurfaceId depth_id{}; + u32 depth_level{}; + if (using_depth_fb) + std::tie(depth_id, depth_rect) = GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); - /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from - /// 3DS memory to OpenGL and caches it (if not already cached) - SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create); + Common::Rectangle fb_rect{}; + if (color_id && depth_id) { + fb_rect = color_rect; + // Color and Depth surfaces must have the same dimensions and offsets + if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || + color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { + color_id = GetSurface(color_params, ScaleMatch::Exact, false); + depth_id = GetSurface(depth_params, ScaleMatch::Exact, false); + fb_rect = slot_surfaces[color_id].GetScaledRect(); + } + } else if (color_id) { + fb_rect = color_rect; + } else if (depth_id) { + fb_rect = depth_rect; + } - /// Get a surface based on the texture configuration - SurfaceRef GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config); - SurfaceRef 
GetTextureSurface(const Pica::Texture::TextureInfo& info, u32 max_level = 0); + const Surface* color_surface = color_id ? &slot_surfaces[color_id] : nullptr; + const Surface* depth_surface = depth_id ? &slot_surfaces[depth_id] : nullptr; - /// Get a texture cube based on the texture configuration - SurfaceRef GetTextureCube(const TextureCubeConfig& config); + if (color_id) { + color_level = color_surface->LevelOf(color_params.addr); + ValidateSurface(color_id, boost::icl::first(color_vp_interval), + boost::icl::length(color_vp_interval)); + } + if (depth_id) { + depth_level = depth_surface->LevelOf(depth_params.addr); + ValidateSurface(depth_id, boost::icl::first(depth_vp_interval), + boost::icl::length(depth_vp_interval)); + } - /// Get the color and depth surfaces based on the framebuffer configuration - OpenGL::Framebuffer GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb); + render_targets = RenderTargets{ + .color_id = color_id, + .depth_id = depth_id, + }; - /// Marks the draw rectangle defined in framebuffer as invalid - void InvalidateFramebuffer(const OpenGL::Framebuffer& framebuffer); + return Framebuffer{runtime, color_surface, color_level, depth_surface, + depth_level, regs, fb_rect}; +} - /// Get a surface that matches a "texture copy" display transfer config - SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); +template +void RasterizerCache::InvalidateFramebuffer(const Framebuffer& framebuffer) { + const auto invalidate = [&](SurfaceId surface_id) { + if (!surface_id) { + return; + } + Surface& surface = slot_surfaces[surface_id]; + const SurfaceInterval interval = framebuffer.Interval(surface.type); + const PAddr addr = boost::icl::first(interval); + const u32 size = boost::icl::length(interval); + InvalidateRegion(addr, size, surface_id); + }; + const bool has_color = framebuffer.HasAttachment(SurfaceType::Color); + const bool has_depth = framebuffer.HasAttachment(SurfaceType::DepthStencil); + if (has_color) { + 
invalidate(render_targets.color_id); + } + if (has_depth) { + invalidate(render_targets.depth_id); + } +} - /// Write any cached resources overlapping the region back to memory (if dirty) - void FlushRegion(PAddr addr, u32 size, SurfaceRef flush_surface = nullptr); +template +typename RasterizerCache::SurfaceRect_Tuple RasterizerCache::GetTexCopySurface( + const SurfaceParams& params) { + Common::Rectangle rect{}; - /// Mark region as being invalidated by region_owner (nullptr if 3DS memory) - void InvalidateRegion(PAddr addr, u32 size, const SurfaceRef& region_owner); + SurfaceId match_id = FindMatch(params, ScaleMatch::Ignore); - /// Flush all cached resources tracked by this cache manager - void FlushAll(); + if (match_id) { + ValidateSurface(match_id, params.addr, params.size); - /// Clear all cached resources tracked by this cache manager - void ClearAll(bool flush); + SurfaceParams match_subrect; + Surface& match_surface = slot_surfaces[match_id]; + if (params.width != params.stride) { + const u32 tiled_size = match_surface.is_tiled ? 
8 : 1; + match_subrect = params; + match_subrect.width = match_surface.PixelsInBytes(params.width) / tiled_size; + match_subrect.stride = match_surface.PixelsInBytes(params.stride) / tiled_size; + match_subrect.height *= tiled_size; + } else { + match_subrect = match_surface.FromInterval(params.GetInterval()); + ASSERT(match_subrect.GetInterval() == params.GetInterval()); + } -private: - /// Transfers ownership of a memory region from src_surface to dest_surface - void DuplicateSurface(const SurfaceRef& src_surface, const SurfaceRef& dest_surface); + rect = match_surface.GetScaledSubRect(match_subrect); + } - /// Update surface's texture for given region when necessary - void ValidateSurface(const SurfaceRef& surface, PAddr addr, u32 size); + return std::make_pair(match_id, rect); +} - /// Copies pixel data in interval from the guest VRAM to the host GPU surface - void UploadSurface(const SurfaceRef& surface, SurfaceInterval interval); +template +template +void RasterizerCache::ForEachSurfaceInRegion(PAddr addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + boost::container::small_vector surfaces; + ForEachPage(addr, size, [this, &surfaces, addr, size, func](u64 page) { + const auto it = page_table.find(page); + if (it == page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const SurfaceId surface_id : it->second) { + Surface& surface = slot_surfaces[surface_id]; + if (True(surface.flags & SurfaceFlagBits::Picked)) { + continue; + } + if (!surface.Overlaps(addr, size)) { + continue; + } - /// Uploads a custom texture identified with hash to the target surface - bool UploadCustomSurface(const SurfaceRef& surface, const SurfaceParams& load_info, u64 hash); + surface.flags |= SurfaceFlagBits::Picked; + surfaces.push_back(surface_id); + if constexpr (BOOL_BREAK) { + if (func(surface_id, surface)) { + return true; + } + } else { + 
func(surface_id, surface); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const SurfaceId surface_id : surfaces) { + slot_surfaces[surface_id].flags &= ~SurfaceFlagBits::Picked; + } +} - /// Returns the hash used to lookup the custom surface. - u64 ComputeCustomHash(const SurfaceParams& load_info, std::span decoded, - std::span upload_data); +template +template +SurfaceId RasterizerCache::FindMatch(const SurfaceParams& params, ScaleMatch match_scale_type, + std::optional validate_interval) { + SurfaceId match_id{}; + bool match_valid = false; + u32 match_scale = 0; + SurfaceInterval match_interval{}; - /// Copies pixel data in interval from the host GPU surface to the guest VRAM - void DownloadSurface(const SurfaceRef& surface, SurfaceInterval interval); + ForEachSurfaceInRegion(params.addr, params.size, [&](SurfaceId surface_id, Surface& surface) { + const bool res_scale_matched = match_scale_type == ScaleMatch::Exact + ? (params.res_scale == surface.res_scale) + : (params.res_scale <= surface.res_scale); + const bool is_valid = + True(find_flags & MatchFlags::Copy) + ? 
true + : surface.IsRegionValid(validate_interval.value_or(params.GetInterval())); - /// Downloads a fill surface to guest VRAM - void DownloadFillSurface(const SurfaceRef& surface, SurfaceInterval interval); + auto IsMatch_Helper = [&](auto check_type, auto match_fn) { + if (False(find_flags & check_type)) + return; - /// Returns false if there is a surface in the cache at the interval with the same bit-width, - bool NoUnimplementedReinterpretations(const SurfaceRef& surface, SurfaceParams params, - const SurfaceInterval& interval); + bool matched; + SurfaceInterval surface_interval; + std::tie(matched, surface_interval) = match_fn(); + if (!matched) + return; - /// Return true if a surface with an invalid pixel format exists at the interval - bool IntervalHasInvalidPixelFormat(const SurfaceParams& params, - const SurfaceInterval& interval); + if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && + surface.type != SurfaceType::Fill) + return; - /// Attempt to find a reinterpretable surface in the cache and use it to copy for validation - bool ValidateByReinterpretation(const SurfaceRef& surface, SurfaceParams params, - const SurfaceInterval& interval); + // Found a match, update only if this is better than the previous one + auto UpdateMatch = [&] { + match_id = surface_id; + match_valid = is_valid; + match_scale = surface.res_scale; + match_interval = surface_interval; + }; - /// Create a new surface - SurfaceRef CreateSurface(const SurfaceParams& params); + if (surface.res_scale > match_scale) { + UpdateMatch(); + return; + } else if (surface.res_scale < match_scale) { + return; + } - /// Register surface into the cache - void RegisterSurface(const SurfaceRef& surface); + if (is_valid && !match_valid) { + UpdateMatch(); + return; + } else if (is_valid != match_valid) { + return; + } - /// Remove surface from the cache - void UnregisterSurface(const SurfaceRef& surface); + if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) 
{ + UpdateMatch(); + } + }; + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface.ExactMatch(params), surface.GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface.CanSubRect(params), surface.GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + ASSERT(validate_interval); + const SurfaceInterval copy_interval = + surface.GetCopyableInterval(params.FromInterval(*validate_interval)); + const bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && + surface.CanCopy(params, copy_interval); + return std::make_pair(matched, copy_interval); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + ASSERT(validate_interval); + const bool matched = + !boost::icl::contains(surface.invalid_regions, *validate_interval) && + surface.CanReinterpret(params); + return std::make_pair(matched, surface.GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface.CanExpand(params), surface.GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface.CanTexCopy(params), surface.GetInterval()); + }); + }); + return match_id; +} - /// Increase/decrease the number of surface in pages touching the specified region - void UpdatePagesCachedCount(PAddr addr, u32 size, int delta); +template +void RasterizerCache::DuplicateSurface(SurfaceId src_id, SurfaceId dst_id) { + Surface& src_surface = slot_surfaces[src_id]; + Surface& dst_surface = slot_surfaces[dst_id]; + ASSERT(dst_surface.addr <= src_surface.addr && dst_surface.end >= src_surface.end); -private: - Memory::MemorySystem& memory; - CustomTexManager& custom_tex_manager; - OpenGL::TextureRuntime& runtime; - Pica::Regs& regs; - RendererBase& renderer; - SurfaceCache surface_cache; - PageMap cached_pages; - SurfaceMap dirty_regions; - std::vector remove_surfaces; - u32 resolution_scale_factor; - RenderTargets render_targets; - 
std::unordered_map texture_cube_cache; - bool use_filter; - bool dump_textures; - bool use_custom_textures; -}; + const auto src_rect = src_surface.GetScaledRect(); + const auto dst_rect = dst_surface.GetScaledSubRect(src_surface); + ASSERT(src_rect.GetWidth() == dst_rect.GetWidth()); + + const TextureCopy copy = { + .src_level = 0, + .dst_level = 0, + .src_offset = {src_rect.left, src_rect.bottom}, + .dst_offset = {dst_rect.left, dst_rect.bottom}, + .extent = {src_rect.GetWidth(), src_rect.GetHeight()}, + }; + runtime.CopyTextures(src_surface, dst_surface, copy); + + dst_surface.invalid_regions -= src_surface.GetInterval(); + dst_surface.invalid_regions += src_surface.invalid_regions; + + SurfaceRegions regions; + for (const auto& pair : RangeFromInterval(dirty_regions, src_surface.GetInterval())) { + if (pair.second == src_id) { + regions += pair.first; + } + } + for (const auto& interval : regions) { + dirty_regions.set({interval, dst_id}); + } +} + +template +void RasterizerCache::ValidateSurface(SurfaceId surface_id, PAddr addr, u32 size) { + if (size == 0) [[unlikely]] { + return; + } + + Surface& surface = slot_surfaces[surface_id]; + const SurfaceInterval validate_interval(addr, addr + size); + + if (surface.type == SurfaceType::Fill) { + ASSERT_MSG(surface.IsRegionValid(validate_interval), + "Attempted to validate a non-valid fill surface"); + return; + } + + SurfaceRegions validate_regions = surface.invalid_regions & validate_interval; + + auto notify_validated = [&](SurfaceInterval interval) { + surface.MarkValid(interval); + validate_regions.erase(interval); + }; + + u32 level = surface.LevelOf(addr); + SurfaceInterval level_interval = surface.LevelInterval(level); + while (!validate_regions.empty()) { + // Take an invalid interval from the validation regions and clamp it + // to the current level interval. If the interval is empty + // then we have validated the entire level so move to the next. 
+ const auto interval = *validate_regions.begin() & level_interval; + if (boost::icl::is_empty(interval)) { + level_interval = surface.LevelInterval(++level); + continue; + } + + // Look for a valid surface to copy from. + const SurfaceParams params = surface.FromInterval(interval); + const SurfaceId copy_surface_id = + FindMatch(params, ScaleMatch::Ignore, interval); + if (copy_surface_id && copy_surface_id != surface_id) { + Surface& copy_surface = slot_surfaces[copy_surface_id]; + const SurfaceInterval copy_interval = copy_surface.GetCopyableInterval(params); + CopySurface(copy_surface, surface, copy_interval); + notify_validated(copy_interval); + continue; + } + + // Try to find surface in cache with different format + // that can be reinterpreted to the requested format. + if (ValidateByReinterpretation(surface, params, interval)) { + notify_validated(interval); + continue; + } + + FlushRegion(params.addr, params.size); + if (!use_custom_textures || !UploadCustomSurface(surface_id, interval)) { + UploadSurface(surface, interval); + } + notify_validated(params.GetInterval()); + } + + // Filtered mipmaps often look really bad. We can achieve better quality by + // generating them from the base level.
+ if (surface.res_scale != 1 && level != 0) { + runtime.GenerateMipmaps(surface); + } +} + +template +void RasterizerCache::UploadSurface(Surface& surface, SurfaceInterval interval) { + MICROPROFILE_SCOPE(RasterizerCache_UploadSurface); + + const SurfaceParams load_info = surface.FromInterval(interval); + ASSERT(load_info.addr >= surface.addr && load_info.end <= surface.end); + + const auto staging = runtime.FindStaging( + load_info.width * load_info.height * surface.GetInternalBytesPerPixel(), true); + + MemoryRef source_ptr = memory.GetPhysicalRef(load_info.addr); + if (!source_ptr) [[unlikely]] { + return; + } + + const auto upload_data = source_ptr.GetWriteBytes(load_info.end - load_info.addr); + DecodeTexture(load_info, load_info.addr, load_info.end, upload_data, staging.mapped, + runtime.NeedsConversion(surface.pixel_format)); + + if (dump_textures && False(surface.flags & SurfaceFlagBits::Custom)) { + const u64 hash = Common::ComputeHash64(upload_data.data(), upload_data.size()); + const u32 level = surface.LevelOf(load_info.addr); + custom_tex_manager.DumpTexture(load_info, level, upload_data, hash); + } + + const BufferTextureCopy upload = { + .buffer_offset = 0, + .buffer_size = staging.size, + .texture_rect = surface.GetSubRect(load_info), + .texture_level = surface.LevelOf(load_info.addr), + }; + surface.Upload(upload, staging); +} + +template +bool RasterizerCache::UploadCustomSurface(SurfaceId surface_id, SurfaceInterval interval) { + MICROPROFILE_SCOPE(RasterizerCache_UploadSurface); + + Surface& surface = slot_surfaces[surface_id]; + const SurfaceParams load_info = surface.FromInterval(interval); + ASSERT(load_info.addr >= surface.addr && load_info.end <= surface.end); + + MemoryRef source_ptr = memory.GetPhysicalRef(load_info.addr); + if (!source_ptr) [[unlikely]] { + return false; + } + + const auto upload_data = source_ptr.GetWriteBytes(load_info.end - load_info.addr); + const u64 hash = [&] { + if (!custom_tex_manager.UseNewHash()) { + const u32 
width = load_info.width; + const u32 height = load_info.height; + const u32 bpp = surface.GetInternalBytesPerPixel(); + auto decoded = std::vector(width * height * bpp); + DecodeTexture(load_info, load_info.addr, load_info.end, upload_data, decoded, false); + return Common::ComputeHash64(decoded.data(), decoded.size()); + } else { + return Common::ComputeHash64(upload_data.data(), upload_data.size()); + } + }(); + + const u32 level = surface.LevelOf(load_info.addr); + Material* material = custom_tex_manager.GetMaterial(hash); + + if (!material) { + return surface.IsCustom(); + } + if (level != 0 && custom_tex_manager.SkipMipmaps()) { + return true; + } + + surface.flags |= SurfaceFlagBits::Custom; + + const auto upload = [this, level, surface_id, material]() -> bool { + Surface& surface = slot_surfaces[surface_id]; + if (False(surface.flags & SurfaceFlagBits::Custom)) { + LOG_ERROR(HW_GPU, "Surface is not suitable for custom upload, aborting!"); + return false; + } + if (!surface.IsCustom() && !surface.Swap(material)) { + LOG_ERROR(HW_GPU, "Custom compressed format {} unsupported by host GPU", + material->format); + return false; + } + surface.UploadCustom(material, level); + if (custom_tex_manager.SkipMipmaps()) { + runtime.GenerateMipmaps(surface); + } + return true; + }; + return custom_tex_manager.Decode(material, std::move(upload)); +} + +template +void RasterizerCache::DownloadSurface(Surface& surface, SurfaceInterval interval) { + MICROPROFILE_SCOPE(RasterizerCache_DownloadSurface); + + const SurfaceParams flush_info = surface.FromInterval(interval); + const u32 flush_start = boost::icl::first(interval); + const u32 flush_end = boost::icl::last_next(interval); + ASSERT(flush_start >= surface.addr && flush_end <= surface.end); + + const auto staging = runtime.FindStaging( + flush_info.width * flush_info.height * surface.GetInternalBytesPerPixel(), false); + + const BufferTextureCopy download = { + .buffer_offset = 0, + .buffer_size = staging.size, + 
.texture_rect = surface.GetSubRect(flush_info), + .texture_level = surface.LevelOf(flush_start), + }; + surface.Download(download, staging); + + MemoryRef dest_ptr = memory.GetPhysicalRef(flush_start); + if (!dest_ptr) [[unlikely]] { + return; + } + + const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start); + EncodeTexture(flush_info, flush_start, flush_end, staging.mapped, download_dest, + runtime.NeedsConversion(surface.pixel_format)); +} + +template +void RasterizerCache::DownloadFillSurface(Surface& surface, SurfaceInterval interval) { + const u32 flush_start = boost::icl::first(interval); + const u32 flush_end = boost::icl::last_next(interval); + ASSERT(flush_start >= surface.addr && flush_end <= surface.end); + + MemoryRef dest_ptr = memory.GetPhysicalRef(flush_start); + if (!dest_ptr) [[unlikely]] { + return; + } + + const u32 start_offset = flush_start - surface.addr; + const u32 download_size = + std::clamp(flush_end - flush_start, 0u, static_cast(dest_ptr.GetSize())); + const u32 coarse_start_offset = start_offset - (start_offset % surface.fill_size); + const u32 backup_bytes = start_offset % surface.fill_size; + + std::array backup_data; + if (backup_bytes) { + std::memcpy(backup_data.data(), &dest_ptr[coarse_start_offset], backup_bytes); + } + + for (u32 offset = coarse_start_offset; offset < download_size; offset += surface.fill_size) { + std::memcpy(&dest_ptr[offset], &surface.fill_data[0], + std::min(surface.fill_size, download_size - offset)); + } + + if (backup_bytes) { + std::memcpy(&dest_ptr[coarse_start_offset], &backup_data[0], backup_bytes); + } +} + +template +bool RasterizerCache::ValidateByReinterpretation(Surface& surface, SurfaceParams params, + const SurfaceInterval& interval) { + const bool is_gpu_modified = boost::icl::contains(dirty_regions, interval); + SurfaceId reinterpret_id = + FindMatch(params, ScaleMatch::Ignore, interval); + if (reinterpret_id) { + Surface& src_surface = slot_surfaces[reinterpret_id]; + if 
(src_surface.stride == surface.stride) { + const SurfaceInterval copy_interval = src_surface.GetCopyableInterval(params); + if (boost::icl::is_empty(copy_interval)) { + return false; + } + const PAddr addr = boost::icl::lower(interval); + const SurfaceParams copy_params = surface.FromInterval(copy_interval); + const TextureBlit reinterpret = { + .src_level = src_surface.LevelOf(addr), + .dst_level = surface.LevelOf(addr), + .src_rect = src_surface.GetScaledSubRect(copy_params), + .dst_rect = surface.GetScaledSubRect(copy_params), + }; + return runtime.Reinterpret(src_surface, surface, reinterpret); + } + LOG_INFO(HW_GPU, "Unimplemented dimentional reinterpretatation {}x{} -> {}x{}", + src_surface.width, src_surface.height, surface.width, surface.height); + // A surface with matching bit width was found but couldn't be reinterpreted + // due to mismatching stride. It's probably a developer mistake so skip flushing + // if it was created on the GPU. + return is_gpu_modified; + } + + // No surfaces were found in the cache that had a matching bit-width. + // If there's a surface with invalid format it means the region was cleared + // so we don't want to skip validation in that case. 
+ const bool has_invalid = IntervalHasInvalidPixelFormat(params, interval); + return !has_invalid && is_gpu_modified; +} + +template +bool RasterizerCache::IntervalHasInvalidPixelFormat(const SurfaceParams& params, + SurfaceInterval interval) { + bool invalid_format_found = false; + const PAddr addr = boost::icl::lower(interval); + const u32 size = boost::icl::length(interval); + ForEachSurfaceInRegion(addr, size, [&](SurfaceId surface_id, Surface& surface) { + if (surface.pixel_format == PixelFormat::Invalid) { + invalid_format_found = true; + return true; + } + return false; + }); + return invalid_format_found; +} + +template +void RasterizerCache::ClearAll(bool flush) { + const auto flush_interval = PageMap::interval_type::right_open(0x0, 0xFFFFFFFF); + // Force flush all surfaces from the cache + if (flush) { + FlushRegion(0x0, 0xFFFFFFFF); + } + // Unmark all of the marked pages + for (auto& pair : RangeFromInterval(cached_pages, flush_interval)) { + const auto interval = pair.first & flush_interval; + + const PAddr interval_start_addr = boost::icl::first(interval) << Memory::CITRA_PAGE_BITS; + const PAddr interval_end_addr = boost::icl::last_next(interval) << Memory::CITRA_PAGE_BITS; + const u32 interval_size = interval_end_addr - interval_start_addr; + + memory.RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + } + + // Remove the whole cache without really looking at it. 
+ cached_pages -= flush_interval; + dirty_regions -= SurfaceInterval(0x0, 0xFFFFFFFF); + page_table.clear(); + remove_surfaces.clear(); +} + +template +void RasterizerCache::FlushRegion(PAddr addr, u32 size, SurfaceId flush_surface_id) { + if (size == 0) [[unlikely]] { + return; + } + + const SurfaceInterval flush_interval(addr, addr + size); + SurfaceRegions flushed_intervals; + + for (const auto& [region, surface_id] : RangeFromInterval(dirty_regions, flush_interval)) { + // Small sizes imply that this most likely comes from the cpu, flush the entire region + // the point is to avoid thousands of small writes every frame if the cpu decides to + // access that region, anything higher than 8 you're guaranteed it comes from a service + const auto interval = size <= 8 ? region : region & flush_interval; + if (flush_surface_id && surface_id != flush_surface_id) { + continue; + } + + // Sanity check, this surface is the last one that marked this region dirty + Surface& surface = slot_surfaces[surface_id]; + ASSERT(surface.IsRegionValid(interval)); + + if (surface.type == SurfaceType::Fill) { + DownloadFillSurface(surface, interval); + } else { + DownloadSurface(surface, interval); + } + + flushed_intervals += interval; + } + + // Reset dirty regions + dirty_regions -= flushed_intervals; +} + +template +void RasterizerCache::FlushAll() { + FlushRegion(0, 0xFFFFFFFF); +} + +template +void RasterizerCache::InvalidateRegion(PAddr addr, u32 size, SurfaceId region_owner_id) { + if (size == 0) [[unlikely]] { + return; + } + + const SurfaceInterval invalid_interval(addr, addr + size); + + if (region_owner_id) { + Surface& region_owner = slot_surfaces[region_owner_id]; + ASSERT(region_owner.type != SurfaceType::Texture); + ASSERT(addr >= region_owner.addr && addr + size <= region_owner.end); + ASSERT(region_owner.width == region_owner.stride); + region_owner.MarkValid(invalid_interval); + } + + ForEachSurfaceInRegion(addr, size, [&](SurfaceId surface_id, Surface& surface) { + 
if (surface_id == region_owner_id) { + return; + } + + // If the CPU is invalidating this region we want to remove it + // to (likely) mark the memory pages as uncached + if (!region_owner_id && size <= 8) { + FlushRegion(surface.addr, surface.size, surface_id); + remove_surfaces.push_back(surface_id); + return; + } + + surface.MarkInvalid(surface.GetInterval() & invalid_interval); + + // If the surface has no salvageable data it should be removed + // from the cache to avoid clogging the data structure. + if (surface.IsFullyInvalid()) { + remove_surfaces.push_back(surface_id); + } + }); + + if (region_owner_id) { + dirty_regions.set({invalid_interval, region_owner_id}); + } else { + dirty_regions.erase(invalid_interval); + } + + for (const SurfaceId remove_surface_id : remove_surfaces) { + UnregisterSurface(remove_surface_id); + } + remove_surfaces.clear(); +} + +template +SurfaceId RasterizerCache::CreateSurface(const SurfaceParams& params) { + SurfaceId surface_id = slot_surfaces.insert(runtime, params); + Surface& surface = slot_surfaces[surface_id]; + surface.MarkInvalid(surface.GetInterval()); + return surface_id; +} + +template +void RasterizerCache::RegisterSurface(SurfaceId surface_id) { + Surface& surface = slot_surfaces[surface_id]; + ASSERT_MSG(False(surface.flags & SurfaceFlagBits::Registered), + "Trying to register an already registered surface"); + + surface.flags |= SurfaceFlagBits::Registered; + UpdatePagesCachedCount(surface.addr, surface.size, 1); + ForEachPage(surface.addr, surface.size, + [this, surface_id](u64 page) { page_table[page].push_back(surface_id); }); +} + +template +void RasterizerCache::UnregisterSurface(SurfaceId surface_id) { + Surface& surface = slot_surfaces[surface_id]; + ASSERT_MSG(True(surface.flags & SurfaceFlagBits::Registered), + "Trying to unregister an already unregistered surface"); + + surface.flags &= ~SurfaceFlagBits::Registered; + UpdatePagesCachedCount(surface.addr, surface.size, -1); + ForEachPage(surface.addr, 
surface.size, [this, surface_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << CITRA_PAGEBITS); + return; + } + std::vector& surfaces = page_it.value(); + const auto vector_it = std::find(surfaces.begin(), surfaces.end(), surface_id); + if (vector_it == surfaces.end()) { + ASSERT_MSG(false, "Unregistering unregistered surface in page=0x{:x}", + page << CITRA_PAGEBITS); + return; + } + surfaces.erase(vector_it); + }); + + if (True(surface.flags & SurfaceFlagBits::Tracked)) { + auto it = texture_cube_cache.begin(); + while (it != texture_cube_cache.end()) { + std::array& face_ids = it->second.face_ids; + const auto array_it = std::find(face_ids.begin(), face_ids.end(), surface_id); + if (array_it != face_ids.end()) { + *array_it = SurfaceId{}; + } + if (std::none_of(face_ids.begin(), face_ids.end(), [](SurfaceId id) { return id; })) { + slot_surfaces.erase(it->second.surface_id); + it = texture_cube_cache.erase(it); + continue; + } + it++; + } + } + + slot_surfaces.erase(surface_id); +} + +template +void RasterizerCache::UnregisterAll() { + FlushAll(); + for (auto& [page, surfaces] : page_table) { + while (!surfaces.empty()) { + UnregisterSurface(surfaces.back()); + } + } + texture_cube_cache.clear(); + remove_surfaces.clear(); + runtime.Reset(); +} + +template +void RasterizerCache::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) { + const u32 num_pages = + ((addr + size - 1) >> Memory::CITRA_PAGE_BITS) - (addr >> Memory::CITRA_PAGE_BITS) + 1; + const u32 page_start = addr >> Memory::CITRA_PAGE_BITS; + const u32 page_end = page_start + num_pages; + + // Interval maps will erase segments if count reaches 0, so if delta is negative we have to + // subtract after iterating + const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); + if (delta > 0) { + cached_pages.add({pages_interval, delta}); + } + + for (const 
auto& pair : RangeFromInterval(cached_pages, pages_interval)) { + const auto interval = pair.first & pages_interval; + const int count = pair.second; + + const PAddr interval_start_addr = boost::icl::first(interval) << Memory::CITRA_PAGE_BITS; + const PAddr interval_end_addr = boost::icl::last_next(interval) << Memory::CITRA_PAGE_BITS; + const u32 interval_size = interval_end_addr - interval_start_addr; + + if (delta > 0 && count == delta) { + memory.RasterizerMarkRegionCached(interval_start_addr, interval_size, true); + } else if (delta < 0 && count == -delta) { + memory.RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + } else { + ASSERT(count >= 0); + } + } + + if (delta < 0) { + cached_pages.add({pages_interval, delta}); + } +} } // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/rasterizer_cache_base.h b/src/video_core/rasterizer_cache/rasterizer_cache_base.h new file mode 100644 index 000000000..eb638ae34 --- /dev/null +++ b/src/video_core/rasterizer_cache/rasterizer_cache_base.h @@ -0,0 +1,229 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include "video_core/rasterizer_cache/sampler_params.h" +#include "video_core/rasterizer_cache/surface_base.h" + +namespace Memory { +class MemorySystem; +} + +namespace Pica { +struct Regs; +} + +namespace Pica::Texture { +struct TextureInfo; +} + +namespace VideoCore { + +enum class ScaleMatch { + Exact, ///< Only accept same res scale + Upscale, ///< Only allow same or higher scale than params + Ignore ///< Accept every scaled res +}; + +enum class MatchFlags { + Exact = 1 << 0, ///< Surface perfectly matches params + SubRect = 1 << 1, ///< Surface encompasses params + Copy = 1 << 2, ///< Surface that can be used as a copy source + Expand = 1 << 3, ///< Surface that can expand params + TexCopy = 1 << 4, ///< Surface that matches the parameters of a display transfer "texture copy" + Reinterpret = 1 << 5, ///< Surface might have different pixel format. +}; + +DECLARE_ENUM_FLAG_OPERATORS(MatchFlags); + +class CustomTexManager; +class RendererBase; + +template +class RasterizerCache { + /// Address shift for caching surfaces into a hash table + static constexpr u64 CITRA_PAGEBITS = 18; + + using Runtime = typename T::Runtime; + using Sampler = typename T::Sampler; + using Surface = typename T::Surface; + using Framebuffer = typename T::Framebuffer; + + using SurfaceMap = boost::icl::interval_map; + + using SurfaceRect_Tuple = std::pair>; + using PageMap = boost::icl::interval_map; + + struct RenderTargets { + SurfaceId color_id; + SurfaceId depth_id; + }; + + struct TextureCube { + SurfaceId surface_id; + std::array face_ids; + std::array ticks; + }; + +public: + explicit RasterizerCache(Memory::MemorySystem& memory, CustomTexManager& custom_tex_manager, + Runtime& runtime, Pica::Regs& regs, RendererBase& renderer); + ~RasterizerCache(); + + /// Notify the cache that a new frame has been queued + void TickFrame(); + + /// Perform hardware accelerated texture copy according to the provided
configuration + bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config); + + /// Perform hardware accelerated display transfer according to the provided configuration + bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config); + + /// Perform hardware accelerated memory fill according to the provided configuration + bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config); + + /// Returns a reference to the surface object assigned to surface_id + Surface& GetSurface(SurfaceId surface_id); + + /// Returns a reference to the sampler object matching the provided configuration + Sampler& GetSampler(const Pica::TexturingRegs::TextureConfig& config); + Sampler& GetSampler(SamplerId sampler_id); + + /// Copy one surface's region to another + void CopySurface(Surface& src_surface, Surface& dst_surface, SurfaceInterval copy_interval); + + /// Load a texture from 3DS memory to the host GPU and cache it (if not already cached) + SurfaceId GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create); + + /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from + /// 3DS memory to the host GPU and caches it (if not already cached) + SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create); + + /// Get a surface based on the texture configuration + Surface& GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config); + SurfaceId GetTextureSurface(const Pica::Texture::TextureInfo& info, u32 max_level = 0); + + /// Get a texture cube based on the texture configuration + Surface& GetTextureCube(const TextureCubeConfig& config); + + /// Get the color and depth surfaces based on the framebuffer configuration + Framebuffer GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb); + + /// Marks the draw rectangle defined in framebuffer as invalid + void InvalidateFramebuffer(const Framebuffer& framebuffer);
+ + /// Get a surface that matches a "texture copy" display transfer config + SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); + + /// Write any cached resources overlapping the region back to memory (if dirty) + void FlushRegion(PAddr addr, u32 size, SurfaceId flush_surface = {}); + + /// Mark region as being invalidated by region_owner (null SurfaceId if 3DS memory) + void InvalidateRegion(PAddr addr, u32 size, SurfaceId region_owner = {}); + + /// Flush all cached resources tracked by this cache manager + void FlushAll(); + + /// Clear all cached resources tracked by this cache manager + void ClearAll(bool flush); + +private: + /// Iterate over all page indices in a range + template + void ForEachPage(PAddr addr, size_t size, Func&& func) { + static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; + const u64 page_end = (addr + size - 1) >> CITRA_PAGEBITS; + for (u64 page = addr >> CITRA_PAGEBITS; page <= page_end; ++page) { + if constexpr (RETURNS_BOOL) { + if (func(page)) { + break; + } + } else { + func(page); + } + } + } + + /// Iterates over all the surfaces in a region calling func + template + void ForEachSurfaceInRegion(PAddr addr, size_t size, Func&& func); + + /// Get the best surface match for the given flags + template + SurfaceId FindMatch(const SurfaceParams& params, ScaleMatch match_scale_type, + std::optional validate_interval = std::nullopt); + + /// Transfers ownership of a memory region from src_surface to dest_surface + void DuplicateSurface(SurfaceId src_id, SurfaceId dst_id); + + /// Update surface's texture for given region when necessary + void ValidateSurface(SurfaceId surface, PAddr addr, u32 size); + + /// Copies pixel data in interval from the guest VRAM to the host GPU surface + void UploadSurface(Surface& surface, SurfaceInterval interval); + + /// Uploads a custom texture identified with hash to the target surface + bool UploadCustomSurface(SurfaceId surface_id, SurfaceInterval interval); + + ///
Copies pixel data in interval from the host GPU surface to the guest VRAM + void DownloadSurface(Surface& surface, SurfaceInterval interval); + + /// Downloads a fill surface to guest VRAM + void DownloadFillSurface(Surface& surface, SurfaceInterval interval); + + /// Attempt to find a reinterpretable surface in the cache and use it to copy for validation + bool ValidateByReinterpretation(Surface& surface, SurfaceParams params, + const SurfaceInterval& interval); + + /// Return true if a surface with an invalid pixel format exists at the interval + bool IntervalHasInvalidPixelFormat(const SurfaceParams& params, SurfaceInterval interval); + + /// Create a new surface + SurfaceId CreateSurface(const SurfaceParams& params); + + /// Register surface into the cache + void RegisterSurface(SurfaceId surface); + + /// Remove surface from the cache + void UnregisterSurface(SurfaceId surface); + + /// Unregisters all surfaces from the cache + void UnregisterAll(); + + /// Increase/decrease the number of surfaces in pages touching the specified region + void UpdatePagesCachedCount(PAddr addr, u32 size, int delta); + +private: + Memory::MemorySystem& memory; + CustomTexManager& custom_tex_manager; + Runtime& runtime; + Pica::Regs& regs; + RendererBase& renderer; + std::unordered_map texture_cube_cache; + tsl::robin_pg_map, Common::IdentityHash> page_table; + std::unordered_map samplers; + Common::SlotVector slot_surfaces; + Common::SlotVector slot_samplers; + SurfaceMap dirty_regions; + PageMap cached_pages; + std::vector remove_surfaces; + u32 resolution_scale_factor; + RenderTargets render_targets; + bool use_filter; + bool dump_textures; + bool use_custom_textures; +}; + +} // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/sampler_params.h b/src/video_core/rasterizer_cache/sampler_params.h new file mode 100644 index 000000000..fcb47394e --- /dev/null +++ b/src/video_core/rasterizer_cache/sampler_params.h @@ -0,0 +1,43 @@ +// Copyright 2023 Citra Emulator
Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include "common/hash.h" +#include "video_core/regs_texturing.h" + +namespace VideoCore { + +struct SamplerParams { + using TextureConfig = Pica::TexturingRegs::TextureConfig; + TextureConfig::TextureFilter mag_filter; + TextureConfig::TextureFilter min_filter; + TextureConfig::TextureFilter mip_filter; + TextureConfig::WrapMode wrap_s; + TextureConfig::WrapMode wrap_t; + u32 border_color = 0; + u32 lod_min = 0; + u32 lod_max = 0; + s32 lod_bias = 0; + + auto operator<=>(const SamplerParams&) const noexcept = default; + + const u64 Hash() const { + return Common::ComputeHash64(this, sizeof(SamplerParams)); + } +}; +static_assert(std::has_unique_object_representations_v, + "SamplerParams is not suitable for hashing"); + +} // namespace VideoCore + +namespace std { +template <> +struct hash { + std::size_t operator()(const VideoCore::SamplerParams& params) const noexcept { + return params.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/rasterizer_cache/surface_base.cpp b/src/video_core/rasterizer_cache/surface_base.cpp index 0826be1ca..1ad8bfa37 100644 --- a/src/video_core/rasterizer_cache/surface_base.cpp +++ b/src/video_core/rasterizer_cache/surface_base.cpp @@ -45,13 +45,16 @@ bool SurfaceBase::CanFill(const SurfaceParams& dest_surface, SurfaceInterval fil } bool SurfaceBase::CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const { - SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); + const SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); ASSERT(subrect_params.GetInterval() == copy_interval); - if (CanSubRect(subrect_params)) - return true; - if (CanFill(dest_surface, copy_interval)) + if (CanSubRect(subrect_params)) { return true; + } + + if (CanFill(dest_surface, copy_interval)) { + return true; + } return false; } @@ -102,6 +105,23 @@ SurfaceInterval 
SurfaceBase::GetCopyableInterval(const SurfaceParams& params) co return result; } +Extent SurfaceBase::RealExtent(bool scaled) { + const bool is_custom = IsCustom(); + u32 real_width = width; + u32 real_height = height; + if (is_custom) { + real_width = material->width; + real_height = material->height; + } else if (scaled) { + real_width = GetScaledWidth(); + real_height = GetScaledHeight(); + } + return Extent{ + .width = real_width, + .height = real_height, + }; +} + bool SurfaceBase::HasNormalMap() const noexcept { return material && material->Map(MapType::Normal) != nullptr; } diff --git a/src/video_core/rasterizer_cache/surface_base.h b/src/video_core/rasterizer_cache/surface_base.h index 3deb989b5..9be06e0df 100644 --- a/src/video_core/rasterizer_cache/surface_base.h +++ b/src/video_core/rasterizer_cache/surface_base.h @@ -6,6 +6,7 @@ #include #include "video_core/rasterizer_cache/surface_params.h" +#include "video_core/rasterizer_cache/utils.h" namespace VideoCore { @@ -13,6 +14,15 @@ using SurfaceRegions = boost::icl::interval_set(overlap_size); + return addr < overlap_end && overlap_addr < end; + } + u64 ModificationTick() const noexcept { return modification_tick; } bool IsCustom() const noexcept { - return is_custom && custom_format != CustomPixelFormat::Invalid; + return True(flags & SurfaceFlagBits::Custom) && custom_format != CustomPixelFormat::Invalid; } bool IsRegionValid(SurfaceInterval interval) const { - return (invalid_regions.find(interval) == invalid_regions.end()); + return invalid_regions.find(interval) == invalid_regions.end(); } void MarkValid(SurfaceInterval interval) { @@ -65,8 +83,7 @@ private: std::array MakeFillBuffer(PAddr copy_addr); public: - bool registered = false; - bool is_custom = false; + SurfaceFlagBits flags{}; const Material* material = nullptr; SurfaceRegions invalid_regions; u32 fill_size = 0; diff --git a/src/video_core/rasterizer_cache/surface_params.cpp b/src/video_core/rasterizer_cache/surface_params.cpp index 
7caae3c9e..b34ddee21 100644 --- a/src/video_core/rasterizer_cache/surface_params.cpp +++ b/src/video_core/rasterizer_cache/surface_params.cpp @@ -15,14 +15,23 @@ bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { } bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { + const u32 level = LevelOf(sub_surface.addr); return sub_surface.addr >= addr && sub_surface.end <= end && sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && sub_surface.is_tiled == is_tiled && - (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && - (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && + (sub_surface.addr - mipmap_offsets[level]) % BytesInPixels(is_tiled ? 64 : 1) == 0 && + (sub_surface.stride == (stride >> level) || + sub_surface.height <= (is_tiled ? 8u : 1u)) && GetSubRect(sub_surface).right <= stride; } +bool SurfaceParams::CanReinterpret(const SurfaceParams& other_surface) { + return other_surface.addr >= addr && other_surface.end <= end && + pixel_format != PixelFormat::Invalid && GetFormatBpp() == other_surface.GetFormatBpp() && + other_surface.is_tiled == is_tiled && + (other_surface.addr - addr) % BytesInPixels(is_tiled ? 
64 : 1) == 0; +} + bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && addr <= expanded_surface.end && expanded_surface.addr <= end && @@ -206,7 +215,9 @@ SurfaceInterval SurfaceParams::LevelInterval(u32 level) const { } u32 SurfaceParams::LevelOf(PAddr level_addr) const { - ASSERT(level_addr >= addr && level_addr <= end); + if (level_addr < addr || level_addr > end) { + return 0; + } u32 level = levels - 1; while (mipmap_offsets[level] > level_addr) { diff --git a/src/video_core/rasterizer_cache/surface_params.h b/src/video_core/rasterizer_cache/surface_params.h index e71f4716b..74f880d2a 100644 --- a/src/video_core/rasterizer_cache/surface_params.h +++ b/src/video_core/rasterizer_cache/surface_params.h @@ -4,11 +4,15 @@ #pragma once +#include +#include "common/math_util.h" #include "video_core/custom_textures/custom_format.h" -#include "video_core/rasterizer_cache/utils.h" +#include "video_core/rasterizer_cache/pixel_format.h" namespace VideoCore { +using SurfaceInterval = boost::icl::right_open_interval; + constexpr std::size_t MAX_PICA_LEVELS = 8; class SurfaceParams { @@ -19,6 +23,9 @@ public: /// Returns true if sub_surface is a subrect of params bool CanSubRect(const SurfaceParams& sub_surface) const; + /// Returns true if other_surface can be used for reinterpretion. 
+ bool CanReinterpret(const SurfaceParams& other_surface); + /// Returns true if params can be expanded to match expanded_surface bool CanExpand(const SurfaceParams& expanded_surface) const; diff --git a/src/video_core/rasterizer_cache/utils.h b/src/video_core/rasterizer_cache/utils.h index c56376694..678e27aba 100644 --- a/src/video_core/rasterizer_cache/utils.h +++ b/src/video_core/rasterizer_cache/utils.h @@ -4,28 +4,31 @@ #pragma once -#include #include -#include #include "common/hash.h" #include "common/math_util.h" +#include "common/slot_vector.h" #include "common/vector_math.h" -#include "video_core/rasterizer_cache/pixel_format.h" +#include "video_core/regs_texturing.h" namespace VideoCore { -using SurfaceInterval = boost::icl::right_open_interval; +using SurfaceId = Common::SlotId; +using SamplerId = Common::SlotId; + +/// Fake surface ID for null surfaces +constexpr SurfaceId NULL_SURFACE_ID{0}; +/// Fake surface ID for null cube surfaces +constexpr SurfaceId NULL_SURFACE_CUBE_ID{1}; +/// Fake sampler ID for null samplers +constexpr SamplerId NULL_SAMPLER_ID{0}; struct Offset { - constexpr auto operator<=>(const Offset&) const noexcept = default; - u32 x = 0; u32 y = 0; }; struct Extent { - constexpr auto operator<=>(const Extent&) const noexcept = default; - u32 width = 1; u32 height = 1; }; @@ -71,9 +74,9 @@ struct BufferTextureCopy { }; struct StagingData { - u32 size = 0; - std::span mapped{}; - u64 buffer_offset = 0; + u32 size; + std::span mapped; + u64 buffer_offset; }; struct TextureCubeConfig { diff --git a/src/video_core/renderer_opengl/gl_blit_helper.cpp b/src/video_core/renderer_opengl/gl_blit_helper.cpp index b8223aa0a..1f0475c37 100644 --- a/src/video_core/renderer_opengl/gl_blit_helper.cpp +++ b/src/video_core/renderer_opengl/gl_blit_helper.cpp @@ -2,12 +2,16 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include "common/scope_exit.h" #include "common/settings.h" #include "video_core/rasterizer_cache/pixel_format.h" #include "video_core/renderer_opengl/gl_blit_helper.h" +#include "video_core/renderer_opengl/gl_driver.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_texture_runtime.h" +#include "video_core/host_shaders/format_reinterpreter/d24s8_to_rgba8_frag.h" +#include "video_core/host_shaders/format_reinterpreter/rgba4_to_rgb5a1_frag.h" #include "video_core/host_shaders/full_screen_triangle_vert.h" #include "video_core/host_shaders/texture_filtering/bicubic_frag.h" #include "video_core/host_shaders/texture_filtering/nearest_neighbor_frag.h" @@ -49,8 +53,8 @@ OGLProgram CreateProgram(std::string_view frag) { } // Anonymous namespace -BlitHelper::BlitHelper(TextureRuntime& runtime_) - : runtime{runtime_}, linear_sampler{CreateSampler(GL_LINEAR)}, +BlitHelper::BlitHelper(const Driver& driver_) + : driver{driver_}, linear_sampler{CreateSampler(GL_LINEAR)}, nearest_sampler{CreateSampler(GL_NEAREST)}, bicubic_program{CreateProgram( HostShaders::BICUBIC_FRAG)}, nearest_program{CreateProgram(HostShaders::NEAREST_NEIGHBOR_FRAG)}, @@ -58,34 +62,104 @@ BlitHelper::BlitHelper(TextureRuntime& runtime_) xbrz_program{CreateProgram(HostShaders::XBRZ_FREESCALE_FRAG)}, gradient_x_program{CreateProgram(HostShaders::X_GRADIENT_FRAG)}, gradient_y_program{CreateProgram(HostShaders::Y_GRADIENT_FRAG)}, - refine_program{CreateProgram(HostShaders::REFINE_FRAG)} { + refine_program{CreateProgram(HostShaders::REFINE_FRAG)}, + d24s8_to_rgba8{CreateProgram(HostShaders::D24S8_TO_RGBA8_FRAG)}, + rgba4_to_rgb5a1{CreateProgram(HostShaders::RGBA4_TO_RGB5A1_FRAG)} { vao.Create(); - filter_fbo.Create(); + draw_fbo.Create(); state.draw.vertex_array = vao.handle; for (u32 i = 0; i < 3; i++) { state.texture_units[i].sampler = i == 2 ? 
nearest_sampler.handle : linear_sampler.handle; } + if (driver.IsOpenGLES()) { + LOG_INFO(Render_OpenGL, + "Texture views are unsupported, reinterpretation will do intermediate copy"); + temp_tex.Create(); + use_texture_view = false; + } } BlitHelper::~BlitHelper() = default; +bool BlitHelper::ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, + const VideoCore::TextureBlit& blit) { + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + state.texture_units[0].texture_2d = source.Handle(); + state.texture_units[0].sampler = 0; + state.texture_units[1].sampler = 0; + + if (use_texture_view) { + temp_tex.Create(); + glActiveTexture(GL_TEXTURE1); + glTextureView(temp_tex.handle, GL_TEXTURE_2D, source.Handle(), GL_DEPTH24_STENCIL8, 0, 1, 0, + 1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + } else if (blit.src_rect.top > temp_rect.top || blit.src_rect.right > temp_rect.right) { + temp_tex.Release(); + temp_tex.Create(); + state.texture_units[1].texture_2d = temp_tex.handle; + state.Apply(); + glActiveTexture(GL_TEXTURE1); + glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, blit.src_rect.right, + blit.src_rect.top); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + temp_rect = blit.src_rect; + } + state.texture_units[1].texture_2d = temp_tex.handle; + state.Apply(); + + glActiveTexture(GL_TEXTURE1); + if (!use_texture_view) { + glCopyImageSubData(source.Handle(), GL_TEXTURE_2D, 0, blit.src_rect.left, + blit.src_rect.bottom, 0, temp_tex.handle, GL_TEXTURE_2D, 0, + blit.src_rect.left, blit.src_rect.bottom, 0, blit.src_rect.GetWidth(), + blit.src_rect.GetHeight(), 1); + } + glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); + + SetParams(d24s8_to_rgba8, source.RealExtent(), blit.src_rect); + Draw(d24s8_to_rgba8, 
dest.Handle(), draw_fbo.handle, 0, blit.dst_rect); + + if (use_texture_view) { + temp_tex.Release(); + } + + // Restore the sampler handles + state.texture_units[0].sampler = linear_sampler.handle; + state.texture_units[1].sampler = linear_sampler.handle; + + return true; +} + +bool BlitHelper::ConvertRGBA4ToRGB5A1(Surface& source, Surface& dest, + const VideoCore::TextureBlit& blit) { + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + state.texture_units[0].texture_2d = source.Handle(); + + SetParams(rgba4_to_rgb5a1, source.RealExtent(), blit.src_rect); + Draw(rgba4_to_rgb5a1, dest.Handle(), draw_fbo.handle, 0, blit.dst_rect); + + return true; +} + bool BlitHelper::Filter(Surface& surface, const VideoCore::TextureBlit& blit) { - // Filtering to depth stencil surfaces isn't supported. - if (surface.type == SurfaceType::Depth || surface.type == SurfaceType::DepthStencil) { + const auto filter = Settings::values.texture_filter.GetValue(); + const bool is_depth = + surface.type == SurfaceType::Depth || surface.type == SurfaceType::DepthStencil; + if (filter == Settings::TextureFilter::None || is_depth) { return false; } - // Avoid filtering for mipmaps as the result often looks terrible. 
if (blit.src_level != 0) { return true; } - const OpenGLState prev_state = OpenGLState::GetCurState(); - state.texture_units[0].texture_2d = surface.Handle(0); - - const auto filter{Settings::values.texture_filter.GetValue()}; switch (filter) { - case TextureFilter::None: - break; case TextureFilter::Anime4K: FilterAnime4K(surface, blit); break; @@ -101,15 +175,19 @@ bool BlitHelper::Filter(Surface& surface, const VideoCore::TextureBlit& blit) { case TextureFilter::xBRZ: FilterXbrz(surface, blit); break; + default: + LOG_ERROR(Render_OpenGL, "Unknown texture filter {}", filter); } - prev_state.Apply(); return true; } void BlitHelper::FilterAnime4K(Surface& surface, const VideoCore::TextureBlit& blit) { static constexpr u8 internal_scale_factor = 2; + const OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + const auto& tuple = surface.Tuple(); const u32 src_width = blit.src_rect.GetWidth(); const u32 src_height = blit.src_rect.GetHeight(); @@ -149,7 +227,7 @@ void BlitHelper::FilterAnime4K(Surface& surface, const VideoCore::TextureBlit& b Draw(gradient_y_program, LUMAD.tex.handle, LUMAD.fbo.handle, 0, temp_rect); // refine pass - Draw(refine_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect); + Draw(refine_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect); // These will have handles from the previous texture that was filtered, reset them to avoid // binding invalid textures. 
@@ -160,25 +238,36 @@ void BlitHelper::FilterAnime4K(Surface& surface, const VideoCore::TextureBlit& b } void BlitHelper::FilterBicubic(Surface& surface, const VideoCore::TextureBlit& blit) { - SetParams(bicubic_program, surface.Extent(), blit.src_rect); - Draw(bicubic_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect); + const OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + state.texture_units[0].texture_2d = surface.Handle(0); + SetParams(bicubic_program, surface.RealExtent(false), blit.src_rect); + Draw(bicubic_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect); } void BlitHelper::FilterNearest(Surface& surface, const VideoCore::TextureBlit& blit) { + const OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); state.texture_units[2].texture_2d = surface.Handle(0); - SetParams(nearest_program, surface.Extent(), blit.src_rect); - Draw(nearest_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect); + SetParams(nearest_program, surface.RealExtent(false), blit.src_rect); + Draw(nearest_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect); } void BlitHelper::FilterScaleForce(Surface& surface, const VideoCore::TextureBlit& blit) { - SetParams(scale_force_program, surface.Extent(), blit.src_rect); - Draw(scale_force_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect); + const OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + state.texture_units[0].texture_2d = surface.Handle(0); + SetParams(scale_force_program, surface.RealExtent(false), blit.src_rect); + Draw(scale_force_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect); } void BlitHelper::FilterXbrz(Surface& surface, const VideoCore::TextureBlit& blit) { + const OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); 
}); + state.texture_units[0].texture_2d = surface.Handle(0); glProgramUniform1f(xbrz_program.handle, 2, static_cast(surface.res_scale)); - SetParams(xbrz_program, surface.Extent(), blit.src_rect); - Draw(xbrz_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect); + SetParams(xbrz_program, surface.RealExtent(false), blit.src_rect); + Draw(xbrz_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect); } void BlitHelper::SetParams(OGLProgram& program, const VideoCore::Extent& src_extent, @@ -206,7 +295,7 @@ void BlitHelper::Draw(OGLProgram& program, GLuint dst_tex, GLuint dst_fbo, u32 d dst_level); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + glDrawArrays(GL_TRIANGLES, 0, 3); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_blit_helper.h b/src/video_core/renderer_opengl/gl_blit_helper.h index 3c156b502..a0cf2a4be 100644 --- a/src/video_core/renderer_opengl/gl_blit_helper.h +++ b/src/video_core/renderer_opengl/gl_blit_helper.h @@ -15,16 +15,20 @@ struct TextureBlit; namespace OpenGL { -class TextureRuntime; +class Driver; class Surface; class BlitHelper { public: - BlitHelper(TextureRuntime& runtime); + explicit BlitHelper(const Driver& driver); ~BlitHelper(); bool Filter(Surface& surface, const VideoCore::TextureBlit& blit); + bool ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit); + + bool ConvertRGBA4ToRGB5A1(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit); + private: void FilterAnime4K(Surface& surface, const VideoCore::TextureBlit& blit); @@ -43,10 +47,10 @@ private: Common::Rectangle dst_rect); private: - TextureRuntime& runtime; + const Driver& driver; OGLVertexArray vao; OpenGLState state; - OGLFramebuffer filter_fbo; + OGLFramebuffer draw_fbo; OGLSampler linear_sampler; OGLSampler nearest_sampler; @@ -57,6 +61,12 @@ private: OGLProgram 
gradient_x_program; OGLProgram gradient_y_program; OGLProgram refine_program; + OGLProgram d24s8_to_rgba8; + OGLProgram rgba4_to_rgb5a1; + + OGLTexture temp_tex; + Common::Rectangle temp_rect{}; + bool use_texture_view{true}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp deleted file mode 100644 index 5019f79b7..000000000 --- a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/scope_exit.h" -#include "video_core/renderer_opengl/gl_format_reinterpreter.h" -#include "video_core/renderer_opengl/gl_state.h" -#include "video_core/renderer_opengl/gl_texture_runtime.h" - -#include "video_core/host_shaders/format_reinterpreter/d24s8_to_rgba8_frag.h" -#include "video_core/host_shaders/format_reinterpreter/fullscreen_quad_vert.h" -#include "video_core/host_shaders/format_reinterpreter/rgba4_to_rgb5a1_frag.h" - -namespace OpenGL { - -RGBA4toRGB5A1::RGBA4toRGB5A1() { - program.Create(HostShaders::FULLSCREEN_QUAD_VERT, HostShaders::RGBA4_TO_RGB5A1_FRAG); - dst_size_loc = glGetUniformLocation(program.handle, "dst_size"); - src_size_loc = glGetUniformLocation(program.handle, "src_size"); - src_offset_loc = glGetUniformLocation(program.handle, "src_offset"); - vao.Create(); -} - -void RGBA4toRGB5A1::Reinterpret(Surface& source, Common::Rectangle src_rect, Surface& dest, - Common::Rectangle dst_rect) { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state; - state.texture_units[0].texture_2d = source.Handle(); - state.draw.draw_framebuffer = draw_fbo.handle; - state.draw.shader_program = program.handle; - state.draw.vertex_array = vao.handle; - state.viewport = {static_cast(dst_rect.left), static_cast(dst_rect.bottom), - 
static_cast(dst_rect.GetWidth()), - static_cast(dst_rect.GetHeight())}; - state.Apply(); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dest.Handle(), - 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight()); - glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight()); - glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); -} - -ShaderD24S8toRGBA8::ShaderD24S8toRGBA8() { - program.Create(HostShaders::FULLSCREEN_QUAD_VERT, HostShaders::D24S8_TO_RGBA8_FRAG); - dst_size_loc = glGetUniformLocation(program.handle, "dst_size"); - src_size_loc = glGetUniformLocation(program.handle, "src_size"); - src_offset_loc = glGetUniformLocation(program.handle, "src_offset"); - vao.Create(); - - auto state = OpenGLState::GetCurState(); - auto cur_program = state.draw.shader_program; - state.draw.shader_program = program.handle; - state.Apply(); - glUniform1i(glGetUniformLocation(program.handle, "stencil"), 1); - state.draw.shader_program = cur_program; - state.Apply(); - - // Nvidia seem to be the only one to support D24S8 views, at least on windows - // so for everyone else it will do an intermediate copy before running through the shader - std::string_view vendor{reinterpret_cast(glGetString(GL_VENDOR))}; - if (vendor.find("NVIDIA") != vendor.npos) { - use_texture_view = true; - } else { - LOG_INFO(Render_OpenGL, - "Texture views are unsupported, reinterpretation will do intermediate copy"); - temp_tex.Create(); - } -} - -void ShaderD24S8toRGBA8::Reinterpret(Surface& source, Common::Rectangle src_rect, - Surface& dest, Common::Rectangle dst_rect) { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state; - state.texture_units[0].texture_2d = source.Handle(); - - if (use_texture_view) { - temp_tex.Create(); - 
glActiveTexture(GL_TEXTURE1); - glTextureView(temp_tex.handle, GL_TEXTURE_2D, source.Handle(), GL_DEPTH24_STENCIL8, 0, 1, 0, - 1); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - } else if (src_rect.top > temp_rect.top || src_rect.right > temp_rect.right) { - temp_tex.Release(); - temp_tex.Create(); - state.texture_units[1].texture_2d = temp_tex.handle; - state.Apply(); - glActiveTexture(GL_TEXTURE1); - glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, src_rect.right, src_rect.top); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - temp_rect = src_rect; - } - - state.texture_units[1].texture_2d = temp_tex.handle; - state.draw.draw_framebuffer = draw_fbo.handle; - state.draw.shader_program = program.handle; - state.draw.vertex_array = vao.handle; - state.viewport = {static_cast(dst_rect.left), static_cast(dst_rect.bottom), - static_cast(dst_rect.GetWidth()), - static_cast(dst_rect.GetHeight())}; - state.Apply(); - - glActiveTexture(GL_TEXTURE1); - if (!use_texture_view) { - glCopyImageSubData(source.Handle(), GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, - temp_tex.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, - src_rect.GetWidth(), src_rect.GetHeight(), 1); - } - glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dest.Handle(), - 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight()); - glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight()); - glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - if (use_texture_view) { - temp_tex.Release(); - } -} - -} // namespace 
OpenGL diff --git a/src/video_core/renderer_opengl/gl_format_reinterpreter.h b/src/video_core/renderer_opengl/gl_format_reinterpreter.h deleted file mode 100644 index b4b9468eb..000000000 --- a/src/video_core/renderer_opengl/gl_format_reinterpreter.h +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "common/math_util.h" -#include "video_core/rasterizer_cache/pixel_format.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" - -namespace OpenGL { - -class Surface; - -class FormatReinterpreterBase { -public: - FormatReinterpreterBase() { - read_fbo.Create(); - draw_fbo.Create(); - } - - virtual ~FormatReinterpreterBase() = default; - - virtual VideoCore::PixelFormat GetSourceFormat() const = 0; - virtual void Reinterpret(Surface& source, Common::Rectangle src_rect, Surface& dest, - Common::Rectangle dst_rect) = 0; - -protected: - OGLFramebuffer read_fbo; - OGLFramebuffer draw_fbo; -}; - -using ReinterpreterList = std::vector>; - -class RGBA4toRGB5A1 final : public FormatReinterpreterBase { -public: - RGBA4toRGB5A1(); - - VideoCore::PixelFormat GetSourceFormat() const override { - return VideoCore::PixelFormat::RGBA4; - } - - void Reinterpret(Surface& source, Common::Rectangle src_rect, Surface& dest, - Common::Rectangle dst_rect) override; - -private: - OGLProgram program; - GLint dst_size_loc{-1}; - GLint src_size_loc{-1}; - GLint src_offset_loc{-1}; - OGLVertexArray vao; -}; - -class ShaderD24S8toRGBA8 final : public FormatReinterpreterBase { -public: - ShaderD24S8toRGBA8(); - - VideoCore::PixelFormat GetSourceFormat() const override { - return VideoCore::PixelFormat::D24S8; - } - - void Reinterpret(Surface& source, Common::Rectangle src_rect, Surface& dest, - Common::Rectangle dst_rect) override; - -private: - bool use_texture_view{}; - OGLProgram program{}; - GLint dst_size_loc{-1}; - GLint src_size_loc{-1}; - 
GLint src_offset_loc{-1}; - OGLVertexArray vao{}; - OGLTexture temp_tex{}; - Common::Rectangle temp_rect{0, 0, 0, 0}; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 937f30f7c..a34ad34a6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -25,7 +25,7 @@ MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128, MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128)); MICROPROFILE_DEFINE(OpenGL_GS, "OpenGL", "Geometry Shader Setup", MP_RGB(128, 192, 128)); MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); -MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); +MICROPROFILE_DEFINE(OpenGL_Display, "OpenGL", "Display", MP_RGB(128, 128, 192)); using VideoCore::SurfaceType; @@ -97,16 +97,6 @@ RasterizerOpenGL::RasterizerOpenGL(Memory::MemorySystem& memory, u8 framebuffer_data[4] = {0, 0, 0, 1}; glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data); - // Create sampler objects - for (std::size_t i = 0; i < texture_samplers.size(); ++i) { - texture_samplers[i].Create(); - state.texture_units[i].sampler = texture_samplers[i].sampler.handle; - } - - // Create cubemap texture and sampler objects - texture_cube_sampler.Create(); - state.texture_cube_unit.sampler = texture_cube_sampler.sampler.handle; - // Generate VAO sw_vao.Create(); hw_vao.Create(); @@ -251,14 +241,14 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, } } - PAddr data_addr = + const PAddr data_addr = base_address + loader.data_offset + (vs_input_index_min * loader.byte_count); - u32 vertex_num = vs_input_index_max - vs_input_index_min + 1; - u32 data_size = loader.byte_count * vertex_num; + const u32 vertex_num = vs_input_index_max - vs_input_index_min + 
1; + const u32 data_size = loader.byte_count * vertex_num; - res_cache.FlushRegion(data_addr, data_size, nullptr); - std::memcpy(array_ptr, VideoCore::g_memory->GetPhysicalPointer(data_addr), data_size); + res_cache.FlushRegion(data_addr, data_size); + std::memcpy(array_ptr, memory.GetPhysicalPointer(data_addr), data_size); array_ptr += data_size; buffer_offset += data_size; @@ -287,8 +277,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, bool RasterizerOpenGL::SetupVertexShader() { MICROPROFILE_SCOPE(OpenGL_VS); - return shader_program_manager->UseProgrammableVertexShader(Pica::g_state.regs, - Pica::g_state.vs); + return shader_program_manager->UseProgrammableVertexShader(regs, Pica::g_state.vs); } bool RasterizerOpenGL::SetupGeometryShader() { @@ -400,8 +389,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { const Framebuffer framebuffer = res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb); const bool has_color = framebuffer.HasAttachment(SurfaceType::Color); - const bool has_depth_stencil = framebuffer.HasAttachment(SurfaceType::DepthStencil); - if (!has_color && (shadow_rendering || !has_depth_stencil)) { + if (!has_color && shadow_rendering) { return true; } @@ -520,8 +508,9 @@ void RasterizerOpenGL::SyncTextureUnits(const Framebuffer& framebuffer) { if (texture_index == 0) { switch (texture.config.type.Value()) { case TextureType::Shadow2D: { - auto surface = res_cache.GetTextureSurface(texture); - state.image_shadow_texture_px = surface->Handle(); + Surface& surface = res_cache.GetTextureSurface(texture); + surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap; + state.image_shadow_texture_px = surface.Handle(); continue; } case TextureType::ShadowCube: { @@ -538,22 +527,14 @@ void RasterizerOpenGL::SyncTextureUnits(const Framebuffer& framebuffer) { } // Sync texture unit sampler - texture_samplers[texture_index].SyncWithConfig(texture.config); + Sampler& sampler = 
res_cache.GetSampler(texture.config); + state.texture_units[texture_index].sampler = sampler.Handle(); // Bind the texture provided by the rasterizer cache - auto surface = res_cache.GetTextureSurface(texture); - if (!surface) { - // Can occur when texture addr is null or its memory is unmapped/invalid - // HACK: In this case, the correct behaviour for the PICA is to use the last - // rendered colour. But because this would be impractical to implement, the - // next best alternative is to use a clear texture, essentially skipping - // the geometry in question. - // For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn - // on the male character's face, which in the OpenGL default appear black. - state.texture_units[texture_index].texture_2d = default_texture; - } else if (!IsFeedbackLoop(texture_index, framebuffer, *surface)) { - BindMaterial(texture_index, *surface); - state.texture_units[texture_index].texture_2d = surface->Handle(); + Surface& surface = res_cache.GetTextureSurface(texture); + if (!IsFeedbackLoop(texture_index, framebuffer, surface)) { + BindMaterial(texture_index, surface); + state.texture_units[texture_index].texture_2d = surface.Handle(); } } } @@ -570,8 +551,10 @@ void RasterizerOpenGL::BindShadowCube(const Pica::TexturingRegs::FullTextureConf const u32 binding = static_cast(face); info.physical_address = regs.texturing.GetCubePhysicalAddress(face); - auto surface = res_cache.GetTextureSurface(info); - state.image_shadow_texture[binding] = surface->Handle(); + VideoCore::SurfaceId surface_id = res_cache.GetTextureSurface(info); + Surface& surface = res_cache.GetSurface(surface_id); + surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap; + state.image_shadow_texture[binding] = surface.Handle(); } } @@ -589,10 +572,11 @@ void RasterizerOpenGL::BindTextureCube(const Pica::TexturingRegs::FullTextureCon .format = texture.format, }; - auto surface = res_cache.GetTextureCube(config); - 
texture_cube_sampler.SyncWithConfig(texture.config); + Surface& surface = res_cache.GetTextureCube(config); + Sampler& sampler = res_cache.GetSampler(texture.config); - state.texture_cube_unit.texture_cube = surface->Handle(); + state.texture_cube_unit.texture_cube = surface.Handle(); + state.texture_cube_unit.sampler = sampler.Handle(); state.texture_units[0].texture_2d = 0; } @@ -608,7 +592,7 @@ void RasterizerOpenGL::BindMaterial(u32 texture_index, Surface& surface) { glBindSampler(unit.id, sampler); }; - const GLuint sampler = texture_samplers[texture_index].sampler.handle; + const GLuint sampler = state.texture_units[texture_index].sampler; if (surface.HasNormalMap()) { if (regs.lighting.disable) { LOG_WARNING(Render_OpenGL, "Custom normal map used but scene has no light enabled"); @@ -726,24 +710,20 @@ void RasterizerOpenGL::NotifyFixedFunctionPicaRegisterChanged(u32 id) { } void RasterizerOpenGL::FlushAll() { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.FlushAll(); } void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.InvalidateRegion(addr, size, nullptr); + res_cache.InvalidateRegion(addr, size); } void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.FlushRegion(addr, size); - res_cache.InvalidateRegion(addr, size, nullptr); + res_cache.InvalidateRegion(addr, size); } void RasterizerOpenGL::ClearAll(bool flush) { @@ -768,7 +748,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con if (framebuffer_addr == 0) { return false; } - MICROPROFILE_SCOPE(OpenGL_CacheManagement); + MICROPROFILE_SCOPE(OpenGL_Display); VideoCore::SurfaceParams src_params; src_params.addr = framebuffer_addr; @@ -779,85 +759,27 @@ bool 
RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con src_params.pixel_format = VideoCore::PixelFormatFromGPUPixelFormat(config.color_format); src_params.UpdateParams(); - auto [src_surface, src_rect] = + const auto [src_surface_id, src_rect] = res_cache.GetSurfaceSubRect(src_params, VideoCore::ScaleMatch::Ignore, true); - - if (src_surface == nullptr) { + if (!src_surface_id) { return false; } - const u32 scaled_width = src_surface->GetScaledWidth(); - const u32 scaled_height = src_surface->GetScaledHeight(); + const Surface& src_surface = res_cache.GetSurface(src_surface_id); + const u32 scaled_width = src_surface.GetScaledWidth(); + const u32 scaled_height = src_surface.GetScaledHeight(); screen_info.display_texcoords = Common::Rectangle( (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); - screen_info.display_texture = src_surface->Handle(); + screen_info.display_texture = src_surface.Handle(); return true; } -void RasterizerOpenGL::SamplerInfo::Create() { - sampler.Create(); - mag_filter = min_filter = mip_filter = TextureConfig::Linear; - wrap_s = wrap_t = TextureConfig::Repeat; - border_color = 0; - lod_min = lod_max = 0; - - // default is 1000 and -1000 - // Other attributes have correct defaults - glSamplerParameterf(sampler.handle, GL_TEXTURE_MAX_LOD, static_cast(lod_max)); - glSamplerParameterf(sampler.handle, GL_TEXTURE_MIN_LOD, static_cast(lod_min)); -} - -void RasterizerOpenGL::SamplerInfo::SyncWithConfig( - const Pica::TexturingRegs::TextureConfig& config) { - - GLuint s = sampler.handle; - - if (mag_filter != config.mag_filter) { - mag_filter = config.mag_filter; - glSamplerParameteri(s, GL_TEXTURE_MAG_FILTER, PicaToGL::TextureMagFilterMode(mag_filter)); - } - - if (min_filter != config.min_filter || mip_filter != config.mip_filter) { - min_filter = config.min_filter; - mip_filter = 
config.mip_filter; - glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER, - PicaToGL::TextureMinFilterMode(min_filter, mip_filter)); - } - - if (wrap_s != config.wrap_s) { - wrap_s = config.wrap_s; - glSamplerParameteri(s, GL_TEXTURE_WRAP_S, PicaToGL::WrapMode(wrap_s)); - } - if (wrap_t != config.wrap_t) { - wrap_t = config.wrap_t; - glSamplerParameteri(s, GL_TEXTURE_WRAP_T, PicaToGL::WrapMode(wrap_t)); - } - - if (wrap_s == TextureConfig::ClampToBorder || wrap_t == TextureConfig::ClampToBorder) { - if (border_color != config.border_color.raw) { - border_color = config.border_color.raw; - auto gl_color = PicaToGL::ColorRGBA8(border_color); - glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, gl_color.AsArray()); - } - } - - if (lod_min != config.lod.min_level) { - lod_min = config.lod.min_level; - glSamplerParameterf(s, GL_TEXTURE_MIN_LOD, static_cast(lod_min)); - } - - if (lod_max != config.lod.max_level) { - lod_max = config.lod.max_level; - glSamplerParameterf(s, GL_TEXTURE_MAX_LOD, static_cast(lod_max)); - } -} - void RasterizerOpenGL::SyncClipEnabled() { - state.clip_distance[1] = Pica::g_state.regs.rasterizer.clip_enable != 0; + state.clip_distance[1] = regs.rasterizer.clip_enable != 0; } void RasterizerOpenGL::SyncCullMode() { @@ -885,7 +807,7 @@ void RasterizerOpenGL::SyncCullMode() { } void RasterizerOpenGL::SyncBlendEnabled() { - state.blend.enabled = (Pica::g_state.regs.framebuffer.output_merger.alphablend_enable == 1); + state.blend.enabled = (regs.framebuffer.output_merger.alphablend_enable == 1); } void RasterizerOpenGL::SyncBlendFuncs() { @@ -904,8 +826,7 @@ void RasterizerOpenGL::SyncBlendFuncs() { } void RasterizerOpenGL::SyncBlendColor() { - auto blend_color = - PicaToGL::ColorRGBA8(Pica::g_state.regs.framebuffer.output_merger.blend_const.raw); + auto blend_color = PicaToGL::ColorRGBA8(regs.framebuffer.output_merger.blend_const.raw); state.blend.color.red = blend_color[0]; state.blend.color.green = blend_color[1]; state.blend.color.blue = blend_color[2]; 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 73a25cdd5..51f064868 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -55,28 +55,6 @@ private: void SyncFixedState() override; void NotifyFixedFunctionPicaRegisterChanged(u32 id) override; - struct SamplerInfo { - using TextureConfig = Pica::TexturingRegs::TextureConfig; - - OGLSampler sampler; - - /// Creates the sampler object, initializing its state so that it's in sync with the - /// SamplerInfo struct. - void Create(); - /// Syncs the sampler object with the config, updating any necessary state. - void SyncWithConfig(const TextureConfig& config); - - private: - TextureConfig::TextureFilter mag_filter; - TextureConfig::TextureFilter min_filter; - TextureConfig::TextureFilter mip_filter; - TextureConfig::WrapMode wrap_s; - TextureConfig::WrapMode wrap_t; - u32 border_color; - u32 lod_min; - u32 lod_max; - }; - /// Syncs the clip enabled status to match the PICA register void SyncClipEnabled(); @@ -156,14 +134,13 @@ private: OpenGLState state; GLuint default_texture; TextureRuntime runtime; - VideoCore::RasterizerCache res_cache; + RasterizerCache res_cache; std::unique_ptr shader_program_manager; OGLVertexArray sw_vao; // VAO for software shader draw OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw std::array hw_vao_enabled_attributes{}; - std::array texture_samplers; GLsizeiptr texture_buffer_size; OGLStreamBuffer vertex_buffer; OGLStreamBuffer uniform_buffer; @@ -175,8 +152,6 @@ private: std::size_t uniform_size_aligned_vs; std::size_t uniform_size_aligned_fs; - SamplerInfo texture_cube_sampler; - OGLTexture texture_buffer_lut_lf; OGLTexture texture_buffer_lut_rg; OGLTexture texture_buffer_lut_rgba; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp new file mode 100644 index 
000000000..48c05e312 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -0,0 +1,10 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/rasterizer_cache/rasterizer_cache.h" +#include "video_core/renderer_opengl/gl_texture_runtime.h" + +namespace VideoCore { +template class RasterizerCache; +} // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp index b461575a2..fe2c19070 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.cpp +++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -9,6 +9,7 @@ #include "video_core/renderer_opengl/gl_driver.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_texture_runtime.h" +#include "video_core/renderer_opengl/pica_to_gl.h" namespace OpenGL { @@ -16,6 +17,7 @@ namespace { using VideoCore::MapType; using VideoCore::PixelFormat; +using VideoCore::SurfaceFlagBits; using VideoCore::SurfaceType; using VideoCore::TextureType; @@ -116,20 +118,11 @@ struct FramebufferInfo { } // Anonymous namespace TextureRuntime::TextureRuntime(const Driver& driver_, VideoCore::RendererBase& renderer) - : driver{driver_}, blit_helper{*this} { + : driver{driver_}, blit_helper{driver} { for (std::size_t i = 0; i < draw_fbos.size(); ++i) { draw_fbos[i].Create(); read_fbos[i].Create(); } - - auto add_reinterpreter = [this](PixelFormat dest, - std::unique_ptr&& obj) { - const u32 dst_index = static_cast(dest); - return reinterpreters[dst_index].push_back(std::move(obj)); - }; - - add_reinterpreter(PixelFormat::RGBA8, std::make_unique()); - add_reinterpreter(PixelFormat::RGB5A1, std::make_unique()); } TextureRuntime::~TextureRuntime() = default; @@ -241,14 +234,30 @@ Allocation TextureRuntime::Allocate(const VideoCore::SurfaceParams& params, .height = params.height, .levels = 
params.levels, .res_scale = params.res_scale, + .is_custom = is_custom, }; } -bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear) { - const auto prev_state = OpenGLState::GetCurState(); +bool TextureRuntime::Reinterpret(Surface& source, Surface& dest, + const VideoCore::TextureBlit& blit) { + const PixelFormat src_format = source.pixel_format; + const PixelFormat dst_format = dest.pixel_format; + ASSERT_MSG(src_format != dst_format, "Reinterpretation with the same format is invalid"); + if (src_format == PixelFormat::D24S8 && dst_format == PixelFormat::RGBA8) { + blit_helper.ConvertDS24S8ToRGBA8(source, dest, blit); + } else if (src_format == PixelFormat::RGBA4 && dst_format == PixelFormat::RGB5A1) { + blit_helper.ConvertRGBA4ToRGB5A1(source, dest, blit); + } else { + LOG_WARNING(Render_OpenGL, "Unimplemented reinterpretation {} -> {}", + VideoCore::PixelFormatAsString(src_format), + VideoCore::PixelFormatAsString(dst_format)); + return false; + } + return true; +} - // Setup scissor rectangle according to the clear rectangle - OpenGLState state; +bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear) { + OpenGLState state = OpenGLState::GetCurState(); state.scissor.enabled = true; state.scissor.x = clear.texture_rect.left; state.scissor.y = clear.texture_rect.bottom; @@ -257,42 +266,27 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea state.draw.draw_framebuffer = draw_fbos[FboIndex(surface.type)].handle; state.Apply(); + surface.Attach(GL_DRAW_FRAMEBUFFER, clear.texture_level, 0); + switch (surface.type) { case SurfaceType::Color: case SurfaceType::Texture: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - surface.Handle(), clear.texture_level); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - state.color_mask.red_enabled = true; state.color_mask.green_enabled = true; 
state.color_mask.blue_enabled = true; state.color_mask.alpha_enabled = true; state.Apply(); - glClearBufferfv(GL_COLOR, 0, clear.value.color.AsArray()); break; case SurfaceType::Depth: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - surface.Handle(), clear.texture_level); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - state.depth.write_mask = GL_TRUE; state.Apply(); - glClearBufferfv(GL_DEPTH, 0, &clear.value.depth); break; case SurfaceType::DepthStencil: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - surface.Handle(), clear.texture_level); - state.depth.write_mask = GL_TRUE; state.stencil.write_mask = -1; state.Apply(); - glClearBufferfi(GL_DEPTH_STENCIL, 0, clear.value.depth, clear.value.stencil); break; default: @@ -300,7 +294,6 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea return false; } - prev_state.Apply(); return true; } @@ -329,13 +322,12 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, source.Attach(GL_READ_FRAMEBUFFER, blit.src_level, blit.src_layer); dest.Attach(GL_DRAW_FRAMEBUFFER, blit.dst_level, blit.dst_layer); - // TODO (wwylele): use GL_NEAREST for shadow map texture // Note: shadow map is treated as RGBA8 format in PICA, as well as in the rasterizer cache, but - // doing linear intepolation componentwise would cause incorrect value. However, for a - // well-programmed game this code path should be rarely executed for shadow map with - // inconsistent scale. + // doing linear intepolation componentwise would cause incorrect value. const GLbitfield buffer_mask = MakeBufferMask(source.type); - const GLenum filter = buffer_mask == GL_COLOR_BUFFER_BIT ? 
GL_LINEAR : GL_NEAREST; + const bool is_shadow_map = True(source.flags & SurfaceFlagBits::ShadowMap); + const GLenum filter = + buffer_mask == GL_COLOR_BUFFER_BIT && !is_shadow_map ? GL_LINEAR : GL_NEAREST; glBlitFramebuffer(blit.src_rect.left, blit.src_rect.bottom, blit.src_rect.right, blit.src_rect.top, blit.dst_rect.left, blit.dst_rect.bottom, blit.dst_rect.right, blit.dst_rect.top, buffer_mask, filter); @@ -359,11 +351,6 @@ void TextureRuntime::GenerateMipmaps(Surface& surface) { } } -const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations( - PixelFormat dest_format) const { - return reinterpreters[static_cast(dest_format)]; -} - Surface::Surface(TextureRuntime& runtime_, const VideoCore::SurfaceParams& params) : SurfaceBase{params}, driver{&runtime_.GetDriver()}, runtime{&runtime_} { if (pixel_format == PixelFormat::Invalid) { @@ -422,15 +409,19 @@ void Surface::UploadCustom(const VideoCore::Material* material, u32 level) { glActiveTexture(GL_TEXTURE0); glPixelStorei(GL_UNPACK_ROW_LENGTH, width); - glBindTexture(GL_TEXTURE_2D, Handle(0)); - if (VideoCore::IsCustomFormatCompressed(custom_format)) { - const GLsizei image_size = static_cast(color->data.size()); - glCompressedTexSubImage2D(GL_TEXTURE_2D, level, 0, 0, width, height, tuple.format, - image_size, color->data.data()); - } else { - glTexSubImage2D(GL_TEXTURE_2D, level, 0, 0, width, height, tuple.format, tuple.type, - color->data.data()); - } + const auto upload = [&](u32 index, VideoCore::CustomTexture* texture) { + glBindTexture(GL_TEXTURE_2D, Handle(index)); + if (VideoCore::IsCustomFormatCompressed(custom_format)) { + const GLsizei image_size = static_cast(texture->data.size()); + glCompressedTexSubImage2D(GL_TEXTURE_2D, level, 0, 0, width, height, tuple.format, + image_size, texture->data.data()); + } else { + glTexSubImage2D(GL_TEXTURE_2D, level, 0, 0, width, height, tuple.format, tuple.type, + texture->data.data()); + } + }; + + upload(0, color); const VideoCore::TextureBlit blit = { 
.src_rect = filter_rect, @@ -444,15 +435,7 @@ void Surface::UploadCustom(const VideoCore::Material* material, u32 level) { if (!texture) { continue; } - glBindTexture(GL_TEXTURE_2D, Handle(i + 1)); - if (VideoCore::IsCustomFormatCompressed(custom_format)) { - const GLsizei image_size = static_cast(texture->data.size()); - glCompressedTexSubImage2D(GL_TEXTURE_2D, level, 0, 0, width, height, tuple.format, - image_size, texture->data.data()); - } else { - glTexSubImage2D(GL_TEXTURE_2D, level, 0, 0, width, height, tuple.format, tuple.type, - texture->data.data()); - } + upload(i + 1, texture); } glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); @@ -572,7 +555,6 @@ bool Surface::Swap(const VideoCore::Material* mat) { GetScaledWidth(), GetScaledHeight(), VideoCore::PixelFormatAsString(pixel_format), addr, width, height, VideoCore::CustomPixelFormatAsString(format)); - is_custom = true; custom_format = format; material = mat; @@ -614,13 +596,13 @@ HostTextureTag Surface::MakeTag() const noexcept { .res_scale = alloc.res_scale, .tuple = alloc.tuple, .type = texture_type, - .is_custom = is_custom, + .is_custom = alloc.is_custom, .has_normal = HasNormalMap(), }; } -Framebuffer::Framebuffer(TextureRuntime& runtime, Surface* const color, u32 color_level, - Surface* const depth_stencil, u32 depth_level, const Pica::Regs& regs, +Framebuffer::Framebuffer(TextureRuntime& runtime, const Surface* color, u32 color_level, + const Surface* depth_stencil, u32 depth_level, const Pica::Regs& regs, Common::Rectangle surfaces_rect) : VideoCore::FramebufferBase{regs, color, color_level, depth_stencil, depth_level, surfaces_rect} { @@ -692,4 +674,30 @@ Framebuffer::Framebuffer(TextureRuntime& runtime, Surface* const color, u32 colo Framebuffer::~Framebuffer() = default; +Sampler::Sampler(TextureRuntime&, VideoCore::SamplerParams params) { + const GLenum mag_filter = PicaToGL::TextureMagFilterMode(params.mag_filter); + const GLenum min_filter = PicaToGL::TextureMinFilterMode(params.min_filter, 
params.mip_filter); + const GLenum wrap_s = PicaToGL::WrapMode(params.wrap_s); + const GLenum wrap_t = PicaToGL::WrapMode(params.wrap_t); + const Common::Vec4f gl_color = PicaToGL::ColorRGBA8(params.border_color); + const float lod_min = params.lod_min; + const float lod_max = params.lod_max; + + sampler.Create(); + + const GLuint handle = sampler.handle; + glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag_filter); + glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min_filter); + + glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, wrap_s); + glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, wrap_t); + + glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, gl_color.AsArray()); + + glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, lod_min); + glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, lod_max); +} + +Sampler::~Sampler() = default; + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h index 48fa1d68a..aab561f8a 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -5,9 +5,8 @@ #pragma once #include "video_core/rasterizer_cache/framebuffer_base.h" -#include "video_core/rasterizer_cache/surface_base.h" +#include "video_core/rasterizer_cache/rasterizer_cache_base.h" #include "video_core/renderer_opengl/gl_blit_helper.h" -#include "video_core/renderer_opengl/gl_format_reinterpreter.h" namespace VideoCore { struct Material; @@ -60,6 +59,7 @@ struct Allocation { u32 height; u32 levels; u32 res_scale; + bool is_custom; operator bool() const noexcept { return textures[0].handle; @@ -76,7 +76,6 @@ class Driver; class TextureRuntime { friend class Surface; friend class Framebuffer; - friend class BlitHelper; public: explicit TextureRuntime(const Driver& driver, VideoCore::RendererBase& renderer); @@ -95,12 +94,8 @@ public: const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format) const; const 
FormatTuple& GetFormatTuple(VideoCore::CustomPixelFormat pixel_format); - /// Takes back ownership of the allocation for recycling - void Recycle(const HostTextureTag tag, Allocation&& alloc); - - /// Allocates a texture with the specified dimentions and format - Allocation Allocate(const VideoCore::SurfaceParams& params, - const VideoCore::Material* material = nullptr); + /// Attempts to reinterpret a rectangle of source to another rectangle of dest + bool Reinterpret(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit); /// Fills the rectangle of the texture with the clear value provided bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear); @@ -114,10 +109,14 @@ public: /// Generates mipmaps for all the available levels of the texture void GenerateMipmaps(Surface& surface); - /// Returns all source formats that support reinterpretation to the dest format - const ReinterpreterList& GetPossibleReinterpretations(VideoCore::PixelFormat dest_format) const; - private: + /// Takes back ownership of the allocation for recycling + void Recycle(const HostTextureTag tag, Allocation&& alloc); + + /// Allocates a texture with the specified dimentions and format + Allocation Allocate(const VideoCore::SurfaceParams& params, + const VideoCore::Material* material = nullptr); + /// Returns the OpenGL driver class const Driver& GetDriver() const { return driver; @@ -127,7 +126,6 @@ private: const Driver& driver; BlitHelper blit_helper; std::vector staging_buffer; - std::array reinterpreters; std::unordered_multimap alloc_cache; std::unordered_map> framebuffer_cache; std::array draw_fbos; @@ -145,24 +143,14 @@ public: Surface(Surface&& o) noexcept = default; Surface& operator=(Surface&& o) noexcept = default; - /// Returns the surface image handle at the provided index. 
- GLuint Handle(u32 index = 1) const noexcept { + [[nodiscard]] GLuint Handle(u32 index = 1) const noexcept { return alloc.handles[index]; } - /// Returns the tuple of the surface allocation. - const FormatTuple& Tuple() const noexcept { + [[nodiscard]] const FormatTuple& Tuple() const noexcept { return alloc.tuple; } - /// Returns the extent of the underlying surface allocation - VideoCore::Extent Extent() const noexcept { - return { - .width = alloc.width, - .height = alloc.height, - }; - } - /// Uploads pixel data in staging to a rectangle region of the surface texture void Upload(const VideoCore::BufferTextureCopy& upload, const VideoCore::StagingData& staging); @@ -201,8 +189,8 @@ private: class Framebuffer : public VideoCore::FramebufferBase { public: - explicit Framebuffer(TextureRuntime& runtime, Surface* const color, u32 color_level, - Surface* const depth_stencil, u32 depth_level, const Pica::Regs& regs, + explicit Framebuffer(TextureRuntime& runtime, const Surface* color, u32 color_level, + const Surface* depth_stencil, u32 depth_level, const Pica::Regs& regs, Common::Rectangle surfaces_rect); ~Framebuffer(); @@ -223,4 +211,32 @@ private: GLuint handle{}; }; +class Sampler { +public: + explicit Sampler(TextureRuntime&, VideoCore::SamplerParams params); + ~Sampler(); + + Sampler(const Sampler&) = delete; + Sampler& operator=(const Sampler&) = delete; + + Sampler(Sampler&&) = default; + Sampler& operator=(Sampler&&) = default; + + [[nodiscard]] GLuint Handle() const noexcept { + return sampler.handle; + } + +private: + OGLSampler sampler; +}; + +struct Traits { + using Runtime = OpenGL::TextureRuntime; + using Sampler = OpenGL::Sampler; + using Surface = OpenGL::Surface; + using Framebuffer = OpenGL::Framebuffer; +}; + +using RasterizerCache = VideoCore::RasterizerCache; + } // namespace OpenGL