rasterizer_cache: Remove runtime allocation caching (#6705)

* rasterizer_cache: Sentence surfaces

* gl_texture_runtime: Remove runtime side allocation cache

* rasterizer_cache: Adjust surface scale during reinterpretation

* Fixes pixelated outlines. Also allows removing the d24s8-specific hack and is more generic overall

* rasterizer_cache: Remove Expand flag

* Begone!

* rasterizer_cache: Cache framebuffers with surface id

* rasterizer_cache: Sentence texture cubes

* renderer_opengl: Move texture mailbox to separate file

* Makes renderer_opengl cleaner overall and allows the removal threshold to be reported by the runtime instead of being hardcoded. Vulkan requires this

* rasterizer_cache: Don't flush cache on layout change

* rasterizer_cache: Overhaul framebuffer management

* video_core: Remove duplicate

* rasterizer_cache: Sentence custom surfaces

* Vulkan cannot destroy images immediately so this ensures we use our garbage collector for that purpose
This commit is contained in:
GPUCode 2023-08-01 03:35:41 +03:00 committed by GitHub
parent 3fedc68230
commit a955f02771
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
23 changed files with 734 additions and 809 deletions

View file

@ -62,12 +62,29 @@ public:
return SlotId{index};
}
/**
 * Relocates the value stored at existing_id into a newly acquired slot and constructs a
 * replacement value in its original slot.
 * @param existing_id Slot whose current value is relocated; it holds the replacement afterwards.
 * @param args Arguments forwarded to the constructor of the replacement value.
 * @returns The id of the slot that now holds the relocated (previous) value.
 */
template <typename... Args>
[[nodiscard]] SlotId swap_and_insert(SlotId existing_id, Args&&... args) noexcept {
    // Build the replacement before touching the existing slot: args may refer to the value
    // stored at existing_id, which is moved-from and destroyed below, so it has to be read
    // while that object is still alive.
    T replacement(std::forward<Args>(args)...);

    const u32 index = FreeValueIndex();
    T& existing_value = values[existing_id.index].object;

    // Relocate the previous value into the free slot and end the lifetime of the original.
    new (&values[index].object) T(std::move(existing_value));
    existing_value.~T();

    // Install the replacement where the previous value used to live.
    new (&values[existing_id.index].object) T(std::move(replacement));
    SetStorageBit(index);
    return SlotId{index};
}
/// Destroys the value stored at id and hands its slot back to the free list.
void erase(SlotId id) noexcept {
    const auto slot = id.index;
    values[slot].object.~T();
    free_list.push_back(slot);
    ResetStorageBit(slot);
}
/// Returns the capacity minus the number of entries currently sitting in the free list.
size_t size() const noexcept {
    const auto unused_slots = free_list.size();
    return values_capacity - unused_slots;
}
private:
struct NonTrivialDummy {
NonTrivialDummy() noexcept {}
@ -93,7 +110,7 @@ private:
return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0;
}
void ValidateIndex(SlotId id) const noexcept {
void ValidateIndex([[maybe_unused]] SlotId id) const noexcept {
DEBUG_ASSERT(id);
DEBUG_ASSERT(id.index / 64 < stored_bitset.size());
DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);

View file

@ -617,9 +617,7 @@ void System::ApplySettings() {
if (VideoCore::g_renderer) {
auto& settings = VideoCore::g_renderer->Settings();
settings.bg_color_update_requested = true;
settings.sampler_update_requested = true;
settings.shader_update_requested = true;
settings.texture_filter_update_requested = true;
}
if (IsPoweredOn()) {

View file

@ -34,7 +34,6 @@ add_library(video_core STATIC
regs_texturing.h
renderer_base.cpp
renderer_base.h
rasterizer_cache/framebuffer_base.cpp
rasterizer_cache/framebuffer_base.h
rasterizer_cache/pixel_format.cpp
rasterizer_cache/pixel_format.h
@ -76,6 +75,8 @@ add_library(video_core STATIC
renderer_opengl/gl_state.h
renderer_opengl/gl_stream_buffer.cpp
renderer_opengl/gl_stream_buffer.h
renderer_opengl/gl_texture_mailbox.cpp
renderer_opengl/gl_texture_mailbox.h
renderer_opengl/gl_texture_runtime.cpp
renderer_opengl/gl_texture_runtime.h
renderer_opengl/gl_vars.cpp

View file

@ -1,73 +0,0 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/rasterizer_cache/framebuffer_base.h"
#include "video_core/rasterizer_cache/surface_base.h"
#include "video_core/regs.h"
namespace VideoCore {
/// Defaulted default constructor.
FramebufferBase::FramebufferBase() = default;
/**
 * Derives the framebuffer metadata (resolution scale, draw rectangle, viewport, scissor
 * rectangle and per-attachment invalidation intervals) from the register state and the
 * provided attachments.
 * @param regs Register state; the viewport and scissor values are read from regs.rasterizer.
 * @param color Color attachment, may be null.
 * @param color_level Mipmap level of the color attachment being rendered to.
 * @param depth_stencil Depth/stencil attachment, may be null.
 * @param depth_level Mipmap level of the depth/stencil attachment being rendered to.
 * @param surfaces_rect Rectangle used as the origin and clamping bounds for the computed rects.
 */
FramebufferBase::FramebufferBase(const Pica::Regs& regs, const SurfaceBase* color, u32 color_level,
const SurfaceBase* depth_stencil, u32 depth_level,
Common::Rectangle<u32> surfaces_rect) {
// Prefer the scale of the color attachment, then the depth attachment, otherwise 1.
res_scale = color ? color->res_scale : (depth_stencil ? depth_stencil->res_scale : 1u);
// Determine the draw rectangle (render area + scissor)
const Common::Rectangle viewport_rect = regs.rasterizer.GetViewportRect();
// Each edge is the scaled viewport edge offset by surfaces_rect and clamped to its bounds.
draw_rect.left =
std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale,
surfaces_rect.left, surfaces_rect.right);
draw_rect.top =
std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top * res_scale,
surfaces_rect.bottom, surfaces_rect.top);
draw_rect.right =
std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right * res_scale,
surfaces_rect.left, surfaces_rect.right);
draw_rect.bottom =
std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale,
surfaces_rect.bottom, surfaces_rect.top);
// Update viewport
// Unlike draw_rect, the viewport origin and size are not clamped to surfaces_rect.
viewport.x = static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale;
viewport.y = static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale;
viewport.width = static_cast<s32>(viewport_rect.GetWidth() * res_scale);
viewport.height = static_cast<s32>(viewport_rect.GetHeight() * res_scale);
// Scissor checks are window-, not viewport-relative, which means that if the cached texture
// sub-rect changes, the scissor bounds also need to be updated.
scissor_rect.left =
static_cast<s32>(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale);
scissor_rect.bottom =
static_cast<s32>(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale);
// x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when
// scaling or doing multisampling.
scissor_rect.right =
static_cast<s32>(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale);
scissor_rect.top =
static_cast<s32>(surfaces_rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * res_scale);
// Rendering to mipmaps is something quite rare so log it when it occurs.
if (color_level != 0) {
LOG_WARNING(HW_GPU, "Game is rendering to color mipmap {}", color_level);
}
if (depth_level != 0) {
LOG_WARNING(HW_GPU, "Game is rendering to depth mipmap {}", depth_level);
}
// Query surface invalidation intervals
// The draw rectangle is divided by res_scale before being passed to GetSubRectInterval.
const Common::Rectangle draw_rect_unscaled{draw_rect / res_scale};
if (color) {
// Keep a copy of the attachment's parameters; slot 0 of intervals belongs to color.
color_params = *color;
intervals[0] = color->GetSubRectInterval(draw_rect_unscaled, color_level);
}
if (depth_stencil) {
// Keep a copy of the attachment's parameters; slot 1 of intervals belongs to depth.
depth_params = *depth_stencil;
intervals[1] = depth_stencil->GetSubRectInterval(draw_rect_unscaled, depth_level);
}
}
} // namespace VideoCore

View file

@ -4,12 +4,11 @@
#pragma once
#include "common/hash.h"
#include "common/math_util.h"
#include "video_core/rasterizer_cache/slot_id.h"
#include "video_core/rasterizer_cache/surface_params.h"
namespace Pica {
struct Regs;
}
#include "video_core/regs_rasterizer.h"
namespace VideoCore {
@ -22,31 +21,109 @@ struct ViewportInfo {
s32 height;
};
/// Key describing a framebuffer: the attached surface ids, the mipmap level of each
/// attachment and whether shadow rendering is active. Instances are hashed and compared
/// as raw bytes, which is why the trailing padding is declared explicitly.
struct FramebufferParams {
    SurfaceId color_id;
    SurfaceId depth_id;
    u32 color_level;
    u32 depth_level;
    bool shadow_rendering;
    INSERT_PADDING_BYTES(3);

    /// Hashes the raw bytes of the structure.
    u64 Hash() const noexcept {
        return Common::ComputeHash64(this, sizeof(FramebufferParams));
    }

    /// Two parameter sets are equal when their raw bytes are identical.
    bool operator==(const FramebufferParams& params) const noexcept {
        const int difference = std::memcmp(this, &params, sizeof(FramebufferParams));
        return difference == 0;
    }

    /// Maps a surface type to its attachment slot: 0 for color, 1 for depth/depth-stencil.
    /// Any other type is logged as critical and falls back to slot 0.
    u32 Index(VideoCore::SurfaceType type) const noexcept {
        if (type == VideoCore::SurfaceType::Color) {
            return 0;
        }
        if (type == VideoCore::SurfaceType::Depth ||
            type == VideoCore::SurfaceType::DepthStencil) {
            return 1;
        }
        LOG_CRITICAL(HW_GPU, "Unknown surface type in framebuffer");
        return 0;
    }
};
static_assert(std::has_unique_object_representations_v<FramebufferParams>,
"FramebufferParams is not suitable for hashing");
template <class T>
class RasterizerCache;
/**
* A framebuffer is a lightweight abstraction over a pair of surfaces and provides
* metadata about them.
* @brief FramebufferHelper is a RAII wrapper over backend specific framebuffer handle that
* provides the viewport/scissor/draw rectangles and performs automatic rasterizer cache invalidation
* when out of scope.
*/
class FramebufferBase {
template <class T>
class FramebufferHelper {
public:
FramebufferBase();
FramebufferBase(const Pica::Regs& regs, const SurfaceBase* color, u32 color_level,
const SurfaceBase* depth_stencil, u32 depth_level,
Common::Rectangle<u32> surfaces_rect);
explicit FramebufferHelper(RasterizerCache<T>* res_cache_, typename T::Framebuffer* fb_,
const Pica::RasterizerRegs& regs,
Common::Rectangle<u32> surfaces_rect)
: res_cache{res_cache_}, fb{fb_} {
const u32 res_scale = fb->Scale();
SurfaceParams ColorParams() const noexcept {
return color_params;
// Determine the draw rectangle (render area + scissor)
const Common::Rectangle viewport_rect = regs.GetViewportRect();
draw_rect.left =
std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale,
surfaces_rect.left, surfaces_rect.right);
draw_rect.top =
std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top * res_scale,
surfaces_rect.bottom, surfaces_rect.top);
draw_rect.right =
std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right * res_scale,
surfaces_rect.left, surfaces_rect.right);
draw_rect.bottom = std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) +
viewport_rect.bottom * res_scale,
surfaces_rect.bottom, surfaces_rect.top);
// Update viewport
viewport.x = static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale;
viewport.y = static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale;
viewport.width = static_cast<s32>(viewport_rect.GetWidth() * res_scale);
viewport.height = static_cast<s32>(viewport_rect.GetHeight() * res_scale);
// Scissor checks are window-, not viewport-relative, which means that if the cached texture
// sub-rect changes, the scissor bounds also need to be updated.
scissor_rect.left = static_cast<s32>(surfaces_rect.left + regs.scissor_test.x1 * res_scale);
scissor_rect.bottom =
static_cast<s32>(surfaces_rect.bottom + regs.scissor_test.y1 * res_scale);
// x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when
// scaling or doing multisampling.
scissor_rect.right =
static_cast<s32>(surfaces_rect.left + (regs.scissor_test.x2 + 1) * res_scale);
scissor_rect.top =
static_cast<s32>(surfaces_rect.bottom + (regs.scissor_test.y2 + 1) * res_scale);
}
SurfaceParams DepthParams() const noexcept {
return depth_params;
/// On destruction, invalidates the region covered by the draw rectangle on every surface
/// attached to the framebuffer (color first, then depth).
~FramebufferHelper() {
    const Common::Rectangle unscaled_rect{draw_rect / fb->Scale()};

    // Resolves the memory interval the draw rectangle occupies at the given level of the
    // surface and reports it to the cache, naming the surface as the region owner.
    const auto mark_drawn = [this, &unscaled_rect](SurfaceId id, u32 mip) {
        const auto& target = res_cache->GetSurface(id);
        const SurfaceInterval region = target.GetSubRectInterval(unscaled_rect, mip);
        const PAddr start = boost::icl::first(region);
        const u32 length = boost::icl::length(region);
        res_cache->InvalidateRegion(start, length, id);
    };

    if (fb->color_id) {
        mark_drawn(fb->color_id, fb->color_level);
    }
    if (fb->depth_id) {
        mark_drawn(fb->depth_id, fb->depth_level);
    }
}
SurfaceInterval Interval(SurfaceType type) const noexcept {
return intervals[Index(type)];
}
u32 ResolutionScale() const noexcept {
return res_scale;
typename T::Framebuffer* Framebuffer() const noexcept {
return fb;
}
Common::Rectangle<u32> DrawRect() const noexcept {
@ -61,28 +138,21 @@ public:
return viewport;
}
protected:
u32 Index(VideoCore::SurfaceType type) const noexcept {
switch (type) {
case VideoCore::SurfaceType::Color:
return 0;
case VideoCore::SurfaceType::Depth:
case VideoCore::SurfaceType::DepthStencil:
return 1;
default:
LOG_CRITICAL(HW_GPU, "Unknown surface type in framebuffer");
return 0;
}
}
protected:
SurfaceParams color_params{};
SurfaceParams depth_params{};
std::array<SurfaceInterval, 2> intervals{};
Common::Rectangle<s32> scissor_rect{};
Common::Rectangle<u32> draw_rect{};
private:
RasterizerCache<T>* res_cache;
typename T::Framebuffer* fb;
Common::Rectangle<s32> scissor_rect;
Common::Rectangle<u32> draw_rect;
ViewportInfo viewport;
u32 res_scale{1};
};
} // namespace VideoCore
namespace std {

/// Hash specialization that delegates to FramebufferParams::Hash(), allowing the params
/// to be used as a key in unordered containers.
template <>
struct hash<VideoCore::FramebufferParams> {
    std::size_t operator()(const VideoCore::FramebufferParams& key) const noexcept {
        const auto digest = key.Hash();
        return digest;
    }
};

} // namespace std

View file

@ -37,7 +37,7 @@ RasterizerCache<T>::RasterizerCache(Memory::MemorySystem& memory_,
Pica::Regs& regs_, RendererBase& renderer_)
: memory{memory_}, custom_tex_manager{custom_tex_manager_}, runtime{runtime_}, regs{regs_},
renderer{renderer_}, resolution_scale_factor{renderer.GetResolutionScaleFactor()},
use_filter{Settings::values.texture_filter.GetValue() != Settings::TextureFilter::None},
filter{Settings::values.texture_filter.GetValue()},
dump_textures{Settings::values.dump_textures.GetValue()},
use_custom_textures{Settings::values.custom_textures.GetValue()} {
using TextureConfig = Pica::TexturingRegs::TextureConfig;
@ -76,17 +76,21 @@ RasterizerCache<T>::~RasterizerCache() {
template <class T>
void RasterizerCache<T>::TickFrame() {
custom_tex_manager.TickFrame();
RunGarbageCollector();
const auto new_filter = Settings::values.texture_filter.GetValue();
if (filter != new_filter) [[unlikely]] {
filter = new_filter;
UnregisterAll();
}
const u32 scale_factor = renderer.GetResolutionScaleFactor();
const bool resolution_scale_changed = resolution_scale_factor != scale_factor;
const bool use_custom_texture_changed =
Settings::values.custom_textures.GetValue() != use_custom_textures;
const bool texture_filter_changed =
renderer.Settings().texture_filter_update_requested.exchange(false);
if (resolution_scale_changed || texture_filter_changed || use_custom_texture_changed) {
if (resolution_scale_changed || use_custom_texture_changed) {
resolution_scale_factor = scale_factor;
use_filter = Settings::values.texture_filter.GetValue() != Settings::TextureFilter::None;
use_custom_textures = Settings::values.custom_textures.GetValue();
if (use_custom_textures) {
custom_tex_manager.FindCustomTextures();
@ -95,6 +99,34 @@ void RasterizerCache<T>::TickFrame() {
}
}
/// Advances the frame counter and destroys every sentenced surface whose age exceeds the
/// removal threshold reported by the runtime, together with the framebuffers that use it.
template <class T>
void RasterizerCache<T>::RunGarbageCollector() {
    ++frame_tick;

    auto entry = sentenced.begin();
    while (entry != sentenced.end()) {
        const auto [surface_id, tick] = *entry;
        const bool expired = frame_tick - tick > runtime.RemoveThreshold();
        if (!expired) {
            // Still within its grace period, keep it around.
            ++entry;
            continue;
        }
        // Framebuffers referencing the surface go first, then the surface itself.
        RemoveFramebuffers(surface_id);
        slot_surfaces.erase(surface_id);
        entry = sentenced.erase(entry);
    }
}
/// Erases every cached framebuffer that has surface_id as its color or depth attachment.
template <class T>
void RasterizerCache<T>::RemoveFramebuffers(SurfaceId surface_id) {
    auto entry = framebuffers.begin();
    while (entry != framebuffers.end()) {
        const auto& key = entry->first;
        const bool uses_surface = key.color_id == surface_id || key.depth_id == surface_id;
        if (!uses_surface) {
            ++entry;
            continue;
        }
        // Release the framebuffer object before dropping its lookup entry.
        slot_framebuffers.erase(entry->second);
        entry = framebuffers.erase(entry);
    }
}
template <class T>
bool RasterizerCache<T>::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) {
const DebugScope scope{runtime, Common::Vec4f{0.f, 0.f, 1.f, 1.f},
@ -322,29 +354,46 @@ template <class T>
void RasterizerCache<T>::CopySurface(Surface& src_surface, Surface& dst_surface,
SurfaceInterval copy_interval) {
MICROPROFILE_SCOPE(RasterizerCache_CopySurface);
const PAddr copy_addr = copy_interval.lower();
const SurfaceParams subrect_params = dst_surface.FromInterval(copy_interval);
const auto dst_rect = dst_surface.GetScaledSubRect(subrect_params);
ASSERT(subrect_params.GetInterval() == copy_interval);
if (src_surface.type == SurfaceType::Fill) {
const TextureClear clear = {
.texture_level = dst_surface.LevelOf(copy_addr),
.texture_rect = dst_rect,
.texture_rect = dst_surface.GetScaledSubRect(subrect_params),
.value = src_surface.MakeClearValue(copy_addr, dst_surface.pixel_format),
};
runtime.ClearTexture(dst_surface, clear);
return;
}
const TextureBlit blit = {
.src_level = src_surface.LevelOf(copy_addr),
.dst_level = dst_surface.LevelOf(copy_addr),
.src_rect = src_surface.GetScaledSubRect(subrect_params),
.dst_rect = dst_rect,
};
runtime.BlitTextures(src_surface, dst_surface, blit);
const u32 src_scale = src_surface.res_scale;
const u32 dst_scale = dst_surface.res_scale;
if (src_scale > dst_scale) {
dst_surface.ScaleUp(src_scale);
}
const auto src_rect = src_surface.GetScaledSubRect(subrect_params);
const auto dst_rect = dst_surface.GetScaledSubRect(subrect_params);
if (src_scale == dst_scale) {
const TextureCopy copy = {
.src_level = src_surface.LevelOf(copy_addr),
.dst_level = dst_surface.LevelOf(copy_addr),
.src_offset = {src_rect.left, src_rect.bottom},
.dst_offset = {dst_rect.left, dst_rect.bottom},
.extent = {src_rect.GetWidth(), src_rect.GetHeight()},
};
runtime.CopyTextures(src_surface, dst_surface, copy);
} else {
const TextureBlit blit = {
.src_level = src_surface.LevelOf(copy_addr),
.dst_level = dst_surface.LevelOf(copy_addr),
.src_rect = src_rect,
.dst_rect = dst_rect,
};
runtime.BlitTextures(src_surface, dst_surface, blit);
}
}
template <class T>
@ -361,33 +410,7 @@ SurfaceId RasterizerCache<T>::GetSurface(const SurfaceParams& params, ScaleMatch
SurfaceId surface_id = FindMatch<MatchFlags::Exact>(params, match_res_scale);
if (!surface_id) {
u16 target_res_scale = params.res_scale;
if (match_res_scale != ScaleMatch::Exact) {
// This surface may have a subrect of another surface with a higher res_scale, find
// it to adjust our params
SurfaceParams find_params = params;
SurfaceId expandable_id = FindMatch<MatchFlags::Expand>(find_params, match_res_scale);
if (expandable_id) {
Surface& expandable = slot_surfaces[expandable_id];
if (expandable.res_scale > target_res_scale) {
target_res_scale = expandable.res_scale;
}
}
// Keep res_scale when reinterpreting d24s8 -> rgba8
if (params.pixel_format == PixelFormat::RGBA8) {
find_params.pixel_format = PixelFormat::D24S8;
expandable_id = FindMatch<MatchFlags::Expand>(find_params, match_res_scale);
if (expandable_id) {
Surface& expandable = slot_surfaces[expandable_id];
if (expandable.res_scale > target_res_scale) {
target_res_scale = expandable.res_scale;
}
}
}
}
SurfaceParams new_params = params;
new_params.res_scale = target_res_scale;
surface_id = CreateSurface(new_params);
surface_id = CreateSurface(params);
RegisterSurface(surface_id);
}
@ -429,31 +452,6 @@ typename RasterizerCache<T>::SurfaceRect_Tuple RasterizerCache<T>::GetSurfaceSub
aligned_params.UpdateParams();
}
// Check for a surface we can expand before creating a new one
if (!surface_id) {
surface_id = FindMatch<MatchFlags::Expand>(aligned_params, match_res_scale);
if (surface_id) {
Surface& surface = slot_surfaces[surface_id];
aligned_params.width = aligned_params.stride;
aligned_params.UpdateParams();
SurfaceParams new_params = surface;
new_params.addr = std::min(aligned_params.addr, surface.addr);
new_params.end = std::max(aligned_params.end, surface.end);
new_params.size = new_params.end - new_params.addr;
new_params.height =
new_params.size / aligned_params.BytesInPixels(aligned_params.stride);
new_params.UpdateParams();
ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0);
SurfaceId new_surface_id = CreateSurface(new_params);
DuplicateSurface(surface_id, new_surface_id);
UnregisterSurface(surface_id);
RegisterSurface(new_surface_id);
surface_id = new_surface_id;
}
}
// No subrect found - create and return a new surface
if (!surface_id) {
SurfaceParams new_params = aligned_params;
@ -499,7 +497,7 @@ SurfaceId RasterizerCache<T>::GetTextureSurface(const Pica::Texture::TextureInfo
params.levels = max_level + 1;
params.is_tiled = true;
params.pixel_format = PixelFormatFromTextureFormat(info.format);
params.res_scale = use_filter ? resolution_scale_factor : 1;
params.res_scale = filter != Settings::TextureFilter::None ? resolution_scale_factor : 1;
params.UpdateParams();
const u32 min_width = info.width >> max_level;
@ -552,7 +550,7 @@ typename T::Surface& RasterizerCache<T>::GetTextureCube(const TextureCubeConfig&
.height = config.width,
.stride = config.width,
.levels = config.levels,
.res_scale = use_filter ? resolution_scale_factor : 1,
.res_scale = filter != Settings::TextureFilter::None ? resolution_scale_factor : 1,
.texture_type = TextureType::CubeMap,
.pixel_format = PixelFormatFromTextureFormat(config.format),
.type = SurfaceType::Texture,
@ -609,8 +607,8 @@ typename T::Surface& RasterizerCache<T>::GetTextureCube(const TextureCubeConfig&
}
template <class T>
typename T::Framebuffer RasterizerCache<T>::GetFramebufferSurfaces(bool using_color_fb,
bool using_depth_fb) {
FramebufferHelper<T> RasterizerCache<T>::GetFramebufferSurfaces(bool using_color_fb,
bool using_depth_fb) {
const auto& config = regs.framebuffer.framebuffer;
const s32 framebuffer_width = config.GetWidth();
@ -692,35 +690,20 @@ typename T::Framebuffer RasterizerCache<T>::GetFramebufferSurfaces(bool using_co
boost::icl::length(depth_vp_interval));
}
render_targets = RenderTargets{
fb_params = FramebufferParams{
.color_id = color_id,
.depth_id = depth_id,
.color_level = color_level,
.depth_level = depth_level,
.shadow_rendering = regs.framebuffer.IsShadowRendering(),
};
return Framebuffer{runtime, color_surface, color_level, depth_surface,
depth_level, regs, fb_rect};
}
auto [it, new_framebuffer] = framebuffers.try_emplace(fb_params);
if (new_framebuffer) {
it->second = slot_framebuffers.insert(runtime, fb_params, color_surface, depth_surface);
}
template <class T>
void RasterizerCache<T>::InvalidateFramebuffer(const Framebuffer& framebuffer) {
const auto invalidate = [&](SurfaceId surface_id) {
if (!surface_id) {
return;
}
Surface& surface = slot_surfaces[surface_id];
const SurfaceInterval interval = framebuffer.Interval(surface.type);
const PAddr addr = boost::icl::first(interval);
const u32 size = boost::icl::length(interval);
InvalidateRegion(addr, size, surface_id);
};
const bool has_color = framebuffer.HasAttachment(SurfaceType::Color);
const bool has_depth = framebuffer.HasAttachment(SurfaceType::DepthStencil);
if (has_color) {
invalidate(render_targets.color_id);
}
if (has_depth) {
invalidate(render_targets.depth_id);
}
return FramebufferHelper<T>{this, &slot_framebuffers[it->second], regs.rasterizer, fb_rect};
}
template <class T>
@ -875,9 +858,6 @@ SurfaceId RasterizerCache<T>::FindMatch(const SurfaceParams& params, ScaleMatch
surface.CanReinterpret(params);
return std::make_pair(matched, copy_interval);
});
IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Expand>{}, [&] {
return std::make_pair(surface.CanExpand(params), surface.GetInterval());
});
IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::TexCopy>{}, [&] {
return std::make_pair(surface.CanTexCopy(params), surface.GetInterval());
});
@ -1068,14 +1048,12 @@ bool RasterizerCache<T>::UploadCustomSurface(SurfaceId surface_id, SurfaceInterv
const auto upload = [this, level, surface_id, material]() -> bool {
Surface& surface = slot_surfaces[surface_id];
if (False(surface.flags & SurfaceFlagBits::Custom)) {
LOG_ERROR(HW_GPU, "Surface is not suitable for custom upload, aborting!");
return false;
}
if (!surface.IsCustom() && !surface.Swap(material)) {
LOG_ERROR(HW_GPU, "Custom compressed format {} unsupported by host GPU",
material->format);
return false;
ASSERT_MSG(True(surface.flags & SurfaceFlagBits::Custom),
"Surface is not suitable for custom upload, aborting!");
if (!surface.IsCustom()) {
const SurfaceId old_id =
slot_surfaces.swap_and_insert(surface_id, runtime, surface, material);
sentenced.emplace_back(old_id, frame_tick);
}
surface.UploadCustom(material, level);
if (custom_tex_manager.SkipMipmaps()) {
@ -1159,6 +1137,10 @@ bool RasterizerCache<T>::ValidateByReinterpretation(Surface& surface, SurfacePar
if (boost::icl::is_empty(copy_interval & interval)) {
return false;
}
const u32 res_scale = src_surface.res_scale;
if (res_scale > surface.res_scale) {
surface.ScaleUp(res_scale);
}
const PAddr addr = boost::icl::lower(interval);
const SurfaceParams copy_params = surface.FromInterval(copy_interval);
const TextureBlit reinterpret = {
@ -1229,25 +1211,24 @@ void RasterizerCache<T>::FlushRegion(PAddr addr, u32 size, SurfaceId flush_surfa
SurfaceRegions flushed_intervals;
for (const auto& [region, surface_id] : RangeFromInterval(dirty_regions, flush_interval)) {
// Small sizes imply that this most likely comes from the cpu, flush the entire region
// the point is to avoid thousands of small writes every frame if the cpu decides to
// access that region, anything higher than 8 you're guaranteed it comes from a service
auto interval = size <= 8 ? region : region & flush_interval;
if (flush_surface_id && surface_id != flush_surface_id) {
continue;
}
// Small sizes imply that this most likely comes from the cpu, flush the entire region
// the point is to avoid thousands of small writes every frame if the cpu decides to
// access that region, anything higher than 8 you're guaranteed it comes from a service
const auto interval = size <= 8 ? region : region & flush_interval;
Surface& surface = slot_surfaces[surface_id];
ASSERT_MSG(surface.IsRegionValid(interval), "Region owner has invalid regions");
const DebugScope scope{runtime, Common::Vec4f{0.f, 0.f, 0.f, 1.f},
"RasterizerCache::FlushRegion (from {:#x} to {:#x})",
interval.lower(), interval.upper()};
// Sanity check, this surface is the last one that marked this region dirty
Surface& surface = slot_surfaces[surface_id];
ASSERT(surface.IsRegionValid(interval));
if (surface.type == SurfaceType::Fill) {
SCOPE_EXIT({ flushed_intervals += interval; });
if (surface.IsFill()) {
DownloadFillSurface(surface, interval);
flushed_intervals += interval;
continue;
}
@ -1261,8 +1242,6 @@ void RasterizerCache<T>::FlushRegion(PAddr addr, u32 size, SurfaceId flush_surfa
}
DownloadSurface(surface, download_interval);
}
flushed_intervals += interval;
}
// Reset dirty regions
@ -1294,7 +1273,6 @@ void RasterizerCache<T>::InvalidateRegion(PAddr addr, u32 size, SurfaceId region
if (surface_id == region_owner_id) {
return;
}
// If the CPU is invalidating this region we want to remove it
// to (likely) mark the memory pages as uncached
if (!region_owner_id && size <= 8) {
@ -1302,14 +1280,12 @@ void RasterizerCache<T>::InvalidateRegion(PAddr addr, u32 size, SurfaceId region
remove_surfaces.push_back(surface_id);
return;
}
surface.MarkInvalid(surface.GetInterval() & invalid_interval);
// If the surface has no salvageable data it should be removed
// from the cache to avoid clogging the data structure.
if (surface.IsFullyInvalid()) {
remove_surfaces.push_back(surface_id);
const auto interval = surface.GetInterval() & invalid_interval;
surface.MarkInvalid(interval);
if (!surface.IsFullyInvalid()) {
return;
}
remove_surfaces.push_back(surface_id);
});
if (region_owner_id) {
@ -1318,15 +1294,30 @@ void RasterizerCache<T>::InvalidateRegion(PAddr addr, u32 size, SurfaceId region
dirty_regions.erase(invalid_interval);
}
for (const SurfaceId remove_surface_id : remove_surfaces) {
UnregisterSurface(remove_surface_id);
for (const SurfaceId surface_id : remove_surfaces) {
UnregisterSurface(surface_id);
if (!slot_surfaces[surface_id].IsFill()) {
sentenced.emplace_back(surface_id, frame_tick);
} else {
slot_surfaces.erase(surface_id);
}
}
remove_surfaces.clear();
}
template <class T>
SurfaceId RasterizerCache<T>::CreateSurface(const SurfaceParams& params) {
SurfaceId surface_id = slot_surfaces.insert(runtime, params);
const SurfaceId surface_id = [&] {
const auto it = std::find_if(sentenced.begin(), sentenced.end(), [&](const auto& pair) {
return slot_surfaces[pair.first] == params;
});
if (it != sentenced.end()) {
const SurfaceId surface_id = it->first;
sentenced.erase(it);
return surface_id;
}
return slot_surfaces.insert(runtime, params);
}();
Surface& surface = slot_surfaces[surface_id];
surface.MarkInvalid(surface.GetInterval());
return surface_id;
@ -1368,8 +1359,6 @@ void RasterizerCache<T>::UnregisterSurface(SurfaceId surface_id) {
surfaces.erase(vector_it);
});
SCOPE_EXIT({ slot_surfaces.erase(surface_id); });
if (False(surface.flags & SurfaceFlagBits::Tracked)) {
return;
}
@ -1383,7 +1372,7 @@ void RasterizerCache<T>::UnregisterSurface(SurfaceId surface_id) {
}
if (std::none_of(cube.face_ids.begin(), cube.face_ids.end(),
[](SurfaceId id) { return id; })) {
slot_surfaces.erase(cube.surface_id);
sentenced.emplace_back(cube.surface_id, frame_tick);
return true;
}
return false;
@ -1400,7 +1389,6 @@ void RasterizerCache<T>::UnregisterAll() {
}
texture_cube_cache.clear();
remove_surfaces.clear();
runtime.Reset();
}
template <class T>

View file

@ -5,11 +5,13 @@
#pragma once
#include <functional>
#include <list>
#include <optional>
#include <unordered_map>
#include <vector>
#include <boost/icl/interval_map.hpp>
#include <tsl/robin_map.h>
#include "video_core/rasterizer_cache/framebuffer_base.h"
#include "video_core/rasterizer_cache/sampler_params.h"
#include "video_core/rasterizer_cache/surface_params.h"
#include "video_core/rasterizer_cache/texture_cube.h"
@ -26,6 +28,10 @@ namespace Pica::Texture {
struct TextureInfo;
}
namespace Settings {
enum class TextureFilter : u32;
}
namespace VideoCore {
enum class ScaleMatch {
@ -38,9 +44,8 @@ enum class MatchFlags {
Exact = 1 << 0, ///< Surface perfectly matches params
SubRect = 1 << 1, ///< Surface encompasses params
Copy = 1 << 2, ///< Surface that can be used as a copy source
Expand = 1 << 3, ///< Surface that can expand params
TexCopy = 1 << 4, ///< Surface that will match a display transfer "texture copy" parameters
Reinterpret = 1 << 5, ///< Surface might have different pixel format.
TexCopy = 1 << 3, ///< Surface that will match a display transfer "texture copy" parameters
Reinterpret = 1 << 4, ///< Surface might have different pixel format.
};
DECLARE_ENUM_FLAG_OPERATORS(MatchFlags);
@ -66,11 +71,6 @@ class RasterizerCache {
using SurfaceRect_Tuple = std::pair<SurfaceId, Common::Rectangle<u32>>;
using PageMap = boost::icl::interval_map<u32, int>;
struct RenderTargets {
SurfaceId color_id;
SurfaceId depth_id;
};
public:
explicit RasterizerCache(Memory::MemorySystem& memory, CustomTexManager& custom_tex_manager,
Runtime& runtime, Pica::Regs& regs, RendererBase& renderer);
@ -115,10 +115,7 @@ public:
Surface& GetTextureCube(const TextureCubeConfig& config);
/// Get the color and depth surfaces based on the framebuffer configuration
Framebuffer GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb);
/// Marks the draw rectangle defined in framebuffer as invalid
void InvalidateFramebuffer(const Framebuffer& framebuffer);
FramebufferHelper<T> GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb);
/// Get a surface that matches a "texture copy" display transfer config
SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params);
@ -161,6 +158,12 @@ private:
SurfaceId FindMatch(const SurfaceParams& params, ScaleMatch match_scale_type,
std::optional<SurfaceInterval> validate_interval = std::nullopt);
/// Unregisters sentenced surfaces that have surpassed the destruction threshold.
void RunGarbageCollector();
/// Removes any framebuffers that reference the provided surface_id.
void RemoveFramebuffers(SurfaceId surface_id);
/// Transfers ownership of a memory region from src_surface to dest_surface
void DuplicateSurface(SurfaceId src_id, SurfaceId dst_id);
@ -209,15 +212,19 @@ private:
RendererBase& renderer;
std::unordered_map<TextureCubeConfig, TextureCube> texture_cube_cache;
tsl::robin_pg_map<u64, std::vector<SurfaceId>, Common::IdentityHash<u64>> page_table;
std::unordered_map<FramebufferParams, FramebufferId> framebuffers;
std::unordered_map<SamplerParams, SamplerId> samplers;
std::list<std::pair<SurfaceId, u64>> sentenced;
Common::SlotVector<Surface> slot_surfaces;
Common::SlotVector<Sampler> slot_samplers;
Common::SlotVector<Framebuffer> slot_framebuffers;
SurfaceMap dirty_regions;
PageMap cached_pages;
std::vector<SurfaceId> remove_surfaces;
u32 resolution_scale_factor;
RenderTargets render_targets;
bool use_filter;
u64 frame_tick{};
FramebufferParams fb_params;
Settings::TextureFilter filter;
bool dump_textures;
bool use_custom_textures;
};

View file

@ -10,6 +10,7 @@ namespace VideoCore {
using SurfaceId = Common::SlotId;
using SamplerId = Common::SlotId;
using FramebufferId = Common::SlotId;
/// Fake surface ID for null surfaces
constexpr SurfaceId NULL_SURFACE_ID{0};

View file

@ -46,6 +46,10 @@ public:
/// Returns true if the surface contains a custom material with a normal map.
bool HasNormalMap() const noexcept;
/// Returns true if the surface type is SurfaceType::Fill.
bool IsFill() const noexcept {
return type == SurfaceType::Fill;
}
bool Overlaps(PAddr overlap_addr, size_t overlap_size) const noexcept {
const PAddr overlap_end = overlap_addr + static_cast<PAddr>(overlap_size);
return addr < overlap_end && overlap_addr < end;

View file

@ -34,15 +34,6 @@ bool SurfaceParams::CanReinterpret(const SurfaceParams& other_surface) {
GetSubRect(other_surface).right <= stride;
}
bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const {
return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format &&
addr <= expanded_surface.end && expanded_surface.addr <= end &&
is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride &&
(std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) %
BytesInPixels(stride * (is_tiled ? 8 : 1)) ==
0;
}
bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const {
const SurfaceInterval copy_interval = texcopy_params.GetInterval();
if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr ||

View file

@ -26,9 +26,6 @@ public:
/// Returns true if other_surface can be used for reinterpretion.
bool CanReinterpret(const SurfaceParams& other_surface);
/// Returns true if params can be expanded to match expanded_surface
bool CanExpand(const SurfaceParams& expanded_surface) const;
/// Returns true if params can be used for texcopy
bool CanTexCopy(const SurfaceParams& texcopy_params) const;
@ -56,6 +53,10 @@ public:
/// Returns a string identifier of the params object
std::string DebugName(bool scaled, bool custom = false) const noexcept;
/// Returns true if both parameter objects are byte-for-byte identical over
/// sizeof(SurfaceParams).
/// NOTE(review): std::memcmp also compares any padding bytes inside the object; confirm that
/// SurfaceParams has no padding or is always fully zero-initialized before relying on this.
bool operator==(const SurfaceParams& other) const noexcept {
return std::memcmp(this, &other, sizeof(SurfaceParams)) == 0;
}
[[nodiscard]] SurfaceInterval GetInterval() const noexcept {
return SurfaceInterval{addr, end};
}

View file

@ -67,6 +67,7 @@ struct StagingData {
};
class SurfaceParams;
struct FramebufferParams;
u32 MipLevels(u32 width, u32 height, u32 max_level);

View file

@ -31,9 +31,7 @@ struct RendererSettings {
std::function<void()> screenshot_complete_callback;
Layout::FramebufferLayout screenshot_framebuffer_layout;
// Renderer
std::atomic_bool texture_filter_update_requested{false};
std::atomic_bool bg_color_update_requested{false};
std::atomic_bool sampler_update_requested{false};
std::atomic_bool shader_update_requested{false};
};

View file

@ -7,7 +7,7 @@
#include <utility>
#include "core/frontend/emu_window.h"
#include "video_core/renderer_opengl/frame_dumper_opengl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/renderer_opengl/gl_texture_mailbox.h"
namespace OpenGL {

View file

@ -386,21 +386,20 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
(write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 ||
(has_stencil && state.stencil.test_enabled));
const Framebuffer framebuffer =
res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb);
const bool has_color = framebuffer.HasAttachment(SurfaceType::Color);
if (!has_color && shadow_rendering) {
const auto fb_helper = res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb);
const Framebuffer* framebuffer = fb_helper.Framebuffer();
if (!framebuffer->color_id && framebuffer->shadow_rendering) {
return true;
}
// Bind the framebuffer surfaces
if (shadow_rendering) {
state.image_shadow_buffer = framebuffer.Attachment(SurfaceType::Color);
state.image_shadow_buffer = framebuffer->Attachment(SurfaceType::Color);
}
state.draw.draw_framebuffer = framebuffer.Handle();
state.draw.draw_framebuffer = framebuffer->Handle();
// Sync the viewport
const auto viewport = framebuffer.Viewport();
const auto viewport = fb_helper.Viewport();
state.viewport.x = static_cast<GLint>(viewport.x);
state.viewport.y = static_cast<GLint>(viewport.y);
state.viewport.width = static_cast<GLsizei>(viewport.width);
@ -408,21 +407,15 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
// Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect.
// Enable scissor test to prevent drawing outside of the framebuffer region
const auto draw_rect = framebuffer.DrawRect();
const auto draw_rect = fb_helper.DrawRect();
state.scissor.enabled = true;
state.scissor.x = draw_rect.left;
state.scissor.y = draw_rect.bottom;
state.scissor.width = draw_rect.GetWidth();
state.scissor.height = draw_rect.GetHeight();
const int res_scale = static_cast<int>(framebuffer.ResolutionScale());
if (uniform_block_data.data.framebuffer_scale != res_scale) {
uniform_block_data.data.framebuffer_scale = res_scale;
uniform_block_data.dirty = true;
}
// Update scissor uniforms
const auto [scissor_x1, scissor_y2, scissor_x2, scissor_y1] = framebuffer.Scissor();
const auto [scissor_x1, scissor_y2, scissor_x2, scissor_y1] = fb_helper.Scissor();
if (uniform_block_data.data.scissor_x1 != scissor_x1 ||
uniform_block_data.data.scissor_x2 != scissor_x2 ||
uniform_block_data.data.scissor_y1 != scissor_y1 ||
@ -486,13 +479,12 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
GL_TEXTURE_UPDATE_BARRIER_BIT | GL_FRAMEBUFFER_BARRIER_BIT);
}
res_cache.InvalidateFramebuffer(framebuffer);
use_custom_normal = false;
return succeeded;
}
void RasterizerOpenGL::SyncTextureUnits(const Framebuffer& framebuffer) {
void RasterizerOpenGL::SyncTextureUnits(const Framebuffer* framebuffer) {
using TextureType = Pica::TexturingRegs::TextureConfig::TextureType;
const auto pica_textures = regs.texturing.GetTextures();
@ -603,27 +595,15 @@ void RasterizerOpenGL::BindMaterial(u32 texture_index, Surface& surface) {
}
}
bool RasterizerOpenGL::IsFeedbackLoop(u32 texture_index, const Framebuffer& framebuffer,
bool RasterizerOpenGL::IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer,
Surface& surface) {
const GLuint color_attachment = framebuffer.Attachment(SurfaceType::Color);
const GLuint color_attachment = framebuffer->Attachment(SurfaceType::Color);
const bool is_feedback_loop = color_attachment == surface.Handle();
if (!is_feedback_loop) {
return false;
}
// Make a temporary copy of the framebuffer to sample from
Surface temp_surface{runtime, framebuffer.ColorParams()};
const VideoCore::TextureCopy copy = {
.src_level = 0,
.dst_level = 0,
.src_layer = 0,
.dst_layer = 0,
.src_offset = {0, 0},
.dst_offset = {0, 0},
.extent = {temp_surface.GetScaledWidth(), temp_surface.GetScaledHeight()},
};
runtime.CopyTextures(surface, temp_surface, copy);
state.texture_units[texture_index].texture_2d = temp_surface.Handle();
state.texture_units[texture_index].texture_2d = surface.CopyHandle();
return true;
}

View file

@ -93,7 +93,7 @@ private:
void SyncAndUploadLUTsLF();
/// Syncs all enabled PICA texture units
void SyncTextureUnits(const Framebuffer& framebuffer);
void SyncTextureUnits(const Framebuffer* framebuffer);
/// Binds the PICA shadow cube required for shadow mapping
void BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture);
@ -102,7 +102,7 @@ private:
void BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture);
/// Makes a temporary copy of the framebuffer if a feedback loop is detected
bool IsFeedbackLoop(u32 texture_index, const Framebuffer& framebuffer, Surface& surface);
bool IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer, Surface& surface);
/// Unbinds all special texture unit 0 texture configurations
void UnbindSpecial();

View file

@ -3,8 +3,6 @@
// Refer to the license.txt file included.
#include <glad/glad.h>
#include "common/common_funcs.h"
#include "common/logging/log.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_vars.h"

View file

@ -0,0 +1,194 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_mailbox.h"
namespace OpenGL {
/// Constructs the mailbox and places every swap chain frame in the free queue.
/// @param has_debug_tool_ Stored in has_debug_tool; enables the Debug* lock-step helpers.
OGLTextureMailbox::OGLTextureMailbox(bool has_debug_tool_) : has_debug_tool{has_debug_tool_} {
    for (std::size_t slot = 0; slot < swap_chain.size(); ++slot) {
        free_queue.push(&swap_chain[slot]);
    }
}
/// Empties both frame queues under the swap chain lock and wakes every thread waiting on
/// either condition variable, so that nobody stays blocked during shutdown.
OGLTextureMailbox::~OGLTextureMailbox() {
    std::scoped_lock guard{swap_chain_lock};

    // Drop all queued frame pointers; the frames themselves live in swap_chain
    while (!free_queue.empty()) {
        free_queue.pop();
    }
    present_queue.clear();

    // Wake the waiters of both queues to prevent a deadlock on shutdown
    present_cv.notify_all();
    free_cv.notify_all();
}
/// Recreates the presentation FBO of the frame and attaches the frame's color renderbuffer to it.
/// Clears frame->color_reloaded once done.
/// @param frame Frame whose present FBO is recreated.
/// @param height Unused by this implementation.
/// @param width Unused by this implementation.
/// NOTE(review): the parameters are named (height, width), the reverse of ReloadRenderFrame.
void OGLTextureMailbox::ReloadPresentFrame(Frontend::Frame* frame, u32 height, u32 width) {
    frame->present.Release();
    frame->present.Create();

    GLint previous_draw_fbo{};
    glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);

    // Only the draw binding is saved and restored, so only the draw target may be touched.
    // Binding GL_FRAMEBUFFER would also replace the read framebuffer binding and leave it
    // pointing at the present FBO after this function returns.
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, frame->present.handle);
    glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
                              frame->color.handle);
    if (glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
        LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
    }

    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
    frame->color_reloaded = false;
}
/// Recreates the color renderbuffer and the render FBO of the frame for the given dimensions.
/// The OpenGLState that was current on entry is re-applied before returning.
/// @param frame Frame whose render resources are recreated.
/// @param width New width of the color renderbuffer storage; also stored in frame->width.
/// @param height New height of the color renderbuffer storage; also stored in frame->height.
void OGLTextureMailbox::ReloadRenderFrame(Frontend::Frame* frame, u32 width, u32 height) {
// Keep an untouched copy of the current state to re-apply once the frame has been rebuilt
OpenGLState prev_state = OpenGLState::GetCurState();
OpenGLState state = OpenGLState::GetCurState();
// Recreate the color renderbuffer attachment
frame->color.Release();
frame->color.Create();
state.renderbuffer = frame->color.handle;
state.Apply();
glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, width, height);
// Recreate the FBO for the render target
frame->render.Release();
frame->render.Create();
state.draw.read_framebuffer = frame->render.handle;
state.draw.draw_framebuffer = frame->render.handle;
state.Apply();
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
frame->color.handle);
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
}
prev_state.Apply();
frame->width = width;
frame->height = height;
// Flag the frame so that its present FBO is known to reference a replaced color buffer;
// ReloadPresentFrame clears this flag again
frame->color_reloaded = true;
}
/// Returns a frame to render into. A free frame is preferred; when none is available the
/// oldest frame still waiting for presentation is taken back and reused.
Frontend::Frame* OGLTextureMailbox::GetRenderFrame() {
    std::unique_lock lock{swap_chain_lock};

    Frontend::Frame* frame = nullptr;
    if (!free_queue.empty()) {
        frame = free_queue.front();
        free_queue.pop();
    } else {
        // New frames are pushed to the front of present_queue, so the back is the oldest one
        frame = present_queue.back();
        present_queue.pop_back();
    }
    return frame;
}
/// Queues a rendered frame for presentation and wakes one thread waiting for a present frame.
/// @param frame Frame that has finished rendering; it becomes the newest present_queue entry.
void OGLTextureMailbox::ReleaseRenderFrame(Frontend::Frame* frame) {
    std::scoped_lock guard{swap_chain_lock};
    // The newest frame always goes to the front of the queue
    present_queue.emplace_front(frame);
    present_cv.notify_one();
    DebugNotifyNextFrame();
}
void OGLTextureMailbox::LoadPresentFrame() {
// Free the previous frame and add it back to the free queue
if (previous_frame) {
free_queue.push(previous_frame);
free_cv.notify_one();
}
// The newest entries are pushed to the front of the queue
Frontend::Frame* frame = present_queue.front();
present_queue.pop_front();
// Remove all old entries from the present queue and move them back to the free_queue
for (auto f : present_queue) {
free_queue.push(f);
}
present_queue.clear();
previous_frame = frame;
}
/// Waits up to timeout_ms for a rendered frame and returns the frame to present.
/// @param timeout_ms Maximum time to wait for a new entry in present_queue, in milliseconds.
/// @return The newly loaded frame, or the previously presented frame (which may be null)
///         when no new frame arrived in time.
Frontend::Frame* OGLTextureMailbox::TryGetPresentFrame(int timeout_ms) {
    DebugWaitForNextFrame();

    std::unique_lock lock{swap_chain_lock};
    const auto has_pending_frame = [this] { return !present_queue.empty(); };
    present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), has_pending_frame);

    // On a timeout previous_frame is left untouched and handed out again
    if (has_pending_frame()) {
        LoadPresentFrame();
    }
    return previous_frame;
}
/// Advances the debug frame counter and wakes one thread blocked in DebugWaitForNextFrame.
/// Does nothing unless has_debug_tool is set.
void OGLTextureMailbox::DebugNotifyNextFrame() {
    if (has_debug_tool) {
        ++frame_for_debug;
        // Notify while holding the mutex the waiter uses for its predicate check
        std::scoped_lock guard{debug_synch_mutex};
        debug_synch_condition.notify_one();
    }
}
/// Blocks until the debug frame counter has grown past the value it had on entry.
/// Does nothing unless has_debug_tool is set.
void OGLTextureMailbox::DebugWaitForNextFrame() {
    if (has_debug_tool) {
        // Snapshot the counter before taking the lock; only a later increment ends the wait
        const int observed_frame = frame_for_debug;
        std::unique_lock lock{debug_synch_mutex};
        while (!(frame_for_debug > observed_frame)) {
            debug_synch_condition.wait(lock);
        }
    }
}
/// Returns a free frame to render into, blocking until one is available instead of reusing a
/// frame that is still queued for presentation.
/// @return The next free frame, or nullptr if the free queue is still empty after waking up.
/// @throws OGLTextureMailboxException if quit is set once the wait for a free frame ends.
Frontend::Frame* OGLVideoDumpingMailbox::GetRenderFrame() {
    std::unique_lock lock{swap_chain_lock};

    if (free_queue.empty()) {
        // No free frame: sleep until one is recycled or the mailbox is asked to quit
        free_cv.wait(lock, [this] { return quit || !free_queue.empty(); });

        if (quit) {
            throw OGLTextureMailboxException("VideoDumpingMailbox quitting");
        }

        if (free_queue.empty()) {
            LOG_CRITICAL(Render_OpenGL, "Could not get free frame");
            return nullptr;
        }
    }

    Frontend::Frame* const next_free = free_queue.front();
    free_queue.pop();
    return next_free;
}
/// Makes the oldest queued frame the current present frame (previous_frame) and recycles the
/// frame presented before it. The remaining present_queue entries are kept, because video
/// dumping requires every rendered frame to be preserved.
void OGLVideoDumpingMailbox::LoadPresentFrame() {
    // Recycle the frame that was handed out for presentation last time
    if (previous_frame != nullptr) {
        free_queue.push(previous_frame);
        free_cv.notify_one();
    }

    // New frames are pushed to the front, so the back of the queue holds the oldest one
    previous_frame = present_queue.back();
    present_queue.pop_back();
}
/// Waits up to timeout_ms for a rendered frame and returns it.
/// @param timeout_ms Maximum time to wait for a new entry in present_queue, in milliseconds.
/// @return The loaded frame, or nullptr when no frame arrived in time.
Frontend::Frame* OGLVideoDumpingMailbox::TryGetPresentFrame(int timeout_ms) {
    std::unique_lock lock{swap_chain_lock};

    // wait_for reports the final value of the predicate, i.e. whether a frame is queued
    const bool frame_available =
        present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
                            [this] { return !present_queue.empty(); });
    if (!frame_available) {
        // Timed out waiting for a frame
        return nullptr;
    }

    LoadPresentFrame();
    return previous_frame;
}
} // namespace OpenGL

View file

@ -0,0 +1,92 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <atomic>
#include <condition_variable>
#include <deque>
#include <mutex>
#include <queue>
#include <stdexcept>
#include "core/frontend/emu_window.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace Frontend {
/// A single swap chain entry: the color renderbuffer of a rendered frame together with the
/// per-thread framebuffer objects and fences used to hand it from the render thread to the
/// presentation thread.
struct Frame {
u32 width{}; ///< Width of the frame (to detect resize)
u32 height{}; ///< Height of the frame
bool color_reloaded = false; ///< Texture attachment was recreated (ie: resized)
OpenGL::OGLRenderbuffer color{}; ///< Buffer shared between the render/present FBO
OpenGL::OGLFramebuffer render{}; ///< FBO created on the render thread
OpenGL::OGLFramebuffer present{}; ///< FBO created on the present thread
GLsync render_fence{}; ///< Fence created on the render thread
GLsync present_fence{}; ///< Fence created on the presentation thread
};
} // namespace Frontend
namespace OpenGL {
// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
// to wait on available presentation frames. There doesn't seem to be much of a downside to a larger
// number, but 9 swap textures at 60FPS presentation allows for 800% speed, so that's probably fine.
#ifdef ANDROID
// Reduce the size of swap_chain, since the UI only allows up to 200% speed.
constexpr std::size_t SWAP_CHAIN_SIZE = 6;
#else
constexpr std::size_t SWAP_CHAIN_SIZE = 9;
#endif
/// Mailbox that passes rendered frames from the render thread to the presentation thread.
/// Frames live in swap_chain and move between free_queue (available for rendering) and
/// present_queue (rendered, waiting to be presented).
class OGLTextureMailbox : public Frontend::TextureMailbox {
public:
/// @param has_debug_tool When true, rendering and presentation are kept in lock-step.
explicit OGLTextureMailbox(bool has_debug_tool = false);
~OGLTextureMailbox() override;
/// Recreates the present FBO of the frame around its current color renderbuffer.
void ReloadPresentFrame(Frontend::Frame* frame, u32 height, u32 width) override;
/// Recreates the color renderbuffer and render FBO of the frame with the given size.
void ReloadRenderFrame(Frontend::Frame* frame, u32 width, u32 height) override;
/// Queues a rendered frame for presentation.
void ReleaseRenderFrame(Frontend::Frame* frame) override;
/// Returns a frame to render into.
Frontend::Frame* GetRenderFrame() override;
/// Waits up to timeout_ms milliseconds for a rendered frame and returns the frame to present.
Frontend::Frame* TryGetPresentFrame(int timeout_ms) override;
/// This is virtual as it is to be overridden in OGLVideoDumpingMailbox below.
virtual void LoadPresentFrame();
private:
/// Signal that a new frame is available (called from GPU thread)
void DebugNotifyNextFrame();
/// Wait for a new frame to be available (called from presentation thread)
void DebugWaitForNextFrame();
public:
std::mutex swap_chain_lock; ///< Taken by the frame getters/releasers before touching the queues
std::condition_variable free_cv; ///< Notified when a frame is pushed back to free_queue
std::condition_variable present_cv; ///< Notified when a frame is pushed to present_queue
std::array<Frontend::Frame, SWAP_CHAIN_SIZE> swap_chain{}; ///< Storage of all frames
std::queue<Frontend::Frame*> free_queue{}; ///< Frames available for rendering
std::deque<Frontend::Frame*> present_queue{}; ///< Rendered frames, newest at the front
Frontend::Frame* previous_frame = nullptr; ///< Frame most recently loaded for presentation
std::mutex debug_synch_mutex; ///< Guards waits on debug_synch_condition
std::condition_variable debug_synch_condition; ///< Signalled after frame_for_debug is incremented
std::atomic_int frame_for_debug{}; ///< Counter incremented by DebugNotifyNextFrame
const bool has_debug_tool; ///< When true, using a GPU debugger, so keep frames in lock-step
};
/// Exception type thrown by OGLVideoDumpingMailbox::GetRenderFrame when the mailbox is quitting.
class OGLTextureMailboxException : public std::runtime_error {
public:
// Inherit the std::runtime_error constructors (message string)
using std::runtime_error::runtime_error;
};
/// This mailbox is different in that it will never discard rendered frames: the renderer waits
/// for a free frame instead of reusing a queued one, and frames are presented oldest first.
class OGLVideoDumpingMailbox : public OGLTextureMailbox {
public:
/// Loads the oldest queued frame and leaves the rest of present_queue intact.
void LoadPresentFrame() override;
/// Returns a free frame, blocking until one is available; throws OGLTextureMailboxException
/// if quit is set when the wait ends.
Frontend::Frame* GetRenderFrame() override;
/// Waits up to timeout_ms milliseconds for a frame; returns nullptr on timeout.
Frontend::Frame* TryGetPresentFrame(int timeout_ms) override;
public:
/// When true, a GetRenderFrame call that had to wait for a free frame throws instead of
/// returning one. NOTE(review): the code that sets this flag is not part of this file;
/// presumably it is written with swap_chain_lock held and followed by a free_cv notify.
bool quit = false;
};
} // namespace OpenGL

View file

@ -5,10 +5,10 @@
#include "common/scope_exit.h"
#include "common/settings.h"
#include "video_core/custom_textures/material.h"
#include "video_core/regs.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_driver.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_mailbox.h"
#include "video_core/renderer_opengl/gl_texture_runtime.h"
#include "video_core/renderer_opengl/pica_to_gl.h"
@ -22,6 +22,8 @@ using VideoCore::SurfaceFlagBits;
using VideoCore::SurfaceType;
using VideoCore::TextureType;
constexpr GLenum TEMP_UNIT = GL_TEXTURE15;
constexpr FormatTuple DEFAULT_TUPLE = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE};
static constexpr std::array<FormatTuple, 4> DEPTH_TUPLES = {{
@ -58,13 +60,6 @@ static constexpr std::array<FormatTuple, 8> CUSTOM_TUPLES = {{
{GL_COMPRESSED_RGBA_ASTC_8x6, GL_COMPRESSED_RGBA_ASTC_8x6, GL_UNSIGNED_BYTE},
}};
struct FramebufferInfo {
GLuint color;
GLuint depth;
u32 color_level;
u32 depth_level;
};
[[nodiscard]] GLbitfield MakeBufferMask(SurfaceType type) {
switch (type) {
case SurfaceType::Color:
@ -128,9 +123,8 @@ TextureRuntime::TextureRuntime(const Driver& driver_, VideoCore::RendererBase& r
TextureRuntime::~TextureRuntime() = default;
void TextureRuntime::Reset() {
alloc_cache.clear();
framebuffer_cache.clear();
u32 TextureRuntime::RemoveThreshold() {
return SWAP_CHAIN_SIZE;
}
bool TextureRuntime::NeedsConversion(VideoCore::PixelFormat pixel_format) const {
@ -151,6 +145,10 @@ VideoCore::StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
}
const FormatTuple& TextureRuntime::GetFormatTuple(PixelFormat pixel_format) const {
if (pixel_format == PixelFormat::Invalid) {
return DEFAULT_TUPLE;
}
const auto type = GetFormatType(pixel_format);
const std::size_t format_index = static_cast<std::size_t>(pixel_format);
@ -171,74 +169,6 @@ const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::CustomPixelFormat p
return CUSTOM_TUPLES[format_index];
}
void TextureRuntime::Recycle(const HostTextureTag tag, Allocation&& alloc) {
alloc_cache.emplace(tag, std::move(alloc));
}
Allocation TextureRuntime::Allocate(const VideoCore::SurfaceParams& params,
const VideoCore::Material* material) {
const GLenum target = params.texture_type == VideoCore::TextureType::CubeMap
? GL_TEXTURE_CUBE_MAP
: GL_TEXTURE_2D;
const bool is_custom = material != nullptr;
const bool has_normal = material && material->Map(MapType::Normal);
const auto& tuple =
is_custom ? GetFormatTuple(params.custom_format) : GetFormatTuple(params.pixel_format);
const HostTextureTag key = {
.width = params.width,
.height = params.height,
.levels = params.levels,
.res_scale = params.res_scale,
.tuple = tuple,
.type = params.texture_type,
.is_custom = is_custom,
.has_normal = has_normal,
};
if (auto it = alloc_cache.find(key); it != alloc_cache.end()) {
auto alloc{std::move(it->second)};
alloc_cache.erase(it);
return alloc;
}
const GLuint old_tex = OpenGLState::GetCurState().texture_units[0].texture_2d;
glActiveTexture(GL_TEXTURE0);
std::array<OGLTexture, 3> textures{};
std::array<GLuint, 3> handles{};
textures[0] = MakeHandle(target, params.width, params.height, params.levels, tuple,
params.DebugName(false));
handles.fill(textures[0].handle);
if (params.res_scale != 1) {
const u32 scaled_width = is_custom ? params.width : params.GetScaledWidth();
const u32 scaled_height = is_custom ? params.height : params.GetScaledHeight();
const auto& scaled_tuple = is_custom ? GetFormatTuple(PixelFormat::RGBA8) : tuple;
textures[1] = MakeHandle(target, scaled_width, scaled_height, params.levels, scaled_tuple,
params.DebugName(true, is_custom));
handles[1] = textures[1].handle;
}
if (has_normal) {
textures[2] = MakeHandle(target, params.width, params.height, params.levels, tuple,
params.DebugName(true, is_custom));
handles[2] = textures[2].handle;
}
glBindTexture(GL_TEXTURE_2D, old_tex);
return Allocation{
.textures = std::move(textures),
.handles = std::move(handles),
.tuple = tuple,
.width = params.width,
.height = params.height,
.levels = params.levels,
.res_scale = params.res_scale,
.is_custom = is_custom,
};
}
bool TextureRuntime::Reinterpret(Surface& source, Surface& dest,
const VideoCore::TextureBlit& blit) {
const PixelFormat src_format = source.pixel_format;
@ -353,40 +283,90 @@ void TextureRuntime::GenerateMipmaps(Surface& surface) {
}
Surface::Surface(TextureRuntime& runtime_, const VideoCore::SurfaceParams& params)
: SurfaceBase{params}, driver{&runtime_.GetDriver()}, runtime{&runtime_} {
: SurfaceBase{params}, driver{&runtime_.GetDriver()}, runtime{&runtime_},
tuple{runtime->GetFormatTuple(pixel_format)} {
if (pixel_format == PixelFormat::Invalid) {
return;
}
alloc = runtime->Allocate(params);
glActiveTexture(TEMP_UNIT);
const GLenum target =
texture_type == VideoCore::TextureType::CubeMap ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D;
textures[0] = MakeHandle(target, width, height, levels, tuple, DebugName(false));
if (res_scale != 1) {
textures[1] = MakeHandle(target, GetScaledWidth(), GetScaledHeight(), levels, tuple,
DebugName(true, false));
}
}
Surface::~Surface() {
if (pixel_format == PixelFormat::Invalid || !alloc) {
/// Creates a surface with the properties of an existing surface, backed by textures sized and
/// formatted for a custom material.
/// @param runtime_ Texture runtime that provides the driver and the format tuples.
/// @param surface Surface whose base properties are copied.
/// @param mat Custom material to allocate for. Must not be null: it is dereferenced in the
///            member initializer list.
/// If the driver does not support the material format, no texture is created and the
/// material is not attached.
Surface::Surface(TextureRuntime& runtime_, const VideoCore::SurfaceBase& surface,
                 const VideoCore::Material* mat)
    : SurfaceBase{surface}, driver{&runtime_.GetDriver()}, runtime{&runtime_},
      tuple{runtime_.GetFormatTuple(mat->format)} {
    // driver and runtime have to be initialized above: driver is dereferenced right here
    // and neither member has a default initializer.
    if (!driver->IsCustomFormatSupported(mat->format)) {
        return;
    }

    glActiveTexture(TEMP_UNIT);
    const GLenum target =
        texture_type == VideoCore::TextureType::CubeMap ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D;

    custom_format = mat->format;
    material = mat;

    // Slot 0 holds the material texture in its custom format
    textures[0] = MakeHandle(target, mat->width, mat->height, levels, tuple, DebugName(false));
    if (res_scale != 1) {
        // Slot 1 uses the default tuple at the material dimensions
        textures[1] = MakeHandle(target, mat->width, mat->height, levels, DEFAULT_TUPLE,
                                 DebugName(true, true));
    }
    const bool has_normal = mat->Map(MapType::Normal);
    if (has_normal) {
        // Slot 2 is only allocated when the material provides a normal map
        textures[2] =
            MakeHandle(target, mat->width, mat->height, levels, tuple, DebugName(true, true));
    }
}
Surface::~Surface() = default;
/// Returns the GL name of the texture stored at index, or the name of texture 0 when that
/// slot holds no texture.
GLuint Surface::Handle(u32 index) const noexcept {
    const GLuint requested = textures[index].handle;
    return requested != 0 ? requested : textures[0].handle;
}
/// Copies every mip level of Handle(1) into the surface's copy texture and returns the GL name
/// of that copy. The copy texture is created on first use with the scaled dimensions.
GLuint Surface::CopyHandle() noexcept {
    const u32 scaled_width = GetScaledWidth();
    const u32 scaled_height = GetScaledHeight();

    if (copy_texture.handle == 0) {
        copy_texture = MakeHandle(GL_TEXTURE_2D, scaled_width, scaled_height, levels, tuple,
                                  DebugName(true));
    }

    const GLuint source = Handle(1);
    const GLuint destination = copy_texture.handle;
    for (u32 mip = 0; mip != levels; ++mip) {
        // Each mip level halves the extent of the previous one
        glCopyImageSubData(source, GL_TEXTURE_2D, mip, 0, 0, 0, destination, GL_TEXTURE_2D, mip,
                           0, 0, 0, scaled_width >> mip, scaled_height >> mip, 1);
    }
    return destination;
}
void Surface::Upload(const VideoCore::BufferTextureCopy& upload,
const VideoCore::StagingData& staging) {
ASSERT(stride * GetFormatBytesPerPixel(pixel_format) % 4 == 0);
const GLuint old_tex = OpenGLState::GetCurState().texture_units[0].texture_2d;
const u32 unscaled_width = upload.texture_rect.GetWidth();
const u32 unscaled_height = upload.texture_rect.GetHeight();
glPixelStorei(GL_UNPACK_ROW_LENGTH, unscaled_width);
glActiveTexture(GL_TEXTURE0);
glActiveTexture(TEMP_UNIT);
glBindTexture(GL_TEXTURE_2D, Handle(0));
const auto& tuple = alloc.tuple;
glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, upload.texture_rect.left,
upload.texture_rect.bottom, unscaled_width, unscaled_height, tuple.format,
tuple.type, staging.mapped.data());
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
glBindTexture(GL_TEXTURE_2D, old_tex);
const VideoCore::TextureBlit blit = {
.src_level = upload.texture_level,
@ -400,14 +380,12 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload,
}
void Surface::UploadCustom(const VideoCore::Material* material, u32 level) {
const GLuint old_tex = OpenGLState::GetCurState().texture_units[0].texture_2d;
const auto& tuple = alloc.tuple;
const u32 width = material->width;
const u32 height = material->height;
const auto color = material->textures[0];
const Common::Rectangle filter_rect{0U, height, width, 0U};
glActiveTexture(GL_TEXTURE0);
glActiveTexture(TEMP_UNIT);
glPixelStorei(GL_UNPACK_ROW_LENGTH, width);
const auto upload = [&](u32 index, VideoCore::CustomTexture* texture) {
@ -440,7 +418,6 @@ void Surface::UploadCustom(const VideoCore::Material* material, u32 level) {
}
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
glBindTexture(GL_TEXTURE_2D, old_tex);
}
void Surface::Download(const VideoCore::BufferTextureCopy& download,
@ -491,6 +468,7 @@ bool Surface::DownloadWithoutFbo(const VideoCore::BufferTextureCopy& download,
const auto& tuple = runtime->GetFormatTuple(pixel_format);
const u32 unscaled_width = download.texture_rect.GetWidth();
glActiveTexture(TEMP_UNIT);
glPixelStorei(GL_PACK_ROW_LENGTH, unscaled_width);
SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
@ -541,27 +519,24 @@ void Surface::Attach(GLenum target, u32 level, u32 layer, bool scaled) {
}
}
bool Surface::Swap(const VideoCore::Material* mat) {
const VideoCore::CustomPixelFormat format{mat->format};
if (!driver->IsCustomFormatSupported(format)) {
return false;
void Surface::ScaleUp(u32 new_scale) {
if (res_scale == new_scale || new_scale == 1) {
return;
}
runtime->Recycle(MakeTag(), std::move(alloc));
SurfaceParams params = *this;
params.width = mat->width;
params.height = mat->height;
params.custom_format = mat->format;
alloc = runtime->Allocate(params, mat);
res_scale = new_scale;
textures[1] = MakeHandle(GL_TEXTURE_2D, GetScaledWidth(), GetScaledHeight(), levels, tuple,
DebugName(true));
LOG_DEBUG(Render_OpenGL, "Swapped {}x{} {} surface at address {:#x} to {}x{} {}",
GetScaledWidth(), GetScaledHeight(), VideoCore::PixelFormatAsString(pixel_format),
addr, width, height, VideoCore::CustomPixelFormatAsString(format));
custom_format = format;
material = mat;
return true;
VideoCore::TextureBlit blit = {
.src_rect = GetRect(),
.dst_rect = GetScaledRect(),
};
for (u32 level = 0; level < levels; level++) {
blit.src_level = level;
blit.dst_level = level;
BlitScale(blit, true);
}
}
u32 Surface::GetInternalBytesPerPixel() const {
@ -591,27 +566,11 @@ void Surface::BlitScale(const VideoCore::TextureBlit& blit, bool up_scale) {
blit.dst_rect.right, blit.dst_rect.top, buffer_mask, filter);
}
HostTextureTag Surface::MakeTag() const noexcept {
return HostTextureTag{
.width = alloc.width,
.height = alloc.height,
.levels = alloc.levels,
.res_scale = alloc.res_scale,
.tuple = alloc.tuple,
.type = texture_type,
.is_custom = alloc.is_custom,
.has_normal = HasNormalMap(),
};
}
Framebuffer::Framebuffer(TextureRuntime& runtime, const VideoCore::FramebufferParams& params,
const Surface* color, const Surface* depth)
: VideoCore::FramebufferParams{params}, res_scale{color ? color->res_scale
: (depth ? depth->res_scale : 1u)} {
Framebuffer::Framebuffer(TextureRuntime& runtime, const Surface* color, u32 color_level,
const Surface* depth_stencil, u32 depth_level, const Pica::Regs& regs,
Common::Rectangle<u32> surfaces_rect)
: VideoCore::FramebufferBase{regs, color, color_level,
depth_stencil, depth_level, surfaces_rect} {
const bool shadow_rendering = regs.framebuffer.IsShadowRendering();
const bool has_stencil = regs.framebuffer.HasStencil();
if (shadow_rendering && !color) {
return;
}
@ -619,33 +578,15 @@ Framebuffer::Framebuffer(TextureRuntime& runtime, const Surface* color, u32 colo
if (color) {
attachments[0] = color->Handle();
}
if (depth_stencil) {
attachments[1] = depth_stencil->Handle();
if (depth) {
attachments[1] = depth->Handle();
}
const FramebufferInfo info = {
.color = attachments[0],
.depth = attachments[1],
.color_level = color_level,
.depth_level = depth_level,
};
const u64 hash = Common::ComputeHash64(&info, sizeof(FramebufferInfo));
auto [it, new_framebuffer] = runtime.framebuffer_cache.try_emplace(hash);
if (!new_framebuffer) {
handle = it->second.handle;
return;
}
const GLuint old_fbo = OpenGLState::GetCurState().draw.draw_framebuffer;
OGLFramebuffer& framebuffer = it->second;
framebuffer.Create();
handle = it->second.handle;
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle);
SCOPE_EXIT({ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_fbo); });
OpenGLState state = OpenGLState::GetCurState();
state.draw.draw_framebuffer = framebuffer.handle;
state.Apply();
if (shadow_rendering) {
glFramebufferParameteri(GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER_DEFAULT_WIDTH,
@ -658,13 +599,13 @@ Framebuffer::Framebuffer(TextureRuntime& runtime, const Surface* color, u32 colo
} else {
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
color ? color->Handle() : 0, color_level);
if (depth_stencil) {
if (has_stencil) {
if (depth) {
if (depth->pixel_format == PixelFormat::D24S8) {
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
GL_TEXTURE_2D, depth_stencil->Handle(), depth_level);
GL_TEXTURE_2D, depth->Handle(), depth_level);
} else {
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
depth_stencil->Handle(), depth_level);
depth->Handle(), depth_level);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
}

View file

@ -27,46 +27,6 @@ struct FormatTuple {
}
};
struct HostTextureTag {
u32 width;
u32 height;
u32 levels;
u32 res_scale;
FormatTuple tuple;
VideoCore::TextureType type;
bool is_custom;
bool has_normal;
bool operator==(const HostTextureTag& other) const noexcept {
return std::tie(tuple, type, width, height, levels, res_scale, is_custom, has_normal) ==
std::tie(other.tuple, other.type, other.width, other.height, other.levels,
other.res_scale, other.is_custom, other.has_normal);
}
struct Hash {
const u64 operator()(const HostTextureTag& tag) const {
return Common::ComputeHash64(&tag, sizeof(HostTextureTag));
}
};
};
static_assert(std::has_unique_object_representations_v<HostTextureTag>,
"HostTextureTag is not suitable for hashing!");
struct Allocation {
std::array<OGLTexture, 3> textures;
std::array<GLuint, 3> handles;
FormatTuple tuple;
u32 width;
u32 height;
u32 levels;
u32 res_scale;
bool is_custom;
operator bool() const noexcept {
return textures[0].handle;
}
};
class Surface;
class Driver;
@ -82,8 +42,8 @@ public:
explicit TextureRuntime(const Driver& driver, VideoCore::RendererBase& renderer);
~TextureRuntime();
/// Clears all cached runtime resources
void Reset();
/// Returns the removal threshold ticks for the garbage collector
u32 RemoveThreshold();
/// Returns true if the provided pixel format cannot be used natively by the runtime.
bool NeedsConversion(VideoCore::PixelFormat pixel_format) const;
@ -111,13 +71,6 @@ public:
void GenerateMipmaps(Surface& surface);
private:
/// Takes back ownership of the allocation for recycling
void Recycle(const HostTextureTag tag, Allocation&& alloc);
/// Allocates a texture with the specified dimentions and format
Allocation Allocate(const VideoCore::SurfaceParams& params,
const VideoCore::Material* material = nullptr);
/// Returns the OpenGL driver class
const Driver& GetDriver() const {
return driver;
@ -127,8 +80,6 @@ private:
const Driver& driver;
BlitHelper blit_helper;
std::vector<u8> staging_buffer;
std::unordered_multimap<HostTextureTag, Allocation, HostTextureTag::Hash> alloc_cache;
std::unordered_map<u64, OGLFramebuffer, Common::IdentityHash<u64>> framebuffer_cache;
std::array<OGLFramebuffer, 3> draw_fbos;
std::array<OGLFramebuffer, 3> read_fbos;
};
@ -136,6 +87,8 @@ private:
class Surface : public VideoCore::SurfaceBase {
public:
explicit Surface(TextureRuntime& runtime, const VideoCore::SurfaceParams& params);
explicit Surface(TextureRuntime& runtime, const VideoCore::SurfaceBase& surface,
const VideoCore::Material* material);
~Surface();
Surface(const Surface&) = delete;
@ -144,13 +97,15 @@ public:
Surface(Surface&& o) noexcept = default;
Surface& operator=(Surface&& o) noexcept = default;
[[nodiscard]] GLuint Handle(u32 index = 1) const noexcept {
return alloc.handles[index];
[[nodiscard]] const FormatTuple& Tuple() const noexcept {
return tuple;
}
[[nodiscard]] const FormatTuple& Tuple() const noexcept {
return alloc.tuple;
}
/// Returns the texture handle at index, otherwise the first one if not valid.
GLuint Handle(u32 index = 1) const noexcept;
/// Returns a copy of the upscaled texture handle, used for feedback loops.
GLuint CopyHandle() noexcept;
/// Uploads pixel data in staging to a rectangle region of the surface texture
void Upload(const VideoCore::BufferTextureCopy& upload, const VideoCore::StagingData& staging);
@ -165,8 +120,8 @@ public:
/// Attaches a handle of surface to the specified framebuffer target
void Attach(GLenum target, u32 level, u32 layer, bool scaled = true);
/// Swaps the internal allocation to match the provided material
bool Swap(const VideoCore::Material* material);
/// Scales up the surface to match the new resolution scale.
void ScaleUp(u32 new_scale);
/// Returns the bpp of the internal surface format
u32 GetInternalBytesPerPixel() const;
@ -179,24 +134,32 @@ private:
bool DownloadWithoutFbo(const VideoCore::BufferTextureCopy& download,
const VideoCore::StagingData& staging);
/// Returns the texture tag of the current allocation
HostTextureTag MakeTag() const noexcept;
private:
const Driver* driver;
TextureRuntime* runtime;
Allocation alloc{};
std::array<OGLTexture, 3> textures;
OGLTexture copy_texture;
FormatTuple tuple;
};
class Framebuffer : public VideoCore::FramebufferBase {
class Framebuffer : public VideoCore::FramebufferParams {
public:
explicit Framebuffer(TextureRuntime& runtime, const Surface* color, u32 color_level,
const Surface* depth_stencil, u32 depth_level, const Pica::Regs& regs,
Common::Rectangle<u32> surfaces_rect);
explicit Framebuffer(TextureRuntime& runtime, const VideoCore::FramebufferParams& params,
const Surface* color, const Surface* depth_stencil);
~Framebuffer();
Framebuffer(const Framebuffer&) = delete;
Framebuffer& operator=(const Framebuffer&) = delete;
Framebuffer(Framebuffer&& o) noexcept = default;
Framebuffer& operator=(Framebuffer&& o) noexcept = default;
[[nodiscard]] u32 Scale() const noexcept {
return res_scale;
}
[[nodiscard]] GLuint Handle() const noexcept {
return handle;
return framebuffer.handle;
}
[[nodiscard]] GLuint Attachment(VideoCore::SurfaceType type) const noexcept {
@ -208,8 +171,9 @@ public:
}
private:
u32 res_scale{1};
std::array<GLuint, 2> attachments{};
GLuint handle{};
OGLFramebuffer framebuffer;
};
class Sampler {

View file

@ -2,20 +2,18 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <queue>
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/settings.h"
#include "core/core.h"
#include "core/dumping/backend.h"
#include "core/frontend/emu_window.h"
#include "core/frontend/framebuffer_layout.h"
#include "core/hw/hw.h"
#include "core/hw/lcd.h"
#include "core/memory.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_mailbox.h"
#include "video_core/renderer_opengl/gl_vars.h"
#include "video_core/renderer_opengl/post_processing_opengl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
@ -31,232 +29,6 @@ namespace OpenGL {
MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 128, 64));
MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128));
// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
// to wait on available presentation frames. There doesn't seem to be much of a downside to a larger
// number, but 9 swap textures at 60FPS presentation allows for 800% speed, so that's probably fine.
#ifdef ANDROID
// Reduce the size of swap_chain, since the UI only allows up to 200% speed.
constexpr std::size_t SWAP_CHAIN_SIZE = 6;
#else
constexpr std::size_t SWAP_CHAIN_SIZE = 9;
#endif
/// Error type raised by the texture mailboxes; OGLVideoDumpingMailbox::GetRenderFrame throws it
/// when the mailbox is asked to quit while waiting for a free frame.
class OGLTextureMailboxException : public std::runtime_error {
    using Base = std::runtime_error;

public:
    /// Reuse every std::runtime_error constructor unchanged.
    using Base::Base;
};
/// Swap-chain mailbox that passes frames between the rendering side (GetRenderFrame /
/// ReleaseRenderFrame) and the presentation side (TryGetPresentFrame).
/// Frames live in swap_chain and circulate between free_queue and present_queue.
class OGLTextureMailbox : public Frontend::TextureMailbox {
public:
/// Guards free_queue, present_queue and previous_frame
std::mutex swap_chain_lock;
/// Notified when a frame is returned to free_queue
std::condition_variable free_cv;
/// Notified when a rendered frame is pushed to present_queue
std::condition_variable present_cv;
/// Backing storage for every frame handled by this mailbox
std::array<Frontend::Frame, SWAP_CHAIN_SIZE> swap_chain{};
/// Frames available for rendering
std::queue<Frontend::Frame*> free_queue{};
/// Rendered frames waiting for presentation; newest at the front
std::deque<Frontend::Frame*> present_queue{};
/// Frame most recently handed out for presentation; nullptr until the first one is loaded
Frontend::Frame* previous_frame = nullptr;
/// Marks every frame of the swap chain as free.
/// @param has_debug_tool_ When true, render and present are kept in lock-step (see Debug* below)
OGLTextureMailbox(bool has_debug_tool_ = false) : has_debug_tool{has_debug_tool_} {
for (auto& frame : swap_chain) {
free_queue.push(&frame);
}
}
~OGLTextureMailbox() override {
// lock the mutex and clear out the present and free_queues and notify any people who are
// blocked to prevent deadlock on shutdown
std::scoped_lock lock(swap_chain_lock);
std::queue<Frontend::Frame*>().swap(free_queue);
present_queue.clear();
present_cv.notify_all();
free_cv.notify_all();
}
/// Recreates the present FBO of the frame and attaches the frame's color renderbuffer to it.
/// The height and width arguments are not used by this implementation; note that their order
/// is (height, width), unlike ReloadRenderFrame which takes (width, height).
void ReloadPresentFrame(Frontend::Frame* frame, u32 height, u32 width) override {
frame->present.Release();
frame->present.Create();
GLint previous_draw_fbo{};
glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
// NOTE(review): GL_FRAMEBUFFER binds both the read and the draw target, but only the draw
// binding is saved and restored below - confirm that callers do not rely on the read binding.
glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
frame->color.handle);
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
}
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
// The present FBO now references the current color buffer
frame->color_reloaded = false;
}
/// Recreates the color renderbuffer (GL_RGBA8, width x height) and the render FBO of the frame,
/// then records the new size and flags the color buffer as reloaded.
/// The OpenGLState that was current on entry is re-applied before returning.
void ReloadRenderFrame(Frontend::Frame* frame, u32 width, u32 height) override {
OpenGLState prev_state = OpenGLState::GetCurState();
OpenGLState state = OpenGLState::GetCurState();
// Recreate the color texture attachment
frame->color.Release();
frame->color.Create();
state.renderbuffer = frame->color.handle;
state.Apply();
glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, width, height);
// Recreate the FBO for the render target
frame->render.Release();
frame->render.Create();
state.draw.read_framebuffer = frame->render.handle;
state.draw.draw_framebuffer = frame->render.handle;
state.Apply();
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
frame->color.handle);
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
}
prev_state.Apply();
frame->width = width;
frame->height = height;
// Set so that the present FBO gets recreated (ReloadPresentFrame clears the flag again)
frame->color_reloaded = true;
}
/// Returns a frame to render into. Never blocks: when no free frame exists, the oldest frame
/// still waiting for presentation is taken back instead.
Frontend::Frame* GetRenderFrame() override {
std::unique_lock<std::mutex> lock(swap_chain_lock);
// If there's no free frames, we will reuse the oldest render frame
if (free_queue.empty()) {
// NOTE(review): assumes present_queue is non-empty whenever free_queue is empty; this is
// not checked here - confirm that the invariant holds for all callers.
auto frame = present_queue.back();
present_queue.pop_back();
return frame;
}
Frontend::Frame* frame = free_queue.front();
free_queue.pop();
return frame;
}
/// Queues a rendered frame for presentation and wakes one thread waiting on present_cv.
void ReleaseRenderFrame(Frontend::Frame* frame) override {
std::unique_lock<std::mutex> lock(swap_chain_lock);
present_queue.push_front(frame);
present_cv.notify_one();
DebugNotifyNextFrame();
}
// This is virtual as it is to be overridden in OGLVideoDumpingMailbox below.
/// Makes the newest queued frame the current present frame (previous_frame) and recycles the
/// previously presented frame plus all older queued frames into free_queue.
/// Takes no lock itself: TryGetPresentFrame calls it with swap_chain_lock held and only after
/// checking that present_queue is not empty.
virtual void LoadPresentFrame() {
// free the previous frame and add it back to the free queue
if (previous_frame) {
free_queue.push(previous_frame);
free_cv.notify_one();
}
// the newest entries are pushed to the front of the queue
Frontend::Frame* frame = present_queue.front();
present_queue.pop_front();
// remove all old entries from the present queue and move them back to the free_queue
for (auto f : present_queue) {
free_queue.push(f);
}
present_queue.clear();
previous_frame = frame;
}
/// Waits up to timeout_ms for a rendered frame and returns the frame to present.
/// On timeout the previously presented frame is returned, which is nullptr if no frame has
/// been loaded yet.
Frontend::Frame* TryGetPresentFrame(int timeout_ms) override {
// NOTE(review): with has_debug_tool set this wait has no timeout, so timeout_ms does not
// bound the total time spent in this function - confirm that this is intended.
DebugWaitForNextFrame();
std::unique_lock<std::mutex> lock(swap_chain_lock);
// wait for new entries in the present_queue
present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
[&] { return !present_queue.empty(); });
if (present_queue.empty()) {
// timed out waiting for a frame to draw so return the previous frame
return previous_frame;
}
LoadPresentFrame();
return previous_frame;
}
private:
/// Mutex/condition pair used only for the debug lock-step handshake below
std::mutex debug_synch_mutex;
std::condition_variable debug_synch_condition;
/// Incremented once per released render frame when has_debug_tool is set
std::atomic_int frame_for_debug{};
const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step
/// Signal that a new frame is available (called from GPU thread)
void DebugNotifyNextFrame() {
if (!has_debug_tool) {
return;
}
frame_for_debug++;
std::lock_guard lock{debug_synch_mutex};
debug_synch_condition.notify_one();
}
/// Wait for a new frame to be available (called from presentation thread)
void DebugWaitForNextFrame() {
if (!has_debug_tool) {
return;
}
// Block until the counter moves past the value sampled here
const int last_frame = frame_for_debug;
std::unique_lock lock{debug_synch_mutex};
debug_synch_condition.wait(lock,
[this, last_frame] { return frame_for_debug > last_frame; });
}
};
/// This mailbox is different in that it will never discard rendered frames:
/// GetRenderFrame blocks until a frame is free instead of taking one back from present_queue,
/// and LoadPresentFrame consumes queued frames oldest-first without dropping the others.
class OGLVideoDumpingMailbox : public OGLTextureMailbox {
public:
/// When set, a GetRenderFrame call that has to wait for a free frame throws instead of
/// returning one. Checked while swap_chain_lock is held.
bool quit = false;
/// Returns a free frame, waiting on free_cv until one is available.
/// Throws OGLTextureMailboxException if quit is set when the wait ends.
Frontend::Frame* GetRenderFrame() override {
std::unique_lock<std::mutex> lock(swap_chain_lock);
// If there's no free frames, we will wait until one shows up
if (free_queue.empty()) {
free_cv.wait(lock, [&] { return (!free_queue.empty() || quit); });
if (quit) {
throw OGLTextureMailboxException("VideoDumpingMailbox quitting");
}
// NOTE(review): the wait predicate guarantees a non-empty free_queue once quit has been
// ruled out, so this branch looks unreachable and purely defensive.
if (free_queue.empty()) {
LOG_CRITICAL(Render_OpenGL, "Could not get free frame");
return nullptr;
}
}
Frontend::Frame* frame = free_queue.front();
free_queue.pop();
return frame;
}
/// Makes the oldest queued frame (back of present_queue) the current present frame and
/// returns the previously presented frame to free_queue.
/// Takes no lock itself: TryGetPresentFrame calls it with swap_chain_lock held and only after
/// checking that present_queue is not empty.
void LoadPresentFrame() override {
// free the previous frame and add it back to the free queue
if (previous_frame) {
free_queue.push(previous_frame);
free_cv.notify_one();
}
Frontend::Frame* frame = present_queue.back();
present_queue.pop_back();
previous_frame = frame;
// Do not remove entries from the present_queue, as video dumping would require
// that we preserve all frames
}
/// Waits up to timeout_ms for a rendered frame. Unlike the base class, a timeout yields
/// nullptr rather than the previously presented frame.
Frontend::Frame* TryGetPresentFrame(int timeout_ms) override {
std::unique_lock<std::mutex> lock(swap_chain_lock);
// wait for new entries in the present_queue
present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
[&] { return !present_queue.empty(); });
if (present_queue.empty()) {
// timed out waiting for a frame
return nullptr;
}
LoadPresentFrame();
return previous_frame;
}
};
/**
* Vertex structure that the drawn screen rectangles are composed of.
*/
@ -559,8 +331,15 @@ void RendererOpenGL::InitOpenGLObjects() {
glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(),
Settings::values.bg_blue.GetValue(), 0.0f);
filter_sampler.Create();
ReloadSampler();
for (size_t i = 0; i < samplers.size(); i++) {
samplers[i].Create();
glSamplerParameteri(samplers[i].handle, GL_TEXTURE_MIN_FILTER,
i == 0 ? GL_NEAREST : GL_LINEAR);
glSamplerParameteri(samplers[i].handle, GL_TEXTURE_MAG_FILTER,
i == 0 ? GL_NEAREST : GL_LINEAR);
glSamplerParameteri(samplers[i].handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glSamplerParameteri(samplers[i].handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
ReloadShader();
@ -608,15 +387,6 @@ void RendererOpenGL::InitOpenGLObjects() {
state.Apply();
}
/// Configures filter_sampler: linear or nearest min/mag filtering depending on the
/// filter_mode setting, with both wrap axes clamped to the edge.
void RendererOpenGL::ReloadSampler() {
    const auto sampler = filter_sampler.handle;
    const auto filter = Settings::values.filter_mode ? GL_LINEAR : GL_NEAREST;

    // Minification and magnification always use the same filter
    glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, filter);
    glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, filter);

    glSamplerParameteri(sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glSamplerParameteri(sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
void RendererOpenGL::ReloadShader() {
// Link shaders and get variable locations
std::string shader_data;
@ -793,13 +563,14 @@ void RendererOpenGL::DrawSingleScreen(const ScreenInfo& screen_info, float x, fl
}
const u32 scale_factor = GetResolutionScaleFactor();
const GLuint sampler = samplers[Settings::values.filter_mode.GetValue()].handle;
glUniform4f(uniform_i_resolution, static_cast<float>(screen_info.texture.width * scale_factor),
static_cast<float>(screen_info.texture.height * scale_factor),
1.0f / static_cast<float>(screen_info.texture.width * scale_factor),
1.0f / static_cast<float>(screen_info.texture.height * scale_factor));
glUniform4f(uniform_o_resolution, h, w, 1.0f / h, 1.0f / w);
state.texture_units[0].texture_2d = screen_info.display_texture;
state.texture_units[0].sampler = filter_sampler.handle;
state.texture_units[0].sampler = sampler;
state.Apply();
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
@ -862,6 +633,7 @@ void RendererOpenGL::DrawSingleScreenStereo(const ScreenInfo& screen_info_l,
}
const u32 scale_factor = GetResolutionScaleFactor();
const GLuint sampler = samplers[Settings::values.filter_mode.GetValue()].handle;
glUniform4f(uniform_i_resolution,
static_cast<float>(screen_info_l.texture.width * scale_factor),
static_cast<float>(screen_info_l.texture.height * scale_factor),
@ -870,8 +642,8 @@ void RendererOpenGL::DrawSingleScreenStereo(const ScreenInfo& screen_info_l,
glUniform4f(uniform_o_resolution, h, w, 1.0f / h, 1.0f / w);
state.texture_units[0].texture_2d = screen_info_l.display_texture;
state.texture_units[1].texture_2d = screen_info_r.display_texture;
state.texture_units[0].sampler = filter_sampler.handle;
state.texture_units[1].sampler = filter_sampler.handle;
state.texture_units[0].sampler = sampler;
state.texture_units[1].sampler = sampler;
state.Apply();
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
@ -894,11 +666,6 @@ void RendererOpenGL::DrawScreens(const Layout::FramebufferLayout& layout, bool f
Settings::values.bg_blue.GetValue(), 0.0f);
}
if (settings.sampler_update_requested.exchange(false)) {
// Set the new filtering mode for the sampler
ReloadSampler();
}
if (settings.shader_update_requested.exchange(false)) {
// Update fragment shader before drawing
shader.Release();
@ -1119,7 +886,7 @@ void RendererOpenGL::TryPresent(int timeout_ms, bool is_secondary) {
void RendererOpenGL::PrepareVideoDumping() {
auto* mailbox = static_cast<OGLVideoDumpingMailbox*>(frame_dumper.mailbox.get());
{
std::unique_lock lock(mailbox->swap_chain_lock);
std::scoped_lock lock{mailbox->swap_chain_lock};
mailbox->quit = false;
}
frame_dumper.StartDumping();
@ -1129,7 +896,7 @@ void RendererOpenGL::CleanupVideoDumping() {
frame_dumper.StopDumping();
auto* mailbox = static_cast<OGLVideoDumpingMailbox*>(frame_dumper.mailbox.get());
{
std::unique_lock lock(mailbox->swap_chain_lock);
std::scoped_lock lock{mailbox->swap_chain_lock};
mailbox->quit = true;
}
mailbox->free_cv.notify_one();

View file

@ -21,20 +21,6 @@ namespace Core {
class System;
}
namespace Frontend {
/// A single swap-chain frame: one color renderbuffer plus the two framebuffer objects and the
/// two fences created on the render and presentation threads respectively.
struct Frame {
u32 width{}; ///< Width of the frame (to detect resize)
u32 height{}; ///< Height of the frame
bool color_reloaded = false; ///< Texture attachment was recreated (ie: resized)
OpenGL::OGLRenderbuffer color{}; ///< Buffer shared between the render/present FBO
OpenGL::OGLFramebuffer render{}; ///< FBO created on the render thread
OpenGL::OGLFramebuffer present{}; ///< FBO created on the present thread
GLsync render_fence{}; ///< Fence created on the render thread
GLsync present_fence{}; ///< Fence created on the presentation thread
};
} // namespace Frontend
namespace OpenGL {
/// Structure used for storing information about the textures for each 3DS screen
@ -72,7 +58,6 @@ public:
private:
void InitOpenGLObjects();
void ReloadSampler();
void ReloadShader();
void PrepareRendertarget();
void RenderScreenshot();
@ -109,9 +94,9 @@ private:
OGLBuffer vertex_buffer;
OGLProgram shader;
OGLFramebuffer screenshot_framebuffer;
OGLSampler filter_sampler;
std::array<OGLSampler, 2> samplers;
/// Display information for top and bottom screens respectively
// Display information for top and bottom screens respectively
std::array<ScreenInfo, 3> screen_infos;
// Shader uniform location indices