citra/src/video_core/rasterizer_cache/cached_surface.cpp
2022-08-22 08:00:30 +03:00

476 lines
20 KiB
C++

// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/texture.h"
#include "core/core.h"
#include "video_core/rasterizer_cache/cached_surface.h"
#include "video_core/rasterizer_cache/morton_swizzle.h"
#include "video_core/rasterizer_cache/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/texture_downloader_es.h"
#include "video_core/renderer_opengl/texture_filters/texture_filterer.h"
namespace OpenGL {
static Aspect ToAspect(SurfaceType type) {
switch (type) {
case SurfaceType::Color:
case SurfaceType::Texture:
case SurfaceType::Fill:
return Aspect::Color;
case SurfaceType::Depth:
return Aspect::Depth;
case SurfaceType::DepthStencil:
return Aspect::DepthStencil;
default:
LOG_CRITICAL(Render_OpenGL, "Unknown SurfaceType {}", type);
UNREACHABLE();
}
return Aspect::Color;
}
CachedSurface::~CachedSurface() {
if (texture.handle) {
auto tag = is_custom ? HostTextureTag{GetFormatTuple(PixelFormat::RGBA8),
custom_tex_info.width, custom_tex_info.height}
: HostTextureTag{GetFormatTuple(pixel_format), GetScaledWidth(),
GetScaledHeight()};
owner.host_texture_recycler.emplace(tag, std::move(texture));
}
}
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
ASSERT(type != SurfaceType::Fill);
const bool need_swap =
GLES && (pixel_format == PixelFormat::RGBA8 || pixel_format == PixelFormat::RGB8);
const u8* const texture_src_data = VideoCore::g_memory->GetPhysicalPointer(addr);
if (texture_src_data == nullptr)
return;
if (gl_buffer.empty()) {
gl_buffer.resize(width * height * GetBytesPerPixel(pixel_format));
}
// TODO: Should probably be done in ::Memory:: and check for other regions too
if (load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END)
load_end = Memory::VRAM_VADDR_END;
if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR)
load_start = Memory::VRAM_VADDR;
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
ASSERT(load_start >= addr && load_end <= end);
const u32 start_offset = load_start - addr;
if (!is_tiled) {
ASSERT(type == SurfaceType::Color);
if (need_swap) {
// TODO(liushuyu): check if the byteswap here is 100% correct
// cannot fully test this
if (pixel_format == PixelFormat::RGBA8) {
for (std::size_t i = start_offset; i < load_end - addr; i += 4) {
gl_buffer[i] = texture_src_data[i + 3];
gl_buffer[i + 1] = texture_src_data[i + 2];
gl_buffer[i + 2] = texture_src_data[i + 1];
gl_buffer[i + 3] = texture_src_data[i];
}
} else if (pixel_format == PixelFormat::RGB8) {
for (std::size_t i = start_offset; i < load_end - addr; i += 3) {
gl_buffer[i] = texture_src_data[i + 2];
gl_buffer[i + 1] = texture_src_data[i + 1];
gl_buffer[i + 2] = texture_src_data[i];
}
}
} else {
std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset,
load_end - load_start);
}
} else {
if (type == SurfaceType::Texture) {
Pica::Texture::TextureInfo tex_info{};
tex_info.width = width;
tex_info.height = height;
tex_info.format = static_cast<Pica::TexturingRegs::TextureFormat>(pixel_format);
tex_info.SetDefaultStride();
tex_info.physical_address = addr;
const SurfaceInterval load_interval(load_start, load_end);
const auto rect = GetSubRect(FromInterval(load_interval));
ASSERT(FromInterval(load_interval).GetInterval() == load_interval);
for (unsigned y = rect.bottom; y < rect.top; ++y) {
for (unsigned x = rect.left; x < rect.right; ++x) {
auto vec4 =
Pica::Texture::LookupTexture(texture_src_data, x, height - 1 - y, tex_info);
const std::size_t offset = (x + (width * y)) * 4;
std::memcpy(&gl_buffer[offset], vec4.AsArray(), 4);
}
}
} else {
morton_to_gl_fns[static_cast<std::size_t>(pixel_format)](stride, height, &gl_buffer[0],
addr, load_start, load_end);
}
}
}
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) {
u8* const dst_buffer = VideoCore::g_memory->GetPhysicalPointer(addr);
if (dst_buffer == nullptr)
return;
ASSERT(gl_buffer.size() == width * height * GetBytesPerPixel(pixel_format));
// TODO: Should probably be done in ::Memory:: and check for other regions too
// same as loadglbuffer()
if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END)
flush_end = Memory::VRAM_VADDR_END;
if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR)
flush_start = Memory::VRAM_VADDR;
MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
ASSERT(flush_start >= addr && flush_end <= end);
const u32 start_offset = flush_start - addr;
const u32 end_offset = flush_end - addr;
if (type == SurfaceType::Fill) {
const u32 coarse_start_offset = start_offset - (start_offset % fill_size);
const u32 backup_bytes = start_offset % fill_size;
std::array<u8, 4> backup_data;
if (backup_bytes)
std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes);
for (u32 offset = coarse_start_offset; offset < end_offset; offset += fill_size) {
std::memcpy(&dst_buffer[offset], &fill_data[0],
std::min(fill_size, end_offset - offset));
}
if (backup_bytes)
std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes);
} else if (!is_tiled) {
ASSERT(type == SurfaceType::Color);
if (pixel_format == PixelFormat::RGBA8 && GLES) {
for (std::size_t i = start_offset; i < flush_end - addr; i += 4) {
dst_buffer[i] = gl_buffer[i + 3];
dst_buffer[i + 1] = gl_buffer[i + 2];
dst_buffer[i + 2] = gl_buffer[i + 1];
dst_buffer[i + 3] = gl_buffer[i];
}
} else if (pixel_format == PixelFormat::RGB8 && GLES) {
for (std::size_t i = start_offset; i < flush_end - addr; i += 3) {
dst_buffer[i] = gl_buffer[i + 2];
dst_buffer[i + 1] = gl_buffer[i + 1];
dst_buffer[i + 2] = gl_buffer[i];
}
} else {
std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset],
flush_end - flush_start);
}
} else {
gl_to_morton_fns[static_cast<std::size_t>(pixel_format)](stride, height, &gl_buffer[0],
addr, flush_start, flush_end);
}
}
bool CachedSurface::LoadCustomTexture(u64 tex_hash) {
auto& custom_tex_cache = Core::System::GetInstance().CustomTexCache();
const auto& image_interface = Core::System::GetInstance().GetImageInterface();
if (custom_tex_cache.IsTextureCached(tex_hash)) {
custom_tex_info = custom_tex_cache.LookupTexture(tex_hash);
return true;
}
if (!custom_tex_cache.CustomTextureExists(tex_hash)) {
return false;
}
const auto& path_info = custom_tex_cache.LookupTexturePathInfo(tex_hash);
if (!image_interface->DecodePNG(custom_tex_info.tex, custom_tex_info.width,
custom_tex_info.height, path_info.path)) {
LOG_ERROR(Render_OpenGL, "Failed to load custom texture {}", path_info.path);
return false;
}
const std::bitset<32> width_bits(custom_tex_info.width);
const std::bitset<32> height_bits(custom_tex_info.height);
if (width_bits.count() != 1 || height_bits.count() != 1) {
LOG_ERROR(Render_OpenGL, "Texture {} size is not a power of 2", path_info.path);
return false;
}
LOG_DEBUG(Render_OpenGL, "Loaded custom texture from {}", path_info.path);
Common::FlipRGBA8Texture(custom_tex_info.tex, custom_tex_info.width, custom_tex_info.height);
custom_tex_cache.CacheTexture(tex_hash, custom_tex_info.tex, custom_tex_info.width,
custom_tex_info.height);
return true;
}
void CachedSurface::DumpTexture(GLuint target_tex, u64 tex_hash) {
// Make sure the texture size is a power of 2
// If not, the surface is actually a framebuffer
std::bitset<32> width_bits(width);
std::bitset<32> height_bits(height);
if (width_bits.count() != 1 || height_bits.count() != 1) {
LOG_WARNING(Render_OpenGL, "Not dumping {:016X} because size isn't a power of 2 ({}x{})",
tex_hash, width, height);
return;
}
// Dump texture to RGBA8 and encode as PNG
const auto& image_interface = Core::System::GetInstance().GetImageInterface();
auto& custom_tex_cache = Core::System::GetInstance().CustomTexCache();
std::string dump_path =
fmt::format("{}textures/{:016X}/", FileUtil::GetUserPath(FileUtil::UserPath::DumpDir),
Core::System::GetInstance().Kernel().GetCurrentProcess()->codeset->program_id);
if (!FileUtil::CreateFullPath(dump_path)) {
LOG_ERROR(Render, "Unable to create {}", dump_path);
return;
}
dump_path += fmt::format("tex1_{}x{}_{:016X}_{}.png", width, height, tex_hash, pixel_format);
if (!custom_tex_cache.IsTextureDumped(tex_hash) && !FileUtil::Exists(dump_path)) {
custom_tex_cache.SetTextureDumped(tex_hash);
LOG_INFO(Render_OpenGL, "Dumping texture to {}", dump_path);
std::vector<u8> decoded_texture;
decoded_texture.resize(width * height * 4);
OpenGLState state = OpenGLState::GetCurState();
GLuint old_texture = state.texture_units[0].texture_2d;
state.Apply();
/*
GetTexImageOES is used even if not using OpenGL ES to work around a small issue that
happens if using custom textures with texture dumping at the same.
Let's say there's 2 textures that are both 32x32 and one of them gets replaced with a
higher quality 256x256 texture. If the 256x256 texture is displayed first and the
32x32 texture gets uploaded to the same underlying OpenGL texture, the 32x32 texture
will appear in the corner of the 256x256 texture. If texture dumping is enabled and
the 32x32 is undumped, Citra will attempt to dump it. Since the underlying OpenGL
texture is still 256x256, Citra crashes because it thinks the texture is only 32x32.
GetTexImageOES conveniently only dumps the specified region, and works on both
desktop and ES.
*/
owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE,
height, width, &decoded_texture[0]);
state.texture_units[0].texture_2d = old_texture;
state.Apply();
Common::FlipRGBA8Texture(decoded_texture, width, height);
if (!image_interface->EncodePNG(dump_path, decoded_texture, width, height))
LOG_ERROR(Render_OpenGL, "Failed to save decoded texture");
}
}
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
void CachedSurface::UploadGLTexture(Common::Rectangle<u32> rect) {
if (type == SurfaceType::Fill) {
return;
}
MICROPROFILE_SCOPE(OpenGL_TextureUL);
ASSERT(gl_buffer.size() == width * height * GetBytesPerPixel(pixel_format));
u64 tex_hash = 0;
if (Settings::values.dump_textures || Settings::values.custom_textures) {
tex_hash = Common::ComputeHash64(gl_buffer.data(), gl_buffer.size());
}
if (Settings::values.custom_textures) {
is_custom = LoadCustomTexture(tex_hash);
}
// Load data from memory to the surface
GLint x0 = static_cast<GLint>(rect.left);
GLint y0 = static_cast<GLint>(rect.bottom);
std::size_t buffer_offset = (y0 * stride + x0) * GetBytesPerPixel(pixel_format);
const FormatTuple& tuple = GetFormatTuple(pixel_format);
GLuint target_tex = texture.handle;
// If not 1x scale, create 1x texture that we will blit from to replace texture subrect in
// surface
OGLTexture unscaled_tex;
if (res_scale != 1) {
x0 = 0;
y0 = 0;
if (is_custom) {
const auto& tuple = GetFormatTuple(PixelFormat::RGBA8);
unscaled_tex =
owner.AllocateSurfaceTexture(tuple, custom_tex_info.width, custom_tex_info.height);
} else {
unscaled_tex = owner.AllocateSurfaceTexture(tuple, rect.GetWidth(), rect.GetHeight());
}
target_tex = unscaled_tex.handle;
}
OpenGLState cur_state = OpenGLState::GetCurState();
GLuint old_tex = cur_state.texture_units[0].texture_2d;
cur_state.texture_units[0].texture_2d = target_tex;
cur_state.Apply();
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0);
if (is_custom) {
if (res_scale == 1) {
texture = owner.AllocateSurfaceTexture(GetFormatTuple(PixelFormat::RGBA8),
custom_tex_info.width, custom_tex_info.height);
cur_state.texture_units[0].texture_2d = texture.handle;
cur_state.Apply();
}
// Always going to be using rgba8
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(custom_tex_info.width));
glActiveTexture(GL_TEXTURE0);
glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, custom_tex_info.width, custom_tex_info.height,
GL_RGBA, GL_UNSIGNED_BYTE, custom_tex_info.tex.data());
} else {
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride));
glActiveTexture(GL_TEXTURE0);
glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
&gl_buffer[buffer_offset]);
}
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
if (Settings::values.dump_textures && !is_custom) {
DumpTexture(target_tex, tex_hash);
}
cur_state.texture_units[0].texture_2d = old_tex;
cur_state.Apply();
if (res_scale != 1) {
auto scaled_rect = rect;
scaled_rect.left *= res_scale;
scaled_rect.top *= res_scale;
scaled_rect.right *= res_scale;
scaled_rect.bottom *= res_scale;
const u32 width = is_custom ? custom_tex_info.width : rect.GetWidth();
const u32 height = is_custom ? custom_tex_info.height : rect.GetHeight();
const Common::Rectangle<u32> from_rect{0, height, width, 0};
if (!owner.texture_filterer->Filter(unscaled_tex, from_rect, texture, scaled_rect, type)) {
const Aspect aspect = ToAspect(type);
runtime.BlitTextures(unscaled_tex, {aspect, from_rect}, texture, {aspect, scaled_rect});
}
}
InvalidateAllWatcher();
}
MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64));
void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect) {
if (type == SurfaceType::Fill) {
return;
}
MICROPROFILE_SCOPE(OpenGL_TextureDL);
if (gl_buffer.empty()) {
gl_buffer.resize(width * height * GetBytesPerPixel(pixel_format));
}
OpenGLState state = OpenGLState::GetCurState();
OpenGLState prev_state = state;
SCOPE_EXIT({ prev_state.Apply(); });
const FormatTuple& tuple = GetFormatTuple(pixel_format);
// Ensure no bad interactions with GL_PACK_ALIGNMENT
ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0);
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride));
const std::size_t buffer_offset =
(rect.bottom * stride + rect.left) * GetBytesPerPixel(pixel_format);
// If not 1x scale, blit scaled texture to a new 1x texture and use that to flush
const Aspect aspect = ToAspect(type);
if (res_scale != 1) {
auto scaled_rect = rect;
scaled_rect.left *= res_scale;
scaled_rect.top *= res_scale;
scaled_rect.right *= res_scale;
scaled_rect.bottom *= res_scale;
const Common::Rectangle<u32> unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0};
auto unscaled_tex = owner.AllocateSurfaceTexture(tuple, rect.GetWidth(), rect.GetHeight());
// Blit scaled texture to the unscaled one
runtime.BlitTextures(texture, {aspect, scaled_rect}, unscaled_tex,
{aspect, unscaled_tex_rect});
state.texture_units[0].texture_2d = unscaled_tex.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
if (GLES) {
owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
rect.GetHeight(), rect.GetWidth(),
&gl_buffer[buffer_offset]);
} else {
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]);
}
} else {
runtime.ReadTexture(texture, {aspect, rect}, tuple, gl_buffer.data());
}
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
}
bool CachedSurface::CanFill(const SurfaceParams& dest_surface,
SurfaceInterval fill_interval) const {
if (type == SurfaceType::Fill && IsRegionValid(fill_interval) &&
boost::icl::first(fill_interval) >= addr &&
boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range
dest_surface.FromInterval(fill_interval).GetInterval() ==
fill_interval) { // make sure interval is a rectangle in dest surface
if (fill_size * 8 != dest_surface.GetFormatBpp()) {
// Check if bits repeat for our fill_size
const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u);
std::vector<u8> fill_test(fill_size * dest_bytes_per_pixel);
for (u32 i = 0; i < dest_bytes_per_pixel; ++i)
std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size);
for (u32 i = 0; i < fill_size; ++i)
if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0],
dest_bytes_per_pixel) != 0)
return false;
if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4))
return false;
}
return true;
}
return false;
}
bool CachedSurface::CanCopy(const SurfaceParams& dest_surface,
SurfaceInterval copy_interval) const {
SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval);
ASSERT(subrect_params.GetInterval() == copy_interval);
if (CanSubRect(subrect_params))
return true;
if (CanFill(dest_surface, copy_interval))
return true;
return false;
}
} // namespace OpenGL