diff --git a/src/citra/config.cpp b/src/citra/config.cpp index e2cc08f6a..b95812964 100644 --- a/src/citra/config.cpp +++ b/src/citra/config.cpp @@ -89,7 +89,7 @@ void Config::ReadValues() { Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", true); Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true); Settings::values.resolution_factor = - (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); + static_cast(sdl2_config->GetInteger("Renderer", "resolution_factor", 1)); Settings::values.use_vsync = sdl2_config->GetBoolean("Renderer", "use_vsync", false); Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); Settings::values.frame_limit = diff --git a/src/citra_qt/configuration/config.cpp b/src/citra_qt/configuration/config.cpp index c0a6115da..602a19e13 100644 --- a/src/citra_qt/configuration/config.cpp +++ b/src/citra_qt/configuration/config.cpp @@ -75,7 +75,8 @@ void Config::ReadValues() { qt_config->beginGroup("Renderer"); Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", true).toBool(); Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool(); - Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat(); + Settings::values.resolution_factor = + static_cast(qt_config->value("resolution_factor", 1).toInt()); Settings::values.use_vsync = qt_config->value("use_vsync", false).toBool(); Settings::values.use_frame_limit = qt_config->value("use_frame_limit", true).toBool(); Settings::values.frame_limit = qt_config->value("frame_limit", 100).toInt(); @@ -240,7 +241,7 @@ void Config::SaveValues() { qt_config->beginGroup("Renderer"); qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer); qt_config->setValue("use_shader_jit", Settings::values.use_shader_jit); - qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor); 
+ qt_config->setValue("resolution_factor", Settings::values.resolution_factor); qt_config->setValue("use_vsync", Settings::values.use_vsync); qt_config->setValue("use_frame_limit", Settings::values.use_frame_limit); qt_config->setValue("frame_limit", Settings::values.frame_limit); diff --git a/src/citra_qt/configuration/configure_graphics.cpp b/src/citra_qt/configuration/configure_graphics.cpp index 935532807..03f62657b 100644 --- a/src/citra_qt/configuration/configure_graphics.cpp +++ b/src/citra_qt/configuration/configure_graphics.cpp @@ -23,81 +23,11 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) ConfigureGraphics::~ConfigureGraphics() {} -enum class Resolution : int { - Auto, - Scale1x, - Scale2x, - Scale3x, - Scale4x, - Scale5x, - Scale6x, - Scale7x, - Scale8x, - Scale9x, - Scale10x, -}; - -float ToResolutionFactor(Resolution option) { - switch (option) { - case Resolution::Auto: - return 0.f; - case Resolution::Scale1x: - return 1.f; - case Resolution::Scale2x: - return 2.f; - case Resolution::Scale3x: - return 3.f; - case Resolution::Scale4x: - return 4.f; - case Resolution::Scale5x: - return 5.f; - case Resolution::Scale6x: - return 6.f; - case Resolution::Scale7x: - return 7.f; - case Resolution::Scale8x: - return 8.f; - case Resolution::Scale9x: - return 9.f; - case Resolution::Scale10x: - return 10.f; - } - return 0.f; -} - -Resolution FromResolutionFactor(float factor) { - if (factor == 0.f) { - return Resolution::Auto; - } else if (factor == 1.f) { - return Resolution::Scale1x; - } else if (factor == 2.f) { - return Resolution::Scale2x; - } else if (factor == 3.f) { - return Resolution::Scale3x; - } else if (factor == 4.f) { - return Resolution::Scale4x; - } else if (factor == 5.f) { - return Resolution::Scale5x; - } else if (factor == 6.f) { - return Resolution::Scale6x; - } else if (factor == 7.f) { - return Resolution::Scale7x; - } else if (factor == 8.f) { - return Resolution::Scale8x; - } else if (factor == 9.f) { - return 
Resolution::Scale9x; - } else if (factor == 10.f) { - return Resolution::Scale10x; - } - return Resolution::Auto; -} - void ConfigureGraphics::setConfiguration() { ui->toggle_hw_renderer->setChecked(Settings::values.use_hw_renderer); ui->resolution_factor_combobox->setEnabled(Settings::values.use_hw_renderer); ui->toggle_shader_jit->setChecked(Settings::values.use_shader_jit); - ui->resolution_factor_combobox->setCurrentIndex( - static_cast(FromResolutionFactor(Settings::values.resolution_factor))); + ui->resolution_factor_combobox->setCurrentIndex(Settings::values.resolution_factor); ui->toggle_vsync->setChecked(Settings::values.use_vsync); ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit); ui->frame_limit->setValue(Settings::values.frame_limit); @@ -109,7 +39,7 @@ void ConfigureGraphics::applyConfiguration() { Settings::values.use_hw_renderer = ui->toggle_hw_renderer->isChecked(); Settings::values.use_shader_jit = ui->toggle_shader_jit->isChecked(); Settings::values.resolution_factor = - ToResolutionFactor(static_cast(ui->resolution_factor_combobox->currentIndex())); + static_cast(ui->resolution_factor_combobox->currentIndex()); Settings::values.use_vsync = ui->toggle_vsync->isChecked(); Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked(); Settings::values.frame_limit = ui->frame_limit->value(); diff --git a/src/common/math_util.h b/src/common/math_util.h index 45a1ed367..fa1d61dac 100644 --- a/src/common/math_util.h +++ b/src/common/math_util.h @@ -29,7 +29,7 @@ struct Rectangle { T right; T bottom; - Rectangle() {} + Rectangle() = default; Rectangle(T left, T top, T right, T bottom) : left(left), top(top), right(right), bottom(bottom) {} diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp index e9f778fcb..7af9556b1 100644 --- a/src/core/frontend/framebuffer_layout.cpp +++ b/src/core/frontend/framebuffer_layout.cpp @@ -16,8 +16,8 @@ static const float TOP_SCREEN_ASPECT_RATIO = 
static const float BOT_SCREEN_ASPECT_RATIO = static_cast(Core::kScreenBottomHeight) / Core::kScreenBottomWidth; -float FramebufferLayout::GetScalingRatio() const { - return static_cast(top_screen.GetWidth()) / Core::kScreenTopWidth; +u16 FramebufferLayout::GetScalingRatio() const { + return static_cast(((top_screen.GetWidth() - 1) / Core::kScreenTopWidth) + 1); } // Finds the largest size subrectangle contained in window area that is confined to the aspect ratio diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h index 4983cf103..0d826be9e 100644 --- a/src/core/frontend/framebuffer_layout.h +++ b/src/core/frontend/framebuffer_layout.h @@ -21,7 +21,7 @@ struct FramebufferLayout { * Returns the ration of pixel size of the top screen, compared to the native size of the 3DS * screen. */ - float GetScalingRatio() const; + u16 GetScalingRatio() const; }; /** diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index 62b6938d4..c886d4f61 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp @@ -58,7 +58,6 @@ void VMManager::Reset() { page_table.pointers.fill(nullptr); page_table.attributes.fill(Memory::PageType::Unmapped); - page_table.cached_res_count.fill(0); UpdatePageTableForVMA(initial_vma); } diff --git a/src/core/hle/service/gsp/gsp_gpu.cpp b/src/core/hle/service/gsp/gsp_gpu.cpp index 19d0e439f..bafe03b5d 100644 --- a/src/core/hle/service/gsp/gsp_gpu.cpp +++ b/src/core/hle/service/gsp/gsp_gpu.cpp @@ -465,7 +465,7 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { command.dma_request.size, Memory::FlushMode::Flush); Memory::RasterizerFlushVirtualRegion(command.dma_request.dest_address, command.dma_request.size, - Memory::FlushMode::FlushAndInvalidate); + Memory::FlushMode::Invalidate); // TODO(Subv): These memory accesses should not go through the application's memory mapping. // They should go through the GSP module's memory mapping. 
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 350b9a6a2..8ba679ea7 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -96,20 +96,11 @@ static void MemoryFill(const Regs::MemoryFillConfig& config) { u8* start = Memory::GetPhysicalPointer(start_addr); u8* end = Memory::GetPhysicalPointer(end_addr); - // TODO: Consider always accelerating and returning vector of - // regions that the accelerated fill did not cover to - // reduce/eliminate the fill that the cpu has to do. - // This would also mean that the flush below is not needed. - // Fill should first flush all surfaces that touch but are - // not completely within the fill range. - // Then fill all completely covered surfaces, and return the - // regions that were between surfaces or within the touching - // ones for cpu to manually fill here. if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) return; - Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), - config.GetEndAddress() - config.GetStartAddress()); + Memory::RasterizerInvalidateRegion(config.GetStartAddress(), + config.GetEndAddress() - config.GetStartAddress()); if (config.fill_24bit) { // fill with 24-bit values @@ -199,7 +190,7 @@ static void DisplayTransfer(const Regs::DisplayTransferConfig& config) { u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size); - Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); + Memory::RasterizerInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); for (u32 y = 0; y < output_height; ++y) { for (u32 x = 0; x < output_width; ++x) { @@ -363,8 +354,10 @@ static void TextureCopy(const Regs::DisplayTransferConfig& config) { size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); - 
Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), - static_cast(contiguous_output_size)); + // Only need to flush output if it has a gap + const auto FlushInvalidate_fn = (output_gap != 0) ? Memory::RasterizerFlushAndInvalidateRegion + : Memory::RasterizerInvalidateRegion; + FlushInvalidate_fn(config.GetPhysicalOutputAddress(), static_cast(contiguous_output_size)); u32 remaining_input = input_width; u32 remaining_output = output_width; @@ -570,4 +563,4 @@ void Shutdown() { LOG_DEBUG(HW_GPU, "shutdown OK"); } -} // namespace +} // namespace GPU diff --git a/src/core/memory.cpp b/src/core/memory.cpp index d420fe35c..7ef368878 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -50,7 +50,6 @@ static void MapPages(PageTable& page_table, u32 base, u32 size, u8* memory, Page page_table.attributes[base] = type; page_table.pointers[base] = memory; - page_table.cached_res_count[base] = 0; base += 1; if (memory != nullptr) @@ -200,7 +199,7 @@ void Write(const VAddr vaddr, const T data) { ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); break; case PageType::RasterizerCachedMemory: { - RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::FlushAndInvalidate); + RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate); std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T)); break; } @@ -208,7 +207,7 @@ void Write(const VAddr vaddr, const T data) { WriteMMIO(GetMMIOHandler(vaddr), vaddr, data); break; case PageType::RasterizerCachedSpecial: { - RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::FlushAndInvalidate); + RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate); WriteMMIO(GetMMIOHandler(vaddr), vaddr, data); break; } @@ -334,7 +333,7 @@ u8* GetPhysicalPointer(PAddr address) { return target_pointer; } -void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { +void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached) { if (start == 0) { 
return; } @@ -355,14 +354,10 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { } VAddr vaddr = *maybe_vaddr; - u8& res_count = current_page_table->cached_res_count[vaddr >> PAGE_BITS]; - ASSERT_MSG(count_delta <= UINT8_MAX - res_count, - "Rasterizer resource cache counter overflow!"); - ASSERT_MSG(count_delta >= -res_count, "Rasterizer resource cache counter underflow!"); + PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; - // Switch page type to cached if now cached - if (res_count == 0) { - PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; + if (cached) { + // Switch page type to cached if now cached switch (page_type) { case PageType::Unmapped: // It is not necessary for a process to have this region mapped into its address @@ -378,13 +373,8 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { default: UNREACHABLE(); } - } - - res_count += count_delta; - - // Switch page type to uncached if now uncached - if (res_count == 0) { - PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; + } else { + // Switch page type to uncached if now uncached switch (page_type) { case PageType::Unmapped: // It is not necessary for a process to have this region mapped into its address @@ -414,52 +404,69 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { } void RasterizerFlushRegion(PAddr start, u32 size) { - if (VideoCore::g_renderer != nullptr) { - VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size); + if (VideoCore::g_renderer == nullptr) { + return; } + + VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size); +} + +void RasterizerInvalidateRegion(PAddr start, u32 size) { + if (VideoCore::g_renderer == nullptr) { + return; + } + + VideoCore::g_renderer->Rasterizer()->InvalidateRegion(start, size); } void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) { // Since pages are unmapped on shutdown after video 
core is shutdown, the renderer may be // null here - if (VideoCore::g_renderer != nullptr) { - VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(start, size); + if (VideoCore::g_renderer == nullptr) { + return; } + + VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(start, size); } void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) { // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be // null here - if (VideoCore::g_renderer != nullptr) { - VAddr end = start + size; - - auto CheckRegion = [&](VAddr region_start, VAddr region_end) { - if (start >= region_end || end <= region_start) { - // No overlap with region - return; - } - - VAddr overlap_start = std::max(start, region_start); - VAddr overlap_end = std::min(end, region_end); - - PAddr physical_start = TryVirtualToPhysicalAddress(overlap_start).value(); - u32 overlap_size = overlap_end - overlap_start; - - auto* rasterizer = VideoCore::g_renderer->Rasterizer(); - switch (mode) { - case FlushMode::Flush: - rasterizer->FlushRegion(physical_start, overlap_size); - break; - case FlushMode::FlushAndInvalidate: - rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size); - break; - } - }; - - CheckRegion(LINEAR_HEAP_VADDR, LINEAR_HEAP_VADDR_END); - CheckRegion(NEW_LINEAR_HEAP_VADDR, NEW_LINEAR_HEAP_VADDR_END); - CheckRegion(VRAM_VADDR, VRAM_VADDR_END); + if (VideoCore::g_renderer == nullptr) { + return; } + + VAddr end = start + size; + + auto CheckRegion = [&](VAddr region_start, VAddr region_end) { + if (start >= region_end || end <= region_start) { + // No overlap with region + return; + } + + VAddr overlap_start = std::max(start, region_start); + VAddr overlap_end = std::min(end, region_end); + + PAddr physical_start = TryVirtualToPhysicalAddress(overlap_start).value(); + u32 overlap_size = overlap_end - overlap_start; + + auto* rasterizer = VideoCore::g_renderer->Rasterizer(); + switch (mode) { + case FlushMode::Flush: + 
rasterizer->FlushRegion(physical_start, overlap_size); + break; + case FlushMode::Invalidate: + rasterizer->InvalidateRegion(physical_start, overlap_size); + break; + case FlushMode::FlushAndInvalidate: + rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size); + break; + } + }; + + CheckRegion(LINEAR_HEAP_VADDR, LINEAR_HEAP_VADDR_END); + CheckRegion(NEW_LINEAR_HEAP_VADDR, NEW_LINEAR_HEAP_VADDR_END); + CheckRegion(VRAM_VADDR, VRAM_VADDR_END); } u8 Read8(const VAddr addr) { @@ -588,7 +595,7 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi } case PageType::RasterizerCachedMemory: { RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), - FlushMode::FlushAndInvalidate); + FlushMode::Invalidate); std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount); break; } @@ -596,7 +603,7 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr); DEBUG_ASSERT(handler); RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), - FlushMode::FlushAndInvalidate); + FlushMode::Invalidate); handler->WriteBlock(current_vaddr, src_buffer, copy_amount); break; } @@ -648,7 +655,7 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size } case PageType::RasterizerCachedMemory: { RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), - FlushMode::FlushAndInvalidate); + FlushMode::Invalidate); std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount); break; } @@ -656,7 +663,7 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr); DEBUG_ASSERT(handler); RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), - FlushMode::FlushAndInvalidate); + FlushMode::Invalidate); handler->WriteBlock(current_vaddr, zeros.data(), copy_amount); break; } 
diff --git a/src/core/memory.h b/src/core/memory.h index 1527ec4c7..4ec5e8d60 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -72,12 +72,6 @@ struct PageTable { * the corresponding entry in `pointers` MUST be set to null. */ std::array attributes; - - /** - * Indicates the number of externally cached resources touching a page that should be - * flushed before the memory is accessed - */ - std::array cached_res_count; }; /// Physical memory regions as seen from the ARM11 @@ -245,16 +239,20 @@ boost::optional PhysicalToVirtualAddress(PAddr addr); u8* GetPhysicalPointer(PAddr address); /** - * Adds the supplied value to the rasterizer resource cache counter of each - * page touching the region. + * Mark each page touching the region as cached. */ -void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta); +void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached); /** * Flushes any externally cached rasterizer resources touching the given region. */ void RasterizerFlushRegion(PAddr start, u32 size); +/** + * Invalidates any externally cached rasterizer resources touching the given region. + */ +void RasterizerInvalidateRegion(PAddr start, u32 size); + /** * Flushes and invalidates any externally cached rasterizer resources touching the given region. 
*/ @@ -263,6 +261,8 @@ void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size); enum class FlushMode { /// Write back modified surfaces to RAM Flush, + /// Remove region from the cache + Invalidate, /// Write back modified surfaces to RAM, and also remove them from the cache FlushAndInvalidate, }; diff --git a/src/core/settings.h b/src/core/settings.h index c0896b940..1b639cf46 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -95,7 +95,7 @@ struct Values { // Renderer bool use_hw_renderer; bool use_shader_jit; - float resolution_factor; + u16 resolution_factor; bool use_vsync; bool use_frame_limit; u16 frame_limit; diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp index 484713a92..8520f53b2 100644 --- a/src/tests/core/arm/arm_test_common.cpp +++ b/src/tests/core/arm/arm_test_common.cpp @@ -20,7 +20,6 @@ TestEnvironment::TestEnvironment(bool mutable_memory_) page_table->pointers.fill(nullptr); page_table->attributes.fill(Memory::PageType::Unmapped); - page_table->cached_res_count.fill(0); Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory); Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 8ef7e74c7..1d4c98189 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -38,6 +38,9 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory virtual void FlushRegion(PAddr addr, u32 size) = 0; + /// Notify rasterizer that any caches of the specified region should be invalidated + virtual void InvalidateRegion(PAddr addr, u32 size) = 0; + /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory /// and invalidated virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp 
b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 599e6e2af..8d2d1698a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -7,8 +7,8 @@ #include #include #include +#include "common/alignment.h" #include "common/assert.h" -#include "common/color.h" #include "common/logging/log.h" #include "common/math_util.h" #include "common/microprofile.h" @@ -23,6 +23,9 @@ #include "video_core/renderer_opengl/pica_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" +using PixelFormat = SurfaceParams::PixelFormat; +using SurfaceType = SurfaceParams::SurfaceType; + MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); @@ -227,21 +230,68 @@ void RasterizerOpenGL::DrawTriangles() { MICROPROFILE_SCOPE(OpenGL_Drawing); const auto& regs = Pica::g_state.regs; - // Sync and bind the framebuffer surfaces - CachedSurface* color_surface; - CachedSurface* depth_surface; - MathUtil::Rectangle rect; - std::tie(color_surface, depth_surface, rect) = - res_cache.GetFramebufferSurfaces(regs.framebuffer.framebuffer); + const bool has_stencil = + regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; + const bool write_color_fb = + state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || + state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; + + const bool write_depth_fb = + (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) || + (has_stencil && state.stencil.test_enabled && state.stencil.write_mask != 0); + + const bool using_color_fb = + regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb; + const bool using_depth_fb = + 
regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 && + (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 || + (has_stencil && state.stencil.test_enabled)); + + MathUtil::Rectangle viewport_rect_unscaled{ + // These registers hold half-width and half-height, so must be multiplied by 2 + regs.rasterizer.viewport_corner.x, // left + regs.rasterizer.viewport_corner.y + // top + static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() * + 2), + regs.rasterizer.viewport_corner.x + // right + static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() * + 2), + regs.rasterizer.viewport_corner.y // bottom + }; + + Surface color_surface; + Surface depth_surface; + MathUtil::Rectangle surfaces_rect; + std::tie(color_surface, depth_surface, surfaces_rect) = + res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled); + + const u16 res_scale = color_surface != nullptr + ? color_surface->res_scale + : (depth_surface == nullptr ? 1u : depth_surface->res_scale); + + MathUtil::Rectangle draw_rect{ + static_cast(MathUtil::Clamp(static_cast(surfaces_rect.left) + + viewport_rect_unscaled.left * res_scale, + surfaces_rect.left, surfaces_rect.right)), // Left + static_cast(MathUtil::Clamp(static_cast(surfaces_rect.bottom) + + viewport_rect_unscaled.top * res_scale, + surfaces_rect.bottom, surfaces_rect.top)), // Top + static_cast(MathUtil::Clamp(static_cast(surfaces_rect.left) + + viewport_rect_unscaled.right * res_scale, + surfaces_rect.left, surfaces_rect.right)), // Right + static_cast(MathUtil::Clamp(static_cast(surfaces_rect.bottom) + + viewport_rect_unscaled.bottom * res_scale, + surfaces_rect.bottom, surfaces_rect.top))}; // Bottom + + // Bind the framebuffer surfaces state.draw.draw_framebuffer = framebuffer.handle; state.Apply(); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_surface != nullptr ? 
color_surface->texture.handle : 0, 0); if (depth_surface != nullptr) { - if (regs.framebuffer.framebuffer.depth_format == - Pica::FramebufferRegs::DepthFormat::D24S8) { + if (has_stencil) { // attach both depth and stencil glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, depth_surface->texture.handle, 0); @@ -259,38 +309,30 @@ void RasterizerOpenGL::DrawTriangles() { } // Sync the viewport - // These registers hold half-width and half-height, so must be multiplied by 2 - GLsizei viewport_width = - (GLsizei)Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() * 2; - GLsizei viewport_height = - (GLsizei)Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() * 2; + state.viewport.x = + static_cast(surfaces_rect.left) + viewport_rect_unscaled.left * res_scale; + state.viewport.y = + static_cast(surfaces_rect.bottom) + viewport_rect_unscaled.bottom * res_scale; + state.viewport.width = static_cast(viewport_rect_unscaled.GetWidth() * res_scale); + state.viewport.height = static_cast(viewport_rect_unscaled.GetHeight() * res_scale); - glViewport( - (GLint)(rect.left + regs.rasterizer.viewport_corner.x * color_surface->res_scale_width), - (GLint)(rect.bottom + regs.rasterizer.viewport_corner.y * color_surface->res_scale_height), - (GLsizei)(viewport_width * color_surface->res_scale_width), - (GLsizei)(viewport_height * color_surface->res_scale_height)); - - if (uniform_block_data.data.framebuffer_scale[0] != color_surface->res_scale_width || - uniform_block_data.data.framebuffer_scale[1] != color_surface->res_scale_height) { - - uniform_block_data.data.framebuffer_scale[0] = color_surface->res_scale_width; - uniform_block_data.data.framebuffer_scale[1] = color_surface->res_scale_height; + if (uniform_block_data.data.framebuffer_scale != res_scale) { + uniform_block_data.data.framebuffer_scale = res_scale; uniform_block_data.dirty = true; } // Scissor checks are window-, not viewport-relative, which means 
that if the cached texture // sub-rect changes, the scissor bounds also need to be updated. - GLint scissor_x1 = static_cast( - rect.left + regs.rasterizer.scissor_test.x1 * color_surface->res_scale_width); - GLint scissor_y1 = static_cast( - rect.bottom + regs.rasterizer.scissor_test.y1 * color_surface->res_scale_height); + GLint scissor_x1 = + static_cast(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale); + GLint scissor_y1 = + static_cast(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale); // x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when // scaling or doing multisampling. - GLint scissor_x2 = static_cast( - rect.left + (regs.rasterizer.scissor_test.x2 + 1) * color_surface->res_scale_width); - GLint scissor_y2 = static_cast( - rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * color_surface->res_scale_height); + GLint scissor_x2 = + static_cast(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale); + GLint scissor_y2 = static_cast(surfaces_rect.bottom + + (regs.rasterizer.scissor_test.y2 + 1) * res_scale); if (uniform_block_data.data.scissor_x1 != scissor_x1 || uniform_block_data.data.scissor_x2 != scissor_x2 || @@ -311,7 +353,7 @@ void RasterizerOpenGL::DrawTriangles() { if (texture.enabled) { texture_samplers[texture_index].SyncWithConfig(texture.config); - CachedSurface* surface = res_cache.GetTextureSurface(texture); + Surface surface = res_cache.GetTextureSurface(texture); if (surface != nullptr) { state.texture_units[texture_index].texture_2d = surface->texture.handle; } else { @@ -380,6 +422,15 @@ void RasterizerOpenGL::DrawTriangles() { uniform_block_data.dirty = false; } + // Viewport can have negative offsets or larger + // dimensions than our framebuffer sub-rect. 
+ // Enable scissor test to prevent drawing + // outside of the framebuffer region + state.scissor.enabled = true; + state.scissor.x = draw_rect.left; + state.scissor.y = draw_rect.bottom; + state.scissor.width = draw_rect.GetWidth(); + state.scissor.height = draw_rect.GetHeight(); state.Apply(); // Draw the vertex batch @@ -387,16 +438,8 @@ void RasterizerOpenGL::DrawTriangles() { GL_STREAM_DRAW); glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); - // Mark framebuffer surfaces as dirty - // TODO: Restrict invalidation area to the viewport - if (color_surface != nullptr) { - color_surface->dirty = true; - res_cache.FlushRegion(color_surface->addr, color_surface->size, color_surface, true); - } - if (depth_surface != nullptr) { - depth_surface->dirty = true; - res_cache.FlushRegion(depth_surface->addr, depth_surface->size, depth_surface, true); - } + // Disable scissor test + state.scissor.enabled = false; vertex_batch.clear(); @@ -405,6 +448,22 @@ void RasterizerOpenGL::DrawTriangles() { state.texture_units[texture_index].texture_2d = 0; } state.Apply(); + + // Mark framebuffer surfaces as dirty + MathUtil::Rectangle draw_rect_unscaled{ + draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale, + draw_rect.bottom / res_scale}; + + if (color_surface != nullptr && write_color_fb) { + auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); + res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), + color_surface); + } + if (depth_surface != nullptr && write_depth_fb) { + auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled); + res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), + depth_surface); + } } void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { @@ -893,227 +952,164 @@ void RasterizerOpenGL::FlushAll() { void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - 
res_cache.FlushRegion(addr, size, nullptr, false); + res_cache.FlushRegion(addr, size); +} + +void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.InvalidateRegion(addr, size, nullptr); } void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.FlushRegion(addr, size, nullptr, true); + res_cache.FlushRegion(addr, size); + res_cache.InvalidateRegion(addr, size, nullptr); } bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { MICROPROFILE_SCOPE(OpenGL_Blits); - CachedSurface src_params; + SurfaceParams src_params; src_params.addr = config.GetPhysicalInputAddress(); - // It's important to use the correct source input width to properly skip over parts of the input - // image which will be cropped from the output but still affect the stride of the input image. - src_params.width = config.input_width; - // Using the output's height is fine because we don't read or skip over the remaining part of - // the image, and it allows for smaller texture cache lookup rectangles. + src_params.width = config.output_width; + src_params.stride = config.input_width; src_params.height = config.output_height; src_params.is_tiled = !config.input_linear; - src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.input_format); + src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.input_format); + src_params.UpdateParams(); - CachedSurface dst_params; + SurfaceParams dst_params; dst_params.addr = config.GetPhysicalOutputAddress(); - dst_params.width = - config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value(); - dst_params.height = - config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value(); + dst_params.width = config.scaling != config.NoScale ? 
config.output_width.Value() / 2 + : config.output_width.Value(); + dst_params.height = config.scaling == config.ScaleXY ? config.output_height.Value() / 2 + : config.output_height.Value(); dst_params.is_tiled = config.input_linear != config.dont_swizzle; - dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format); + dst_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.output_format); + dst_params.UpdateParams(); - MathUtil::Rectangle src_rect; - CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect); - - if (src_surface == nullptr) { + MathUtil::Rectangle src_rect; + Surface src_surface; + std::tie(src_surface, src_rect) = + res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); + if (src_surface == nullptr) return false; - } - // Adjust the source rectangle to take into account parts of the input lines being cropped - if (config.input_width > config.output_width) { - src_rect.right -= static_cast((config.input_width - config.output_width) * - src_surface->res_scale_width); - } + dst_params.res_scale = src_surface->res_scale; - // Require destination surface to have same resolution scale as source to preserve scaling - dst_params.res_scale_width = src_surface->res_scale_width; - dst_params.res_scale_height = src_surface->res_scale_height; - - MathUtil::Rectangle dst_rect; - CachedSurface* dst_surface = res_cache.GetSurfaceRect(dst_params, true, false, dst_rect); - - if (dst_surface == nullptr) { + MathUtil::Rectangle dst_rect; + Surface dst_surface; + std::tie(dst_surface, dst_rect) = + res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, false); + if (dst_surface == nullptr) return false; - } - // Don't accelerate if the src and dst surfaces are the same - if (src_surface == dst_surface) { + if (src_surface->is_tiled != dst_surface->is_tiled) + std::swap(src_rect.top, src_rect.bottom); + + if (config.flip_vertically) + std::swap(src_rect.top, 
src_rect.bottom); + + if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) return false; - } - if (config.flip_vertically) { - std::swap(dst_rect.top, dst_rect.bottom); - } - - if (!res_cache.TryBlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) { - return false; - } - - u32 dst_size = dst_params.width * dst_params.height * - CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8; - dst_surface->dirty = true; - res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true); + res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface); return true; } bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { - // TODO(tfarley): Try to hardware accelerate this - return false; -} - -bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { - MICROPROFILE_SCOPE(OpenGL_Blits); - using PixelFormat = CachedSurface::PixelFormat; - using SurfaceType = CachedSurface::SurfaceType; - - CachedSurface* dst_surface = res_cache.TryGetFillSurface(config); - - if (dst_surface == nullptr) { + u32 copy_size = Common::AlignDown(config.texture_copy.size, 16); + if (copy_size == 0) { return false; } - OpenGLState cur_state = OpenGLState::GetCurState(); - - SurfaceType dst_type = CachedSurface::GetFormatType(dst_surface->pixel_format); - - GLuint old_fb = cur_state.draw.draw_framebuffer; - cur_state.draw.draw_framebuffer = framebuffer.handle; - // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so - // Clear call isn't affected - cur_state.Apply(); - - if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - dst_surface->texture.handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - - // TODO: Handle 
additional pixel format and fill value size combinations to accelerate more - // cases - // For instance, checking if fill value's bytes/bits repeat to allow filling - // I8/A8/I4/A4/... - // Currently only handles formats that are multiples of the fill value size - - if (config.fill_24bit) { - switch (dst_surface->pixel_format) { - case PixelFormat::RGB8: - color_values[0] = config.value_24bit_r / 255.0f; - color_values[1] = config.value_24bit_g / 255.0f; - color_values[2] = config.value_24bit_b / 255.0f; - break; - default: - return false; - } - } else if (config.fill_32bit) { - u32 value = config.value_32bit; - - switch (dst_surface->pixel_format) { - case PixelFormat::RGBA8: - color_values[0] = (value >> 24) / 255.0f; - color_values[1] = ((value >> 16) & 0xFF) / 255.0f; - color_values[2] = ((value >> 8) & 0xFF) / 255.0f; - color_values[3] = (value & 0xFF) / 255.0f; - break; - default: - return false; - } - } else { - u16 value_16bit = config.value_16bit.Value(); - Math::Vec4 color; - - switch (dst_surface->pixel_format) { - case PixelFormat::RGBA8: - color_values[0] = (value_16bit >> 8) / 255.0f; - color_values[1] = (value_16bit & 0xFF) / 255.0f; - color_values[2] = color_values[0]; - color_values[3] = color_values[1]; - break; - case PixelFormat::RGB5A1: - color = Color::DecodeRGB5A1((const u8*)&value_16bit); - color_values[0] = color[0] / 31.0f; - color_values[1] = color[1] / 31.0f; - color_values[2] = color[2] / 31.0f; - color_values[3] = color[3]; - break; - case PixelFormat::RGB565: - color = Color::DecodeRGB565((const u8*)&value_16bit); - color_values[0] = color[0] / 31.0f; - color_values[1] = color[1] / 63.0f; - color_values[2] = color[2] / 31.0f; - break; - case PixelFormat::RGBA4: - color = Color::DecodeRGBA4((const u8*)&value_16bit); - color_values[0] = color[0] / 15.0f; - color_values[1] = color[1] / 15.0f; - color_values[2] = color[2] / 15.0f; - color_values[3] = color[3] / 15.0f; - break; - case PixelFormat::IA8: - case PixelFormat::RG8: - 
color_values[0] = (value_16bit >> 8) / 255.0f; - color_values[1] = (value_16bit & 0xFF) / 255.0f; - break; - default: - return false; - } - } - - cur_state.color_mask.red_enabled = GL_TRUE; - cur_state.color_mask.green_enabled = GL_TRUE; - cur_state.color_mask.blue_enabled = GL_TRUE; - cur_state.color_mask.alpha_enabled = GL_TRUE; - cur_state.Apply(); - glClearBufferfv(GL_COLOR, 0, color_values); - } else if (dst_type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - dst_surface->texture.handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - GLfloat value_float; - if (dst_surface->pixel_format == CachedSurface::PixelFormat::D16) { - value_float = config.value_32bit / 65535.0f; // 2^16 - 1 - } else if (dst_surface->pixel_format == CachedSurface::PixelFormat::D24) { - value_float = config.value_32bit / 16777215.0f; // 2^24 - 1 - } - - cur_state.depth.write_mask = GL_TRUE; - cur_state.Apply(); - glClearBufferfv(GL_DEPTH, 0, &value_float); - } else if (dst_type == SurfaceType::DepthStencil) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - dst_surface->texture.handle, 0); - - GLfloat value_float = (config.value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1 - GLint value_int = (config.value_32bit >> 24); - - cur_state.depth.write_mask = GL_TRUE; - cur_state.stencil.write_mask = 0xFF; - cur_state.Apply(); - glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int); + u32 input_gap = config.texture_copy.input_gap * 16; + u32 input_width = config.texture_copy.input_width * 16; + if (input_width == 0 && input_gap != 0) { + return false; + } + if (input_gap == 0 || input_width >= copy_size) { + input_width = copy_size; + input_gap = 0; + } + if 
(copy_size % input_width != 0) { + return false; } - cur_state.draw.draw_framebuffer = old_fb; - // TODO: Return scissor test to previous value when scissor test is implemented - cur_state.Apply(); + u32 output_gap = config.texture_copy.output_gap * 16; + u32 output_width = config.texture_copy.output_width * 16; + if (output_width == 0 && output_gap != 0) { + return false; + } + if (output_gap == 0 || output_width >= copy_size) { + output_width = copy_size; + output_gap = 0; + } + if (copy_size % output_width != 0) { + return false; + } - dst_surface->dirty = true; - res_cache.FlushRegion(dst_surface->addr, dst_surface->size, dst_surface, true); + SurfaceParams src_params; + src_params.addr = config.GetPhysicalInputAddress(); + src_params.stride = input_width + input_gap; // stride in bytes + src_params.width = input_width; // width in bytes + src_params.height = copy_size / input_width; + src_params.size = ((src_params.height - 1) * src_params.stride) + src_params.width; + src_params.end = src_params.addr + src_params.size; + + MathUtil::Rectangle src_rect; + Surface src_surface; + std::tie(src_surface, src_rect) = res_cache.GetTexCopySurface(src_params); + if (src_surface == nullptr) { + return false; + } + + if (output_gap != 0 && + (output_width != + src_surface->BytesInPixels(src_rect.GetWidth() / src_surface->res_scale) * + (src_surface->is_tiled ? 8 : 1) || + output_gap % src_surface->BytesInPixels(src_surface->is_tiled ? 64 : 1) != 0)) { + return false; + } + + SurfaceParams dst_params = *src_surface; + dst_params.addr = config.GetPhysicalOutputAddress(); + dst_params.width = src_rect.GetWidth() / src_surface->res_scale; + dst_params.stride = + dst_params.width + + src_surface->PixelsInBytes(src_surface->is_tiled ? 
output_gap / 8 : output_gap); + dst_params.height = src_rect.GetHeight() / src_surface->res_scale; + dst_params.res_scale = src_surface->res_scale; + dst_params.UpdateParams(); + + // Since we are going to invalidate the gap if there is one, we will have to load it first + const bool load_gap = output_gap != 0; + MathUtil::Rectangle dst_rect; + Surface dst_surface; + std::tie(dst_surface, dst_rect) = + res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, load_gap); + if (src_surface == nullptr) { + return false; + } + + if (dst_surface->type == SurfaceType::Texture) { + return false; + } + + if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) { + return false; + } + + res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface); + return true; +} + +bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { + Surface dst_surface = res_cache.GetFillSurface(config); + if (dst_surface == nullptr) + return false; + + res_cache.InvalidateRegion(dst_surface->addr, dst_surface->size, dst_surface); return true; } @@ -1125,16 +1121,19 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con } MICROPROFILE_SCOPE(OpenGL_CacheManagement); - CachedSurface src_params; + SurfaceParams src_params; src_params.addr = framebuffer_addr; - src_params.width = config.width; + src_params.width = std::min(config.width.Value(), pixel_stride); src_params.height = config.height; - src_params.pixel_stride = pixel_stride; + src_params.stride = pixel_stride; src_params.is_tiled = false; - src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.color_format); + src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.color_format); + src_params.UpdateParams(); - MathUtil::Rectangle src_rect; - CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect); + MathUtil::Rectangle src_rect; + Surface src_surface; + std::tie(src_surface, 
src_rect) = + res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); if (src_surface == nullptr) { return false; @@ -1144,8 +1143,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con u32 scaled_height = src_surface->GetScaledHeight(); screen_info.display_texcoords = MathUtil::Rectangle( - (float)src_rect.top / (float)scaled_height, (float)src_rect.left / (float)scaled_width, - (float)src_rect.bottom / (float)scaled_height, (float)src_rect.right / (float)scaled_width); + (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, + (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); screen_info.display_texture = src_surface->texture.handle; @@ -1158,8 +1157,8 @@ void RasterizerOpenGL::SamplerInfo::Create() { wrap_s = wrap_t = TextureConfig::Repeat; border_color = 0; - glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, - GL_LINEAR); // default is GL_LINEAR_MIPMAP_LINEAR + // default is GL_LINEAR_MIPMAP_LINEAR + glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); // Other attributes have correct defaults } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 46c62961c..18808b1e4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -43,6 +43,7 @@ public: void NotifyPicaRegisterChanged(u32 id) override; void FlushAll() override; void FlushRegion(PAddr addr, u32 size) override; + void InvalidateRegion(PAddr addr, u32 size) override; void FlushAndInvalidateRegion(PAddr addr, u32 size) override; bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override; @@ -135,7 +136,7 @@ private: // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. 
// Not following that rule will cause problems on some AMD drivers. struct UniformData { - alignas(8) GLvec2 framebuffer_scale; + GLint framebuffer_scale; GLint alphatest_ref; GLfloat depth_scale; GLfloat depth_offset; @@ -155,7 +156,7 @@ private: }; static_assert( - sizeof(UniformData) == 0x470, + sizeof(UniformData) == 0x460, "The size of the UniformData structure has changed, update the structure in the shader"); static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index f37894e7a..aef06873d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -6,14 +6,20 @@ #include #include #include +#include #include #include #include +#include +#include #include +#include "common/alignment.h" #include "common/bit_field.h" +#include "common/color.h" #include "common/logging/log.h" #include "common/math_util.h" #include "common/microprofile.h" +#include "common/scope_exit.h" #include "common/vector_math.h" #include "core/frontend/emu_window.h" #include "core/memory.h" @@ -25,13 +31,16 @@ #include "video_core/utils.h" #include "video_core/video_core.h" +using SurfaceType = SurfaceParams::SurfaceType; +using PixelFormat = SurfaceParams::PixelFormat; + struct FormatTuple { GLint internal_format; GLenum format; GLenum type; }; -static const std::array fb_format_tuples = {{ +static constexpr std::array fb_format_tuples = {{ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, // RGBA8 {GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE}, // RGB8 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1 @@ -39,89 +48,204 @@ static const std::array fb_format_tuples = {{ {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 }}; -static const std::array depth_format_tuples = {{ +static constexpr std::array depth_format_tuples = {{ 
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16 {}, {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8 }}; -RasterizerCacheOpenGL::RasterizerCacheOpenGL() { - transfer_framebuffers[0].Create(); - transfer_framebuffers[1].Create(); -} +static constexpr FormatTuple tex_tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; -RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { - FlushAll(); -} - -static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, - u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, - u8* gl_data, bool morton_to_gl) { - using PixelFormat = CachedSurface::PixelFormat; - - u8* data_ptrs[2]; - u32 depth_stencil_shifts[2] = {24, 8}; - - if (morton_to_gl) { - std::swap(depth_stencil_shifts[0], depth_stencil_shifts[1]); +static const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { + const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); + if (type == SurfaceType::Color) { + ASSERT(static_cast(pixel_format) < fb_format_tuples.size()); + return fb_format_tuples[static_cast(pixel_format)]; + } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { + size_t tuple_idx = static_cast(pixel_format) - 14; + ASSERT(tuple_idx < depth_format_tuples.size()); + return depth_format_tuples[tuple_idx]; } + return tex_tuple; +} - if (pixel_format == PixelFormat::D24S8) { - for (unsigned y = 0; y < height; ++y) { - for (unsigned x = 0; x < width; ++x) { - const u32 coarse_y = y & ~7; - u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + - coarse_y * width * bytes_per_pixel; - u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; +template +constexpr auto RangeFromInterval(Map& map, const Interval& interval) { + return boost::make_iterator_range(map.equal_range(interval)); +} - data_ptrs[morton_to_gl] = morton_data + morton_offset; - 
data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; +static u16 GetResolutionScaleFactor() { + return !Settings::values.resolution_factor + ? VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio() + : Settings::values.resolution_factor; +} - // Swap depth and stencil value ordering since 3DS does not match OpenGL - u32 depth_stencil; - memcpy(&depth_stencil, data_ptrs[1], sizeof(u32)); - depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | - (depth_stencil >> depth_stencil_shifts[1]); - - memcpy(data_ptrs[0], &depth_stencil, sizeof(u32)); - } - } - } else { - for (unsigned y = 0; y < height; ++y) { - for (unsigned x = 0; x < width; ++x) { - const u32 coarse_y = y & ~7; - u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + - coarse_y * width * bytes_per_pixel; - u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; - - data_ptrs[morton_to_gl] = morton_data + morton_offset; - data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; - - memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); +template +static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) { + constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; + constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); + for (u32 y = 0; y < 8; ++y) { + for (u32 x = 0; x < 8; ++x) { + u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel; + u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * gl_bytes_per_pixel; + if (morton_to_gl) { + if (format == PixelFormat::D24S8) { + gl_ptr[0] = tile_ptr[3]; + std::memcpy(gl_ptr + 1, tile_ptr, 3); + } else { + std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel); + } + } else { + if (format == PixelFormat::D24S8) { + std::memcpy(tile_ptr, gl_ptr + 1, 3); + tile_ptr[3] = gl_ptr[0]; + } else { + std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel); + } } } } } -void RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, - CachedSurface::SurfaceType 
type, - const MathUtil::Rectangle& src_rect, - const MathUtil::Rectangle& dst_rect) { - using SurfaceType = CachedSurface::SurfaceType; +template +static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr start, PAddr end) { + constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; + constexpr u32 tile_size = bytes_per_pixel * 64; + constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); + static_assert(gl_bytes_per_pixel >= bytes_per_pixel, ""); + gl_buffer += gl_bytes_per_pixel - bytes_per_pixel; + + const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); + const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); + const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size); + + ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end)); + + const u32 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel; + u32 x = (begin_pixel_index % (stride * 8)) / 8; + u32 y = (begin_pixel_index / (stride * 8)) * 8; + + gl_buffer += ((height - 8 - y) * stride + x) * gl_bytes_per_pixel; + + auto glbuf_next_tile = [&] { + x = (x + 8) % stride; + gl_buffer += 8 * gl_bytes_per_pixel; + if (!x) { + y += 8; + gl_buffer -= stride * 9 * gl_bytes_per_pixel; + } + }; + + u8* tile_buffer = Memory::GetPhysicalPointer(start); + + if (start < aligned_start && !morton_to_gl) { + std::array tmp_buf; + MortonCopyTile(stride, &tmp_buf[0], gl_buffer); + std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start], + std::min(aligned_start, end) - start); + + tile_buffer += aligned_start - start; + glbuf_next_tile(); + } + + const u8* const buffer_end = tile_buffer + aligned_end - aligned_start; + while (tile_buffer < buffer_end) { + MortonCopyTile(stride, tile_buffer, gl_buffer); + tile_buffer += tile_size; + glbuf_next_tile(); + } + + if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) { + std::array tmp_buf; + MortonCopyTile(stride, 
&tmp_buf[0], gl_buffer); + std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end); + } +} + +static constexpr std::array morton_to_gl_fns = { + MortonCopy, // 0 + MortonCopy, // 1 + MortonCopy, // 2 + MortonCopy, // 3 + MortonCopy, // 4 + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, // 5 - 13 + MortonCopy, // 14 + nullptr, // 15 + MortonCopy, // 16 + MortonCopy // 17 +}; + +static constexpr std::array gl_to_morton_fns = { + MortonCopy, // 0 + MortonCopy, // 1 + MortonCopy, // 2 + MortonCopy, // 3 + MortonCopy, // 4 + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, // 5 - 13 + MortonCopy, // 14 + nullptr, // 15 + MortonCopy, // 16 + MortonCopy // 17 +}; + +// Allocate an uninitialized texture of appropriate size and format for the surface +static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tuple, u32 width, + u32 height) { OpenGLState cur_state = OpenGLState::GetCurState(); + // Keep track of previous texture bindings + GLuint old_tex = cur_state.texture_units[0].texture_2d; + cur_state.texture_units[0].texture_2d = texture; + cur_state.Apply(); + glActiveTexture(GL_TEXTURE0); + + glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0, + format_tuple.format, format_tuple.type, nullptr); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + // Restore previous texture bindings + cur_state.texture_units[0].texture_2d = old_tex; + cur_state.Apply(); +} + +static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rect, GLuint dst_tex, + const MathUtil::Rectangle& dst_rect, SurfaceType type, + GLuint read_fb_handle, GLuint draw_fb_handle) { + OpenGLState state = OpenGLState::GetCurState(); + 
+ OpenGLState prev_state = state; + SCOPE_EXIT({ prev_state.Apply(); }); + // Make sure textures aren't bound to texture units, since going to bind them to framebuffer // components - OpenGLState::ResetTexture(src_tex); - OpenGLState::ResetTexture(dst_tex); + state.ResetTexture(src_tex); + state.ResetTexture(dst_tex); - // Keep track of previous framebuffer bindings - GLuint old_fbs[2] = {cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer}; - cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle; - cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle; - cur_state.Apply(); + state.draw.read_framebuffer = read_fb_handle; + state.draw.draw_framebuffer = draw_fb_handle; + state.Apply(); u32 buffers = 0; @@ -159,641 +283,1324 @@ void RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; } - glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, - dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, + glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left, + dst_rect.bottom, dst_rect.right, dst_rect.top, buffers, buffers == GL_COLOR_BUFFER_BIT ? 
GL_LINEAR : GL_NEAREST); - // Restore previous framebuffer bindings - cur_state.draw.read_framebuffer = old_fbs[0]; - cur_state.draw.draw_framebuffer = old_fbs[1]; - cur_state.Apply(); -} - -bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, - const MathUtil::Rectangle& src_rect, - CachedSurface* dst_surface, - const MathUtil::Rectangle& dst_rect) { - - if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, - dst_surface->pixel_format)) { - return false; - } - - BlitTextures(src_surface->texture.handle, dst_surface->texture.handle, - CachedSurface::GetFormatType(src_surface->pixel_format), src_rect, dst_rect); return true; } -static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format, - u32 width, u32 height) { - // Allocate an uninitialized texture of appropriate size and format for the surface - using SurfaceType = CachedSurface::SurfaceType; +static bool FillSurface(const Surface& surface, const u8* fill_data, + const MathUtil::Rectangle& fill_rect, GLuint draw_fb_handle) { + OpenGLState state = OpenGLState::GetCurState(); + + OpenGLState prev_state = state; + SCOPE_EXIT({ prev_state.Apply(); }); + + state.ResetTexture(surface->texture.handle); + + state.scissor.enabled = true; + state.scissor.x = static_cast(fill_rect.left); + state.scissor.y = static_cast(fill_rect.bottom); + state.scissor.width = static_cast(fill_rect.GetWidth()); + state.scissor.height = static_cast(fill_rect.GetHeight()); + + state.draw.draw_framebuffer = draw_fb_handle; + state.Apply(); + + if (surface->type == SurfaceType::Color || surface->type == SurfaceType::Texture) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + surface->texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + Pica::Texture::TextureInfo tex_info{}; + tex_info.format = static_cast(surface->pixel_format); + Math::Vec4 color = 
Pica::Texture::LookupTexture(fill_data, 0, 0, tex_info); + + std::array color_values = {color.x / 255.f, color.y / 255.f, color.z / 255.f, + color.w / 255.f}; + + state.color_mask.red_enabled = GL_TRUE; + state.color_mask.green_enabled = GL_TRUE; + state.color_mask.blue_enabled = GL_TRUE; + state.color_mask.alpha_enabled = GL_TRUE; + state.Apply(); + glClearBufferfv(GL_COLOR, 0, &color_values[0]); + } else if (surface->type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + surface->texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + u32 value_32bit = 0; + GLfloat value_float; + + if (surface->pixel_format == SurfaceParams::PixelFormat::D16) { + std::memcpy(&value_32bit, fill_data, 2); + value_float = value_32bit / 65535.0f; // 2^16 - 1 + } else if (surface->pixel_format == SurfaceParams::PixelFormat::D24) { + std::memcpy(&value_32bit, fill_data, 3); + value_float = value_32bit / 16777215.0f; // 2^24 - 1 + } + + state.depth.write_mask = GL_TRUE; + state.Apply(); + glClearBufferfv(GL_DEPTH, 0, &value_float); + } else if (surface->type == SurfaceType::DepthStencil) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + surface->texture.handle, 0); + + u32 value_32bit; + std::memcpy(&value_32bit, fill_data, sizeof(u32)); + + GLfloat value_float = (value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1 + GLint value_int = (value_32bit >> 24); + + state.depth.write_mask = GL_TRUE; + state.stencil.write_mask = -1; + state.Apply(); + glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int); + } + return true; +} + +SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { + SurfaceParams params = *this; + const u32 
tiled_size = is_tiled ? 8 : 1; + const u32 stride_tiled_bytes = BytesInPixels(stride * tiled_size); + PAddr aligned_start = + addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); + PAddr aligned_end = + addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); + + if (aligned_end - aligned_start > stride_tiled_bytes) { + params.addr = aligned_start; + params.height = (aligned_end - aligned_start) / BytesInPixels(stride); + } else { + // 1 row + ASSERT(aligned_end - aligned_start == stride_tiled_bytes); + const u32 tiled_alignment = BytesInPixels(is_tiled ? 8 * 8 : 1); + aligned_start = + addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment); + aligned_end = + addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment); + params.addr = aligned_start; + params.width = PixelsInBytes(aligned_end - aligned_start) / tiled_size; + params.stride = params.width; + params.height = tiled_size; + } + params.UpdateParams(); + + return params; +} + +SurfaceInterval SurfaceParams::GetSubRectInterval(MathUtil::Rectangle unscaled_rect) const { + if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) { + return {}; + } + + if (is_tiled) { + unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8; + unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8; + unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8; + unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8; + } + + const u32 stride_tiled = !is_tiled ? stride : stride * 8; + + const u32 pixel_offset = + stride_tiled * (!is_tiled ? 
unscaled_rect.bottom : (height / 8) - unscaled_rect.top) + + unscaled_rect.left; + + const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth(); + + return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)}; +} + +MathUtil::Rectangle SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { + const u32 begin_pixel_index = PixelsInBytes(sub_surface.addr - addr); + + if (is_tiled) { + const int x0 = (begin_pixel_index % (stride * 8)) / 8; + const int y0 = (begin_pixel_index / (stride * 8)) * 8; + // Top to bottom + return MathUtil::Rectangle(x0, height - y0, x0 + sub_surface.width, + height - (y0 + sub_surface.height)); + } + + const int x0 = begin_pixel_index % stride; + const int y0 = begin_pixel_index / stride; + // Bottom to top + return MathUtil::Rectangle(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0); +} + +MathUtil::Rectangle SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const { + auto rect = GetSubRect(sub_surface); + rect.left = rect.left * res_scale; + rect.right = rect.right * res_scale; + rect.top = rect.top * res_scale; + rect.bottom = rect.bottom * res_scale; + return rect; +} + +bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { + return std::tie(other_surface.addr, other_surface.width, other_surface.height, + other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) == + std::tie(addr, width, height, stride, pixel_format, is_tiled) && + pixel_format != PixelFormat::Invalid; +} + +bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { + return sub_surface.addr >= addr && sub_surface.end <= end && + sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && + sub_surface.is_tiled == is_tiled && + (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && + (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 
8u : 1u)) && + GetSubRect(sub_surface).left + sub_surface.width <= stride; +} + +bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { + return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && + addr <= expanded_surface.end && expanded_surface.addr <= end && + is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride && + (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % + BytesInPixels(stride * (is_tiled ? 8 : 1)) == + 0; +} + +bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { + if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr || + end < texcopy_params.end) { + return false; + } + if (texcopy_params.width != texcopy_params.stride) { + const u32 tile_stride = BytesInPixels(stride * (is_tiled ? 8 : 1)); + return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && + texcopy_params.width % BytesInPixels(is_tiled ? 
64 : 1) == 0 && + (texcopy_params.height == 1 || texcopy_params.stride == tile_stride) && + ((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride; + } + return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval(); +} + +bool CachedSurface::CanFill(const SurfaceParams& dest_surface, + SurfaceInterval fill_interval) const { + if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && + boost::icl::first(fill_interval) >= addr && + boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range + dest_surface.FromInterval(fill_interval).GetInterval() == + fill_interval) { // make sure interval is a rectangle in dest surface + if (fill_size * 8 != dest_surface.GetFormatBpp()) { + // Check if bits repeat for our fill_size + const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u); + std::vector fill_test(fill_size * dest_bytes_per_pixel); + + for (u32 i = 0; i < dest_bytes_per_pixel; ++i) + std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); + + for (u32 i = 0; i < fill_size; ++i) + if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], + dest_bytes_per_pixel) != 0) + return false; + + if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) + return false; + } + return true; + } + return false; +} + +bool CachedSurface::CanCopy(const SurfaceParams& dest_surface, + SurfaceInterval copy_interval) const { + SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); + ASSERT(subrect_params.GetInterval() == copy_interval); + if (CanSubRect(subrect_params)) + return true; + + if (CanFill(dest_surface, copy_interval)) + return true; + + return false; +} + +SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const { + SurfaceInterval result{}; + const auto valid_regions = + SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions; + 
for (auto& valid_interval : valid_regions) { + const SurfaceInterval aligned_interval{ + addr + Common::AlignUp(boost::icl::first(valid_interval) - addr, + BytesInPixels(is_tiled ? 8 * 8 : 1)), + addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr, + BytesInPixels(is_tiled ? 8 * 8 : 1))}; + + if (BytesInPixels(is_tiled ? 8 * 8 : 1) > boost::icl::length(valid_interval) || + boost::icl::length(aligned_interval) == 0) { + continue; + } + + // Get the rectangle within aligned_interval + const u32 stride_bytes = BytesInPixels(stride) * (is_tiled ? 8 : 1); + SurfaceInterval rect_interval{ + addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes), + addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes), + }; + if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) { + // 1 row + rect_interval = aligned_interval; + } else if (boost::icl::length(rect_interval) == 0) { + // 2 rows that do not make a rectangle, return the larger one + const SurfaceInterval row1{boost::icl::first(aligned_interval), + boost::icl::first(rect_interval)}; + const SurfaceInterval row2{boost::icl::first(rect_interval), + boost::icl::last_next(aligned_interval)}; + rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? 
row1 : row2; + } + + if (boost::icl::length(rect_interval) > boost::icl::length(result)) { + result = rect_interval; + } + } + return result; +} + +void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface, + SurfaceInterval copy_interval) { + SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval); + ASSERT(subrect_params.GetInterval() == copy_interval); + + ASSERT(src_surface != dst_surface); + + // This is only called when CanCopy is true, no need to run checks here + if (src_surface->type == SurfaceType::Fill) { + // FillSurface needs a 4 bytes buffer + const u32 fill_offset = + (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size; + std::array fill_buffer; + + u32 fill_buff_pos = fill_offset; + for (int i : {0, 1, 2, 3}) + fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; + + FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params), + draw_framebuffer.handle); + return; + } + if (src_surface->CanSubRect(subrect_params)) { + BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(subrect_params), + dst_surface->texture.handle, dst_surface->GetScaledSubRect(subrect_params), + src_surface->type, read_framebuffer.handle, draw_framebuffer.handle); + return; + } + UNREACHABLE(); +} + +MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); +void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { + ASSERT(type != SurfaceType::Fill); + + const u8* const texture_src_data = Memory::GetPhysicalPointer(addr); + if (texture_src_data == nullptr) + return; + + if (gl_buffer == nullptr) { + gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); + gl_buffer.reset(new u8[gl_buffer_size]); + } + + // TODO: Should probably be done in ::Memory:: and check for other regions too + if (load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END) + load_end 
= Memory::VRAM_VADDR_END; + + if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR) + load_start = Memory::VRAM_VADDR; + + MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); + + ASSERT(load_start >= addr && load_end <= end); + const u32 start_offset = load_start - addr; + + if (!is_tiled) { + ASSERT(type == SurfaceType::Color); + std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, + load_end - load_start); + } else { + if (type == SurfaceType::Texture) { + Pica::Texture::TextureInfo tex_info{}; + tex_info.width = width; + tex_info.height = height; + tex_info.format = static_cast(pixel_format); + tex_info.SetDefaultStride(); + tex_info.physical_address = addr; + + const SurfaceInterval load_interval(load_start, load_end); + const auto rect = GetSubRect(FromInterval(load_interval)); + ASSERT(FromInterval(load_interval).GetInterval() == load_interval); + + for (unsigned y = rect.bottom; y < rect.top; ++y) { + for (unsigned x = rect.left; x < rect.right; ++x) { + auto vec4 = + Pica::Texture::LookupTexture(texture_src_data, x, height - 1 - y, tex_info); + const size_t offset = (x + (width * y)) * 4; + std::memcpy(&gl_buffer[offset], vec4.AsArray(), 4); + } + } + } else { + morton_to_gl_fns[static_cast(pixel_format)](stride, height, &gl_buffer[0], addr, + load_start, load_end); + } + } +} + +MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); +void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { + u8* const dst_buffer = Memory::GetPhysicalPointer(addr); + if (dst_buffer == nullptr) + return; + + ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format)); + + // TODO: Should probably be done in ::Memory:: and check for other regions too + // same as loadglbuffer() + if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END) + flush_end = Memory::VRAM_VADDR_END; + + if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR) + flush_start = 
Memory::VRAM_VADDR; + + MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); + + ASSERT(flush_start >= addr && flush_end <= end); + const u32 start_offset = flush_start - addr; + const u32 end_offset = flush_end - addr; + + if (type == SurfaceType::Fill) { + const u32 coarse_start_offset = start_offset - (start_offset % fill_size); + const u32 backup_bytes = start_offset % fill_size; + std::array backup_data; + if (backup_bytes) + std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes); + + for (u32 offset = coarse_start_offset; offset < end_offset; offset += fill_size) { + std::memcpy(&dst_buffer[offset], &fill_data[0], + std::min(fill_size, end_offset - offset)); + } + + if (backup_bytes) + std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); + } else if (!is_tiled) { + ASSERT(type == SurfaceType::Color); + std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start); + } else { + gl_to_morton_fns[static_cast(pixel_format)](stride, height, &gl_buffer[0], addr, + flush_start, flush_end); + } +} + +MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); +void CachedSurface::UploadGLTexture(const MathUtil::Rectangle& rect, GLuint read_fb_handle, + GLuint draw_fb_handle) { + if (type == SurfaceType::Fill) + return; + + MICROPROFILE_SCOPE(OpenGL_TextureUL); + + ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format)); + + // Load data from memory to the surface + GLint x0 = static_cast(rect.left); + GLint y0 = static_cast(rect.bottom); + size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format); + + const FormatTuple& tuple = GetFormatTuple(pixel_format); + GLuint target_tex = texture.handle; + + // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in + // surface + OGLTexture unscaled_tex; + if (res_scale != 1) { + x0 = 0; + y0 = 0; + + unscaled_tex.Create(); + 
AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); + target_tex = unscaled_tex.handle; + } OpenGLState cur_state = OpenGLState::GetCurState(); - // Keep track of previous texture bindings GLuint old_tex = cur_state.texture_units[0].texture_2d; - cur_state.texture_units[0].texture_2d = texture; + cur_state.texture_units[0].texture_2d = target_tex; cur_state.Apply(); + + // Ensure no bad interactions with GL_UNPACK_ALIGNMENT + ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(stride)); + glActiveTexture(GL_TEXTURE0); + glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast(rect.GetWidth()), + static_cast(rect.GetHeight()), tuple.format, tuple.type, + &gl_buffer[buffer_offset]); - SurfaceType type = CachedSurface::GetFormatType(pixel_format); + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - FormatTuple tuple; - if (type == SurfaceType::Color) { - ASSERT((size_t)pixel_format < fb_format_tuples.size()); - tuple = fb_format_tuples[(unsigned int)pixel_format]; - } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { - size_t tuple_idx = (size_t)pixel_format - 14; - ASSERT(tuple_idx < depth_format_tuples.size()); - tuple = depth_format_tuples[tuple_idx]; - } else { - tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; - } - - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0, tuple.format, - tuple.type, nullptr); - - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - - // Restore previous texture bindings cur_state.texture_units[0].texture_2d = old_tex; cur_state.Apply(); + + if (res_scale != 1) { + auto scaled_rect = rect; + scaled_rect.left *= res_scale; + scaled_rect.top *= res_scale; + scaled_rect.right *= res_scale; + 
scaled_rect.bottom *= res_scale; + + BlitTextures(unscaled_tex.handle, {0, rect.GetHeight(), rect.GetWidth(), 0}, texture.handle, + scaled_rect, type, read_fb_handle, draw_fb_handle); + } } -MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192)); -CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, - bool load_if_create) { - using PixelFormat = CachedSurface::PixelFormat; - using SurfaceType = CachedSurface::SurfaceType; +MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64)); +void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle& rect, GLuint read_fb_handle, + GLuint draw_fb_handle) { + if (type == SurfaceType::Fill) + return; - if (params.addr == 0) { - return nullptr; + MICROPROFILE_SCOPE(OpenGL_TextureDL); + + if (gl_buffer == nullptr) { + gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); + gl_buffer.reset(new u8[gl_buffer_size]); } - u32 params_size = - params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8; + OpenGLState state = OpenGLState::GetCurState(); + OpenGLState prev_state = state; + SCOPE_EXIT({ prev_state.Apply(); }); + + const FormatTuple& tuple = GetFormatTuple(pixel_format); + + // Ensure no bad interactions with GL_PACK_ALIGNMENT + ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); + glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(stride)); + size_t buffer_offset = (rect.bottom * stride + rect.left) * GetGLBytesPerPixel(pixel_format); + + // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush + if (res_scale != 1) { + auto scaled_rect = rect; + scaled_rect.left *= res_scale; + scaled_rect.top *= res_scale; + scaled_rect.right *= res_scale; + scaled_rect.bottom *= res_scale; + + OGLTexture unscaled_tex; + unscaled_tex.Create(); + + MathUtil::Rectangle unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0}; + 
AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); + BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type, + read_fb_handle, draw_fb_handle); + + state.texture_units[0].texture_2d = unscaled_tex.handle; + state.Apply(); + + glActiveTexture(GL_TEXTURE0); + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]); + } else { + state.ResetTexture(texture.handle); + state.draw.read_framebuffer = read_fb_handle; + state.Apply(); + + if (type == SurfaceType::Color || type == SurfaceType::Texture) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + texture.handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + } else if (type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + texture.handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + } else { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + texture.handle, 0); + } + glReadPixels(static_cast(rect.left), static_cast(rect.bottom), + static_cast(rect.GetWidth()), static_cast(rect.GetHeight()), + tuple.format, tuple.type, &gl_buffer[buffer_offset]); + } + + glPixelStorei(GL_PACK_ROW_LENGTH, 0); +} + +enum MatchFlags { + Invalid = 1, // Flag that can be applied to other match types, invalid matches require + // validation before they can be used + Exact = 1 << 1, // Surfaces perfectly match + SubRect = 1 << 2, // Surface encompasses params + Copy = 1 << 3, // Surface we can copy from + Expand = 1 << 4, // Surface that can expand params + TexCopy = 1 << 5 // Surface that will match a display transfer "texture 
copy" parameters +}; + +constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) { + return static_cast(static_cast(lhs) | static_cast(rhs)); +} + +/// Get the best surface match (and its match type) for the given flags +template +Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, + ScaleMatch match_scale_type, + boost::optional validate_interval = boost::none) { + Surface match_surface = nullptr; + bool match_valid = false; + u32 match_scale = 0; + SurfaceInterval match_interval{}; + + for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { + for (auto& surface : pair.second) { + bool res_scale_matched = match_scale_type == ScaleMatch::Exact + ? (params.res_scale == surface->res_scale) + : (params.res_scale <= surface->res_scale); + // validity will be checked in GetCopyableInterval + bool is_valid = + find_flags & MatchFlags::Copy + ? true + : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); + + if (!(find_flags & MatchFlags::Invalid) && !is_valid) + continue; + + auto IsMatch_Helper = [&](auto check_type, auto match_fn) { + if (!(find_flags & check_type)) + return; + + bool matched; + SurfaceInterval surface_interval; + std::tie(matched, surface_interval) = match_fn(); + if (!matched) + return; + + if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && + surface->type != SurfaceType::Fill) + return; + + // Found a match, update only if this is better than the previous one + auto UpdateMatch = [&] { + match_surface = surface; + match_valid = is_valid; + match_scale = surface->res_scale; + match_interval = surface_interval; + }; + + if (surface->res_scale > match_scale) { + UpdateMatch(); + return; + } else if (surface->res_scale < match_scale) { + return; + } + + if (is_valid && !match_valid) { + UpdateMatch(); + return; + } else if (is_valid != match_valid) { + return; + } + + if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) { + 
UpdateMatch(); + } + }; + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + auto copy_interval = + params.FromInterval(*validate_interval).GetCopyableInterval(surface); + bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && + surface->CanCopy(params, copy_interval); + return std::make_pair(matched, copy_interval); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanExpand(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); + }); + } + } + return match_surface; +} + +RasterizerCacheOpenGL::RasterizerCacheOpenGL() { + read_framebuffer.Create(); + draw_framebuffer.Create(); + + attributeless_vao.Create(); + + d24s8_abgr_buffer.Create(); + d24s8_abgr_buffer_size = 0; + + const char* vs_source = R"( +#version 330 core +const vec2 vertices[4] = vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); +void main() { + gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); +} +)"; + const char* fs_source = R"( +#version 330 core + +uniform samplerBuffer tbo; +uniform vec2 tbo_size; +uniform vec4 viewport; + +out vec4 color; + +void main() { + vec2 tbo_coord = (gl_FragCoord.xy - viewport.xy) * tbo_size / viewport.zw; + int tbo_offset = int(tbo_coord.y) * int(tbo_size.x) + int(tbo_coord.x); + color = texelFetch(tbo, tbo_offset).rabg; +} +)"; + d24s8_abgr_shader.Create(vs_source, fs_source); + + OpenGLState state = OpenGLState::GetCurState(); + GLuint old_program = state.draw.shader_program; + state.draw.shader_program = d24s8_abgr_shader.handle; + state.Apply(); + + GLint tbo_u_id = 
glGetUniformLocation(d24s8_abgr_shader.handle, "tbo"); + ASSERT(tbo_u_id != -1); + glUniform1i(tbo_u_id, 0); + + state.draw.shader_program = old_program; + state.Apply(); + + d24s8_abgr_tbo_size_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo_size"); + ASSERT(d24s8_abgr_tbo_size_u_id != -1); + d24s8_abgr_viewport_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "viewport"); + ASSERT(d24s8_abgr_viewport_u_id != -1); +} + +RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { + FlushAll(); + while (!surface_cache.empty()) + UnregisterSurface(*surface_cache.begin()->second.begin()); +} + +bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, + const MathUtil::Rectangle& src_rect, + const Surface& dst_surface, + const MathUtil::Rectangle& dst_rect) { + if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) + return false; + + return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle, + dst_rect, src_surface->type, read_framebuffer.handle, + draw_framebuffer.handle); +} + +void RasterizerCacheOpenGL::ConvertD24S8toABGR(GLuint src_tex, + const MathUtil::Rectangle& src_rect, + GLuint dst_tex, + const MathUtil::Rectangle& dst_rect) { + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state; + state.draw.read_framebuffer = read_framebuffer.handle; + state.draw.draw_framebuffer = draw_framebuffer.handle; + state.Apply(); + + glBindBuffer(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer.handle); + + GLsizeiptr target_pbo_size = src_rect.GetWidth() * src_rect.GetHeight() * 4; + if (target_pbo_size > d24s8_abgr_buffer_size) { + d24s8_abgr_buffer_size = target_pbo_size * 2; + glBufferData(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer_size, nullptr, GL_STREAM_COPY); + } + + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 
GL_TEXTURE_2D, src_tex, + 0); + glReadPixels(static_cast(src_rect.left), static_cast(src_rect.bottom), + static_cast(src_rect.GetWidth()), + static_cast(src_rect.GetHeight()), GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, + 0); + + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + + // PBO now contains src_tex in RABG format + state.draw.shader_program = d24s8_abgr_shader.handle; + state.draw.vertex_array = attributeless_vao.handle; + state.viewport.x = static_cast(dst_rect.left); + state.viewport.y = static_cast(dst_rect.bottom); + state.viewport.width = static_cast(dst_rect.GetWidth()); + state.viewport.height = static_cast(dst_rect.GetHeight()); + state.Apply(); + + OGLTexture tbo; + tbo.Create(); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_BUFFER, tbo.handle); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA8, d24s8_abgr_buffer.handle); + + glUniform2f(d24s8_abgr_tbo_size_u_id, static_cast(src_rect.GetWidth()), + static_cast(src_rect.GetHeight())); + glUniform4f(d24s8_abgr_viewport_u_id, static_cast(state.viewport.x), + static_cast(state.viewport.y), static_cast(state.viewport.width), + static_cast(state.viewport.height)); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + + glBindTexture(GL_TEXTURE_BUFFER, 0); +} + +Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create) { + if (params.addr == 0 || params.height * params.width == 0) { + return nullptr; + } + // Use GetSurfaceSubRect instead + ASSERT(params.width == params.stride); + + ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0)); // Check for an exact match in existing surfaces - CachedSurface* best_exact_surface = nullptr; - float exact_surface_goodness = -1.f; + Surface surface = + FindMatch(surface_cache, params, match_res_scale); - 
auto surface_interval = - boost::icl::interval::right_open(params.addr, params.addr + params_size); - auto range = surface_cache.equal_range(surface_interval); - for (auto it = range.first; it != range.second; ++it) { - for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { - CachedSurface* surface = it2->get(); - - // Check if the request matches the surface exactly - if (params.addr == surface->addr && params.width == surface->width && - params.height == surface->height && params.pixel_format == surface->pixel_format) { - // Make sure optional param-matching criteria are fulfilled - bool tiling_match = (params.is_tiled == surface->is_tiled); - bool res_scale_match = (params.res_scale_width == surface->res_scale_width && - params.res_scale_height == surface->res_scale_height); - if (!match_res_scale || res_scale_match) { - // Prioritize same-tiling and highest resolution surfaces - float match_goodness = - (float)tiling_match + surface->res_scale_width * surface->res_scale_height; - if (match_goodness > exact_surface_goodness || surface->dirty) { - exact_surface_goodness = match_goodness; - best_exact_surface = surface; - } + if (surface == nullptr) { + u16 target_res_scale = params.res_scale; + if (match_res_scale != ScaleMatch::Exact) { + // This surface may have a subrect of another surface with a higher res_scale, find it + // to adjust our params + SurfaceParams find_params = params; + Surface expandable = FindMatch( + surface_cache, find_params, match_res_scale); + if (expandable != nullptr && expandable->res_scale > target_res_scale) { + target_res_scale = expandable->res_scale; + } + // Keep res_scale when reinterpreting d24s8 -> rgba8 + if (params.pixel_format == PixelFormat::RGBA8) { + find_params.pixel_format = PixelFormat::D24S8; + expandable = FindMatch( + surface_cache, find_params, match_res_scale); + if (expandable != nullptr && expandable->res_scale > target_res_scale) { + target_res_scale = expandable->res_scale; } } } + 
SurfaceParams new_params = params; + new_params.res_scale = target_res_scale; + surface = CreateSurface(new_params); + RegisterSurface(surface); } - // Return the best exact surface if found - if (best_exact_surface != nullptr) { - return best_exact_surface; + if (load_if_create) { + ValidateSurface(surface, params.addr, params.size); } - // No matching surfaces found, so create a new one - u8* texture_src_data = Memory::GetPhysicalPointer(params.addr); - if (texture_src_data == nullptr) { - return nullptr; - } - - MICROPROFILE_SCOPE(OpenGL_SurfaceUpload); - - // Stride only applies to linear images. - ASSERT(params.pixel_stride == 0 || !params.is_tiled); - - std::shared_ptr new_surface = std::make_shared(); - - new_surface->addr = params.addr; - new_surface->size = params_size; - - new_surface->texture.Create(); - new_surface->width = params.width; - new_surface->height = params.height; - new_surface->pixel_stride = params.pixel_stride; - new_surface->res_scale_width = params.res_scale_width; - new_surface->res_scale_height = params.res_scale_height; - - new_surface->is_tiled = params.is_tiled; - new_surface->pixel_format = params.pixel_format; - new_surface->dirty = false; - - if (!load_if_create) { - // Don't load any data; just allocate the surface's texture - AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, - new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); - } else { - // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead - // of memory upload below if that's a common scenario in some game - - Memory::RasterizerFlushRegion(params.addr, params_size); - - // Load data from memory to the new surface - OpenGLState cur_state = OpenGLState::GetCurState(); - - GLuint old_tex = cur_state.texture_units[0].texture_2d; - cur_state.texture_units[0].texture_2d = new_surface->texture.handle; - cur_state.Apply(); - glActiveTexture(GL_TEXTURE0); - - if (!new_surface->is_tiled) { - // TODO: 
Ensure this will always be a color format, not a depth or other format - ASSERT((size_t)new_surface->pixel_format < fb_format_tuples.size()); - const FormatTuple& tuple = fb_format_tuples[(unsigned int)params.pixel_format]; - - glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)new_surface->pixel_stride); - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, - tuple.format, tuple.type, texture_src_data); - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - } else { - SurfaceType type = CachedSurface::GetFormatType(new_surface->pixel_format); - if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) { - FormatTuple tuple; - if ((size_t)params.pixel_format < fb_format_tuples.size()) { - tuple = fb_format_tuples[(unsigned int)params.pixel_format]; - } else { - // Texture - tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; - } - - std::vector> tex_buffer(params.width * params.height); - - Pica::Texture::TextureInfo tex_info; - tex_info.width = params.width; - tex_info.height = params.height; - tex_info.format = (Pica::TexturingRegs::TextureFormat)params.pixel_format; - tex_info.SetDefaultStride(); - tex_info.physical_address = params.addr; - - for (unsigned y = 0; y < params.height; ++y) { - for (unsigned x = 0; x < params.width; ++x) { - tex_buffer[x + params.width * y] = Pica::Texture::LookupTexture( - texture_src_data, x, params.height - 1 - y, tex_info); - } - } - - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, - 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data()); - } else { - // Depth/Stencil formats need special treatment since they aren't sampleable using - // LookupTexture and can't use RGBA format - size_t tuple_idx = (size_t)params.pixel_format - 14; - ASSERT(tuple_idx < depth_format_tuples.size()); - const FormatTuple& tuple = depth_format_tuples[tuple_idx]; - - u32 bytes_per_pixel = CachedSurface::GetFormatBpp(params.pixel_format) / 8; - - // OpenGL needs 4 bpp alignment for D24 since using 
GL_UNSIGNED_INT as type - bool use_4bpp = (params.pixel_format == PixelFormat::D24); - - u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel; - - std::vector temp_fb_depth_buffer(params.width * params.height * - gl_bytes_per_pixel); - - u8* temp_fb_depth_buffer_ptr = - use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data(); - - MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, - gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, - true); - - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, - 0, tuple.format, tuple.type, temp_fb_depth_buffer.data()); - } - } - - // If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface - if (new_surface->res_scale_width != 1.f || new_surface->res_scale_height != 1.f) { - OGLTexture scaled_texture; - scaled_texture.Create(); - - AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, - new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); - BlitTextures(new_surface->texture.handle, scaled_texture.handle, - CachedSurface::GetFormatType(new_surface->pixel_format), - MathUtil::Rectangle(0, 0, new_surface->width, new_surface->height), - MathUtil::Rectangle(0, 0, new_surface->GetScaledWidth(), - new_surface->GetScaledHeight())); - - new_surface->texture.Release(); - new_surface->texture.handle = scaled_texture.handle; - scaled_texture.handle = 0; - cur_state.texture_units[0].texture_2d = new_surface->texture.handle; - cur_state.Apply(); - } - - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - - cur_state.texture_units[0].texture_2d = old_tex; - cur_state.Apply(); - } - - Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1); - 
surface_cache.add(std::make_pair(boost::icl::interval::right_open( - new_surface->addr, new_surface->addr + new_surface->size), - std::set>({new_surface}))); - return new_surface.get(); + return surface; } -CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, - bool match_res_scale, bool load_if_create, - MathUtil::Rectangle& out_rect) { - if (params.addr == 0) { - return nullptr; +SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, + ScaleMatch match_res_scale, + bool load_if_create) { + if (params.addr == 0 || params.height * params.width == 0) { + return std::make_tuple(nullptr, MathUtil::Rectangle{}); } - u32 total_pixels = params.width * params.height; - u32 params_size = total_pixels * CachedSurface::GetFormatBpp(params.pixel_format) / 8; + // Attempt to find encompassing surface + Surface surface = FindMatch(surface_cache, params, + match_res_scale); - // Attempt to find encompassing surfaces - CachedSurface* best_subrect_surface = nullptr; - float subrect_surface_goodness = -1.f; + // Check if FindMatch failed because of res scaling + // If that's the case create a new surface with + // the dimensions of the lower res_scale surface + // to suggest it should not be used again + if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { + surface = FindMatch(surface_cache, params, + ScaleMatch::Ignore); + if (surface != nullptr) { + ASSERT(surface->res_scale < params.res_scale); + SurfaceParams new_params = *surface; + new_params.res_scale = params.res_scale; - auto surface_interval = - boost::icl::interval::right_open(params.addr, params.addr + params_size); - auto cache_upper_bound = surface_cache.upper_bound(surface_interval); - for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { - for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { - CachedSurface* surface = it2->get(); - - // Check if the request is contained in the surface 
- if (params.addr >= surface->addr && - params.addr + params_size - 1 <= surface->addr + surface->size - 1 && - params.pixel_format == surface->pixel_format) { - // Make sure optional param-matching criteria are fulfilled - bool tiling_match = (params.is_tiled == surface->is_tiled); - bool res_scale_match = (params.res_scale_width == surface->res_scale_width && - params.res_scale_height == surface->res_scale_height); - if (!match_res_scale || res_scale_match) { - // Prioritize same-tiling and highest resolution surfaces - float match_goodness = - (float)tiling_match + surface->res_scale_width * surface->res_scale_height; - if (match_goodness > subrect_surface_goodness || surface->dirty) { - subrect_surface_goodness = match_goodness; - best_subrect_surface = surface; - } - } - } + surface = CreateSurface(new_params); + RegisterSurface(surface); } } - // Return the best subrect surface if found - if (best_subrect_surface != nullptr) { - unsigned int bytes_per_pixel = - (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8); + SurfaceParams aligned_params = params; + if (params.is_tiled) { + aligned_params.height = Common::AlignUp(params.height, 8); + aligned_params.width = Common::AlignUp(params.width, 8); + aligned_params.stride = Common::AlignUp(params.stride, 8); + aligned_params.UpdateParams(); + } - int x0, y0; + // Check for a surface we can expand before creating a new one + if (surface == nullptr) { + surface = FindMatch(surface_cache, aligned_params, + match_res_scale); + if (surface != nullptr) { + aligned_params.width = aligned_params.stride; + aligned_params.UpdateParams(); - if (!params.is_tiled) { - u32 begin_pixel_index = (params.addr - best_subrect_surface->addr) / bytes_per_pixel; - x0 = begin_pixel_index % best_subrect_surface->width; - y0 = begin_pixel_index / best_subrect_surface->width; + SurfaceParams new_params = *surface; + new_params.addr = std::min(aligned_params.addr, surface->addr); + new_params.end = 
std::max(aligned_params.end, surface->end); + new_params.size = new_params.end - new_params.addr; + new_params.height = + new_params.size / aligned_params.BytesInPixels(aligned_params.stride); + ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); - out_rect = MathUtil::Rectangle(x0, y0, x0 + params.width, y0 + params.height); - } else { - u32 bytes_per_tile = 8 * 8 * bytes_per_pixel; - u32 tiles_per_row = best_subrect_surface->width / 8; + Surface new_surface = CreateSurface(new_params); + DuplicateSurface(surface, new_surface); - u32 begin_tile_index = (params.addr - best_subrect_surface->addr) / bytes_per_tile; - x0 = begin_tile_index % tiles_per_row * 8; - y0 = begin_tile_index / tiles_per_row * 8; + // Delete the expanded surface, this can't be done safely yet + // because it may still be in use + remove_surfaces.emplace(surface); - // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory. - out_rect = - MathUtil::Rectangle(x0, best_subrect_surface->height - y0, x0 + params.width, - best_subrect_surface->height - (y0 + params.height)); + surface = new_surface; + RegisterSurface(new_surface); } - - out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width); - out_rect.right = (int)(out_rect.right * best_subrect_surface->res_scale_width); - out_rect.top = (int)(out_rect.top * best_subrect_surface->res_scale_height); - out_rect.bottom = (int)(out_rect.bottom * best_subrect_surface->res_scale_height); - - return best_subrect_surface; } // No subrect found - create and return a new surface - if (!params.is_tiled) { - out_rect = MathUtil::Rectangle(0, 0, (int)(params.width * params.res_scale_width), - (int)(params.height * params.res_scale_height)); - } else { - out_rect = MathUtil::Rectangle(0, (int)(params.height * params.res_scale_height), - (int)(params.width * params.res_scale_width), 0); + if (surface == nullptr) { + SurfaceParams new_params = aligned_params; + // Can't have gaps in a surface + 
new_params.width = aligned_params.stride; + new_params.UpdateParams(); + // GetSurface will create the new surface and possibly adjust res_scale if necessary + surface = GetSurface(new_params, match_res_scale, load_if_create); + } else if (load_if_create) { + ValidateSurface(surface, aligned_params.addr, aligned_params.size); } - return GetSurface(params, match_res_scale, load_if_create); + return std::make_tuple(surface, surface->GetScaledSubRect(params)); } -CachedSurface* RasterizerCacheOpenGL::GetTextureSurface( +Surface RasterizerCacheOpenGL::GetTextureSurface( const Pica::TexturingRegs::FullTextureConfig& config) { - Pica::Texture::TextureInfo info = Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); - CachedSurface params; + SurfaceParams params; params.addr = info.physical_address; params.width = info.width; params.height = info.height; params.is_tiled = true; - params.pixel_format = CachedSurface::PixelFormatFromTextureFormat(info.format); - return GetSurface(params, false, true); + params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(info.format); + params.UpdateParams(); + + if (info.width % 8 != 0 || info.height % 8 != 0) { + Surface src_surface; + MathUtil::Rectangle rect; + std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true); + + params.res_scale = src_surface->res_scale; + Surface tmp_surface = CreateSurface(params); + BlitTextures(src_surface->texture.handle, rect, tmp_surface->texture.handle, + tmp_surface->GetScaledRect(), + SurfaceParams::GetFormatType(params.pixel_format), read_framebuffer.handle, + draw_framebuffer.handle); + + remove_surfaces.emplace(tmp_surface); + return tmp_surface; + } + + return GetSurface(params, ScaleMatch::Ignore, true); } -std::tuple> -RasterizerCacheOpenGL::GetFramebufferSurfaces( - const Pica::FramebufferRegs::FramebufferConfig& config) { - +SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( + bool using_color_fb, bool 
using_depth_fb, const MathUtil::Rectangle& viewport_rect) { const auto& regs = Pica::g_state.regs; + const auto& config = regs.framebuffer.framebuffer; - // Make sur that framebuffers don't overlap if both color and depth are being used - u32 fb_area = config.GetWidth() * config.GetHeight(); - bool framebuffers_overlap = - config.GetColorBufferPhysicalAddress() != 0 && - config.GetDepthBufferPhysicalAddress() != 0 && - MathUtil::IntervalsIntersect( - config.GetColorBufferPhysicalAddress(), - fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())), - config.GetDepthBufferPhysicalAddress(), - fb_area * Pica::FramebufferRegs::BytesPerDepthPixel(config.depth_format)); - bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0; - bool depth_write_enable = regs.framebuffer.output_merger.depth_write_enable && - regs.framebuffer.framebuffer.allow_depth_stencil_write; - bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && - (regs.framebuffer.output_merger.depth_test_enable || depth_write_enable || - !framebuffers_overlap); + // update resolution_scale_factor and reset cache if changed + static u16 resolution_scale_factor = GetResolutionScaleFactor(); + if (resolution_scale_factor != GetResolutionScaleFactor()) { + resolution_scale_factor = GetResolutionScaleFactor(); + FlushAll(); + while (!surface_cache.empty()) + UnregisterSurface(*surface_cache.begin()->second.begin()); + } - if (framebuffers_overlap && using_color_fb && using_depth_fb) { + MathUtil::Rectangle viewport_clamped{ + static_cast( + MathUtil::Clamp(viewport_rect.left, 0, static_cast(config.GetWidth()))), + static_cast( + MathUtil::Clamp(viewport_rect.top, 0, static_cast(config.GetHeight()))), + static_cast( + MathUtil::Clamp(viewport_rect.right, 0, static_cast(config.GetWidth()))), + static_cast( + MathUtil::Clamp(viewport_rect.bottom, 0, static_cast(config.GetHeight())))}; + + // get color and depth surfaces + SurfaceParams color_params; + 
color_params.is_tiled = true; + color_params.res_scale = resolution_scale_factor; + color_params.width = config.GetWidth(); + color_params.height = config.GetHeight(); + SurfaceParams depth_params = color_params; + + color_params.addr = config.GetColorBufferPhysicalAddress(); + color_params.pixel_format = SurfaceParams::PixelFormatFromColorFormat(config.color_format); + color_params.UpdateParams(); + + depth_params.addr = config.GetDepthBufferPhysicalAddress(); + depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); + depth_params.UpdateParams(); + + auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); + auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); + + // Make sure that framebuffers don't overlap if both color and depth are being used + if (using_color_fb && using_depth_fb && + boost::icl::length(color_vp_interval & depth_vp_interval)) { LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " "overlapping framebuffers not supported!"); using_depth_fb = false; } - // get color and depth surfaces - CachedSurface color_params; - CachedSurface depth_params; - color_params.width = depth_params.width = config.GetWidth(); - color_params.height = depth_params.height = config.GetHeight(); - color_params.is_tiled = depth_params.is_tiled = true; + MathUtil::Rectangle color_rect{}; + Surface color_surface = nullptr; + if (using_color_fb) + std::tie(color_surface, color_rect) = + GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); - // Set the internal resolution, assume the same scaling factor for top and bottom screens - float resolution_scale_factor = Settings::values.resolution_factor; - if (resolution_scale_factor == 0.0f) { - // Auto - scale resolution to the window size - resolution_scale_factor = VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio(); - } - // Scale the resolution by the specified factor - color_params.res_scale_width 
= resolution_scale_factor; - depth_params.res_scale_width = resolution_scale_factor; - color_params.res_scale_height = resolution_scale_factor; - depth_params.res_scale_height = resolution_scale_factor; + MathUtil::Rectangle depth_rect{}; + Surface depth_surface = nullptr; + if (using_depth_fb) + std::tie(depth_surface, depth_rect) = + GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); - color_params.addr = config.GetColorBufferPhysicalAddress(); - color_params.pixel_format = CachedSurface::PixelFormatFromColorFormat(config.color_format); - - depth_params.addr = config.GetDepthBufferPhysicalAddress(); - depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format); - - MathUtil::Rectangle color_rect; - CachedSurface* color_surface = - using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr; - - MathUtil::Rectangle depth_rect; - CachedSurface* depth_surface = - using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr; - - // Sanity check to make sure found surfaces aren't the same - if (using_depth_fb && using_color_fb && color_surface == depth_surface) { - LOG_CRITICAL( - Render_OpenGL, - "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!"); - using_depth_fb = false; - depth_surface = nullptr; - } - - MathUtil::Rectangle rect; - - if (color_surface != nullptr && depth_surface != nullptr && - (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) { - // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if - // they don't match - if (color_rect.left != 0 || color_rect.top != 0) { - color_surface = GetSurface(color_params, true, true); - } - - if (depth_rect.left != 0 || depth_rect.top != 0) { - depth_surface = GetSurface(depth_params, true, true); - } - - if (!color_surface->is_tiled) { - rect = MathUtil::Rectangle( - 0, 0, (int)(color_params.width * color_params.res_scale_width), - 
(int)(color_params.height * color_params.res_scale_height)); - } else { - rect = MathUtil::Rectangle( - 0, (int)(color_params.height * color_params.res_scale_height), - (int)(color_params.width * color_params.res_scale_width), 0); + MathUtil::Rectangle fb_rect{}; + if (color_surface != nullptr && depth_surface != nullptr) { + fb_rect = color_rect; + // Color and Depth surfaces must have the same dimensions and offsets + if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || + color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { + color_surface = GetSurface(color_params, ScaleMatch::Exact, false); + depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); + fb_rect = color_surface->GetScaledRect(); } } else if (color_surface != nullptr) { - rect = color_rect; + fb_rect = color_rect; } else if (depth_surface != nullptr) { - rect = depth_rect; - } else { - rect = MathUtil::Rectangle(0, 0, 0, 0); + fb_rect = depth_rect; } - return std::make_tuple(color_surface, depth_surface, rect); + if (color_surface != nullptr) { + ValidateSurface(color_surface, boost::icl::first(color_vp_interval), + boost::icl::length(color_vp_interval)); + } + if (depth_surface != nullptr) { + ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), + boost::icl::length(depth_vp_interval)); + } + + return std::make_tuple(color_surface, depth_surface, fb_rect); } -CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) { - auto surface_interval = - boost::icl::interval::right_open(config.GetStartAddress(), config.GetEndAddress()); - auto range = surface_cache.equal_range(surface_interval); - for (auto it = range.first; it != range.second; ++it) { - for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { - int bits_per_value = 0; - if (config.fill_24bit) { - bits_per_value = 24; - } else if (config.fill_32bit) { - bits_per_value = 32; - } else { - bits_per_value = 16; 
- } +Surface RasterizerCacheOpenGL::GetFillSurface(const GPU::Regs::MemoryFillConfig& config) { + Surface new_surface = std::make_shared(); - CachedSurface* surface = it2->get(); + new_surface->addr = config.GetStartAddress(); + new_surface->end = config.GetEndAddress(); + new_surface->size = new_surface->end - new_surface->addr; + new_surface->type = SurfaceType::Fill; + new_surface->res_scale = std::numeric_limits::max(); - if (surface->addr == config.GetStartAddress() && - CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value && - (surface->width * surface->height * - CachedSurface::GetFormatBpp(surface->pixel_format) / 8) == - (config.GetEndAddress() - config.GetStartAddress())) { - return surface; - } - } + std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4); + if (config.fill_32bit) { + new_surface->fill_size = 4; + } else if (config.fill_24bit) { + new_surface->fill_size = 3; + } else { + new_surface->fill_size = 2; } - return nullptr; + RegisterSurface(new_surface); + return new_surface; } -MICROPROFILE_DEFINE(OpenGL_SurfaceDownload, "OpenGL", "Surface Download", MP_RGB(128, 192, 64)); -void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) { - using PixelFormat = CachedSurface::PixelFormat; - using SurfaceType = CachedSurface::SurfaceType; +SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) { + MathUtil::Rectangle rect{}; - if (!surface->dirty) { - return; - } + Surface match_surface = FindMatch( + surface_cache, params, ScaleMatch::Ignore); - MICROPROFILE_SCOPE(OpenGL_SurfaceDownload); + if (match_surface != nullptr) { + ValidateSurface(match_surface, params.addr, params.size); - u8* dst_buffer = Memory::GetPhysicalPointer(surface->addr); - if (dst_buffer == nullptr) { - return; - } - - OpenGLState cur_state = OpenGLState::GetCurState(); - GLuint old_tex = cur_state.texture_units[0].texture_2d; - - OGLTexture unscaled_tex; - GLuint texture_to_flush = surface->texture.handle; - - // 
If not 1x scale, blit scaled texture to a new 1x texture and use that to flush - if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) { - unscaled_tex.Create(); - - AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width, - surface->height); - BlitTextures( - surface->texture.handle, unscaled_tex.handle, - CachedSurface::GetFormatType(surface->pixel_format), - MathUtil::Rectangle(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()), - MathUtil::Rectangle(0, 0, surface->width, surface->height)); - - texture_to_flush = unscaled_tex.handle; - } - - cur_state.texture_units[0].texture_2d = texture_to_flush; - cur_state.Apply(); - glActiveTexture(GL_TEXTURE0); - - if (!surface->is_tiled) { - // TODO: Ensure this will always be a color format, not a depth or other format - ASSERT((size_t)surface->pixel_format < fb_format_tuples.size()); - const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format]; - - glPixelStorei(GL_PACK_ROW_LENGTH, (GLint)surface->pixel_stride); - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, dst_buffer); - glPixelStorei(GL_PACK_ROW_LENGTH, 0); - } else { - SurfaceType type = CachedSurface::GetFormatType(surface->pixel_format); - if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) { - ASSERT((size_t)surface->pixel_format < fb_format_tuples.size()); - const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format]; - - u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8; - - std::vector temp_gl_buffer(surface->width * surface->height * bytes_per_pixel); - - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data()); - - // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion - // is necessary. 
- MortonCopyPixels(surface->pixel_format, surface->width, surface->height, - bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(), - false); + SurfaceParams match_subrect; + if (params.width != params.stride) { + const u32 tiled_size = match_surface->is_tiled ? 8 : 1; + match_subrect = params; + match_subrect.width = match_surface->PixelsInBytes(params.width) / tiled_size; + match_subrect.stride = match_surface->PixelsInBytes(params.stride) / tiled_size; + match_subrect.height *= tiled_size; } else { - // Depth/Stencil formats need special treatment since they aren't sampleable using - // LookupTexture and can't use RGBA format - size_t tuple_idx = (size_t)surface->pixel_format - 14; - ASSERT(tuple_idx < depth_format_tuples.size()); - const FormatTuple& tuple = depth_format_tuples[tuple_idx]; - - u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8; - - // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type - bool use_4bpp = (surface->pixel_format == PixelFormat::D24); - - u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel; - - std::vector temp_gl_buffer(surface->width * surface->height * gl_bytes_per_pixel); - - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data()); - - u8* temp_gl_buffer_ptr = use_4bpp ? 
temp_gl_buffer.data() + 1 : temp_gl_buffer.data(); - - MortonCopyPixels(surface->pixel_format, surface->width, surface->height, - bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr, - false); + match_subrect = match_surface->FromInterval(params.GetInterval()); + ASSERT(match_subrect.GetInterval() == params.GetInterval()); } + + rect = match_surface->GetScaledSubRect(match_subrect); } - surface->dirty = false; - - cur_state.texture_units[0].texture_2d = old_tex; - cur_state.Apply(); + return std::make_tuple(match_surface, rect); } -void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, - bool invalidate) { - if (size == 0) { +void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, + const Surface& dest_surface) { + ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); + + BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface, + dest_surface->GetScaledSubRect(*src_surface)); + + dest_surface->invalid_regions -= src_surface->GetInterval(); + dest_surface->invalid_regions += src_surface->invalid_regions; + + SurfaceRegions regions; + for (auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { + if (pair.second == src_surface) { + regions += pair.first; + } + } + for (auto& interval : regions) { + dirty_regions.set({interval, dest_surface}); + } +} + +void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, u32 size) { + if (size == 0) + return; + + const SurfaceInterval validate_interval(addr, addr + size); + + if (surface->type == SurfaceType::Fill) { + // Sanity check, fill surfaces will always be valid when used + ASSERT(surface->IsRegionValid(validate_interval)); return; } - // Gather up unique surfaces that touch the region - std::unordered_set> touching_surfaces; + while (true) { + const auto it = surface->invalid_regions.find(validate_interval); + if (it == surface->invalid_regions.end()) + 
break; - auto surface_interval = boost::icl::interval::right_open(addr, addr + size); - auto cache_upper_bound = surface_cache.upper_bound(surface_interval); - for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { - std::copy_if(it->second.begin(), it->second.end(), - std::inserter(touching_surfaces, touching_surfaces.end()), - [skip_surface](std::shared_ptr surface) { - return (surface.get() != skip_surface); - }); - } + const auto interval = *it & validate_interval; + // Look for a valid surface to copy from + SurfaceParams params = surface->FromInterval(interval); - // Flush and invalidate surfaces - for (auto surface : touching_surfaces) { - FlushSurface(surface.get()); - if (invalidate) { - Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1); - surface_cache.subtract( - std::make_pair(boost::icl::interval::right_open( - surface->addr, surface->addr + surface->size), - std::set>({surface}))); + Surface copy_surface = + FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); + if (copy_surface != nullptr) { + SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); + CopySurface(copy_surface, surface, copy_interval); + surface->invalid_regions.erase(copy_interval); + continue; } + + // D24S8 to RGBA8 + if (surface->pixel_format == PixelFormat::RGBA8) { + params.pixel_format = PixelFormat::D24S8; + Surface reinterpret_surface = + FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); + if (reinterpret_surface != nullptr) { + ASSERT(reinterpret_surface->pixel_format == PixelFormat::D24S8); + + SurfaceInterval convert_interval = params.GetCopyableInterval(reinterpret_surface); + SurfaceParams convert_params = surface->FromInterval(convert_interval); + auto src_rect = reinterpret_surface->GetScaledSubRect(convert_params); + auto dest_rect = surface->GetScaledSubRect(convert_params); + + ConvertD24S8toABGR(reinterpret_surface->texture.handle, src_rect, + surface->texture.handle, 
dest_rect); + + surface->invalid_regions.erase(convert_interval); + continue; + } + } + + // Load data from 3DS memory + FlushRegion(params.addr, params.size); + surface->LoadGLBuffer(params.addr, params.end); + surface->UploadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, + draw_framebuffer.handle); + surface->invalid_regions.erase(params.GetInterval()); } } +void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, Surface flush_surface) { + if (size == 0) + return; + + const SurfaceInterval flush_interval(addr, addr + size); + SurfaceRegions flushed_intervals; + + for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { + // small sizes imply that this most likely comes from the cpu, flush the entire region + // the point is to avoid thousands of small writes every frame if the cpu decides to access + // that region, anything higher than 8 you're guaranteed it comes from a service + const auto interval = size <= 8 ? pair.first : pair.first & flush_interval; + auto& surface = pair.second; + + if (flush_surface != nullptr && surface != flush_surface) + continue; + + // Sanity check, this surface is the last one that marked this region dirty + ASSERT(surface->IsRegionValid(interval)); + + if (surface->type != SurfaceType::Fill) { + SurfaceParams params = surface->FromInterval(interval); + surface->DownloadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, + draw_framebuffer.handle); + } + surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval)); + flushed_intervals += interval; + } + // Reset dirty regions + dirty_regions -= flushed_intervals; +} + void RasterizerCacheOpenGL::FlushAll() { - for (auto& surfaces : surface_cache) { - for (auto& surface : surfaces.second) { - FlushSurface(surface.get()); + FlushRegion(0, 0xFFFFFFFF); +} + +void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner) { + if (size == 0) + return; + + const SurfaceInterval 
invalid_interval(addr, addr + size); + + if (region_owner != nullptr) { + ASSERT(region_owner->type != SurfaceType::Texture); + ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); + // Surfaces can't have a gap + ASSERT(region_owner->width == region_owner->stride); + region_owner->invalid_regions.erase(invalid_interval); + } + + for (auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { + for (auto& cached_surface : pair.second) { + if (cached_surface == region_owner) + continue; + + // If cpu is invalidating this region we want to remove it + // to (likely) mark the memory pages as uncached + if (region_owner == nullptr && size <= 8) { + FlushRegion(cached_surface->addr, cached_surface->size, cached_surface); + remove_surfaces.emplace(cached_surface); + continue; + } + + const auto interval = cached_surface->GetInterval() & invalid_interval; + cached_surface->invalid_regions.insert(interval); + + // Remove only "empty" fill surfaces to avoid destroying and recreating OGL textures + if (cached_surface->type == SurfaceType::Fill && + cached_surface->IsSurfaceFullyInvalid()) { + remove_surfaces.emplace(cached_surface); + } } } + + if (region_owner != nullptr) + dirty_regions.set({invalid_interval, region_owner}); + else + dirty_regions.erase(invalid_interval); + + for (auto& remove_surface : remove_surfaces) { + if (remove_surface == region_owner) { + Surface expanded_surface = FindMatch( + surface_cache, *region_owner, ScaleMatch::Ignore); + ASSERT(expanded_surface); + + if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) { + DuplicateSurface(region_owner, expanded_surface); + } else { + continue; + } + } + UnregisterSurface(remove_surface); + } + + remove_surfaces.clear(); +} + +Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { + Surface surface = std::make_shared(); + static_cast(*surface) = params; + + surface->texture.Create(); + + surface->gl_buffer_size = 0; + 
surface->invalid_regions.insert(surface->GetInterval()); + AllocateSurfaceTexture(surface->texture.handle, GetFormatTuple(surface->pixel_format), + surface->GetScaledWidth(), surface->GetScaledHeight()); + + return surface; +} + +void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { + if (surface->registered) { + return; + } + surface->registered = true; + surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); + UpdatePagesCachedCount(surface->addr, surface->size, 1); +} + +void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { + if (!surface->registered) { + return; + } + surface->registered = false; + UpdatePagesCachedCount(surface->addr, surface->size, -1); + surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); +} + +void RasterizerCacheOpenGL::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) { + const u32 num_pages = + ((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1; + const u32 page_start = addr >> Memory::PAGE_BITS; + const u32 page_end = page_start + num_pages; + + // Interval maps will erase segments if count reaches 0, so if delta is negative we have to + // subtract after iterating + const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); + if (delta > 0) + cached_pages.add({pages_interval, delta}); + + for (auto& pair : RangeFromInterval(cached_pages, pages_interval)) { + const auto interval = pair.first & pages_interval; + const int count = pair.second; + + const PAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; + const PAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const u32 interval_size = interval_end_addr - interval_start_addr; + + if (delta > 0 && count == delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); + else if (delta < 0 && count == -delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + 
else + ASSERT(count >= 0); + } + + if (delta < 0) + cached_pages.add({pages_interval, delta}); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index cc16aeba0..7ef6a9498 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -13,6 +13,7 @@ #pragma GCC diagnostic ignored "-Wunused-local-typedefs" #endif #include +#include #ifdef __GNUC__ #pragma GCC diagnostic pop #endif @@ -20,21 +21,37 @@ #include "common/assert.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/math_util.h" #include "core/hw/gpu.h" #include "video_core/regs_framebuffer.h" #include "video_core/regs_texturing.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -namespace MathUtil { -template -struct Rectangle; -} - struct CachedSurface; +using Surface = std::shared_ptr; +using SurfaceSet = std::set; -using SurfaceCache = boost::icl::interval_map>>; +using SurfaceRegions = boost::icl::interval_set; +using SurfaceMap = boost::icl::interval_map; +using SurfaceCache = boost::icl::interval_map; -struct CachedSurface { +using SurfaceInterval = SurfaceCache::interval_type; +static_assert(std::is_same() && + std::is_same(), + "incorrect interval types"); + +using SurfaceRect_Tuple = std::tuple>; +using SurfaceSurfaceRect_Tuple = std::tuple>; + +using PageMap = boost::icl::interval_map; + +enum class ScaleMatch { + Exact, // only accept same res scale + Upscale, // only allow higher scale than params + Ignore // accept every scaled res +}; + +struct SurfaceParams { enum class PixelFormat { // First 5 formats are shared between textures and color buffers RGBA8 = 0, @@ -68,11 +85,12 @@ struct CachedSurface { Texture = 1, Depth = 2, DepthStencil = 3, - Invalid = 4, + Fill = 4, + Invalid = 5 }; - static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) { - static const std::array bpp_table = { + static 
constexpr unsigned int GetFormatBpp(PixelFormat format) { + constexpr std::array bpp_table = { 32, // RGBA8 24, // RGB8 16, // RGB5A1 @@ -93,8 +111,11 @@ struct CachedSurface { 32, // D24S8 }; - ASSERT((unsigned int)format < ARRAY_SIZE(bpp_table)); - return bpp_table[(unsigned int)format]; + assert(static_cast(format) < bpp_table.size()); + return bpp_table[static_cast(format)]; + } + unsigned int GetFormatBpp() const { + return GetFormatBpp(pixel_format); } static PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format) { @@ -142,7 +163,7 @@ struct CachedSurface { return false; } - static SurfaceType GetFormatType(PixelFormat pixel_format) { + static constexpr SurfaceType GetFormatType(PixelFormat pixel_format) { if ((unsigned int)pixel_format < 5) { return SurfaceType::Color; } @@ -162,31 +183,117 @@ struct CachedSurface { return SurfaceType::Invalid; } + /// Update the params "size", "end" and "type" from the already set "addr", "width", "height" + /// and "pixel_format" + void UpdateParams() { + if (stride == 0) { + stride = width; + } + type = GetFormatType(pixel_format); + size = !is_tiled ? 
BytesInPixels(stride * (height - 1) + width) + : BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8); + end = addr + size; + } + + SurfaceInterval GetInterval() const { + return SurfaceInterval::right_open(addr, end); + } + + // Returns the outer rectangle containing "interval" + SurfaceParams FromInterval(SurfaceInterval interval) const; + + SurfaceInterval GetSubRectInterval(MathUtil::Rectangle unscaled_rect) const; + + // Returns the region of the biggest valid rectangle within interval + SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; + u32 GetScaledWidth() const { - return (u32)(width * res_scale_width); + return width * res_scale; } u32 GetScaledHeight() const { - return (u32)(height * res_scale_height); + return height * res_scale; } - PAddr addr; - u32 size; + MathUtil::Rectangle GetRect() const { + return {0, height, width, 0}; + } - PAddr min_valid; - PAddr max_valid; + MathUtil::Rectangle GetScaledRect() const { + return {0, GetScaledHeight(), GetScaledWidth(), 0}; + } + + u32 PixelsInBytes(u32 size) const { + return size * CHAR_BIT / GetFormatBpp(pixel_format); + } + + u32 BytesInPixels(u32 pixels) const { + return pixels * GetFormatBpp(pixel_format) / CHAR_BIT; + } + + bool ExactMatch(const SurfaceParams& other_surface) const; + bool CanSubRect(const SurfaceParams& sub_surface) const; + bool CanExpand(const SurfaceParams& expanded_surface) const; + bool CanTexCopy(const SurfaceParams& texcopy_params) const; + + MathUtil::Rectangle GetSubRect(const SurfaceParams& sub_surface) const; + MathUtil::Rectangle GetScaledSubRect(const SurfaceParams& sub_surface) const; + + PAddr addr = 0; + PAddr end = 0; + u32 size = 0; + + u32 width = 0; + u32 height = 0; + u32 stride = 0; + u16 res_scale = 1; + + bool is_tiled = false; + PixelFormat pixel_format = PixelFormat::Invalid; + SurfaceType type = SurfaceType::Invalid; +}; + +struct CachedSurface : SurfaceParams { + bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval 
fill_interval) const; + bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; + + bool IsRegionValid(SurfaceInterval interval) const { + return (invalid_regions.find(interval) == invalid_regions.end()); + } + + bool IsSurfaceFullyInvalid() const { + return (invalid_regions & GetInterval()) == SurfaceRegions(GetInterval()); + } + + bool registered = false; + SurfaceRegions invalid_regions; + + u32 fill_size = 0; /// Number of bytes to read from fill_data + std::array fill_data; OGLTexture texture; - u32 width; - u32 height; - /// Stride between lines, in pixels. Only valid for images in linear format. - u32 pixel_stride = 0; - float res_scale_width = 1.f; - float res_scale_height = 1.f; - bool is_tiled; - PixelFormat pixel_format; - bool dirty; + static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) { + // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type + return format == PixelFormat::Invalid + ? 0 + : (format == PixelFormat::D24 || GetFormatType(format) == SurfaceType::Texture) + ? 
4 + : SurfaceParams::GetFormatBpp(format) / 8; + } + + std::unique_ptr gl_buffer; + size_t gl_buffer_size = 0; + + // Read/Write data in 3DS memory to/from gl_buffer + void LoadGLBuffer(PAddr load_start, PAddr load_end); + void FlushGLBuffer(PAddr flush_start, PAddr flush_end); + + // Upload/Download data in gl_buffer in/to this surface's texture + void UploadGLTexture(const MathUtil::Rectangle& rect, GLuint read_fb_handle, + GLuint draw_fb_handle); + void DownloadGLTexture(const MathUtil::Rectangle& rect, GLuint read_fb_handle, + GLuint draw_fb_handle); }; class RasterizerCacheOpenGL : NonCopyable { @@ -194,46 +301,78 @@ public: RasterizerCacheOpenGL(); ~RasterizerCacheOpenGL(); - /// Blits one texture to another - void BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, - const MathUtil::Rectangle& src_rect, - const MathUtil::Rectangle& dst_rect); + /// Blit one surface's texture to another + bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle& src_rect, + const Surface& dst_surface, const MathUtil::Rectangle& dst_rect); - /// Attempt to blit one surface's texture to another - bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle& src_rect, - CachedSurface* dst_surface, const MathUtil::Rectangle& dst_rect); + void ConvertD24S8toABGR(GLuint src_tex, const MathUtil::Rectangle& src_rect, + GLuint dst_tex, const MathUtil::Rectangle& dst_rect); - /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached) - CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale, - bool load_if_create); + /// Copy one surface's region to another + void CopySurface(const Surface& src_surface, const Surface& dst_surface, + SurfaceInterval copy_interval); + + /// Load a texture from 3DS memory to OpenGL and cache it (if not already cached) + Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create); /// Attempt to find a subrect 
(resolution scaled) of a surface, otherwise loads a texture from /// 3DS memory to OpenGL and caches it (if not already cached) - CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, - bool load_if_create, MathUtil::Rectangle& out_rect); + SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create); - /// Gets a surface based on the texture configuration - CachedSurface* GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config); + /// Get a surface based on the texture configuration + Surface GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config); - /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer - /// configuration - std::tuple> GetFramebufferSurfaces( - const Pica::FramebufferRegs::FramebufferConfig& config); + /// Get the color and depth surfaces based on the framebuffer configuration + SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, + const MathUtil::Rectangle& viewport_rect); - /// Attempt to get a surface that exactly matches the fill region and format - CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config); + /// Get a surface that matches the fill config + Surface GetFillSurface(const GPU::Regs::MemoryFillConfig& config); - /// Write the surface back to memory - void FlushSurface(CachedSurface* surface); + /// Get a surface that matches a "texture copy" display transfer config + SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); - /// Write any cached resources overlapping the region back to memory (if dirty) and optionally - /// invalidate them in the cache - void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate); + /// Write any cached resources overlapping the region back to memory (if dirty) + void FlushRegion(PAddr addr, u32 size, Surface flush_surface = nullptr); + + /// Mark region 
as being invalidated by region_owner (nullptr if 3DS memory) + void InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner); /// Flush all cached resources tracked by this cache manager void FlushAll(); private: + void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); + + /// Update surface's texture for given region when necessary + void ValidateSurface(const Surface& surface, PAddr addr, u32 size); + + /// Create a new surface + Surface CreateSurface(const SurfaceParams& params); + + /// Register surface into the cache + void RegisterSurface(const Surface& surface); + + /// Remove surface from the cache + void UnregisterSurface(const Surface& surface); + + /// Increase/decrease the number of surfaces in pages touching the specified region + void UpdatePagesCachedCount(PAddr addr, u32 size, int delta); + SurfaceCache surface_cache; - OGLFramebuffer transfer_framebuffers[2]; + PageMap cached_pages; + SurfaceMap dirty_regions; + SurfaceSet remove_surfaces; + + OGLFramebuffer read_framebuffer; + OGLFramebuffer draw_framebuffer; + + OGLVertexArray attributeless_vao; + OGLBuffer d24s8_abgr_buffer; + GLsizeiptr d24s8_abgr_buffer_size; + OGLShader d24s8_abgr_shader; + GLint d24s8_abgr_tbo_size_u_id; + GLint d24s8_abgr_viewport_u_id; }; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 13301ec9f..39fa79fc2 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -36,7 +36,7 @@ public: if (handle == 0) return; glDeleteTextures(1, &handle); - OpenGLState::ResetTexture(handle); + OpenGLState::GetCurState().ResetTexture(handle).Apply(); handle = 0; } @@ -69,7 +69,7 @@ public: if (handle == 0) return; glDeleteSamplers(1, &handle); - OpenGLState::ResetSampler(handle); + OpenGLState::GetCurState().ResetSampler(handle).Apply(); handle = 0; } @@ -102,7 +102,7 @@ public: if (handle == 0) return; 
glDeleteProgram(handle); - OpenGLState::ResetProgram(handle); + OpenGLState::GetCurState().ResetProgram(handle).Apply(); handle = 0; } @@ -135,7 +135,7 @@ public: if (handle == 0) return; glDeleteBuffers(1, &handle); - OpenGLState::ResetBuffer(handle); + OpenGLState::GetCurState().ResetBuffer(handle).Apply(); handle = 0; } @@ -168,7 +168,7 @@ public: if (handle == 0) return; glDeleteVertexArrays(1, &handle); - OpenGLState::ResetVertexArray(handle); + OpenGLState::GetCurState().ResetVertexArray(handle).Apply(); handle = 0; } @@ -201,7 +201,7 @@ public: if (handle == 0) return; glDeleteFramebuffers(1, &handle); - OpenGLState::ResetFramebuffer(handle); + OpenGLState::GetCurState().ResetFramebuffer(handle).Apply(); handle = 0; } diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 3053b0038..eaf99f8e2 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -41,7 +41,7 @@ struct LightSrc { }; layout (std140) uniform shader_data { - vec2 framebuffer_scale; + int framebuffer_scale; int alphatest_ref; float depth_scale; float depth_offset; diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 9fa353fe4..76354b842 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -69,6 +69,17 @@ OpenGLState::OpenGLState() { draw.uniform_buffer = 0; draw.shader_program = 0; + scissor.enabled = false; + scissor.x = 0; + scissor.y = 0; + scissor.width = 0; + scissor.height = 0; + + viewport.x = 0; + viewport.y = 0; + viewport.width = 0; + viewport.height = 0; + clip_distance = {}; } @@ -193,7 +204,7 @@ void OpenGLState::Apply() const { // Lighting LUTs if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) { glActiveTexture(TextureUnits::LightingLUT.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, cur_state.lighting_lut.texture_buffer); + 
glBindTexture(GL_TEXTURE_BUFFER, lighting_lut.texture_buffer); } // Fog LUT @@ -260,6 +271,26 @@ void OpenGLState::Apply() const { glUseProgram(draw.shader_program); } + // Scissor test + if (scissor.enabled != cur_state.scissor.enabled) { + if (scissor.enabled) { + glEnable(GL_SCISSOR_TEST); + } else { + glDisable(GL_SCISSOR_TEST); + } + } + + if (scissor.x != cur_state.scissor.x || scissor.y != cur_state.scissor.y || + scissor.width != cur_state.scissor.width || scissor.height != cur_state.scissor.height) { + glScissor(scissor.x, scissor.y, scissor.width, scissor.height); + } + + if (viewport.x != cur_state.viewport.x || viewport.y != cur_state.viewport.y || + viewport.width != cur_state.viewport.width || + viewport.height != cur_state.viewport.height) { + glViewport(viewport.x, viewport.y, viewport.width, viewport.height); + } + // Clip distance for (size_t i = 0; i < clip_distance.size(); ++i) { if (clip_distance[i] != cur_state.clip_distance[i]) { @@ -274,62 +305,68 @@ void OpenGLState::Apply() const { cur_state = *this; } -void OpenGLState::ResetTexture(GLuint handle) { - for (auto& unit : cur_state.texture_units) { +OpenGLState& OpenGLState::ResetTexture(GLuint handle) { + for (auto& unit : texture_units) { if (unit.texture_2d == handle) { unit.texture_2d = 0; } } - if (cur_state.lighting_lut.texture_buffer == handle) - cur_state.lighting_lut.texture_buffer = 0; - if (cur_state.fog_lut.texture_buffer == handle) - cur_state.fog_lut.texture_buffer = 0; - if (cur_state.proctex_noise_lut.texture_buffer == handle) - cur_state.proctex_noise_lut.texture_buffer = 0; - if (cur_state.proctex_color_map.texture_buffer == handle) - cur_state.proctex_color_map.texture_buffer = 0; - if (cur_state.proctex_alpha_map.texture_buffer == handle) - cur_state.proctex_alpha_map.texture_buffer = 0; - if (cur_state.proctex_lut.texture_buffer == handle) - cur_state.proctex_lut.texture_buffer = 0; - if (cur_state.proctex_diff_lut.texture_buffer == handle) - 
cur_state.proctex_diff_lut.texture_buffer = 0; + if (lighting_lut.texture_buffer == handle) + lighting_lut.texture_buffer = 0; + if (fog_lut.texture_buffer == handle) + fog_lut.texture_buffer = 0; + if (proctex_noise_lut.texture_buffer == handle) + proctex_noise_lut.texture_buffer = 0; + if (proctex_color_map.texture_buffer == handle) + proctex_color_map.texture_buffer = 0; + if (proctex_alpha_map.texture_buffer == handle) + proctex_alpha_map.texture_buffer = 0; + if (proctex_lut.texture_buffer == handle) + proctex_lut.texture_buffer = 0; + if (proctex_diff_lut.texture_buffer == handle) + proctex_diff_lut.texture_buffer = 0; + return *this; } -void OpenGLState::ResetSampler(GLuint handle) { - for (auto& unit : cur_state.texture_units) { +OpenGLState& OpenGLState::ResetSampler(GLuint handle) { + for (auto& unit : texture_units) { if (unit.sampler == handle) { unit.sampler = 0; } } + return *this; } -void OpenGLState::ResetProgram(GLuint handle) { - if (cur_state.draw.shader_program == handle) { - cur_state.draw.shader_program = 0; +OpenGLState& OpenGLState::ResetProgram(GLuint handle) { + if (draw.shader_program == handle) { + draw.shader_program = 0; } + return *this; } -void OpenGLState::ResetBuffer(GLuint handle) { - if (cur_state.draw.vertex_buffer == handle) { - cur_state.draw.vertex_buffer = 0; +OpenGLState& OpenGLState::ResetBuffer(GLuint handle) { + if (draw.vertex_buffer == handle) { + draw.vertex_buffer = 0; } - if (cur_state.draw.uniform_buffer == handle) { - cur_state.draw.uniform_buffer = 0; + if (draw.uniform_buffer == handle) { + draw.uniform_buffer = 0; } + return *this; } -void OpenGLState::ResetVertexArray(GLuint handle) { - if (cur_state.draw.vertex_array == handle) { - cur_state.draw.vertex_array = 0; +OpenGLState& OpenGLState::ResetVertexArray(GLuint handle) { + if (draw.vertex_array == handle) { + draw.vertex_array = 0; } + return *this; } -void OpenGLState::ResetFramebuffer(GLuint handle) { - if (cur_state.draw.read_framebuffer == handle) { - 
cur_state.draw.read_framebuffer = 0; +OpenGLState& OpenGLState::ResetFramebuffer(GLuint handle) { + if (draw.read_framebuffer == handle) { + draw.read_framebuffer = 0; } - if (cur_state.draw.draw_framebuffer == handle) { - cur_state.draw.draw_framebuffer = 0; + if (draw.draw_framebuffer == handle) { + draw.draw_framebuffer = 0; } + return *this; } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 437fe34c4..033d417bc 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -124,25 +124,40 @@ public: GLuint shader_program; // GL_CURRENT_PROGRAM } draw; + struct { + bool enabled; // GL_SCISSOR_TEST + GLint x; + GLint y; + GLsizei width; + GLsizei height; + } scissor; + + struct { + GLint x; + GLint y; + GLsizei width; + GLsizei height; + } viewport; + std::array clip_distance; // GL_CLIP_DISTANCE OpenGLState(); /// Get the currently active OpenGL state - static const OpenGLState& GetCurState() { + static OpenGLState GetCurState() { return cur_state; } /// Apply this state as the current OpenGL state void Apply() const; - /// Resets and unbinds any references to the given resource in the current OpenGL state - static void ResetTexture(GLuint handle); - static void ResetSampler(GLuint handle); - static void ResetProgram(GLuint handle); - static void ResetBuffer(GLuint handle); - static void ResetVertexArray(GLuint handle); - static void ResetFramebuffer(GLuint handle); + /// Resets any references to the given resource + OpenGLState& ResetTexture(GLuint handle); + OpenGLState& ResetSampler(GLuint handle); + OpenGLState& ResetProgram(GLuint handle); + OpenGLState& ResetBuffer(GLuint handle); + OpenGLState& ResetVertexArray(GLuint handle); + OpenGLState& ResetFramebuffer(GLuint handle); private: static OpenGLState cur_state; diff --git a/src/video_core/swrasterizer/swrasterizer.h b/src/video_core/swrasterizer/swrasterizer.h index 87e64f8f6..f97fe13b4 100644 --- 
a/src/video_core/swrasterizer/swrasterizer.h +++ b/src/video_core/swrasterizer/swrasterizer.h @@ -11,7 +11,7 @@ namespace Pica { namespace Shader { struct OutputVertex; } -} +} // namespace Pica namespace VideoCore { @@ -22,6 +22,7 @@ class SWRasterizer : public RasterizerInterface { void NotifyPicaRegisterChanged(u32 id) override {} void FlushAll() override {} void FlushRegion(PAddr addr, u32 size) override {} + void InvalidateRegion(PAddr addr, u32 size) override {} void FlushAndInvalidateRegion(PAddr addr, u32 size) override {} }; diff --git a/src/video_core/utils.h b/src/video_core/utils.h index d8567f314..aa4e1bd38 100644 --- a/src/video_core/utils.h +++ b/src/video_core/utils.h @@ -9,9 +9,9 @@ namespace VideoCore { // 8x8 Z-Order coordinate from 2D coordinates -static inline u32 MortonInterleave(u32 x, u32 y) { - static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15}; - static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a}; +static constexpr u32 MortonInterleave(u32 x, u32 y) { + constexpr u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15}; + constexpr u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a}; return xlut[x % 8] + ylut[y % 8]; }