From c821c149089ea3f88fa84015998c54b92acc065d Mon Sep 17 00:00:00 2001 From: James Rowe Date: Mon, 13 Nov 2017 22:30:11 -0700 Subject: [PATCH 01/32] Settings: Change resolution scaling to an integer instead of a float --- src/citra/config.cpp | 2 +- src/citra_qt/configuration/config.cpp | 5 +- .../configuration/configure_graphics.cpp | 74 +------------------ src/common/math_util.h | 2 +- src/core/frontend/framebuffer_layout.cpp | 4 +- src/core/frontend/framebuffer_layout.h | 2 +- src/core/settings.h | 2 +- .../renderer_opengl/gl_rasterizer_cache.cpp | 21 ++++-- .../renderer_opengl/gl_shader_gen.cpp | 2 +- 9 files changed, 27 insertions(+), 87 deletions(-) diff --git a/src/citra/config.cpp b/src/citra/config.cpp index 45c28ad09..72bda0be0 100644 --- a/src/citra/config.cpp +++ b/src/citra/config.cpp @@ -88,7 +88,7 @@ void Config::ReadValues() { Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", true); Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true); Settings::values.resolution_factor = - (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); + static_cast(sdl2_config->GetInteger("Renderer", "resolution_factor", 1)); Settings::values.use_vsync = sdl2_config->GetBoolean("Renderer", "use_vsync", false); Settings::values.toggle_framelimit = sdl2_config->GetBoolean("Renderer", "toggle_framelimit", true); diff --git a/src/citra_qt/configuration/config.cpp b/src/citra_qt/configuration/config.cpp index 97993e426..b150d4e88 100644 --- a/src/citra_qt/configuration/config.cpp +++ b/src/citra_qt/configuration/config.cpp @@ -73,7 +73,8 @@ void Config::ReadValues() { qt_config->beginGroup("Renderer"); Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", true).toBool(); Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool(); - Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat(); + 
Settings::values.resolution_factor = + static_cast(qt_config->value("resolution_factor", 1).toInt()); Settings::values.use_vsync = qt_config->value("use_vsync", false).toBool(); Settings::values.toggle_framelimit = qt_config->value("toggle_framelimit", true).toBool(); @@ -236,7 +237,7 @@ void Config::SaveValues() { qt_config->beginGroup("Renderer"); qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer); qt_config->setValue("use_shader_jit", Settings::values.use_shader_jit); - qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor); + qt_config->setValue("resolution_factor", Settings::values.resolution_factor); qt_config->setValue("use_vsync", Settings::values.use_vsync); qt_config->setValue("toggle_framelimit", Settings::values.toggle_framelimit); diff --git a/src/citra_qt/configuration/configure_graphics.cpp b/src/citra_qt/configuration/configure_graphics.cpp index b5a5ab1e1..22a8ca8ae 100644 --- a/src/citra_qt/configuration/configure_graphics.cpp +++ b/src/citra_qt/configuration/configure_graphics.cpp @@ -20,81 +20,11 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) ConfigureGraphics::~ConfigureGraphics() {} -enum class Resolution : int { - Auto, - Scale1x, - Scale2x, - Scale3x, - Scale4x, - Scale5x, - Scale6x, - Scale7x, - Scale8x, - Scale9x, - Scale10x, -}; - -float ToResolutionFactor(Resolution option) { - switch (option) { - case Resolution::Auto: - return 0.f; - case Resolution::Scale1x: - return 1.f; - case Resolution::Scale2x: - return 2.f; - case Resolution::Scale3x: - return 3.f; - case Resolution::Scale4x: - return 4.f; - case Resolution::Scale5x: - return 5.f; - case Resolution::Scale6x: - return 6.f; - case Resolution::Scale7x: - return 7.f; - case Resolution::Scale8x: - return 8.f; - case Resolution::Scale9x: - return 9.f; - case Resolution::Scale10x: - return 10.f; - } - return 0.f; -} - -Resolution FromResolutionFactor(float factor) { - if (factor == 0.f) { - return Resolution::Auto; - } else 
if (factor == 1.f) { - return Resolution::Scale1x; - } else if (factor == 2.f) { - return Resolution::Scale2x; - } else if (factor == 3.f) { - return Resolution::Scale3x; - } else if (factor == 4.f) { - return Resolution::Scale4x; - } else if (factor == 5.f) { - return Resolution::Scale5x; - } else if (factor == 6.f) { - return Resolution::Scale6x; - } else if (factor == 7.f) { - return Resolution::Scale7x; - } else if (factor == 8.f) { - return Resolution::Scale8x; - } else if (factor == 9.f) { - return Resolution::Scale9x; - } else if (factor == 10.f) { - return Resolution::Scale10x; - } - return Resolution::Auto; -} - void ConfigureGraphics::setConfiguration() { ui->toggle_hw_renderer->setChecked(Settings::values.use_hw_renderer); ui->resolution_factor_combobox->setEnabled(Settings::values.use_hw_renderer); ui->toggle_shader_jit->setChecked(Settings::values.use_shader_jit); - ui->resolution_factor_combobox->setCurrentIndex( - static_cast(FromResolutionFactor(Settings::values.resolution_factor))); + ui->resolution_factor_combobox->setCurrentIndex(Settings::values.resolution_factor); ui->toggle_vsync->setChecked(Settings::values.use_vsync); ui->toggle_framelimit->setChecked(Settings::values.toggle_framelimit); ui->layout_combobox->setCurrentIndex(static_cast(Settings::values.layout_option)); @@ -105,7 +35,7 @@ void ConfigureGraphics::applyConfiguration() { Settings::values.use_hw_renderer = ui->toggle_hw_renderer->isChecked(); Settings::values.use_shader_jit = ui->toggle_shader_jit->isChecked(); Settings::values.resolution_factor = - ToResolutionFactor(static_cast(ui->resolution_factor_combobox->currentIndex())); + static_cast(ui->resolution_factor_combobox->currentIndex()); Settings::values.use_vsync = ui->toggle_vsync->isChecked(); Settings::values.toggle_framelimit = ui->toggle_framelimit->isChecked(); Settings::values.layout_option = diff --git a/src/common/math_util.h b/src/common/math_util.h index 45a1ed367..fa1d61dac 100644 --- a/src/common/math_util.h +++ 
b/src/common/math_util.h @@ -29,7 +29,7 @@ struct Rectangle { T right; T bottom; - Rectangle() {} + Rectangle() = default; Rectangle(T left, T top, T right, T bottom) : left(left), top(top), right(right), bottom(bottom) {} diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp index e9f778fcb..7af9556b1 100644 --- a/src/core/frontend/framebuffer_layout.cpp +++ b/src/core/frontend/framebuffer_layout.cpp @@ -16,8 +16,8 @@ static const float TOP_SCREEN_ASPECT_RATIO = static const float BOT_SCREEN_ASPECT_RATIO = static_cast(Core::kScreenBottomHeight) / Core::kScreenBottomWidth; -float FramebufferLayout::GetScalingRatio() const { - return static_cast(top_screen.GetWidth()) / Core::kScreenTopWidth; +u16 FramebufferLayout::GetScalingRatio() const { + return static_cast(((top_screen.GetWidth() - 1) / Core::kScreenTopWidth) + 1); } // Finds the largest size subrectangle contained in window area that is confined to the aspect ratio diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h index 4983cf103..0d826be9e 100644 --- a/src/core/frontend/framebuffer_layout.h +++ b/src/core/frontend/framebuffer_layout.h @@ -21,7 +21,7 @@ struct FramebufferLayout { * Returns the ration of pixel size of the top screen, compared to the native size of the 3DS * screen. 
*/ - float GetScalingRatio() const; + u16 GetScalingRatio() const; }; /** diff --git a/src/core/settings.h b/src/core/settings.h index 8d78cb424..34e5914c8 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -95,7 +95,7 @@ struct Values { // Renderer bool use_hw_renderer; bool use_shader_jit; - float resolution_factor; + u16 resolution_factor; bool use_vsync; bool toggle_framelimit; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index f37894e7a..6d47fe29f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -525,12 +525,27 @@ CachedSurface* RasterizerCacheOpenGL::GetTextureSurface( return GetSurface(params, false, true); } +// If the resolution factor setting is 0 (auto), use the scaling ratio of the current framebuffer layout; otherwise use the setting +static u16 GetResolutionScaleFactor() { + return !Settings::values.resolution_factor + ? VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio() + : Settings::values.resolution_factor; +} + std::tuple> RasterizerCacheOpenGL::GetFramebufferSurfaces( const Pica::FramebufferRegs::FramebufferConfig& config) { const auto& regs = Pica::g_state.regs; + // update resolution_scale_factor and reset cache if changed + static u16 resolution_scale_factor = GetResolutionScaleFactor(); + if (resolution_scale_factor != GetResolutionScaleFactor()) { + resolution_scale_factor = GetResolutionScaleFactor(); + FlushAll(); + InvalidateRegion(0, 0xffffffff, nullptr); + } + // Make sur that framebuffers don't overlap if both color and depth are being used u32 fb_area = config.GetWidth() * config.GetHeight(); bool framebuffers_overlap = @@ -561,12 +576,6 @@ RasterizerCacheOpenGL::GetFramebufferSurfaces( color_params.height = depth_params.height = config.GetHeight(); color_params.is_tiled = depth_params.is_tiled = true; - // Set the internal resolution, assume the same scaling factor for top and bottom screens - float resolution_scale_factor = 
Settings::values.resolution_factor; - if (resolution_scale_factor == 0.0f) { - // Auto - scale resolution to the window size - resolution_scale_factor = VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio(); - } // Scale the resolution by the specified factor color_params.res_scale_width = resolution_scale_factor; depth_params.res_scale_width = resolution_scale_factor; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 9a61c0cfc..8b7ed0ba3 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -41,7 +41,7 @@ struct LightSrc { }; layout (std140) uniform shader_data { - vec2 framebuffer_scale; + int framebuffer_scale; int alphatest_ref; float depth_scale; float depth_offset; From 13606a6d0b06c003f7bd88f88f001899e2c4e381 Mon Sep 17 00:00:00 2001 From: James Rowe Date: Thu, 23 Nov 2017 10:43:12 -0700 Subject: [PATCH 02/32] Memory: Remove count of cached pages and add InvalidateRegion In a future commit, the count of cached pages will be reintroduced in the actual surface cache. 
Also adds an invalidate-only operation to the cache, which marks a region as invalid in order to try to avoid a costly flush from 3DS memory. --- src/core/hle/kernel/vm_manager.cpp | 1 - src/core/hle/service/gsp/gsp_gpu.cpp | 2 +- src/core/hw/gpu.cpp | 33 +++++++---------- src/core/memory.cpp | 43 +++++++++++----------- src/core/memory.h | 18 ++++----- src/tests/core/arm/arm_test_common.cpp | 1 - src/video_core/rasterizer_interface.h | 3 ++ src/video_core/swrasterizer/swrasterizer.h | 3 +- 8 files changed, 50 insertions(+), 54 deletions(-) diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index 7a007c065..2d6af0cbb 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp @@ -58,7 +58,6 @@ void VMManager::Reset() { page_table.pointers.fill(nullptr); page_table.attributes.fill(Memory::PageType::Unmapped); - page_table.cached_res_count.fill(0); UpdatePageTableForVMA(initial_vma); } diff --git a/src/core/hle/service/gsp/gsp_gpu.cpp b/src/core/hle/service/gsp/gsp_gpu.cpp index bb3cedbe3..ab9879e02 100644 --- a/src/core/hle/service/gsp/gsp_gpu.cpp +++ b/src/core/hle/service/gsp/gsp_gpu.cpp @@ -423,7 +423,7 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { command.dma_request.size, Memory::FlushMode::Flush); Memory::RasterizerFlushVirtualRegion(command.dma_request.dest_address, command.dma_request.size, - Memory::FlushMode::FlushAndInvalidate); + Memory::FlushMode::Invalidate); // TODO(Subv): These memory accesses should not go through the application's memory mapping. // They should go through the GSP module's memory mapping. 
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 350b9a6a2..d65af4aff 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -96,20 +96,11 @@ static void MemoryFill(const Regs::MemoryFillConfig& config) { u8* start = Memory::GetPhysicalPointer(start_addr); u8* end = Memory::GetPhysicalPointer(end_addr); - // TODO: Consider always accelerating and returning vector of - // regions that the accelerated fill did not cover to - // reduce/eliminate the fill that the cpu has to do. - // This would also mean that the flush below is not needed. - // Fill should first flush all surfaces that touch but are - // not completely within the fill range. - // Then fill all completely covered surfaces, and return the - // regions that were between surfaces or within the touching - // ones for cpu to manually fill here. if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) return; - Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), - config.GetEndAddress() - config.GetStartAddress()); + Memory::RasterizerInvalidateRegion(config.GetStartAddress(), + config.GetEndAddress() - config.GetStartAddress()); if (config.fill_24bit) { // fill with 24-bit values @@ -199,7 +190,7 @@ static void DisplayTransfer(const Regs::DisplayTransferConfig& config) { u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size); - Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); + Memory::RasterizerInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); for (u32 y = 0; y < output_height; ++y) { for (u32 x = 0; x < output_width; ++x) { @@ -363,8 +354,10 @@ static void TextureCopy(const Regs::DisplayTransferConfig& config) { size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); - 
Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), - static_cast(contiguous_output_size)); + // Only need to flush output if it has a gap + const auto FlushInvalidate_fn = (output_gap != 0) ? Memory::RasterizerFlushAndInvalidateRegion + : Memory::RasterizerInvalidateRegion; + FlushInvalidate_fn(config.GetPhysicalOutputAddress(), static_cast(contiguous_output_size)); u32 remaining_input = input_width; u32 remaining_output = output_width; @@ -446,16 +439,18 @@ inline void Write(u32 addr, const T data) { if (config.is_texture_copy) { TextureCopy(config); - LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> " - "0x%08X(%u+%u), flags 0x%08X", + LOG_TRACE(HW_GPU, + "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> " + "0x%08X(%u+%u), flags 0x%08X", config.texture_copy.size, config.GetPhysicalInputAddress(), config.texture_copy.input_width * 16, config.texture_copy.input_gap * 16, config.GetPhysicalOutputAddress(), config.texture_copy.output_width * 16, config.texture_copy.output_gap * 16, config.flags); } else { DisplayTransfer(config); - LOG_TRACE(HW_GPU, "DisplayTransfer: 0x%08x(%ux%u)-> " - "0x%08x(%ux%u), dst format %x, flags 0x%08X", + LOG_TRACE(HW_GPU, + "DisplayTransfer: 0x%08x(%ux%u)-> " + "0x%08x(%ux%u), dst format %x, flags 0x%08X", config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), config.GetPhysicalOutputAddress(), config.output_width.Value(), config.output_height.Value(), @@ -570,4 +565,4 @@ void Shutdown() { LOG_DEBUG(HW_GPU, "shutdown OK"); } -} // namespace +} // namespace GPU diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 81e60a66f..9b01bfc8c 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -50,7 +50,6 @@ static void MapPages(PageTable& page_table, u32 base, u32 size, u8* memory, Page page_table.attributes[base] = type; page_table.pointers[base] = memory; - page_table.cached_res_count[base] = 0; base += 1; if (memory != nullptr) @@ -200,7 +199,7 @@ void 
Write(const VAddr vaddr, const T data) { ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); break; case PageType::RasterizerCachedMemory: { - RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::FlushAndInvalidate); + RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate); std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T)); break; } @@ -208,7 +207,7 @@ void Write(const VAddr vaddr, const T data) { WriteMMIO(GetMMIOHandler(vaddr), vaddr, data); break; case PageType::RasterizerCachedSpecial: { - RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::FlushAndInvalidate); + RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate); WriteMMIO(GetMMIOHandler(vaddr), vaddr, data); break; } @@ -334,7 +333,7 @@ u8* GetPhysicalPointer(PAddr address) { return target_pointer; } -void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { +void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached) { if (start == 0) { return; } @@ -355,14 +354,10 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { } VAddr vaddr = *maybe_vaddr; - u8& res_count = current_page_table->cached_res_count[vaddr >> PAGE_BITS]; - ASSERT_MSG(count_delta <= UINT8_MAX - res_count, - "Rasterizer resource cache counter overflow!"); - ASSERT_MSG(count_delta >= -res_count, "Rasterizer resource cache counter underflow!"); + PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; - // Switch page type to cached if now cached - if (res_count == 0) { - PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; + if (cached) { + // Switch page type to cached if now cached switch (page_type) { case PageType::Unmapped: // It is not necessary for a process to have this region mapped into its address @@ -378,13 +373,8 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { default: UNREACHABLE(); } - } - - res_count += count_delta; - - // Switch page type to 
uncached if now uncached - if (res_count == 0) { - PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; + } else { + // Switch page type to uncached if now uncached switch (page_type) { case PageType::Unmapped: // It is not necessary for a process to have this region mapped into its address @@ -419,6 +409,12 @@ void RasterizerFlushRegion(PAddr start, u32 size) { } } +void RasterizerInvalidateRegion(PAddr start, u32 size) { + if (VideoCore::g_renderer != nullptr) { + VideoCore::g_renderer->Rasterizer()->InvalidateRegion(start, size); + } +} + void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) { // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be // null here @@ -450,6 +446,9 @@ void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) { case FlushMode::Flush: rasterizer->FlushRegion(physical_start, overlap_size); break; + case FlushMode::Invalidate: + rasterizer->InvalidateRegion(physical_start, overlap_size); + break; case FlushMode::FlushAndInvalidate: rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size); break; @@ -588,7 +587,7 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi } case PageType::RasterizerCachedMemory: { RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), - FlushMode::FlushAndInvalidate); + FlushMode::Invalidate); std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount); break; } @@ -596,7 +595,7 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr); DEBUG_ASSERT(handler); RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), - FlushMode::FlushAndInvalidate); + FlushMode::Invalidate); handler->WriteBlock(current_vaddr, src_buffer, copy_amount); break; } @@ -648,7 +647,7 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size } case 
PageType::RasterizerCachedMemory: { RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), - FlushMode::FlushAndInvalidate); + FlushMode::Invalidate); std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount); break; } @@ -656,7 +655,7 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr); DEBUG_ASSERT(handler); RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), - FlushMode::FlushAndInvalidate); + FlushMode::Invalidate); handler->WriteBlock(current_vaddr, zeros.data(), copy_amount); break; } diff --git a/src/core/memory.h b/src/core/memory.h index 1527ec4c7..4ec5e8d60 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -72,12 +72,6 @@ struct PageTable { * the corresponding entry in `pointers` MUST be set to null. */ std::array attributes; - - /** - * Indicates the number of externally cached resources touching a page that should be - * flushed before the memory is accessed - */ - std::array cached_res_count; }; /// Physical memory regions as seen from the ARM11 @@ -245,16 +239,20 @@ boost::optional PhysicalToVirtualAddress(PAddr addr); u8* GetPhysicalPointer(PAddr address); /** - * Adds the supplied value to the rasterizer resource cache counter of each - * page touching the region. + * Mark each page touching the region as cached. */ -void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta); +void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached); /** * Flushes any externally cached rasterizer resources touching the given region. */ void RasterizerFlushRegion(PAddr start, u32 size); +/** + * Invalidates any externally cached rasterizer resources touching the given region. + */ +void RasterizerInvalidateRegion(PAddr start, u32 size); + /** * Flushes and invalidates any externally cached rasterizer resources touching the given region. 
*/ @@ -263,6 +261,8 @@ void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size); enum class FlushMode { /// Write back modified surfaces to RAM Flush, + /// Remove region from the cache + Invalidate, /// Write back modified surfaces to RAM, and also remove them from the cache FlushAndInvalidate, }; diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp index 484713a92..8520f53b2 100644 --- a/src/tests/core/arm/arm_test_common.cpp +++ b/src/tests/core/arm/arm_test_common.cpp @@ -20,7 +20,6 @@ TestEnvironment::TestEnvironment(bool mutable_memory_) page_table->pointers.fill(nullptr); page_table->attributes.fill(Memory::PageType::Unmapped); - page_table->cached_res_count.fill(0); Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory); Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 8ef7e74c7..1d4c98189 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -38,6 +38,9 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory virtual void FlushRegion(PAddr addr, u32 size) = 0; + /// Notify rasterizer that any caches of the specified region should be invalidated + virtual void InvalidateRegion(PAddr addr, u32 size) = 0; + /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory /// and invalidated virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0; diff --git a/src/video_core/swrasterizer/swrasterizer.h b/src/video_core/swrasterizer/swrasterizer.h index 87e64f8f6..f97fe13b4 100644 --- a/src/video_core/swrasterizer/swrasterizer.h +++ b/src/video_core/swrasterizer/swrasterizer.h @@ -11,7 +11,7 @@ namespace Pica { namespace Shader { struct OutputVertex; } -} +} // namespace Pica namespace VideoCore { @@ -22,6 +22,7 @@ class SWRasterizer : public 
RasterizerInterface { void NotifyPicaRegisterChanged(u32 id) override {} void FlushAll() override {} void FlushRegion(PAddr addr, u32 size) override {} + void InvalidateRegion(PAddr addr, u32 size) override {} void FlushAndInvalidateRegion(PAddr addr, u32 size) override {} }; From 160ac25527d7a89c966d20e40bb29e9531deb16c Mon Sep 17 00:00:00 2001 From: James Rowe Date: Sat, 25 Nov 2017 13:54:35 -0700 Subject: [PATCH 03/32] OpenGL State: Change setters so they don't directly write to curstate --- .../renderer_opengl/gl_resource_manager.h | 12 +- src/video_core/renderer_opengl/gl_state.cpp | 107 ++++++++++++------ src/video_core/renderer_opengl/gl_state.h | 31 +++-- 3 files changed, 101 insertions(+), 49 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 13301ec9f..e21972537 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -36,7 +36,7 @@ public: if (handle == 0) return; glDeleteTextures(1, &handle); - OpenGLState::ResetTexture(handle); + OpenGLState::GetCurState().ResetTexture(handle).Apply(); handle = 0; } @@ -69,7 +69,7 @@ public: if (handle == 0) return; glDeleteSamplers(1, &handle); - OpenGLState::ResetSampler(handle); + OpenGLState::GetCurState().ResetSampler(handle).Apply(); handle = 0; } @@ -102,7 +102,7 @@ public: if (handle == 0) return; glDeleteProgram(handle); - OpenGLState::ResetProgram(handle); + OpenGLState::GetCurState().ResetProgram(handle).Apply(); handle = 0; } @@ -135,7 +135,7 @@ public: if (handle == 0) return; glDeleteBuffers(1, &handle); - OpenGLState::ResetBuffer(handle); + OpenGLState::GetCurState().OpenGLState::ResetBuffer(handle).Apply(); handle = 0; } @@ -168,7 +168,7 @@ public: if (handle == 0) return; glDeleteVertexArrays(1, &handle); - OpenGLState::ResetVertexArray(handle); + OpenGLState::GetCurState().ResetVertexArray(handle).Apply(); handle = 0; } @@ -201,7 +201,7 @@ 
public: if (handle == 0) return; glDeleteFramebuffers(1, &handle); - OpenGLState::ResetFramebuffer(handle); + OpenGLState::GetCurState().ResetFramebuffer(handle).Apply(); handle = 0; } diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 9fa353fe4..76354b842 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -69,6 +69,17 @@ OpenGLState::OpenGLState() { draw.uniform_buffer = 0; draw.shader_program = 0; + scissor.enabled = false; + scissor.x = 0; + scissor.y = 0; + scissor.width = 0; + scissor.height = 0; + + viewport.x = 0; + viewport.y = 0; + viewport.width = 0; + viewport.height = 0; + clip_distance = {}; } @@ -193,7 +204,7 @@ void OpenGLState::Apply() const { // Lighting LUTs if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) { glActiveTexture(TextureUnits::LightingLUT.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, cur_state.lighting_lut.texture_buffer); + glBindTexture(GL_TEXTURE_BUFFER, lighting_lut.texture_buffer); } // Fog LUT @@ -260,6 +271,26 @@ void OpenGLState::Apply() const { glUseProgram(draw.shader_program); } + // Scissor test + if (scissor.enabled != cur_state.scissor.enabled) { + if (scissor.enabled) { + glEnable(GL_SCISSOR_TEST); + } else { + glDisable(GL_SCISSOR_TEST); + } + } + + if (scissor.x != cur_state.scissor.x || scissor.y != cur_state.scissor.y || + scissor.width != cur_state.scissor.width || scissor.height != cur_state.scissor.height) { + glScissor(scissor.x, scissor.y, scissor.width, scissor.height); + } + + if (viewport.x != cur_state.viewport.x || viewport.y != cur_state.viewport.y || + viewport.width != cur_state.viewport.width || + viewport.height != cur_state.viewport.height) { + glViewport(viewport.x, viewport.y, viewport.width, viewport.height); + } + // Clip distance for (size_t i = 0; i < clip_distance.size(); ++i) { if (clip_distance[i] != cur_state.clip_distance[i]) { @@ -274,62 +305,68 @@ void 
OpenGLState::Apply() const { cur_state = *this; } -void OpenGLState::ResetTexture(GLuint handle) { - for (auto& unit : cur_state.texture_units) { +OpenGLState& OpenGLState::ResetTexture(GLuint handle) { + for (auto& unit : texture_units) { if (unit.texture_2d == handle) { unit.texture_2d = 0; } } - if (cur_state.lighting_lut.texture_buffer == handle) - cur_state.lighting_lut.texture_buffer = 0; - if (cur_state.fog_lut.texture_buffer == handle) - cur_state.fog_lut.texture_buffer = 0; - if (cur_state.proctex_noise_lut.texture_buffer == handle) - cur_state.proctex_noise_lut.texture_buffer = 0; - if (cur_state.proctex_color_map.texture_buffer == handle) - cur_state.proctex_color_map.texture_buffer = 0; - if (cur_state.proctex_alpha_map.texture_buffer == handle) - cur_state.proctex_alpha_map.texture_buffer = 0; - if (cur_state.proctex_lut.texture_buffer == handle) - cur_state.proctex_lut.texture_buffer = 0; - if (cur_state.proctex_diff_lut.texture_buffer == handle) - cur_state.proctex_diff_lut.texture_buffer = 0; + if (lighting_lut.texture_buffer == handle) + lighting_lut.texture_buffer = 0; + if (fog_lut.texture_buffer == handle) + fog_lut.texture_buffer = 0; + if (proctex_noise_lut.texture_buffer == handle) + proctex_noise_lut.texture_buffer = 0; + if (proctex_color_map.texture_buffer == handle) + proctex_color_map.texture_buffer = 0; + if (proctex_alpha_map.texture_buffer == handle) + proctex_alpha_map.texture_buffer = 0; + if (proctex_lut.texture_buffer == handle) + proctex_lut.texture_buffer = 0; + if (proctex_diff_lut.texture_buffer == handle) + proctex_diff_lut.texture_buffer = 0; + return *this; } -void OpenGLState::ResetSampler(GLuint handle) { - for (auto& unit : cur_state.texture_units) { +OpenGLState& OpenGLState::ResetSampler(GLuint handle) { + for (auto& unit : texture_units) { if (unit.sampler == handle) { unit.sampler = 0; } } + return *this; } -void OpenGLState::ResetProgram(GLuint handle) { - if (cur_state.draw.shader_program == handle) { - 
cur_state.draw.shader_program = 0; +OpenGLState& OpenGLState::ResetProgram(GLuint handle) { + if (draw.shader_program == handle) { + draw.shader_program = 0; } + return *this; } -void OpenGLState::ResetBuffer(GLuint handle) { - if (cur_state.draw.vertex_buffer == handle) { - cur_state.draw.vertex_buffer = 0; +OpenGLState& OpenGLState::ResetBuffer(GLuint handle) { + if (draw.vertex_buffer == handle) { + draw.vertex_buffer = 0; } - if (cur_state.draw.uniform_buffer == handle) { - cur_state.draw.uniform_buffer = 0; + if (draw.uniform_buffer == handle) { + draw.uniform_buffer = 0; } + return *this; } -void OpenGLState::ResetVertexArray(GLuint handle) { - if (cur_state.draw.vertex_array == handle) { - cur_state.draw.vertex_array = 0; +OpenGLState& OpenGLState::ResetVertexArray(GLuint handle) { + if (draw.vertex_array == handle) { + draw.vertex_array = 0; } + return *this; } -void OpenGLState::ResetFramebuffer(GLuint handle) { - if (cur_state.draw.read_framebuffer == handle) { - cur_state.draw.read_framebuffer = 0; +OpenGLState& OpenGLState::ResetFramebuffer(GLuint handle) { + if (draw.read_framebuffer == handle) { + draw.read_framebuffer = 0; } - if (cur_state.draw.draw_framebuffer == handle) { - cur_state.draw.draw_framebuffer = 0; + if (draw.draw_framebuffer == handle) { + draw.draw_framebuffer = 0; } + return *this; } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 437fe34c4..703aee90e 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -124,25 +124,40 @@ public: GLuint shader_program; // GL_CURRENT_PROGRAM } draw; + struct { + bool enabled; // GL_SCISSOR_TEST + GLint x; + GLint y; + GLsizei width; + GLsizei height; + } scissor; + + struct { + GLint x; + GLint y; + GLsizei width; + GLsizei height; + } viewport; + std::array clip_distance; // GL_CLIP_DISTANCE OpenGLState(); /// Get the currently active OpenGL state - static const OpenGLState& GetCurState() { 
+ static OpenGLState& GetCurState() { return cur_state; } /// Apply this state as the current OpenGL state void Apply() const; - /// Resets and unbinds any references to the given resource in the current OpenGL state - static void ResetTexture(GLuint handle); - static void ResetSampler(GLuint handle); - static void ResetProgram(GLuint handle); - static void ResetBuffer(GLuint handle); - static void ResetVertexArray(GLuint handle); - static void ResetFramebuffer(GLuint handle); + /// Resets any references to the given resource + OpenGLState& ResetTexture(GLuint handle); + OpenGLState& ResetSampler(GLuint handle); + OpenGLState& ResetProgram(GLuint handle); + OpenGLState& ResetBuffer(GLuint handle); + OpenGLState& ResetVertexArray(GLuint handle); + OpenGLState& ResetFramebuffer(GLuint handle); private: static OpenGLState cur_state; From e9e2d444ef4ce27427c5629051457bdf9ae955ca Mon Sep 17 00:00:00 2001 From: James Rowe Date: Thu, 16 Nov 2017 21:20:50 -0700 Subject: [PATCH 04/32] OpenGL Cache: Optimize Morton Copy to copy in tiles Compiles two lookup arrays of functions for the different configurations of Morton Copy. 
--- .../renderer_opengl/gl_rasterizer_cache.cpp | 240 ++++++++++++++---- .../renderer_opengl/gl_rasterizer_cache.h | 6 +- src/video_core/utils.h | 6 +- 3 files changed, 202 insertions(+), 50 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 6d47fe29f..7f9d74788 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -46,6 +46,82 @@ static const std::array depth_format_tuples = {{ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8 }}; +static bool FillSurface(const Surface& surface, const u8* fill_data, + const MathUtil::Rectangle& fill_rect) { + OpenGLState state = OpenGLState::GetCurState(); + + OpenGLState prev_state = state; + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState::ResetTexture(surface->texture.handle); + + state.scissor.enabled = true; + state.scissor.x = static_cast(fill_rect.left); + state.scissor.y = static_cast(std::min(fill_rect.top, fill_rect.bottom)); + state.scissor.width = static_cast(fill_rect.GetWidth()); + state.scissor.height = static_cast(fill_rect.GetHeight()); + + state.draw.draw_framebuffer = transfer_framebuffers[1].handle; + state.Apply(); + + if (surface->type == SurfaceType::Color || surface->type == SurfaceType::Texture) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + surface->texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + Pica::Texture::TextureInfo tex_info{}; + tex_info.format = static_cast(surface->pixel_format); + Math::Vec4 color = Pica::Texture::LookupTexture(fill_data, 0, 0, tex_info); + + std::array color_values = {color.x / 255.f, color.y / 255.f, color.z / 255.f, + color.w / 255.f}; + + state.color_mask.red_enabled = GL_TRUE; + state.color_mask.green_enabled = GL_TRUE; + state.color_mask.blue_enabled = GL_TRUE; + 
state.color_mask.alpha_enabled = GL_TRUE; + state.Apply(); + glClearBufferfv(GL_COLOR, 0, &color_values[0]); + } else if (surface->type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + surface->texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + u32 value_32bit = 0; + GLfloat value_float; + + if (surface->pixel_format == SurfaceParams::PixelFormat::D16) { + std::memcpy(&value_32bit, fill_data, 2); + value_float = value_32bit / 65535.0f; // 2^16 - 1 + } else if (surface->pixel_format == SurfaceParams::PixelFormat::D24) { + std::memcpy(&value_32bit, fill_data, 3); + value_float = value_32bit / 16777215.0f; // 2^24 - 1 + } + + state.depth.write_mask = GL_TRUE; + state.Apply(); + glClearBufferfv(GL_DEPTH, 0, &value_float); + } else if (surface->type == SurfaceType::DepthStencil) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + surface->texture.handle, 0); + + u32 value_32bit; + std::memcpy(&value_32bit, fill_data, 4); + + GLfloat value_float = (value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1 + GLint value_int = (value_32bit >> 24); + + state.depth.write_mask = GL_TRUE; + state.stencil.write_mask = -1; + state.Apply(); + glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int); + } + return true; +} + RasterizerCacheOpenGL::RasterizerCacheOpenGL() { transfer_framebuffers[0].Create(); transfer_framebuffers[1].Create(); @@ -55,55 +131,131 @@ RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { FlushAll(); } -static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, - u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, - u8* gl_data, bool morton_to_gl) { - using PixelFormat = 
CachedSurface::PixelFormat; - - u8* data_ptrs[2]; - u32 depth_stencil_shifts[2] = {24, 8}; - - if (morton_to_gl) { - std::swap(depth_stencil_shifts[0], depth_stencil_shifts[1]); - } - - if (pixel_format == PixelFormat::D24S8) { - for (unsigned y = 0; y < height; ++y) { - for (unsigned x = 0; x < width; ++x) { - const u32 coarse_y = y & ~7; - u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + - coarse_y * width * bytes_per_pixel; - u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; - - data_ptrs[morton_to_gl] = morton_data + morton_offset; - data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; - - // Swap depth and stencil value ordering since 3DS does not match OpenGL - u32 depth_stencil; - memcpy(&depth_stencil, data_ptrs[1], sizeof(u32)); - depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | - (depth_stencil >> depth_stencil_shifts[1]); - - memcpy(data_ptrs[0], &depth_stencil, sizeof(u32)); - } - } - } else { - for (unsigned y = 0; y < height; ++y) { - for (unsigned x = 0; x < width; ++x) { - const u32 coarse_y = y & ~7; - u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + - coarse_y * width * bytes_per_pixel; - u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; - - data_ptrs[morton_to_gl] = morton_data + morton_offset; - data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; - - memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); +template +static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) { + constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; + constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); + for (u32 y = 0; y < 8; ++y) { + for (u32 x = 0; x < 8; ++x) { + u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel; + u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * gl_bytes_per_pixel; + if (morton_to_gl) { + if (format == PixelFormat::D24S8) { + gl_ptr[0] = tile_ptr[3]; + 
std::memcpy(gl_ptr + 1, tile_ptr, 3); + } else { + std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel); + } + } else { + if (format == PixelFormat::D24S8) { + std::memcpy(tile_ptr, gl_ptr + 1, 3); + tile_ptr[3] = gl_ptr[0]; + } else { + std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel); + } } } } } +template +static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr start, PAddr end) { + constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; + constexpr u32 tile_size = bytes_per_pixel * 64; + + constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); + static_assert(gl_bytes_per_pixel >= bytes_per_pixel, ""); + gl_buffer += gl_bytes_per_pixel - bytes_per_pixel; + + const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); + const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); + const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size); + + ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end)); + + const u32 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel; + u32 x = (begin_pixel_index % (stride * 8)) / 8; + u32 y = (begin_pixel_index / (stride * 8)) * 8; + + gl_buffer += ((height - 8 - y) * stride + x) * gl_bytes_per_pixel; + + auto glbuf_next_tile = [&] { + x = (x + 8) % stride; + gl_buffer += 8 * gl_bytes_per_pixel; + if (!x) { + y += 8; + gl_buffer -= stride * 9 * gl_bytes_per_pixel; + } + }; + + u8* tile_buffer = Memory::GetPhysicalPointer(start); + + if (start < aligned_start && !morton_to_gl) { + std::array tmp_buf; + MortonCopyTile(stride, &tmp_buf[0], gl_buffer); + std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start], + std::min(aligned_start, end) - start); + + tile_buffer += aligned_start - start; + glbuf_next_tile(); + } + + u8* const buffer_end = tile_buffer + aligned_end - aligned_start; + while (tile_buffer < buffer_end) { + MortonCopyTile(stride, tile_buffer, gl_buffer); + tile_buffer += 
tile_size; + glbuf_next_tile(); + } + + if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) { + std::array tmp_buf; + MortonCopyTile(stride, &tmp_buf[0], gl_buffer); + std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end); + } +} + +static constexpr std::array morton_to_gl_fns = { + MortonCopy, // 0 + MortonCopy, // 1 + MortonCopy, // 2 + MortonCopy, // 3 + MortonCopy, // 4 + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, // 5 - 13 + MortonCopy, // 14 + nullptr, // 15 + MortonCopy, // 16 + MortonCopy // 17 +}; + +static constexpr std::array gl_to_morton_fns = { + MortonCopy, // 0 + MortonCopy, // 1 + MortonCopy, // 2 + MortonCopy, // 3 + MortonCopy, // 4 + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, // 5 - 13 + MortonCopy, // 14 + nullptr, // 15 + MortonCopy, // 16 + MortonCopy // 17 +}; + void RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle& src_rect, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index cc16aeba0..9c2abcd84 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -71,8 +71,8 @@ struct CachedSurface { Invalid = 4, }; - static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) { - static const std::array bpp_table = { + static constexpr unsigned int GetFormatBpp(CachedSurface::PixelFormat format) { + constexpr std::array bpp_table = { 32, // RGBA8 24, // RGB8 16, // RGB5A1 @@ -142,7 +142,7 @@ struct CachedSurface { return false; } - static SurfaceType GetFormatType(PixelFormat pixel_format) { + static constexpr SurfaceType GetFormatType(PixelFormat pixel_format) { if ((unsigned int)pixel_format < 5) { return SurfaceType::Color; } diff --git a/src/video_core/utils.h b/src/video_core/utils.h index 
d8567f314..aa4e1bd38 100644 --- a/src/video_core/utils.h +++ b/src/video_core/utils.h @@ -9,9 +9,9 @@ namespace VideoCore { // 8x8 Z-Order coordinate from 2D coordinates -static inline u32 MortonInterleave(u32 x, u32 y) { - static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15}; - static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a}; +static constexpr u32 MortonInterleave(u32 x, u32 y) { + constexpr u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15}; + constexpr u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a}; return xlut[x % 8] + ylut[y % 8]; } From 0b98b768f5cd194467ba25a7074af87da2b180a5 Mon Sep 17 00:00:00 2001 From: James Rowe Date: Mon, 13 Nov 2017 22:37:49 -0700 Subject: [PATCH 05/32] OpenGL Cache: Add surface utility functions Separates creating and filling surfaces into static functions that can be reused from the different RasterizerCache methods. --- .../renderer_opengl/gl_rasterizer_cache.cpp | 87 ++++++++++--------- 1 file changed, 46 insertions(+), 41 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 7f9d74788..e37da1d8e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -25,6 +25,11 @@ #include "video_core/utils.h" #include "video_core/video_core.h" +using SurfaceType = SurfaceParams::SurfaceType; +using PixelFormat = SurfaceParams::PixelFormat; + +static std::array transfer_framebuffers; + struct FormatTuple { GLint internal_format; GLenum format; @@ -46,6 +51,27 @@ static const std::array depth_format_tuples = {{ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8 }}; +static constexpr FormatTuple tex_tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; + +static const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { + const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); + 
if (type == SurfaceType::Color) { + ASSERT((size_t)pixel_format < fb_format_tuples.size()); + return fb_format_tuples[(unsigned int)pixel_format]; + } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { + size_t tuple_idx = (size_t)pixel_format - 14; + ASSERT(tuple_idx < depth_format_tuples.size()); + return depth_format_tuples[tuple_idx]; + } else { + return tex_tuple; + } +} + +template +constexpr auto RangeFromInterval(Map& map, const Interval& interval) { + return boost::make_iterator_range(map.equal_range(interval)); +} + static bool FillSurface(const Surface& surface, const u8* fill_data, const MathUtil::Rectangle& fill_rect) { OpenGLState state = OpenGLState::GetCurState(); @@ -53,11 +79,11 @@ static bool FillSurface(const Surface& surface, const u8* fill_data, OpenGLState prev_state = state; SCOPE_EXIT({ prev_state.Apply(); }); - OpenGLState::ResetTexture(surface->texture.handle); + state.ResetTexture(surface->texture.handle); state.scissor.enabled = true; state.scissor.x = static_cast(fill_rect.left); - state.scissor.y = static_cast(std::min(fill_rect.top, fill_rect.bottom)); + state.scissor.y = static_cast(fill_rect.bottom); state.scissor.width = static_cast(fill_rect.GetWidth()); state.scissor.height = static_cast(fill_rect.GetHeight()); @@ -256,24 +282,22 @@ static constexpr std::array gl MortonCopy // 17 }; -void RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, - CachedSurface::SurfaceType type, - const MathUtil::Rectangle& src_rect, - const MathUtil::Rectangle& dst_rect) { - using SurfaceType = CachedSurface::SurfaceType; +static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rect, GLuint dst_tex, + const MathUtil::Rectangle& dst_rect, SurfaceType type) { + OpenGLState state = OpenGLState::GetCurState(); - OpenGLState cur_state = OpenGLState::GetCurState(); + OpenGLState prev_state = state; + SCOPE_EXIT({ prev_state.Apply(); }); // Make sure textures aren't bound to texture units, since 
going to bind them to framebuffer // components - OpenGLState::ResetTexture(src_tex); - OpenGLState::ResetTexture(dst_tex); + state.ResetTexture(src_tex); + state.ResetTexture(dst_tex); // Keep track of previous framebuffer bindings - GLuint old_fbs[2] = {cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer}; - cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle; - cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle; - cur_state.Apply(); + state.draw.read_framebuffer = transfer_framebuffers[0].handle; + state.draw.draw_framebuffer = transfer_framebuffers[1].handle; + state.Apply(); u32 buffers = 0; @@ -311,14 +335,11 @@ void RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; } - glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, - dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, + glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left, + dst_rect.bottom, dst_rect.right, dst_rect.top, buffers, buffers == GL_COLOR_BUFFER_BIT ? 
GL_LINEAR : GL_NEAREST); - // Restore previous framebuffer bindings - cur_state.draw.read_framebuffer = old_fbs[0]; - cur_state.draw.draw_framebuffer = old_fbs[1]; - cur_state.Apply(); + return true; } bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, @@ -336,11 +357,9 @@ bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, return true; } -static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format, - u32 width, u32 height) { - // Allocate an uninitialized texture of appropriate size and format for the surface - using SurfaceType = CachedSurface::SurfaceType; - +// Allocate an uninitialized texture of appropriate size and format for the surface +static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tuple, u32 width, + u32 height) { OpenGLState cur_state = OpenGLState::GetCurState(); // Keep track of previous texture bindings @@ -349,22 +368,8 @@ static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pi cur_state.Apply(); glActiveTexture(GL_TEXTURE0); - SurfaceType type = CachedSurface::GetFormatType(pixel_format); - - FormatTuple tuple; - if (type == SurfaceType::Color) { - ASSERT((size_t)pixel_format < fb_format_tuples.size()); - tuple = fb_format_tuples[(unsigned int)pixel_format]; - } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { - size_t tuple_idx = (size_t)pixel_format - 14; - ASSERT(tuple_idx < depth_format_tuples.size()); - tuple = depth_format_tuples[tuple_idx]; - } else { - tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; - } - - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0, tuple.format, - tuple.type, nullptr); + glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0, + format_tuple.format, format_tuple.type, nullptr); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); From 
3e1cbb7d14a17425dc1b2abc5f71b20050a2afb1 Mon Sep 17 00:00:00 2001 From: James Rowe Date: Fri, 17 Nov 2017 10:27:51 -0700 Subject: [PATCH 06/32] OpenGL Cache: Split CachedSurface Breaks CachedSurface into two classes, the parameters used to create or find a cached surface, and the actual cached surface. This also adds a few helper methods for getting surfaces from cache --- .../renderer_opengl/gl_rasterizer_cache.cpp | 781 ++++++++++++------ .../renderer_opengl/gl_rasterizer_cache.h | 156 +++- src/video_core/renderer_opengl/gl_state.h | 2 +- 3 files changed, 649 insertions(+), 290 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index e37da1d8e..756a76ea6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -342,6 +342,231 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rec return true; } +SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { + SurfaceParams params = *this; + + const u32 stride_tiled_bytes = BytesInPixels(stride * (is_tiled ? 8 : 1)); + PAddr aligned_start = + addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); + PAddr aligned_end = + addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); + + if (aligned_end - aligned_start > stride_tiled_bytes) { + params.addr = aligned_start; + params.height = (aligned_end - aligned_start) / BytesInPixels(stride); + } else { + // 1 row + ASSERT(aligned_end - aligned_start == stride_tiled_bytes); + const u32 tiled_alignment = BytesInPixels(is_tiled ? 
8 * 8 : 1); + aligned_start = + addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment); + aligned_end = + addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment); + params.addr = aligned_start; + params.width = PixelsInBytes(aligned_end - aligned_start) / (is_tiled ? 8 : 1); + params.height = is_tiled ? 8 : 1; + } + params.UpdateParams(); + + return params; +} + +SurfaceInterval SurfaceParams::GetSubRectInterval(MathUtil::Rectangle unscaled_rect) const { + if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) { + return {}; + } + + if (unscaled_rect.bottom > unscaled_rect.top) { + std::swap(unscaled_rect.top, unscaled_rect.bottom); + } + + if (is_tiled) { + unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8; + unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8; + unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8; + unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8; + } + + const u32 stride_tiled = (!is_tiled ? stride : stride * 8); + + const u32 pixel_offset = + stride_tiled * (!is_tiled ? 
unscaled_rect.bottom : (height / 8) - unscaled_rect.top) + + unscaled_rect.left; + + const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth(); + + return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)}; +} + +MathUtil::Rectangle SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { + const u32 begin_pixel_index = PixelsInBytes(sub_surface.addr - addr); + + if (is_tiled) { + const int x0 = (begin_pixel_index % (stride * 8)) / 8; + const int y0 = (begin_pixel_index / (stride * 8)) * 8; + return MathUtil::Rectangle(x0, height - y0, x0 + sub_surface.width, + height - (y0 + sub_surface.height)); // Top to bottom + } + + const int x0 = begin_pixel_index % stride; + const int y0 = begin_pixel_index / stride; + return MathUtil::Rectangle(x0, y0 + sub_surface.height, x0 + sub_surface.width, + y0); // Bottom to top +} + +MathUtil::Rectangle SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const { + auto rect = GetSubRect(sub_surface); + rect.left = rect.left * res_scale; + rect.right = rect.right * res_scale; + rect.top = rect.top * res_scale; + rect.bottom = rect.bottom * res_scale; + return rect; +} + +bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { + return (other_surface.addr == addr && other_surface.width == width && + other_surface.height == height && other_surface.stride == stride && + other_surface.pixel_format == pixel_format && other_surface.is_tiled == is_tiled); +} + +bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { + if (sub_surface.addr < addr || sub_surface.end > end || sub_surface.stride != stride || + sub_surface.pixel_format != pixel_format || sub_surface.is_tiled != is_tiled || + (sub_surface.addr - addr) * 8 % GetFormatBpp() != 0) + return false; + + auto rect = GetSubRect(sub_surface); + + if (rect.left + sub_surface.width > stride) { + return false; + } + + if (is_tiled) { + return 
PixelsInBytes(sub_surface.addr - addr) % 64 == 0 && sub_surface.height % 8 == 0 && + sub_surface.width % 8 == 0; + } + + return true; +} + +bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { + if (pixel_format == PixelFormat::Invalid || pixel_format != expanded_surface.pixel_format || + is_tiled != expanded_surface.is_tiled || addr > expanded_surface.end || + expanded_surface.addr > end || stride != expanded_surface.stride) + return false; + + const u32 byte_offset = + std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr); + + const int x0 = byte_offset % BytesInPixels(stride); + const int y0 = byte_offset / BytesInPixels(stride); + + return x0 == 0 && (!is_tiled || y0 % 8 == 0); +} + +bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { + if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr || + end < texcopy_params.end || ((texcopy_params.addr - addr) * 8) % GetFormatBpp() != 0 || + (texcopy_params.width * 8) % GetFormatBpp() != 0 || + (texcopy_params.stride * 8) % GetFormatBpp() != 0) + return false; + + const u32 begin_pixel_index = PixelsInBytes(texcopy_params.addr - addr); + const int x0 = begin_pixel_index % stride; + const int y0 = begin_pixel_index / stride; + + if (!is_tiled) + return ((texcopy_params.height == 1 || PixelsInBytes(texcopy_params.stride) == stride) && + x0 + PixelsInBytes(texcopy_params.width) <= stride); + + return (PixelsInBytes(texcopy_params.addr - addr) % 64 == 0 && + PixelsInBytes(texcopy_params.width) % 64 == 0 && + (texcopy_params.height == 1 || PixelsInBytes(texcopy_params.stride) == stride * 8) && + x0 + PixelsInBytes(texcopy_params.width / 8) <= stride); +} + +bool CachedSurface::CanFill(const SurfaceParams& dest_surface, + SurfaceInterval fill_interval) const { + if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && + boost::icl::first(fill_interval) >= addr && + boost::icl::last_next(fill_interval) <= end && // dest_surface is 
within our fill range + dest_surface.FromInterval(fill_interval).GetInterval() == + fill_interval) { // make sure interval is a rectangle in dest surface + if (fill_size * 8 != dest_surface.GetFormatBpp()) { + // Check if bits repeat for our fill_size + const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u); + std::vector fill_test(fill_size * dest_bytes_per_pixel); + + for (u32 i = 0; i < dest_bytes_per_pixel; ++i) + std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); + + for (u32 i = 0; i < fill_size; ++i) + if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], + dest_bytes_per_pixel) != 0) + return false; + + if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) + return false; + } + return true; + } + return false; +} + +bool CachedSurface::CanCopy(const SurfaceParams& dest_surface, + SurfaceInterval copy_interval) const { + SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); + ASSERT(subrect_params.GetInterval() == copy_interval); + if (CanSubRect(subrect_params)) + return true; + + if (CanFill(dest_surface, copy_interval)) + return true; + + return false; +} + +SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const { + SurfaceInterval result{}; + const auto valid_regions = + SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions; + for (auto& valid_interval : valid_regions) { + const SurfaceInterval aligned_interval{ + addr + Common::AlignUp(boost::icl::first(valid_interval) - addr, + BytesInPixels(is_tiled ? 8 * 8 : 1)), + addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr, + BytesInPixels(is_tiled ? 8 * 8 : 1))}; + + if (BytesInPixels(is_tiled ? 8 * 8 : 1) > boost::icl::length(valid_interval) || + boost::icl::length(aligned_interval) == 0) { + continue; + } + + // Get the rectangle within aligned_interval + const u32 stride_bytes = BytesInPixels(stride) * (is_tiled ? 
8 : 1); + SurfaceInterval rect_interval{ + addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes), + addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes), + }; + if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) { + // 1 row + rect_interval = aligned_interval; + } else if (boost::icl::length(rect_interval) == 0) { + // 2 rows that do not make a rectangle, return the larger one + const SurfaceInterval row1{boost::icl::first(aligned_interval), + boost::icl::first(rect_interval)}; + const SurfaceInterval row2{boost::icl::first(rect_interval), + boost::icl::last_next(aligned_interval)}; + rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? row1 : row2; + } + + if (boost::icl::length(rect_interval) > boost::icl::length(result)) { + result = rect_interval; + } + } + return result; +} + bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle& src_rect, CachedSurface* dst_surface, @@ -381,201 +606,61 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup cur_state.Apply(); } -MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192)); -CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, - bool load_if_create) { - using PixelFormat = CachedSurface::PixelFormat; - using SurfaceType = CachedSurface::SurfaceType; +MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); +void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { + ASSERT(type != SurfaceType::Fill); - if (params.addr == 0) { - return nullptr; + const u8* const texture_src_data = Memory::GetPhysicalPointer(addr); + if (texture_src_data == nullptr) + return; + + if (gl_buffer == nullptr) { + gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); + gl_buffer.reset(new u8[gl_buffer_size]); } - u32 
params_size = - params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8; + // TODO: Should probably be done in ::Memory:: and check for other regions too + if (load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END) + load_end = Memory::VRAM_VADDR_END; - // Check for an exact match in existing surfaces - CachedSurface* best_exact_surface = nullptr; - float exact_surface_goodness = -1.f; + if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR) + load_start = Memory::VRAM_VADDR; - auto surface_interval = - boost::icl::interval::right_open(params.addr, params.addr + params_size); - auto range = surface_cache.equal_range(surface_interval); - for (auto it = range.first; it != range.second; ++it) { - for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { - CachedSurface* surface = it2->get(); + MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); - // Check if the request matches the surface exactly - if (params.addr == surface->addr && params.width == surface->width && - params.height == surface->height && params.pixel_format == surface->pixel_format) { - // Make sure optional param-matching criteria are fulfilled - bool tiling_match = (params.is_tiled == surface->is_tiled); - bool res_scale_match = (params.res_scale_width == surface->res_scale_width && - params.res_scale_height == surface->res_scale_height); - if (!match_res_scale || res_scale_match) { - // Prioritize same-tiling and highest resolution surfaces - float match_goodness = - (float)tiling_match + surface->res_scale_width * surface->res_scale_height; - if (match_goodness > exact_surface_goodness || surface->dirty) { - exact_surface_goodness = match_goodness; - best_exact_surface = surface; - } - } - } - } - } + ASSERT(load_start >= addr && load_end <= end); + const u32 start_offset = load_start - addr; - // Return the best exact surface if found - if (best_exact_surface != nullptr) { - return best_exact_surface; - } - - // No matching surfaces 
found, so create a new one - u8* texture_src_data = Memory::GetPhysicalPointer(params.addr); - if (texture_src_data == nullptr) { - return nullptr; - } - - MICROPROFILE_SCOPE(OpenGL_SurfaceUpload); - - // Stride only applies to linear images. - ASSERT(params.pixel_stride == 0 || !params.is_tiled); - - std::shared_ptr new_surface = std::make_shared(); - - new_surface->addr = params.addr; - new_surface->size = params_size; - - new_surface->texture.Create(); - new_surface->width = params.width; - new_surface->height = params.height; - new_surface->pixel_stride = params.pixel_stride; - new_surface->res_scale_width = params.res_scale_width; - new_surface->res_scale_height = params.res_scale_height; - - new_surface->is_tiled = params.is_tiled; - new_surface->pixel_format = params.pixel_format; - new_surface->dirty = false; - - if (!load_if_create) { - // Don't load any data; just allocate the surface's texture - AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, - new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); + if (!is_tiled) { + ASSERT(type == SurfaceType::Color); + std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, + load_end - load_start); } else { - // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead - // of memory upload below if that's a common scenario in some game + if (type == SurfaceType::Texture) { + Pica::Texture::TextureInfo tex_info{}; + tex_info.width = width; + tex_info.height = height; + tex_info.format = static_cast(pixel_format); + tex_info.SetDefaultStride(); + tex_info.physical_address = addr; - Memory::RasterizerFlushRegion(params.addr, params_size); + const auto load_interval = SurfaceInterval(load_start, load_end); + const auto rect = GetSubRect(FromInterval(load_interval)); + ASSERT(FromInterval(load_interval).GetInterval() == load_interval); - // Load data from memory to the new surface - OpenGLState cur_state = OpenGLState::GetCurState(); 
- - GLuint old_tex = cur_state.texture_units[0].texture_2d; - cur_state.texture_units[0].texture_2d = new_surface->texture.handle; - cur_state.Apply(); - glActiveTexture(GL_TEXTURE0); - - if (!new_surface->is_tiled) { - // TODO: Ensure this will always be a color format, not a depth or other format - ASSERT((size_t)new_surface->pixel_format < fb_format_tuples.size()); - const FormatTuple& tuple = fb_format_tuples[(unsigned int)params.pixel_format]; - - glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)new_surface->pixel_stride); - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, - tuple.format, tuple.type, texture_src_data); - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - } else { - SurfaceType type = CachedSurface::GetFormatType(new_surface->pixel_format); - if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) { - FormatTuple tuple; - if ((size_t)params.pixel_format < fb_format_tuples.size()) { - tuple = fb_format_tuples[(unsigned int)params.pixel_format]; - } else { - // Texture - tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; + for (unsigned y = rect.bottom; y < rect.top; ++y) { + for (unsigned x = rect.left; x < rect.right; ++x) { + auto vec4 = + Pica::Texture::LookupTexture(texture_src_data, x, height - 1 - y, tex_info); + const size_t offset = (x + (width * y)) * 4; + std::memcpy(&gl_buffer[offset], vec4.AsArray(), 4); } - - std::vector> tex_buffer(params.width * params.height); - - Pica::Texture::TextureInfo tex_info; - tex_info.width = params.width; - tex_info.height = params.height; - tex_info.format = (Pica::TexturingRegs::TextureFormat)params.pixel_format; - tex_info.SetDefaultStride(); - tex_info.physical_address = params.addr; - - for (unsigned y = 0; y < params.height; ++y) { - for (unsigned x = 0; x < params.width; ++x) { - tex_buffer[x + params.width * y] = Pica::Texture::LookupTexture( - texture_src_data, x, params.height - 1 - y, tex_info); - } - } - - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, 
params.width, params.height, - 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data()); - } else { - // Depth/Stencil formats need special treatment since they aren't sampleable using - // LookupTexture and can't use RGBA format - size_t tuple_idx = (size_t)params.pixel_format - 14; - ASSERT(tuple_idx < depth_format_tuples.size()); - const FormatTuple& tuple = depth_format_tuples[tuple_idx]; - - u32 bytes_per_pixel = CachedSurface::GetFormatBpp(params.pixel_format) / 8; - - // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type - bool use_4bpp = (params.pixel_format == PixelFormat::D24); - - u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel; - - std::vector temp_fb_depth_buffer(params.width * params.height * - gl_bytes_per_pixel); - - u8* temp_fb_depth_buffer_ptr = - use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data(); - - MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, - gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, - true); - - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, - 0, tuple.format, tuple.type, temp_fb_depth_buffer.data()); } + } else { + morton_to_gl_fns[static_cast(pixel_format)](stride, height, &gl_buffer[0], addr, + load_start, load_end); } - - // If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface - if (new_surface->res_scale_width != 1.f || new_surface->res_scale_height != 1.f) { - OGLTexture scaled_texture; - scaled_texture.Create(); - - AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, - new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); - BlitTextures(new_surface->texture.handle, scaled_texture.handle, - CachedSurface::GetFormatType(new_surface->pixel_format), - MathUtil::Rectangle(0, 0, new_surface->width, new_surface->height), - MathUtil::Rectangle(0, 0, new_surface->GetScaledWidth(), - new_surface->GetScaledHeight())); - - 
new_surface->texture.Release(); - new_surface->texture.handle = scaled_texture.handle; - scaled_texture.handle = 0; - cur_state.texture_units[0].texture_2d = new_surface->texture.handle; - cur_state.Apply(); - } - - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - - cur_state.texture_units[0].texture_2d = old_tex; - cur_state.Apply(); } - - Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1); - surface_cache.add(std::make_pair(boost::icl::interval::right_open( - new_surface->addr, new_surface->addr + new_surface->size), - std::set>({new_surface}))); - return new_surface.get(); } CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, @@ -826,102 +911,272 @@ CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryF return nullptr; } -MICROPROFILE_DEFINE(OpenGL_SurfaceDownload, "OpenGL", "Surface Download", MP_RGB(128, 192, 64)); -void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) { - using PixelFormat = CachedSurface::PixelFormat; - using SurfaceType = CachedSurface::SurfaceType; - - if (!surface->dirty) { +MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); +void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { + u8* const dst_buffer = Memory::GetPhysicalPointer(addr); + if (dst_buffer == nullptr) return; + + ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format)); + + // TODO: Should probably be done in ::Memory:: and check for other regions too + // same as loadglbuffer() + if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END) + flush_end = Memory::VRAM_VADDR_END; + + if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR) + flush_start = 
Memory::VRAM_VADDR; + + MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); + + ASSERT(flush_start >= addr && flush_end <= end); + const u32 start_offset = flush_start - addr; + const u32 end_offset = flush_end - addr; + + if (type == SurfaceType::Fill) { + const u32 coarse_start_offset = start_offset - (start_offset % fill_size); + const u32 backup_bytes = start_offset % fill_size; + std::array backup_data; + if (backup_bytes) + std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes); + + for (u32 offset = coarse_start_offset; offset < end_offset; offset += fill_size) + std::memcpy(&dst_buffer[offset], &fill_data[0], + std::min(fill_size, end_offset - offset)); + + if (backup_bytes) + std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); + } else if (!is_tiled) { + ASSERT(type == SurfaceType::Color); + std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start); + } else { + gl_to_morton_fns[static_cast(pixel_format)](stride, height, &gl_buffer[0], addr, + flush_start, flush_end); } +} - MICROPROFILE_SCOPE(OpenGL_SurfaceDownload); - - u8* dst_buffer = Memory::GetPhysicalPointer(surface->addr); - if (dst_buffer == nullptr) { +void CachedSurface::UploadGLTexture(const MathUtil::Rectangle& rect) { + if (type == SurfaceType::Fill) return; + + ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format)); + + // Load data from memory to the surface + GLint x0 = static_cast(rect.left); + GLint y0 = static_cast(rect.bottom); + size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format); + + const FormatTuple& tuple = GetFormatTuple(pixel_format); + GLuint target_tex = texture.handle; + + // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in + // surface + OGLTexture unscaled_tex; + if (res_scale != 1) { + x0 = 0; + y0 = 0; + + unscaled_tex.Create(); + AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); + 
target_tex = unscaled_tex.handle; } OpenGLState cur_state = OpenGLState::GetCurState(); + GLuint old_tex = cur_state.texture_units[0].texture_2d; - - OGLTexture unscaled_tex; - GLuint texture_to_flush = surface->texture.handle; - - // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush - if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) { - unscaled_tex.Create(); - - AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width, - surface->height); - BlitTextures( - surface->texture.handle, unscaled_tex.handle, - CachedSurface::GetFormatType(surface->pixel_format), - MathUtil::Rectangle(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()), - MathUtil::Rectangle(0, 0, surface->width, surface->height)); - - texture_to_flush = unscaled_tex.handle; - } - - cur_state.texture_units[0].texture_2d = texture_to_flush; + cur_state.texture_units[0].texture_2d = target_tex; cur_state.Apply(); + + // Ensure no bad interactions with GL_UNPACK_ALIGNMENT + ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(stride)); + glActiveTexture(GL_TEXTURE0); + glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast(rect.GetWidth()), + static_cast(rect.GetHeight()), tuple.format, tuple.type, + &gl_buffer[buffer_offset]); - if (!surface->is_tiled) { - // TODO: Ensure this will always be a color format, not a depth or other format - ASSERT((size_t)surface->pixel_format < fb_format_tuples.size()); - const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format]; - - glPixelStorei(GL_PACK_ROW_LENGTH, (GLint)surface->pixel_stride); - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, dst_buffer); - glPixelStorei(GL_PACK_ROW_LENGTH, 0); - } else { - SurfaceType type = CachedSurface::GetFormatType(surface->pixel_format); - if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) { - ASSERT((size_t)surface->pixel_format < 
fb_format_tuples.size()); - const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format]; - - u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8; - - std::vector temp_gl_buffer(surface->width * surface->height * bytes_per_pixel); - - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data()); - - // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion - // is necessary. - MortonCopyPixels(surface->pixel_format, surface->width, surface->height, - bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(), - false); - } else { - // Depth/Stencil formats need special treatment since they aren't sampleable using - // LookupTexture and can't use RGBA format - size_t tuple_idx = (size_t)surface->pixel_format - 14; - ASSERT(tuple_idx < depth_format_tuples.size()); - const FormatTuple& tuple = depth_format_tuples[tuple_idx]; - - u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8; - - // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type - bool use_4bpp = (surface->pixel_format == PixelFormat::D24); - - u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel; - - std::vector temp_gl_buffer(surface->width * surface->height * gl_bytes_per_pixel); - - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data()); - - u8* temp_gl_buffer_ptr = use_4bpp ? 
temp_gl_buffer.data() + 1 : temp_gl_buffer.data(); - - MortonCopyPixels(surface->pixel_format, surface->width, surface->height, - bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr, - false); - } - } - - surface->dirty = false; + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); cur_state.texture_units[0].texture_2d = old_tex; cur_state.Apply(); + + if (res_scale != 1) { + auto scaled_rect = rect; + scaled_rect.left *= res_scale; + scaled_rect.top *= res_scale; + scaled_rect.right *= res_scale; + scaled_rect.bottom *= res_scale; + + BlitTextures(unscaled_tex.handle, {0, rect.GetHeight(), rect.GetWidth(), 0}, texture.handle, + scaled_rect, type); + } +} +void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle& rect) { + if (type == SurfaceType::Fill) + return; + + if (gl_buffer == nullptr) { + gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); + gl_buffer.reset(new u8[gl_buffer_size]); + } + + OpenGLState state = OpenGLState::GetCurState(); + OpenGLState prev_state = state; + SCOPE_EXIT({ prev_state.Apply(); }); + + const FormatTuple& tuple = GetFormatTuple(pixel_format); + + // Ensure no bad interactions with GL_PACK_ALIGNMENT + ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); + glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(stride)); + size_t buffer_offset = (rect.bottom * stride + rect.left) * GetGLBytesPerPixel(pixel_format); + + // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush + OGLTexture unscaled_tex; + if (res_scale != 1) { + auto scaled_rect = rect; + scaled_rect.left *= res_scale; + scaled_rect.top *= res_scale; + scaled_rect.right *= res_scale; + scaled_rect.bottom *= res_scale; + + unscaled_tex.Create(); + AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); + BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, rect, type); + + state.texture_units[0].texture_2d = unscaled_tex.handle; + state.Apply(); + + glActiveTexture(GL_TEXTURE0); + 
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]); + } else { + state.ResetTexture(texture.handle); + state.draw.read_framebuffer = transfer_framebuffers[0].handle; + state.Apply(); + + if (type == SurfaceType::Color || type == SurfaceType::Texture) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + texture.handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + } else if (type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + texture.handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + } else { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + texture.handle, 0); + } + glReadPixels(static_cast(rect.left), static_cast(rect.bottom), + static_cast(rect.GetWidth()), static_cast(rect.GetHeight()), + tuple.format, tuple.type, &gl_buffer[buffer_offset]); + } + + glPixelStorei(GL_PACK_ROW_LENGTH, 0); +} + +enum MatchFlags { + Invalid = 1, // Flag that can be applied to other match types, invalid matches require + // validation before they can be used + Exact = 1 << 1, // Surfaces perfectly match + SubRect = 1 << 2, // Surface encompasses params + Copy = 1 << 3, // Surface we can copy from + Expand = 1 << 4, // Surface that can expand params + TexCopy = 1 << 5 // Surface that will match a display transfer "texture copy" parameters +}; + +constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) { + return static_cast(static_cast(lhs) | static_cast(rhs)); +} + +/// Get the best surface match (and its match type) for the given flags +template +Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& 
params, + ScaleMatch match_scale_type, + boost::optional validate_interval = boost::none) { + Surface match_surface = nullptr; + bool match_valid = false; + u32 match_scale = 0; + SurfaceInterval match_interval{}; + + for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { + for (auto& surface : pair.second) { + const bool res_scale_matched = match_scale_type == ScaleMatch::Exact + ? (params.res_scale == surface->res_scale) + : (params.res_scale <= surface->res_scale); + bool is_valid = + find_flags & MatchFlags::Copy ? true + : // validity will be checked in GetCopyableInterval + surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); + + if (!(find_flags & MatchFlags::Invalid) && !is_valid) + continue; + + auto IsMatch_Helper = [&](auto check_type, auto match_fn) { + if (!(find_flags & check_type)) + return; + + bool matched; + SurfaceInterval surface_interval; + std::tie(matched, surface_interval) = match_fn(); + if (!matched) + return; + + if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && + surface->type != SurfaceType::Fill) + return; + + // Found a match, update only if this is better than the previous one + auto UpdateMatch = [&] { + match_surface = surface; + match_valid = is_valid; + match_scale = surface->res_scale; + match_interval = surface_interval; + }; + + if (surface->res_scale > match_scale) { + UpdateMatch(); + return; + } else if (surface->res_scale < match_scale) { + return; + } + + if (is_valid && !match_valid) { + UpdateMatch(); + return; + } else if (is_valid != match_valid) { + return; + } + + if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) { + UpdateMatch(); + } + }; + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); + }); + 
IsMatch_Helper(std::integral_constant{}, [&] { + auto copy_interval = + params.FromInterval(*validate_interval).GetCopyableInterval(surface); + bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && + surface->CanCopy(params, copy_interval); + return std::make_pair(matched, copy_interval); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanExpand(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); + }); + } + } + return match_surface; } void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 9c2abcd84..92bf7d3be 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -13,6 +13,7 @@ #pragma GCC diagnostic ignored "-Wunused-local-typedefs" #endif #include +#include #ifdef __GNUC__ #pragma GCC diagnostic pop #endif @@ -20,21 +21,37 @@ #include "common/assert.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/math_util.h" #include "core/hw/gpu.h" #include "video_core/regs_framebuffer.h" #include "video_core/regs_texturing.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -namespace MathUtil { -template -struct Rectangle; -} - struct CachedSurface; +using Surface = std::shared_ptr; +using SurfaceSet = std::set; -using SurfaceCache = boost::icl::interval_map>>; +using SurfaceRegions = boost::icl::interval_set; +using SurfaceMap = boost::icl::interval_map; +using SurfaceCache = boost::icl::interval_map; -struct CachedSurface { +using SurfaceInterval = SurfaceCache::interval_type; +static_assert(std::is_same() && + std::is_same(), + "incorrect interval types"); + +using SurfaceRect_Tuple = std::tuple>; +using 
SurfaceSurfaceRect_Tuple = std::tuple>; + +using PageMap = boost::icl::interval_map; + +enum class ScaleMatch { + Exact, // only accept same res scale + Upscale, // only allow higher scale than params + Ignore // accept every scaled res +}; + +struct SurfaceParams { enum class PixelFormat { // First 5 formats are shared between textures and color buffers RGBA8 = 0, @@ -68,10 +85,11 @@ struct CachedSurface { Texture = 1, Depth = 2, DepthStencil = 3, - Invalid = 4, + Fill = 4, + Invalid = 5 }; - static constexpr unsigned int GetFormatBpp(CachedSurface::PixelFormat format) { + static constexpr unsigned int GetFormatBpp(PixelFormat format) { constexpr std::array bpp_table = { 32, // RGBA8 24, // RGB8 @@ -93,8 +111,11 @@ struct CachedSurface { 32, // D24S8 }; - ASSERT((unsigned int)format < ARRAY_SIZE(bpp_table)); - return bpp_table[(unsigned int)format]; + ASSERT(static_cast(format) < bpp_table.size()); + return bpp_table[static_cast(format)]; + } + unsigned int GetFormatBpp() const { + return GetFormatBpp(pixel_format); } static PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format) { @@ -162,31 +183,114 @@ struct CachedSurface { return SurfaceType::Invalid; } + /// Update the params "size", "end" and "type" from the already set "addr", "width", "height" + /// and "pixel_format" + void UpdateParams() { + if (stride == 0) { + stride = width; + } + type = GetFormatType(pixel_format); + size = !is_tiled ? 
BytesInPixels(stride * (height - 1) + width) + : BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8); + end = addr + size; + } + + SurfaceInterval GetInterval() const { + return SurfaceInterval::right_open(addr, end); + } + + // Returns the outer rectangle containing "interval" + SurfaceParams FromInterval(SurfaceInterval interval) const; + + SurfaceInterval GetSubRectInterval(MathUtil::Rectangle unscaled_rect) const; + + // Returns the region of the biggest valid rectange within interval + SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; + u32 GetScaledWidth() const { - return (u32)(width * res_scale_width); + return width * res_scale; } u32 GetScaledHeight() const { - return (u32)(height * res_scale_height); + return height * res_scale; } - PAddr addr; - u32 size; + MathUtil::Rectangle GetRect() const { + return {0, height, width, 0}; + } - PAddr min_valid; - PAddr max_valid; + MathUtil::Rectangle GetScaledRect() const { + return {0, GetScaledHeight(), GetScaledWidth(), 0}; + } + + u32 PixelsInBytes(u32 size) const { + return size * 8 / GetFormatBpp(pixel_format); + } + + u32 BytesInPixels(u32 pixels) const { + return pixels * GetFormatBpp(pixel_format) / 8; + } + + bool ExactMatch(const SurfaceParams& other_surface) const; + bool CanSubRect(const SurfaceParams& sub_surface) const; + bool CanExpand(const SurfaceParams& expanded_surface) const; + bool CanTexCopy(const SurfaceParams& texcopy_params) const; + + MathUtil::Rectangle GetSubRect(const SurfaceParams& sub_surface) const; + MathUtil::Rectangle GetScaledSubRect(const SurfaceParams& sub_surface) const; + + PAddr addr = 0; + PAddr end = 0; + u32 size = 0; + + u32 width = 0; + u32 height = 0; + u32 stride = 0; + u16 res_scale = 1; + + bool is_tiled = false; + PixelFormat pixel_format = PixelFormat::Invalid; + SurfaceType type = SurfaceType::Invalid; +}; + +struct CachedSurface : SurfaceParams { + bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; + 
bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; + + bool IsRegionValid(SurfaceInterval interval) const { + return (invalid_regions.find(interval) == invalid_regions.end()); + } + + bool IsSurfaceFullyInvalid() const { + return (invalid_regions & GetInterval()) == SurfaceRegions(GetInterval()); + } + + SurfaceRegions invalid_regions; + + u32 fill_size = 0; /// Number of bytes to read from fill_data + std::array fill_data; OGLTexture texture; - u32 width; - u32 height; - /// Stride between lines, in pixels. Only valid for images in linear format. - u32 pixel_stride = 0; - float res_scale_width = 1.f; - float res_scale_height = 1.f; - bool is_tiled; - PixelFormat pixel_format; - bool dirty; + static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) { + // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type + return format == PixelFormat::Invalid + ? 0 + : (format == PixelFormat::D24 || GetFormatType(format) == SurfaceType::Texture) + ? 
4 + : SurfaceParams::GetFormatBpp(format) / 8; + } + + std::unique_ptr gl_buffer; + size_t gl_buffer_size = 0; + + // Read/Write data in 3DS memory to/from gl_buffer + void LoadGLBuffer(PAddr load_start, PAddr load_end); + void FlushGLBuffer(PAddr flush_start, PAddr flush_end); + + // Upload/Download data in gl_buffer in/to this surface's texture + void UploadGLTexture(const MathUtil::Rectangle& rect); + void DownloadGLTexture(const MathUtil::Rectangle& rect); }; class RasterizerCacheOpenGL : NonCopyable { diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 703aee90e..033d417bc 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -144,7 +144,7 @@ public: OpenGLState(); /// Get the currently active OpenGL state - static OpenGLState& GetCurState() { + static OpenGLState GetCurState() { return cur_state; } From 81ea32d1e0e7c13b8342c1c60fb454026c986806 Mon Sep 17 00:00:00 2001 From: James Rowe Date: Sat, 25 Nov 2017 13:21:32 -0700 Subject: [PATCH 07/32] OpenGL Cache: Refactor Surface Cache interface Changes the public interface of the surface cache to make it easier to use. Reintroduces the cached page count cached pages that was removed in an earlier commit. 
--- .../renderer_opengl/gl_rasterizer_cache.cpp | 152 ++++++++++++++---- .../renderer_opengl/gl_rasterizer_cache.h | 71 +++++--- 2 files changed, 170 insertions(+), 53 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 756a76ea6..2ba1ca029 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -1179,42 +1179,140 @@ Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params return match_surface; } -void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, - bool invalidate) { - if (size == 0) { +void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, Surface flush_surface) { + if (size == 0) return; - } - // Gather up unique surfaces that touch the region - std::unordered_set> touching_surfaces; + const auto flush_interval = SurfaceInterval(addr, addr + size); + for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { + const auto interval = pair.first & flush_interval; + auto& surface = pair.second; - auto surface_interval = boost::icl::interval::right_open(addr, addr + size); - auto cache_upper_bound = surface_cache.upper_bound(surface_interval); - for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { - std::copy_if(it->second.begin(), it->second.end(), - std::inserter(touching_surfaces, touching_surfaces.end()), - [skip_surface](std::shared_ptr surface) { - return (surface.get() != skip_surface); - }); - } + if (flush_surface != nullptr && surface != flush_surface) + continue; - // Flush and invalidate surfaces - for (auto surface : touching_surfaces) { - FlushSurface(surface.get()); - if (invalidate) { - Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1); - surface_cache.subtract( - std::make_pair(boost::icl::interval::right_open( - surface->addr, surface->addr 
+ surface->size), - std::set>({surface}))); + // Sanity check, this surface is the last one that marked this region dirty + ASSERT(surface->IsRegionValid(interval)); + + if (surface->type != SurfaceType::Fill) { + SurfaceParams params = surface->FromInterval(interval); + surface->DownloadGLTexture(surface->GetSubRect(params)); } + surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval)); } + + // Reset dirty regions + dirty_regions.erase(flush_interval); } void RasterizerCacheOpenGL::FlushAll() { - for (auto& surfaces : surface_cache) { - for (auto& surface : surfaces.second) { - FlushSurface(surface.get()); + FlushRegion(0, 0xFFFFFFFF); +} + +void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner) { + if (size == 0) + return; + + const auto invalid_interval = SurfaceInterval(addr, addr + size); + + if (region_owner != nullptr) { + ASSERT(region_owner->type != SurfaceType::Texture); + ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); + ASSERT(region_owner->width == region_owner->stride); // Surfaces can't have a gap + region_owner->invalid_regions.erase(invalid_interval); + } + + for (auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { + for (auto& cached_surface : pair.second) { + if (cached_surface == region_owner) + continue; + + const auto interval = cached_surface->GetInterval() & invalid_interval; + cached_surface->invalid_regions.insert(interval); + + // Remove only "empty" fill surfaces to avoid destroying and recreating OGL textures + if (cached_surface->type == SurfaceType::Fill && + cached_surface->IsSurfaceFullyInvalid()) { + remove_surfaces.emplace(cached_surface); + } } } + + if (region_owner != nullptr) + dirty_regions.set({invalid_interval, region_owner}); + else + dirty_regions.erase(invalid_interval); + + for (auto& remove_surface : remove_surfaces) { + if (remove_surface == region_owner) { + Surface expanded_surface = FindMatch( + 
surface_cache, *region_owner, ScaleMatch::Ignore); + ASSERT(expanded_surface); + + if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) { + DuplicateSurface(region_owner, expanded_surface); + } else { + continue; + } + } + UnregisterSurface(remove_surface); + } + + remove_surfaces.clear(); +} + +Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { + Surface surface = std::make_shared(); + static_cast(*surface) = params; + + surface->texture.Create(); + + surface->gl_buffer_size = 0; + surface->invalid_regions.insert(surface->GetInterval()); + AllocateSurfaceTexture(surface->texture.handle, GetFormatTuple(surface->pixel_format), + surface->GetScaledWidth(), surface->GetScaledHeight()); + + return surface; +} + +void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { + surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); + UpdatePagesCachedCount(surface->addr, surface->size, 1); +} + +void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { + UpdatePagesCachedCount(surface->addr, surface->size, -1); + surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); +} + +void RasterizerCacheOpenGL::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) { + const u32 num_pages = + ((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1; + const u32 page_start = addr >> Memory::PAGE_BITS; + const u32 page_end = page_start + num_pages; + + // Interval maps will erase segments if count reaches 0, so if delta is negative we have to + // subtract after iterating + const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); + if (delta > 0) + cached_pages.add({pages_interval, delta}); + + for (auto& pair : RangeFromInterval(cached_pages, pages_interval)) { + const auto interval = pair.first & pages_interval; + const int count = pair.second; + + const PAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; + 
const PAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const u32 interval_size = interval_end_addr - interval_start_addr; + + if (delta > 0 && count == delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); + else if (delta < 0 && count == -delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + else + ASSERT(count >= 0); + } + + if (delta < 0) + cached_pages.add({pages_interval, delta}); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 92bf7d3be..b2faa3916 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -298,46 +298,65 @@ public: RasterizerCacheOpenGL(); ~RasterizerCacheOpenGL(); - /// Blits one texture to another - void BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, - const MathUtil::Rectangle& src_rect, - const MathUtil::Rectangle& dst_rect); + /// Blit one surface's texture to another + bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle& src_rect, + const Surface& dst_surface, const MathUtil::Rectangle& dst_rect); - /// Attempt to blit one surface's texture to another - bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle& src_rect, - CachedSurface* dst_surface, const MathUtil::Rectangle& dst_rect); + /// Copy one surface's region to another + void CopySurface(const Surface& src_surface, const Surface& dst_surface, + SurfaceInterval copy_interval); - /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached) - CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale, - bool load_if_create); + /// Load a texture from 3DS memory to OpenGL and cache it (if not already cached) + Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create); /// Attempt to 
find a subrect (resolution scaled) of a surface, otherwise loads a texture from /// 3DS memory to OpenGL and caches it (if not already cached) - CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, - bool load_if_create, MathUtil::Rectangle& out_rect); + SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create); - /// Gets a surface based on the texture configuration - CachedSurface* GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config); + /// Get a surface based on the texture configuration + Surface GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config); - /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer - /// configuration - std::tuple> GetFramebufferSurfaces( - const Pica::FramebufferRegs::FramebufferConfig& config); + /// Get the color and depth surfaces based on the framebuffer configuration + SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, + const MathUtil::Rectangle& viewport_rect); - /// Attempt to get a surface that exactly matches the fill region and format - CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config); + /// Get a surface that matches the fill config + Surface GetFillSurface(const GPU::Regs::MemoryFillConfig& config); - /// Write the surface back to memory - void FlushSurface(CachedSurface* surface); + /// Get a surface that matches a "texture copy" display transfer config + SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); - /// Write any cached resources overlapping the region back to memory (if dirty) and optionally - /// invalidate them in the cache - void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate); + /// Write any cached resources overlapping the region back to memory (if dirty) + void FlushRegion(PAddr addr, u32 size, Surface flush_surface = nullptr); + + 
/// Mark region as being invalidated by region_owner (nullptr if 3DS memory) + void InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner); /// Flush all cached resources tracked by this cache manager void FlushAll(); private: + void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); + + /// Update surface's texture for given region when necessary + void ValidateSurface(const Surface& surface, PAddr addr, u32 size); + + /// Create a new surface + Surface CreateSurface(const SurfaceParams& params); + + /// Register surface into the cache + void RegisterSurface(const Surface& surface); + + /// Remove surface from the cache + void UnregisterSurface(const Surface& surface); + + /// Increase/decrease the number of surface in pages touching the specified region + void UpdatePagesCachedCount(PAddr addr, u32 size, int delta); + SurfaceCache surface_cache; - OGLFramebuffer transfer_framebuffers[2]; + SurfaceMap dirty_regions; + PageMap cached_pages; + SurfaceSet remove_surfaces; }; From e5adb6a26b19ff475ae939142fc63216df1e124c Mon Sep 17 00:00:00 2001 From: James Rowe Date: Sat, 25 Nov 2017 15:38:30 -0700 Subject: [PATCH 08/32] OpenGL Cache: Add the rest of the Cache methods Fills in the rasterizer cache methods using the helper methods added in the previous commits. 
--- .../renderer_opengl/gl_rasterizer_cache.cpp | 492 +++++++++++------- 1 file changed, 306 insertions(+), 186 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 2ba1ca029..a3115a718 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -6,14 +6,20 @@ #include #include #include +#include #include #include #include +#include +#include #include +#include "common/alignment.h" #include "common/bit_field.h" +#include "common/color.h" #include "common/logging/log.h" #include "common/math_util.h" #include "common/microprofile.h" +#include "common/scope_exit.h" #include "common/vector_math.h" #include "core/frontend/emu_window.h" #include "core/memory.h" @@ -155,6 +161,10 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() { RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { FlushAll(); + while (!surface_cache.empty()) + UnregisterSurface(*surface_cache.begin()->second.begin()); + transfer_framebuffers[0].Release(); + transfer_framebuffers[1].Release(); } template @@ -567,19 +577,45 @@ SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) c return result; } -bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, - const MathUtil::Rectangle& src_rect, - CachedSurface* dst_surface, - const MathUtil::Rectangle& dst_rect) { +void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface, + SurfaceInterval copy_interval) { + SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval); + ASSERT(subrect_params.GetInterval() == copy_interval); - if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, - dst_surface->pixel_format)) { - return false; + ASSERT(src_surface != dst_surface); + + // This is only called when CanCopy is true, no need to run checks here + if (src_surface->type == SurfaceType::Fill) { + // 
FillSurface needs a 4 bytes buffer + const u32 fill_offset = + (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size; + std::array fill_buffer; + + u32 fill_buff_pos = fill_offset; + for (int i : {0, 1, 2, 3}) + fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; + + FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params)); + return; } + if (src_surface->CanSubRect(subrect_params)) { + BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(subrect_params), + dst_surface->texture.handle, dst_surface->GetScaledSubRect(subrect_params), + src_surface->type); + return; + } + UNREACHABLE(); +} - BlitTextures(src_surface->texture.handle, dst_surface->texture.handle, - CachedSurface::GetFormatType(src_surface->pixel_format), src_rect, dst_rect); - return true; +bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, + const MathUtil::Rectangle& src_rect, + const Surface& dst_surface, + const MathUtil::Rectangle& dst_rect) { + if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) + return false; + + return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle, + dst_rect, src_surface->type); } // Allocate an uninitialized texture of appropriate size and format for the surface @@ -663,252 +699,336 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { } } -CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, - bool match_res_scale, bool load_if_create, - MathUtil::Rectangle& out_rect) { - if (params.addr == 0) { +Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create) { + if (params.addr == 0 || params.height * params.width == 0) { return nullptr; } - u32 total_pixels = params.width * params.height; - u32 params_size = total_pixels * 
CachedSurface::GetFormatBpp(params.pixel_format) / 8; + ASSERT(params.width == params.stride); // Use GetSurfaceSubRect instead - // Attempt to find encompassing surfaces - CachedSurface* best_subrect_surface = nullptr; - float subrect_surface_goodness = -1.f; + // Check for an exact match in existing surfaces + Surface surface = + FindMatch(surface_cache, params, match_res_scale); - auto surface_interval = - boost::icl::interval::right_open(params.addr, params.addr + params_size); - auto cache_upper_bound = surface_cache.upper_bound(surface_interval); - for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { - for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { - CachedSurface* surface = it2->get(); - - // Check if the request is contained in the surface - if (params.addr >= surface->addr && - params.addr + params_size - 1 <= surface->addr + surface->size - 1 && - params.pixel_format == surface->pixel_format) { - // Make sure optional param-matching criteria are fulfilled - bool tiling_match = (params.is_tiled == surface->is_tiled); - bool res_scale_match = (params.res_scale_width == surface->res_scale_width && - params.res_scale_height == surface->res_scale_height); - if (!match_res_scale || res_scale_match) { - // Prioritize same-tiling and highest resolution surfaces - float match_goodness = - (float)tiling_match + surface->res_scale_width * surface->res_scale_height; - if (match_goodness > subrect_surface_goodness || surface->dirty) { - subrect_surface_goodness = match_goodness; - best_subrect_surface = surface; - } + if (surface == nullptr) { + u16 target_res_scale = params.res_scale; + if (match_res_scale != ScaleMatch::Exact) { + // This surface may have a subrect of another surface with a higher res_scale, find it + // to adjust our params + SurfaceParams find_params = params; + Surface expandable = FindMatch( + surface_cache, find_params, match_res_scale); + if (expandable != nullptr && 
expandable->res_scale > target_res_scale) { + target_res_scale = expandable->res_scale; + } + // Keep res_scale when reinterpreting d24s8 -> rgba8 + if (params.pixel_format == PixelFormat::RGBA8) { + find_params.pixel_format = PixelFormat::D24S8; + expandable = FindMatch( + surface_cache, find_params, match_res_scale); + if (expandable != nullptr && expandable->res_scale > target_res_scale) { + target_res_scale = expandable->res_scale; } } } + SurfaceParams new_params = params; + new_params.res_scale = target_res_scale; + surface = CreateSurface(new_params); + RegisterSurface(surface); } - // Return the best subrect surface if found - if (best_subrect_surface != nullptr) { - unsigned int bytes_per_pixel = - (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8); + if (load_if_create) { + ValidateSurface(surface, params.addr, params.size); + } - int x0, y0; + return surface; +} - if (!params.is_tiled) { - u32 begin_pixel_index = (params.addr - best_subrect_surface->addr) / bytes_per_pixel; - x0 = begin_pixel_index % best_subrect_surface->width; - y0 = begin_pixel_index / best_subrect_surface->width; +SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, + ScaleMatch match_res_scale, + bool load_if_create) { + if (params.addr == 0 || params.height * params.width == 0) { + return {nullptr, {}}; + } - out_rect = MathUtil::Rectangle(x0, y0, x0 + params.width, y0 + params.height); - } else { - u32 bytes_per_tile = 8 * 8 * bytes_per_pixel; - u32 tiles_per_row = best_subrect_surface->width / 8; + // Attempt to find encompassing surface + Surface surface = FindMatch(surface_cache, params, + match_res_scale); - u32 begin_tile_index = (params.addr - best_subrect_surface->addr) / bytes_per_tile; - x0 = begin_tile_index % tiles_per_row * 8; - y0 = begin_tile_index / tiles_per_row * 8; + // Check if FindMatch failed because of res scaling + // If that's the case create a new surface with + // the dimensions of the lower res_scale 
surface + // to suggest it should not be used again + if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { + surface = FindMatch(surface_cache, params, + ScaleMatch::Ignore); + if (surface != nullptr) { + ASSERT(surface->res_scale < params.res_scale); + SurfaceParams new_params = *surface; + new_params.res_scale = params.res_scale; - // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory. - out_rect = - MathUtil::Rectangle(x0, best_subrect_surface->height - y0, x0 + params.width, - best_subrect_surface->height - (y0 + params.height)); + surface = CreateSurface(new_params); + RegisterSurface(surface); } + } - out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width); - out_rect.right = (int)(out_rect.right * best_subrect_surface->res_scale_width); - out_rect.top = (int)(out_rect.top * best_subrect_surface->res_scale_height); - out_rect.bottom = (int)(out_rect.bottom * best_subrect_surface->res_scale_height); + // Check for a surface we can expand before creating a new one + if (surface == nullptr) { + surface = FindMatch(surface_cache, params, + match_res_scale); + if (surface != nullptr) { + SurfaceParams new_params = *surface; + new_params.addr = std::min(params.addr, surface->addr); + new_params.end = std::max(params.end, surface->end); + new_params.size = new_params.end - new_params.addr; + new_params.height = new_params.size / params.BytesInPixels(params.stride); + ASSERT(new_params.size % params.BytesInPixels(params.stride) == 0); - return best_subrect_surface; + Surface new_surface = CreateSurface(new_params); + DuplicateSurface(surface, new_surface); + + // Delete the expanded surface, this can't be done safely yet + // because it may still be in use + remove_surfaces.emplace(surface); + + surface = new_surface; + RegisterSurface(new_surface); + } } // No subrect found - create and return a new surface - if (!params.is_tiled) { - out_rect = MathUtil::Rectangle(0, 0, (int)(params.width * 
params.res_scale_width), - (int)(params.height * params.res_scale_height)); - } else { - out_rect = MathUtil::Rectangle(0, (int)(params.height * params.res_scale_height), - (int)(params.width * params.res_scale_width), 0); + if (surface == nullptr) { + SurfaceParams new_params = params; + new_params.width = params.stride; // Can't have gaps in a surface + new_params.UpdateParams(); + // GetSurface will create the new surface and possibly adjust res_scale if necessary + surface = GetSurface(new_params, match_res_scale, load_if_create); + } else if (load_if_create) { + ValidateSurface(surface, params.addr, params.size); } - return GetSurface(params, match_res_scale, load_if_create); + return {surface, surface->GetScaledSubRect(params)}; } -CachedSurface* RasterizerCacheOpenGL::GetTextureSurface( +Surface RasterizerCacheOpenGL::GetTextureSurface( const Pica::TexturingRegs::FullTextureConfig& config) { - Pica::Texture::TextureInfo info = Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); - CachedSurface params; + SurfaceParams params; params.addr = info.physical_address; params.width = info.width; params.height = info.height; params.is_tiled = true; - params.pixel_format = CachedSurface::PixelFormatFromTextureFormat(info.format); - return GetSurface(params, false, true); + params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(info.format); + params.UpdateParams(); + return GetSurface(params, ScaleMatch::Ignore, true); } -// If the resolution static u16 GetResolutionScaleFactor() { return !Settings::values.resolution_factor ? 
VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio() : Settings::values.resolution_factor; } -std::tuple> -RasterizerCacheOpenGL::GetFramebufferSurfaces( - const Pica::FramebufferRegs::FramebufferConfig& config) { - +SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( + bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle& viewport_rect) { const auto& regs = Pica::g_state.regs; + const auto& config = regs.framebuffer.framebuffer; // update resolution_scale_factor and reset cache if changed static u16 resolution_scale_factor = GetResolutionScaleFactor(); if (resolution_scale_factor != GetResolutionScaleFactor()) { resolution_scale_factor = GetResolutionScaleFactor(); FlushAll(); - InvalidateRegion(0, 0xffffffff, nullptr); + while (!surface_cache.empty()) + UnregisterSurface(*surface_cache.begin()->second.begin()); } - // Make sur that framebuffers don't overlap if both color and depth are being used - u32 fb_area = config.GetWidth() * config.GetHeight(); - bool framebuffers_overlap = - config.GetColorBufferPhysicalAddress() != 0 && - config.GetDepthBufferPhysicalAddress() != 0 && - MathUtil::IntervalsIntersect( - config.GetColorBufferPhysicalAddress(), - fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())), - config.GetDepthBufferPhysicalAddress(), - fb_area * Pica::FramebufferRegs::BytesPerDepthPixel(config.depth_format)); - bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0; - bool depth_write_enable = regs.framebuffer.output_merger.depth_write_enable && - regs.framebuffer.framebuffer.allow_depth_stencil_write; - bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && - (regs.framebuffer.output_merger.depth_test_enable || depth_write_enable || - !framebuffers_overlap); + MathUtil::Rectangle viewport_clamped{ + static_cast( + MathUtil::Clamp(viewport_rect.left, 0, static_cast(config.GetWidth()))), + static_cast( + MathUtil::Clamp(viewport_rect.top, 0, 
static_cast(config.GetHeight()))), + static_cast( + MathUtil::Clamp(viewport_rect.right, 0, static_cast(config.GetWidth()))), + static_cast( + MathUtil::Clamp(viewport_rect.bottom, 0, static_cast(config.GetHeight())))}; - if (framebuffers_overlap && using_color_fb && using_depth_fb) { + // get color and depth surfaces + SurfaceParams color_params; + color_params.is_tiled = true; + color_params.res_scale = resolution_scale_factor; + color_params.width = config.GetWidth(); + color_params.height = config.GetHeight(); + SurfaceParams depth_params = color_params; + + color_params.addr = config.GetColorBufferPhysicalAddress(); + color_params.pixel_format = SurfaceParams::PixelFormatFromColorFormat(config.color_format); + color_params.UpdateParams(); + + depth_params.addr = config.GetDepthBufferPhysicalAddress(); + depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); + depth_params.UpdateParams(); + + auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); + auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); + + // Make sur that framebuffers don't overlap if both color and depth are being used + if (using_color_fb && using_depth_fb && + boost::icl::length(color_vp_interval & depth_vp_interval)) { LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " "overlapping framebuffers not supported!"); using_depth_fb = false; } - // get color and depth surfaces - CachedSurface color_params; - CachedSurface depth_params; - color_params.width = depth_params.width = config.GetWidth(); - color_params.height = depth_params.height = config.GetHeight(); - color_params.is_tiled = depth_params.is_tiled = true; + MathUtil::Rectangle color_rect{}; + Surface color_surface = nullptr; + if (using_color_fb) + std::tie(color_surface, color_rect) = + GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); - // Scale the resolution by the specified factor - color_params.res_scale_width 
= resolution_scale_factor; - depth_params.res_scale_width = resolution_scale_factor; - color_params.res_scale_height = resolution_scale_factor; - depth_params.res_scale_height = resolution_scale_factor; + MathUtil::Rectangle depth_rect{}; + Surface depth_surface = nullptr; + if (using_depth_fb) + std::tie(depth_surface, depth_rect) = + GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); - color_params.addr = config.GetColorBufferPhysicalAddress(); - color_params.pixel_format = CachedSurface::PixelFormatFromColorFormat(config.color_format); - - depth_params.addr = config.GetDepthBufferPhysicalAddress(); - depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format); - - MathUtil::Rectangle color_rect; - CachedSurface* color_surface = - using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr; - - MathUtil::Rectangle depth_rect; - CachedSurface* depth_surface = - using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr; - - // Sanity check to make sure found surfaces aren't the same - if (using_depth_fb && using_color_fb && color_surface == depth_surface) { - LOG_CRITICAL( - Render_OpenGL, - "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!"); - using_depth_fb = false; - depth_surface = nullptr; - } - - MathUtil::Rectangle rect; - - if (color_surface != nullptr && depth_surface != nullptr && - (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) { - // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if - // they don't match - if (color_rect.left != 0 || color_rect.top != 0) { - color_surface = GetSurface(color_params, true, true); - } - - if (depth_rect.left != 0 || depth_rect.top != 0) { - depth_surface = GetSurface(depth_params, true, true); - } - - if (!color_surface->is_tiled) { - rect = MathUtil::Rectangle( - 0, 0, (int)(color_params.width * color_params.res_scale_width), - 
(int)(color_params.height * color_params.res_scale_height)); - } else { - rect = MathUtil::Rectangle( - 0, (int)(color_params.height * color_params.res_scale_height), - (int)(color_params.width * color_params.res_scale_width), 0); + MathUtil::Rectangle fb_rect{}; + if (color_surface != nullptr && depth_surface != nullptr) { + fb_rect = color_rect; + // Color and Depth surfaces must have the same dimensions and offsets + if (color_rect.bottom != depth_rect.bottom || + color_surface->height != depth_surface->height) { + color_surface = GetSurface(color_params, ScaleMatch::Exact, false); + depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); + fb_rect = color_surface->GetScaledRect(); } } else if (color_surface != nullptr) { - rect = color_rect; + fb_rect = color_rect; } else if (depth_surface != nullptr) { - rect = depth_rect; - } else { - rect = MathUtil::Rectangle(0, 0, 0, 0); + fb_rect = depth_rect; + } + ASSERT(!fb_rect.left && fb_rect.right == config.GetWidth() * resolution_scale_factor); + + if (color_surface != nullptr) { + ValidateSurface(color_surface, boost::icl::first(color_vp_interval), + boost::icl::length(color_vp_interval)); + } + if (depth_surface != nullptr) { + ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), + boost::icl::length(depth_vp_interval)); } - return std::make_tuple(color_surface, depth_surface, rect); + return {color_surface, depth_surface, fb_rect}; } -CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) { - auto surface_interval = - boost::icl::interval::right_open(config.GetStartAddress(), config.GetEndAddress()); - auto range = surface_cache.equal_range(surface_interval); - for (auto it = range.first; it != range.second; ++it) { - for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { - int bits_per_value = 0; - if (config.fill_24bit) { - bits_per_value = 24; - } else if (config.fill_32bit) { - bits_per_value = 32; - } else { - 
bits_per_value = 16; - } +SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) { + MathUtil::Rectangle rect{}; - CachedSurface* surface = it2->get(); + Surface match_surface = FindMatch( + surface_cache, params, ScaleMatch::Ignore); - if (surface->addr == config.GetStartAddress() && - CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value && - (surface->width * surface->height * - CachedSurface::GetFormatBpp(surface->pixel_format) / 8) == - (config.GetEndAddress() - config.GetStartAddress())) { - return surface; - } + if (match_surface != nullptr) { + ValidateSurface(match_surface, params.addr, params.size); + + SurfaceParams match_subrect = params; + match_subrect.width = match_surface->PixelsInBytes(params.width); + match_subrect.stride = match_surface->PixelsInBytes(params.stride); + + if (match_surface->is_tiled) { + match_subrect.width /= 8; + match_subrect.stride /= 8; + match_subrect.height *= 8; } + + rect = match_surface->GetScaledSubRect(match_subrect); } - return nullptr; + return {match_surface, rect}; +} + +Surface RasterizerCacheOpenGL::GetFillSurface(const GPU::Regs::MemoryFillConfig& config) { + Surface new_surface = std::make_shared(); + + new_surface->addr = config.GetStartAddress(); + new_surface->end = config.GetEndAddress(); + new_surface->size = new_surface->end - new_surface->addr; + new_surface->type = SurfaceType::Fill; + new_surface->res_scale = std::numeric_limits::max(); + std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4); + if (config.fill_32bit) { + new_surface->fill_size = 4; + } else if (config.fill_24bit) { + new_surface->fill_size = 3; + } else { + new_surface->fill_size = 2; + } + + RegisterSurface(new_surface); + return new_surface; +} + +void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, + const Surface& dest_surface) { + ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); + + BlitSurfaces(src_surface, 
src_surface->GetScaledRect(), dest_surface, + dest_surface->GetScaledSubRect(*src_surface)); + + dest_surface->invalid_regions -= src_surface->GetInterval(); + dest_surface->invalid_regions += src_surface->invalid_regions; + + SurfaceRegions regions; + for (auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { + if (pair.second == src_surface) { + regions += pair.first; + } + } + for (auto& interval : regions) { + dirty_regions.set({interval, dest_surface}); + } +} + +void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, u32 size) { + if (size == 0) + return; + + const auto validate_interval = SurfaceInterval(addr, addr + size); + + if (surface->type == SurfaceType::Fill) { + // Sanity check, fill surfaces will always be valid when used + ASSERT(surface->IsRegionValid(validate_interval)); + return; + } + + const auto validate_regions = surface->invalid_regions.find(validate_interval); + + for (;;) { + const auto it = validate_regions.begin(); + if (it == surface->invalid_regions.end()) + break; + + const auto interval = *it & validate_interval; + // Look for a valid surface to copy from + SurfaceParams params = surface->FromInterval(interval); + + Surface copy_surface = + FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); + if (copy_surface != nullptr) { + SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); + CopySurface(copy_surface, surface, copy_interval); + validate_regions.erase(interval); + continue; + } + + // Load data from 3DS memory + FlushRegion(params.addr, params.size); + surface->LoadGLBuffer(params.addr, params.end); + surface->UploadGLTexture(surface->GetSubRect(params)); + validate_regions.erase(interval) + } } MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); From 24e187891fc0992a543f8f54ca46910fb390dd85 Mon Sep 17 00:00:00 2001 From: James Rowe Date: Tue, 12 Dec 2017 10:21:55 -0700 Subject: [PATCH 09/32] OpenGL Rasterizer: 
Update to use the new cache --- .../renderer_opengl/gl_rasterizer.cpp | 437 ++++++++---------- .../renderer_opengl/gl_rasterizer.h | 5 +- 2 files changed, 208 insertions(+), 234 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 599e6e2af..ddca45e24 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -8,7 +8,6 @@ #include #include #include "common/assert.h" -#include "common/color.h" #include "common/logging/log.h" #include "common/math_util.h" #include "common/microprofile.h" @@ -23,6 +22,9 @@ #include "video_core/renderer_opengl/pica_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" +using PixelFormat = SurfaceParams::PixelFormat; +using SurfaceType = SurfaceParams::SurfaceType; + MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); @@ -227,21 +229,64 @@ void RasterizerOpenGL::DrawTriangles() { MICROPROFILE_SCOPE(OpenGL_Drawing); const auto& regs = Pica::g_state.regs; - // Sync and bind the framebuffer surfaces - CachedSurface* color_surface; - CachedSurface* depth_surface; - MathUtil::Rectangle rect; - std::tie(color_surface, depth_surface, rect) = - res_cache.GetFramebufferSurfaces(regs.framebuffer.framebuffer); + const bool has_stencil = + regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; + const bool write_color_fb = + state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || + state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; + + const bool write_depth_fb = + (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) || + (has_stencil && state.stencil.test_enabled && state.stencil.write_mask != 
0); + + const bool using_color_fb = + regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb; + const bool using_depth_fb = + regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 && + (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 || + (has_stencil && state.stencil.test_enabled)); + + MathUtil::Rectangle viewport_rect_unscaled{ + // These registers hold half-width and half-height, so must be multiplied by 2 + regs.rasterizer.viewport_corner.x, // left + regs.rasterizer.viewport_corner.y + // top + static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() * + 2), + regs.rasterizer.viewport_corner.x + // right + static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() * + 2), + regs.rasterizer.viewport_corner.y // bottom + }; + + Surface color_surface; + Surface depth_surface; + MathUtil::Rectangle surfaces_rect; + std::tie(color_surface, depth_surface, surfaces_rect) = + res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled); + + const u16 res_scale = color_surface != nullptr + ? color_surface->res_scale + : (depth_surface == nullptr ? 
1u : depth_surface->res_scale); + + MathUtil::Rectangle draw_rect{ + MathUtil::Clamp(surfaces_rect.left + viewport_rect_unscaled.left * res_scale, // left + surfaces_rect.left, surfaces_rect.right), + MathUtil::Clamp(surfaces_rect.bottom + viewport_rect_unscaled.top * res_scale, // top + surfaces_rect.bottom, surfaces_rect.top), + MathUtil::Clamp(surfaces_rect.left + viewport_rect_unscaled.right * res_scale, // right + surfaces_rect.left, surfaces_rect.right), + MathUtil::Clamp(surfaces_rect.bottom + viewport_rect_unscaled.bottom * res_scale, // bottom + surfaces_rect.bottom, surfaces_rect.top)}; + + // Bind the framebuffer surfaces state.draw.draw_framebuffer = framebuffer.handle; state.Apply(); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_surface != nullptr ? color_surface->texture.handle : 0, 0); if (depth_surface != nullptr) { - if (regs.framebuffer.framebuffer.depth_format == - Pica::FramebufferRegs::DepthFormat::D24S8) { + if (has_stencil) { // attach both depth and stencil glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, depth_surface->texture.handle, 0); @@ -259,38 +304,30 @@ void RasterizerOpenGL::DrawTriangles() { } // Sync the viewport - // These registers hold half-width and half-height, so must be multiplied by 2 - GLsizei viewport_width = - (GLsizei)Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() * 2; - GLsizei viewport_height = - (GLsizei)Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() * 2; + state.viewport.x = + static_cast(surfaces_rect.left + viewport_rect_unscaled.left * res_scale); + state.viewport.y = + static_cast(surfaces_rect.bottom + viewport_rect_unscaled.bottom * res_scale); + state.viewport.width = static_cast(viewport_rect_unscaled.GetWidth() * res_scale); + state.viewport.height = static_cast(viewport_rect_unscaled.GetHeight() * res_scale); - glViewport( - (GLint)(rect.left + regs.rasterizer.viewport_corner.x * 
color_surface->res_scale_width), - (GLint)(rect.bottom + regs.rasterizer.viewport_corner.y * color_surface->res_scale_height), - (GLsizei)(viewport_width * color_surface->res_scale_width), - (GLsizei)(viewport_height * color_surface->res_scale_height)); - - if (uniform_block_data.data.framebuffer_scale[0] != color_surface->res_scale_width || - uniform_block_data.data.framebuffer_scale[1] != color_surface->res_scale_height) { - - uniform_block_data.data.framebuffer_scale[0] = color_surface->res_scale_width; - uniform_block_data.data.framebuffer_scale[1] = color_surface->res_scale_height; + if (uniform_block_data.data.framebuffer_scale != res_scale) { + uniform_block_data.data.framebuffer_scale = res_scale; uniform_block_data.dirty = true; } // Scissor checks are window-, not viewport-relative, which means that if the cached texture // sub-rect changes, the scissor bounds also need to be updated. - GLint scissor_x1 = static_cast( - rect.left + regs.rasterizer.scissor_test.x1 * color_surface->res_scale_width); - GLint scissor_y1 = static_cast( - rect.bottom + regs.rasterizer.scissor_test.y1 * color_surface->res_scale_height); + GLint scissor_x1 = + static_cast(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale); + GLint scissor_y1 = + static_cast(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale); // x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when // scaling or doing multisampling. 
- GLint scissor_x2 = static_cast( - rect.left + (regs.rasterizer.scissor_test.x2 + 1) * color_surface->res_scale_width); - GLint scissor_y2 = static_cast( - rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * color_surface->res_scale_height); + GLint scissor_x2 = + static_cast(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale); + GLint scissor_y2 = static_cast(surfaces_rect.bottom + + (regs.rasterizer.scissor_test.y2 + 1) * res_scale); if (uniform_block_data.data.scissor_x1 != scissor_x1 || uniform_block_data.data.scissor_x2 != scissor_x2 || @@ -311,7 +348,7 @@ void RasterizerOpenGL::DrawTriangles() { if (texture.enabled) { texture_samplers[texture_index].SyncWithConfig(texture.config); - CachedSurface* surface = res_cache.GetTextureSurface(texture); + Surface surface = res_cache.GetTextureSurface(texture); if (surface != nullptr) { state.texture_units[texture_index].texture_2d = surface->texture.handle; } else { @@ -380,6 +417,15 @@ void RasterizerOpenGL::DrawTriangles() { uniform_block_data.dirty = false; } + // Viewport can have negative offsets or larger + // dimensions than our framebuffer sub-rect. 
+ // Enable scissor test to prevent drawing + // outside of the framebuffer region + state.scissor.enabled = true; + state.scissor.x = draw_rect.left; + state.scissor.y = draw_rect.bottom; + state.scissor.width = draw_rect.GetWidth(); + state.scissor.height = draw_rect.GetHeight(); state.Apply(); // Draw the vertex batch @@ -387,16 +433,8 @@ void RasterizerOpenGL::DrawTriangles() { GL_STREAM_DRAW); glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); - // Mark framebuffer surfaces as dirty - // TODO: Restrict invalidation area to the viewport - if (color_surface != nullptr) { - color_surface->dirty = true; - res_cache.FlushRegion(color_surface->addr, color_surface->size, color_surface, true); - } - if (depth_surface != nullptr) { - depth_surface->dirty = true; - res_cache.FlushRegion(depth_surface->addr, depth_surface->size, depth_surface, true); - } + // Disable scissor test + state.scissor.enabled = false; vertex_batch.clear(); @@ -405,6 +443,22 @@ void RasterizerOpenGL::DrawTriangles() { state.texture_units[texture_index].texture_2d = 0; } state.Apply(); + + // Mark framebuffer surfaces as dirty + MathUtil::Rectangle draw_rect_unscaled{ + draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale, + draw_rect.bottom / res_scale}; + + if (color_surface != nullptr && write_color_fb) { + auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); + res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), + color_surface); + } + if (depth_surface != nullptr && write_depth_fb) { + auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled); + res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), + depth_surface); + } } void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { @@ -893,227 +947,143 @@ void RasterizerOpenGL::FlushAll() { void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - 
res_cache.FlushRegion(addr, size, nullptr, false); + res_cache.FlushRegion(addr, size); +} + +void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.InvalidateRegion(addr, size, nullptr); } void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.FlushRegion(addr, size, nullptr, true); + res_cache.FlushRegion(addr, size); + res_cache.InvalidateRegion(addr, size, nullptr); } bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { MICROPROFILE_SCOPE(OpenGL_Blits); - CachedSurface src_params; + SurfaceParams src_params; src_params.addr = config.GetPhysicalInputAddress(); - // It's important to use the correct source input width to properly skip over parts of the input - // image which will be cropped from the output but still affect the stride of the input image. - src_params.width = config.input_width; - // Using the output's height is fine because we don't read or skip over the remaining part of - // the image, and it allows for smaller texture cache lookup rectangles. + src_params.width = config.output_width; + src_params.stride = config.input_width; src_params.height = config.output_height; src_params.is_tiled = !config.input_linear; - src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.input_format); + src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.input_format); + src_params.UpdateParams(); - CachedSurface dst_params; + SurfaceParams dst_params; dst_params.addr = config.GetPhysicalOutputAddress(); - dst_params.width = - config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value(); - dst_params.height = - config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value(); + dst_params.width = config.scaling != config.NoScale ? 
config.output_width.Value() / 2 + : config.output_width.Value(); + dst_params.height = config.scaling == config.ScaleXY ? config.output_height.Value() / 2 + : config.output_height.Value(); dst_params.is_tiled = config.input_linear != config.dont_swizzle; - dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format); + dst_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.output_format); + dst_params.UpdateParams(); - MathUtil::Rectangle src_rect; - CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect); - - if (src_surface == nullptr) { + MathUtil::Rectangle src_rect; + Surface src_surface; + std::tie(src_surface, src_rect) = + res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); + if (src_surface == nullptr) return false; - } - // Adjust the source rectangle to take into account parts of the input lines being cropped - if (config.input_width > config.output_width) { - src_rect.right -= static_cast((config.input_width - config.output_width) * - src_surface->res_scale_width); - } + dst_params.res_scale = src_surface->res_scale; - // Require destination surface to have same resolution scale as source to preserve scaling - dst_params.res_scale_width = src_surface->res_scale_width; - dst_params.res_scale_height = src_surface->res_scale_height; - - MathUtil::Rectangle dst_rect; - CachedSurface* dst_surface = res_cache.GetSurfaceRect(dst_params, true, false, dst_rect); - - if (dst_surface == nullptr) { + MathUtil::Rectangle dst_rect; + Surface dst_surface; + std::tie(dst_surface, dst_rect) = + res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, false); + if (dst_surface == nullptr) return false; - } - // Don't accelerate if the src and dst surfaces are the same - if (src_surface == dst_surface) { + if (src_surface->is_tiled != dst_surface->is_tiled) + std::swap(src_rect.top, src_rect.bottom); + + if (config.flip_vertically) + std::swap(src_rect.top, 
src_rect.bottom); + + if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) return false; - } - if (config.flip_vertically) { - std::swap(dst_rect.top, dst_rect.bottom); - } - - if (!res_cache.TryBlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) { - return false; - } - - u32 dst_size = dst_params.width * dst_params.height * - CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8; - dst_surface->dirty = true; - res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true); + res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface); return true; } bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { - // TODO(tfarley): Try to hardware accelerate this - return false; -} + u32 input_width = config.texture_copy.input_width * 16; + u32 input_gap = config.texture_copy.input_gap * 16; + u32 output_width = config.texture_copy.output_width * 16; + u32 output_gap = config.texture_copy.output_gap * 16; -bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { - MICROPROFILE_SCOPE(OpenGL_Blits); - using PixelFormat = CachedSurface::PixelFormat; - using SurfaceType = CachedSurface::SurfaceType; + if (config.texture_copy.size == 0) + return true; - CachedSurface* dst_surface = res_cache.TryGetFillSurface(config); + if (input_width >= config.texture_copy.size) { + input_width = config.texture_copy.size; + input_gap = 0; + } - if (dst_surface == nullptr) { + if (output_width >= config.texture_copy.size) { + output_width = config.texture_copy.size; + output_gap = 0; + } + + if (input_width != output_width || config.texture_copy.size % input_width != 0) { return false; } - OpenGLState cur_state = OpenGLState::GetCurState(); + SurfaceParams src_params; + src_params.addr = config.GetPhysicalInputAddress(); + src_params.stride = input_width + input_gap; // stride in bytes + src_params.width = input_width; // width in bytes + src_params.height = 
config.texture_copy.size / input_width; + src_params.size = ((src_params.height - 1) * src_params.stride) + src_params.width; + src_params.end = src_params.addr + src_params.size; - SurfaceType dst_type = CachedSurface::GetFormatType(dst_surface->pixel_format); + MathUtil::Rectangle src_rect; + Surface src_surface; + std::tie(src_surface, src_rect) = res_cache.GetTexCopySurface(src_params); + if (src_surface == nullptr) + return false; - GLuint old_fb = cur_state.draw.draw_framebuffer; - cur_state.draw.draw_framebuffer = framebuffer.handle; - // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so - // Clear call isn't affected - cur_state.Apply(); + if ((output_gap * 8) % SurfaceParams::GetFormatBpp(src_surface->pixel_format) != 0 || + (src_surface->is_tiled && src_surface->PixelsInBytes(output_gap) % 64 != 0)) + return false; - if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - dst_surface->texture.handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); + SurfaceParams dst_params = *src_surface; + dst_params.addr = config.GetPhysicalOutputAddress(); + dst_params.width = src_rect.GetWidth() / src_surface->res_scale; + dst_params.stride = dst_params.width + src_surface->PixelsInBytes( + src_surface->is_tiled ? 
output_gap / 8 : output_gap); + dst_params.height = src_rect.GetHeight() / src_surface->res_scale; + dst_params.res_scale = src_surface->res_scale; + dst_params.UpdateParams(); - GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f}; + const bool load_gap = output_gap != 0; // Since we are going to invalidate the gap if there is + // one, we will have to load it first + MathUtil::Rectangle dst_rect; + Surface dst_surface; + std::tie(dst_surface, dst_rect) = + res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, load_gap); + if (src_surface == nullptr) + return false; - // TODO: Handle additional pixel format and fill value size combinations to accelerate more - // cases - // For instance, checking if fill value's bytes/bits repeat to allow filling - // I8/A8/I4/A4/... - // Currently only handles formats that are multiples of the fill value size + if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) + return false; - if (config.fill_24bit) { - switch (dst_surface->pixel_format) { - case PixelFormat::RGB8: - color_values[0] = config.value_24bit_r / 255.0f; - color_values[1] = config.value_24bit_g / 255.0f; - color_values[2] = config.value_24bit_b / 255.0f; - break; - default: - return false; - } - } else if (config.fill_32bit) { - u32 value = config.value_32bit; + res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface); + return true; +} - switch (dst_surface->pixel_format) { - case PixelFormat::RGBA8: - color_values[0] = (value >> 24) / 255.0f; - color_values[1] = ((value >> 16) & 0xFF) / 255.0f; - color_values[2] = ((value >> 8) & 0xFF) / 255.0f; - color_values[3] = (value & 0xFF) / 255.0f; - break; - default: - return false; - } - } else { - u16 value_16bit = config.value_16bit.Value(); - Math::Vec4 color; +bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { + Surface dst_surface = res_cache.GetFillSurface(config); + if (dst_surface == nullptr) + return false; - switch 
(dst_surface->pixel_format) { - case PixelFormat::RGBA8: - color_values[0] = (value_16bit >> 8) / 255.0f; - color_values[1] = (value_16bit & 0xFF) / 255.0f; - color_values[2] = color_values[0]; - color_values[3] = color_values[1]; - break; - case PixelFormat::RGB5A1: - color = Color::DecodeRGB5A1((const u8*)&value_16bit); - color_values[0] = color[0] / 31.0f; - color_values[1] = color[1] / 31.0f; - color_values[2] = color[2] / 31.0f; - color_values[3] = color[3]; - break; - case PixelFormat::RGB565: - color = Color::DecodeRGB565((const u8*)&value_16bit); - color_values[0] = color[0] / 31.0f; - color_values[1] = color[1] / 63.0f; - color_values[2] = color[2] / 31.0f; - break; - case PixelFormat::RGBA4: - color = Color::DecodeRGBA4((const u8*)&value_16bit); - color_values[0] = color[0] / 15.0f; - color_values[1] = color[1] / 15.0f; - color_values[2] = color[2] / 15.0f; - color_values[3] = color[3] / 15.0f; - break; - case PixelFormat::IA8: - case PixelFormat::RG8: - color_values[0] = (value_16bit >> 8) / 255.0f; - color_values[1] = (value_16bit & 0xFF) / 255.0f; - break; - default: - return false; - } - } - - cur_state.color_mask.red_enabled = GL_TRUE; - cur_state.color_mask.green_enabled = GL_TRUE; - cur_state.color_mask.blue_enabled = GL_TRUE; - cur_state.color_mask.alpha_enabled = GL_TRUE; - cur_state.Apply(); - glClearBufferfv(GL_COLOR, 0, color_values); - } else if (dst_type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - dst_surface->texture.handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - GLfloat value_float; - if (dst_surface->pixel_format == CachedSurface::PixelFormat::D16) { - value_float = config.value_32bit / 65535.0f; // 2^16 - 1 - } else if (dst_surface->pixel_format == CachedSurface::PixelFormat::D24) { - value_float = config.value_32bit / 
16777215.0f; // 2^24 - 1 - } - - cur_state.depth.write_mask = GL_TRUE; - cur_state.Apply(); - glClearBufferfv(GL_DEPTH, 0, &value_float); - } else if (dst_type == SurfaceType::DepthStencil) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - dst_surface->texture.handle, 0); - - GLfloat value_float = (config.value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1 - GLint value_int = (config.value_32bit >> 24); - - cur_state.depth.write_mask = GL_TRUE; - cur_state.stencil.write_mask = 0xFF; - cur_state.Apply(); - glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int); - } - - cur_state.draw.draw_framebuffer = old_fb; - // TODO: Return scissor test to previous value when scissor test is implemented - cur_state.Apply(); - - dst_surface->dirty = true; - res_cache.FlushRegion(dst_surface->addr, dst_surface->size, dst_surface, true); + res_cache.InvalidateRegion(dst_surface->addr, dst_surface->size, dst_surface); return true; } @@ -1125,16 +1095,19 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con } MICROPROFILE_SCOPE(OpenGL_CacheManagement); - CachedSurface src_params; + SurfaceParams src_params; src_params.addr = framebuffer_addr; - src_params.width = config.width; + src_params.width = std::min(config.width.Value(), pixel_stride); src_params.height = config.height; - src_params.pixel_stride = pixel_stride; + src_params.stride = pixel_stride; src_params.is_tiled = false; - src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.color_format); + src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.color_format); + src_params.UpdateParams(); - MathUtil::Rectangle src_rect; - CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect); + MathUtil::Rectangle src_rect; + Surface src_surface; + std::tie(src_surface, src_rect) = + 
res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); if (src_surface == nullptr) { return false; @@ -1144,8 +1117,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con u32 scaled_height = src_surface->GetScaledHeight(); screen_info.display_texcoords = MathUtil::Rectangle( - (float)src_rect.top / (float)scaled_height, (float)src_rect.left / (float)scaled_width, - (float)src_rect.bottom / (float)scaled_height, (float)src_rect.right / (float)scaled_width); + (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, + (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); screen_info.display_texture = src_surface->texture.handle; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 46c62961c..18808b1e4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -43,6 +43,7 @@ public: void NotifyPicaRegisterChanged(u32 id) override; void FlushAll() override; void FlushRegion(PAddr addr, u32 size) override; + void InvalidateRegion(PAddr addr, u32 size) override; void FlushAndInvalidateRegion(PAddr addr, u32 size) override; bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override; @@ -135,7 +136,7 @@ private: // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. // Not following that rule will cause problems on some AMD drivers. 
struct UniformData { - alignas(8) GLvec2 framebuffer_scale; + GLint framebuffer_scale; GLint alphatest_ref; GLfloat depth_scale; GLfloat depth_offset; @@ -155,7 +156,7 @@ private: }; static_assert( - sizeof(UniformData) == 0x470, + sizeof(UniformData) == 0x460, "The size of the UniformData structure has changed, update the structure in the shader"); static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); From 5b872c41d8f27e0af8672d3c54a1bcaca1a5c6b8 Mon Sep 17 00:00:00 2001 From: James Rowe Date: Sat, 9 Dec 2017 16:00:55 -0700 Subject: [PATCH 10/32] OpenGL Cache: Reorder methods The previous commits added the methods where they were located originally to try to get an easy to read diff between changes. This commit fixes compilation since the static methods are now declared before they are used. --- .../renderer_opengl/gl_rasterizer_cache.cpp | 955 +++++++++--------- 1 file changed, 483 insertions(+), 472 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index a3115a718..30632dc0d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -42,7 +42,7 @@ struct FormatTuple { GLenum type; }; -static const std::array fb_format_tuples = {{ +static constexpr std::array fb_format_tuples = {{ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, // RGBA8 {GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE}, // RGB8 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1 @@ -50,7 +50,7 @@ static const std::array fb_format_tuples = {{ {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 }}; -static const std::array depth_format_tuples = {{ +static constexpr std::array depth_format_tuples = {{ {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16 {}, {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24 @@ -78,93 +78,10 @@ constexpr auto
RangeFromInterval(Map& map, const Interval& interval) { return boost::make_iterator_range(map.equal_range(interval)); } -static bool FillSurface(const Surface& surface, const u8* fill_data, - const MathUtil::Rectangle& fill_rect) { - OpenGLState state = OpenGLState::GetCurState(); - - OpenGLState prev_state = state; - SCOPE_EXIT({ prev_state.Apply(); }); - - state.ResetTexture(surface->texture.handle); - - state.scissor.enabled = true; - state.scissor.x = static_cast(fill_rect.left); - state.scissor.y = static_cast(fill_rect.bottom); - state.scissor.width = static_cast(fill_rect.GetWidth()); - state.scissor.height = static_cast(fill_rect.GetHeight()); - - state.draw.draw_framebuffer = transfer_framebuffers[1].handle; - state.Apply(); - - if (surface->type == SurfaceType::Color || surface->type == SurfaceType::Texture) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - surface->texture.handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - Pica::Texture::TextureInfo tex_info{}; - tex_info.format = static_cast(surface->pixel_format); - Math::Vec4 color = Pica::Texture::LookupTexture(fill_data, 0, 0, tex_info); - - std::array color_values = {color.x / 255.f, color.y / 255.f, color.z / 255.f, - color.w / 255.f}; - - state.color_mask.red_enabled = GL_TRUE; - state.color_mask.green_enabled = GL_TRUE; - state.color_mask.blue_enabled = GL_TRUE; - state.color_mask.alpha_enabled = GL_TRUE; - state.Apply(); - glClearBufferfv(GL_COLOR, 0, &color_values[0]); - } else if (surface->type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - surface->texture.handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - u32 value_32bit = 0; - GLfloat value_float; - - if (surface->pixel_format == 
SurfaceParams::PixelFormat::D16) { - std::memcpy(&value_32bit, fill_data, 2); - value_float = value_32bit / 65535.0f; // 2^16 - 1 - } else if (surface->pixel_format == SurfaceParams::PixelFormat::D24) { - std::memcpy(&value_32bit, fill_data, 3); - value_float = value_32bit / 16777215.0f; // 2^24 - 1 - } - - state.depth.write_mask = GL_TRUE; - state.Apply(); - glClearBufferfv(GL_DEPTH, 0, &value_float); - } else if (surface->type == SurfaceType::DepthStencil) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - surface->texture.handle, 0); - - u32 value_32bit; - std::memcpy(&value_32bit, fill_data, 4); - - GLfloat value_float = (value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1 - GLint value_int = (value_32bit >> 24); - - state.depth.write_mask = GL_TRUE; - state.stencil.write_mask = -1; - state.Apply(); - glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int); - } - return true; -} - -RasterizerCacheOpenGL::RasterizerCacheOpenGL() { - transfer_framebuffers[0].Create(); - transfer_framebuffers[1].Create(); -} - -RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { - FlushAll(); - while (!surface_cache.empty()) - UnregisterSurface(*surface_cache.begin()->second.begin()); - transfer_framebuffers[0].Release(); - transfer_framebuffers[1].Release(); +static u16 GetResolutionScaleFactor() { + return !Settings::values.resolution_factor + ? 
VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio() + : Settings::values.resolution_factor; } template @@ -292,6 +209,30 @@ static constexpr std::array gl MortonCopy // 17 }; +// Allocate an uninitialized texture of appropriate size and format for the surface +static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tuple, u32 width, + u32 height) { + OpenGLState cur_state = OpenGLState::GetCurState(); + + // Keep track of previous texture bindings + GLuint old_tex = cur_state.texture_units[0].texture_2d; + cur_state.texture_units[0].texture_2d = texture; + cur_state.Apply(); + glActiveTexture(GL_TEXTURE0); + + glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0, + format_tuple.format, format_tuple.type, nullptr); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + // Restore previous texture bindings + cur_state.texture_units[0].texture_2d = old_tex; + cur_state.Apply(); +} + static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rect, GLuint dst_tex, const MathUtil::Rectangle& dst_rect, SurfaceType type) { OpenGLState state = OpenGLState::GetCurState(); @@ -352,6 +293,82 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rec return true; } +static bool FillSurface(const Surface& surface, const u8* fill_data, + const MathUtil::Rectangle& fill_rect) { + OpenGLState state = OpenGLState::GetCurState(); + + OpenGLState prev_state = state; + SCOPE_EXIT({ prev_state.Apply(); }); + + state.ResetTexture(surface->texture.handle); + + state.scissor.enabled = true; + state.scissor.x = static_cast(fill_rect.left); + state.scissor.y = static_cast(fill_rect.bottom); + state.scissor.width = static_cast(fill_rect.GetWidth()); + state.scissor.height = 
static_cast(fill_rect.GetHeight()); + + state.draw.draw_framebuffer = transfer_framebuffers[1].handle; + state.Apply(); + + if (surface->type == SurfaceType::Color || surface->type == SurfaceType::Texture) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + surface->texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + Pica::Texture::TextureInfo tex_info{}; + tex_info.format = static_cast(surface->pixel_format); + Math::Vec4 color = Pica::Texture::LookupTexture(fill_data, 0, 0, tex_info); + + std::array color_values = {color.x / 255.f, color.y / 255.f, color.z / 255.f, + color.w / 255.f}; + + state.color_mask.red_enabled = GL_TRUE; + state.color_mask.green_enabled = GL_TRUE; + state.color_mask.blue_enabled = GL_TRUE; + state.color_mask.alpha_enabled = GL_TRUE; + state.Apply(); + glClearBufferfv(GL_COLOR, 0, &color_values[0]); + } else if (surface->type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + surface->texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + u32 value_32bit = 0; + GLfloat value_float; + + if (surface->pixel_format == SurfaceParams::PixelFormat::D16) { + std::memcpy(&value_32bit, fill_data, 2); + value_float = value_32bit / 65535.0f; // 2^16 - 1 + } else if (surface->pixel_format == SurfaceParams::PixelFormat::D24) { + std::memcpy(&value_32bit, fill_data, 3); + value_float = value_32bit / 16777215.0f; // 2^24 - 1 + } + + state.depth.write_mask = GL_TRUE; + state.Apply(); + glClearBufferfv(GL_DEPTH, 0, &value_float); + } else if (surface->type == SurfaceType::DepthStencil) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 
GL_TEXTURE_2D, + surface->texture.handle, 0); + + u32 value_32bit; + std::memcpy(&value_32bit, fill_data, 4); + + GLfloat value_float = (value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1 + GLint value_int = (value_32bit >> 24); + + state.depth.write_mask = GL_TRUE; + state.stencil.write_mask = -1; + state.Apply(); + glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int); + } + return true; +} + SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { SurfaceParams params = *this; @@ -386,10 +403,6 @@ SurfaceInterval SurfaceParams::GetSubRectInterval(MathUtil::Rectangle unsca return {}; } - if (unscaled_rect.bottom > unscaled_rect.top) { - std::swap(unscaled_rect.top, unscaled_rect.bottom); - } - if (is_tiled) { unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8; unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8; @@ -414,14 +427,15 @@ MathUtil::Rectangle SurfaceParams::GetSubRect(const SurfaceParams& sub_surf if (is_tiled) { const int x0 = (begin_pixel_index % (stride * 8)) / 8; const int y0 = (begin_pixel_index / (stride * 8)) * 8; + // Top to bottom return MathUtil::Rectangle(x0, height - y0, x0 + sub_surface.width, - height - (y0 + sub_surface.height)); // Top to bottom + height - (y0 + sub_surface.height)); } const int x0 = begin_pixel_index % stride; const int y0 = begin_pixel_index / stride; - return MathUtil::Rectangle(x0, y0 + sub_surface.height, x0 + sub_surface.width, - y0); // Bottom to top + // Bottom to top + return MathUtil::Rectangle(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0); } MathUtil::Rectangle SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const { @@ -482,13 +496,14 @@ bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { return false; const u32 begin_pixel_index = PixelsInBytes(texcopy_params.addr - addr); - const int x0 = begin_pixel_index % stride; - const int y0 = begin_pixel_index / stride; - if (!is_tiled) + if (!is_tiled) { + 
const int x0 = begin_pixel_index % stride; return ((texcopy_params.height == 1 || PixelsInBytes(texcopy_params.stride) == stride) && x0 + PixelsInBytes(texcopy_params.width) <= stride); + } + const int x0 = (begin_pixel_index % (stride * 8)) / 8; return (PixelsInBytes(texcopy_params.addr - addr) % 64 == 0 && PixelsInBytes(texcopy_params.width) % 64 == 0 && (texcopy_params.height == 1 || PixelsInBytes(texcopy_params.stride) == stride * 8) && @@ -607,41 +622,6 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac UNREACHABLE(); } -bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, - const MathUtil::Rectangle& src_rect, - const Surface& dst_surface, - const MathUtil::Rectangle& dst_rect) { - if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) - return false; - - return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle, - dst_rect, src_surface->type); -} - -// Allocate an uninitialized texture of appropriate size and format for the surface -static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tuple, u32 width, - u32 height) { - OpenGLState cur_state = OpenGLState::GetCurState(); - - // Keep track of previous texture bindings - GLuint old_tex = cur_state.texture_units[0].texture_2d; - cur_state.texture_units[0].texture_2d = texture; - cur_state.Apply(); - glActiveTexture(GL_TEXTURE0); - - glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0, - format_tuple.format, format_tuple.type, nullptr); - - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - - // Restore previous texture bindings - cur_state.texture_units[0].texture_2d = old_tex; - cur_state.Apply(); -} - 
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { ASSERT(type != SurfaceType::Fill); @@ -680,7 +660,7 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { tex_info.SetDefaultStride(); tex_info.physical_address = addr; - const auto load_interval = SurfaceInterval(load_start, load_end); + const SurfaceInterval load_interval(load_start, load_end); const auto rect = GetSubRect(FromInterval(load_interval)); ASSERT(FromInterval(load_interval).GetInterval() == load_interval); @@ -699,338 +679,6 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { } } -Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create) { - if (params.addr == 0 || params.height * params.width == 0) { - return nullptr; - } - - ASSERT(params.width == params.stride); // Use GetSurfaceSubRect instead - - // Check for an exact match in existing surfaces - Surface surface = - FindMatch(surface_cache, params, match_res_scale); - - if (surface == nullptr) { - u16 target_res_scale = params.res_scale; - if (match_res_scale != ScaleMatch::Exact) { - // This surface may have a subrect of another surface with a higher res_scale, find it - // to adjust our params - SurfaceParams find_params = params; - Surface expandable = FindMatch( - surface_cache, find_params, match_res_scale); - if (expandable != nullptr && expandable->res_scale > target_res_scale) { - target_res_scale = expandable->res_scale; - } - // Keep res_scale when reinterpreting d24s8 -> rgba8 - if (params.pixel_format == PixelFormat::RGBA8) { - find_params.pixel_format = PixelFormat::D24S8; - expandable = FindMatch( - surface_cache, find_params, match_res_scale); - if (expandable != nullptr && expandable->res_scale > target_res_scale) { - target_res_scale = expandable->res_scale; - } - } - } - SurfaceParams new_params = params; - 
new_params.res_scale = target_res_scale; - surface = CreateSurface(new_params); - RegisterSurface(surface); - } - - if (load_if_create) { - ValidateSurface(surface, params.addr, params.size); - } - - return surface; -} - -SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, - ScaleMatch match_res_scale, - bool load_if_create) { - if (params.addr == 0 || params.height * params.width == 0) { - return {nullptr, {}}; - } - - // Attempt to find encompassing surface - Surface surface = FindMatch(surface_cache, params, - match_res_scale); - - // Check if FindMatch failed because of res scaling - // If that's the case create a new surface with - // the dimensions of the lower res_scale surface - // to suggest it should not be used again - if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { - surface = FindMatch(surface_cache, params, - ScaleMatch::Ignore); - if (surface != nullptr) { - ASSERT(surface->res_scale < params.res_scale); - SurfaceParams new_params = *surface; - new_params.res_scale = params.res_scale; - - surface = CreateSurface(new_params); - RegisterSurface(surface); - } - } - - // Check for a surface we can expand before creating a new one - if (surface == nullptr) { - surface = FindMatch(surface_cache, params, - match_res_scale); - if (surface != nullptr) { - SurfaceParams new_params = *surface; - new_params.addr = std::min(params.addr, surface->addr); - new_params.end = std::max(params.end, surface->end); - new_params.size = new_params.end - new_params.addr; - new_params.height = new_params.size / params.BytesInPixels(params.stride); - ASSERT(new_params.size % params.BytesInPixels(params.stride) == 0); - - Surface new_surface = CreateSurface(new_params); - DuplicateSurface(surface, new_surface); - - // Delete the expanded surface, this can't be done safely yet - // because it may still be in use - remove_surfaces.emplace(surface); - - surface = new_surface; - RegisterSurface(new_surface); - } - } - - // No 
subrect found - create and return a new surface - if (surface == nullptr) { - SurfaceParams new_params = params; - new_params.width = params.stride; // Can't have gaps in a surface - new_params.UpdateParams(); - // GetSurface will create the new surface and possibly adjust res_scale if necessary - surface = GetSurface(new_params, match_res_scale, load_if_create); - } else if (load_if_create) { - ValidateSurface(surface, params.addr, params.size); - } - - return {surface, surface->GetScaledSubRect(params)}; -} - -Surface RasterizerCacheOpenGL::GetTextureSurface( - const Pica::TexturingRegs::FullTextureConfig& config) { - Pica::Texture::TextureInfo info = - Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); - - SurfaceParams params; - params.addr = info.physical_address; - params.width = info.width; - params.height = info.height; - params.is_tiled = true; - params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(info.format); - params.UpdateParams(); - return GetSurface(params, ScaleMatch::Ignore, true); -} - -static u16 GetResolutionScaleFactor() { - return !Settings::values.resolution_factor - ? 
VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio() - : Settings::values.resolution_factor; -} - -SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( - bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle& viewport_rect) { - const auto& regs = Pica::g_state.regs; - const auto& config = regs.framebuffer.framebuffer; - - // update resolution_scale_factor and reset cache if changed - static u16 resolution_scale_factor = GetResolutionScaleFactor(); - if (resolution_scale_factor != GetResolutionScaleFactor()) { - resolution_scale_factor = GetResolutionScaleFactor(); - FlushAll(); - while (!surface_cache.empty()) - UnregisterSurface(*surface_cache.begin()->second.begin()); - } - - MathUtil::Rectangle viewport_clamped{ - static_cast( - MathUtil::Clamp(viewport_rect.left, 0, static_cast(config.GetWidth()))), - static_cast( - MathUtil::Clamp(viewport_rect.top, 0, static_cast(config.GetHeight()))), - static_cast( - MathUtil::Clamp(viewport_rect.right, 0, static_cast(config.GetWidth()))), - static_cast( - MathUtil::Clamp(viewport_rect.bottom, 0, static_cast(config.GetHeight())))}; - - // get color and depth surfaces - SurfaceParams color_params; - color_params.is_tiled = true; - color_params.res_scale = resolution_scale_factor; - color_params.width = config.GetWidth(); - color_params.height = config.GetHeight(); - SurfaceParams depth_params = color_params; - - color_params.addr = config.GetColorBufferPhysicalAddress(); - color_params.pixel_format = SurfaceParams::PixelFormatFromColorFormat(config.color_format); - color_params.UpdateParams(); - - depth_params.addr = config.GetDepthBufferPhysicalAddress(); - depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); - depth_params.UpdateParams(); - - auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); - auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); - - // Make sur that framebuffers don't 
overlap if both color and depth are being used - if (using_color_fb && using_depth_fb && - boost::icl::length(color_vp_interval & depth_vp_interval)) { - LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " - "overlapping framebuffers not supported!"); - using_depth_fb = false; - } - - MathUtil::Rectangle color_rect{}; - Surface color_surface = nullptr; - if (using_color_fb) - std::tie(color_surface, color_rect) = - GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); - - MathUtil::Rectangle depth_rect{}; - Surface depth_surface = nullptr; - if (using_depth_fb) - std::tie(depth_surface, depth_rect) = - GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); - - MathUtil::Rectangle fb_rect{}; - if (color_surface != nullptr && depth_surface != nullptr) { - fb_rect = color_rect; - // Color and Depth surfaces must have the same dimensions and offsets - if (color_rect.bottom != depth_rect.bottom || - color_surface->height != depth_surface->height) { - color_surface = GetSurface(color_params, ScaleMatch::Exact, false); - depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); - fb_rect = color_surface->GetScaledRect(); - } - } else if (color_surface != nullptr) { - fb_rect = color_rect; - } else if (depth_surface != nullptr) { - fb_rect = depth_rect; - } - ASSERT(!fb_rect.left && fb_rect.right == config.GetWidth() * resolution_scale_factor); - - if (color_surface != nullptr) { - ValidateSurface(color_surface, boost::icl::first(color_vp_interval), - boost::icl::length(color_vp_interval)); - } - if (depth_surface != nullptr) { - ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), - boost::icl::length(depth_vp_interval)); - } - - return {color_surface, depth_surface, fb_rect}; -} - -SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) { - MathUtil::Rectangle rect{}; - - Surface match_surface = FindMatch( - surface_cache, params, ScaleMatch::Ignore); - - if (match_surface 
!= nullptr) { - ValidateSurface(match_surface, params.addr, params.size); - - SurfaceParams match_subrect = params; - match_subrect.width = match_surface->PixelsInBytes(params.width); - match_subrect.stride = match_surface->PixelsInBytes(params.stride); - - if (match_surface->is_tiled) { - match_subrect.width /= 8; - match_subrect.stride /= 8; - match_subrect.height *= 8; - } - - rect = match_surface->GetScaledSubRect(match_subrect); - } - - return {match_surface, rect}; -} - -Surface RasterizerCacheOpenGL::GetFillSurface(const GPU::Regs::MemoryFillConfig& config) { - Surface new_surface = std::make_shared(); - - new_surface->addr = config.GetStartAddress(); - new_surface->end = config.GetEndAddress(); - new_surface->size = new_surface->end - new_surface->addr; - new_surface->type = SurfaceType::Fill; - new_surface->res_scale = std::numeric_limits::max(); - std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4); - if (config.fill_32bit) { - new_surface->fill_size = 4; - } else if (config.fill_24bit) { - new_surface->fill_size = 3; - } else { - new_surface->fill_size = 2; - } - - RegisterSurface(new_surface); - return new_surface; -} - -void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, - const Surface& dest_surface) { - ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); - - BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface, - dest_surface->GetScaledSubRect(*src_surface)); - - dest_surface->invalid_regions -= src_surface->GetInterval(); - dest_surface->invalid_regions += src_surface->invalid_regions; - - SurfaceRegions regions; - for (auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { - if (pair.second == src_surface) { - regions += pair.first; - } - } - for (auto& interval : regions) { - dirty_regions.set({interval, dest_surface}); - } -} - -void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, u32 size) { - if (size == 0) - 
return; - - const auto validate_interval = SurfaceInterval(addr, addr + size); - - if (surface->type == SurfaceType::Fill) { - // Sanity check, fill surfaces will always be valid when used - ASSERT(surface->IsRegionValid(validate_interval)); - return; - } - - const auto validate_regions = surface->invalid_regions.find(validate_interval); - - for (;;) { - const auto it = validate_regions.begin(); - if (it == surface->invalid_regions.end()) - break; - - const auto interval = *it & validate_interval; - // Look for a valid surface to copy from - SurfaceParams params = surface->FromInterval(interval); - - Surface copy_surface = - FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); - if (copy_surface != nullptr) { - SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); - CopySurface(copy_surface, surface, copy_interval); - validate_regions.erase(interval); - continue; - } - - // Load data from 3DS memory - FlushRegion(params.addr, params.size); - surface->LoadGLBuffer(params.addr, params.end); - surface->UploadGLTexture(surface->GetSubRect(params)); - validate_regions.erase(interval) - } -} - MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { u8* const dst_buffer = Memory::GetPhysicalPointer(addr); @@ -1299,13 +947,368 @@ Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params return match_surface; } +RasterizerCacheOpenGL::RasterizerCacheOpenGL() { + transfer_framebuffers[0].Create(); + transfer_framebuffers[1].Create(); +} + +RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { + FlushAll(); + while (!surface_cache.empty()) + UnregisterSurface(*surface_cache.begin()->second.begin()); + transfer_framebuffers[0].Release(); + transfer_framebuffers[1].Release(); +} + +bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, + const MathUtil::Rectangle& src_rect, + const Surface& dst_surface, + 
const MathUtil::Rectangle& dst_rect) { + if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) + return false; + + return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle, + dst_rect, src_surface->type); +} + +Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create) { + if (params.addr == 0 || params.height * params.width == 0) { + return nullptr; + } + + ASSERT(params.width == params.stride); // Use GetSurfaceSubRect instead + + // Check for an exact match in existing surfaces + Surface surface = + FindMatch(surface_cache, params, match_res_scale); + + if (surface == nullptr) { + u16 target_res_scale = params.res_scale; + if (match_res_scale != ScaleMatch::Exact) { + // This surface may have a subrect of another surface with a higher res_scale, find it + // to adjust our params + SurfaceParams find_params = params; + Surface expandable = FindMatch( + surface_cache, find_params, match_res_scale); + if (expandable != nullptr && expandable->res_scale > target_res_scale) { + target_res_scale = expandable->res_scale; + } + // Keep res_scale when reinterpreting d24s8 -> rgba8 + if (params.pixel_format == PixelFormat::RGBA8) { + find_params.pixel_format = PixelFormat::D24S8; + expandable = FindMatch( + surface_cache, find_params, match_res_scale); + if (expandable != nullptr && expandable->res_scale > target_res_scale) { + target_res_scale = expandable->res_scale; + } + } + } + SurfaceParams new_params = params; + new_params.res_scale = target_res_scale; + surface = CreateSurface(new_params); + RegisterSurface(surface); + } + + if (load_if_create) { + ValidateSurface(surface, params.addr, params.size); + } + + return surface; +} + +SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, + ScaleMatch match_res_scale, + bool load_if_create) { + if (params.addr == 0 || params.height * params.width == 0) { 
+ return {nullptr, {}}; + } + + // Attempt to find encompassing surface + Surface surface = FindMatch(surface_cache, params, + match_res_scale); + + // Check if FindMatch failed because of res scaling + // If that's the case create a new surface with + // the dimensions of the lower res_scale surface + // to suggest it should not be used again + if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { + surface = FindMatch(surface_cache, params, + ScaleMatch::Ignore); + if (surface != nullptr) { + ASSERT(surface->res_scale < params.res_scale); + SurfaceParams new_params = *surface; + new_params.res_scale = params.res_scale; + + surface = CreateSurface(new_params); + RegisterSurface(surface); + } + } + + // Check for a surface we can expand before creating a new one + if (surface == nullptr) { + surface = FindMatch(surface_cache, params, + match_res_scale); + if (surface != nullptr) { + SurfaceParams new_params = *surface; + new_params.addr = std::min(params.addr, surface->addr); + new_params.end = std::max(params.end, surface->end); + new_params.size = new_params.end - new_params.addr; + new_params.height = new_params.size / params.BytesInPixels(params.stride); + ASSERT(new_params.size % params.BytesInPixels(params.stride) == 0); + + Surface new_surface = CreateSurface(new_params); + DuplicateSurface(surface, new_surface); + + // Delete the expanded surface, this can't be done safely yet + // because it may still be in use + remove_surfaces.emplace(surface); + + surface = new_surface; + RegisterSurface(new_surface); + } + } + + // No subrect found - create and return a new surface + if (surface == nullptr) { + SurfaceParams new_params = params; + new_params.width = params.stride; // Can't have gaps in a surface + new_params.UpdateParams(); + // GetSurface will create the new surface and possibly adjust res_scale if necessary + surface = GetSurface(new_params, match_res_scale, load_if_create); + } else if (load_if_create) { + ValidateSurface(surface, 
params.addr, params.size); + } + + return {surface, surface->GetScaledSubRect(params)}; +} + +Surface RasterizerCacheOpenGL::GetTextureSurface( + const Pica::TexturingRegs::FullTextureConfig& config) { + Pica::Texture::TextureInfo info = + Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); + + SurfaceParams params; + params.addr = info.physical_address; + params.width = info.width; + params.height = info.height; + params.is_tiled = true; + params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(info.format); + params.UpdateParams(); + return GetSurface(params, ScaleMatch::Ignore, true); +} + +SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( + bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle& viewport_rect) { + const auto& regs = Pica::g_state.regs; + const auto& config = regs.framebuffer.framebuffer; + + // update resolution_scale_factor and reset cache if changed + static u16 resolution_scale_factor = GetResolutionScaleFactor(); + if (resolution_scale_factor != GetResolutionScaleFactor()) { + resolution_scale_factor = GetResolutionScaleFactor(); + FlushAll(); + while (!surface_cache.empty()) + UnregisterSurface(*surface_cache.begin()->second.begin()); + } + + MathUtil::Rectangle viewport_clamped{ + static_cast( + MathUtil::Clamp(viewport_rect.left, 0, static_cast(config.GetWidth()))), + static_cast( + MathUtil::Clamp(viewport_rect.top, 0, static_cast(config.GetHeight()))), + static_cast( + MathUtil::Clamp(viewport_rect.right, 0, static_cast(config.GetWidth()))), + static_cast( + MathUtil::Clamp(viewport_rect.bottom, 0, static_cast(config.GetHeight())))}; + + // get color and depth surfaces + SurfaceParams color_params; + color_params.is_tiled = true; + color_params.res_scale = resolution_scale_factor; + color_params.width = config.GetWidth(); + color_params.height = config.GetHeight(); + SurfaceParams depth_params = color_params; + + color_params.addr = 
config.GetColorBufferPhysicalAddress(); + color_params.pixel_format = SurfaceParams::PixelFormatFromColorFormat(config.color_format); + color_params.UpdateParams(); + + depth_params.addr = config.GetDepthBufferPhysicalAddress(); + depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); + depth_params.UpdateParams(); + + auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); + auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); + + // Make sur that framebuffers don't overlap if both color and depth are being used + if (using_color_fb && using_depth_fb && + boost::icl::length(color_vp_interval & depth_vp_interval)) { + LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " + "overlapping framebuffers not supported!"); + using_depth_fb = false; + } + + MathUtil::Rectangle color_rect{}; + Surface color_surface = nullptr; + if (using_color_fb) + std::tie(color_surface, color_rect) = + GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); + + MathUtil::Rectangle depth_rect{}; + Surface depth_surface = nullptr; + if (using_depth_fb) + std::tie(depth_surface, depth_rect) = + GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); + + MathUtil::Rectangle fb_rect{}; + if (color_surface != nullptr && depth_surface != nullptr) { + fb_rect = color_rect; + // Color and Depth surfaces must have the same dimensions and offsets + if (color_rect.bottom != depth_rect.bottom || + color_surface->height != depth_surface->height) { + color_surface = GetSurface(color_params, ScaleMatch::Exact, false); + depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); + fb_rect = color_surface->GetScaledRect(); + } + } else if (color_surface != nullptr) { + fb_rect = color_rect; + } else if (depth_surface != nullptr) { + fb_rect = depth_rect; + } + ASSERT(!fb_rect.left && fb_rect.right == config.GetWidth() * resolution_scale_factor); + + if (color_surface != nullptr) { + 
ValidateSurface(color_surface, boost::icl::first(color_vp_interval), + boost::icl::length(color_vp_interval)); + } + if (depth_surface != nullptr) { + ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), + boost::icl::length(depth_vp_interval)); + } + + return {color_surface, depth_surface, fb_rect}; +} + +Surface RasterizerCacheOpenGL::GetFillSurface(const GPU::Regs::MemoryFillConfig& config) { + Surface new_surface = std::make_shared(); + + new_surface->addr = config.GetStartAddress(); + new_surface->end = config.GetEndAddress(); + new_surface->size = new_surface->end - new_surface->addr; + new_surface->type = SurfaceType::Fill; + new_surface->res_scale = std::numeric_limits::max(); + std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4); + if (config.fill_32bit) { + new_surface->fill_size = 4; + } else if (config.fill_24bit) { + new_surface->fill_size = 3; + } else { + new_surface->fill_size = 2; + } + + RegisterSurface(new_surface); + return new_surface; +} + +SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) { + MathUtil::Rectangle rect{}; + + Surface match_surface = FindMatch( + surface_cache, params, ScaleMatch::Ignore); + + if (match_surface != nullptr) { + ValidateSurface(match_surface, params.addr, params.size); + + SurfaceParams match_subrect = params; + match_subrect.width = match_surface->PixelsInBytes(params.width); + match_subrect.stride = match_surface->PixelsInBytes(params.stride); + + if (match_surface->is_tiled) { + match_subrect.width /= 8; + match_subrect.stride /= 8; + match_subrect.height *= 8; + } + + rect = match_surface->GetScaledSubRect(match_subrect); + } + + return {match_surface, rect}; +} + +void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, + const Surface& dest_surface) { + ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); + + BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface, + 
dest_surface->GetScaledSubRect(*src_surface)); + + dest_surface->invalid_regions -= src_surface->GetInterval(); + dest_surface->invalid_regions += src_surface->invalid_regions; + + SurfaceRegions regions; + for (auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { + if (pair.second == src_surface) { + regions += pair.first; + } + } + for (auto& interval : regions) { + dirty_regions.set({interval, dest_surface}); + } +} + +void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, u32 size) { + if (size == 0) + return; + + const SurfaceInterval validate_interval(addr, addr + size); + + if (surface->type == SurfaceType::Fill) { + // Sanity check, fill surfaces will always be valid when used + ASSERT(surface->IsRegionValid(validate_interval)); + return; + } + + const auto validate_regions = surface->invalid_regions.find(validate_interval); + + for (;;) { + const auto it = validate_regions.begin(); + if (it == surface->invalid_regions.end()) + break; + + const auto interval = *it & validate_interval; + // Look for a valid surface to copy from + SurfaceParams params = surface->FromInterval(interval); + + Surface copy_surface = + FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); + if (copy_surface != nullptr) { + SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); + CopySurface(copy_surface, surface, copy_interval); + validate_regions.erase(interval); + continue; + } + + // Load data from 3DS memory + FlushRegion(params.addr, params.size); + surface->LoadGLBuffer(params.addr, params.end); + surface->UploadGLTexture(surface->GetSubRect(params)); + validate_regions.erase(interval) + } +} + void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, Surface flush_surface) { if (size == 0) return; - const auto flush_interval = SurfaceInterval(addr, addr + size); + const SurfaceInterval flush_interval(addr, addr + size); + SurfaceRegions flushed_intervals; + for (auto& pair : 
RangeFromInterval(dirty_regions, flush_interval)) { - const auto interval = pair.first & flush_interval; + const auto interval = size <= 8 + ? // this most likely comes from the cpu, flush the entire region + pair.first + : pair.first & flush_interval; auto& surface = pair.second; if (flush_surface != nullptr && surface != flush_surface) @@ -1319,10 +1322,10 @@ void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, Surface flush_surf surface->DownloadGLTexture(surface->GetSubRect(params)); } surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval)); + flushed_intervals += interval; } - // Reset dirty regions - dirty_regions.erase(flush_interval); + dirty_regions -= flushed_intervals; } void RasterizerCacheOpenGL::FlushAll() { @@ -1333,7 +1336,7 @@ void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface if (size == 0) return; - const auto invalid_interval = SurfaceInterval(addr, addr + size); + const SurfaceInterval invalid_interval(addr, addr + size); if (region_owner != nullptr) { ASSERT(region_owner->type != SurfaceType::Texture); @@ -1347,6 +1350,14 @@ void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface if (cached_surface == region_owner) continue; + // If cpu is invalidating this region we want to remove it + // to (likely) mark the memory pages as uncached + if (region_owner == nullptr && size <= 8) { + FlushRegion(cached_surface->addr, cached_surface->size, cached_surface); + remove_surfaces.emplace(cached_surface); + continue; + } + const auto interval = cached_surface->GetInterval() & invalid_interval; cached_surface->invalid_regions.insert(interval); From 0498d34d18a906a08b29b4c6c67f1703558a0ef2 Mon Sep 17 00:00:00 2001 From: James Rowe Date: Wed, 29 Nov 2017 21:09:03 -0700 Subject: [PATCH 11/32] OpenGL Cache: Ignore format reinterpretation hack Several games such as Smash will cause some regions that are cached on the gpu to be revalidated, but (seemingly) we can just 
ignore these cases. If the data is already found on the gpu in dirty_regions, then we validate those, and skip flushing that region from cpu. Its unknown if this breaks any games, but it does speed up many games. Additionally, it removes outlines in the pokemon games. --- .../renderer_opengl/gl_rasterizer_cache.cpp | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 30632dc0d..a75e504a8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -1269,11 +1269,15 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, return; } - const auto validate_regions = surface->invalid_regions.find(validate_interval); + auto validate_regions = surface->invalid_regions & validate_interval; + auto notify_validated = [&](SurfaceInterval interval) { + surface->invalid_regions.erase(interval); + validate_regions.erase(interval); + }; for (;;) { const auto it = validate_regions.begin(); - if (it == surface->invalid_regions.end()) + if (it == validate_regions.end()) break; const auto interval = *it & validate_interval; @@ -1285,15 +1289,28 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, if (copy_surface != nullptr) { SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); CopySurface(copy_surface, surface, copy_interval); - validate_regions.erase(interval); + notify_validated(copy_interval); continue; } + // HACK HACK HACK: Ignore format reinterpretation + // this is a placeholder for HW texture decoding/encoding + constexpr bool IGNORE_FORMAT_REINTERPRETING = true; + bool retry = false; + if (IGNORE_FORMAT_REINTERPRETING) { + for (const auto& pair : RangeFromInterval(dirty_regions, interval)) { + validate_regions.erase(pair.first & interval); + retry = true; + } + } + if (retry) + 
continue; + // Load data from 3DS memory FlushRegion(params.addr, params.size); surface->LoadGLBuffer(params.addr, params.end); surface->UploadGLTexture(surface->GetSubRect(params)); - validate_regions.erase(interval) + notify_validated(params.GetInterval()); } } From 72034b772deb529cc163873d995b662c4a5b1f6e Mon Sep 17 00:00:00 2001 From: James Rowe Date: Sat, 9 Dec 2017 13:51:46 -0700 Subject: [PATCH 12/32] Minor style changes --- .../renderer_opengl/gl_rasterizer_cache.cpp | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index a75e504a8..aa606e34e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -875,10 +875,11 @@ Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params const bool res_scale_matched = match_scale_type == ScaleMatch::Exact ? (params.res_scale == surface->res_scale) : (params.res_scale <= surface->res_scale); + // validity will be checked in GetCopyableInterval bool is_valid = - find_flags & MatchFlags::Copy ? true - : // validity will be checked in GetCopyableInterval - surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); + find_flags & MatchFlags::Copy + ? 
true + : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); if (!(find_flags & MatchFlags::Invalid) && !is_valid) continue; @@ -976,8 +977,8 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatc if (params.addr == 0 || params.height * params.width == 0) { return nullptr; } - - ASSERT(params.width == params.stride); // Use GetSurfaceSubRect instead + // Use GetSurfaceSubRect instead + ASSERT(params.width == params.stride); // Check for an exact match in existing surfaces Surface surface = @@ -1072,7 +1073,8 @@ SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& // No subrect found - create and return a new surface if (surface == nullptr) { SurfaceParams new_params = params; - new_params.width = params.stride; // Can't have gaps in a surface + // Can't have gaps in a surface + new_params.width = params.stride; new_params.UpdateParams(); // GetSurface will create the new surface and possibly adjust res_scale if necessary surface = GetSurface(new_params, match_res_scale, load_if_create); @@ -1141,7 +1143,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); - // Make sur that framebuffers don't overlap if both color and depth are being used + // Make sure that framebuffers don't overlap if both color and depth are being used if (using_color_fb && using_depth_fb && boost::icl::length(color_vp_interval & depth_vp_interval)) { LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " @@ -1322,10 +1324,8 @@ void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, Surface flush_surf SurfaceRegions flushed_intervals; for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { - const auto interval = size <= 8 - ? 
// this most likely comes from the cpu, flush the entire region - pair.first - : pair.first & flush_interval; + // small sizes imply that this most likely comes from the cpu, flush the entire region + const auto interval = size <= 8 ? pair.first : pair.first & flush_interval; auto& surface = pair.second; if (flush_surface != nullptr && surface != flush_surface) @@ -1358,7 +1358,8 @@ void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface if (region_owner != nullptr) { ASSERT(region_owner->type != SurfaceType::Texture); ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); - ASSERT(region_owner->width == region_owner->stride); // Surfaces can't have a gap + // Surfaces can't have a gap + ASSERT(region_owner->width == region_owner->stride); region_owner->invalid_regions.erase(invalid_interval); } From 34ff77f5f75c4532406a3e140f037ddc098174ad Mon Sep 17 00:00:00 2001 From: James Rowe Date: Wed, 13 Dec 2017 09:12:09 -0700 Subject: [PATCH 13/32] Revert "OpenGL Cache: Ignore format reinterpretation hack" Testing found a few games that did some crazy things which breaks the assumptions made in that commit. 
--- .../renderer_opengl/gl_rasterizer_cache.cpp | 25 +++---------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index aa606e34e..a7cb34b70 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -1271,15 +1271,11 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, return; } - auto validate_regions = surface->invalid_regions & validate_interval; - auto notify_validated = [&](SurfaceInterval interval) { - surface->invalid_regions.erase(interval); - validate_regions.erase(interval); - }; + const auto validate_regions = surface->invalid_regions.find(validate_interval); for (;;) { const auto it = validate_regions.begin(); - if (it == validate_regions.end()) + if (it == surface->invalid_regions.end()) break; const auto interval = *it & validate_interval; @@ -1291,28 +1287,15 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, if (copy_surface != nullptr) { SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); CopySurface(copy_surface, surface, copy_interval); - notify_validated(copy_interval); + validate_regions.erase(interval); continue; } - // HACK HACK HACK: Ignore format reinterpretation - // this is a placeholder for HW texture decoding/encoding - constexpr bool IGNORE_FORMAT_REINTERPRETING = true; - bool retry = false; - if (IGNORE_FORMAT_REINTERPRETING) { - for (const auto& pair : RangeFromInterval(dirty_regions, interval)) { - validate_regions.erase(pair.first & interval); - retry = true; - } - } - if (retry) - continue; - // Load data from 3DS memory FlushRegion(params.addr, params.size); surface->LoadGLBuffer(params.addr, params.end); surface->UploadGLTexture(surface->GetSubRect(params)); - notify_validated(params.GetInterval()); + validate_regions.erase(interval) } } From 
91fad7010ba7595de49b05602f287261b2727a10 Mon Sep 17 00:00:00 2001 From: James Rowe Date: Wed, 13 Dec 2017 10:22:29 -0700 Subject: [PATCH 14/32] Fix compilation on mac and linux --- .../renderer_opengl/gl_rasterizer_cache.cpp | 12 ++++++------ src/video_core/renderer_opengl/gl_rasterizer_cache.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index a7cb34b70..e6c290293 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -1022,7 +1022,7 @@ SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& ScaleMatch match_res_scale, bool load_if_create) { if (params.addr == 0 || params.height * params.width == 0) { - return {nullptr, {}}; + return std::make_tuple(nullptr, MathUtil::Rectangle{}); } // Attempt to find encompassing surface @@ -1082,7 +1082,7 @@ SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& ValidateSurface(surface, params.addr, params.size); } - return {surface, surface->GetScaledSubRect(params)}; + return std::make_tuple(surface, surface->GetScaledSubRect(params)); } Surface RasterizerCacheOpenGL::GetTextureSurface( @@ -1189,7 +1189,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( boost::icl::length(depth_vp_interval)); } - return {color_surface, depth_surface, fb_rect}; + return std::make_tuple(color_surface, depth_surface, fb_rect); } Surface RasterizerCacheOpenGL::GetFillSurface(const GPU::Regs::MemoryFillConfig& config) { @@ -1235,7 +1235,7 @@ SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& rect = match_surface->GetScaledSubRect(match_subrect); } - return {match_surface, rect}; + return std::make_tuple(match_surface, rect); } void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, @@ -1271,7 +1271,7 @@ void 
RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, return; } - const auto validate_regions = surface->invalid_regions.find(validate_interval); + auto validate_regions = surface->invalid_regions & validate_interval; for (;;) { const auto it = validate_regions.begin(); @@ -1295,7 +1295,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, FlushRegion(params.addr, params.size); surface->LoadGLBuffer(params.addr, params.end); surface->UploadGLTexture(surface->GetSubRect(params)); - validate_regions.erase(interval) + validate_regions.erase(interval); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index b2faa3916..e4db19af7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -111,7 +111,7 @@ struct SurfaceParams { 32, // D24S8 }; - ASSERT(static_cast(format) < bpp_table.size()); + assert(static_cast(format) < bpp_table.size()); return bpp_table[static_cast(format)]; } unsigned int GetFormatBpp() const { From f893daa4a2cdb58192116f474edb503b9d08fae8 Mon Sep 17 00:00:00 2001 From: Phantom Date: Wed, 13 Dec 2017 19:18:09 +0100 Subject: [PATCH 15/32] Perform the same checks on TexCopy params that SW does --- .../renderer_opengl/gl_rasterizer.cpp | 40 ++++++++++++++----- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ddca45e24..69797114a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -7,6 +7,7 @@ #include #include #include +#include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" #include "common/math_util.h" @@ -1013,25 +1014,42 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe } bool 
RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { - u32 input_width = config.texture_copy.input_width * 16; + u32 copy_size = Common::AlignDown(config.texture_copy.size, 16); + + if (copy_size == 0) + return false; + u32 input_gap = config.texture_copy.input_gap * 16; - u32 output_width = config.texture_copy.output_width * 16; + u32 input_width = config.texture_copy.input_width * 16; + if (input_width == 0) { + if (input_gap == 0) { + input_width = copy_size; + } else { + return false; + } + } + u32 output_gap = config.texture_copy.output_gap * 16; + u32 output_width = config.texture_copy.output_width * 16; + if (output_width == 0) { + if (output_gap == 0) { + output_width = copy_size; + } else { + return false; + } + } - if (config.texture_copy.size == 0) - return true; - - if (input_width >= config.texture_copy.size) { - input_width = config.texture_copy.size; + if (input_width >= copy_size) { + input_width = copy_size; input_gap = 0; } - if (output_width >= config.texture_copy.size) { - output_width = config.texture_copy.size; + if (output_width >= copy_size) { + output_width = copy_size; output_gap = 0; } - if (input_width != output_width || config.texture_copy.size % input_width != 0) { + if (input_width != output_width || copy_size % input_width != 0) { return false; } @@ -1039,7 +1057,7 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon src_params.addr = config.GetPhysicalInputAddress(); src_params.stride = input_width + input_gap; // stride in bytes src_params.width = input_width; // width in bytes - src_params.height = config.texture_copy.size / input_width; + src_params.height = copy_size / input_width; src_params.size = ((src_params.height - 1) * src_params.stride) + src_params.width; src_params.end = src_params.addr + src_params.size; From 9a6a452857250455050d06b86767925563b25007 Mon Sep 17 00:00:00 2001 From: Phantom Date: Wed, 13 Dec 2017 19:22:02 +0100 Subject: [PATCH 16/32] Fix 
broken surface validation logic since removal of the reinterpret hack --- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index e6c290293..3939c86c2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -1271,10 +1271,8 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, return; } - auto validate_regions = surface->invalid_regions & validate_interval; - for (;;) { - const auto it = validate_regions.begin(); + const auto it = surface->invalid_regions.find(validate_interval); if (it == surface->invalid_regions.end()) break; @@ -1287,7 +1285,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, if (copy_surface != nullptr) { SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); CopySurface(copy_surface, surface, copy_interval); - validate_regions.erase(interval); + surface->invalid_regions.erase(interval); continue; } @@ -1295,7 +1293,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, FlushRegion(params.addr, params.size); surface->LoadGLBuffer(params.addr, params.end); surface->UploadGLTexture(surface->GetSubRect(params)); - validate_regions.erase(interval); + surface->invalid_regions.erase(interval); } } From ac4c589ab56af1ce44899868506b0c3e12b7f644 Mon Sep 17 00:00:00 2001 From: James Rowe Date: Wed, 13 Dec 2017 12:04:17 -0700 Subject: [PATCH 17/32] Workaround for ICE on gcc5 --- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 3939c86c2..3798d9bb3 100644 --- 
a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -872,7 +872,7 @@ Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { for (auto& surface : pair.second) { - const bool res_scale_matched = match_scale_type == ScaleMatch::Exact + bool res_scale_matched = match_scale_type == ScaleMatch::Exact ? (params.res_scale == surface->res_scale) : (params.res_scale <= surface->res_scale); // validity will be checked in GetCopyableInterval From 7e673af52755dfb016ccb569b3be0af20043a202 Mon Sep 17 00:00:00 2001 From: James Rowe Date: Thu, 14 Dec 2017 10:33:03 -0700 Subject: [PATCH 18/32] Remove the correct intervals from the surface when validating --- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 3798d9bb3..6b52d2798 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -873,8 +873,8 @@ Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { for (auto& surface : pair.second) { bool res_scale_matched = match_scale_type == ScaleMatch::Exact - ? (params.res_scale == surface->res_scale) - : (params.res_scale <= surface->res_scale); + ? 
(params.res_scale == surface->res_scale) + : (params.res_scale <= surface->res_scale); // validity will be checked in GetCopyableInterval bool is_valid = find_flags & MatchFlags::Copy @@ -1285,7 +1285,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, if (copy_surface != nullptr) { SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); CopySurface(copy_surface, surface, copy_interval); - surface->invalid_regions.erase(interval); + surface->invalid_regions.erase(copy_interval); continue; } @@ -1293,7 +1293,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, FlushRegion(params.addr, params.size); surface->LoadGLBuffer(params.addr, params.end); surface->UploadGLTexture(surface->GetSubRect(params)); - surface->invalid_regions.erase(interval); + surface->invalid_regions.erase(params.GetInterval()); } } From 4e053220a826676d0765ef9eee313a4b13e3cd0e Mon Sep 17 00:00:00 2001 From: James Rowe Date: Sat, 16 Dec 2017 10:42:06 -0700 Subject: [PATCH 19/32] When downloading from a surface into gl_buffer, ignore any x/y offsets in rect and use 0,0 as the origin --- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 6b52d2798..37c2abe3d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -780,6 +780,7 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle& rect) { scaled_rect, type); } } + void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle& rect) { if (type == SurfaceType::Fill) return; @@ -801,7 +802,6 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle& rect) { size_t buffer_offset = (rect.bottom * stride + rect.left) * GetGLBytesPerPixel(pixel_format); // If not 1x scale, blit scaled
texture to a new 1x texture and use that to flush - OGLTexture unscaled_tex; if (res_scale != 1) { auto scaled_rect = rect; scaled_rect.left *= res_scale; @@ -809,9 +809,12 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle& rect) { scaled_rect.right *= res_scale; scaled_rect.bottom *= res_scale; + OGLTexture unscaled_tex; unscaled_tex.Create(); + + MathUtil::Rectangle unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0}; AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); - BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, rect, type); + BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type); state.texture_units[0].texture_2d = unscaled_tex.handle; state.Apply(); From 10fb9242ae9b347d7dd86f34de806c242c78cc8e Mon Sep 17 00:00:00 2001 From: James Rowe Date: Sat, 23 Dec 2017 08:39:00 -0700 Subject: [PATCH 20/32] Fix clang format --- src/core/hw/gpu.cpp | 10 ++++------ src/video_core/renderer_opengl/gl_rasterizer.cpp | 5 +++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index d65af4aff..8ba679ea7 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -439,18 +439,16 @@ inline void Write(u32 addr, const T data) { if (config.is_texture_copy) { TextureCopy(config); - LOG_TRACE(HW_GPU, - "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> " - "0x%08X(%u+%u), flags 0x%08X", + LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> " + "0x%08X(%u+%u), flags 0x%08X", config.texture_copy.size, config.GetPhysicalInputAddress(), config.texture_copy.input_width * 16, config.texture_copy.input_gap * 16, config.GetPhysicalOutputAddress(), config.texture_copy.output_width * 16, config.texture_copy.output_gap * 16, config.flags); } else { DisplayTransfer(config); - LOG_TRACE(HW_GPU, - "DisplayTransfer: 0x%08x(%ux%u)-> " - "0x%08x(%ux%u), dst format %x, flags 0x%08X", + LOG_TRACE(HW_GPU, "DisplayTransfer: 
0x%08x(%ux%u)-> " + "0x%08x(%ux%u), dst format %x, flags 0x%08X", config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), config.GetPhysicalOutputAddress(), config.output_width.Value(), config.output_height.Value(), diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 69797114a..6b3bbaab8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1074,8 +1074,9 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon SurfaceParams dst_params = *src_surface; dst_params.addr = config.GetPhysicalOutputAddress(); dst_params.width = src_rect.GetWidth() / src_surface->res_scale; - dst_params.stride = dst_params.width + src_surface->PixelsInBytes( - src_surface->is_tiled ? output_gap / 8 : output_gap); + dst_params.stride = + dst_params.width + + src_surface->PixelsInBytes(src_surface->is_tiled ? output_gap / 8 : output_gap); dst_params.height = src_rect.GetHeight() / src_surface->res_scale; dst_params.res_scale = src_surface->res_scale; dst_params.UpdateParams(); From 1c4d1d1aceed4a0be369b1969587e9c710fc083e Mon Sep 17 00:00:00 2001 From: James Rowe Date: Sat, 23 Dec 2017 16:09:25 -0700 Subject: [PATCH 21/32] Move transfer_framebuffers to a member of RasterCache.
Address review comments --- src/core/memory.cpp | 84 ++++++++++--------- .../renderer_opengl/gl_rasterizer.cpp | 4 +- .../renderer_opengl/gl_rasterizer_cache.cpp | 47 ++++++----- .../renderer_opengl/gl_rasterizer_cache.h | 9 +- 4 files changed, 83 insertions(+), 61 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 9b01bfc8c..17339c66b 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -404,61 +404,69 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached) { } void RasterizerFlushRegion(PAddr start, u32 size) { - if (VideoCore::g_renderer != nullptr) { - VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size); + if (VideoCore::g_renderer == nullptr) { + return; } + + VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size); } void RasterizerInvalidateRegion(PAddr start, u32 size) { - if (VideoCore::g_renderer != nullptr) { - VideoCore::g_renderer->Rasterizer()->InvalidateRegion(start, size); + if (VideoCore::g_renderer == nullptr) { + return; } + + VideoCore::g_renderer->Rasterizer()->InvalidateRegion(start, size); } void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) { // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be // null here - if (VideoCore::g_renderer != nullptr) { - VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(start, size); + if (VideoCore::g_renderer == nullptr) { + return; } + + VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(start, size); } void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) { // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be // null here - if (VideoCore::g_renderer != nullptr) { - VAddr end = start + size; - - auto CheckRegion = [&](VAddr region_start, VAddr region_end) { - if (start >= region_end || end <= region_start) { - // No overlap with region - return; - } - - VAddr overlap_start = std::max(start, region_start); - VAddr 
overlap_end = std::min(end, region_end); - - PAddr physical_start = TryVirtualToPhysicalAddress(overlap_start).value(); - u32 overlap_size = overlap_end - overlap_start; - - auto* rasterizer = VideoCore::g_renderer->Rasterizer(); - switch (mode) { - case FlushMode::Flush: - rasterizer->FlushRegion(physical_start, overlap_size); - break; - case FlushMode::Invalidate: - rasterizer->InvalidateRegion(physical_start, overlap_size); - break; - case FlushMode::FlushAndInvalidate: - rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size); - break; - } - }; - - CheckRegion(LINEAR_HEAP_VADDR, LINEAR_HEAP_VADDR_END); - CheckRegion(NEW_LINEAR_HEAP_VADDR, NEW_LINEAR_HEAP_VADDR_END); - CheckRegion(VRAM_VADDR, VRAM_VADDR_END); + if (VideoCore::g_renderer == nullptr) { + return; } + + VAddr end = start + size; + + auto CheckRegion = [&](VAddr region_start, VAddr region_end) { + if (start >= region_end || end <= region_start) { + // No overlap with region + return; + } + + VAddr overlap_start = std::max(start, region_start); + VAddr overlap_end = std::min(end, region_end); + + PAddr physical_start = TryVirtualToPhysicalAddress(overlap_start).value(); + u32 overlap_size = overlap_end - overlap_start; + + auto* rasterizer = VideoCore::g_renderer->Rasterizer(); + switch (mode) { + case FlushMode::Flush: + rasterizer->FlushRegion(physical_start, overlap_size); + break; + case FlushMode::Invalidate: + rasterizer->InvalidateRegion(physical_start, overlap_size); + break; + case FlushMode::FlushAndInvalidate: + rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size); + break; + } + }; + + CheckRegion(LINEAR_HEAP_VADDR, LINEAR_HEAP_VADDR_END); + CheckRegion(NEW_LINEAR_HEAP_VADDR, NEW_LINEAR_HEAP_VADDR_END); + CheckRegion(VRAM_VADDR, VRAM_VADDR_END); } u8 Read8(const VAddr addr) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6b3bbaab8..f4ead0848 100644 --- 
a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1150,8 +1150,8 @@ void RasterizerOpenGL::SamplerInfo::Create() { wrap_s = wrap_t = TextureConfig::Repeat; border_color = 0; - glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, - GL_LINEAR); // default is GL_LINEAR_MIPMAP_LINEAR + // default is GL_LINEAR_MIPMAP_LINEAR + glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); // Other attributes have correct defaults } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 37c2abe3d..4e7dc1048 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -34,8 +34,6 @@ using SurfaceType = SurfaceParams::SurfaceType; using PixelFormat = SurfaceParams::PixelFormat; -static std::array transfer_framebuffers; - struct FormatTuple { GLint internal_format; GLenum format; @@ -153,7 +151,7 @@ static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr glbuf_next_tile(); } - u8* const buffer_end = tile_buffer + aligned_end - aligned_start; + const u8* const buffer_end = tile_buffer + aligned_end - aligned_start; while (tile_buffer < buffer_end) { MortonCopyTile(stride, tile_buffer, gl_buffer); tile_buffer += tile_size; @@ -234,7 +232,8 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup } static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rect, GLuint dst_tex, - const MathUtil::Rectangle& dst_rect, SurfaceType type) { + const MathUtil::Rectangle& dst_rect, SurfaceType type, + GLuint read_handle, GLuint draw_handle) { OpenGLState state = OpenGLState::GetCurState(); OpenGLState prev_state = state; @@ -246,8 +245,8 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rec state.ResetTexture(dst_tex); // Keep track of previous framebuffer bindings - 
state.draw.read_framebuffer = transfer_framebuffers[0].handle; - state.draw.draw_framebuffer = transfer_framebuffers[1].handle; + state.draw.read_framebuffer = read_handle; + state.draw.draw_framebuffer = draw_handle; state.Apply(); u32 buffers = 0; @@ -294,7 +293,7 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rec } static bool FillSurface(const Surface& surface, const u8* fill_data, - const MathUtil::Rectangle& fill_rect) { + const MathUtil::Rectangle& fill_rect, GLuint draw_handle) { OpenGLState state = OpenGLState::GetCurState(); OpenGLState prev_state = state; @@ -308,7 +307,7 @@ static bool FillSurface(const Surface& surface, const u8* fill_data, state.scissor.width = static_cast(fill_rect.GetWidth()); state.scissor.height = static_cast(fill_rect.GetHeight()); - state.draw.draw_framebuffer = transfer_framebuffers[1].handle; + state.draw.draw_framebuffer = draw_handle; state.Apply(); if (surface->type == SurfaceType::Color || surface->type == SurfaceType::Texture) { @@ -610,13 +609,14 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac for (int i : {0, 1, 2, 3}) fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; - FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params)); + FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params), + draw_framebuffer.handle); return; } if (src_surface->CanSubRect(subrect_params)) { BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(subrect_params), dst_surface->texture.handle, dst_surface->GetScaledSubRect(subrect_params), - src_surface->type); + src_surface->type, read_framebuffer.handle, draw_framebuffer.handle); return; } UNREACHABLE(); @@ -777,7 +777,7 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle& rect) { scaled_rect.bottom *= res_scale; BlitTextures(unscaled_tex.handle, {0, rect.GetHeight(), rect.GetWidth(), 0}, texture.handle, - 
scaled_rect, type); + scaled_rect, type, read_framebuffer_handle, draw_framebuffer_handle); } } @@ -814,7 +814,8 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle& rect) { MathUtil::Rectangle unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0}; AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); - BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type); + BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type, + read_framebuffer_handle, draw_framebuffer_handle); state.texture_units[0].texture_2d = unscaled_tex.handle; state.Apply(); @@ -823,7 +824,7 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle& rect) { glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]); } else { state.ResetTexture(texture.handle); - state.draw.read_framebuffer = transfer_framebuffers[0].handle; + state.draw.read_framebuffer = read_framebuffer_handle; state.Apply(); if (type == SurfaceType::Color || type == SurfaceType::Texture) { @@ -952,16 +953,16 @@ Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params } RasterizerCacheOpenGL::RasterizerCacheOpenGL() { - transfer_framebuffers[0].Create(); - transfer_framebuffers[1].Create(); + read_framebuffer.Create(); + draw_framebuffer.Create(); } RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { FlushAll(); while (!surface_cache.empty()) UnregisterSurface(*surface_cache.begin()->second.begin()); - transfer_framebuffers[0].Release(); - transfer_framebuffers[1].Release(); + read_framebuffer.Release(); + draw_framebuffer.Release(); } bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, @@ -972,7 +973,8 @@ bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, return false; return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle, - dst_rect, src_surface->type); + dst_rect, src_surface->type, 
read_framebuffer.handle, + draw_framebuffer.handle); } Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, @@ -1203,6 +1205,9 @@ Surface RasterizerCacheOpenGL::GetFillSurface(const GPU::Regs::MemoryFillConfig& new_surface->size = new_surface->end - new_surface->addr; new_surface->type = SurfaceType::Fill; new_surface->res_scale = std::numeric_limits::max(); + new_surface->read_framebuffer_handle = read_framebuffer.handle; + new_surface->draw_framebuffer_handle = draw_framebuffer.handle; + std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4); if (config.fill_32bit) { new_surface->fill_size = 4; @@ -1274,7 +1279,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, return; } - for (;;) { + while (true) { const auto it = surface->invalid_regions.find(validate_interval); if (it == surface->invalid_regions.end()) break; @@ -1309,6 +1314,8 @@ void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, Surface flush_surf for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { // small sizes imply that this most likely comes from the cpu, flush the entire region + // the point is to avoid thousands of small writes every frame if the cpu decides to access + // that region, anything higher than 8 you're guaranteed it comes from a service const auto interval = size <= 8 ? 
pair.first : pair.first & flush_interval; auto& surface = pair.second; @@ -1397,6 +1404,8 @@ void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { Surface surface = std::make_shared(); static_cast(*surface) = params; + surface->read_framebuffer_handle = read_framebuffer.handle; + surface->draw_framebuffer_handle = draw_framebuffer.handle; surface->texture.Create(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index e4db19af7..c4632f24e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -224,11 +224,11 @@ struct SurfaceParams { } u32 PixelsInBytes(u32 size) const { - return size * 8 / GetFormatBpp(pixel_format); + return size * CHAR_BIT / GetFormatBpp(pixel_format); } u32 BytesInPixels(u32 pixels) const { - return pixels * GetFormatBpp(pixel_format) / 8; + return pixels * GetFormatBpp(pixel_format) / CHAR_BIT; } bool ExactMatch(const SurfaceParams& other_surface) const; @@ -284,6 +284,9 @@ struct CachedSurface : SurfaceParams { std::unique_ptr gl_buffer; size_t gl_buffer_size = 0; + GLuint read_framebuffer_handle; + GLuint draw_framebuffer_handle; + // Read/Write data in 3DS memory to/from gl_buffer void LoadGLBuffer(PAddr load_start, PAddr load_end); void FlushGLBuffer(PAddr flush_start, PAddr flush_end); @@ -359,4 +362,6 @@ private: SurfaceMap dirty_regions; PageMap cached_pages; SurfaceSet remove_surfaces; + OGLFramebuffer read_framebuffer; + OGLFramebuffer draw_framebuffer; }; From 1591fa8d3de145847220cc4e53bb50682ab609e1 Mon Sep 17 00:00:00 2001 From: Phantom Date: Fri, 29 Dec 2017 17:00:09 +0100 Subject: [PATCH 22/32] Remove read_framebuffer_handle and draw_framebuffer_handle from CachedSurface --- .../renderer_opengl/gl_rasterizer_cache.cpp | 34 +++++++++---------- .../renderer_opengl/gl_rasterizer_cache.h | 
12 +++---- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 4e7dc1048..66b6022cb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -233,7 +233,7 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rect, GLuint dst_tex, const MathUtil::Rectangle& dst_rect, SurfaceType type, - GLuint read_handle, GLuint draw_handle) { + GLuint read_fb_handle, GLuint draw_fb_handle) { OpenGLState state = OpenGLState::GetCurState(); OpenGLState prev_state = state; @@ -245,8 +245,8 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rec state.ResetTexture(dst_tex); // Keep track of previous framebuffer bindings - state.draw.read_framebuffer = read_handle; - state.draw.draw_framebuffer = draw_handle; + state.draw.read_framebuffer = read_fb_handle; + state.draw.draw_framebuffer = draw_fb_handle; state.Apply(); u32 buffers = 0; @@ -293,7 +293,7 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rec } static bool FillSurface(const Surface& surface, const u8* fill_data, - const MathUtil::Rectangle& fill_rect, GLuint draw_handle) { + const MathUtil::Rectangle& fill_rect, GLuint draw_fb_handle) { OpenGLState state = OpenGLState::GetCurState(); OpenGLState prev_state = state; @@ -307,7 +307,7 @@ static bool FillSurface(const Surface& surface, const u8* fill_data, state.scissor.width = static_cast(fill_rect.GetWidth()); state.scissor.height = static_cast(fill_rect.GetHeight()); - state.draw.draw_framebuffer = draw_handle; + state.draw.draw_framebuffer = draw_fb_handle; state.Apply(); if (surface->type == SurfaceType::Color || surface->type == SurfaceType::Texture) { @@ -723,7 +723,8 @@ void CachedSurface::FlushGLBuffer(PAddr 
flush_start, PAddr flush_end) { } } -void CachedSurface::UploadGLTexture(const MathUtil::Rectangle& rect) { +void CachedSurface::UploadGLTexture(const MathUtil::Rectangle& rect, GLuint read_fb_handle, + GLuint draw_fb_handle) { if (type == SurfaceType::Fill) return; @@ -777,11 +778,12 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle& rect) { scaled_rect.bottom *= res_scale; BlitTextures(unscaled_tex.handle, {0, rect.GetHeight(), rect.GetWidth(), 0}, texture.handle, - scaled_rect, type, read_framebuffer_handle, draw_framebuffer_handle); + scaled_rect, type, read_fb_handle, draw_fb_handle); } } -void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle& rect) { +void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle& rect, GLuint read_fb_handle, + GLuint draw_fb_handle) { if (type == SurfaceType::Fill) return; @@ -815,7 +817,7 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle& rect) { MathUtil::Rectangle unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0}; AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type, - read_framebuffer_handle, draw_framebuffer_handle); + read_fb_handle, draw_fb_handle); state.texture_units[0].texture_2d = unscaled_tex.handle; state.Apply(); @@ -824,7 +826,7 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle& rect) { glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]); } else { state.ResetTexture(texture.handle); - state.draw.read_framebuffer = read_framebuffer_handle; + state.draw.read_framebuffer = read_fb_handle; state.Apply(); if (type == SurfaceType::Color || type == SurfaceType::Texture) { @@ -961,8 +963,6 @@ RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { FlushAll(); while (!surface_cache.empty()) UnregisterSurface(*surface_cache.begin()->second.begin()); - read_framebuffer.Release(); - 
draw_framebuffer.Release(); } bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, @@ -1205,8 +1205,6 @@ Surface RasterizerCacheOpenGL::GetFillSurface(const GPU::Regs::MemoryFillConfig& new_surface->size = new_surface->end - new_surface->addr; new_surface->type = SurfaceType::Fill; new_surface->res_scale = std::numeric_limits::max(); - new_surface->read_framebuffer_handle = read_framebuffer.handle; - new_surface->draw_framebuffer_handle = draw_framebuffer.handle; std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4); if (config.fill_32bit) { @@ -1300,7 +1298,8 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, // Load data from 3DS memory FlushRegion(params.addr, params.size); surface->LoadGLBuffer(params.addr, params.end); - surface->UploadGLTexture(surface->GetSubRect(params)); + surface->UploadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, + draw_framebuffer.handle); surface->invalid_regions.erase(params.GetInterval()); } } @@ -1327,7 +1326,8 @@ void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, Surface flush_surf if (surface->type != SurfaceType::Fill) { SurfaceParams params = surface->FromInterval(interval); - surface->DownloadGLTexture(surface->GetSubRect(params)); + surface->DownloadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, + draw_framebuffer.handle); } surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval)); flushed_intervals += interval; @@ -1404,8 +1404,6 @@ void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { Surface surface = std::make_shared(); static_cast(*surface) = params; - surface->read_framebuffer_handle = read_framebuffer.handle; - surface->draw_framebuffer_handle = draw_framebuffer.handle; surface->texture.Create(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h 
b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index c4632f24e..deb091317 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -284,16 +284,15 @@ struct CachedSurface : SurfaceParams { std::unique_ptr gl_buffer; size_t gl_buffer_size = 0; - GLuint read_framebuffer_handle; - GLuint draw_framebuffer_handle; - // Read/Write data in 3DS memory to/from gl_buffer void LoadGLBuffer(PAddr load_start, PAddr load_end); void FlushGLBuffer(PAddr flush_start, PAddr flush_end); // Upload/Download data in gl_buffer in/to this surface's texture - void UploadGLTexture(const MathUtil::Rectangle& rect); - void DownloadGLTexture(const MathUtil::Rectangle& rect); + void UploadGLTexture(const MathUtil::Rectangle& rect, GLuint read_fb_handle, + GLuint draw_fb_handle); + void DownloadGLTexture(const MathUtil::Rectangle& rect, GLuint read_fb_handle, + GLuint draw_fb_handle); }; class RasterizerCacheOpenGL : NonCopyable { @@ -359,9 +358,10 @@ private: void UpdatePagesCachedCount(PAddr addr, u32 size, int delta); SurfaceCache surface_cache; - SurfaceMap dirty_regions; PageMap cached_pages; + SurfaceMap dirty_regions; SurfaceSet remove_surfaces; + OGLFramebuffer read_framebuffer; OGLFramebuffer draw_framebuffer; }; From 19672cfee806bba23b6da4581f87a9b1826f3224 Mon Sep 17 00:00:00 2001 From: Phantom Date: Fri, 29 Dec 2017 17:01:37 +0100 Subject: [PATCH 23/32] CachedSurface: Add microprofile scopes for UploadGLTexture and DownloadGLTexture --- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 66b6022cb..aa3009b5b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -723,11 +723,14 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { } } 
+MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); void CachedSurface::UploadGLTexture(const MathUtil::Rectangle& rect, GLuint read_fb_handle, GLuint draw_fb_handle) { if (type == SurfaceType::Fill) return; + MICROPROFILE_SCOPE(OpenGL_TextureUL); + ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format)); // Load data from memory to the surface @@ -782,11 +785,14 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle& rect, GLuint } } +MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64)); void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle& rect, GLuint read_fb_handle, GLuint draw_fb_handle) { if (type == SurfaceType::Fill) return; + MICROPROFILE_SCOPE(OpenGL_TextureDL); + if (gl_buffer == nullptr) { gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); gl_buffer.reset(new u8[gl_buffer_size]); From be1d0cee1e131eb4cd1c8becf3e3714644bd9495 Mon Sep 17 00:00:00 2001 From: Phantom Date: Fri, 29 Dec 2017 17:07:01 +0100 Subject: [PATCH 24/32] Fix viewport to surface rect clamping --- .../renderer_opengl/gl_rasterizer.cpp | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f4ead0848..3ceab54a2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -271,14 +271,18 @@ void RasterizerOpenGL::DrawTriangles() { : (depth_surface == nullptr ? 
1u : depth_surface->res_scale); MathUtil::Rectangle draw_rect{ - MathUtil::Clamp(surfaces_rect.left + viewport_rect_unscaled.left * res_scale, // left - surfaces_rect.left, surfaces_rect.right), - MathUtil::Clamp(surfaces_rect.bottom + viewport_rect_unscaled.top * res_scale, // top - surfaces_rect.bottom, surfaces_rect.top), - MathUtil::Clamp(surfaces_rect.left + viewport_rect_unscaled.right * res_scale, // right - surfaces_rect.left, surfaces_rect.right), - MathUtil::Clamp(surfaces_rect.bottom + viewport_rect_unscaled.bottom * res_scale, // bottom - surfaces_rect.bottom, surfaces_rect.top)}; + static_cast(MathUtil::Clamp(static_cast(surfaces_rect.left) + + viewport_rect_unscaled.left * res_scale, + surfaces_rect.left, surfaces_rect.right)), // Left + static_cast(MathUtil::Clamp(static_cast(surfaces_rect.bottom) + + viewport_rect_unscaled.top * res_scale, + surfaces_rect.bottom, surfaces_rect.top)), // Top + static_cast(MathUtil::Clamp(static_cast(surfaces_rect.left) + + viewport_rect_unscaled.right * res_scale, + surfaces_rect.left, surfaces_rect.right)), // Right + static_cast(MathUtil::Clamp(static_cast(surfaces_rect.bottom) + + viewport_rect_unscaled.bottom * res_scale, + surfaces_rect.bottom, surfaces_rect.top))}; // Bottom // Bind the framebuffer surfaces state.draw.draw_framebuffer = framebuffer.handle; @@ -306,9 +310,9 @@ void RasterizerOpenGL::DrawTriangles() { // Sync the viewport state.viewport.x = - static_cast(surfaces_rect.left + viewport_rect_unscaled.left * res_scale); + static_cast(surfaces_rect.left) + viewport_rect_unscaled.left * res_scale; state.viewport.y = - static_cast(surfaces_rect.bottom + viewport_rect_unscaled.bottom * res_scale); + static_cast(surfaces_rect.bottom) + viewport_rect_unscaled.bottom * res_scale; state.viewport.width = static_cast(viewport_rect_unscaled.GetWidth() * res_scale); state.viewport.height = static_cast(viewport_rect_unscaled.GetHeight() * res_scale); From 7f1aec8fbb96b5c29c91332250eef60dff4c8f12 Mon Sep 17 
00:00:00 2001 From: Phantom Date: Sat, 30 Dec 2017 07:42:32 +0100 Subject: [PATCH 25/32] Support for textures smaller than 8*8 --- .../renderer_opengl/gl_rasterizer_cache.cpp | 89 +++++++++++-------- .../renderer_opengl/gl_rasterizer_cache.h | 1 + 2 files changed, 55 insertions(+), 35 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index aa3009b5b..066a58085 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -449,27 +449,18 @@ MathUtil::Rectangle SurfaceParams::GetScaledSubRect(const SurfaceParams& su bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { return (other_surface.addr == addr && other_surface.width == width && other_surface.height == height && other_surface.stride == stride && - other_surface.pixel_format == pixel_format && other_surface.is_tiled == is_tiled); + other_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && + other_surface.is_tiled == is_tiled); } bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { - if (sub_surface.addr < addr || sub_surface.end > end || sub_surface.stride != stride || - sub_surface.pixel_format != pixel_format || sub_surface.is_tiled != is_tiled || - (sub_surface.addr - addr) * 8 % GetFormatBpp() != 0) - return false; - - auto rect = GetSubRect(sub_surface); - - if (rect.left + sub_surface.width > stride) { - return false; - } - - if (is_tiled) { - return PixelsInBytes(sub_surface.addr - addr) % 64 == 0 && sub_surface.height % 8 == 0 && - sub_surface.width % 8 == 0; - } - - return true; + return (sub_surface.addr >= addr && sub_surface.end <= end && + sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && + sub_surface.is_tiled == is_tiled && + (sub_surface.addr - addr) * 8 % GetFormatBpp() == 0 && + (!is_tiled || PixelsInBytes(sub_surface.addr - addr) % 64 
== 0) && + (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && + GetSubRect(sub_surface).left + sub_surface.width <= stride); } bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { @@ -1006,15 +997,6 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatc if (expandable != nullptr && expandable->res_scale > target_res_scale) { target_res_scale = expandable->res_scale; } - // Keep res_scale when reinterpreting d24s8 -> rgba8 - if (params.pixel_format == PixelFormat::RGBA8) { - find_params.pixel_format = PixelFormat::D24S8; - expandable = FindMatch( - surface_cache, find_params, match_res_scale); - if (expandable != nullptr && expandable->res_scale > target_res_scale) { - target_res_scale = expandable->res_scale; - } - } } SurfaceParams new_params = params; new_params.res_scale = target_res_scale; @@ -1057,17 +1039,29 @@ SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& } } + SurfaceParams aligned_params = params; + if (params.is_tiled) { + aligned_params.height = Common::AlignUp(params.height, 8); + aligned_params.width = Common::AlignUp(params.width, 8); + aligned_params.stride = Common::AlignUp(params.stride, 8); + aligned_params.UpdateParams(); + } + // Check for a surface we can expand before creating a new one if (surface == nullptr) { - surface = FindMatch(surface_cache, params, + surface = FindMatch(surface_cache, aligned_params, match_res_scale); if (surface != nullptr) { + aligned_params.width = aligned_params.stride; + aligned_params.UpdateParams(); + SurfaceParams new_params = *surface; - new_params.addr = std::min(params.addr, surface->addr); - new_params.end = std::max(params.end, surface->end); + new_params.addr = std::min(aligned_params.addr, surface->addr); + new_params.end = std::max(aligned_params.end, surface->end); new_params.size = new_params.end - new_params.addr; - new_params.height = new_params.size / 
params.BytesInPixels(params.stride); - ASSERT(new_params.size % params.BytesInPixels(params.stride) == 0); + new_params.height = + new_params.size / aligned_params.BytesInPixels(aligned_params.stride); + ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); Surface new_surface = CreateSurface(new_params); DuplicateSurface(surface, new_surface); @@ -1083,14 +1077,14 @@ SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& // No subrect found - create and return a new surface if (surface == nullptr) { - SurfaceParams new_params = params; + SurfaceParams new_params = aligned_params; // Can't have gaps in a surface - new_params.width = params.stride; + new_params.width = aligned_params.stride; new_params.UpdateParams(); // GetSurface will create the new surface and possibly adjust res_scale if necessary surface = GetSurface(new_params, match_res_scale, load_if_create); } else if (load_if_create) { - ValidateSurface(surface, params.addr, params.size); + ValidateSurface(surface, aligned_params.addr, aligned_params.size); } return std::make_tuple(surface, surface->GetScaledSubRect(params)); @@ -1108,6 +1102,23 @@ Surface RasterizerCacheOpenGL::GetTextureSurface( params.is_tiled = true; params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(info.format); params.UpdateParams(); + + if (info.width % 8 != 0 || info.height % 8 != 0) { + Surface src_surface; + MathUtil::Rectangle rect; + std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true); + + params.res_scale = src_surface->res_scale; + Surface tmp_surface = CreateSurface(params); + BlitTextures(src_surface->texture.handle, rect, tmp_surface->texture.handle, + tmp_surface->GetScaledRect(), + SurfaceParams::GetFormatType(params.pixel_format), read_framebuffer.handle, + draw_framebuffer.handle); + + remove_surfaces.emplace(tmp_surface); + return tmp_surface; + } + return GetSurface(params, ScaleMatch::Ignore, true); } @@ -1422,11 
+1433,19 @@ Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { } void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { + if (surface->registered) { + return; + } + surface->registered = true; surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); UpdatePagesCachedCount(surface->addr, surface->size, 1); } void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { + if (!surface->registered) { + return; + } + surface->registered = false; UpdatePagesCachedCount(surface->addr, surface->size, -1); surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index deb091317..d1f739a78 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -265,6 +265,7 @@ struct CachedSurface : SurfaceParams { return (invalid_regions & GetInterval()) == SurfaceRegions(GetInterval()); } + bool registered = false; SurfaceRegions invalid_regions; u32 fill_size = 0; /// Number of bytes to read from fill_data From 88f652151173c0642ec0ee6d01bf0802ba1abdd4 Mon Sep 17 00:00:00 2001 From: Phantom Date: Mon, 15 Jan 2018 10:05:56 +0100 Subject: [PATCH 26/32] AccelerateTextureCopy: Better support for contiguous copy --- .../renderer_opengl/gl_rasterizer.cpp | 54 ++++++------ .../renderer_opengl/gl_rasterizer_cache.cpp | 86 +++++++++---------- 2 files changed, 65 insertions(+), 75 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 3ceab54a2..f893b3e3e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1019,41 +1019,33 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& 
config) { u32 copy_size = Common::AlignDown(config.texture_copy.size, 16); - - if (copy_size == 0) + if (copy_size == 0) { return false; + } u32 input_gap = config.texture_copy.input_gap * 16; u32 input_width = config.texture_copy.input_width * 16; - if (input_width == 0) { - if (input_gap == 0) { - input_width = copy_size; - } else { - return false; - } + if (input_width == 0 && input_gap != 0) { + return false; + } + if (input_gap == 0 || input_width >= copy_size) { + input_width = copy_size; + input_gap = 0; + } + if (copy_size % input_width != 0) { + return false; } u32 output_gap = config.texture_copy.output_gap * 16; u32 output_width = config.texture_copy.output_width * 16; - if (output_width == 0) { - if (output_gap == 0) { - output_width = copy_size; - } else { - return false; - } + if (output_width == 0 && output_gap != 0) { + return false; } - - if (input_width >= copy_size) { - input_width = copy_size; - input_gap = 0; - } - - if (output_width >= copy_size) { + if (output_gap == 0 || output_width >= copy_size) { output_width = copy_size; output_gap = 0; } - - if (input_width != output_width || copy_size % input_width != 0) { + if (copy_size % output_width != 0) { return false; } @@ -1068,12 +1060,16 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon MathUtil::Rectangle src_rect; Surface src_surface; std::tie(src_surface, src_rect) = res_cache.GetTexCopySurface(src_params); - if (src_surface == nullptr) + if (src_surface == nullptr) { return false; + } - if ((output_gap * 8) % SurfaceParams::GetFormatBpp(src_surface->pixel_format) != 0 || - (src_surface->is_tiled && src_surface->PixelsInBytes(output_gap) % 64 != 0)) + if (output_gap != 0 && + (output_width != src_surface->BytesInPixels(src_rect.GetWidth() / src_surface->res_scale) * + (src_surface->is_tiled ? 8 : 1) || + output_gap % src_surface->BytesInPixels(src_surface->is_tiled ? 
64 : 1) != 0)) { return false; + } SurfaceParams dst_params = *src_surface; dst_params.addr = config.GetPhysicalOutputAddress(); @@ -1091,11 +1087,13 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon Surface dst_surface; std::tie(dst_surface, dst_rect) = res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, load_gap); - if (src_surface == nullptr) + if (src_surface == nullptr) { return false; + } - if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) + if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) { return false; + } res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface); return true; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 066a58085..d51ac5b1e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -390,6 +390,7 @@ SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment); params.addr = aligned_start; params.width = PixelsInBytes(aligned_end - aligned_start) / (is_tiled ? 8 : 1); + params.stride = params.width; params.height = is_tiled ? 
8 : 1; } params.UpdateParams(); @@ -447,57 +448,45 @@ MathUtil::Rectangle SurfaceParams::GetScaledSubRect(const SurfaceParams& su } bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { - return (other_surface.addr == addr && other_surface.width == width && - other_surface.height == height && other_surface.stride == stride && - other_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && - other_surface.is_tiled == is_tiled); + return other_surface.addr == addr && other_surface.width == width && + other_surface.height == height && other_surface.stride == stride && + other_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && + other_surface.is_tiled == is_tiled; } bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { - return (sub_surface.addr >= addr && sub_surface.end <= end && - sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && - sub_surface.is_tiled == is_tiled && - (sub_surface.addr - addr) * 8 % GetFormatBpp() == 0 && - (!is_tiled || PixelsInBytes(sub_surface.addr - addr) % 64 == 0) && - (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && - GetSubRect(sub_surface).left + sub_surface.width <= stride); + return sub_surface.addr >= addr && sub_surface.end <= end && + sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && + sub_surface.is_tiled == is_tiled && + (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && + (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 
8u : 1u)) && + GetSubRect(sub_surface).left + sub_surface.width <= stride; } bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { - if (pixel_format == PixelFormat::Invalid || pixel_format != expanded_surface.pixel_format || - is_tiled != expanded_surface.is_tiled || addr > expanded_surface.end || - expanded_surface.addr > end || stride != expanded_surface.stride) - return false; - - const u32 byte_offset = - std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr); - - const int x0 = byte_offset % BytesInPixels(stride); - const int y0 = byte_offset / BytesInPixels(stride); - - return x0 == 0 && (!is_tiled || y0 % 8 == 0); + return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && + addr <= expanded_surface.end && expanded_surface.addr <= end && + is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride && + (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % + BytesInPixels(stride * (is_tiled ? 
8 : 1)) == + 0; } bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr || - end < texcopy_params.end || ((texcopy_params.addr - addr) * 8) % GetFormatBpp() != 0 || - (texcopy_params.width * 8) % GetFormatBpp() != 0 || - (texcopy_params.stride * 8) % GetFormatBpp() != 0) + end < texcopy_params.end) { return false; - - const u32 begin_pixel_index = PixelsInBytes(texcopy_params.addr - addr); - - if (!is_tiled) { - const int x0 = begin_pixel_index % stride; - return ((texcopy_params.height == 1 || PixelsInBytes(texcopy_params.stride) == stride) && - x0 + PixelsInBytes(texcopy_params.width) <= stride); } - - const int x0 = (begin_pixel_index % (stride * 8)) / 8; - return (PixelsInBytes(texcopy_params.addr - addr) % 64 == 0 && - PixelsInBytes(texcopy_params.width) % 64 == 0 && - (texcopy_params.height == 1 || PixelsInBytes(texcopy_params.stride) == stride * 8) && - x0 + PixelsInBytes(texcopy_params.width / 8) <= stride); + if (texcopy_params.width != texcopy_params.stride) { + const u32 tile_stride = BytesInPixels(stride * (is_tiled ? 8 : 1)); + return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && + texcopy_params.width % BytesInPixels(is_tiled ? 
64 : 1) == 0 && + (texcopy_params.height == 1 || texcopy_params.stride == tile_stride) && + ((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride; + } else { + return FromInterval(texcopy_params.GetInterval()).GetInterval() == + texcopy_params.GetInterval(); + } } bool CachedSurface::CanFill(const SurfaceParams& dest_surface, @@ -1245,14 +1234,17 @@ SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& if (match_surface != nullptr) { ValidateSurface(match_surface, params.addr, params.size); - SurfaceParams match_subrect = params; - match_subrect.width = match_surface->PixelsInBytes(params.width); - match_subrect.stride = match_surface->PixelsInBytes(params.stride); - - if (match_surface->is_tiled) { - match_subrect.width /= 8; - match_subrect.stride /= 8; - match_subrect.height *= 8; + SurfaceParams match_subrect; + if (params.width != params.stride) { + match_subrect = params; + match_subrect.width = + match_surface->PixelsInBytes(params.width) / (match_surface->is_tiled ? 8 : 1); + match_subrect.stride = + match_surface->PixelsInBytes(params.stride) / (match_surface->is_tiled ? 8 : 1); + match_subrect.height *= (match_surface->is_tiled ? 
8 : 1); + } else { + match_subrect = match_surface->FromInterval(params.GetInterval()); + ASSERT(match_subrect.GetInterval() == params.GetInterval()); } rect = match_surface->GetScaledSubRect(match_subrect); From db21154142e8f2f814058907ed29d914286fb7c6 Mon Sep 17 00:00:00 2001 From: Phantom Date: Mon, 29 Jan 2018 00:50:47 +0100 Subject: [PATCH 27/32] GetFramebufferSurfaces: Remove an assert that is no longer correct --- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index d51ac5b1e..8a77133fd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -971,6 +971,8 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatc // Use GetSurfaceSubRect instead ASSERT(params.width == params.stride); + ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0)); + // Check for an exact match in existing surfaces Surface surface = FindMatch(surface_cache, params, match_res_scale); @@ -1178,8 +1180,8 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( if (color_surface != nullptr && depth_surface != nullptr) { fb_rect = color_rect; // Color and Depth surfaces must have the same dimensions and offsets - if (color_rect.bottom != depth_rect.bottom || - color_surface->height != depth_surface->height) { + if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || + color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { color_surface = GetSurface(color_params, ScaleMatch::Exact, false); depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); fb_rect = color_surface->GetScaledRect(); @@ -1189,7 +1191,6 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( } else if 
(depth_surface != nullptr) { fb_rect = depth_rect; } - ASSERT(!fb_rect.left && fb_rect.right == config.GetWidth() * resolution_scale_factor); if (color_surface != nullptr) { ValidateSurface(color_surface, boost::icl::first(color_vp_interval), From d813bc5eb5e30be8c26a1827596f32aa6af5a230 Mon Sep 17 00:00:00 2001 From: Phantom Date: Tue, 16 Jan 2018 03:06:35 +0100 Subject: [PATCH 28/32] D24S8 to RGBA8 conversion --- .../renderer_opengl/gl_rasterizer_cache.cpp | 133 ++++++++++++++++++ .../renderer_opengl/gl_rasterizer_cache.h | 10 ++ 2 files changed, 143 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 8a77133fd..5fca50b2f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -943,6 +943,52 @@ Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params RasterizerCacheOpenGL::RasterizerCacheOpenGL() { read_framebuffer.Create(); draw_framebuffer.Create(); + + attributeless_vao.Create(); + + d24s8_abgr_buffer.Create(); + d24s8_abgr_buffer_size = 0; + + const char* vs_source = R"( +#version 330 core +const vec2 vertices[4] = vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); +void main() { + gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); +} +)"; + const char* fs_source = R"( +#version 330 core + +uniform samplerBuffer tbo; +uniform vec2 tbo_size; +uniform vec4 viewport; + +out vec4 color; + +void main() { + vec2 tbo_coord = (gl_FragCoord.xy - viewport.xy) * tbo_size / viewport.zw; + int tbo_offset = int(tbo_coord.y) * int(tbo_size.x) + int(tbo_coord.x); + color = texelFetch(tbo, tbo_offset).rabg; +} +)"; + d24s8_abgr_shader.Create(vs_source, fs_source); + + OpenGLState state = OpenGLState::GetCurState(); + GLuint old_program = state.draw.shader_program; + state.draw.shader_program = d24s8_abgr_shader.handle; + state.Apply(); + + GLint tbo_u_id = 
glGetUniformLocation(d24s8_abgr_shader.handle, "tbo"); + ASSERT(tbo_u_id != -1); + glUniform1i(tbo_u_id, 0); + + state.draw.shader_program = old_program; + state.Apply(); + + d24s8_abgr_tbo_size_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo_size"); + ASSERT(d24s8_abgr_tbo_size_u_id != -1); + d24s8_abgr_viewport_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "viewport"); + ASSERT(d24s8_abgr_viewport_u_id != -1); } RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { @@ -963,6 +1009,63 @@ bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, draw_framebuffer.handle); } +void RasterizerCacheOpenGL::ConvertD24S8toABGR(GLuint src_tex, + const MathUtil::Rectangle& src_rect, + GLuint dst_tex, + const MathUtil::Rectangle& dst_rect) { + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state; + state.draw.read_framebuffer = read_framebuffer.handle; + state.draw.draw_framebuffer = draw_framebuffer.handle; + state.Apply(); + + glBindBuffer(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer.handle); + + GLsizeiptr target_pbo_size = src_rect.GetWidth() * src_rect.GetHeight() * 4; + if (target_pbo_size > d24s8_abgr_buffer_size) { + d24s8_abgr_buffer_size = target_pbo_size * 2; + glBufferData(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer_size, nullptr, GL_STREAM_COPY); + } + + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, + 0); + glReadPixels(static_cast(src_rect.left), static_cast(src_rect.bottom), + static_cast(src_rect.GetWidth()), + static_cast(src_rect.GetHeight()), GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, + 0); + + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + + // PBO now contains src_tex in RABG format + state.draw.shader_program = d24s8_abgr_shader.handle; + state.draw.vertex_array = attributeless_vao.handle; + state.viewport.x = static_cast(dst_rect.left); + state.viewport.y = static_cast(dst_rect.bottom); + state.viewport.width = 
static_cast(dst_rect.GetWidth()); + state.viewport.height = static_cast(dst_rect.GetHeight()); + state.Apply(); + + OGLTexture tbo; + tbo.Create(); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_BUFFER, tbo.handle); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA8, d24s8_abgr_buffer.handle); + + glUniform2f(d24s8_abgr_tbo_size_u_id, static_cast(src_rect.GetWidth()), + static_cast(src_rect.GetHeight())); + glUniform4f(d24s8_abgr_viewport_u_id, static_cast(state.viewport.x), + static_cast(state.viewport.y), static_cast(state.viewport.width), + static_cast(state.viewport.height)); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + + glBindTexture(GL_TEXTURE_BUFFER, 0); +} + Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, bool load_if_create) { if (params.addr == 0 || params.height * params.width == 0) { @@ -988,6 +1091,15 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatc if (expandable != nullptr && expandable->res_scale > target_res_scale) { target_res_scale = expandable->res_scale; } + // Keep res_scale when reinterpreting d24s8 -> rgba8 + if (params.pixel_format == PixelFormat::RGBA8) { + find_params.pixel_format = PixelFormat::D24S8; + expandable = FindMatch( + surface_cache, find_params, match_res_scale); + if (expandable != nullptr && expandable->res_scale > target_res_scale) { + target_res_scale = expandable->res_scale; + } + } } SurfaceParams new_params = params; new_params.res_scale = target_res_scale; @@ -1305,6 +1417,27 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, continue; } + // D24S8 to RGBA8 + if (surface->pixel_format == PixelFormat::RGBA8) { + params.pixel_format = PixelFormat::D24S8; + Surface reinterpret_surface = + FindMatch(surface_cache, 
params, ScaleMatch::Ignore, interval); + if (reinterpret_surface != nullptr) { + ASSERT(reinterpret_surface->pixel_format == PixelFormat::D24S8); + + SurfaceInterval convert_interval = params.GetCopyableInterval(reinterpret_surface); + SurfaceParams convert_params = surface->FromInterval(convert_interval); + auto src_rect = reinterpret_surface->GetScaledSubRect(convert_params); + auto dest_rect = surface->GetScaledSubRect(convert_params); + + ConvertD24S8toABGR(reinterpret_surface->texture.handle, src_rect, + surface->texture.handle, dest_rect); + + surface->invalid_regions.erase(convert_interval); + continue; + } + } + // Load data from 3DS memory FlushRegion(params.addr, params.size); surface->LoadGLBuffer(params.addr, params.end); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index d1f739a78..7ef6a9498 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -305,6 +305,9 @@ public: bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle& src_rect, const Surface& dst_surface, const MathUtil::Rectangle& dst_rect); + void ConvertD24S8toABGR(GLuint src_tex, const MathUtil::Rectangle& src_rect, + GLuint dst_tex, const MathUtil::Rectangle& dst_rect); + /// Copy one surface's region to another void CopySurface(const Surface& src_surface, const Surface& dst_surface, SurfaceInterval copy_interval); @@ -365,4 +368,11 @@ private: OGLFramebuffer read_framebuffer; OGLFramebuffer draw_framebuffer; + + OGLVertexArray attributeless_vao; + OGLBuffer d24s8_abgr_buffer; + GLsizeiptr d24s8_abgr_buffer_size; + OGLShader d24s8_abgr_shader; + GLint d24s8_abgr_tbo_size_u_id; + GLint d24s8_abgr_viewport_u_id; }; From 9e16a3c449725d1144d86d29cb6edd23a7f6a0ab Mon Sep 17 00:00:00 2001 From: Phantom Date: Wed, 31 Jan 2018 00:11:31 +0100 Subject: [PATCH 29/32] ConvertD24S8toABGR: fix fb attachment --- 
src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 5fca50b2f..e7721a812 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -1029,6 +1029,7 @@ void RasterizerCacheOpenGL::ConvertD24S8toABGR(GLuint src_tex, glBufferData(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer_size, nullptr, GL_STREAM_COPY); } + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0); glReadPixels(static_cast(src_rect.left), static_cast(src_rect.bottom), From 18456ff9e6a8659adb2b859c556cfa17cf638ee1 Mon Sep 17 00:00:00 2001 From: James Rowe Date: Mon, 5 Feb 2018 20:31:50 -0700 Subject: [PATCH 30/32] Address Lioncash's comments --- .../renderer_opengl/gl_rasterizer.cpp | 15 +++--- .../renderer_opengl/gl_rasterizer_cache.cpp | 46 +++++++++---------- .../renderer_opengl/gl_resource_manager.h | 2 +- 3 files changed, 30 insertions(+), 33 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f893b3e3e..fc56eb168 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1074,15 +1074,14 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon SurfaceParams dst_params = *src_surface; dst_params.addr = config.GetPhysicalOutputAddress(); dst_params.width = src_rect.GetWidth() / src_surface->res_scale; - dst_params.stride = - dst_params.width + - src_surface->PixelsInBytes(src_surface->is_tiled ? output_gap / 8 : output_gap); + dst_params.stride = dst_params.width + src_surface->PixelsInBytes( + src_surface->is_tiled ? 
output_gap / 8 : output_gap); dst_params.height = src_rect.GetHeight() / src_surface->res_scale; dst_params.res_scale = src_surface->res_scale; dst_params.UpdateParams(); - const bool load_gap = output_gap != 0; // Since we are going to invalidate the gap if there is - // one, we will have to load it first + // Since we are going to invalidate the gap if there is one, we will have to load it first + const bool load_gap = output_gap != 0; MathUtil::Rectangle dst_rect; Surface dst_surface; std::tie(dst_surface, dst_rect) = @@ -1391,7 +1390,8 @@ void RasterizerOpenGL::SyncBlendColor() { void RasterizerOpenGL::SyncFogColor() { const auto& regs = Pica::g_state.regs; uniform_block_data.data.fog_color = { - regs.texturing.fog_color.r.Value() / 255.0f, regs.texturing.fog_color.g.Value() / 255.0f, + regs.texturing.fog_color.r.Value() / 255.0f, + regs.texturing.fog_color.g.Value() / 255.0f, regs.texturing.fog_color.b.Value() / 255.0f, }; uniform_block_data.dirty = true; @@ -1419,7 +1419,8 @@ void RasterizerOpenGL::SyncProcTexNoise() { Pica::float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(), }; uniform_block_data.data.proctex_noise_a = { - regs.proctex_noise_u.amplitude / 4095.0f, regs.proctex_noise_v.amplitude / 4095.0f, + regs.proctex_noise_u.amplitude / 4095.0f, + regs.proctex_noise_v.amplitude / 4095.0f, }; uniform_block_data.data.proctex_noise_p = { Pica::float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(), diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index e7721a812..aef06873d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -60,15 +60,14 @@ static constexpr FormatTuple tex_tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; static const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); if (type == SurfaceType::Color) { - 
ASSERT((size_t)pixel_format < fb_format_tuples.size()); - return fb_format_tuples[(unsigned int)pixel_format]; + ASSERT(static_cast(pixel_format) < fb_format_tuples.size()); + return fb_format_tuples[static_cast(pixel_format)]; } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { - size_t tuple_idx = (size_t)pixel_format - 14; + size_t tuple_idx = static_cast(pixel_format) - 14; ASSERT(tuple_idx < depth_format_tuples.size()); return depth_format_tuples[tuple_idx]; - } else { - return tex_tuple; } + return tex_tuple; } template @@ -244,7 +243,6 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rec state.ResetTexture(src_tex); state.ResetTexture(dst_tex); - // Keep track of previous framebuffer bindings state.draw.read_framebuffer = read_fb_handle; state.draw.draw_framebuffer = draw_fb_handle; state.Apply(); @@ -355,7 +353,7 @@ static bool FillSurface(const Surface& surface, const u8* fill_data, surface->texture.handle, 0); u32 value_32bit; - std::memcpy(&value_32bit, fill_data, 4); + std::memcpy(&value_32bit, fill_data, sizeof(u32)); GLfloat value_float = (value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1 GLint value_int = (value_32bit >> 24); @@ -370,8 +368,8 @@ static bool FillSurface(const Surface& surface, const u8* fill_data, SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { SurfaceParams params = *this; - - const u32 stride_tiled_bytes = BytesInPixels(stride * (is_tiled ? 8 : 1)); + const u32 tiled_size = is_tiled ? 
8 : 1; + const u32 stride_tiled_bytes = BytesInPixels(stride * tiled_size); PAddr aligned_start = addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); PAddr aligned_end = @@ -389,9 +387,9 @@ SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { aligned_end = addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment); params.addr = aligned_start; - params.width = PixelsInBytes(aligned_end - aligned_start) / (is_tiled ? 8 : 1); + params.width = PixelsInBytes(aligned_end - aligned_start) / tiled_size; params.stride = params.width; - params.height = is_tiled ? 8 : 1; + params.height = tiled_size; } params.UpdateParams(); @@ -410,7 +408,7 @@ SurfaceInterval SurfaceParams::GetSubRectInterval(MathUtil::Rectangle unsca unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8; } - const u32 stride_tiled = (!is_tiled ? stride : stride * 8); + const u32 stride_tiled = !is_tiled ? stride : stride * 8; const u32 pixel_offset = stride_tiled * (!is_tiled ? 
unscaled_rect.bottom : (height / 8) - unscaled_rect.top) + @@ -448,10 +446,10 @@ MathUtil::Rectangle SurfaceParams::GetScaledSubRect(const SurfaceParams& su } bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { - return other_surface.addr == addr && other_surface.width == width && - other_surface.height == height && other_surface.stride == stride && - other_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && - other_surface.is_tiled == is_tiled; + return std::tie(other_surface.addr, other_surface.width, other_surface.height, + other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) == + std::tie(addr, width, height, stride, pixel_format, is_tiled) && + pixel_format != PixelFormat::Invalid; } bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { @@ -483,10 +481,8 @@ bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { texcopy_params.width % BytesInPixels(is_tiled ? 64 : 1) == 0 && (texcopy_params.height == 1 || texcopy_params.stride == tile_stride) && ((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride; - } else { - return FromInterval(texcopy_params.GetInterval()).GetInterval() == - texcopy_params.GetInterval(); } + return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval(); } bool CachedSurface::CanFill(const SurfaceParams& dest_surface, @@ -688,9 +684,10 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { if (backup_bytes) std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes); - for (u32 offset = coarse_start_offset; offset < end_offset; offset += fill_size) + for (u32 offset = coarse_start_offset; offset < end_offset; offset += fill_size) { std::memcpy(&dst_buffer[offset], &fill_data[0], std::min(fill_size, end_offset - offset)); + } if (backup_bytes) std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); @@ -1350,12 
+1347,11 @@ SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& SurfaceParams match_subrect; if (params.width != params.stride) { + const u32 tiled_size = match_surface->is_tiled ? 8 : 1; match_subrect = params; - match_subrect.width = - match_surface->PixelsInBytes(params.width) / (match_surface->is_tiled ? 8 : 1); - match_subrect.stride = - match_surface->PixelsInBytes(params.stride) / (match_surface->is_tiled ? 8 : 1); - match_subrect.height *= (match_surface->is_tiled ? 8 : 1); + match_subrect.width = match_surface->PixelsInBytes(params.width) / tiled_size; + match_subrect.stride = match_surface->PixelsInBytes(params.stride) / tiled_size; + match_subrect.height *= tiled_size; } else { match_subrect = match_surface->FromInterval(params.GetInterval()); ASSERT(match_subrect.GetInterval() == params.GetInterval()); diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index e21972537..39fa79fc2 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -135,7 +135,7 @@ public: if (handle == 0) return; glDeleteBuffers(1, &handle); - OpenGLState::GetCurState().OpenGLState::ResetBuffer(handle).Apply(); + OpenGLState::GetCurState().ResetBuffer(handle).Apply(); handle = 0; } From 1d419bac1b33afc0b9e76177f44dba8a29736474 Mon Sep 17 00:00:00 2001 From: James Rowe Date: Sun, 4 Mar 2018 22:06:09 -0700 Subject: [PATCH 31/32] Disable accelerated texture copy for Texture surfaces --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index fc56eb168..f028ea001 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1090,6 +1090,10 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const 
GPU::Regs::DisplayTransferCon return false; } + if (dst_surface->type == SurfaceType::Texture) { + return false; + } + if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) { return false; } From c2515ff39d2123e4ac3aba904c477f8781884117 Mon Sep 17 00:00:00 2001 From: wwylele Date: Mon, 5 Mar 2018 11:09:20 +0200 Subject: [PATCH 32/32] clang-format fix --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f028ea001..8d2d1698a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1065,8 +1065,9 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon } if (output_gap != 0 && - (output_width != src_surface->BytesInPixels(src_rect.GetWidth() / src_surface->res_scale) * - (src_surface->is_tiled ? 8 : 1) || + (output_width != + src_surface->BytesInPixels(src_rect.GetWidth() / src_surface->res_scale) * + (src_surface->is_tiled ? 8 : 1) || output_gap % src_surface->BytesInPixels(src_surface->is_tiled ? 64 : 1) != 0)) { return false; } @@ -1074,8 +1075,9 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon SurfaceParams dst_params = *src_surface; dst_params.addr = config.GetPhysicalOutputAddress(); dst_params.width = src_rect.GetWidth() / src_surface->res_scale; - dst_params.stride = dst_params.width + src_surface->PixelsInBytes( - src_surface->is_tiled ? output_gap / 8 : output_gap); + dst_params.stride = + dst_params.width + + src_surface->PixelsInBytes(src_surface->is_tiled ? 
output_gap / 8 : output_gap); dst_params.height = src_rect.GetHeight() / src_surface->res_scale; dst_params.res_scale = src_surface->res_scale; dst_params.UpdateParams(); @@ -1394,8 +1396,7 @@ void RasterizerOpenGL::SyncBlendColor() { void RasterizerOpenGL::SyncFogColor() { const auto& regs = Pica::g_state.regs; uniform_block_data.data.fog_color = { - regs.texturing.fog_color.r.Value() / 255.0f, - regs.texturing.fog_color.g.Value() / 255.0f, + regs.texturing.fog_color.r.Value() / 255.0f, regs.texturing.fog_color.g.Value() / 255.0f, regs.texturing.fog_color.b.Value() / 255.0f, }; uniform_block_data.dirty = true; @@ -1423,8 +1424,7 @@ void RasterizerOpenGL::SyncProcTexNoise() { Pica::float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(), }; uniform_block_data.data.proctex_noise_a = { - regs.proctex_noise_u.amplitude / 4095.0f, - regs.proctex_noise_v.amplitude / 4095.0f, + regs.proctex_noise_u.amplitude / 4095.0f, regs.proctex_noise_v.amplitude / 4095.0f, }; uniform_block_data.data.proctex_noise_p = { Pica::float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(),