From 13606a6d0b06c003f7bd88f88f001899e2c4e381 Mon Sep 17 00:00:00 2001 From: James Rowe Date: Thu, 23 Nov 2017 10:43:12 -0700 Subject: [PATCH] Memory: Remove count of cached pages and add InvalidateRegion In a future commit, the count of cached pages will be reintroduced in the actual surface cache. Also adds an invalidate-only operation to the cache, which marks a region as invalid to try to avoid a costly flush to 3DS memory. --- src/core/hle/kernel/vm_manager.cpp | 1 - src/core/hle/service/gsp/gsp_gpu.cpp | 2 +- src/core/hw/gpu.cpp | 33 +++++++---------- src/core/memory.cpp | 43 +++++++++++----------- src/core/memory.h | 18 ++++----- src/tests/core/arm/arm_test_common.cpp | 1 - src/video_core/rasterizer_interface.h | 3 ++ src/video_core/swrasterizer/swrasterizer.h | 3 +- 8 files changed, 50 insertions(+), 54 deletions(-) diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index 7a007c065..2d6af0cbb 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp @@ -58,7 +58,6 @@ void VMManager::Reset() { page_table.pointers.fill(nullptr); page_table.attributes.fill(Memory::PageType::Unmapped); - page_table.cached_res_count.fill(0); UpdatePageTableForVMA(initial_vma); } diff --git a/src/core/hle/service/gsp/gsp_gpu.cpp b/src/core/hle/service/gsp/gsp_gpu.cpp index bb3cedbe3..ab9879e02 100644 --- a/src/core/hle/service/gsp/gsp_gpu.cpp +++ b/src/core/hle/service/gsp/gsp_gpu.cpp @@ -423,7 +423,7 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { command.dma_request.size, Memory::FlushMode::Flush); Memory::RasterizerFlushVirtualRegion(command.dma_request.dest_address, command.dma_request.size, - Memory::FlushMode::FlushAndInvalidate); + Memory::FlushMode::Invalidate); // TODO(Subv): These memory accesses should not go through the application's memory mapping. // They should go through the GSP module's memory mapping. 
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 350b9a6a2..d65af4aff 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -96,20 +96,11 @@ static void MemoryFill(const Regs::MemoryFillConfig& config) { u8* start = Memory::GetPhysicalPointer(start_addr); u8* end = Memory::GetPhysicalPointer(end_addr); - // TODO: Consider always accelerating and returning vector of - // regions that the accelerated fill did not cover to - // reduce/eliminate the fill that the cpu has to do. - // This would also mean that the flush below is not needed. - // Fill should first flush all surfaces that touch but are - // not completely within the fill range. - // Then fill all completely covered surfaces, and return the - // regions that were between surfaces or within the touching - // ones for cpu to manually fill here. if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) return; - Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), - config.GetEndAddress() - config.GetStartAddress()); + Memory::RasterizerInvalidateRegion(config.GetStartAddress(), + config.GetEndAddress() - config.GetStartAddress()); if (config.fill_24bit) { // fill with 24-bit values @@ -199,7 +190,7 @@ static void DisplayTransfer(const Regs::DisplayTransferConfig& config) { u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size); - Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); + Memory::RasterizerInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); for (u32 y = 0; y < output_height; ++y) { for (u32 x = 0; x < output_width; ++x) { @@ -363,8 +354,10 @@ static void TextureCopy(const Regs::DisplayTransferConfig& config) { size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); - 
Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), - static_cast(contiguous_output_size)); + // Only need to flush output if it has a gap + const auto FlushInvalidate_fn = (output_gap != 0) ? Memory::RasterizerFlushAndInvalidateRegion + : Memory::RasterizerInvalidateRegion; + FlushInvalidate_fn(config.GetPhysicalOutputAddress(), static_cast(contiguous_output_size)); u32 remaining_input = input_width; u32 remaining_output = output_width; @@ -446,16 +439,18 @@ inline void Write(u32 addr, const T data) { if (config.is_texture_copy) { TextureCopy(config); - LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> " - "0x%08X(%u+%u), flags 0x%08X", + LOG_TRACE(HW_GPU, + "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> " + "0x%08X(%u+%u), flags 0x%08X", config.texture_copy.size, config.GetPhysicalInputAddress(), config.texture_copy.input_width * 16, config.texture_copy.input_gap * 16, config.GetPhysicalOutputAddress(), config.texture_copy.output_width * 16, config.texture_copy.output_gap * 16, config.flags); } else { DisplayTransfer(config); - LOG_TRACE(HW_GPU, "DisplayTransfer: 0x%08x(%ux%u)-> " - "0x%08x(%ux%u), dst format %x, flags 0x%08X", + LOG_TRACE(HW_GPU, + "DisplayTransfer: 0x%08x(%ux%u)-> " + "0x%08x(%ux%u), dst format %x, flags 0x%08X", config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), config.GetPhysicalOutputAddress(), config.output_width.Value(), config.output_height.Value(), @@ -570,4 +565,4 @@ void Shutdown() { LOG_DEBUG(HW_GPU, "shutdown OK"); } -} // namespace +} // namespace GPU diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 81e60a66f..9b01bfc8c 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -50,7 +50,6 @@ static void MapPages(PageTable& page_table, u32 base, u32 size, u8* memory, Page page_table.attributes[base] = type; page_table.pointers[base] = memory; - page_table.cached_res_count[base] = 0; base += 1; if (memory != nullptr) @@ -200,7 +199,7 @@ void 
Write(const VAddr vaddr, const T data) { ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); break; case PageType::RasterizerCachedMemory: { - RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::FlushAndInvalidate); + RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate); std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T)); break; } @@ -208,7 +207,7 @@ void Write(const VAddr vaddr, const T data) { WriteMMIO(GetMMIOHandler(vaddr), vaddr, data); break; case PageType::RasterizerCachedSpecial: { - RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::FlushAndInvalidate); + RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate); WriteMMIO(GetMMIOHandler(vaddr), vaddr, data); break; } @@ -334,7 +333,7 @@ u8* GetPhysicalPointer(PAddr address) { return target_pointer; } -void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { +void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached) { if (start == 0) { return; } @@ -355,14 +354,10 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { } VAddr vaddr = *maybe_vaddr; - u8& res_count = current_page_table->cached_res_count[vaddr >> PAGE_BITS]; - ASSERT_MSG(count_delta <= UINT8_MAX - res_count, - "Rasterizer resource cache counter overflow!"); - ASSERT_MSG(count_delta >= -res_count, "Rasterizer resource cache counter underflow!"); + PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; - // Switch page type to cached if now cached - if (res_count == 0) { - PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; + if (cached) { + // Switch page type to cached if now cached switch (page_type) { case PageType::Unmapped: // It is not necessary for a process to have this region mapped into its address @@ -378,13 +373,8 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { default: UNREACHABLE(); } - } - - res_count += count_delta; - - // Switch page type to 
uncached if now uncached - if (res_count == 0) { - PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; + } else { + // Switch page type to uncached if now uncached switch (page_type) { case PageType::Unmapped: // It is not necessary for a process to have this region mapped into its address @@ -419,6 +409,12 @@ void RasterizerFlushRegion(PAddr start, u32 size) { } } +void RasterizerInvalidateRegion(PAddr start, u32 size) { + if (VideoCore::g_renderer != nullptr) { + VideoCore::g_renderer->Rasterizer()->InvalidateRegion(start, size); + } +} + void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) { // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be // null here @@ -450,6 +446,9 @@ void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) { case FlushMode::Flush: rasterizer->FlushRegion(physical_start, overlap_size); break; + case FlushMode::Invalidate: + rasterizer->InvalidateRegion(physical_start, overlap_size); + break; case FlushMode::FlushAndInvalidate: rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size); break; @@ -588,7 +587,7 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi } case PageType::RasterizerCachedMemory: { RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), - FlushMode::FlushAndInvalidate); + FlushMode::Invalidate); std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount); break; } @@ -596,7 +595,7 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr); DEBUG_ASSERT(handler); RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), - FlushMode::FlushAndInvalidate); + FlushMode::Invalidate); handler->WriteBlock(current_vaddr, src_buffer, copy_amount); break; } @@ -648,7 +647,7 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size } case 
PageType::RasterizerCachedMemory: { RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), - FlushMode::FlushAndInvalidate); + FlushMode::Invalidate); std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount); break; } @@ -656,7 +655,7 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr); DEBUG_ASSERT(handler); RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), - FlushMode::FlushAndInvalidate); + FlushMode::Invalidate); handler->WriteBlock(current_vaddr, zeros.data(), copy_amount); break; } diff --git a/src/core/memory.h b/src/core/memory.h index 1527ec4c7..4ec5e8d60 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -72,12 +72,6 @@ struct PageTable { * the corresponding entry in `pointers` MUST be set to null. */ std::array attributes; - - /** - * Indicates the number of externally cached resources touching a page that should be - * flushed before the memory is accessed - */ - std::array cached_res_count; }; /// Physical memory regions as seen from the ARM11 @@ -245,16 +239,20 @@ boost::optional PhysicalToVirtualAddress(PAddr addr); u8* GetPhysicalPointer(PAddr address); /** - * Adds the supplied value to the rasterizer resource cache counter of each - * page touching the region. + * Mark each page touching the region as cached. */ -void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta); +void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached); /** * Flushes any externally cached rasterizer resources touching the given region. */ void RasterizerFlushRegion(PAddr start, u32 size); +/** + * Invalidates any externally cached rasterizer resources touching the given region. + */ +void RasterizerInvalidateRegion(PAddr start, u32 size); + /** * Flushes and invalidates any externally cached rasterizer resources touching the given region. 
*/ @@ -263,6 +261,8 @@ void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size); enum class FlushMode { /// Write back modified surfaces to RAM Flush, + /// Remove region from the cache + Invalidate, /// Write back modified surfaces to RAM, and also remove them from the cache FlushAndInvalidate, }; diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp index 484713a92..8520f53b2 100644 --- a/src/tests/core/arm/arm_test_common.cpp +++ b/src/tests/core/arm/arm_test_common.cpp @@ -20,7 +20,6 @@ TestEnvironment::TestEnvironment(bool mutable_memory_) page_table->pointers.fill(nullptr); page_table->attributes.fill(Memory::PageType::Unmapped); - page_table->cached_res_count.fill(0); Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory); Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 8ef7e74c7..1d4c98189 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -38,6 +38,9 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory virtual void FlushRegion(PAddr addr, u32 size) = 0; + /// Notify rasterizer that any caches of the specified region should be invalidated + virtual void InvalidateRegion(PAddr addr, u32 size) = 0; + /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory /// and invalidated virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0; diff --git a/src/video_core/swrasterizer/swrasterizer.h b/src/video_core/swrasterizer/swrasterizer.h index 87e64f8f6..f97fe13b4 100644 --- a/src/video_core/swrasterizer/swrasterizer.h +++ b/src/video_core/swrasterizer/swrasterizer.h @@ -11,7 +11,7 @@ namespace Pica { namespace Shader { struct OutputVertex; } -} +} // namespace Pica namespace VideoCore { @@ -22,6 +22,7 @@ class SWRasterizer : public 
RasterizerInterface { void NotifyPicaRegisterChanged(u32 id) override {} void FlushAll() override {} void FlushRegion(PAddr addr, u32 size) override {} + void InvalidateRegion(PAddr addr, u32 size) override {} void FlushAndInvalidateRegion(PAddr addr, u32 size) override {} };