From a7c1306e2d19f1270f6fa0603ba20043c90e9c05 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 30 Jan 2024 11:52:41 +0100 Subject: [PATCH 1/5] Texture Cache: make sparse texture table per channel --- src/video_core/texture_cache/texture_cache.h | 87 +++++++++++-------- .../texture_cache/texture_cache_base.h | 4 +- 2 files changed, 51 insertions(+), 40 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 7398ed2ec..a7400adfa 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1431,7 +1431,8 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA } } }; - ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); + ForEachSparseImageInRegion(channel_state->gpu_memory.GetID(), gpu_addr, size_bytes, + region_check_gpu); bool can_rescale = info.rescaleable; bool any_rescaled = false; @@ -1842,7 +1843,7 @@ void TextureCache

::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, s if (!storage_id) { return; } - auto& gpu_page_table = gpu_page_table_storage[*storage_id]; + auto& gpu_page_table = gpu_page_table_storage[*storage_id * 2]; ForEachGPUPage(gpu_addr, size, [this, &gpu_page_table, &images, gpu_addr, size, func](u64 page) { const auto it = gpu_page_table.find(page); @@ -1882,41 +1883,48 @@ void TextureCache

::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, s template template -void TextureCache

::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { +void TextureCache

::ForEachSparseImageInRegion(size_t as_id, GPUVAddr gpu_addr, size_t size, + Func&& func) { using FuncReturn = typename std::invoke_result::type; static constexpr bool BOOL_BREAK = std::is_same_v; boost::container::small_vector images; - ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { - const auto it = sparse_page_table.find(page); - if (it == sparse_page_table.end()) { - if constexpr (BOOL_BREAK) { - return false; - } else { - return; - } - } - for (const ImageId image_id : it->second) { - Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::Picked)) { - continue; - } - if (!image.OverlapsGPU(gpu_addr, size)) { - continue; - } - image.flags |= ImageFlagBits::Picked; - images.push_back(image_id); - if constexpr (BOOL_BREAK) { - if (func(image_id, image)) { - return true; - } - } else { - func(image_id, image); - } - } - if constexpr (BOOL_BREAK) { - return false; - } - }); + auto storage_id = getStorageID(as_id); + if (!storage_id) { + return; + } + auto& sparse_page_table = gpu_page_table_storage[*storage_id * 2 + 1]; + ForEachGPUPage(gpu_addr, size, + [this, &sparse_page_table, &images, gpu_addr, size, func](u64 page) { + const auto it = sparse_page_table.find(page); + if (it == sparse_page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageId image_id : it->second) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + if (!image.OverlapsGPU(gpu_addr, size)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(image_id); + if constexpr (BOOL_BREAK) { + if (func(image_id, image)) { + return true; + } + } else { + func(image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); for (const ImageId image_id : images) { slot_images[image_id].flags &= ~ImageFlagBits::Picked; } @@ -1988,8 +1996,9 @@ void TextureCache

::RegisterImage(ImageId image_id) { sparse_maps.push_back(map_id); }); sparse_views.emplace(image_id, std::move(sparse_maps)); - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { + (*channel_state->sparse_page_table)[page].push_back(image_id); + }); } template @@ -2042,7 +2051,7 @@ void TextureCache

::UnregisterImage(ImageId image_id) { return; } ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { - clear_page_table(page, sparse_page_table); + clear_page_table(page, (*channel_state->sparse_page_table)); }); auto it = sparse_views.find(image_id); ASSERT(it != sparse_views.end()); @@ -2496,13 +2505,15 @@ void TextureCache

::CreateChannel(struct Tegra::Control::ChannelState& channel const auto it = channel_map.find(channel.bind_id); auto* this_state = &channel_storage[it->second]; const auto& this_as_ref = address_spaces[channel.memory_manager->GetID()]; - this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id]; + this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id * 2]; + this_state->sparse_page_table = &gpu_page_table_storage[this_as_ref.storage_id * 2 + 1]; } /// Bind a channel for execution. template void TextureCache

::OnGPUASRegister([[maybe_unused]] size_t map_id) { gpu_page_table_storage.emplace_back(); + gpu_page_table_storage.emplace_back(); } } // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 8699d40d4..f9aebb293 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -86,6 +86,7 @@ public: std::unordered_map samplers; TextureCacheGPUMap* gpu_page_table; + TextureCacheGPUMap* sparse_page_table; }; template @@ -357,7 +358,7 @@ private: void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func); template - void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); + void ForEachSparseImageInRegion(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func); /// Iterates over all the images in a region calling func template @@ -431,7 +432,6 @@ private: std::unordered_map framebuffers; std::unordered_map, Common::IdentityHash> page_table; - std::unordered_map, Common::IdentityHash> sparse_page_table; std::unordered_map> sparse_views; DAddr virtual_invalid_space{}; From 8f848f43e979ac4049237d3b6a161696dd85372b Mon Sep 17 00:00:00 2001 From: Liam Date: Tue, 16 Jan 2024 23:35:48 -0500 Subject: [PATCH 2/5] smmu: use new range mutex construction for protecting counters --- src/common/CMakeLists.txt | 1 + src/common/range_mutex.h | 93 ++++++++++++++++++++++++++++++ src/core/device_memory_manager.h | 3 +- src/core/device_memory_manager.inc | 11 +--- 4 files changed, 97 insertions(+), 11 deletions(-) create mode 100644 src/common/range_mutex.h diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index e30fea268..85926fc8f 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -106,6 +106,7 @@ add_library(common STATIC precompiled_headers.h quaternion.h range_map.h + range_mutex.h reader_writer_queue.h ring_buffer.h ${CMAKE_CURRENT_BINARY_DIR}/scm_rev.cpp diff --git a/src/common/range_mutex.h b/src/common/range_mutex.h new file mode 100644 index 000000000..d6c949811 --- /dev/null +++ b/src/common/range_mutex.h @@ -0,0 +1,93 @@ +// SPDX-FileCopyrightText: 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "common/intrusive_list.h" + +namespace Common { + +class ScopedRangeLock; + +class RangeMutex { +public: + explicit RangeMutex() = default; + ~RangeMutex() = default; + +private: + friend class ScopedRangeLock; + + void Lock(ScopedRangeLock& l); + void Unlock(ScopedRangeLock& l); + bool HasIntersectionLocked(ScopedRangeLock& l); + +private: + std::mutex m_mutex; + std::condition_variable m_cv; + + using LockList = Common::IntrusiveListBaseTraits::ListType; + LockList m_list; +}; + +class ScopedRangeLock : public Common::IntrusiveListBaseNode { +public: + explicit ScopedRangeLock(RangeMutex& mutex, u64 address, u64 size) + : m_mutex(mutex), m_address(address), m_size(size) { + if (m_size > 0) { + m_mutex.Lock(*this); + } + } + ~ScopedRangeLock() { + if (m_size > 0) { + m_mutex.Unlock(*this); + } + } + + u64 GetAddress() const { + return m_address; + } + + u64 GetSize() const { + return m_size; + } + +private: + RangeMutex& m_mutex; + const u64 m_address{}; + const u64 m_size{}; +}; + +inline void RangeMutex::Lock(ScopedRangeLock& l) { + std::unique_lock lk{m_mutex}; + m_cv.wait(lk, [&] { return !HasIntersectionLocked(l); }); + m_list.push_back(l); +} + +inline void RangeMutex::Unlock(ScopedRangeLock& l) { + { + std::scoped_lock lk{m_mutex}; + m_list.erase(m_list.iterator_to(l)); + } + m_cv.notify_all(); +} + +inline bool RangeMutex::HasIntersectionLocked(ScopedRangeLock& l) { + const auto cur_begin = l.GetAddress(); + const auto cur_last = l.GetAddress() + l.GetSize() - 1; + + for (const auto& other : m_list) { + const auto other_begin = other.GetAddress(); + const auto other_last = other.GetAddress() + other.GetSize() - 1; + + if (cur_begin <= other_last && other_begin <= cur_last) { + return true; + } + } + + return false; +} + +} // namespace Common diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h index ffeed46cc..63823602c 100644 --- a/src/core/device_memory_manager.h +++ b/src/core/device_memory_manager.h @@ -10,6 +10,7 @@ #include #include "common/common_types.h" +#include "common/range_mutex.h" #include "common/scratch_buffer.h" #include "common/virtual_buffer.h" @@ -204,7 +205,7 @@ private: (1ULL << (device_virtual_bits - page_bits)) / subentries; using CachedPages = std::array; std::unique_ptr cached_pages; - std::mutex counter_guard; + Common::RangeMutex counter_guard; std::mutex mapping_guard; }; diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc index eab8a2731..0a59000aa 100644 --- a/src/core/device_memory_manager.inc +++ b/src/core/device_memory_manager.inc @@ -508,12 +508,7 @@ void DeviceMemoryManager::UnregisterProcess(Asid asid) { template void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) { - std::unique_lock lk(counter_guard, std::defer_lock); - const auto Lock = [&] { - if (!lk) { - lk.lock(); - } - }; + Common::ScopedRangeLock lk(counter_guard, addr, size); u64 uncache_begin = 0; u64 cache_begin = 0; u64 uncache_bytes = 0; @@ -548,7 +543,6 @@ void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size } uncache_bytes += Memory::YUZU_PAGESIZE; } else if (uncache_bytes > 0) { - Lock(); MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes, false); uncache_bytes = 0; @@ -559,7 +553,6 @@ void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size } cache_bytes += Memory::YUZU_PAGESIZE; } else if (cache_bytes > 0) { - Lock(); MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, true); cache_bytes = 0; @@ -567,12 +560,10 @@ void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size vpage++; } if (uncache_bytes > 0) { - Lock(); MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes, false); } if (cache_bytes > 0) { - Lock(); MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, true); } From aaab11e36f55f6b06f08b10aeca4b14f112914ee Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 30 Jan 2024 13:46:01 +0100 Subject: [PATCH 3/5] NVDRV: Join the heaper optimization blocks --- src/core/hle/service/nvdrv/core/container.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp index dc1b4d5be..e89cca6f2 100644 --- a/src/core/hle/service/nvdrv/core/container.cpp +++ b/src/core/hle/service/nvdrv/core/container.cpp @@ -83,7 +83,9 @@ SessionId Container::OpenSession(Kernel::KProcess* process) { // Check if this memory block is heap. if (svc_mem_info.state == Kernel::Svc::MemoryState::Normal) { - if (svc_mem_info.size > region_size) { + if (region_start + region_size == svc_mem_info.base_address) { + region_size += svc_mem_info.size; + } else if (svc_mem_info.size > region_size) { region_size = svc_mem_info.size; region_start = svc_mem_info.base_address; } From 738e9a79a06c9d3955488022a1d3afd67bc5a11d Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 30 Jan 2024 20:14:06 +0100 Subject: [PATCH 4/5] DeviceMemory: Make counter types configurable --- src/core/device_memory_manager.h | 15 ++++++++++----- src/core/device_memory_manager.inc | 18 +++++++++--------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h index 63823602c..0568a821b 100644 --- a/src/core/device_memory_manager.h +++ b/src/core/device_memory_manager.h @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -181,24 +182,28 @@ private: } Common::VirtualBuffer cpu_backing_address; - static constexpr size_t subentries = 8 / sizeof(u8); + using CounterType = u8; + using CounterAtomicType = std::atomic_uint8_t; + static constexpr size_t subentries = 8 / sizeof(CounterType); static constexpr size_t subentries_mask = subentries - 1; + static constexpr size_t subentries_shift = + std::countr_zero(sizeof(u64)) - std::countr_zero(sizeof(CounterType)); class CounterEntry final { public: CounterEntry() = default; - std::atomic_uint8_t& Count(std::size_t page) { + CounterAtomicType& Count(std::size_t page) { return values[page & subentries_mask]; } - const std::atomic_uint8_t& Count(std::size_t page) const { + const CounterAtomicType& Count(std::size_t page) const { return values[page & subentries_mask]; } private: - std::array values{}; + std::array values{}; }; - static_assert(sizeof(CounterEntry) == subentries * sizeof(u8), + static_assert(sizeof(CounterEntry) == subentries * sizeof(CounterType), "CounterEntry should be 8 bytes!"); static constexpr size_t num_counter_entries = diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc index 0a59000aa..7afe54949 100644 --- a/src/core/device_memory_manager.inc +++ b/src/core/device_memory_manager.inc @@ -213,8 +213,8 @@ void DeviceMemoryManager::Free(DAddr start, size_t size) { } template -void DeviceMemoryManager::Map(DAddr address, VAddr virtual_address, size_t size, - Asid asid, bool track) { +void DeviceMemoryManager::Map(DAddr address, VAddr virtual_address, size_t size, Asid asid, + bool track) { Core::Memory::Memory* process_memory = registered_processes[asid.id]; size_t start_page_d = address >> Memory::YUZU_PAGEBITS; size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; @@ -522,10 +522,10 @@ void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size size_t vpage = base_vaddress >> Memory::YUZU_PAGEBITS; auto* memory_device_inter = registered_processes[asid.id]; for (; page != page_end; ++page) { - std::atomic_uint8_t& count = cached_pages->at(page >> 3).Count(page); + CounterAtomicType& count = cached_pages->at(page >> subentries_shift).Count(page); if (delta > 0) { - ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits::max(), + ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits::max(), "Count may overflow!"); } else if (delta < 0) { ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!"); @@ -534,7 +534,7 @@ void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size } // Adds or subtracts 1, as count is a unsigned 8-bit value - count.fetch_add(static_cast(delta), std::memory_order_release); + count.fetch_add(static_cast(delta), std::memory_order_release); // Assume delta is either -1 or 1 if (count.load(std::memory_order::relaxed) == 0) { @@ -553,15 +553,15 @@ void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size } cache_bytes += Memory::YUZU_PAGESIZE; } else if (cache_bytes > 0) { - MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, - true); + MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, + cache_bytes, true); cache_bytes = 0; } vpage++; } if (uncache_bytes > 0) { - MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes, - false); + MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, + uncache_bytes, false); } if (cache_bytes > 0) { MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, From d57165df450e8a2fa811706758fb8ab352f623ae Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 31 Jan 2024 14:31:13 +0100 Subject: [PATCH 5/5] Device Memory Manager: ensure raster protection only within mapped device addresses. --- src/core/device_memory_manager.inc | 40 +++++++++++++++++------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc index 7afe54949..b026f4220 100644 --- a/src/core/device_memory_manager.inc +++ b/src/core/device_memory_manager.inc @@ -519,18 +519,32 @@ void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE); size_t page = addr >> Memory::YUZU_PAGEBITS; auto [asid, base_vaddress] = ExtractCPUBacking(page); - size_t vpage = base_vaddress >> Memory::YUZU_PAGEBITS; auto* memory_device_inter = registered_processes[asid.id]; + const auto release_pending = [&] { + if (uncache_bytes > 0) { + MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, + uncache_bytes, false); + uncache_bytes = 0; + } + if (cache_bytes > 0) { + MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, + cache_bytes, true); + cache_bytes = 0; + } + }; for (; page != page_end; ++page) { CounterAtomicType& count = cached_pages->at(page >> subentries_shift).Count(page); + auto [asid_2, vpage] = ExtractCPUBacking(page); + vpage >>= Memory::YUZU_PAGEBITS; - if (delta > 0) { - ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits::max(), - "Count may overflow!"); - } else if (delta < 0) { - ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!"); - } else { - ASSERT_MSG(false, "Delta must be non-zero!"); + if (vpage == 0) [[unlikely]] { + release_pending(); + continue; + } + + if (asid.id != asid_2.id) [[unlikely]] { + release_pending(); + memory_device_inter = registered_processes[asid_2.id]; } // Adds or subtracts 1, as count is a unsigned 8-bit value @@ -557,16 +571,8 @@ void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size cache_bytes, true); cache_bytes = 0; } - vpage++; - } - if (uncache_bytes > 0) { - MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, - uncache_bytes, false); - } - if (cache_bytes > 0) { - MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, - true); } + release_pending(); } } // namespace Core