diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index ee8602ce97..8a5e6a3e7d 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -19,6 +19,7 @@ namespace VideoCommon { enum class BufferFlagBits { Picked = 1 << 0, + CachedWrites = 1 << 1, }; DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits) @@ -40,7 +41,7 @@ class BufferBase { static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; /// Vector tracking modified pages tightly packed with small vector optimization - union WrittenWords { + union WordsArray { /// Returns the pointer to the words state [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { return is_short ? &stack : heap; @@ -55,49 +56,59 @@ class BufferBase { u64* heap; ///< Not-small buffers pointer to the storage }; - struct GpuCpuWords { - explicit GpuCpuWords() = default; - explicit GpuCpuWords(u64 size_bytes_) : size_bytes{size_bytes_} { + struct Words { + explicit Words() = default; + explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} { if (IsShort()) { cpu.stack = ~u64{0}; gpu.stack = 0; + cached_cpu.stack = 0; + untracked.stack = ~u64{0}; } else { // Share allocation between CPU and GPU pages and set their default values const size_t num_words = NumWords(); - u64* const alloc = new u64[num_words * 2]; + u64* const alloc = new u64[num_words * 4]; cpu.heap = alloc; gpu.heap = alloc + num_words; + cached_cpu.heap = alloc + num_words * 2; + untracked.heap = alloc + num_words * 3; std::fill_n(cpu.heap, num_words, ~u64{0}); std::fill_n(gpu.heap, num_words, 0); + std::fill_n(cached_cpu.heap, num_words, 0); + std::fill_n(untracked.heap, num_words, ~u64{0}); } // Clean up tailing bits - const u64 last_local_page = - Common::DivCeil(size_bytes % BYTES_PER_WORD, BYTES_PER_PAGE); + const u64 last_word_size = size_bytes % BYTES_PER_WORD; + const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE); const u64 shift 
= (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; - u64& last_word = cpu.Pointer(IsShort())[NumWords() - 1]; - last_word = (last_word << shift) >> shift; + const u64 last_word = (~u64{0} << shift) >> shift; + cpu.Pointer(IsShort())[NumWords() - 1] = last_word; + untracked.Pointer(IsShort())[NumWords() - 1] = last_word; } - ~GpuCpuWords() { + ~Words() { Release(); } - GpuCpuWords& operator=(GpuCpuWords&& rhs) noexcept { + Words& operator=(Words&& rhs) noexcept { Release(); size_bytes = rhs.size_bytes; cpu = rhs.cpu; gpu = rhs.gpu; + cached_cpu = rhs.cached_cpu; + untracked = rhs.untracked; rhs.cpu.heap = nullptr; return *this; } - GpuCpuWords(GpuCpuWords&& rhs) noexcept - : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu} { + Words(Words&& rhs) noexcept + : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu}, + cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} { rhs.cpu.heap = nullptr; } - GpuCpuWords& operator=(const GpuCpuWords&) = delete; - GpuCpuWords(const GpuCpuWords&) = delete; + Words& operator=(const Words&) = delete; + Words(const Words&) = delete; /// Returns true when the buffer fits in the small vector optimization [[nodiscard]] bool IsShort() const noexcept { @@ -118,8 +129,17 @@ class BufferBase { } u64 size_bytes = 0; - WrittenWords cpu; - WrittenWords gpu; + WordsArray cpu; + WordsArray gpu; + WordsArray cached_cpu; + WordsArray untracked; + }; + + enum class Type { + CPU, + GPU, + CachedCPU, + Untracked, }; public: @@ -132,68 +152,93 @@ public: BufferBase& operator=(const BufferBase&) = delete; BufferBase(const BufferBase&) = delete; + BufferBase& operator=(BufferBase&&) = default; + BufferBase(BufferBase&&) = default; + /// Returns the inclusive CPU modified range in a begin end pair [[nodiscard]] std::pair ModifiedCpuRegion(VAddr query_cpu_addr, u64 query_size) const noexcept { const u64 offset = query_cpu_addr - cpu_addr; - return ModifiedRegion(offset, query_size); + return ModifiedRegion(offset, query_size); } /// Returns the 
inclusive GPU modified range in a begin end pair [[nodiscard]] std::pair ModifiedGpuRegion(VAddr query_cpu_addr, u64 query_size) const noexcept { const u64 offset = query_cpu_addr - cpu_addr; - return ModifiedRegion(offset, query_size); + return ModifiedRegion(offset, query_size); } /// Returns true if a region has been modified from the CPU [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { const u64 offset = query_cpu_addr - cpu_addr; - return IsRegionModified(offset, query_size); + return IsRegionModified(offset, query_size); } /// Returns true if a region has been modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { const u64 offset = query_cpu_addr - cpu_addr; - return IsRegionModified(offset, query_size); + return IsRegionModified(offset, query_size); } /// Mark region as CPU modified, notifying the rasterizer about this change void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { - ChangeRegionState(words.cpu, dirty_cpu_addr, size); + ChangeRegionState(dirty_cpu_addr, size); } /// Unmark region as CPU modified, notifying the rasterizer about this change void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { - ChangeRegionState(words.cpu, dirty_cpu_addr, size); + ChangeRegionState(dirty_cpu_addr, size); } /// Mark region as modified from the host GPU void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { - ChangeRegionState(words.gpu, dirty_cpu_addr, size); + ChangeRegionState(dirty_cpu_addr, size); } /// Unmark region as modified from the host GPU void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { - ChangeRegionState(words.gpu, dirty_cpu_addr, size); + ChangeRegionState(dirty_cpu_addr, size); + } + + /// Mark region as modified from the CPU + /// but don't mark it as modified until FlushCachedWrites is called.
+ void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) { + flags |= BufferFlagBits::CachedWrites; + ChangeRegionState(dirty_cpu_addr, size); + } + + /// Flushes cached CPU writes and notifies the rasterizer about the deltas + void FlushCachedWrites() noexcept { + flags &= ~BufferFlagBits::CachedWrites; + const u64 num_words = NumWords(); + const u64* const cached_words = Array(); + u64* const untracked_words = Array(); + u64* const cpu_words = Array(); + for (u64 word_index = 0; word_index < num_words; ++word_index) { + const u64 cached_bits = cached_words[word_index]; + NotifyRasterizer(word_index, untracked_words[word_index], cached_bits); + untracked_words[word_index] |= cached_bits; + cpu_words[word_index] |= cached_bits; + } + } /// Call 'func' for each CPU modified range and unmark those pages as CPU modified template void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) { - ForEachModifiedRange(query_cpu_range, size, func); + ForEachModifiedRange(query_cpu_range, size, func); } /// Call 'func' for each GPU modified range and unmark those pages as GPU modified template void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) { - ForEachModifiedRange(query_cpu_range, size, func); + ForEachModifiedRange(query_cpu_range, size, func); } /// Call 'func' for each GPU modified range and unmark those pages as GPU modified template void ForEachDownloadRange(Func&& func) { - ForEachModifiedRange(cpu_addr, SizeBytes(), func); + ForEachModifiedRange(cpu_addr, SizeBytes(), func); } /// Mark buffer as picked @@ -216,6 +261,11 @@ public: return True(flags & BufferFlagBits::Picked); } + /// Returns true when the buffer has pending cached writes + [[nodiscard]] bool HasCachedWrites() const noexcept { + return True(flags & BufferFlagBits::CachedWrites); + } + /// Returns the base CPU address of the buffer [[nodiscard]] VAddr CpuAddr() const noexcept { return cpu_addr; @@ -233,26 +283,48 @@ public: } private: + template + u64* Array() noexcept { + if
constexpr (type == Type::CPU) { + return words.cpu.Pointer(IsShort()); + } else if constexpr (type == Type::GPU) { + return words.gpu.Pointer(IsShort()); + } else if constexpr (type == Type::CachedCPU) { + return words.cached_cpu.Pointer(IsShort()); + } else if constexpr (type == Type::Untracked) { + return words.untracked.Pointer(IsShort()); + } + } + + template + const u64* Array() const noexcept { + if constexpr (type == Type::CPU) { + return words.cpu.Pointer(IsShort()); + } else if constexpr (type == Type::GPU) { + return words.gpu.Pointer(IsShort()); + } else if constexpr (type == Type::CachedCPU) { + return words.cached_cpu.Pointer(IsShort()); + } else if constexpr (type == Type::Untracked) { + return words.untracked.Pointer(IsShort()); + } + } + /** * Change the state of a range of pages * - * @param written_words Pages to be marked or unmarked as modified * @param dirty_addr Base address to mark or unmark as modified * @param size Size in bytes to mark or unmark as modified - * - * @tparam enable True when the bits will be set to one, false for zero - * @tparam notify_rasterizer True when the rasterizer has to be notified about the changes */ - template - void ChangeRegionState(WrittenWords& written_words, u64 dirty_addr, - s64 size) noexcept(!notify_rasterizer) { + template + void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) { const s64 difference = dirty_addr - cpu_addr; const u64 offset = std::max(difference, 0); size += std::min(difference, 0); if (offset >= SizeBytes() || size < 0) { return; } - u64* const state_words = written_words.Pointer(IsShort()); + u64* const untracked_words = Array(); + u64* const state_words = Array(); const u64 offset_end = std::min(offset + size, SizeBytes()); const u64 begin_page_index = offset / BYTES_PER_PAGE; const u64 begin_word_index = begin_page_index / PAGES_PER_WORD; @@ -268,13 +340,19 @@ private: u64 bits = ~u64{0}; bits = (bits >> right_offset) << right_offset; bits = (bits << 
left_offset) >> left_offset; - if constexpr (notify_rasterizer) { - NotifyRasterizer(word_index, state_words[word_index], bits); + if constexpr (type == Type::CPU || type == Type::CachedCPU) { + NotifyRasterizer(word_index, untracked_words[word_index], bits); } if constexpr (enable) { state_words[word_index] |= bits; + if constexpr (type == Type::CPU || type == Type::CachedCPU) { + untracked_words[word_index] |= bits; + } } else { state_words[word_index] &= ~bits; + if constexpr (type == Type::CPU || type == Type::CachedCPU) { + untracked_words[word_index] &= ~bits; + } } page_index = 0; ++word_index; @@ -291,7 +369,7 @@ private: * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages */ template - void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) { + void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; while (changed_bits != 0) { @@ -315,21 +393,20 @@ private: * @param query_cpu_range Base CPU address to loop over * @param size Size in bytes of the CPU range to loop over * @param func Function to call for each turned off region - * - * @tparam gpu True for host GPU pages, false for CPU pages - * @tparam notify_rasterizer True when the rasterizer should be notified about state changes */ - template + template void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { + static_assert(type != Type::Untracked); + const s64 difference = query_cpu_range - cpu_addr; const u64 query_begin = std::max(difference, 0); size += std::min(difference, 0); if (query_begin >= SizeBytes() || size < 0) { return; } - const u64* const cpu_words = words.cpu.Pointer(IsShort()); + u64* const untracked_words = Array(); + u64* const state_words = Array(); const u64 query_end = query_begin + std::min(static_cast(size), SizeBytes()); - u64* const state_words = (gpu ? 
words.gpu : words.cpu).Pointer(IsShort()); u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD); @@ -345,7 +422,8 @@ private: const u64 word_index_end = std::distance(state_words, last_modified_word); const unsigned local_page_begin = std::countr_zero(*first_modified_word); - const unsigned local_page_end = PAGES_PER_WORD - std::countl_zero(last_modified_word[-1]); + const unsigned local_page_end = + static_cast(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]); const u64 word_page_begin = word_index_begin * PAGES_PER_WORD; const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD; const u64 query_page_begin = query_begin / BYTES_PER_PAGE; @@ -371,11 +449,13 @@ private: const u64 current_word = state_words[word_index] & bits; state_words[word_index] &= ~bits; - // Exclude CPU modified pages when visiting GPU pages - const u64 word = current_word & ~(gpu ? cpu_words[word_index] : 0); - if constexpr (notify_rasterizer) { - NotifyRasterizer(word_index, word, ~u64{0}); + if constexpr (type == Type::CPU) { + const u64 current_bits = untracked_words[word_index] & bits; + untracked_words[word_index] &= ~bits; + NotifyRasterizer(word_index, current_bits, ~u64{0}); } + // Exclude CPU modified pages when visiting GPU pages + const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0); u64 page = page_begin; page_begin = 0; @@ -416,17 +496,20 @@ private: * @param offset Offset in bytes from the start of the buffer * @param size Size in bytes of the region to query for modifications */ - template + template [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { - const u64* const cpu_words = words.cpu.Pointer(IsShort()); - const u64* const state_words = (gpu ? 
words.gpu : words.cpu).Pointer(IsShort()); + static_assert(type != Type::Untracked); + + const u64* const untracked_words = Array(); + const u64* const state_words = Array(); const u64 num_query_words = size / BYTES_PER_WORD + 1; const u64 word_begin = offset / BYTES_PER_WORD; const u64 word_end = std::min(word_begin + num_query_words, NumWords()); const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { - const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); + const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; + const u64 word = state_words[word_index] & ~off_word; if (word == 0) { continue; } @@ -445,13 +528,13 @@ private: * * @param offset Offset in bytes from the start of the buffer * @param size Size in bytes of the region to query for modifications - * - * @tparam gpu True to query GPU modified pages, false for CPU pages */ - template + template [[nodiscard]] std::pair ModifiedRegion(u64 offset, u64 size) const noexcept { - const u64* const cpu_words = words.cpu.Pointer(IsShort()); - const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); + static_assert(type != Type::Untracked); + + const u64* const untracked_words = Array(); + const u64* const state_words = Array(); const u64 num_query_words = size / BYTES_PER_WORD + 1; const u64 word_begin = offset / BYTES_PER_WORD; const u64 word_end = std::min(word_begin + num_query_words, NumWords()); @@ -460,7 +543,8 @@ private: u64 begin = std::numeric_limits::max(); u64 end = 0; for (u64 word_index = word_begin; word_index < word_end; ++word_index) { - const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); + const u64 off_word = type == Type::GPU ? 
untracked_words[word_index] : 0; + const u64 word = state_words[word_index] & ~off_word; if (word == 0) { continue; } @@ -488,7 +572,7 @@ private: RasterizerInterface* rasterizer = nullptr; VAddr cpu_addr = 0; - GpuCpuWords words; + Words words; BufferFlagBits flags{}; };