From 4284893044d72f0169935c5a134ca08bc7814d59 Mon Sep 17 00:00:00 2001
From: PabloMK7
Date: Fri, 3 Nov 2023 01:19:00 +0100
Subject: [PATCH] Implement RomFS cache and async reads. (#7089)

* Implement RomFS cache and async reads.

* Suggestions and fix compilation.

* Apply suggestions
---
 src/common/CMakeLists.txt          |   1 +
 src/common/file_util.cpp           |  37 ++++++++++
 src/common/file_util.h             |  20 +++++
 src/common/static_lru_cache.h      | 113 +++++++++++++++++++++++++++++
 src/core/file_sys/file_backend.h   |  14 ++++
 src/core/file_sys/ivfc_archive.h   |   8 ++
 src/core/file_sys/layered_fs.h     |   8 ++
 src/core/file_sys/romfs_reader.cpp | 102 ++++++++++++++++++++++++--
 src/core/file_sys/romfs_reader.h   |  24 ++++++
 src/core/hle/service/fs/file.cpp   |  99 +++++++++++++++++++++----
 10 files changed, 404 insertions(+), 22 deletions(-)
 create mode 100644 src/common/static_lru_cache.h

diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 74a22e6b0..78c8f6c55 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -124,6 +124,7 @@ add_library(citra_common STATIC
     serialization/boost_flat_set.h
     serialization/boost_small_vector.hpp
     serialization/boost_vector.hpp
+    static_lru_cache.h
     string_literal.h
     string_util.cpp
     string_util.h
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index acbf79bff..1d8b2c509 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -1155,6 +1155,43 @@ std::size_t IOFile::ReadImpl(void* data, std::size_t length, std::size_t data_si
     return std::fread(data, data_size, length, m_file);
 }
 
+#ifdef _WIN32
+static std::size_t pread(int fd, void* buf, size_t count, uint64_t offset) {
+    long unsigned int read_bytes = 0;
+    OVERLAPPED overlapped = {0};
+    HANDLE file = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
+
+    overlapped.OffsetHigh = static_cast<uint32_t>(offset >> 32);
+    overlapped.Offset = static_cast<uint32_t>(offset & 0xFFFF'FFFFLL);
+    SetLastError(0);
+    bool ret = ReadFile(file, buf, static_cast<uint32_t>(count), &read_bytes, &overlapped);
+
+    if (!ret && GetLastError() != ERROR_HANDLE_EOF) {
+        errno = GetLastError();
+        return std::numeric_limits<std::size_t>::max();
+    }
+    return read_bytes;
+}
+#else
+#define pread ::pread
+#endif
+
+std::size_t IOFile::ReadAtImpl(void* data, std::size_t length, std::size_t data_size,
+                               std::size_t offset) {
+    if (!IsOpen()) {
+        m_good = false;
+        return std::numeric_limits<std::size_t>::max();
+    }
+
+    if (length == 0) {
+        return 0;
+    }
+
+    DEBUG_ASSERT(data != nullptr);
+
+    return pread(fileno(m_file), data, data_size * length, offset);
+}
+
 std::size_t IOFile::WriteImpl(const void* data, std::size_t length, std::size_t data_size) {
     if (!IsOpen()) {
         m_good = false;
diff --git a/src/common/file_util.h b/src/common/file_util.h
index 1d28c4864..2a4cc70ef 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -294,6 +294,18 @@ public:
         return items_read;
     }
 
+    template <typename T>
+    std::size_t ReadAtArray(T* data, std::size_t length, std::size_t offset) {
+        static_assert(std::is_trivially_copyable_v<T>,
+                      "Given array does not consist of trivially copyable objects");
+
+        std::size_t items_read = ReadAtImpl(data, length, sizeof(T), offset);
+        if (items_read != length)
+            m_good = false;
+
+        return items_read;
+    }
+
     template <typename T>
     std::size_t WriteArray(const T* data, std::size_t length) {
         static_assert(std::is_trivially_copyable_v<T>,
@@ -312,6 +324,12 @@ public:
         return ReadArray(reinterpret_cast<char*>(data), length);
     }
 
+    template <typename T>
+    std::size_t ReadAtBytes(T* data, std::size_t length, std::size_t offset) {
+        static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable");
+        return ReadAtArray(reinterpret_cast<char*>(data), length, offset);
+    }
+
     template <typename T>
     std::size_t WriteBytes(const T* data, std::size_t length) {
         static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable");
@@ -363,6 +381,8 @@ public:
 
 private:
     std::size_t ReadImpl(void* data, std::size_t length, std::size_t data_size);
+    std::size_t ReadAtImpl(void* data, std::size_t length, std::size_t data_size,
+                           std::size_t offset);
     std::size_t WriteImpl(const void* data, std::size_t length, std::size_t data_size);
 
     bool Open();
diff --git a/src/common/static_lru_cache.h b/src/common/static_lru_cache.h
new file mode 100644
index 000000000..0294f6120
--- /dev/null
+++ b/src/common/static_lru_cache.h
@@ -0,0 +1,113 @@
+// Modified version of: https://www.boost.org/doc/libs/1_79_0/boost/compute/detail/lru_cache.hpp
+// Most important change is the use of an array instead of a map, so that elements are
+// statically allocated. The insert and get methods have been merged into the request method.
+// Original license:
+//
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <list>
+#include <utility>
+
+namespace Common {
+
+// a cache which evicts the least recently used item when it is full
+// the cache elements are statically allocated.
+template <class Key, class Value, size_t Size>
+class StaticLRUCache {
+public:
+    using key_type = Key;
+    using value_type = Value;
+    using list_type = std::list<std::pair<Key, size_t>>;
+    using array_type = std::array<Value, Size>;
+
+    StaticLRUCache() = default;
+
+    ~StaticLRUCache() = default;
+
+    size_t size() const {
+        return m_list.size();
+    }
+
+    constexpr size_t capacity() const {
+        return m_array.size();
+    }
+
+    bool empty() const {
+        return m_list.empty();
+    }
+
+    bool contains(const key_type& key) const {
+        return find(key) != m_list.end();
+    }
+
+    // Requests an element from the cache. If it is not found,
+    // the element is inserted using its key.
+    // Returns whether the element was present in the cache
+    // and a reference to the element itself.
+    std::pair<bool, value_type&> request(const key_type& key) {
+        // lookup value in the cache
+        auto i = find(key);
+        if (i == m_list.cend()) {
+            size_t next_index = size();
+            // insert item into the cache, but first check if it is full
+            if (next_index >= capacity()) {
+                // cache is full, evict the least recently used item
+                next_index = evict();
+            }
+
+            // insert the new item
+            m_list.push_front(std::make_pair(key, next_index));
+            return std::pair<bool, value_type&>(false, m_array[next_index]);
+        }
+        // return the value, but first update its place in the most
+        // recently used list
+        if (i != m_list.cbegin()) {
+            // move item to the front of the most recently used list
+            auto backup = *i;
+            m_list.erase(i);
+            m_list.push_front(backup);
+
+            // return the value
+            return std::pair<bool, value_type&>(true, m_array[backup.second]);
+        } else {
+            // the item is already at the front of the most recently
+            // used list so just return it
+            return std::pair<bool, value_type&>(true, m_array[i->second]);
+        }
+    }
+
+    void clear() {
+        m_list.clear();
+    }
+
+private:
+    typename list_type::const_iterator find(const key_type& key) const {
+        return std::find_if(m_list.cbegin(), m_list.cend(),
+                            [&key](const auto& el) { return el.first == key; });
+    }
+
+    size_t evict() {
+        // evict item from the end of most recently used list
+        typename list_type::iterator i = --m_list.end();
+        size_t evicted_index = i->second;
+        m_list.erase(i);
+        return evicted_index;
+    }
+
+private:
+    array_type m_array;
+    list_type m_list;
+};
+
+} // namespace Common
\ No newline at end of file
diff --git a/src/core/file_sys/file_backend.h b/src/core/file_sys/file_backend.h
index 43a7531a4..bf0a1b493 100644
--- a/src/core/file_sys/file_backend.h
+++ b/src/core/file_sys/file_backend.h
@@ -86,6 +86,20 @@ public:
      */
     virtual void Flush() const = 0;
 
+    /**
+     * Whether the backend supports cached reads.
+     */
+    virtual bool AllowsCachedReads() const {
+        return false;
+    }
+
+    /**
+     * Whether the cache is ready for a specified offset and length.
+     */
+    virtual bool CacheReady(std::size_t file_offset, std::size_t length) {
+        return false;
+    }
+
 protected:
     std::unique_ptr<DelayGenerator> delay_generator;
 
diff --git a/src/core/file_sys/ivfc_archive.h b/src/core/file_sys/ivfc_archive.h
index 09a1f6afc..0c0f7969e 100644
--- a/src/core/file_sys/ivfc_archive.h
+++ b/src/core/file_sys/ivfc_archive.h
@@ -131,6 +131,14 @@ public:
     }
     void Flush() const override {}
 
+    bool AllowsCachedReads() const override {
+        return romfs_file->AllowsCachedReads();
+    }
+
+    bool CacheReady(std::size_t file_offset, std::size_t length) override {
+        return romfs_file->CacheReady(file_offset, length);
+    }
+
 private:
     std::shared_ptr<RomFSReader> romfs_file;
 
diff --git a/src/core/file_sys/layered_fs.h b/src/core/file_sys/layered_fs.h
index 3643be99d..0a0dc5669 100644
--- a/src/core/file_sys/layered_fs.h
+++ b/src/core/file_sys/layered_fs.h
@@ -53,6 +53,14 @@ public:
 
     bool DumpRomFS(const std::string& target_path);
 
+    bool AllowsCachedReads() const override {
+        return false;
+    }
+
+    bool CacheReady(std::size_t file_offset, std::size_t length) override {
+        return false;
+    }
+
 private:
     struct File;
     struct Directory {
diff --git a/src/core/file_sys/romfs_reader.cpp b/src/core/file_sys/romfs_reader.cpp
index 4c83515b3..bdfc9f62a 100644
--- a/src/core/file_sys/romfs_reader.cpp
+++ b/src/core/file_sys/romfs_reader.cpp
@@ -1,4 +1,5 @@
 #include <algorithm>
+#include <cstring>
 #include <memory>
 #include <string>
 #include "common/archives.h"
@@ -9,17 +10,102 @@ SERIALIZE_EXPORT_IMPL(FileSys::DirectRomFSReader)
 namespace FileSys {
 
 std::size_t DirectRomFSReader::ReadFile(std::size_t offset, std::size_t length, u8* buffer) {
+    length = std::min(length, static_cast<std::size_t>(data_size) - offset);
     if (length == 0)
         return 0; // Crypto++ does not like zero size buffer
-    file.Seek(file_offset + offset, SEEK_SET);
-    std::size_t read_length = std::min(length, static_cast<std::size_t>(data_size) - offset);
-    read_length = file.ReadBytes(buffer, read_length);
-    if (is_encrypted) {
-        CryptoPP::CTR_Mode<CryptoPP::AES>::Decryption d(key.data(), key.size(), ctr.data());
-        d.Seek(crypto_offset + offset);
-        d.ProcessData(buffer, buffer, read_length);
+
+    const auto segments = BreakupRead(offset, length);
+    size_t read_progress = 0;
+
+    // Skip cache if the read is too big
+    if (segments.size() == 1 && segments[0].second > cache_line_size) {
+        length = file.ReadAtBytes(buffer, length, file_offset + offset);
+        if (is_encrypted) {
+            CryptoPP::CTR_Mode<CryptoPP::AES>::Decryption d(key.data(), key.size(), ctr.data());
+            d.Seek(crypto_offset + offset);
+            d.ProcessData(buffer, buffer, length);
+        }
+        // LOG_INFO(Service_FS, "Cache SKIP: offset={}, length={}", offset, length);
+        return length;
     }
-    return read_length;
+
+    // TODO(PabloMK7): Make cache thread safe, read the comment in CacheReady function.
+    // std::unique_lock read_guard(cache_mutex);
+    for (const auto& seg : segments) {
+        size_t read_size = cache_line_size;
+        size_t page = OffsetToPage(seg.first);
+        // Check if segment is in cache
+        auto cache_entry = cache.request(page);
+        if (!cache_entry.first) {
+            // If not found, read from disk and cache the data
+            read_size = file.ReadAtBytes(cache_entry.second.data(), read_size, file_offset + page);
+            if (is_encrypted && read_size) {
+                CryptoPP::CTR_Mode<CryptoPP::AES>::Decryption d(key.data(), key.size(), ctr.data());
+                d.Seek(crypto_offset + page);
+                d.ProcessData(cache_entry.second.data(), cache_entry.second.data(), read_size);
+            }
+            // LOG_INFO(Service_FS, "Cache MISS: page={}, length={}, into={}", page, seg.second,
+            // (seg.first - page));
+        } else {
+            // LOG_INFO(Service_FS, "Cache HIT: page={}, length={}, into={}", page, seg.second,
+            // (seg.first - page));
+        }
+        size_t copy_amount =
+            (read_size > (seg.first - page))
+                ? std::min((seg.first - page) + seg.second, read_size) - (seg.first - page)
+                : 0;
+        std::memcpy(buffer + read_progress, cache_entry.second.data() + (seg.first - page),
+                    copy_amount);
+        read_progress += copy_amount;
+    }
+    return read_progress;
+}
+
+bool DirectRomFSReader::AllowsCachedReads() const {
+    return true;
+}
+
+bool DirectRomFSReader::CacheReady(std::size_t file_offset, std::size_t length) {
+    auto segments = BreakupRead(file_offset, length);
+    if (segments.size() == 1 && segments[0].second > cache_line_size) {
+        return false;
+    } else {
+        // TODO(PabloMK7): Since the LRU cache is not thread safe, a lock must be used.
+        // However, this completely breaks the point of using a cache, because
+        // smaller reads may be blocked by bigger reads. For now, always return
+        // data being in cache to prevent the need of a lock, and only read data
+        // asynchronously if it is too big to use the cache.
+        /*
+        std::shared_lock read_guard(cache_mutex);
+        for (auto it = segments.begin(); it != segments.end(); it++) {
+            if (!cache.contains(OffsetToPage(it->first)))
+                return false;
+        }
+        */
+        return true;
+    }
+}
+
+std::vector<std::pair<std::size_t, std::size_t>> DirectRomFSReader::BreakupRead(
+    std::size_t offset, std::size_t length) {
+
+    std::vector<std::pair<std::size_t, std::size_t>> ret;
+
+    // Reads bigger than the cache line size will probably never hit again
+    if (length > cache_line_size) {
+        ret.push_back(std::make_pair(offset, length));
+        return ret;
+    }
+
+    size_t curr_offset = offset;
+    while (length) {
+        size_t next_page = OffsetToPage(curr_offset + cache_line_size);
+        size_t curr_page_len = std::min(length, next_page - curr_offset);
+        ret.push_back(std::make_pair(curr_offset, curr_page_len));
+        curr_offset = next_page;
+        length -= curr_page_len;
+    }
+    return ret;
 }
 
 } // namespace FileSys
diff --git a/src/core/file_sys/romfs_reader.h b/src/core/file_sys/romfs_reader.h
index 26dcb9857..28583b413 100644
--- a/src/core/file_sys/romfs_reader.h
+++ b/src/core/file_sys/romfs_reader.h
@@ -1,11 +1,14 @@
 #pragma once
 
 #include <array>
+#include <vector>
 #include <boost/serialization/access.hpp>
 #include <boost/serialization/array.hpp>
 #include <boost/serialization/export.hpp>
+#include "common/alignment.h"
 #include "common/common_types.h"
 #include "common/file_util.h"
+#include "common/static_lru_cache.h"
 
 namespace FileSys {
 
@@ -18,6 +21,8 @@ public:
 
     virtual std::size_t GetSize() const = 0;
     virtual std::size_t ReadFile(std::size_t offset, std::size_t length, u8* buffer) = 0;
+    virtual bool AllowsCachedReads() const = 0;
+    virtual bool CacheReady(std::size_t file_offset, std::size_t length) = 0;
 
 private:
     template <class Archive>
@@ -48,6 +53,10 @@ public:
 
     std::size_t ReadFile(std::size_t offset, std::size_t length, u8* buffer) override;
 
+    bool AllowsCachedReads() const override;
+
+    bool CacheReady(std::size_t file_offset, std::size_t length) override;
+
 private:
     bool is_encrypted;
     FileUtil::IOFile file;
@@ -57,8 +66,23 @@ private:
     u64 crypto_offset;
     u64 data_size;
 
+    // Total cache size: 128KB
+    static constexpr size_t cache_line_size = (1 << 13); // About 8KB
+    static constexpr size_t cache_line_count = 16;
+
+    Common::StaticLRUCache<std::size_t, std::array<u8, cache_line_size>, cache_line_count> cache;
+    // TODO(PabloMK7): Make cache thread safe, read the comment in CacheReady function.
+    // std::shared_mutex cache_mutex;
+
     DirectRomFSReader() = default;
 
+    std::size_t OffsetToPage(std::size_t offset) {
+        return Common::AlignDown(offset, cache_line_size);
+    }
+
+    std::vector<std::pair<std::size_t, std::size_t>> BreakupRead(std::size_t offset,
+                                                                 std::size_t length);
+
     template <class Archive>
     void serialize(Archive& ar, const unsigned int) {
         ar& boost::serialization::base_object<RomFSReader>(*this);
diff --git a/src/core/hle/service/fs/file.cpp b/src/core/hle/service/fs/file.cpp
index 15f7705ae..62e02d263 100644
--- a/src/core/hle/service/fs/file.cpp
+++ b/src/core/hle/service/fs/file.cpp
@@ -57,7 +57,6 @@ void File::Read(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp(ctx);
     u64 offset = rp.Pop<u64>();
     u32 length = rp.Pop<u32>();
-    auto& buffer = rp.PopMappedBuffer();
     LOG_TRACE(Service_FS, "Read {}: offset=0x{:x} length=0x{:08X}", GetName(), offset, length);
 
     const FileSessionSlot* file = GetSessionData(ctx.Session());
@@ -76,22 +75,94 @@ void File::Read(Kernel::HLERequestContext& ctx) {
                   offset, length, backend->GetSize());
     }
 
-    IPC::RequestBuilder rb = rp.MakeBuilder(2, 2);
+    // Conventional reading if the backend does not support cache.
+    if (!backend->AllowsCachedReads()) {
+        auto& buffer = rp.PopMappedBuffer();
+        IPC::RequestBuilder rb = rp.MakeBuilder(2, 2);
+        std::unique_ptr<u8*> data = std::make_unique<u8*>(static_cast<u8*>(operator new(length)));
+        const auto read = backend->Read(offset, length, *data);
+        if (read.Failed()) {
+            rb.Push(read.Code());
+            rb.Push<u32>(0);
+        } else {
+            buffer.Write(*data, 0, *read);
+            rb.Push(RESULT_SUCCESS);
+            rb.Push<u32>(static_cast<u32>(*read));
+        }
+        rb.PushMappedBuffer(buffer);
 
-    std::vector<u8> data(length);
-    ResultVal<std::size_t> read = backend->Read(offset, data.size(), data.data());
-    if (read.Failed()) {
-        rb.Push(read.Code());
-        rb.Push<u32>(0);
-    } else {
-        buffer.Write(data.data(), 0, *read);
-        rb.Push(RESULT_SUCCESS);
-        rb.Push<u32>(static_cast<u32>(*read));
+        std::chrono::nanoseconds read_timeout_ns{backend->GetReadDelayNs(length)};
+        ctx.SleepClientThread("file::read", read_timeout_ns, nullptr);
+        return;
     }
-    rb.PushMappedBuffer(buffer);
 
-    std::chrono::nanoseconds read_timeout_ns{backend->GetReadDelayNs(length)};
-    ctx.SleepClientThread("file::read", read_timeout_ns, nullptr);
+    struct AsyncData {
+        // Input
+        u32 length;
+        u64 offset;
+        std::chrono::steady_clock::time_point pre_timer;
+        bool cache_ready;
+
+        // Output
+        ResultCode ret{0};
+        Kernel::MappedBuffer* buffer;
+        std::unique_ptr<u8*> data;
+        size_t read_size;
+    };
+
+    auto async_data = std::make_shared<AsyncData>();
+    async_data->buffer = &rp.PopMappedBuffer();
+    async_data->length = length;
+    async_data->offset = offset;
+    async_data->cache_ready = backend->CacheReady(offset, length);
+    if (!async_data->cache_ready) {
+        async_data->pre_timer = std::chrono::steady_clock::now();
+    }
+
+    // LOG_DEBUG(Service_FS, "cache={}, offset={}, length={}", cache_ready, offset, length);
+    ctx.RunAsync(
+        [this, async_data](Kernel::HLERequestContext& ctx) {
+            async_data->data =
+                std::make_unique<u8*>(static_cast<u8*>(operator new(async_data->length)));
+            const auto read =
+                backend->Read(async_data->offset, async_data->length, *async_data->data);
+            if (read.Failed()) {
+                async_data->ret = read.Code();
+                async_data->read_size = 0;
+            } else {
+                async_data->ret = RESULT_SUCCESS;
+                async_data->read_size = *read;
+            }
+
+            const auto read_delay = static_cast<s64>(backend->GetReadDelayNs(async_data->length));
+            if (!async_data->cache_ready) {
+                const auto time_took = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                                           std::chrono::steady_clock::now() - async_data->pre_timer)
+                                           .count();
+                /*
+                if (time_took > read_delay) {
+                    LOG_DEBUG(Service_FS, "Took longer! length={}, time_took={}, read_delay={}",
+                              async_data->length, time_took, read_delay);
+                }
+                */
+                return static_cast<s64>((read_delay > time_took) ? (read_delay - time_took) : 0);
+            } else {
+                return static_cast<s64>(read_delay);
+            }
+        },
+        [async_data](Kernel::HLERequestContext& ctx) {
+            IPC::RequestBuilder rb(ctx, 0x0802, 2, 2);
+            if (async_data->ret.IsError()) {
+                rb.Push(async_data->ret);
+                rb.Push<u32>(0);
+            } else {
+                async_data->buffer->Write(*async_data->data, 0, async_data->read_size);
+                rb.Push(RESULT_SUCCESS);
+                rb.Push<u32>(static_cast<u32>(async_data->read_size));
+            }
+            rb.PushMappedBuffer(*async_data->buffer);
+        },
+        !async_data->cache_ready);
 }
 
 void File::Write(Kernel::HLERequestContext& ctx) {
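
Usage note (not part of the commit above): the heart of the patch is Common::StaticLRUCache, whose request() merges lookup and insertion so a miss hands back a statically allocated slot to fill in place, which is exactly how DirectRomFSReader::ReadFile drives it per cache line. The standalone sketch below illustrates that calling pattern under assumed values; the 4 KiB page size, 4-entry capacity, and the FakeDiskRead()/read_byte helpers are made up for illustration and do not appear in the patch (the real reader uses 16 lines of 8 KiB keyed by page-aligned offsets and decrypts after reading).

// Minimal, self-contained sketch of driving Common::StaticLRUCache (assumptions noted above).
#include <array>
#include <cstddef>
#include <cstdio>

#include "common/static_lru_cache.h"

namespace {

constexpr std::size_t PageSize = 0x1000; // 4 KiB pages, purely for the example
using Page = std::array<unsigned char, PageSize>;

// Stand-in for an actual disk read such as IOFile::ReadAtBytes.
void FakeDiskRead(std::size_t page_offset, Page& out) {
    out.fill(static_cast<unsigned char>(page_offset / PageSize));
}

} // namespace

int main() {
    // 4 pages of capacity; the least recently used page is recycled when full.
    Common::StaticLRUCache<std::size_t, Page, 4> cache;

    const auto read_byte = [&cache](std::size_t offset) {
        const std::size_t page_offset = offset - (offset % PageSize);
        // request() returns {was_cached, reference to the statically allocated slot}.
        auto [hit, page] = cache.request(page_offset);
        if (!hit) {
            FakeDiskRead(page_offset, page); // miss: fill the slot we were handed
        }
        return page[offset % PageSize];
    };

    std::printf("%u\n", static_cast<unsigned>(read_byte(0x5000))); // miss: loads page 0x5000
    std::printf("%u\n", static_cast<unsigned>(read_byte(0x5001))); // hit: served from the cache
}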