Texture Cache: Improve accuracy of sparse texture detection.

This commit is contained in:
Fernando Sahmkow 2021-06-13 03:34:06 +02:00
parent 38165fb7e3
commit fd98fcf7f0
6 changed files with 346 additions and 135 deletions

View file

@ -69,11 +69,17 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
} else { } else {
UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
} }
const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
for (const auto& map : submapped_ranges) {
// Flush and invalidate through the GPU interface, to be asynchronous if possible. // Flush and invalidate through the GPU interface, to be asynchronous if possible.
const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr); const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map.first);
ASSERT(cpu_addr); ASSERT(cpu_addr);
rasterizer->UnmapMemory(*cpu_addr, size); rasterizer->UnmapMemory(*cpu_addr, map.second);
}
UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
} }
@ -128,7 +134,8 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
//// Lock the new page //// Lock the new page
// TryLockPage(page_entry, size); // TryLockPage(page_entry, size);
auto& current_page = page_table[PageEntryIndex(gpu_addr)]; auto& current_page = page_table[PageEntryIndex(gpu_addr)];
if (current_page.IsValid() != page_entry.IsValid() ||
if ((!current_page.IsValid() && page_entry.IsValid()) ||
current_page.ToAddress() != page_entry.ToAddress()) { current_page.ToAddress() != page_entry.ToAddress()) {
rasterizer->ModifyGPUMemory(gpu_addr, size); rasterizer->ModifyGPUMemory(gpu_addr, size);
} }
@ -179,6 +186,19 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
return page_entry.ToAddress() + (gpu_addr & page_mask); return page_entry.ToAddress() + (gpu_addr & page_mask);
} }
/// Finds the first translatable page inside a GPU range.
/// Visits every page touched by [addr, addr + size) in ascending order and returns the CPU
/// address that the start of the first page with a non-zero translation resolves to.
/// Returns std::nullopt when no visited page has such a translation.
std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
    // One past the last page touched by the range (rounded up to a page boundary).
    const size_t end_page{(addr + size + page_size - 1) >> page_bits};
    for (size_t page = addr >> page_bits; page < end_page; ++page) {
        const auto cpu_addr{GpuToCpuAddress(page << page_bits)};
        if (!cpu_addr || *cpu_addr == 0) {
            // No translation, or a zero address which is treated as unusable here.
            continue;
        }
        return cpu_addr;
    }
    return std::nullopt;
}
template <typename T> template <typename T>
T MemoryManager::Read(GPUVAddr addr) const { T MemoryManager::Read(GPUVAddr addr) const {
if (auto page_pointer{GetPointer(addr)}; page_pointer) { if (auto page_pointer{GetPointer(addr)}; page_pointer) {
@ -375,4 +395,79 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
return page <= Core::Memory::PAGE_SIZE; return page <= Core::Memory::PAGE_SIZE;
} }
/// Checks whether a GPU range is backed by one unbroken run of CPU addresses.
/// Every page touched by [gpu_addr, gpu_addr + size) must translate to a non-zero CPU address,
/// and each page's CPU address must be exactly page_size past the previous page's.
/// Returns true when no page is visited.
bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
    const size_t first_page{gpu_addr >> page_bits};
    const size_t end_page{(gpu_addr + size + page_size - 1) >> page_bits};
    std::optional<VAddr> previous_cpu_addr{};
    for (size_t page = first_page; page != end_page; ++page) {
        const auto current_cpu_addr{GpuToCpuAddress(page << page_bits)};
        const bool is_mapped = current_cpu_addr && *current_cpu_addr != 0;
        if (!is_mapped) {
            return false;
        }
        // The first visited page has no predecessor to compare against.
        const bool breaks_run =
            previous_cpu_addr && *previous_cpu_addr + page_size != *current_cpu_addr;
        if (breaks_run) {
            return false;
        }
        previous_cpu_addr = current_cpu_addr;
    }
    return true;
}
/// Checks whether every page touched by [gpu_addr, gpu_addr + size) has a page table entry
/// that is valid and whose address is non-zero. Returns true when no page is visited.
bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const {
    const size_t end_page{(gpu_addr + size + page_size - 1) >> page_bits};
    for (size_t page = gpu_addr >> page_bits; page < end_page; ++page) {
        const auto& entry = page_table[page];
        const bool usable = entry.IsValid() && entry.ToAddress() != 0;
        if (!usable) {
            return false;
        }
    }
    return true;
}
/// Splits a GPU range into the sub-ranges that are mapped to contiguous CPU memory.
///
/// Walks [gpu_addr, gpu_addr + size) page by page. Consecutive pages are merged into one
/// segment while each of them translates to a CPU address and that address is exactly
/// page_size past the previous page's. A page without a translation, or a jump in the CPU
/// address, closes the current segment.
///
/// @param gpu_addr Start of the GPU range; it does not need to be page aligned.
/// @param size     Length of the range in bytes.
/// @return (gpu address, size in bytes) pairs in ascending address order. The first segment
///         starts at gpu_addr itself (not at its page base) when that page is mapped. The
///         vector is empty when size is zero or no page of the range translates.
std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
    GPUVAddr gpu_addr, std::size_t size) const {
    std::vector<std::pair<GPUVAddr, std::size_t>> result{};
    size_t page_index{gpu_addr >> page_bits};
    size_t remaining_size{size};
    // Only the first page can start mid-page; reset to zero after the first iteration.
    size_t page_offset{gpu_addr & page_mask};
    std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
    std::optional<VAddr> old_page_addr{};
    // Grows the open segment, or opens a new one at the current position.
    const auto extend_size = [this, &last_segment, &page_index, &page_offset](std::size_t bytes) {
        if (!last_segment) {
            // Include the in-page offset so the segment base matches the byte count, which is
            // measured from gpu_addr rather than from the start of its page.
            const GPUVAddr new_base_addr = (page_index << page_bits) + page_offset;
            last_segment = {new_base_addr, bytes};
        } else {
            last_segment->second += bytes;
        }
    };
    // Emits the open segment, if any, and starts over.
    const auto split = [&last_segment, &result] {
        if (last_segment) {
            result.push_back(*last_segment);
            last_segment = std::nullopt;
        }
    };
    while (remaining_size > 0) {
        const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
        if (!page_addr) {
            split();
        } else if (old_page_addr) {
            if (*old_page_addr + page_size != *page_addr) {
                // The CPU backing jumps here: close the segment before extending.
                split();
            }
            extend_size(num_bytes);
        } else {
            extend_size(num_bytes);
        }
        ++page_index;
        page_offset = 0;
        remaining_size -= num_bytes;
        // Remember this page's translation so the next page can be checked for contiguity.
        // Without this the contiguity branch above is never taken.
        old_page_addr = page_addr;
    }
    split();
    return result;
}
} // namespace Tegra } // namespace Tegra

View file

@ -76,6 +76,8 @@ public:
[[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
[[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const;
template <typename T> template <typename T>
[[nodiscard]] T Read(GPUVAddr addr) const; [[nodiscard]] T Read(GPUVAddr addr) const;
@ -116,6 +118,24 @@ public:
*/ */
[[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
/**
* IsContinousRange checks if a gpu region is mapped by a single range of cpu addresses.
*/
[[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const;
/**
* IsFullyMappedRange checks if a gpu region is mapped entirely.
*/
[[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const;
/**
* GetSubmappedRange returns a vector with all the subranges of cpu addresses mapped beneath,
* each given as a (gpu address, size) pair. If the region is continuous, a single pair will be
* returned. If it's unmapped, an empty vector will be returned.
*/
std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
std::size_t size) const;
[[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
[[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
[[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size); [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);

View file

@ -25,11 +25,12 @@ enum class ImageFlagBits : u32 {
Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted
Registered = 1 << 6, ///< True when the image is registered Registered = 1 << 6, ///< True when the image is registered
Picked = 1 << 7, ///< Temporary flag to mark the image as picked Picked = 1 << 7, ///< Temporary flag to mark the image as picked
Remapped = 1 << 8, ///< Image has been remapped.
// Garbage Collection Flags // Garbage Collection Flags
BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher BadOverlap = 1 << 9, ///< This image overlaps other but doesn't fit, has higher
///< garbage collection priority ///< garbage collection priority
Alias = 1 << 9, ///< This image has aliases and has priority on garbage Alias = 1 << 10, ///< This image has aliases and has priority on garbage
///< collection ///< collection
}; };
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)

View file

@ -13,6 +13,7 @@
#include <span> #include <span>
#include <type_traits> #include <type_traits>
#include <unordered_map> #include <unordered_map>
#include <unordered_set>
#include <utility> #include <utility>
#include <vector> #include <vector>
@ -155,6 +156,9 @@ public:
/// Remove images in a region /// Remove images in a region
void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
/// Used when GPU memory changes layout on sparse textures.
// void CheckRemaps();
/// Blit an image with the given parameters /// Blit an image with the given parameters
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& src,
@ -238,7 +242,7 @@ private:
FramebufferId GetFramebufferId(const RenderTargets& key); FramebufferId GetFramebufferId(const RenderTargets& key);
/// Refresh the contents (pixel data) of an image /// Refresh the contents (pixel data) of an image
void RefreshContents(Image& image); void RefreshContents(Image& image, ImageId image_id);
/// Upload data from guest to an image /// Upload data from guest to an image
template <typename StagingBuffer> template <typename StagingBuffer>
@ -290,6 +294,9 @@ private:
template <typename Func> template <typename Func>
void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
template <typename Func>
void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
/// Iterates over all the images in a region calling func /// Iterates over all the images in a region calling func
template <typename Func> template <typename Func>
void ForEachSparseSegment(ImageBase& image, Func&& func); void ForEachSparseSegment(ImageBase& image, Func&& func);
@ -304,10 +311,10 @@ private:
void UnregisterImage(ImageId image); void UnregisterImage(ImageId image);
/// Track CPU reads and writes for image /// Track CPU reads and writes for image
void TrackImage(ImageBase& image); void TrackImage(ImageBase& image, ImageId image_id);
/// Stop tracking CPU reads and writes for image /// Stop tracking CPU reads and writes for image
void UntrackImage(ImageBase& image); void UntrackImage(ImageBase& image, ImageId image_id);
/// Delete image from the cache /// Delete image from the cache
void DeleteImage(ImageId image); void DeleteImage(ImageId image);
@ -367,6 +374,11 @@ private:
std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table; std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table; std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
VAddr virtual_invalid_space{};
bool has_deleted_images = false; bool has_deleted_images = false;
u64 total_used_memory = 0; u64 total_used_memory = 0;
@ -685,7 +697,9 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
return; return;
} }
image.flags |= ImageFlagBits::CpuModified; image.flags |= ImageFlagBits::CpuModified;
UntrackImage(image); if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
}
}); });
} }
@ -722,7 +736,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
for (const ImageId id : deleted_images) { for (const ImageId id : deleted_images) {
Image& image = slot_images[id]; Image& image = slot_images[id];
if (True(image.flags & ImageFlagBits::Tracked)) { if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image); UntrackImage(image, id);
} }
UnregisterImage(id); UnregisterImage(id);
DeleteImage(id); DeleteImage(id);
@ -736,11 +750,13 @@ void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
[&](ImageId id, Image&) { deleted_images.push_back(id); }); [&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) { for (const ImageId id : deleted_images) {
Image& image = slot_images[id]; Image& image = slot_images[id];
if (True(image.flags & ImageFlagBits::Tracked)) { if (True(image.flags & ImageFlagBits::Remapped)) {
UntrackImage(image); continue;
}
image.flags |= ImageFlagBits::Remapped;
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, id);
} }
UnregisterImage(id);
DeleteImage(id);
} }
} }
@ -958,13 +974,13 @@ bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
} }
template <class P> template <class P>
void TextureCache<P>::RefreshContents(Image& image) { void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
if (False(image.flags & ImageFlagBits::CpuModified)) { if (False(image.flags & ImageFlagBits::CpuModified)) {
// Only upload modified images // Only upload modified images
return; return;
} }
image.flags &= ~ImageFlagBits::CpuModified; image.flags &= ~ImageFlagBits::CpuModified;
TrackImage(image); TrackImage(image, image_id);
if (image.info.num_samples > 1) { if (image.info.num_samples > 1) {
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
@ -1043,14 +1059,20 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
template <class P> template <class P>
ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options) { RelaxedOptions options) {
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
if (!cpu_addr) { if (!cpu_addr) {
return ImageId{}; return ImageId{};
} }
}
const bool broken_views = runtime.HasBrokenTextureViewFormats(); const bool broken_views = runtime.HasBrokenTextureViewFormats();
const bool native_bgr = runtime.HasNativeBgr(); const bool native_bgr = runtime.HasNativeBgr();
ImageId image_id; ImageId image_id;
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
if (True(existing_image.flags & ImageFlagBits::Remapped)) {
return false;
}
if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
const bool strict_size = False(options & RelaxedOptions::Size) && const bool strict_size = False(options & RelaxedOptions::Size) &&
True(existing_image.flags & ImageFlagBits::Strong); True(existing_image.flags & ImageFlagBits::Strong);
@ -1069,14 +1091,23 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
} }
return false; return false;
}; };
ForEachImageInRegionGPU(gpu_addr, CalculateGuestSizeInBytes(info), lambda); ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda);
return image_id; return image_id;
} }
template <class P> template <class P>
ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options) { RelaxedOptions options) {
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
const auto size = CalculateGuestSizeInBytes(info);
cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
if (!cpu_addr) {
const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
virtual_invalid_space += Common::AlignUp(size, 32);
cpu_addr = std::optional<VAddr>(fake_addr);
}
}
ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
const Image& image = slot_images[image_id]; const Image& image = slot_images[image_id];
@ -1096,10 +1127,16 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
const bool broken_views = runtime.HasBrokenTextureViewFormats(); const bool broken_views = runtime.HasBrokenTextureViewFormats();
const bool native_bgr = runtime.HasNativeBgr(); const bool native_bgr = runtime.HasNativeBgr();
std::vector<ImageId> overlap_ids; std::vector<ImageId> overlap_ids;
std::unordered_set<ImageId> overlaps_found;
std::vector<ImageId> left_aliased_ids; std::vector<ImageId> left_aliased_ids;
std::vector<ImageId> right_aliased_ids; std::vector<ImageId> right_aliased_ids;
std::unordered_set<ImageId> ignore_textures;
std::vector<ImageId> bad_overlap_ids; std::vector<ImageId> bad_overlap_ids;
ForEachImageInRegionGPU(gpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
if (True(overlap.flags & ImageFlagBits::Remapped)) {
ignore_textures.insert(overlap_id);
return;
}
if (info.type == ImageType::Linear) { if (info.type == ImageType::Linear) {
if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
// Alias linear images with the same pitch // Alias linear images with the same pitch
@ -1107,6 +1144,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
} }
return; return;
} }
overlaps_found.insert(overlap_id);
static constexpr bool strict_size = true; static constexpr bool strict_size = true;
const std::optional<OverlapResult> solution = ResolveOverlap( const std::optional<OverlapResult> solution = ResolveOverlap(
new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
@ -1130,30 +1168,34 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
bad_overlap_ids.push_back(overlap_id); bad_overlap_ids.push_back(overlap_id);
overlap.flags |= ImageFlagBits::BadOverlap; overlap.flags |= ImageFlagBits::BadOverlap;
} }
}); };
ForEachImageInRegion(cpu_addr, size_bytes, region_check);
const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
if (!overlaps_found.contains(overlap_id)) {
ignore_textures.insert(overlap_id);
}
};
ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
Image& new_image = slot_images[new_image_id]; Image& new_image = slot_images[new_image_id];
new_image.is_sparse = false; new_image.is_sparse =
if (new_image.info.type != ImageType::Linear && new_image.info.type != ImageType::Buffer) { !gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes);
const LevelArray offsets = CalculateMipLevelOffsets(new_image.info);
size_t level; for (const ImageId overlap_id : ignore_textures) {
const size_t levels = static_cast<size_t>(new_image.info.resources.levels); Image& overlap = slot_images[overlap_id];
VAddr n_cpu_addr = new_image.cpu_addr; if (True(overlap.flags & ImageFlagBits::GpuModified)) {
GPUVAddr n_gpu_addr = new_image.gpu_addr; UNIMPLEMENTED();
for (level = 0; level < levels; level++) {
n_gpu_addr += offsets[level];
n_cpu_addr += offsets[level];
std::optional<VAddr> cpu_addr_opt = gpu_memory.GpuToCpuAddress(n_gpu_addr);
if (!cpu_addr_opt || *cpu_addr_opt == 0 || n_cpu_addr != *cpu_addr_opt) {
new_image.is_sparse = true;
break;
} }
if (True(overlap.flags & ImageFlagBits::Tracked)) {
UntrackImage(overlap, overlap_id);
} }
UnregisterImage(overlap_id);
DeleteImage(overlap_id);
} }
// TODO: Only upload what we need // TODO: Only upload what we need
RefreshContents(new_image); RefreshContents(new_image, new_image_id);
for (const ImageId overlap_id : overlap_ids) { for (const ImageId overlap_id : overlap_ids) {
Image& overlap = slot_images[overlap_id]; Image& overlap = slot_images[overlap_id];
@ -1165,7 +1207,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
runtime.CopyImage(new_image, overlap, copies); runtime.CopyImage(new_image, overlap, copies);
} }
if (True(overlap.flags & ImageFlagBits::Tracked)) { if (True(overlap.flags & ImageFlagBits::Tracked)) {
UntrackImage(overlap); UntrackImage(overlap, overlap_id);
} }
UnregisterImage(overlap_id); UnregisterImage(overlap_id);
DeleteImage(overlap_id); DeleteImage(overlap_id);
@ -1388,18 +1430,59 @@ void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Fu
} }
} }
/// Calls func(image_id, image) for the images listed in sparse_page_table that overlap a
/// GPU range.
///
/// Each GPU page of [gpu_addr, gpu_addr + size) is looked up in sparse_page_table. An image is
/// skipped when it already carries the Picked flag or when OverlapsGPU(gpu_addr, size) is
/// false; otherwise it is flagged Picked (so it is reported only once even when it spans
/// several pages) and passed to func. When func returns bool, a true result is returned to
/// ForEachGPUPage immediately (presumably ending the page walk; confirm against
/// ForEachGPUPage). All Picked flags set here are cleared before returning.
template <class P>
template <typename Func>
void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
    using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
    // Images flagged during this walk; their Picked flag is reset at the end.
    boost::container::small_vector<ImageId, 8> images;
    ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
        const auto page_it = sparse_page_table.find(page);
        if (page_it != sparse_page_table.end()) {
            for (const ImageId candidate_id : page_it->second) {
                Image& candidate = slot_images[candidate_id];
                const bool already_picked = True(candidate.flags & ImageFlagBits::Picked);
                if (already_picked || !candidate.OverlapsGPU(gpu_addr, size)) {
                    continue;
                }
                candidate.flags |= ImageFlagBits::Picked;
                images.push_back(candidate_id);
                if constexpr (BOOL_BREAK) {
                    if (func(candidate_id, candidate)) {
                        return true;
                    }
                } else {
                    func(candidate_id, candidate);
                }
            }
        }
        if constexpr (BOOL_BREAK) {
            return false;
        }
    });
    for (const ImageId picked_id : images) {
        slot_images[picked_id].flags &= ~ImageFlagBits::Picked;
    }
}
template <class P> template <class P>
template <typename Func> template <typename Func>
void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
GPUVAddr gpu_addr = image.gpu_addr; const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
const size_t levels = image.info.resources.levels; for (auto& segment : segments) {
const auto mipmap_sizes = CalculateMipLevelSizes(image.info); const auto gpu_addr = segment.first;
for (size_t level = 0; level < levels; level++) { const auto size = segment.second;
const size_t size = mipmap_sizes[level];
std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (cpu_addr && *cpu_addr != 0) { ASSERT(cpu_addr);
if constexpr (BOOL_BREAK) { if constexpr (BOOL_BREAK) {
if (func(gpu_addr, *cpu_addr, size)) { if (func(gpu_addr, *cpu_addr, size)) {
return true; return true;
@ -1408,8 +1491,6 @@ void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
func(gpu_addr, *cpu_addr, size); func(gpu_addr, *cpu_addr, size);
} }
} }
gpu_addr += size;
}
} }
template <class P> template <class P>
@ -1446,11 +1527,17 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
image.map_view_id = map_id; image.map_view_id = map_id;
return; return;
} }
ForEachSparseSegment(image, [this, image_id](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { std::vector<ImageViewId> sparse_maps{};
ForEachSparseSegment(
image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
ForEachCPUPage(cpu_addr, size, ForEachCPUPage(cpu_addr, size,
[this, map_id](u64 page) { page_table[page].push_back(map_id); }); [this, map_id](u64 page) { page_table[page].push_back(map_id); });
sparse_maps.push_back(map_id);
}); });
sparse_views.emplace(image_id, std::move(sparse_maps));
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
[this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
} }
template <class P> template <class P>
@ -1467,20 +1554,26 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
} }
total_used_memory -= Common::AlignUp(tentative_size, 1024); total_used_memory -= Common::AlignUp(tentative_size, 1024);
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { const auto& clear_page_table =
const auto page_it = gpu_page_table.find(page); [this, image_id](
if (page_it == gpu_page_table.end()) { u64 page,
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) {
const auto page_it = selected_page_table.find(page);
if (page_it == selected_page_table.end()) {
UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
return; return;
} }
std::vector<ImageId>& image_ids = page_it->second; std::vector<ImageId>& image_ids = page_it->second;
const auto vector_it = std::ranges::find(image_ids, image_id); const auto vector_it = std::ranges::find(image_ids, image_id);
if (vector_it == image_ids.end()) { if (vector_it == image_ids.end()) {
UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS); UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
page << PAGE_BITS);
return; return;
} }
image_ids.erase(vector_it); image_ids.erase(vector_it);
}); };
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
[this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
if (!image.is_sparse) { if (!image.is_sparse) {
const auto map_id = image.map_view_id; const auto map_id = image.map_view_id;
ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
@ -1501,11 +1594,17 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
slot_map_views.erase(map_id); slot_map_views.erase(map_id);
return; return;
} }
boost::container::small_vector<ImageMapId, 8> maps_to_delete; ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
ForEachSparseSegment( clear_page_table(page, sparse_page_table);
image, [this, image_id, &maps_to_delete]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, });
size_t size) { auto it = sparse_views.find(image_id);
ForEachCPUPage(cpu_addr, size, [this, image_id, &maps_to_delete](u64 page) { ASSERT(it != sparse_views.end());
auto& sparse_maps = it->second;
for (auto& map_view_id : sparse_maps) {
const auto& map = slot_map_views[map_view_id];
const VAddr cpu_addr = map.cpu_addr;
const std::size_t size = map.size;
ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) {
const auto page_it = page_table.find(page); const auto page_it = page_table.find(page);
if (page_it == page_table.end()) { if (page_it == page_table.end()) {
UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
@ -1520,27 +1619,36 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
continue; continue;
} }
if (!map.picked) { if (!map.picked) {
maps_to_delete.push_back(*vector_it);
map.picked = true; map.picked = true;
} }
vector_it = image_map_ids.erase(vector_it); vector_it = image_map_ids.erase(vector_it);
} }
}); });
}); slot_map_views.erase(map_view_id);
for (const ImageMapId map_id : maps_to_delete) {
slot_map_views.erase(map_id);
} }
sparse_views.erase(it);
} }
template <class P> template <class P>
void TextureCache<P>::TrackImage(ImageBase& image) { void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
ASSERT(False(image.flags & ImageFlagBits::Tracked)); ASSERT(False(image.flags & ImageFlagBits::Tracked));
image.flags |= ImageFlagBits::Tracked; image.flags |= ImageFlagBits::Tracked;
if (!image.is_sparse) { if (!image.is_sparse) {
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
return; return;
} }
if (True(image.flags & ImageFlagBits::Registered)) {
auto it = sparse_views.find(image_id);
ASSERT(it != sparse_views.end());
auto& sparse_maps = it->second;
for (auto& map_view_id : sparse_maps) {
const auto& map = slot_map_views[map_view_id];
const VAddr cpu_addr = map.cpu_addr;
const std::size_t size = map.size;
rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
}
return;
}
ForEachSparseSegment(image, ForEachSparseSegment(image,
[this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
@ -1548,17 +1656,23 @@ void TextureCache<P>::TrackImage(ImageBase& image) {
} }
template <class P> template <class P>
void TextureCache<P>::UntrackImage(ImageBase& image) { void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
ASSERT(True(image.flags & ImageFlagBits::Tracked)); ASSERT(True(image.flags & ImageFlagBits::Tracked));
image.flags &= ~ImageFlagBits::Tracked; image.flags &= ~ImageFlagBits::Tracked;
if (!image.is_sparse) { if (!image.is_sparse) {
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
return; return;
} }
ForEachSparseSegment(image, ASSERT(True(image.flags & ImageFlagBits::Registered));
[this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { auto it = sparse_views.find(image_id);
ASSERT(it != sparse_views.end());
auto& sparse_maps = it->second;
for (auto& map_view_id : sparse_maps) {
const auto& map = slot_map_views[map_view_id];
const VAddr cpu_addr = map.cpu_addr;
const std::size_t size = map.size;
rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
}); }
} }
template <class P> template <class P>
@ -1700,10 +1814,10 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
if (invalidate) { if (invalidate) {
image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
if (False(image.flags & ImageFlagBits::Tracked)) { if (False(image.flags & ImageFlagBits::Tracked)) {
TrackImage(image); TrackImage(image, image_id);
} }
} else { } else {
RefreshContents(image); RefreshContents(image, image_id);
SynchronizeAliases(image_id); SynchronizeAliases(image_id);
} }
if (is_modification) { if (is_modification) {

View file

@ -786,37 +786,20 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
return copies; return copies;
} }
bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr) {
if (gpu_addr == 0) {
return false;
}
if (gpu_addr > (u64(1) << 48)) {
return false;
}
const auto cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
return cpu_addr.has_value() && *cpu_addr != 0;
}
bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
const GPUVAddr gpu_addr = config.Address(); const GPUVAddr address = config.Address();
if (IsValidAddress(gpu_memory, gpu_addr)) { if (address == 0) {
return false;
}
if (address > (1ULL << 48)) {
return false;
}
if (gpu_memory.GpuToCpuAddress(address).has_value()) {
return true; return true;
} }
if (!config.IsBlockLinear()) {
return false;
}
const size_t levels = config.max_mip_level + 1;
if (levels <= 1) {
return false;
}
const ImageInfo info{config}; const ImageInfo info{config};
const LevelArray offsets = CalculateMipLevelOffsets(info); const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
for (size_t level = 1; level < levels; level++) { return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
if (IsValidAddress(gpu_memory, static_cast<GPUVAddr>(gpu_addr + offsets[level]))) {
return true;
}
}
return false;
} }
std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,

View file

@ -57,8 +57,6 @@ struct OverlapResult {
const ImageInfo& src, const ImageInfo& src,
SubresourceBase base); SubresourceBase base);
[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr);
[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,