509 lines
20 KiB
C++
Executable File
509 lines
20 KiB
C++
Executable File
// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
|
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
#pragma once
|
|
|
|
#include <atomic>
|
|
#include <deque>
|
|
#include <limits>
|
|
#include <mutex>
|
|
#include <span>
|
|
#include <type_traits>
|
|
#include <unordered_map>
|
|
#include <unordered_set>
|
|
#include <vector>
|
|
#include <boost/container/small_vector.hpp>
|
|
#include <queue>
|
|
|
|
#include "common/common_types.h"
|
|
#include "common/hash.h"
|
|
#include "common/literals.h"
|
|
#include "common/lru_cache.h"
|
|
#include "common/polyfill_ranges.h"
|
|
#include "common/scratch_buffer.h"
|
|
#include "common/thread_worker.h"
|
|
#include "video_core/compatible_formats.h"
|
|
#include "video_core/control/channel_state_cache.h"
|
|
#include "video_core/delayed_destruction_ring.h"
|
|
#include "video_core/engines/fermi_2d.h"
|
|
#include "video_core/surface.h"
|
|
#include "video_core/texture_cache/descriptor_table.h"
|
|
#include "video_core/texture_cache/image_base.h"
|
|
#include "video_core/texture_cache/image_info.h"
|
|
#include "video_core/texture_cache/image_view_base.h"
|
|
#include "video_core/texture_cache/render_targets.h"
|
|
#include "video_core/texture_cache/slot_vector.h"
|
|
#include "video_core/texture_cache/types.h"
|
|
#include "video_core/textures/texture.h"
|
|
|
|
namespace Tegra::Control {
|
|
struct ChannelState;
|
|
}
|
|
|
|
namespace VideoCommon {
|
|
|
|
using Tegra::Texture::TICEntry;
|
|
using Tegra::Texture::TSCEntry;
|
|
using VideoCore::Surface::PixelFormat;
|
|
using namespace Common::Literals;
|
|
|
|
struct ImageViewInOut {
|
|
u32 index{};
|
|
bool blacklist{};
|
|
ImageViewId id{};
|
|
};
|
|
|
|
struct AsyncDecodeContext {
|
|
ImageId image_id;
|
|
Common::ScratchBuffer<u8> decoded_data;
|
|
boost::container::small_vector<BufferImageCopy, 16> copies;
|
|
std::mutex mutex;
|
|
std::atomic_bool complete;
|
|
};
|
|
|
|
using TextureCacheGPUMap = std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>>;
|
|
|
|
class TextureCacheChannelInfo : public ChannelInfo {
|
|
public:
|
|
TextureCacheChannelInfo() = delete;
|
|
TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept;
|
|
TextureCacheChannelInfo(const TextureCacheChannelInfo& state) = delete;
|
|
TextureCacheChannelInfo& operator=(const TextureCacheChannelInfo&) = delete;
|
|
|
|
DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
|
|
DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
|
|
std::vector<SamplerId> graphics_sampler_ids;
|
|
std::vector<ImageViewId> graphics_image_view_ids;
|
|
|
|
DescriptorTable<TICEntry> compute_image_table{gpu_memory};
|
|
DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
|
|
std::vector<SamplerId> compute_sampler_ids;
|
|
std::vector<ImageViewId> compute_image_view_ids;
|
|
|
|
std::unordered_map<TICEntry, ImageViewId> image_views;
|
|
std::unordered_map<TSCEntry, SamplerId> samplers;
|
|
|
|
TextureCacheGPUMap* gpu_page_table;
|
|
};
|
|
|
|
template <class P>
|
|
class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelInfo> {
|
|
/// Address shift for caching images into a hash table
|
|
static constexpr u64 YUZU_PAGEBITS = 20;
|
|
|
|
/// Enables debugging features to the texture cache
|
|
static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
|
|
/// Implement blits as copies between framebuffers
|
|
static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
|
|
/// True when some copies have to be emulated
|
|
static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
|
|
/// True when the API can provide info about the memory of the device.
|
|
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
|
|
/// True when the API can do asynchronous texture downloads.
|
|
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
|
|
|
|
static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
|
|
|
|
static constexpr s64 TARGET_THRESHOLD = 4_GiB;
|
|
static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
|
|
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
|
|
static constexpr size_t GC_EMERGENCY_COUNTS = 2;
|
|
|
|
using Runtime = typename P::Runtime;
|
|
using Image = typename P::Image;
|
|
using ImageAlloc = typename P::ImageAlloc;
|
|
using ImageView = typename P::ImageView;
|
|
using Sampler = typename P::Sampler;
|
|
using Framebuffer = typename P::Framebuffer;
|
|
using AsyncBuffer = typename P::AsyncBuffer;
|
|
using BufferType = typename P::BufferType;
|
|
|
|
struct BlitImages {
|
|
ImageId dst_id;
|
|
ImageId src_id;
|
|
PixelFormat dst_format;
|
|
PixelFormat src_format;
|
|
};
|
|
|
|
public:
|
|
explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&);
|
|
|
|
/// Notify the cache that a new frame has been queued
|
|
void TickFrame();
|
|
|
|
/// Return a constant reference to the given image view id
|
|
[[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
|
|
|
|
/// Return a reference to the given image view id
|
|
[[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
|
|
|
|
/// Get the imageview from the graphics descriptor table in the specified index
|
|
[[nodiscard]] ImageView& GetImageView(u32 index) noexcept;
|
|
|
|
/// Mark an image as modified from the GPU
|
|
void MarkModification(ImageId id) noexcept;
|
|
|
|
/// Fill image_view_ids with the graphics images in indices
|
|
template <bool has_blacklists>
|
|
void FillGraphicsImageViews(std::span<ImageViewInOut> views);
|
|
|
|
/// Fill image_view_ids with the compute images in indices
|
|
void FillComputeImageViews(std::span<ImageViewInOut> views);
|
|
|
|
/// Handle feedback loops during draws.
|
|
void CheckFeedbackLoop(std::span<const ImageViewInOut> views);
|
|
|
|
/// Get the sampler from the graphics descriptor table in the specified index
|
|
Sampler* GetGraphicsSampler(u32 index);
|
|
|
|
/// Get the sampler from the compute descriptor table in the specified index
|
|
Sampler* GetComputeSampler(u32 index);
|
|
|
|
/// Get the sampler id from the graphics descriptor table in the specified index
|
|
SamplerId GetGraphicsSamplerId(u32 index);
|
|
|
|
/// Get the sampler id from the compute descriptor table in the specified index
|
|
SamplerId GetComputeSamplerId(u32 index);
|
|
|
|
/// Return a constant reference to the given sampler id
|
|
[[nodiscard]] const Sampler& GetSampler(SamplerId id) const noexcept;
|
|
|
|
/// Return a reference to the given sampler id
|
|
[[nodiscard]] Sampler& GetSampler(SamplerId id) noexcept;
|
|
|
|
/// Refresh the state for graphics image view and sampler descriptors
|
|
void SynchronizeGraphicsDescriptors();
|
|
|
|
/// Refresh the state for compute image view and sampler descriptors
|
|
void SynchronizeComputeDescriptors();
|
|
|
|
/// Updates the Render Targets if they can be rescaled
|
|
/// @retval True if the Render Targets have been rescaled.
|
|
bool RescaleRenderTargets();
|
|
|
|
/// Update bound render targets and upload memory if necessary
|
|
/// @param is_clear True when the render targets are being used for clears
|
|
void UpdateRenderTargets(bool is_clear);
|
|
|
|
/// Find a framebuffer with the currently bound render targets
|
|
/// UpdateRenderTargets should be called before this
|
|
Framebuffer* GetFramebuffer();
|
|
|
|
/// Mark images in a range as modified from the CPU
|
|
void WriteMemory(VAddr cpu_addr, size_t size);
|
|
|
|
/// Download contents of host images to guest memory in a region
|
|
void DownloadMemory(VAddr cpu_addr, size_t size);
|
|
|
|
std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size);
|
|
|
|
/// Remove images in a region
|
|
void UnmapMemory(VAddr cpu_addr, size_t size);
|
|
|
|
/// Remove images in a region
|
|
void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size);
|
|
|
|
/// Blit an image with the given parameters
|
|
bool BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
|
|
const Tegra::Engines::Fermi2D::Surface& src,
|
|
const Tegra::Engines::Fermi2D::Config& copy);
|
|
|
|
/// Try to find a cached image view in the given CPU address
|
|
[[nodiscard]] ImageView* TryFindFramebufferImageView(const Tegra::FramebufferConfig& config,
|
|
VAddr cpu_addr);
|
|
|
|
/// Return true when there are uncommitted images to be downloaded
|
|
[[nodiscard]] bool HasUncommittedFlushes() const noexcept;
|
|
|
|
/// Return true when the caller should wait for async downloads
|
|
[[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
|
|
|
|
/// Commit asynchronous downloads
|
|
void CommitAsyncFlushes();
|
|
|
|
/// Pop asynchronous downloads
|
|
void PopAsyncFlushes();
|
|
|
|
[[nodiscard]] ImageId DmaImageId(const Tegra::DMA::ImageOperand& operand, bool is_upload);
|
|
|
|
[[nodiscard]] std::pair<Image*, BufferImageCopy> DmaBufferImageCopy(
|
|
const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& buffer_operand,
|
|
const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image);
|
|
|
|
void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset,
|
|
std::span<const VideoCommon::BufferImageCopy> copies,
|
|
GPUVAddr address = 0, size_t size = 0);
|
|
|
|
/// Return true when a CPU region is modified from the GPU
|
|
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
|
|
|
|
[[nodiscard]] bool IsRescaling() const noexcept;
|
|
|
|
[[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept;
|
|
|
|
/// Create channel state.
|
|
void CreateChannel(Tegra::Control::ChannelState& channel) final override;
|
|
|
|
/// Prepare an image to be used
|
|
void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
|
|
|
|
std::recursive_mutex mutex;
|
|
|
|
private:
|
|
/// Iterate over all page indices in a range
|
|
template <typename Func>
|
|
static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
|
|
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
|
|
const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS;
|
|
for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) {
|
|
if constexpr (RETURNS_BOOL) {
|
|
if (func(page)) {
|
|
break;
|
|
}
|
|
} else {
|
|
func(page);
|
|
}
|
|
}
|
|
}
|
|
|
|
template <typename Func>
|
|
static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
|
|
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
|
|
const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS;
|
|
for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) {
|
|
if constexpr (RETURNS_BOOL) {
|
|
if (func(page)) {
|
|
break;
|
|
}
|
|
} else {
|
|
func(page);
|
|
}
|
|
}
|
|
}
|
|
|
|
void OnGPUASRegister(size_t map_id) final override;
|
|
|
|
/// Runs the Garbage Collector.
|
|
void RunGarbageCollector();
|
|
|
|
/// Fills image_view_ids in the image views in indices
|
|
template <bool has_blacklists>
|
|
void FillImageViews(DescriptorTable<TICEntry>& table,
|
|
std::span<ImageViewId> cached_image_view_ids,
|
|
std::span<ImageViewInOut> views);
|
|
|
|
/// Find or create an image view in the guest descriptor table
|
|
ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
|
|
std::span<ImageViewId> cached_image_view_ids, u32 index);
|
|
|
|
/// Find or create a framebuffer with the given render target parameters
|
|
FramebufferId GetFramebufferId(const RenderTargets& key);
|
|
|
|
/// Refresh the contents (pixel data) of an image
|
|
void RefreshContents(Image& image, ImageId image_id);
|
|
|
|
/// Upload data from guest to an image
|
|
template <typename StagingBuffer>
|
|
void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
|
|
|
|
/// Find or create an image view from a guest descriptor
|
|
[[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
|
|
|
|
/// Create a new image view from a guest descriptor
|
|
[[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
|
|
|
|
/// Find or create an image from the given parameters
|
|
[[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
|
RelaxedOptions options = RelaxedOptions{});
|
|
|
|
/// Find an image from the given parameters
|
|
[[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
|
RelaxedOptions options);
|
|
|
|
/// Create an image from the given parameters
|
|
[[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
|
RelaxedOptions options);
|
|
|
|
/// Create a new image and join perfectly matching existing images
|
|
/// Remove joined images from the cache
|
|
[[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
|
|
|
|
[[nodiscard]] ImageId FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr);
|
|
|
|
/// Return a blit image pair from the given guest blit parameters
|
|
[[nodiscard]] std::optional<BlitImages> GetBlitImages(
|
|
const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src,
|
|
const Tegra::Engines::Fermi2D::Config& copy);
|
|
|
|
/// Find or create a sampler from a guest descriptor sampler
|
|
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
|
|
|
|
/// Find or create an image view for the given color buffer index
|
|
[[nodiscard]] ImageViewId FindColorBuffer(size_t index);
|
|
|
|
/// Find or create an image view for the depth buffer
|
|
[[nodiscard]] ImageViewId FindDepthBuffer();
|
|
|
|
/// Find or create a view for a render target with the given image parameters
|
|
[[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr);
|
|
|
|
/// Iterates over all the images in a region calling func
|
|
template <typename Func>
|
|
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
|
|
|
|
template <typename Func>
|
|
void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
|
|
|
|
template <typename Func>
|
|
void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
|
|
|
|
/// Iterates over all the images in a region calling func
|
|
template <typename Func>
|
|
void ForEachSparseSegment(ImageBase& image, Func&& func);
|
|
|
|
/// Find or create an image view in the given image with the passed parameters
|
|
[[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
|
|
|
|
/// Register image in the page table
|
|
void RegisterImage(ImageId image);
|
|
|
|
/// Unregister image from the page table
|
|
void UnregisterImage(ImageId image);
|
|
|
|
/// Track CPU reads and writes for image
|
|
void TrackImage(ImageBase& image, ImageId image_id);
|
|
|
|
/// Stop tracking CPU reads and writes for image
|
|
void UntrackImage(ImageBase& image, ImageId image_id);
|
|
|
|
/// Delete image from the cache
|
|
void DeleteImage(ImageId image, bool immediate_delete = false);
|
|
|
|
/// Remove image views references from the cache
|
|
void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
|
|
|
|
/// Remove framebuffers using the given image views from the cache
|
|
void RemoveFramebuffers(std::span<const ImageViewId> removed_views);
|
|
|
|
/// Mark an image as modified from the GPU
|
|
void MarkModification(ImageBase& image) noexcept;
|
|
|
|
/// Synchronize image aliases, copying data if needed
|
|
void SynchronizeAliases(ImageId image_id);
|
|
|
|
/// Prepare an image view to be used
|
|
void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
|
|
|
|
/// Execute copies from one image to the other, even if they are incompatible
|
|
void CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies);
|
|
|
|
/// Bind an image view as render target, downloading resources preemtively if needed
|
|
void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
|
|
|
|
/// Create a render target from a given image and image view parameters
|
|
[[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
|
|
ImageId, const ImageViewInfo& view_info);
|
|
|
|
/// Returns true if the current clear parameters clear the whole image of a given image view
|
|
[[nodiscard]] bool IsFullClear(ImageViewId id);
|
|
|
|
[[nodiscard]] std::pair<u32, u32> PrepareDmaImage(ImageId dst_id, GPUVAddr base_addr,
|
|
bool mark_as_modified);
|
|
|
|
bool ImageCanRescale(ImageBase& image);
|
|
void InvalidateScale(Image& image);
|
|
bool ScaleUp(Image& image);
|
|
bool ScaleDown(Image& image);
|
|
u64 GetScaledImageSizeBytes(const ImageBase& image);
|
|
|
|
void QueueAsyncDecode(Image& image, ImageId image_id);
|
|
void TickAsyncDecode();
|
|
|
|
Runtime& runtime;
|
|
|
|
VideoCore::RasterizerInterface& rasterizer;
|
|
std::deque<TextureCacheGPUMap> gpu_page_table_storage;
|
|
|
|
RenderTargets render_targets;
|
|
|
|
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
|
|
|
|
std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
|
|
std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
|
|
std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views;
|
|
|
|
VAddr virtual_invalid_space{};
|
|
|
|
bool has_deleted_images = false;
|
|
bool is_rescaling = false;
|
|
u64 total_used_memory = 0;
|
|
u64 minimum_memory;
|
|
u64 expected_memory;
|
|
u64 critical_memory;
|
|
|
|
struct BufferDownload {
|
|
GPUVAddr address;
|
|
size_t size;
|
|
};
|
|
|
|
struct PendingDownload {
|
|
bool is_swizzle;
|
|
size_t async_buffer_id;
|
|
SlotId object_id;
|
|
};
|
|
|
|
SlotVector<Image> slot_images;
|
|
SlotVector<ImageMapView> slot_map_views;
|
|
SlotVector<ImageView> slot_image_views;
|
|
SlotVector<ImageAlloc> slot_image_allocs;
|
|
SlotVector<Sampler> slot_samplers;
|
|
SlotVector<Framebuffer> slot_framebuffers;
|
|
SlotVector<BufferDownload> slot_buffer_downloads;
|
|
|
|
// TODO: This data structure is not optimal and it should be reworked
|
|
|
|
std::vector<PendingDownload> uncommitted_downloads;
|
|
std::deque<std::vector<PendingDownload>> committed_downloads;
|
|
std::vector<AsyncBuffer> uncommitted_async_buffers;
|
|
std::deque<std::vector<AsyncBuffer>> async_buffers;
|
|
std::deque<AsyncBuffer> async_buffers_death_ring;
|
|
|
|
struct LRUItemParams {
|
|
using ObjectType = ImageId;
|
|
using TickType = u64;
|
|
};
|
|
Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
|
|
|
|
static constexpr size_t TICKS_TO_DESTROY = 8;
|
|
DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
|
|
DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
|
|
DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
|
|
|
|
std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
|
|
|
|
Common::ScratchBuffer<u8> swizzle_data_buffer;
|
|
Common::ScratchBuffer<u8> unswizzle_data_buffer;
|
|
|
|
u64 modification_tick = 0;
|
|
u64 frame_tick = 0;
|
|
|
|
Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
|
|
std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;
|
|
|
|
// Join caching
|
|
boost::container::small_vector<ImageId, 4> join_overlap_ids;
|
|
std::unordered_set<ImageId> join_overlaps_found;
|
|
boost::container::small_vector<ImageId, 4> join_left_aliased_ids;
|
|
boost::container::small_vector<ImageId, 4> join_right_aliased_ids;
|
|
std::unordered_set<ImageId> join_ignore_textures;
|
|
boost::container::small_vector<ImageId, 4> join_bad_overlap_ids;
|
|
struct JoinCopy {
|
|
bool is_alias;
|
|
ImageId id;
|
|
};
|
|
boost::container::small_vector<JoinCopy, 4> join_copies_to_do;
|
|
std::unordered_map<ImageId, size_t> join_alias_indices;
|
|
};
|
|
|
|
} // namespace VideoCommon
|