// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#pragma once

#include <algorithm>
#include <memory>
#include <numeric>

#include "video_core/buffer_cache/buffer_cache_base.h"

namespace VideoCommon {

using Core::Memory::YUZU_PAGESIZE;

template <class P>
BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
                            Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
    : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, memory_tracker{
                                                                               rasterizer} {
    // Ensure the first slot is used for the null buffer
    void(slot_buffers.insert(runtime, NullBufferParams{}));
    common_ranges.clear();
    inline_buffer_id = NULL_BUFFER_ID;

    if (!runtime.CanReportMemoryUsage()) {
        minimum_memory = DEFAULT_EXPECTED_MEMORY;
        critical_memory = DEFAULT_CRITICAL_MEMORY;
        return;
    }
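
    // Derive the expected and critical memory thresholds from the reported
    // device-local heap size, clamping to the defaults as a lower bound.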
    const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
    const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB;
    const s64 min_spacing_critical = device_memory - 1_GiB;
    const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD);
    const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
    const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
    minimum_memory = static_cast<u64>(
        std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected),
                 DEFAULT_EXPECTED_MEMORY));
    critical_memory = static_cast<u64>(
        std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical),
                 DEFAULT_CRITICAL_MEMORY));
}

template <class P>
void BufferCache<P>::RunGarbageCollector() {
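    // Evict least-recently-used buffers, writing GPU-modified data back to guest
    // memory before deletion. Under memory pressure, scan a shorter history and
    // allow more deletions per call.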
    const bool aggressive_gc = total_used_memory >= critical_memory;
    const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
    int num_iterations = aggressive_gc ? 64 : 32;
    const auto clean_up = [this, &num_iterations](BufferId buffer_id) {
        if (num_iterations == 0) {
            return true;
        }
        --num_iterations;
        auto& buffer = slot_buffers[buffer_id];
        DownloadBufferMemory(buffer);
        DeleteBuffer(buffer_id);
        return false;
    };
    lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
}

template <class P>
void BufferCache<P>::TickFrame() {
    // Calculate hits and shots and move hit bits to the right
    const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end());
    const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end());
    std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1,
                uniform_cache_hits.begin() + 1);
    std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1,
                uniform_cache_shots.begin() + 1);
    uniform_cache_hits[0] = 0;
    uniform_cache_shots[0] = 0;
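
    // Prefer streaming small uniform buffers instead of caching them when the
    // cache hit rate over the sampled window drops below roughly 98% (251/256).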
    const bool skip_preferred = hits * 256 < shots * 251;
    uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;

    // If we can obtain the memory info, use it instead of the estimate.
    if (runtime.CanReportMemoryUsage()) {
        total_used_memory = runtime.GetDeviceMemoryUsage();
    }
    if (total_used_memory >= minimum_memory) {
        RunGarbageCollector();
    }
    ++frame_tick;
    delayed_destruction_ring.Tick();

    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
        for (auto& buffer : async_buffers_death_ring) {
            runtime.FreeDeferredStagingBuffer(buffer);
        }
        async_buffers_death_ring.clear();
    }
}

template <class P>
void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
    memory_tracker.MarkRegionAsCpuModified(cpu_addr, size);
    if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) {
        const IntervalType subtract_interval{cpu_addr, cpu_addr + size};
        ClearDownload(subtract_interval);
        common_ranges.subtract(subtract_interval);
    }
}

template <class P>
void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
    memory_tracker.CachedCpuWrite(cpu_addr, size);
}

template <class P>
std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(VAddr cpu_addr,
                                                                              u64 size) {
    std::optional<VideoCore::RasterizerDownloadArea> area{};
    area.emplace();
    VAddr cpu_addr_start_aligned = Common::AlignDown(cpu_addr, Core::Memory::YUZU_PAGESIZE);
    VAddr cpu_addr_end_aligned = Common::AlignUp(cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
    area->start_address = cpu_addr_start_aligned;
    area->end_address = cpu_addr_end_aligned;
    if (memory_tracker.IsRegionPreflushable(cpu_addr, size)) {
        area->preemtive = true;
        return area;
    }
    memory_tracker.MarkRegionAsPreflushable(cpu_addr_start_aligned,
                                            cpu_addr_end_aligned - cpu_addr_start_aligned);
    area->preemtive = !IsRegionGpuModified(cpu_addr, size);
    return area;
}

template <class P>
void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
    WaitOnAsyncFlushes(cpu_addr, size);
    ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
        DownloadBufferMemory(buffer, cpu_addr, size);
    });
}

template <class P>
void BufferCache<P>::WaitOnAsyncFlushes(VAddr cpu_addr, u64 size) {
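    // Force a fence through and wait on it if any in-flight asynchronous download
    // or any still-pending download range overlaps the requested region.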
    bool must_wait = false;
    ForEachInOverlapCounter(async_downloads, cpu_addr, size,
                            [&](VAddr, VAddr, int) { must_wait = true; });
    bool must_release = false;
    ForEachInRangeSet(pending_ranges, cpu_addr, size, [&](VAddr, VAddr) { must_release = true; });
    if (must_release) {
        std::function<void()> tmp([]() {});
        rasterizer.SignalFence(std::move(tmp));
    }
    if (must_wait || must_release) {
        rasterizer.ReleaseFences();
    }
}

template <class P>
void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
    RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1024);
    uncommitted_ranges.subtract(subtract_interval);
    pending_ranges.subtract(subtract_interval);
    for (auto& interval_set : committed_ranges) {
        interval_set.subtract(subtract_interval);
    }
}

template <class P>
bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
    const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
    const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
    if (!cpu_src_address || !cpu_dest_address) {
        return false;
    }
    const bool source_dirty = IsRegionRegistered(*cpu_src_address, amount);
    const bool dest_dirty = IsRegionRegistered(*cpu_dest_address, amount);
    if (!source_dirty && !dest_dirty) {
        return false;
    }

    const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount};
    WaitOnAsyncFlushes(*cpu_src_address, static_cast<u32>(amount));
    ClearDownload(subtract_interval);

    BufferId buffer_a;
    BufferId buffer_b;
    do {
        has_deleted_buffers = false;
        buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount));
        buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount));
    } while (has_deleted_buffers);
    auto& src_buffer = slot_buffers[buffer_a];
    auto& dest_buffer = slot_buffers[buffer_b];
    SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount));
    SynchronizeBuffer(dest_buffer, *cpu_dest_address, static_cast<u32>(amount));
    std::array copies{BufferCopy{
        .src_offset = src_buffer.Offset(*cpu_src_address),
        .dst_offset = dest_buffer.Offset(*cpu_dest_address),
        .size = amount,
    }};
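
    // Mirror any pending download range that overlaps the source onto the matching
    // destination range, so the copied data is also flushed back to guest memory.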
    boost::container::small_vector<IntervalType, 4> tmp_intervals;
    auto mirror = [&](VAddr base_address, VAddr base_address_end) {
        const u64 size = base_address_end - base_address;
        const VAddr diff = base_address - *cpu_src_address;
        const VAddr new_base_address = *cpu_dest_address + diff;
        const IntervalType add_interval{new_base_address, new_base_address + size};
        tmp_intervals.push_back(add_interval);
        if (!Settings::values.use_reactive_flushing.GetValue() ||
            memory_tracker.IsRegionPreflushable(new_base_address, new_base_address + size)) {
            uncommitted_ranges.add(add_interval);
            pending_ranges.add(add_interval);
        }
    };
    ForEachInRangeSet(common_ranges, *cpu_src_address, amount, mirror);
    // Subtracting in this order is important for overlapping copies.
    common_ranges.subtract(subtract_interval);
    const bool has_new_downloads = tmp_intervals.size() != 0;
    for (const IntervalType& add_interval : tmp_intervals) {
        common_ranges.add(add_interval);
    }
    runtime.CopyBuffer(dest_buffer, src_buffer, copies);
    if (has_new_downloads) {
        memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
    }
    tmp_buffer.resize(amount);
    cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
    cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount);
    return true;
}

template <class P>
bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
    const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
    if (!cpu_dst_address) {
        return false;
    }
    const bool dest_dirty = IsRegionRegistered(*cpu_dst_address, amount);
    if (!dest_dirty) {
        return false;
    }

    const size_t size = amount * sizeof(u32);
    const IntervalType subtract_interval{*cpu_dst_address, *cpu_dst_address + size};
    ClearDownload(subtract_interval);
    common_ranges.subtract(subtract_interval);

    const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
    auto& dest_buffer = slot_buffers[buffer];
    const u32 offset = dest_buffer.Offset(*cpu_dst_address);
    runtime.ClearBuffer(dest_buffer, offset, size, value);
    return true;
}

template <class P>
std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size,
                                                                 ObtainBufferSynchronize sync_info,
                                                                 ObtainBufferOperation post_op) {
    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
    if (!cpu_addr) {
        return {&slot_buffers[NULL_BUFFER_ID], 0};
    }
    const BufferId buffer_id = FindBuffer(*cpu_addr, size);
    Buffer& buffer = slot_buffers[buffer_id];

    // synchronize op
    switch (sync_info) {
    case ObtainBufferSynchronize::FullSynchronize:
        SynchronizeBuffer(buffer, *cpu_addr, size);
        break;
    default:
        break;
    }

    switch (post_op) {
    case ObtainBufferOperation::MarkAsWritten:
        MarkWrittenBuffer(buffer_id, *cpu_addr, size);
        break;
    case ObtainBufferOperation::DiscardWrite: {
        IntervalType interval{*cpu_addr, *cpu_addr + size};
        ClearDownload(interval);
        break;
    }
    default:
        break;
    }

    return {&buffer, buffer.Offset(*cpu_addr)};
}

template <class P>
void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
                                               u32 size) {
    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
    const Binding binding{
        .cpu_addr = *cpu_addr,
        .size = size,
        .buffer_id = BufferId{},
    };
    uniform_buffers[stage][index] = binding;
}

template <class P>
void BufferCache<P>::DisableGraphicsUniformBuffer(size_t stage, u32 index) {
    uniform_buffers[stage][index] = NULL_BINDING;
}

template <class P>
void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) {
    MICROPROFILE_SCOPE(GPU_PrepareBuffers);
    do {
        has_deleted_buffers = false;
        DoUpdateGraphicsBuffers(is_indexed);
    } while (has_deleted_buffers);
}

template <class P>
void BufferCache<P>::UpdateComputeBuffers() {
    MICROPROFILE_SCOPE(GPU_PrepareBuffers);
    do {
        has_deleted_buffers = false;
        DoUpdateComputeBuffers();
    } while (has_deleted_buffers);
}

template <class P>
void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
    MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
    if (is_indexed) {
        BindHostIndexBuffer();
    } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
        const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
        if (draw_state.topology == Maxwell::PrimitiveTopology::Quads ||
            draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) {
            runtime.BindQuadIndexBuffer(draw_state.topology, draw_state.vertex_buffer.first,
                                        draw_state.vertex_buffer.count);
        }
    }
    BindHostVertexBuffers();
    BindHostTransformFeedbackBuffers();
    if (current_draw_indirect) {
        BindHostDrawIndirectBuffers();
    }
}

template <class P>
void BufferCache<P>::BindHostStageBuffers(size_t stage) {
    MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
    BindHostGraphicsUniformBuffers(stage);
    BindHostGraphicsStorageBuffers(stage);
    BindHostGraphicsTextureBuffers(stage);
}

template <class P>
void BufferCache<P>::BindHostComputeBuffers() {
    MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
    BindHostComputeUniformBuffers();
    BindHostComputeStorageBuffers();
    BindHostComputeTextureBuffers();
}

template <class P>
void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
                                            const UniformBufferSizes* sizes) {
    if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
        if (enabled_uniform_buffer_masks != mask) {
            if constexpr (IS_OPENGL) {
                fast_bound_uniform_buffers.fill(0);
            }
            dirty_uniform_buffers.fill(~u32{0});
            uniform_buffer_binding_sizes.fill({});
        }
    }
    enabled_uniform_buffer_masks = mask;
    uniform_buffer_sizes = sizes;
}

template <class P>
void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
                                                  const ComputeUniformBufferSizes* sizes) {
    enabled_compute_uniform_buffer_mask = mask;
    compute_uniform_buffer_sizes = sizes;
}

template <class P>
void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
    enabled_storage_buffers[stage] = 0;
    written_storage_buffers[stage] = 0;
}

template <class P>
void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
                                               u32 cbuf_offset, bool is_written) {
    enabled_storage_buffers[stage] |= 1U << ssbo_index;
    written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;

    const auto& cbufs = maxwell3d->state.shader_stages[stage];
    const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
    storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
}

template <class P>
void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) {
    enabled_texture_buffers[stage] = 0;
    written_texture_buffers[stage] = 0;
    image_texture_buffers[stage] = 0;
}

template <class P>
void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr,
                                               u32 size, PixelFormat format, bool is_written,
                                               bool is_image) {
    enabled_texture_buffers[stage] |= 1U << tbo_index;
    written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
    if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
        image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
    }
    texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
}

template <class P>
void BufferCache<P>::UnbindComputeStorageBuffers() {
    enabled_compute_storage_buffers = 0;
    written_compute_storage_buffers = 0;
    image_compute_texture_buffers = 0;
}

template <class P>
void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
                                              bool is_written) {
    enabled_compute_storage_buffers |= 1U << ssbo_index;
    written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;

    const auto& launch_desc = kepler_compute->launch_description;
    ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);

    const auto& cbufs = launch_desc.const_buffer_config;
    const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
    compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
}

template <class P>
void BufferCache<P>::UnbindComputeTextureBuffers() {
    enabled_compute_texture_buffers = 0;
    written_compute_texture_buffers = 0;
    image_compute_texture_buffers = 0;
}

template <class P>
void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size,
                                              PixelFormat format, bool is_written, bool is_image) {
    enabled_compute_texture_buffers |= 1U << tbo_index;
    written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
    if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
        image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
    }
    compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
}

template <class P>
void BufferCache<P>::FlushCachedWrites() {
    memory_tracker.FlushCachedWrites();
}

template <class P>
bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
    return !uncommitted_ranges.empty() || !committed_ranges.empty();
}

template <class P>
void BufferCache<P>::AccumulateFlushes() {
    if (uncommitted_ranges.empty()) {
        return;
    }
    committed_ranges.emplace_back(std::move(uncommitted_ranges));
}

template <class P>
bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
        return (!async_buffers.empty() && async_buffers.front().has_value());
    } else {
        return false;
    }
}

template <class P>
void BufferCache<P>::CommitAsyncFlushesHigh() {
    AccumulateFlushes();

    if (committed_ranges.empty()) {
        if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
            async_buffers.emplace_back(std::optional<Async_Buffer>{});
        }
        return;
    }
    MICROPROFILE_SCOPE(GPU_DownloadMemory);

    pending_ranges.clear();
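    // Deduplicate the committed interval sets: intervals that also appear in a later
    // set are subtracted from earlier ones so each region is only downloaded once.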
    auto it = committed_ranges.begin();
    while (it != committed_ranges.end()) {
        auto& current_intervals = *it;
        auto next_it = std::next(it);
        while (next_it != committed_ranges.end()) {
            for (auto& interval : *next_it) {
                current_intervals.subtract(interval);
            }
            next_it++;
        }
        it++;
    }

    boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
    u64 total_size_bytes = 0;
    u64 largest_copy = 0;
    for (const IntervalSet& intervals : committed_ranges) {
        for (auto& interval : intervals) {
            const std::size_t size = interval.upper() - interval.lower();
            const VAddr cpu_addr = interval.lower();
            ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
                const VAddr buffer_start = buffer.CpuAddr();
                const VAddr buffer_end = buffer_start + buffer.SizeBytes();
                const VAddr new_start = std::max(buffer_start, cpu_addr);
                const VAddr new_end = std::min(buffer_end, cpu_addr + size);
                memory_tracker.ForEachDownloadRange(
                    new_start, new_end - new_start, false, [&](u64 cpu_addr_out, u64 range_size) {
                        const VAddr buffer_addr = buffer.CpuAddr();
                        const auto add_download = [&](VAddr start, VAddr end) {
                            const u64 new_offset = start - buffer_addr;
                            const u64 new_size = end - start;
                            downloads.push_back({
                                BufferCopy{
                                    .src_offset = new_offset,
                                    .dst_offset = total_size_bytes,
                                    .size = new_size,
                                },
                                buffer_id,
                            });
                            // Align up to avoid cache conflicts
                            constexpr u64 align = 64ULL;
                            constexpr u64 mask = ~(align - 1ULL);
                            total_size_bytes += (new_size + align - 1) & mask;
                            largest_copy = std::max(largest_copy, new_size);
                        };

                        ForEachInRangeSet(common_ranges, cpu_addr_out, range_size, add_download);
                    });
            });
        }
    }
    committed_ranges.clear();
    if (downloads.empty()) {
        if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
            async_buffers.emplace_back(std::optional<Async_Buffer>{});
        }
        return;
    }
    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
        auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
        boost::container::small_vector<BufferCopy, 4> normalized_copies;
        IntervalSet new_async_range{};
        runtime.PreCopyBarrier();
        for (auto& [copy, buffer_id] : downloads) {
            copy.dst_offset += download_staging.offset;
            const std::array copies{copy};
            BufferCopy second_copy{copy};
            Buffer& buffer = slot_buffers[buffer_id];
            second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
            VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
            const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
            async_downloads += std::make_pair(base_interval, 1);
            runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
            normalized_copies.push_back(second_copy);
        }
        runtime.PostCopyBarrier();
        pending_downloads.emplace_back(std::move(normalized_copies));
        async_buffers.emplace_back(download_staging);
    } else {
        if (!Settings::IsGPULevelHigh()) {
            committed_ranges.clear();
            uncommitted_ranges.clear();
        } else {
            if constexpr (USE_MEMORY_MAPS) {
                auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
                runtime.PreCopyBarrier();
                for (auto& [copy, buffer_id] : downloads) {
                    // Take the staging buffer offset into account for the copy
                    copy.dst_offset += download_staging.offset;
                    const std::array copies{copy};
                    runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies,
                                       false);
                }
                runtime.PostCopyBarrier();
                runtime.Finish();
                for (const auto& [copy, buffer_id] : downloads) {
                    const Buffer& buffer = slot_buffers[buffer_id];
                    const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
                    // Undo the modified offset
                    const u64 dst_offset = copy.dst_offset - download_staging.offset;
                    const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
                    cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
                }
            } else {
                const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
                for (const auto& [copy, buffer_id] : downloads) {
                    Buffer& buffer = slot_buffers[buffer_id];
                    buffer.ImmediateDownload(copy.src_offset,
                                             immediate_buffer.subspan(0, copy.size));
                    const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
                    cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
                }
            }
        }
    }
}

template <class P>
void BufferCache<P>::CommitAsyncFlushes() {
    CommitAsyncFlushesHigh();
}

template <class P>
void BufferCache<P>::PopAsyncFlushes() {
    MICROPROFILE_SCOPE(GPU_DownloadMemory);
    PopAsyncBuffers();
}

template <class P>
void BufferCache<P>::PopAsyncBuffers() {
    if (async_buffers.empty()) {
        return;
    }
    if (!async_buffers.front().has_value()) {
        async_buffers.pop_front();
        return;
    }
    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
        auto& downloads = pending_downloads.front();
        auto& async_buffer = async_buffers.front();
        u8* base = async_buffer->mapped_span.data();
        const size_t base_offset = async_buffer->offset;
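        // Write each completed download back to guest memory; once the last
        // overlapping download for a range has landed, drop it from common_ranges.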
        for (const auto& copy : downloads) {
            const VAddr cpu_addr = static_cast<VAddr>(copy.src_offset);
            const u64 dst_offset = copy.dst_offset - base_offset;
            const u8* read_mapped_memory = base + dst_offset;
            ForEachInOverlapCounter(
                async_downloads, cpu_addr, copy.size, [&](VAddr start, VAddr end, int count) {
                    cpu_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - cpu_addr],
                                                end - start);
                    if (count == 1) {
                        const IntervalType base_interval{start, end};
                        common_ranges.subtract(base_interval);
                    }
                });
            const IntervalType subtract_interval{cpu_addr, cpu_addr + copy.size};
            RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1);
        }
        async_buffers_death_ring.emplace_back(*async_buffer);
        async_buffers.pop_front();
        pending_downloads.pop_front();
    }
}

template <class P>
bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
    bool is_dirty = false;
    ForEachInRangeSet(common_ranges, addr, size, [&](VAddr, VAddr) { is_dirty = true; });
    return is_dirty;
}

template <class P>
bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
    const VAddr end_addr = addr + size;
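    // Walk the cache page table covering [addr, end_addr) and report whether any
    // registered buffer overlaps the region.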
    const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE);
    for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) {
        const BufferId buffer_id = page_table[page];
        if (!buffer_id) {
            ++page;
            continue;
        }
        Buffer& buffer = slot_buffers[buffer_id];
        const VAddr buf_start_addr = buffer.CpuAddr();
        const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes();
        if (buf_start_addr < end_addr && addr < buf_end_addr) {
            return true;
        }
        page = Common::DivCeil(end_addr, CACHING_PAGESIZE);
    }
    return false;
}

template <class P>
bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
    return memory_tracker.IsRegionCpuModified(addr, size);
}

template <class P>
void BufferCache<P>::BindHostIndexBuffer() {
    Buffer& buffer = slot_buffers[index_buffer.buffer_id];
    TouchBuffer(buffer, index_buffer.buffer_id);
    const u32 offset = buffer.Offset(index_buffer.cpu_addr);
    const u32 size = index_buffer.size;
    const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
    if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
        if constexpr (USE_MEMORY_MAPS) {
            auto upload_staging = runtime.UploadStagingBuffer(size);
            std::array<BufferCopy, 1> copies{
                {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
            std::memcpy(upload_staging.mapped_span.data(),
                        draw_state.inline_index_draw_indexes.data(), size);
            runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
        } else {
            buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
        }
    } else {
        SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
    }
    if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
        const u32 new_offset =
            offset + draw_state.index_buffer.first * draw_state.index_buffer.FormatSizeInBytes();
        runtime.BindIndexBuffer(buffer, new_offset, size);
    } else {
        runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format,
                                draw_state.index_buffer.first, draw_state.index_buffer.count,
                                buffer, offset, size);
    }
}

template <class P>
void BufferCache<P>::BindHostVertexBuffers() {
    auto& flags = maxwell3d->dirty.flags;
    for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
        const Binding& binding = vertex_buffers[index];
        Buffer& buffer = slot_buffers[binding.buffer_id];
        TouchBuffer(buffer, binding.buffer_id);
        SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
        if (!flags[Dirty::VertexBuffer0 + index]) {
            continue;
        }
        flags[Dirty::VertexBuffer0 + index] = false;

        const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
        const u32 offset = buffer.Offset(binding.cpu_addr);
        runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
    }
}

template <class P>
void BufferCache<P>::BindHostDrawIndirectBuffers() {
    const auto bind_buffer = [this](const Binding& binding) {
        Buffer& buffer = slot_buffers[binding.buffer_id];
        TouchBuffer(buffer, binding.buffer_id);
        SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
    };
    if (current_draw_indirect->include_count) {
        bind_buffer(count_buffer_binding);
    }
    bind_buffer(indirect_buffer_binding);
}

template <class P>
void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
    u32 dirty = ~0U;
    if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
        dirty = std::exchange(dirty_uniform_buffers[stage], 0);
    }
    u32 binding_index = 0;
    ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
        const bool needs_bind = ((dirty >> index) & 1) != 0;
        BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
        if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
            ++binding_index;
        }
    });
}

template <class P>
void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index,
                                                   bool needs_bind) {
    const Binding& binding = uniform_buffers[stage][index];
    const VAddr cpu_addr = binding.cpu_addr;
    const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);
    Buffer& buffer = slot_buffers[binding.buffer_id];
    TouchBuffer(buffer, binding.buffer_id);
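    // Small uniform buffers that the GPU has not written to can skip the cache
    // entirely and be streamed to the device every draw instead.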
    const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
                                 size <= uniform_buffer_skip_cache_size &&
                                 !memory_tracker.IsRegionGpuModified(cpu_addr, size);
    if (use_fast_buffer) {
        if constexpr (IS_OPENGL) {
            if (runtime.HasFastBufferSubData()) {
                // Fast path for Nvidia
                const bool should_fast_bind =
                    !HasFastUniformBufferBound(stage, binding_index) ||
                    uniform_buffer_binding_sizes[stage][binding_index] != size;
                if (should_fast_bind) {
                    // We only have to bind when the currently bound buffer is not the fast version
                    fast_bound_uniform_buffers[stage] |= 1U << binding_index;
                    uniform_buffer_binding_sizes[stage][binding_index] = size;
                    runtime.BindFastUniformBuffer(stage, binding_index, size);
                }
                const auto span = ImmediateBufferWithData(cpu_addr, size);
                runtime.PushFastUniformBuffer(stage, binding_index, span);
                return;
            }
        }
        if constexpr (IS_OPENGL) {
            fast_bound_uniform_buffers[stage] |= 1U << binding_index;
            uniform_buffer_binding_sizes[stage][binding_index] = size;
        }
        // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
        const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
        cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
        return;
    }
    // Classic cached path
    const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size);
    if (sync_cached) {
        ++uniform_cache_hits[0];
    }
    ++uniform_cache_shots[0];

    // Skip binding if it's not needed and if the bound buffer is not the fast version
    // This exists to avoid instances where the fast buffer is bound and a GPU write happens
    needs_bind |= HasFastUniformBufferBound(stage, binding_index);
    if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
        needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size;
    }
    if (!needs_bind) {
        return;
    }
    const u32 offset = buffer.Offset(cpu_addr);
    if constexpr (IS_OPENGL) {
        // Fast buffer will be unbound
        fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);

        // Mark the index as dirty if offset doesn't match
        const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
        dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
    }
    if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
        uniform_buffer_binding_sizes[stage][binding_index] = size;
    }
    if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
        runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
    } else {
        runtime.BindUniformBuffer(buffer, offset, size);
    }
}

template <class P>
void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
    u32 binding_index = 0;
    ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
        const Binding& binding = storage_buffers[stage][index];
        Buffer& buffer = slot_buffers[binding.buffer_id];
        TouchBuffer(buffer, binding.buffer_id);
        const u32 size = binding.size;
        SynchronizeBuffer(buffer, binding.cpu_addr, size);

        const u32 offset = buffer.Offset(binding.cpu_addr);
        const bool is_written = ((written_storage_buffers[stage] >> index) & 1) != 0;
        if constexpr (NEEDS_BIND_STORAGE_INDEX) {
            runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written);
            ++binding_index;
        } else {
            runtime.BindStorageBuffer(buffer, offset, size, is_written);
        }
    });
}

template <class P>
void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
    ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
        const TextureBufferBinding& binding = texture_buffers[stage][index];
        Buffer& buffer = slot_buffers[binding.buffer_id];
        const u32 size = binding.size;
        SynchronizeBuffer(buffer, binding.cpu_addr, size);

        const u32 offset = buffer.Offset(binding.cpu_addr);
        const PixelFormat format = binding.format;
        if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
            if (((image_texture_buffers[stage] >> index) & 1) != 0) {
                runtime.BindImageBuffer(buffer, offset, size, format);
            } else {
                runtime.BindTextureBuffer(buffer, offset, size, format);
            }
        } else {
            runtime.BindTextureBuffer(buffer, offset, size, format);
        }
    });
}

template <class P>
void BufferCache<P>::BindHostTransformFeedbackBuffers() {
    if (maxwell3d->regs.transform_feedback_enabled == 0) {
        return;
    }
    for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
        const Binding& binding = transform_feedback_buffers[index];
        Buffer& buffer = slot_buffers[binding.buffer_id];
        TouchBuffer(buffer, binding.buffer_id);
        const u32 size = binding.size;
        SynchronizeBuffer(buffer, binding.cpu_addr, size);

        const u32 offset = buffer.Offset(binding.cpu_addr);
        runtime.BindTransformFeedbackBuffer(index, buffer, offset, size);
    }
}

template <class P>
void BufferCache<P>::BindHostComputeUniformBuffers() {
    if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
        // Mark all uniform buffers as dirty
        dirty_uniform_buffers.fill(~u32{0});
        fast_bound_uniform_buffers.fill(0);
    }
    u32 binding_index = 0;
    ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
        const Binding& binding = compute_uniform_buffers[index];
        Buffer& buffer = slot_buffers[binding.buffer_id];
        TouchBuffer(buffer, binding.buffer_id);
        const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);
        SynchronizeBuffer(buffer, binding.cpu_addr, size);

        const u32 offset = buffer.Offset(binding.cpu_addr);
        if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
            runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
            ++binding_index;
        } else {
            runtime.BindUniformBuffer(buffer, offset, size);
        }
    });
}

template <class P>
void BufferCache<P>::BindHostComputeStorageBuffers() {
    u32 binding_index = 0;
    ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
        const Binding& binding = compute_storage_buffers[index];
        Buffer& buffer = slot_buffers[binding.buffer_id];
        TouchBuffer(buffer, binding.buffer_id);
        const u32 size = binding.size;
        SynchronizeBuffer(buffer, binding.cpu_addr, size);

        const u32 offset = buffer.Offset(binding.cpu_addr);
        const bool is_written = ((written_compute_storage_buffers >> index) & 1) != 0;
        if constexpr (NEEDS_BIND_STORAGE_INDEX) {
            runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written);
            ++binding_index;
        } else {
            runtime.BindStorageBuffer(buffer, offset, size, is_written);
        }
    });
}

template <class P>
void BufferCache<P>::BindHostComputeTextureBuffers() {
    ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
        const TextureBufferBinding& binding = compute_texture_buffers[index];
        Buffer& buffer = slot_buffers[binding.buffer_id];
        const u32 size = binding.size;
        SynchronizeBuffer(buffer, binding.cpu_addr, size);

        const u32 offset = buffer.Offset(binding.cpu_addr);
        const PixelFormat format = binding.format;
        if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
            if (((image_compute_texture_buffers >> index) & 1) != 0) {
                runtime.BindImageBuffer(buffer, offset, size, format);
            } else {
                runtime.BindTextureBuffer(buffer, offset, size, format);
            }
        } else {
            runtime.BindTextureBuffer(buffer, offset, size, format);
        }
    });
}

template <class P>
void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
    do {
        has_deleted_buffers = false;
        if (is_indexed) {
            UpdateIndexBuffer();
        }
        UpdateVertexBuffers();
        UpdateTransformFeedbackBuffers();
        for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
            UpdateUniformBuffers(stage);
            UpdateStorageBuffers(stage);
            UpdateTextureBuffers(stage);
        }
        if (current_draw_indirect) {
            UpdateDrawIndirect();
        }
    } while (has_deleted_buffers);
}

template <class P>
void BufferCache<P>::DoUpdateComputeBuffers() {
    UpdateComputeUniformBuffers();
    UpdateComputeStorageBuffers();
    UpdateComputeTextureBuffers();
}

template <class P>
void BufferCache<P>::UpdateIndexBuffer() {
    // We have to check for the dirty flags and index count
    // The index count is currently changed without updating the dirty flags
    const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
    const auto& index_buffer_ref = draw_state.index_buffer;
    auto& flags = maxwell3d->dirty.flags;
    if (!flags[Dirty::IndexBuffer]) {
        return;
    }
    flags[Dirty::IndexBuffer] = false;
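    // Inline index draws copy their index data into a dedicated scratch buffer,
    // which is recreated whenever it is too small for the current draw.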
    if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
        auto inline_index_size = static_cast<u32>(draw_state.inline_index_draw_indexes.size());
        u32 buffer_size = Common::AlignUp(inline_index_size, CACHING_PAGESIZE);
        if (inline_buffer_id == NULL_BUFFER_ID) [[unlikely]] {
            inline_buffer_id = CreateBuffer(0, buffer_size);
        }
        if (slot_buffers[inline_buffer_id].SizeBytes() < buffer_size) [[unlikely]] {
            slot_buffers.erase(inline_buffer_id);
            inline_buffer_id = CreateBuffer(0, buffer_size);
        }
        index_buffer = Binding{
            .cpu_addr = 0,
            .size = inline_index_size,
            .buffer_id = inline_buffer_id,
        };
        return;
    }

    const GPUVAddr gpu_addr_begin = index_buffer_ref.StartAddress();
    const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress();
    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
    const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
    const u32 draw_size =
        (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes();
    const u32 size = std::min(address_size, draw_size);
    if (size == 0 || !cpu_addr) {
        index_buffer = NULL_BINDING;
        return;
    }
    index_buffer = Binding{
        .cpu_addr = *cpu_addr,
        .size = size,
        .buffer_id = FindBuffer(*cpu_addr, size),
    };
}

template <class P>
void BufferCache<P>::UpdateVertexBuffers() {
    auto& flags = maxwell3d->dirty.flags;
    if (!maxwell3d->dirty.flags[Dirty::VertexBuffers]) {
        return;
    }
    flags[Dirty::VertexBuffers] = false;

    for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
        UpdateVertexBuffer(index);
    }
}

template <class P>
void BufferCache<P>::UpdateVertexBuffer(u32 index) {
    if (!maxwell3d->dirty.flags[Dirty::VertexBuffer0 + index]) {
        return;
    }
    const auto& array = maxwell3d->regs.vertex_streams[index];
    const auto& limit = maxwell3d->regs.vertex_stream_limits[index];
    const GPUVAddr gpu_addr_begin = array.Address();
    const GPUVAddr gpu_addr_end = limit.Address() + 1;
    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
    const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
    u32 size = address_size; // TODO: Analyze stride and number of vertices
    if (array.enable == 0 || size == 0 || !cpu_addr) {
        vertex_buffers[index] = NULL_BINDING;
        return;
    }
    if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
        size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
    }
    vertex_buffers[index] = Binding{
        .cpu_addr = *cpu_addr,
        .size = size,
        .buffer_id = FindBuffer(*cpu_addr, size),
    };
}

template <class P>
void BufferCache<P>::UpdateDrawIndirect() {
    const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) {
        const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
        if (!cpu_addr) {
            binding = NULL_BINDING;
            return;
        }
        binding = Binding{
            .cpu_addr = *cpu_addr,
            .size = static_cast<u32>(size),
            .buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)),
        };
    };
    if (current_draw_indirect->include_count) {
        update(current_draw_indirect->count_start_address, sizeof(u32), count_buffer_binding);
    }
    update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size,
           indirect_buffer_binding);
}

template <class P>
void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
    ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
        Binding& binding = uniform_buffers[stage][index];
        if (binding.buffer_id) {
            // Already updated
            return;
        }
        // Mark as dirty
        if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
            dirty_uniform_buffers[stage] |= 1U << index;
        }
        // Resolve buffer
        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
    });
}

template <class P>
void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
    const u32 written_mask = written_storage_buffers[stage];
    ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
        // Resolve buffer
        Binding& binding = storage_buffers[stage][index];
        const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
        binding.buffer_id = buffer_id;
        // Mark buffer as written if needed
        if (((written_mask >> index) & 1) != 0) {
            MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
        }
    });
}
|
2020-06-09 23:27:59 +02:00
|
|
|
|
2021-04-07 01:14:55 +02:00
|
|
|

template <class P>
void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
    ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
        Binding& binding = texture_buffers[stage][index];
        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
        // Mark buffer as written if needed
        if (((written_texture_buffers[stage] >> index) & 1) != 0) {
            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
        }
    });
}

template <class P>
void BufferCache<P>::UpdateTransformFeedbackBuffers() {
    if (maxwell3d->regs.transform_feedback_enabled == 0) {
        return;
    }
    for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
        UpdateTransformFeedbackBuffer(index);
    }
}

template <class P>
void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
    const auto& binding = maxwell3d->regs.transform_feedback.buffers[index];
    const GPUVAddr gpu_addr = binding.Address() + binding.start_offset;
    const u32 size = binding.size;
    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
    if (binding.enable == 0 || size == 0 || !cpu_addr) {
        transform_feedback_buffers[index] = NULL_BINDING;
        return;
    }
    const BufferId buffer_id = FindBuffer(*cpu_addr, size);
    transform_feedback_buffers[index] = Binding{
        .cpu_addr = *cpu_addr,
        .size = size,
        .buffer_id = buffer_id,
    };
    MarkWrittenBuffer(buffer_id, *cpu_addr, size);
}

template <class P>
void BufferCache<P>::UpdateComputeUniformBuffers() {
    ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
        Binding& binding = compute_uniform_buffers[index];
        binding = NULL_BINDING;
        const auto& launch_desc = kepler_compute->launch_description;
        if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
            const auto& cbuf = launch_desc.const_buffer_config[index];
            const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address());
            if (cpu_addr) {
                binding.cpu_addr = *cpu_addr;
                binding.size = cbuf.size;
            }
        }
        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
    });
}

template <class P>
void BufferCache<P>::UpdateComputeStorageBuffers() {
    ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
        // Resolve buffer
        Binding& binding = compute_storage_buffers[index];
        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
        // Mark as written if needed
        if (((written_compute_storage_buffers >> index) & 1) != 0) {
            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
        }
    });
}

template <class P>
void BufferCache<P>::UpdateComputeTextureBuffers() {
    ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
        Binding& binding = compute_texture_buffers[index];
        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
        // Mark as written if needed
        if (((written_compute_texture_buffers >> index) & 1) != 0) {
            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
        }
    });
}
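
// Records an upcoming GPU write to [cpu_addr, cpu_addr + size): pending CPU writes are
// uploaded first, the range joins the tracked GPU-modified ranges, and, unless reactive
// flushing filters it out, it is queued for an eventual download back to guest memory.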
template <class P>
void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) {
    memory_tracker.MarkRegionAsGpuModified(cpu_addr, size);

    if (memory_tracker.IsRegionCpuModified(cpu_addr, size)) {
        SynchronizeBuffer(slot_buffers[buffer_id], cpu_addr, size);
    }

    const IntervalType base_interval{cpu_addr, cpu_addr + size};
    common_ranges.add(base_interval);
    if (Settings::values.use_reactive_flushing.GetValue() &&
        !memory_tracker.IsRegionPreflushable(cpu_addr, size)) {
        return;
    }
    uncommitted_ranges.add(base_interval);
    pending_ranges.add(base_interval);
}
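
// Returns a buffer fully containing [cpu_addr, cpu_addr + size). A null address resolves
// to the null buffer; a page-table miss or an out-of-bounds hit creates a new buffer.
// As a sketch of the lookup, assuming 64 KiB caching pages (CACHING_PAGEBITS == 16):
// cpu_addr 0x8'0001'2345 indexes page_table[0x8'0001], so any binding inside the same
// 64 KiB page resolves to the same slot.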
template <class P>
BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) {
    if (cpu_addr == 0) {
        return NULL_BUFFER_ID;
    }
    const u64 page = cpu_addr >> CACHING_PAGEBITS;
    const BufferId buffer_id = page_table[page];
    if (!buffer_id) {
        return CreateBuffer(cpu_addr, size);
    }
    const Buffer& buffer = slot_buffers[buffer_id];
    if (buffer.IsInBounds(cpu_addr, size)) {
        return buffer_id;
    }
    return CreateBuffer(cpu_addr, size);
}
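
// Collects every registered buffer overlapping the wanted range by walking the page
// table, growing [begin, end) to cover them all. Once the accumulated stream score
// passes the threshold the region is treated as a stream buffer and padded by 256
// caching pages (16 MiB, assuming 64 KiB pages) to avoid constant recreation.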
template <class P>
typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr,
                                                                       u32 wanted_size) {
    static constexpr int STREAM_LEAP_THRESHOLD = 16;
    std::vector<BufferId> overlap_ids;
    VAddr begin = cpu_addr;
    VAddr end = cpu_addr + wanted_size;
    int stream_score = 0;
    bool has_stream_leap = false;
    if (begin == 0) {
        return OverlapResult{
            .ids = std::move(overlap_ids),
            .begin = begin,
            .end = end,
            .has_stream_leap = has_stream_leap,
        };
    }
    for (; cpu_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE);
         cpu_addr += CACHING_PAGESIZE) {
        const BufferId overlap_id = page_table[cpu_addr >> CACHING_PAGEBITS];
        if (!overlap_id) {
            continue;
        }
        Buffer& overlap = slot_buffers[overlap_id];
        if (overlap.IsPicked()) {
            continue;
        }
        overlap_ids.push_back(overlap_id);
        overlap.Pick();
        const VAddr overlap_cpu_addr = overlap.CpuAddr();
        const bool expands_left = overlap_cpu_addr < begin;
        if (expands_left) {
            cpu_addr = begin = overlap_cpu_addr;
        }
        const VAddr overlap_end = overlap_cpu_addr + overlap.SizeBytes();
        const bool expands_right = overlap_end > end;
        if (expands_right) {
            end = overlap_end;
        }
        stream_score += overlap.StreamScore();
        if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) {
            // When this memory region has been joined a bunch of times, we assume it's being used
            // as a stream buffer. Increase the size to skip constantly recreating buffers.
            has_stream_leap = true;
            if (expands_right) {
                begin -= CACHING_PAGESIZE * 256;
                cpu_addr = begin;
            }
            if (expands_left) {
                end += CACHING_PAGESIZE * 256;
            }
        }
    }
    return OverlapResult{
        .ids = std::move(overlap_ids),
        .begin = begin,
        .end = end,
        .has_stream_leap = has_stream_leap,
    };
}
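
// Copies an overlapped buffer into its replacement at the matching offset and deletes
// the old buffer; the stream score is carried over unless a stream leap occurred.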
template <class P>
void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
                                 bool accumulate_stream_score) {
    Buffer& new_buffer = slot_buffers[new_buffer_id];
    Buffer& overlap = slot_buffers[overlap_id];
    if (accumulate_stream_score) {
        new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1);
    }
    boost::container::small_vector<BufferCopy, 1> copies;
    const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr();
    copies.push_back(BufferCopy{
        .src_offset = 0,
        .dst_offset = dst_base_offset,
        .size = overlap.SizeBytes(),
    });
    runtime.CopyBuffer(new_buffer, overlap, copies);
    DeleteBuffer(overlap_id, true);
}
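
// Allocates a buffer covering the page-aligned wanted range plus every overlapping
// buffer, clears it, folds the overlaps in, and registers the result in the page table.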
template <class P>
BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
    VAddr cpu_addr_end = Common::AlignUp(cpu_addr + wanted_size, CACHING_PAGESIZE);
    cpu_addr = Common::AlignDown(cpu_addr, CACHING_PAGESIZE);
    wanted_size = static_cast<u32>(cpu_addr_end - cpu_addr);
    const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
    const u32 size = static_cast<u32>(overlap.end - overlap.begin);
    const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
    auto& new_buffer = slot_buffers[new_buffer_id];
    runtime.ClearBuffer(new_buffer, 0, new_buffer.SizeBytes(), 0);
    for (const BufferId overlap_id : overlap.ids) {
        JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
    }
    Register(new_buffer_id);
    TouchBuffer(new_buffer, new_buffer_id);
    return new_buffer_id;
}

template <class P>
void BufferCache<P>::Register(BufferId buffer_id) {
    ChangeRegister<true>(buffer_id);
}

template <class P>
void BufferCache<P>::Unregister(BufferId buffer_id) {
    ChangeRegister<false>(buffer_id);
}
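
// Inserts (insert == true) or removes a buffer from the page table and the LRU cache,
// keeping the total_used_memory accounting used by the garbage collector in sync.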
template <class P>
template <bool insert>
void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
    Buffer& buffer = slot_buffers[buffer_id];
    const auto size = buffer.SizeBytes();
    if (insert) {
        total_used_memory += Common::AlignUp(size, 1024);
        buffer.setLRUID(lru_cache.Insert(buffer_id, frame_tick));
    } else {
        total_used_memory -= Common::AlignUp(size, 1024);
        lru_cache.Free(buffer.getLRUID());
    }
    const VAddr cpu_addr_begin = buffer.CpuAddr();
    const VAddr cpu_addr_end = cpu_addr_begin + size;
    const u64 page_begin = cpu_addr_begin / CACHING_PAGESIZE;
    const u64 page_end = Common::DivCeil(cpu_addr_end, CACHING_PAGESIZE);
    for (u64 page = page_begin; page != page_end; ++page) {
        if constexpr (insert) {
            page_table[page] = buffer_id;
        } else {
            page_table[page] = BufferId{};
        }
    }
}

template <class P>
void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
    if (buffer_id != NULL_BUFFER_ID) {
        lru_cache.Touch(buffer.getLRUID(), frame_tick);
    }
}

template <class P>
bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
    return SynchronizeBufferImpl(buffer, cpu_addr, size);
}
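
// Uploads every CPU-modified range intersecting [cpu_addr, cpu_addr + size) into the
// buffer. Returns true when the buffer was already up to date and nothing was copied.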
template <class P>
bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
    boost::container::small_vector<BufferCopy, 4> copies;
    u64 total_size_bytes = 0;
    u64 largest_copy = 0;
    VAddr buffer_start = buffer.CpuAddr();
    memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
        copies.push_back(BufferCopy{
            .src_offset = total_size_bytes,
            .dst_offset = cpu_addr_out - buffer_start,
            .size = range_size,
        });
        total_size_bytes += range_size;
        largest_copy = std::max(largest_copy, range_size);
    });
    if (total_size_bytes == 0) {
        return true;
    }
    const std::span<BufferCopy> copies_span(copies.data(), copies.size());
    UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
    return false;
}
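
// Variant of SynchronizeBufferImpl that subtracts the tracked GPU-modified ranges in
// common_ranges from the upload set, so stale CPU data never clobbers pending GPU writes.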
template <class P>
bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) {
    boost::container::small_vector<BufferCopy, 4> copies;
    u64 total_size_bytes = 0;
    u64 largest_copy = 0;
    IntervalSet found_sets{};
    auto make_copies = [&] {
        for (auto& interval : found_sets) {
            const std::size_t sub_size = interval.upper() - interval.lower();
            const VAddr cpu_addr_ = interval.lower();
            copies.push_back(BufferCopy{
                .src_offset = total_size_bytes,
                .dst_offset = cpu_addr_ - buffer.CpuAddr(),
                .size = sub_size,
            });
            total_size_bytes += sub_size;
            largest_copy = std::max<u64>(largest_copy, sub_size);
        }
        const std::span<BufferCopy> copies_span(copies.data(), copies.size());
        UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
    };
    memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
        const VAddr base_adr = cpu_addr_out;
        const VAddr end_adr = base_adr + range_size;
        const IntervalType add_interval{base_adr, end_adr};
        found_sets.add(add_interval);
    });
    if (found_sets.empty()) {
        return true;
    }
    const IntervalType search_interval{cpu_addr, cpu_addr + size};
    auto it = common_ranges.lower_bound(search_interval);
    auto it_end = common_ranges.upper_bound(search_interval);
    if (it == common_ranges.end()) {
        make_copies();
        return false;
    }
    while (it != it_end) {
        found_sets.subtract(*it);
        ++it;
    }
    make_copies();
    return false;
}

template <class P>
void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
                                  std::span<BufferCopy> copies) {
    if constexpr (USE_MEMORY_MAPS) {
        MappedUploadMemory(buffer, total_size_bytes, copies);
    } else {
        ImmediateUploadMemory(buffer, largest_copy, copies);
    }
}

template <class P>
void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
                                           [[maybe_unused]] u64 largest_copy,
                                           [[maybe_unused]] std::span<const BufferCopy> copies) {
    if constexpr (!USE_MEMORY_MAPS) {
        std::span<u8> immediate_buffer;
        for (const BufferCopy& copy : copies) {
            std::span<const u8> upload_span;
            const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
            if (IsRangeGranular(cpu_addr, copy.size)) {
                upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size);
            } else {
                if (immediate_buffer.empty()) {
                    immediate_buffer = ImmediateBuffer(largest_copy);
                }
                cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
                upload_span = immediate_buffer.subspan(0, copy.size);
            }
            buffer.ImmediateUpload(copy.dst_offset, upload_span);
        }
    }
}

template <class P>
void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
                                        [[maybe_unused]] u64 total_size_bytes,
                                        [[maybe_unused]] std::span<BufferCopy> copies) {
    if constexpr (USE_MEMORY_MAPS) {
        auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
        const std::span<u8> staging_pointer = upload_staging.mapped_span;
        for (BufferCopy& copy : copies) {
            u8* const src_pointer = staging_pointer.data() + copy.src_offset;
            const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
            cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size);

            // Apply the staging offset
            copy.src_offset += upload_staging.offset;
        }
        runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
    }
}
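
// Handles small inline writes from the GPU engines. Returns false when the target region
// is not cached or not GPU-modified, letting the caller fall back to a plain CPU write.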
template <class P>
bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
                                  std::span<const u8> inlined_buffer) {
    const bool is_dirty = IsRegionRegistered(dest_address, copy_size);
    if (!is_dirty) {
        return false;
    }
    VAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE);
    VAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE);
    if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
        return false;
    }

    const IntervalType subtract_interval{dest_address, dest_address + copy_size};
    ClearDownload(subtract_interval);
    common_ranges.subtract(subtract_interval);

    BufferId buffer_id = FindBuffer(dest_address, static_cast<u32>(copy_size));
    auto& buffer = slot_buffers[buffer_id];
    SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));

    if constexpr (USE_MEMORY_MAPS) {
        auto upload_staging = runtime.UploadStagingBuffer(copy_size);
        std::array copies{BufferCopy{
            .src_offset = upload_staging.offset,
            .dst_offset = buffer.Offset(dest_address),
            .size = copy_size,
        }};
        u8* const src_pointer = upload_staging.mapped_span.data();
        std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
        runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
    } else {
        buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
    }

    return true;
}
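
// Flushes a buffer's GPU-modified ranges back to guest memory. Only ranges still present
// in common_ranges are downloaded, and each one is removed from the pending sets.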
template <class P>
void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
    DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes());
}

template <class P>
void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) {
    boost::container::small_vector<BufferCopy, 1> copies;
    u64 total_size_bytes = 0;
    u64 largest_copy = 0;
    memory_tracker.ForEachDownloadRangeAndClear(
        cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
            const VAddr buffer_addr = buffer.CpuAddr();
            const auto add_download = [&](VAddr start, VAddr end) {
                const u64 new_offset = start - buffer_addr;
                const u64 new_size = end - start;
                copies.push_back(BufferCopy{
                    .src_offset = new_offset,
                    .dst_offset = total_size_bytes,
                    .size = new_size,
                });
                // Align up to avoid cache conflicts
                constexpr u64 align = 64ULL;
                constexpr u64 mask = ~(align - 1ULL);
                total_size_bytes += (new_size + align - 1) & mask;
                largest_copy = std::max(largest_copy, new_size);
            };

            const VAddr start_address = cpu_addr_out;
            const VAddr end_address = start_address + range_size;
            ForEachInRangeSet(common_ranges, start_address, range_size, add_download);
            const IntervalType subtract_interval{start_address, end_address};
            ClearDownload(subtract_interval);
            common_ranges.subtract(subtract_interval);
        });
    if (total_size_bytes == 0) {
        return;
    }
    MICROPROFILE_SCOPE(GPU_DownloadMemory);

    if constexpr (USE_MEMORY_MAPS) {
        auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
        const u8* const mapped_memory = download_staging.mapped_span.data();
        const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
        for (BufferCopy& copy : copies) {
            // Modify copies to have the staging offset in mind
            copy.dst_offset += download_staging.offset;
        }
        runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
        runtime.Finish();
        for (const BufferCopy& copy : copies) {
            const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
            // Undo the modified offset
            const u64 dst_offset = copy.dst_offset - download_staging.offset;
            const u8* copy_mapped_memory = mapped_memory + dst_offset;
            cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
        }
    } else {
        const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
        for (const BufferCopy& copy : copies) {
            buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
            const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
            cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
        }
    }
}
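
// Unbinds the buffer everywhere it is referenced, stops CPU-write tracking for its range
// (unless do_not_mark is set, as when joining overlaps), unregisters it, and pushes the
// object onto the delayed destruction ring so in-flight GPU work can complete first.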
template <class P>
void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
    const auto scalar_replace = [buffer_id](Binding& binding) {
        if (binding.buffer_id == buffer_id) {
            binding.buffer_id = BufferId{};
        }
    };
    const auto replace = [scalar_replace](std::span<Binding> bindings) {
        std::ranges::for_each(bindings, scalar_replace);
    };
    scalar_replace(index_buffer);
    replace(vertex_buffers);
    std::ranges::for_each(uniform_buffers, replace);
    std::ranges::for_each(storage_buffers, replace);
    replace(transform_feedback_buffers);
    replace(compute_uniform_buffers);
    replace(compute_storage_buffers);

    // Mark the whole buffer as CPU written to stop tracking CPU writes
    if (!do_not_mark) {
        Buffer& buffer = slot_buffers[buffer_id];
        memory_tracker.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes());
    }

    Unregister(buffer_id);
    delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
    slot_buffers.erase(buffer_id);

    NotifyBufferDeletion();
}

template <class P>
void BufferCache<P>::NotifyBufferDeletion() {
    if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
        dirty_uniform_buffers.fill(~u32{0});
        uniform_buffer_binding_sizes.fill({});
    }
    auto& flags = maxwell3d->dirty.flags;
    flags[Dirty::IndexBuffer] = true;
    flags[Dirty::VertexBuffers] = true;
    for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
        flags[Dirty::VertexBuffer0 + index] = true;
    }
    has_deleted_buffers = true;
}
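
// Builds a binding for a storage buffer whose GPU address is read from constant buffer
// memory; for the NVN driver cbuf (index 0) the size is stored right after the address.
// Read-only bindings are padded up to the next guest page so shaders may safely over-read.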
template <class P>
typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr,
                                                                      u32 cbuf_index,
                                                                      bool is_written) const {
    const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
    const auto size = [&]() {
        const bool is_nvn_cbuf = cbuf_index == 0;
        // The NVN driver buffer (index 0) is known to pack the SSBO address followed by its size.
        if (is_nvn_cbuf) {
            return gpu_memory->Read<u32>(ssbo_addr + 8);
        }
        // Other titles (notably Doom Eternal) may use STG/LDG on buffer addresses in custom
        // defined cbufs, which do not store the sizes adjacent to the addresses, so use the
        // fully mapped buffer size for now.
        const u32 memory_layout_size = static_cast<u32>(gpu_memory->GetMemoryLayoutSize(gpu_addr));
        LOG_INFO(HW_GPU, "Binding storage buffer for cbuf index {}, MemoryLayoutSize 0x{:X}",
                 cbuf_index, memory_layout_size);
        return memory_layout_size;
    }();
    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
    if (!cpu_addr || size == 0) {
        LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index);
        return NULL_BINDING;
    }
    const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, YUZU_PAGESIZE);
    const Binding binding{
        .cpu_addr = *cpu_addr,
        .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
        .buffer_id = BufferId{},
    };
    return binding;
}

template <class P>
typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
    GPUVAddr gpu_addr, u32 size, PixelFormat format) {
    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
    TextureBufferBinding binding;
    if (!cpu_addr || size == 0) {
        binding.cpu_addr = 0;
        binding.size = 0;
        binding.buffer_id = NULL_BUFFER_ID;
        binding.format = PixelFormat::Invalid;
    } else {
        binding.cpu_addr = *cpu_addr;
        binding.size = size;
        binding.buffer_id = BufferId{};
        binding.format = format;
    }
    return binding;
}

template <class P>
std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) {
    u8* const base_pointer = cpu_memory.GetPointer(cpu_addr);
    if (IsRangeGranular(cpu_addr, size) ||
        base_pointer + size == cpu_memory.GetPointer(cpu_addr + size)) {
        return std::span(base_pointer, size);
    } else {
        const std::span<u8> span = ImmediateBuffer(size);
        cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
        return span;
    }
}

template <class P>
std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) {
    immediate_buffer_alloc.resize_destructive(wanted_capacity);
    return std::span<u8>(immediate_buffer_alloc.data(), wanted_capacity);
}

template <class P>
bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
    if constexpr (IS_OPENGL) {
        return ((fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
    } else {
        // Only OpenGL has fast uniform buffers
        return false;
    }
}

template <class P>
std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() {
    auto& buffer = slot_buffers[count_buffer_binding.buffer_id];
    return std::make_pair(&buffer, buffer.Offset(count_buffer_binding.cpu_addr));
}

template <class P>
std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() {
    auto& buffer = slot_buffers[indirect_buffer_binding.buffer_id];
    return std::make_pair(&buffer, buffer.Offset(indirect_buffer_binding.cpu_addr));
}

} // namespace VideoCommon