MemoryTracking: Initial setup of atomic writes.
This commit is contained in:
parent
b60b70e86d
commit
47d0d292d5
8 changed files with 183 additions and 14 deletions
|
@ -27,6 +27,7 @@
|
||||||
#include "core/file_sys/savedata_factory.h"
|
#include "core/file_sys/savedata_factory.h"
|
||||||
#include "core/file_sys/vfs_concat.h"
|
#include "core/file_sys/vfs_concat.h"
|
||||||
#include "core/file_sys/vfs_real.h"
|
#include "core/file_sys/vfs_real.h"
|
||||||
|
#include "core/gpu_dirty_memory_manager.h"
|
||||||
#include "core/hid/hid_core.h"
|
#include "core/hid/hid_core.h"
|
||||||
#include "core/hle/kernel/k_memory_manager.h"
|
#include "core/hle/kernel/k_memory_manager.h"
|
||||||
#include "core/hle/kernel/k_process.h"
|
#include "core/hle/kernel/k_process.h"
|
||||||
|
@ -54,6 +55,7 @@
|
||||||
#include "video_core/renderer_base.h"
|
#include "video_core/renderer_base.h"
|
||||||
#include "video_core/video_core.h"
|
#include "video_core/video_core.h"
|
||||||
|
|
||||||
|
|
||||||
MICROPROFILE_DEFINE(ARM_CPU0, "ARM", "CPU 0", MP_RGB(255, 64, 64));
|
MICROPROFILE_DEFINE(ARM_CPU0, "ARM", "CPU 0", MP_RGB(255, 64, 64));
|
||||||
MICROPROFILE_DEFINE(ARM_CPU1, "ARM", "CPU 1", MP_RGB(255, 64, 64));
|
MICROPROFILE_DEFINE(ARM_CPU1, "ARM", "CPU 1", MP_RGB(255, 64, 64));
|
||||||
MICROPROFILE_DEFINE(ARM_CPU2, "ARM", "CPU 2", MP_RGB(255, 64, 64));
|
MICROPROFILE_DEFINE(ARM_CPU2, "ARM", "CPU 2", MP_RGB(255, 64, 64));
|
||||||
|
@ -540,6 +542,9 @@ struct System::Impl {
|
||||||
|
|
||||||
std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{};
|
std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{};
|
||||||
std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{};
|
std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{};
|
||||||
|
|
||||||
|
std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES>
|
||||||
|
gpu_dirty_memory_write_manager{};
|
||||||
};
|
};
|
||||||
|
|
||||||
System::System() : impl{std::make_unique<Impl>(*this)} {}
|
System::System() : impl{std::make_unique<Impl>(*this)} {}
|
||||||
|
@ -629,10 +634,31 @@ void System::PrepareReschedule(const u32 core_index) {
|
||||||
impl->kernel.PrepareReschedule(core_index);
|
impl->kernel.PrepareReschedule(core_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Provides a reference to the GPU dirty memory manager used by the calling host thread.
/// Host thread IDs at or beyond NUM_CPU_CORES are mapped onto the last manager in the array.
Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() {
    std::size_t slot = impl->kernel.GetCurrentHostThreadID();
    if (slot >= Core::Hardware::NUM_CPU_CORES) {
        // Out-of-range IDs all share the final slot.
        slot = Core::Hardware::NUM_CPU_CORES - 1;
    }
    return impl->gpu_dirty_memory_write_manager[slot];
}
|
||||||
|
|
||||||
|
/// Provides a constant reference to the current gou dirty memory manager.
|
||||||
|
const Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() const {
|
||||||
|
const std::size_t core = impl->kernel.GetCurrentHostThreadID();
|
||||||
|
return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES
|
||||||
|
? core
|
||||||
|
: Core::Hardware::NUM_CPU_CORES - 1];
|
||||||
|
}
|
||||||
|
|
||||||
size_t System::GetCurrentHostThreadID() const {
|
size_t System::GetCurrentHostThreadID() const {
|
||||||
return impl->kernel.GetCurrentHostThreadID();
|
return impl->kernel.GetCurrentHostThreadID();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Drains every dirty memory manager in turn, forwarding `callback` to each manager's Gather().
void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) {
    for (Core::GPUDirtyMemoryManager& per_slot : impl->gpu_dirty_memory_write_manager) {
        per_slot.Gather(callback);
    }
}
|
||||||
|
|
||||||
PerfStatsResults System::GetAndResetPerfStats() {
|
PerfStatsResults System::GetAndResetPerfStats() {
|
||||||
return impl->GetAndResetPerfStats();
|
return impl->GetAndResetPerfStats();
|
||||||
}
|
}
|
||||||
|
|
|
@ -108,9 +108,10 @@ class CpuManager;
|
||||||
class Debugger;
|
class Debugger;
|
||||||
class DeviceMemory;
|
class DeviceMemory;
|
||||||
class ExclusiveMonitor;
|
class ExclusiveMonitor;
|
||||||
class SpeedLimiter;
|
class GPUDirtyMemoryManager;
|
||||||
class PerfStats;
|
class PerfStats;
|
||||||
class Reporter;
|
class Reporter;
|
||||||
|
class SpeedLimiter;
|
||||||
class TelemetrySession;
|
class TelemetrySession;
|
||||||
|
|
||||||
struct PerfStatsResults;
|
struct PerfStatsResults;
|
||||||
|
@ -225,6 +226,14 @@ public:
|
||||||
/// Prepare the core emulation for a reschedule
|
/// Prepare the core emulation for a reschedule
|
||||||
void PrepareReschedule(u32 core_index);
|
void PrepareReschedule(u32 core_index);
|
||||||
|
|
||||||
|
/// Provides a reference to the current GPU dirty memory manager.
|
||||||
|
[[nodiscard]] Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager();
|
||||||
|
|
||||||
|
/// Provides a constant reference to the current GPU dirty memory manager.
|
||||||
|
[[nodiscard]] const Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager() const;
|
||||||
|
|
||||||
|
void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback);
|
||||||
|
|
||||||
[[nodiscard]] size_t GetCurrentHostThreadID() const;
|
[[nodiscard]] size_t GetCurrentHostThreadID() const;
|
||||||
|
|
||||||
/// Gets and resets core performance statistics
|
/// Gets and resets core performance statistics
|
||||||
|
|
112
src/core/gpu_dirty_memory_manager.h
Normal file
112
src/core/gpu_dirty_memory_manager.h
Normal file
|
@ -0,0 +1,112 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
|
#include <bit>
|
||||||
|
#include <functional>
|
||||||
|
#include <mutex>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "core/memory.h"
|
||||||
|
|
||||||
|
namespace Core {
|
||||||
|
|
||||||
|
class GPUDirtyMemoryManager {
|
||||||
|
public:
|
||||||
|
GPUDirtyMemoryManager() : current{default_transform} {}
|
||||||
|
|
||||||
|
~GPUDirtyMemoryManager() = default;
|
||||||
|
|
||||||
|
void Collect(VAddr address, size_t size) {
|
||||||
|
TransformAddress t = BuildTransform(address, size);
|
||||||
|
TransformAddress tmp, original;
|
||||||
|
do {
|
||||||
|
tmp = current.load(std::memory_order_acquire);
|
||||||
|
original = tmp;
|
||||||
|
if (tmp.address != t.address) {
|
||||||
|
if (IsValid(tmp.address)) {
|
||||||
|
std::scoped_lock lk(guard);
|
||||||
|
back_buffer.emplace_back(tmp);
|
||||||
|
current.exchange(t, std::memory_order_relaxed);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
tmp.address = t.address;
|
||||||
|
tmp.mask = 0;
|
||||||
|
}
|
||||||
|
if ((tmp.mask | t.mask) == tmp.mask) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
tmp.mask |= t.mask;
|
||||||
|
} while (!current.compare_exchange_weak(original, tmp, std::memory_order_release,
|
||||||
|
std::memory_order_relaxed));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Gather(std::function<void(VAddr, size_t)>& callback) {
|
||||||
|
{
|
||||||
|
std::scoped_lock lk(guard);
|
||||||
|
TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed);
|
||||||
|
front_buffer.swap(back_buffer);
|
||||||
|
if (IsValid(t.address)) {
|
||||||
|
front_buffer.emplace_back(t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (auto& transform : front_buffer) {
|
||||||
|
size_t offset = 0;
|
||||||
|
u64 mask = transform.mask;
|
||||||
|
while (mask != 0) {
|
||||||
|
const size_t empty_bits = std::countr_zero(mask);
|
||||||
|
offset += empty_bits << align_bits;
|
||||||
|
mask = mask >> empty_bits;
|
||||||
|
|
||||||
|
const size_t continuous_bits = std::countr_one(mask);
|
||||||
|
callback((transform.address << Memory::YUZU_PAGEBITS) + offset,
|
||||||
|
continuous_bits << align_bits);
|
||||||
|
mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
|
||||||
|
offset += continuous_bits << align_bits;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
front_buffer.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
struct alignas(16) TransformAddress {
|
||||||
|
VAddr address;
|
||||||
|
u64 mask;
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr static size_t align_bits = 6U;
|
||||||
|
constexpr static size_t align_size = 1U << align_bits;
|
||||||
|
constexpr static size_t align_mask = align_size - 1;
|
||||||
|
constexpr static TransformAddress default_transform = {.address = ~0ULL, .mask = 0ULL};
|
||||||
|
|
||||||
|
bool IsValid(VAddr address) {
|
||||||
|
return address < (1ULL << 39);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T CreateMask(size_t top_bit, size_t minor_bit) {
|
||||||
|
T mask = ~T(0);
|
||||||
|
mask <<= (sizeof(T) * 8 - top_bit);
|
||||||
|
mask >>= (sizeof(T) * 8 - top_bit);
|
||||||
|
mask >>= minor_bit;
|
||||||
|
mask <<= minor_bit;
|
||||||
|
return mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
TransformAddress BuildTransform(VAddr address, size_t size) {
|
||||||
|
const size_t minor_address = address & Memory::YUZU_PAGEMASK;
|
||||||
|
const size_t minor_bit = minor_address >> align_bits;
|
||||||
|
const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
|
||||||
|
TransformAddress result{};
|
||||||
|
result.address = address >> Memory::YUZU_PAGEBITS;
|
||||||
|
result.mask = CreateMask<u64>(top_bit, minor_bit);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::atomic<TransformAddress> current{};
|
||||||
|
std::mutex guard;
|
||||||
|
std::vector<TransformAddress> back_buffer;
|
||||||
|
std::vector<TransformAddress> front_buffer;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Core
|
|
@ -13,6 +13,7 @@
|
||||||
#include "common/swap.h"
|
#include "common/swap.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/device_memory.h"
|
#include "core/device_memory.h"
|
||||||
|
#include "core/gpu_dirty_memory_manager.h"
|
||||||
#include "core/hardware_properties.h"
|
#include "core/hardware_properties.h"
|
||||||
#include "core/hle/kernel/k_page_table.h"
|
#include "core/hle/kernel/k_page_table.h"
|
||||||
#include "core/hle/kernel/k_process.h"
|
#include "core/hle/kernel/k_process.h"
|
||||||
|
@ -678,7 +679,7 @@ struct Memory::Impl {
|
||||||
LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
|
LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
|
||||||
GetInteger(vaddr), static_cast<u64>(data));
|
GetInteger(vaddr), static_cast<u64>(data));
|
||||||
},
|
},
|
||||||
[&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); });
|
[&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); });
|
||||||
if (ptr) {
|
if (ptr) {
|
||||||
std::memcpy(ptr, &data, sizeof(T));
|
std::memcpy(ptr, &data, sizeof(T));
|
||||||
}
|
}
|
||||||
|
@ -692,7 +693,7 @@ struct Memory::Impl {
|
||||||
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}",
|
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}",
|
||||||
sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data));
|
sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data));
|
||||||
},
|
},
|
||||||
[&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); });
|
[&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); });
|
||||||
if (ptr) {
|
if (ptr) {
|
||||||
const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr);
|
const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr);
|
||||||
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
|
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
|
||||||
|
@ -707,7 +708,7 @@ struct Memory::Impl {
|
||||||
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}",
|
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}",
|
||||||
GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0]));
|
GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0]));
|
||||||
},
|
},
|
||||||
[&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(u128)); });
|
[&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(u128)); });
|
||||||
if (ptr) {
|
if (ptr) {
|
||||||
const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr);
|
const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr);
|
||||||
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
|
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
|
||||||
|
|
|
@ -115,7 +115,21 @@ void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
|
void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
|
||||||
memory_tracker.CachedCpuWrite(cpu_addr, size);
|
const bool is_dirty = IsRegionRegistered(cpu_addr, size);
|
||||||
|
if (!is_dirty) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
VAddr aligned_start = Common::AlignDown(cpu_addr, YUZU_PAGESIZE);
|
||||||
|
VAddr aligned_end = Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE);
|
||||||
|
if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
|
||||||
|
WriteMemory(cpu_addr, size);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp_buffer.resize_destructive(size);
|
||||||
|
cpu_memory.ReadBlockUnsafe(cpu_addr, tmp_buffer.data(), size);
|
||||||
|
|
||||||
|
InlineMemoryImplementation(cpu_addr, size, tmp_buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
@ -1553,6 +1567,14 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
InlineMemoryImplementation(dest_address, copy_size, inlined_buffer);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size,
|
||||||
|
std::span<const u8> inlined_buffer) {
|
||||||
const IntervalType subtract_interval{dest_address, dest_address + copy_size};
|
const IntervalType subtract_interval{dest_address, dest_address + copy_size};
|
||||||
ClearDownload(subtract_interval);
|
ClearDownload(subtract_interval);
|
||||||
common_ranges.subtract(subtract_interval);
|
common_ranges.subtract(subtract_interval);
|
||||||
|
@ -1574,8 +1596,6 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
|
||||||
} else {
|
} else {
|
||||||
buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
|
buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
|
|
@ -543,6 +543,8 @@ private:
|
||||||
|
|
||||||
void ClearDownload(IntervalType subtract_interval);
|
void ClearDownload(IntervalType subtract_interval);
|
||||||
|
|
||||||
|
void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
|
||||||
|
|
||||||
VideoCore::RasterizerInterface& rasterizer;
|
VideoCore::RasterizerInterface& rasterizer;
|
||||||
Core::Memory::Memory& cpu_memory;
|
Core::Memory::Memory& cpu_memory;
|
||||||
|
|
||||||
|
|
|
@ -95,7 +95,9 @@ struct GPU::Impl {
|
||||||
|
|
||||||
/// Synchronizes CPU writes with Host GPU memory.
|
/// Synchronizes CPU writes with Host GPU memory.
|
||||||
void InvalidateGPUCache() {
|
void InvalidateGPUCache() {
|
||||||
rasterizer->InvalidateGPUCache();
|
std::function<void(VAddr, size_t)> callback_writes(
|
||||||
|
[this](VAddr address, size_t size) { rasterizer->OnCPUWrite(address, size); });
|
||||||
|
system.GatherGPUDirtyMemory(callback_writes);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Signal the ending of command list.
|
/// Signal the ending of command list.
|
||||||
|
|
|
@ -570,7 +570,7 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
|
||||||
if (addr == 0 || size == 0) {
|
if (addr == 0 || size == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
pipeline_cache.OnCPUWrite(addr, size);
|
|
||||||
{
|
{
|
||||||
std::scoped_lock lock{texture_cache.mutex};
|
std::scoped_lock lock{texture_cache.mutex};
|
||||||
texture_cache.WriteMemory(addr, size);
|
texture_cache.WriteMemory(addr, size);
|
||||||
|
@ -579,14 +579,11 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
|
||||||
std::scoped_lock lock{buffer_cache.mutex};
|
std::scoped_lock lock{buffer_cache.mutex};
|
||||||
buffer_cache.CachedWriteMemory(addr, size);
|
buffer_cache.CachedWriteMemory(addr, size);
|
||||||
}
|
}
|
||||||
|
pipeline_cache.InvalidateRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::InvalidateGPUCache() {
|
void RasterizerVulkan::InvalidateGPUCache() {
|
||||||
pipeline_cache.SyncGuestHost();
|
gpu.InvalidateGPUCache();
|
||||||
{
|
|
||||||
std::scoped_lock lock{buffer_cache.mutex};
|
|
||||||
buffer_cache.FlushCachedWrites();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
|
void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
|
||||||
|
|
Loading…
Reference in a new issue