113 lines
3.6 KiB
C++
113 lines
3.6 KiB
C++
|
#pragma once
|
||
|
|
||
|
#include <atomic>
|
||
|
#include <bit>
|
||
|
#include <functional>
|
||
|
#include <mutex>
|
||
|
#include <utility>
|
||
|
#include <vector>
|
||
|
|
||
|
#include "core/memory.h"
|
||
|
|
||
|
namespace Core {
|
||
|
|
||
|
class GPUDirtyMemoryManager {
|
||
|
public:
|
||
|
GPUDirtyMemoryManager() : current{default_transform} {}
|
||
|
|
||
|
~GPUDirtyMemoryManager() = default;
|
||
|
|
||
|
void Collect(VAddr address, size_t size) {
|
||
|
TransformAddress t = BuildTransform(address, size);
|
||
|
TransformAddress tmp, original;
|
||
|
do {
|
||
|
tmp = current.load(std::memory_order_acquire);
|
||
|
original = tmp;
|
||
|
if (tmp.address != t.address) {
|
||
|
if (IsValid(tmp.address)) {
|
||
|
std::scoped_lock lk(guard);
|
||
|
back_buffer.emplace_back(tmp);
|
||
|
current.exchange(t, std::memory_order_relaxed);
|
||
|
return;
|
||
|
}
|
||
|
tmp.address = t.address;
|
||
|
tmp.mask = 0;
|
||
|
}
|
||
|
if ((tmp.mask | t.mask) == tmp.mask) {
|
||
|
return;
|
||
|
}
|
||
|
tmp.mask |= t.mask;
|
||
|
} while (!current.compare_exchange_weak(original, tmp, std::memory_order_release,
|
||
|
std::memory_order_relaxed));
|
||
|
}
|
||
|
|
||
|
void Gather(std::function<void(VAddr, size_t)>& callback) {
|
||
|
{
|
||
|
std::scoped_lock lk(guard);
|
||
|
TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed);
|
||
|
front_buffer.swap(back_buffer);
|
||
|
if (IsValid(t.address)) {
|
||
|
front_buffer.emplace_back(t);
|
||
|
}
|
||
|
}
|
||
|
for (auto& transform : front_buffer) {
|
||
|
size_t offset = 0;
|
||
|
u64 mask = transform.mask;
|
||
|
while (mask != 0) {
|
||
|
const size_t empty_bits = std::countr_zero(mask);
|
||
|
offset += empty_bits << align_bits;
|
||
|
mask = mask >> empty_bits;
|
||
|
|
||
|
const size_t continuous_bits = std::countr_one(mask);
|
||
|
callback((transform.address << Memory::YUZU_PAGEBITS) + offset,
|
||
|
continuous_bits << align_bits);
|
||
|
mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
|
||
|
offset += continuous_bits << align_bits;
|
||
|
}
|
||
|
}
|
||
|
front_buffer.clear();
|
||
|
}
|
||
|
|
||
|
private:
|
||
|
struct alignas(16) TransformAddress {
|
||
|
VAddr address;
|
||
|
u64 mask;
|
||
|
};
|
||
|
|
||
|
constexpr static size_t align_bits = 6U;
|
||
|
constexpr static size_t align_size = 1U << align_bits;
|
||
|
constexpr static size_t align_mask = align_size - 1;
|
||
|
constexpr static TransformAddress default_transform = {.address = ~0ULL, .mask = 0ULL};
|
||
|
|
||
|
bool IsValid(VAddr address) {
|
||
|
return address < (1ULL << 39);
|
||
|
}
|
||
|
|
||
|
template <typename T>
|
||
|
T CreateMask(size_t top_bit, size_t minor_bit) {
|
||
|
T mask = ~T(0);
|
||
|
mask <<= (sizeof(T) * 8 - top_bit);
|
||
|
mask >>= (sizeof(T) * 8 - top_bit);
|
||
|
mask >>= minor_bit;
|
||
|
mask <<= minor_bit;
|
||
|
return mask;
|
||
|
}
|
||
|
|
||
|
TransformAddress BuildTransform(VAddr address, size_t size) {
|
||
|
const size_t minor_address = address & Memory::YUZU_PAGEMASK;
|
||
|
const size_t minor_bit = minor_address >> align_bits;
|
||
|
const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
|
||
|
TransformAddress result{};
|
||
|
result.address = address >> Memory::YUZU_PAGEBITS;
|
||
|
result.mask = CreateMask<u64>(top_bit, minor_bit);
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
std::atomic<TransformAddress> current{};
|
||
|
std::mutex guard;
|
||
|
std::vector<TransformAddress> back_buffer;
|
||
|
std::vector<TransformAddress> front_buffer;
|
||
|
};
|
||
|
|
||
|
} // namespace Core
|