SMMU: Add Android compatibility

This commit is contained in:
Fernando Sahmkow 2023-12-30 04:37:25 +01:00 committed by Liam
parent 0adc09e0af
commit 303cd31162
9 changed files with 42 additions and 50 deletions

View file

@ -217,9 +217,6 @@ DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memo
cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) {
impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
cached_pages = std::make_unique<CachedPages>();
for (size_t i = 0; i < 1ULL << (33 - 12); i++) {
compressed_device_addr[i] = 0;
}
}
template <typename Traits>
@ -517,7 +514,7 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
u64 cache_begin = 0;
u64 uncache_bytes = 0;
u64 cache_bytes = 0;
const auto* MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching;
const auto MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching;
std::atomic_thread_fence(std::memory_order_acquire);
const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE);
@ -577,4 +574,4 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
}
}
} // namespace Core
} // namespace Core

View file

@ -8,6 +8,7 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/kernel/k_process.h"
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
@ -109,7 +110,7 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De
ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(),
cmdlist.size() * sizeof(u32));
cmdlist.size() * sizeof(u32));
gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist);
}
// Some games expect command_buffers to be written back
@ -135,7 +136,8 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) {
return NvResult::Success;
}
NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, DeviceFD fd) {
NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries,
DeviceFD fd) {
const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size()));
for (size_t i = 0; i < num_entries; i++) {
DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd], true);

View file

@ -44,8 +44,7 @@ bool AddressSpaceContains(const Common::PageTable& table, const Common::ProcessA
// from outside classes. This also allows modification to the internals of the memory
// subsystem without needing to rebuild all files that make use of the memory interface.
struct Memory::Impl {
explicit Impl(Core::System& system_)
: system{system_} {}
explicit Impl(Core::System& system_) : system{system_} {}
void SetCurrentPageTable(Kernel::KProcess& process) {
current_page_table = &process.GetPageTable().GetImpl();
@ -640,18 +639,6 @@ struct Memory::Impl {
LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target),
base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE);
// During boot, current_page_table might not be set yet, in which case we need not flush
/*if (system.IsPoweredOn()) {
auto& gpu = system.GPU();
for (u64 i = 0; i < size; i++) {
const auto page = base + i;
if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) {
gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE);
}
}
}*/
const auto end = base + size;
ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
base + page_table.pointers.size());
@ -823,8 +810,7 @@ struct Memory::Impl {
}
const size_t core = system.GetCurrentHostThreadID();
auto& current_area = rasterizer_read_areas[core];
gpu_device_memory->ApplyOpOnPointer(
p, scratch_buffers[core], [&](DAddr address) {
gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) {
const DAddr end_address = address + size;
if (current_area.start_address <= address && end_address <= current_area.end_address)
[[likely]] {
@ -852,8 +838,7 @@ struct Memory::Impl {
sys_core_guard.unlock();
}
});
gpu_device_memory->ApplyOpOnPointer(
p, scratch_buffers[core], [&](DAddr address) {
gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) {
auto& current_area = rasterizer_write_areas[core];
PAddr subaddress = address >> YUZU_PAGEBITS;
bool do_collection = current_area.last_address == subaddress;
@ -872,12 +857,25 @@ struct Memory::Impl {
PAddr last_address;
};
void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
system.GPU().InvalidateRegion(GetInteger(dest_addr), size);
}
void FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
system.GPU().FlushRegion(GetInteger(dest_addr), size);
void InvalidateGPUMemory(u8* p, size_t size) {
constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1;
const size_t core = std::min(system.GetCurrentHostThreadID(),
sys_core); // any other calls threads go to syscore.
if (!gpu_device_memory) [[unlikely]] {
gpu_device_memory = &system.Host1x().MemoryManager();
}
// Guard on sys_core;
if (core == sys_core) [[unlikely]] {
sys_core_guard.lock();
}
SCOPE_EXIT({
if (core == sys_core) [[unlikely]] {
sys_core_guard.unlock();
}
});
auto& gpu = system.GPU();
gpu_device_memory->ApplyOpOnPointer(
p, scratch_buffers[core], [&](DAddr address) { gpu.InvalidateRegion(address, size); });
}
Core::System& system;
@ -1081,14 +1079,6 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug)
impl->MarkRegionDebug(GetInteger(vaddr), size, debug);
}
void Memory::InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
impl->InvalidateRegion(dest_addr, size);
}
void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
impl->FlushRegion(dest_addr, size);
}
bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
[[maybe_unused]] bool mapped = true;
[[maybe_unused]] bool rasterizer = false;
@ -1100,10 +1090,10 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
GetInteger(vaddr));
mapped = false;
},
[&] {
impl->system.GPU().InvalidateRegion(GetInteger(vaddr), size);
rasterizer = true;
});
[&] { rasterizer = true; });
if (rasterizer) {
impl->InvalidateGPUMemory(ptr, size);
}
#ifdef __linux__
if (!rasterizer && mapped) {

View file

@ -486,10 +486,10 @@ public:
void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size);
bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size);
bool InvalidateSeparateHeap(void* fault_address);
void FlushRegion(Common::ProcessAddress dest_addr, size_t size);
private:
Core::System& system;

View file

@ -17,7 +17,7 @@ struct MaxwellDeviceTraits {
static constexpr bool supports_pinning = false;
static constexpr size_t device_virtual_bits = 34;
using DeviceInterface = typename VideoCore::RasterizerInterface;
using DeviceMethods = typename MaxwellDeviceMethods;
using DeviceMethods = MaxwellDeviceMethods;
};
using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>;

View file

@ -13,6 +13,8 @@ Host1x::Host1x(Core::System& system_)
memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12},
allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {}
Host1x::~Host1x() = default;
} // namespace Host1x
} // namespace Tegra

View file

@ -21,6 +21,7 @@ namespace Host1x {
class Host1x {
public:
explicit Host1x(Core::System& system);
~Host1x();
SyncpointManager& GetSyncpointManager() {
return syncpoint_manager;

View file

@ -68,7 +68,7 @@ public:
if (!address) {
return {};
}
return memory.GetPointer(*address);
return memory.GetPointer<T>(*address);
}
template <typename T>

View file

@ -256,8 +256,8 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS,
static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK));
};
u8* pointer = impl->device_memory.GetPointer<u8>(cpu_addr);
u8* pointer_timestamp = impl->device_memory.GetPointer<u8>(cpu_addr + 8);
u8* pointer = impl->device_memory.template GetPointer<u8>(cpu_addr);
u8* pointer_timestamp = impl->device_memory.template GetPointer<u8>(cpu_addr + 8);
bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
pointer, pointer_timestamp] {
@ -561,7 +561,7 @@ bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLo
}
if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) &&
False(query_base->flags & QueryFlagBits::IsGuestSynced)) {
auto* ptr = impl->device_memory.GetPointer<u8>(query_base->guest_address);
auto* ptr = impl->device_memory.template GetPointer<u8>(query_base->guest_address);
if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
std::memcpy(ptr, &query_base->value, sizeof(query_base->value));
return false;