mirror of
https://git.suyu.dev/suyu/suyu.git
synced 2025-01-11 02:01:01 +01:00
Merge pull request #9084 from vonchenplus/dma_copy
video_core: implement 1D copies based on VMM 'kind'
This commit is contained in:
commit
b8a70c9999
7 changed files with 415 additions and 73 deletions
|
@ -311,7 +311,8 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
|
||||||
handle->address +
|
handle->address +
|
||||||
(static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))};
|
(static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))};
|
||||||
|
|
||||||
gmmu->Map(virtual_address, cpu_address, size, use_big_pages);
|
gmmu->Map(virtual_address, cpu_address, size, static_cast<Tegra::PTEKind>(entry.kind),
|
||||||
|
use_big_pages);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -350,7 +351,8 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
|
||||||
u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)};
|
u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)};
|
||||||
VAddr cpu_address{mapping->ptr + params.buffer_offset};
|
VAddr cpu_address{mapping->ptr + params.buffer_offset};
|
||||||
|
|
||||||
gmmu->Map(gpu_address, cpu_address, params.mapping_size, mapping->big_page);
|
gmmu->Map(gpu_address, cpu_address, params.mapping_size,
|
||||||
|
static_cast<Tegra::PTEKind>(params.kind), mapping->big_page);
|
||||||
|
|
||||||
return NvResult::Success;
|
return NvResult::Success;
|
||||||
} catch (const std::out_of_range&) {
|
} catch (const std::out_of_range&) {
|
||||||
|
@ -389,7 +391,8 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
|
||||||
}
|
}
|
||||||
|
|
||||||
const bool use_big_pages = alloc->second.big_pages && big_page;
|
const bool use_big_pages = alloc->second.big_pages && big_page;
|
||||||
gmmu->Map(params.offset, cpu_address, size, use_big_pages);
|
gmmu->Map(params.offset, cpu_address, size, static_cast<Tegra::PTEKind>(params.kind),
|
||||||
|
use_big_pages);
|
||||||
|
|
||||||
auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true,
|
auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true,
|
||||||
use_big_pages, alloc->second.sparse)};
|
use_big_pages, alloc->second.sparse)};
|
||||||
|
@ -409,7 +412,8 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
|
||||||
return NvResult::InsufficientMemory;
|
return NvResult::InsufficientMemory;
|
||||||
}
|
}
|
||||||
|
|
||||||
gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size), big_page);
|
gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size),
|
||||||
|
static_cast<Tegra::PTEKind>(params.kind), big_page);
|
||||||
|
|
||||||
auto mapping{
|
auto mapping{
|
||||||
std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)};
|
std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)};
|
||||||
|
|
|
@ -82,6 +82,7 @@ add_library(video_core STATIC
|
||||||
gpu_thread.h
|
gpu_thread.h
|
||||||
memory_manager.cpp
|
memory_manager.cpp
|
||||||
memory_manager.h
|
memory_manager.h
|
||||||
|
pte_kind.h
|
||||||
query_cache.h
|
query_cache.h
|
||||||
rasterizer_accelerated.cpp
|
rasterizer_accelerated.cpp
|
||||||
rasterizer_accelerated.h
|
rasterizer_accelerated.h
|
||||||
|
|
|
@ -56,6 +56,7 @@ void MaxwellDMA::Launch() {
|
||||||
ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
|
ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
|
||||||
ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
|
ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
|
||||||
|
|
||||||
|
if (launch.multi_line_enable) {
|
||||||
const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
||||||
const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
||||||
|
|
||||||
|
@ -66,57 +67,75 @@ void MaxwellDMA::Launch() {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_src_pitch && is_dst_pitch) {
|
if (is_src_pitch && is_dst_pitch) {
|
||||||
CopyPitchToPitch();
|
for (u32 line = 0; line < regs.line_count; ++line) {
|
||||||
|
const GPUVAddr source_line =
|
||||||
|
regs.offset_in + static_cast<size_t>(line) * regs.pitch_in;
|
||||||
|
const GPUVAddr dest_line =
|
||||||
|
regs.offset_out + static_cast<size_t>(line) * regs.pitch_out;
|
||||||
|
memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
ASSERT(launch.multi_line_enable == 1);
|
|
||||||
|
|
||||||
if (!is_src_pitch && is_dst_pitch) {
|
if (!is_src_pitch && is_dst_pitch) {
|
||||||
CopyBlockLinearToPitch();
|
CopyBlockLinearToPitch();
|
||||||
} else {
|
} else {
|
||||||
CopyPitchToBlockLinear();
|
CopyPitchToBlockLinear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ReleaseSemaphore();
|
} else {
|
||||||
}
|
|
||||||
|
|
||||||
void MaxwellDMA::CopyPitchToPitch() {
|
|
||||||
// When `multi_line_enable` bit is enabled we copy a 2D image of dimensions
|
|
||||||
// (line_length_in, line_count).
|
|
||||||
// Otherwise the copy is performed as if we were copying a 1D buffer of length line_length_in.
|
|
||||||
const bool remap_enabled = regs.launch_dma.remap_enable != 0;
|
|
||||||
if (regs.launch_dma.multi_line_enable) {
|
|
||||||
UNIMPLEMENTED_IF(remap_enabled);
|
|
||||||
|
|
||||||
// Perform a line-by-line copy.
|
|
||||||
// We're going to take a subrect of size (line_length_in, line_count) from the source
|
|
||||||
// rectangle. There is no need to manually flush/invalidate the regions because CopyBlock
|
|
||||||
// does that for us.
|
|
||||||
for (u32 line = 0; line < regs.line_count; ++line) {
|
|
||||||
const GPUVAddr source_line = regs.offset_in + static_cast<size_t>(line) * regs.pitch_in;
|
|
||||||
const GPUVAddr dest_line = regs.offset_out + static_cast<size_t>(line) * regs.pitch_out;
|
|
||||||
memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// TODO: allow multisized components.
|
// TODO: allow multisized components.
|
||||||
auto& accelerate = rasterizer->AccessAccelerateDMA();
|
auto& accelerate = rasterizer->AccessAccelerateDMA();
|
||||||
const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
|
const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
|
||||||
const bool is_buffer_clear = remap_enabled && is_const_a_dst;
|
if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
|
||||||
if (is_buffer_clear) {
|
|
||||||
ASSERT(regs.remap_const.component_size_minus_one == 3);
|
ASSERT(regs.remap_const.component_size_minus_one == 3);
|
||||||
accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
|
accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
|
||||||
std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
|
std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
|
||||||
memory_manager.WriteBlockUnsafe(regs.offset_out, reinterpret_cast<u8*>(tmp_buffer.data()),
|
memory_manager.WriteBlockUnsafe(regs.offset_out,
|
||||||
|
reinterpret_cast<u8*>(tmp_buffer.data()),
|
||||||
regs.line_length_in * sizeof(u32));
|
regs.line_length_in * sizeof(u32));
|
||||||
return;
|
} else {
|
||||||
|
auto convert_linear_2_blocklinear_addr = [](u64 address) {
|
||||||
|
return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
|
||||||
|
((address & 0x180) >> 1) | ((address & 0x20) << 3);
|
||||||
|
};
|
||||||
|
auto src_kind = memory_manager.GetPageKind(regs.offset_in);
|
||||||
|
auto dst_kind = memory_manager.GetPageKind(regs.offset_out);
|
||||||
|
const bool is_src_pitch = IsPitchKind(static_cast<PTEKind>(src_kind));
|
||||||
|
const bool is_dst_pitch = IsPitchKind(static_cast<PTEKind>(dst_kind));
|
||||||
|
if (!is_src_pitch && is_dst_pitch) {
|
||||||
|
std::vector<u8> tmp_buffer(regs.line_length_in);
|
||||||
|
std::vector<u8> dst_buffer(regs.line_length_in);
|
||||||
|
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
|
||||||
|
regs.line_length_in);
|
||||||
|
for (u32 offset = 0; offset < regs.line_length_in; ++offset) {
|
||||||
|
dst_buffer[offset] =
|
||||||
|
tmp_buffer[convert_linear_2_blocklinear_addr(regs.offset_in + offset) -
|
||||||
|
regs.offset_in];
|
||||||
}
|
}
|
||||||
UNIMPLEMENTED_IF(remap_enabled);
|
memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in);
|
||||||
|
} else if (is_src_pitch && !is_dst_pitch) {
|
||||||
|
std::vector<u8> tmp_buffer(regs.line_length_in);
|
||||||
|
std::vector<u8> dst_buffer(regs.line_length_in);
|
||||||
|
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
|
||||||
|
regs.line_length_in);
|
||||||
|
for (u32 offset = 0; offset < regs.line_length_in; ++offset) {
|
||||||
|
dst_buffer[convert_linear_2_blocklinear_addr(regs.offset_out + offset) -
|
||||||
|
regs.offset_out] = tmp_buffer[offset];
|
||||||
|
}
|
||||||
|
memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in);
|
||||||
|
} else {
|
||||||
if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
|
if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
|
||||||
std::vector<u8> tmp_buffer(regs.line_length_in);
|
std::vector<u8> tmp_buffer(regs.line_length_in);
|
||||||
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in);
|
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
|
||||||
memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), regs.line_length_in);
|
regs.line_length_in);
|
||||||
|
memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
|
||||||
|
regs.line_length_in);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ReleaseSemaphore();
|
||||||
|
}
|
||||||
|
|
||||||
void MaxwellDMA::CopyBlockLinearToPitch() {
|
void MaxwellDMA::CopyBlockLinearToPitch() {
|
||||||
UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
|
UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
|
||||||
|
|
|
@ -219,8 +219,6 @@ private:
|
||||||
/// registers.
|
/// registers.
|
||||||
void Launch();
|
void Launch();
|
||||||
|
|
||||||
void CopyPitchToPitch();
|
|
||||||
|
|
||||||
void CopyBlockLinearToPitch();
|
void CopyBlockLinearToPitch();
|
||||||
|
|
||||||
void CopyPitchToBlockLinear();
|
void CopyPitchToBlockLinear();
|
||||||
|
|
|
@ -41,7 +41,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
|
||||||
big_entries.resize(big_page_table_size / 32, 0);
|
big_entries.resize(big_page_table_size / 32, 0);
|
||||||
big_page_table_cpu.resize(big_page_table_size);
|
big_page_table_cpu.resize(big_page_table_size);
|
||||||
big_page_continous.resize(big_page_table_size / continous_bits, 0);
|
big_page_continous.resize(big_page_table_size / continous_bits, 0);
|
||||||
|
std::array<PTEKind, 32> kind_valus;
|
||||||
|
kind_valus.fill(PTEKind::INVALID);
|
||||||
|
big_kinds.resize(big_page_table_size / 32, kind_valus);
|
||||||
entries.resize(page_table_size / 32, 0);
|
entries.resize(page_table_size / 32, 0);
|
||||||
|
kinds.resize(big_page_table_size / 32, kind_valus);
|
||||||
}
|
}
|
||||||
|
|
||||||
MemoryManager::~MemoryManager() = default;
|
MemoryManager::~MemoryManager() = default;
|
||||||
|
@ -78,6 +82,41 @@ void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const {
|
||||||
|
auto entry = GetEntry<true>(gpu_addr);
|
||||||
|
if (entry == EntryType::Mapped || entry == EntryType::Reserved) [[likely]] {
|
||||||
|
return GetKind<true>(gpu_addr);
|
||||||
|
} else {
|
||||||
|
return GetKind<false>(gpu_addr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <bool is_big_page>
|
||||||
|
PTEKind MemoryManager::GetKind(size_t position) const {
|
||||||
|
if constexpr (is_big_page) {
|
||||||
|
position = position >> big_page_bits;
|
||||||
|
const size_t sub_index = position % 32;
|
||||||
|
return big_kinds[position / 32][sub_index];
|
||||||
|
} else {
|
||||||
|
position = position >> page_bits;
|
||||||
|
const size_t sub_index = position % 32;
|
||||||
|
return kinds[position / 32][sub_index];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <bool is_big_page>
|
||||||
|
void MemoryManager::SetKind(size_t position, PTEKind kind) {
|
||||||
|
if constexpr (is_big_page) {
|
||||||
|
position = position >> big_page_bits;
|
||||||
|
const size_t sub_index = position % 32;
|
||||||
|
big_kinds[position / 32][sub_index] = kind;
|
||||||
|
} else {
|
||||||
|
position = position >> page_bits;
|
||||||
|
const size_t sub_index = position % 32;
|
||||||
|
kinds[position / 32][sub_index] = kind;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
inline bool MemoryManager::IsBigPageContinous(size_t big_page_index) const {
|
inline bool MemoryManager::IsBigPageContinous(size_t big_page_index) const {
|
||||||
const u64 entry_mask = big_page_continous[big_page_index / continous_bits];
|
const u64 entry_mask = big_page_continous[big_page_index / continous_bits];
|
||||||
const size_t sub_index = big_page_index % continous_bits;
|
const size_t sub_index = big_page_index % continous_bits;
|
||||||
|
@ -92,8 +131,8 @@ inline void MemoryManager::SetBigPageContinous(size_t big_page_index, bool value
|
||||||
}
|
}
|
||||||
|
|
||||||
template <MemoryManager::EntryType entry_type>
|
template <MemoryManager::EntryType entry_type>
|
||||||
GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
|
GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size,
|
||||||
size_t size) {
|
PTEKind kind) {
|
||||||
u64 remaining_size{size};
|
u64 remaining_size{size};
|
||||||
if constexpr (entry_type == EntryType::Mapped) {
|
if constexpr (entry_type == EntryType::Mapped) {
|
||||||
page_table.ReserveRange(gpu_addr, size);
|
page_table.ReserveRange(gpu_addr, size);
|
||||||
|
@ -102,6 +141,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
|
||||||
const GPUVAddr current_gpu_addr = gpu_addr + offset;
|
const GPUVAddr current_gpu_addr = gpu_addr + offset;
|
||||||
[[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr);
|
[[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr);
|
||||||
SetEntry<false>(current_gpu_addr, entry_type);
|
SetEntry<false>(current_gpu_addr, entry_type);
|
||||||
|
SetKind<false>(current_gpu_addr, kind);
|
||||||
if (current_entry_type != entry_type) {
|
if (current_entry_type != entry_type) {
|
||||||
rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
|
rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
|
||||||
}
|
}
|
||||||
|
@ -118,12 +158,13 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
|
||||||
|
|
||||||
template <MemoryManager::EntryType entry_type>
|
template <MemoryManager::EntryType entry_type>
|
||||||
GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
|
GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
|
||||||
size_t size) {
|
size_t size, PTEKind kind) {
|
||||||
u64 remaining_size{size};
|
u64 remaining_size{size};
|
||||||
for (u64 offset{}; offset < size; offset += big_page_size) {
|
for (u64 offset{}; offset < size; offset += big_page_size) {
|
||||||
const GPUVAddr current_gpu_addr = gpu_addr + offset;
|
const GPUVAddr current_gpu_addr = gpu_addr + offset;
|
||||||
[[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr);
|
[[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr);
|
||||||
SetEntry<true>(current_gpu_addr, entry_type);
|
SetEntry<true>(current_gpu_addr, entry_type);
|
||||||
|
SetKind<true>(current_gpu_addr, kind);
|
||||||
if (current_entry_type != entry_type) {
|
if (current_entry_type != entry_type) {
|
||||||
rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size);
|
rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size);
|
||||||
}
|
}
|
||||||
|
@ -159,19 +200,19 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
|
||||||
rasterizer = rasterizer_;
|
rasterizer = rasterizer_;
|
||||||
}
|
}
|
||||||
|
|
||||||
GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
|
GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind,
|
||||||
bool is_big_pages) {
|
bool is_big_pages) {
|
||||||
if (is_big_pages) [[likely]] {
|
if (is_big_pages) [[likely]] {
|
||||||
return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
|
return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind);
|
||||||
}
|
}
|
||||||
return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
|
return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind);
|
||||||
}
|
}
|
||||||
|
|
||||||
GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) {
|
GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) {
|
||||||
if (is_big_pages) [[likely]] {
|
if (is_big_pages) [[likely]] {
|
||||||
return BigPageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
|
return BigPageTableOp<EntryType::Reserved>(gpu_addr, 0, size, PTEKind::INVALID);
|
||||||
}
|
}
|
||||||
return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
|
return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size, PTEKind::INVALID);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
|
void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
|
||||||
|
@ -188,8 +229,8 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
|
||||||
rasterizer->UnmapMemory(*cpu_addr, map_size);
|
rasterizer->UnmapMemory(*cpu_addr, map_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
BigPageTableOp<EntryType::Free>(gpu_addr, 0, size);
|
BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
|
||||||
PageTableOp<EntryType::Free>(gpu_addr, 0, size);
|
PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
|
std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/multi_level_page_table.h"
|
#include "common/multi_level_page_table.h"
|
||||||
#include "common/virtual_buffer.h"
|
#include "common/virtual_buffer.h"
|
||||||
|
#include "video_core/pte_kind.h"
|
||||||
|
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
class RasterizerInterface;
|
class RasterizerInterface;
|
||||||
|
@ -98,7 +99,8 @@ public:
|
||||||
std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
|
std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
|
||||||
std::size_t size) const;
|
std::size_t size) const;
|
||||||
|
|
||||||
GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, bool is_big_pages = true);
|
GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
|
||||||
|
PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
|
||||||
GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true);
|
GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true);
|
||||||
void Unmap(GPUVAddr gpu_addr, std::size_t size);
|
void Unmap(GPUVAddr gpu_addr, std::size_t size);
|
||||||
|
|
||||||
|
@ -114,6 +116,8 @@ public:
|
||||||
return gpu_addr < address_space_size;
|
return gpu_addr < address_space_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PTEKind GetPageKind(GPUVAddr gpu_addr) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
|
template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
|
||||||
inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
|
inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
|
||||||
|
@ -166,10 +170,12 @@ private:
|
||||||
std::vector<u64> big_entries;
|
std::vector<u64> big_entries;
|
||||||
|
|
||||||
template <EntryType entry_type>
|
template <EntryType entry_type>
|
||||||
GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
|
GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size,
|
||||||
|
PTEKind kind);
|
||||||
|
|
||||||
template <EntryType entry_type>
|
template <EntryType entry_type>
|
||||||
GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
|
GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size,
|
||||||
|
PTEKind kind);
|
||||||
|
|
||||||
template <bool is_big_page>
|
template <bool is_big_page>
|
||||||
inline EntryType GetEntry(size_t position) const;
|
inline EntryType GetEntry(size_t position) const;
|
||||||
|
@ -177,6 +183,15 @@ private:
|
||||||
template <bool is_big_page>
|
template <bool is_big_page>
|
||||||
inline void SetEntry(size_t position, EntryType entry);
|
inline void SetEntry(size_t position, EntryType entry);
|
||||||
|
|
||||||
|
std::vector<std::array<PTEKind, 32>> kinds;
|
||||||
|
std::vector<std::array<PTEKind, 32>> big_kinds;
|
||||||
|
|
||||||
|
template <bool is_big_page>
|
||||||
|
inline PTEKind GetKind(size_t position) const;
|
||||||
|
|
||||||
|
template <bool is_big_page>
|
||||||
|
inline void SetKind(size_t position, PTEKind kind);
|
||||||
|
|
||||||
Common::MultiLevelPageTable<u32> page_table;
|
Common::MultiLevelPageTable<u32> page_table;
|
||||||
Common::VirtualBuffer<u32> big_page_table_cpu;
|
Common::VirtualBuffer<u32> big_page_table_cpu;
|
||||||
|
|
||||||
|
|
264
src/video_core/pte_kind.h
Normal file
264
src/video_core/pte_kind.h
Normal file
|
@ -0,0 +1,264 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
namespace Tegra {
|
||||||
|
|
||||||
|
// https://github.com/NVIDIA/open-gpu-doc/blob/master/manuals/volta/gv100/dev_mmu.ref.txt
|
||||||
|
enum class PTEKind : u8 {
|
||||||
|
INVALID = 0xff,
|
||||||
|
PITCH = 0x00,
|
||||||
|
Z16 = 0x01,
|
||||||
|
Z16_2C = 0x02,
|
||||||
|
Z16_MS2_2C = 0x03,
|
||||||
|
Z16_MS4_2C = 0x04,
|
||||||
|
Z16_MS8_2C = 0x05,
|
||||||
|
Z16_MS16_2C = 0x06,
|
||||||
|
Z16_2Z = 0x07,
|
||||||
|
Z16_MS2_2Z = 0x08,
|
||||||
|
Z16_MS4_2Z = 0x09,
|
||||||
|
Z16_MS8_2Z = 0x0a,
|
||||||
|
Z16_MS16_2Z = 0x0b,
|
||||||
|
Z16_2CZ = 0x36,
|
||||||
|
Z16_MS2_2CZ = 0x37,
|
||||||
|
Z16_MS4_2CZ = 0x38,
|
||||||
|
Z16_MS8_2CZ = 0x39,
|
||||||
|
Z16_MS16_2CZ = 0x5f,
|
||||||
|
Z16_4CZ = 0x0c,
|
||||||
|
Z16_MS2_4CZ = 0x0d,
|
||||||
|
Z16_MS4_4CZ = 0x0e,
|
||||||
|
Z16_MS8_4CZ = 0x0f,
|
||||||
|
Z16_MS16_4CZ = 0x10,
|
||||||
|
S8Z24 = 0x11,
|
||||||
|
S8Z24_1Z = 0x12,
|
||||||
|
S8Z24_MS2_1Z = 0x13,
|
||||||
|
S8Z24_MS4_1Z = 0x14,
|
||||||
|
S8Z24_MS8_1Z = 0x15,
|
||||||
|
S8Z24_MS16_1Z = 0x16,
|
||||||
|
S8Z24_2CZ = 0x17,
|
||||||
|
S8Z24_MS2_2CZ = 0x18,
|
||||||
|
S8Z24_MS4_2CZ = 0x19,
|
||||||
|
S8Z24_MS8_2CZ = 0x1a,
|
||||||
|
S8Z24_MS16_2CZ = 0x1b,
|
||||||
|
S8Z24_2CS = 0x1c,
|
||||||
|
S8Z24_MS2_2CS = 0x1d,
|
||||||
|
S8Z24_MS4_2CS = 0x1e,
|
||||||
|
S8Z24_MS8_2CS = 0x1f,
|
||||||
|
S8Z24_MS16_2CS = 0x20,
|
||||||
|
S8Z24_4CSZV = 0x21,
|
||||||
|
S8Z24_MS2_4CSZV = 0x22,
|
||||||
|
S8Z24_MS4_4CSZV = 0x23,
|
||||||
|
S8Z24_MS8_4CSZV = 0x24,
|
||||||
|
S8Z24_MS16_4CSZV = 0x25,
|
||||||
|
V8Z24_MS4_VC12 = 0x26,
|
||||||
|
V8Z24_MS4_VC4 = 0x27,
|
||||||
|
V8Z24_MS8_VC8 = 0x28,
|
||||||
|
V8Z24_MS8_VC24 = 0x29,
|
||||||
|
V8Z24_MS4_VC12_1ZV = 0x2e,
|
||||||
|
V8Z24_MS4_VC4_1ZV = 0x2f,
|
||||||
|
V8Z24_MS8_VC8_1ZV = 0x30,
|
||||||
|
V8Z24_MS8_VC24_1ZV = 0x31,
|
||||||
|
V8Z24_MS4_VC12_2CS = 0x32,
|
||||||
|
V8Z24_MS4_VC4_2CS = 0x33,
|
||||||
|
V8Z24_MS8_VC8_2CS = 0x34,
|
||||||
|
V8Z24_MS8_VC24_2CS = 0x35,
|
||||||
|
V8Z24_MS4_VC12_2CZV = 0x3a,
|
||||||
|
V8Z24_MS4_VC4_2CZV = 0x3b,
|
||||||
|
V8Z24_MS8_VC8_2CZV = 0x3c,
|
||||||
|
V8Z24_MS8_VC24_2CZV = 0x3d,
|
||||||
|
V8Z24_MS4_VC12_2ZV = 0x3e,
|
||||||
|
V8Z24_MS4_VC4_2ZV = 0x3f,
|
||||||
|
V8Z24_MS8_VC8_2ZV = 0x40,
|
||||||
|
V8Z24_MS8_VC24_2ZV = 0x41,
|
||||||
|
V8Z24_MS4_VC12_4CSZV = 0x42,
|
||||||
|
V8Z24_MS4_VC4_4CSZV = 0x43,
|
||||||
|
V8Z24_MS8_VC8_4CSZV = 0x44,
|
||||||
|
V8Z24_MS8_VC24_4CSZV = 0x45,
|
||||||
|
Z24S8 = 0x46,
|
||||||
|
Z24S8_1Z = 0x47,
|
||||||
|
Z24S8_MS2_1Z = 0x48,
|
||||||
|
Z24S8_MS4_1Z = 0x49,
|
||||||
|
Z24S8_MS8_1Z = 0x4a,
|
||||||
|
Z24S8_MS16_1Z = 0x4b,
|
||||||
|
Z24S8_2CS = 0x4c,
|
||||||
|
Z24S8_MS2_2CS = 0x4d,
|
||||||
|
Z24S8_MS4_2CS = 0x4e,
|
||||||
|
Z24S8_MS8_2CS = 0x4f,
|
||||||
|
Z24S8_MS16_2CS = 0x50,
|
||||||
|
Z24S8_2CZ = 0x51,
|
||||||
|
Z24S8_MS2_2CZ = 0x52,
|
||||||
|
Z24S8_MS4_2CZ = 0x53,
|
||||||
|
Z24S8_MS8_2CZ = 0x54,
|
||||||
|
Z24S8_MS16_2CZ = 0x55,
|
||||||
|
Z24S8_4CSZV = 0x56,
|
||||||
|
Z24S8_MS2_4CSZV = 0x57,
|
||||||
|
Z24S8_MS4_4CSZV = 0x58,
|
||||||
|
Z24S8_MS8_4CSZV = 0x59,
|
||||||
|
Z24S8_MS16_4CSZV = 0x5a,
|
||||||
|
Z24V8_MS4_VC12 = 0x5b,
|
||||||
|
Z24V8_MS4_VC4 = 0x5c,
|
||||||
|
Z24V8_MS8_VC8 = 0x5d,
|
||||||
|
Z24V8_MS8_VC24 = 0x5e,
|
||||||
|
YUV_B8C1_2Y = 0x60,
|
||||||
|
YUV_B8C2_2Y = 0x61,
|
||||||
|
YUV_B10C1_2Y = 0x62,
|
||||||
|
YUV_B10C2_2Y = 0x6b,
|
||||||
|
YUV_B12C1_2Y = 0x6c,
|
||||||
|
YUV_B12C2_2Y = 0x6d,
|
||||||
|
Z24V8_MS4_VC12_1ZV = 0x63,
|
||||||
|
Z24V8_MS4_VC4_1ZV = 0x64,
|
||||||
|
Z24V8_MS8_VC8_1ZV = 0x65,
|
||||||
|
Z24V8_MS8_VC24_1ZV = 0x66,
|
||||||
|
Z24V8_MS4_VC12_2CS = 0x67,
|
||||||
|
Z24V8_MS4_VC4_2CS = 0x68,
|
||||||
|
Z24V8_MS8_VC8_2CS = 0x69,
|
||||||
|
Z24V8_MS8_VC24_2CS = 0x6a,
|
||||||
|
Z24V8_MS4_VC12_2CZV = 0x6f,
|
||||||
|
Z24V8_MS4_VC4_2CZV = 0x70,
|
||||||
|
Z24V8_MS8_VC8_2CZV = 0x71,
|
||||||
|
Z24V8_MS8_VC24_2CZV = 0x72,
|
||||||
|
Z24V8_MS4_VC12_2ZV = 0x73,
|
||||||
|
Z24V8_MS4_VC4_2ZV = 0x74,
|
||||||
|
Z24V8_MS8_VC8_2ZV = 0x75,
|
||||||
|
Z24V8_MS8_VC24_2ZV = 0x76,
|
||||||
|
Z24V8_MS4_VC12_4CSZV = 0x77,
|
||||||
|
Z24V8_MS4_VC4_4CSZV = 0x78,
|
||||||
|
Z24V8_MS8_VC8_4CSZV = 0x79,
|
||||||
|
Z24V8_MS8_VC24_4CSZV = 0x7a,
|
||||||
|
ZF32 = 0x7b,
|
||||||
|
ZF32_1Z = 0x7c,
|
||||||
|
ZF32_MS2_1Z = 0x7d,
|
||||||
|
ZF32_MS4_1Z = 0x7e,
|
||||||
|
ZF32_MS8_1Z = 0x7f,
|
||||||
|
ZF32_MS16_1Z = 0x80,
|
||||||
|
ZF32_2CS = 0x81,
|
||||||
|
ZF32_MS2_2CS = 0x82,
|
||||||
|
ZF32_MS4_2CS = 0x83,
|
||||||
|
ZF32_MS8_2CS = 0x84,
|
||||||
|
ZF32_MS16_2CS = 0x85,
|
||||||
|
ZF32_2CZ = 0x86,
|
||||||
|
ZF32_MS2_2CZ = 0x87,
|
||||||
|
ZF32_MS4_2CZ = 0x88,
|
||||||
|
ZF32_MS8_2CZ = 0x89,
|
||||||
|
ZF32_MS16_2CZ = 0x8a,
|
||||||
|
X8Z24_X16V8S8_MS4_VC12 = 0x8b,
|
||||||
|
X8Z24_X16V8S8_MS4_VC4 = 0x8c,
|
||||||
|
X8Z24_X16V8S8_MS8_VC8 = 0x8d,
|
||||||
|
X8Z24_X16V8S8_MS8_VC24 = 0x8e,
|
||||||
|
X8Z24_X16V8S8_MS4_VC12_1CS = 0x8f,
|
||||||
|
X8Z24_X16V8S8_MS4_VC4_1CS = 0x90,
|
||||||
|
X8Z24_X16V8S8_MS8_VC8_1CS = 0x91,
|
||||||
|
X8Z24_X16V8S8_MS8_VC24_1CS = 0x92,
|
||||||
|
X8Z24_X16V8S8_MS4_VC12_1ZV = 0x97,
|
||||||
|
X8Z24_X16V8S8_MS4_VC4_1ZV = 0x98,
|
||||||
|
X8Z24_X16V8S8_MS8_VC8_1ZV = 0x99,
|
||||||
|
X8Z24_X16V8S8_MS8_VC24_1ZV = 0x9a,
|
||||||
|
X8Z24_X16V8S8_MS4_VC12_1CZV = 0x9b,
|
||||||
|
X8Z24_X16V8S8_MS4_VC4_1CZV = 0x9c,
|
||||||
|
X8Z24_X16V8S8_MS8_VC8_1CZV = 0x9d,
|
||||||
|
X8Z24_X16V8S8_MS8_VC24_1CZV = 0x9e,
|
||||||
|
X8Z24_X16V8S8_MS4_VC12_2CS = 0x9f,
|
||||||
|
X8Z24_X16V8S8_MS4_VC4_2CS = 0xa0,
|
||||||
|
X8Z24_X16V8S8_MS8_VC8_2CS = 0xa1,
|
||||||
|
X8Z24_X16V8S8_MS8_VC24_2CS = 0xa2,
|
||||||
|
X8Z24_X16V8S8_MS4_VC12_2CSZV = 0xa3,
|
||||||
|
X8Z24_X16V8S8_MS4_VC4_2CSZV = 0xa4,
|
||||||
|
X8Z24_X16V8S8_MS8_VC8_2CSZV = 0xa5,
|
||||||
|
X8Z24_X16V8S8_MS8_VC24_2CSZV = 0xa6,
|
||||||
|
ZF32_X16V8S8_MS4_VC12 = 0xa7,
|
||||||
|
ZF32_X16V8S8_MS4_VC4 = 0xa8,
|
||||||
|
ZF32_X16V8S8_MS8_VC8 = 0xa9,
|
||||||
|
ZF32_X16V8S8_MS8_VC24 = 0xaa,
|
||||||
|
ZF32_X16V8S8_MS4_VC12_1CS = 0xab,
|
||||||
|
ZF32_X16V8S8_MS4_VC4_1CS = 0xac,
|
||||||
|
ZF32_X16V8S8_MS8_VC8_1CS = 0xad,
|
||||||
|
ZF32_X16V8S8_MS8_VC24_1CS = 0xae,
|
||||||
|
ZF32_X16V8S8_MS4_VC12_1ZV = 0xb3,
|
||||||
|
ZF32_X16V8S8_MS4_VC4_1ZV = 0xb4,
|
||||||
|
ZF32_X16V8S8_MS8_VC8_1ZV = 0xb5,
|
||||||
|
ZF32_X16V8S8_MS8_VC24_1ZV = 0xb6,
|
||||||
|
ZF32_X16V8S8_MS4_VC12_1CZV = 0xb7,
|
||||||
|
ZF32_X16V8S8_MS4_VC4_1CZV = 0xb8,
|
||||||
|
ZF32_X16V8S8_MS8_VC8_1CZV = 0xb9,
|
||||||
|
ZF32_X16V8S8_MS8_VC24_1CZV = 0xba,
|
||||||
|
ZF32_X16V8S8_MS4_VC12_2CS = 0xbb,
|
||||||
|
ZF32_X16V8S8_MS4_VC4_2CS = 0xbc,
|
||||||
|
ZF32_X16V8S8_MS8_VC8_2CS = 0xbd,
|
||||||
|
ZF32_X16V8S8_MS8_VC24_2CS = 0xbe,
|
||||||
|
ZF32_X16V8S8_MS4_VC12_2CSZV = 0xbf,
|
||||||
|
ZF32_X16V8S8_MS4_VC4_2CSZV = 0xc0,
|
||||||
|
ZF32_X16V8S8_MS8_VC8_2CSZV = 0xc1,
|
||||||
|
ZF32_X16V8S8_MS8_VC24_2CSZV = 0xc2,
|
||||||
|
ZF32_X24S8 = 0xc3,
|
||||||
|
ZF32_X24S8_1CS = 0xc4,
|
||||||
|
ZF32_X24S8_MS2_1CS = 0xc5,
|
||||||
|
ZF32_X24S8_MS4_1CS = 0xc6,
|
||||||
|
ZF32_X24S8_MS8_1CS = 0xc7,
|
||||||
|
ZF32_X24S8_MS16_1CS = 0xc8,
|
||||||
|
ZF32_X24S8_2CSZV = 0xce,
|
||||||
|
ZF32_X24S8_MS2_2CSZV = 0xcf,
|
||||||
|
ZF32_X24S8_MS4_2CSZV = 0xd0,
|
||||||
|
ZF32_X24S8_MS8_2CSZV = 0xd1,
|
||||||
|
ZF32_X24S8_MS16_2CSZV = 0xd2,
|
||||||
|
ZF32_X24S8_2CS = 0xd3,
|
||||||
|
ZF32_X24S8_MS2_2CS = 0xd4,
|
||||||
|
ZF32_X24S8_MS4_2CS = 0xd5,
|
||||||
|
ZF32_X24S8_MS8_2CS = 0xd6,
|
||||||
|
ZF32_X24S8_MS16_2CS = 0xd7,
|
||||||
|
S8 = 0x2a,
|
||||||
|
S8_2S = 0x2b,
|
||||||
|
GENERIC_16BX2 = 0xfe,
|
||||||
|
C32_2C = 0xd8,
|
||||||
|
C32_2CBR = 0xd9,
|
||||||
|
C32_2CBA = 0xda,
|
||||||
|
C32_2CRA = 0xdb,
|
||||||
|
C32_2BRA = 0xdc,
|
||||||
|
C32_MS2_2C = 0xdd,
|
||||||
|
C32_MS2_2CBR = 0xde,
|
||||||
|
C32_MS2_4CBRA = 0xcc,
|
||||||
|
C32_MS4_2C = 0xdf,
|
||||||
|
C32_MS4_2CBR = 0xe0,
|
||||||
|
C32_MS4_2CBA = 0xe1,
|
||||||
|
C32_MS4_2CRA = 0xe2,
|
||||||
|
C32_MS4_2BRA = 0xe3,
|
||||||
|
C32_MS4_4CBRA = 0x2c,
|
||||||
|
C32_MS8_MS16_2C = 0xe4,
|
||||||
|
C32_MS8_MS16_2CRA = 0xe5,
|
||||||
|
C64_2C = 0xe6,
|
||||||
|
C64_2CBR = 0xe7,
|
||||||
|
C64_2CBA = 0xe8,
|
||||||
|
C64_2CRA = 0xe9,
|
||||||
|
C64_2BRA = 0xea,
|
||||||
|
C64_MS2_2C = 0xeb,
|
||||||
|
C64_MS2_2CBR = 0xec,
|
||||||
|
C64_MS2_4CBRA = 0xcd,
|
||||||
|
C64_MS4_2C = 0xed,
|
||||||
|
C64_MS4_2CBR = 0xee,
|
||||||
|
C64_MS4_2CBA = 0xef,
|
||||||
|
C64_MS4_2CRA = 0xf0,
|
||||||
|
C64_MS4_2BRA = 0xf1,
|
||||||
|
C64_MS4_4CBRA = 0x2d,
|
||||||
|
C64_MS8_MS16_2C = 0xf2,
|
||||||
|
C64_MS8_MS16_2CRA = 0xf3,
|
||||||
|
C128_2C = 0xf4,
|
||||||
|
C128_2CR = 0xf5,
|
||||||
|
C128_MS2_2C = 0xf6,
|
||||||
|
C128_MS2_2CR = 0xf7,
|
||||||
|
C128_MS4_2C = 0xf8,
|
||||||
|
C128_MS4_2CR = 0xf9,
|
||||||
|
C128_MS8_MS16_2C = 0xfa,
|
||||||
|
C128_MS8_MS16_2CR = 0xfb,
|
||||||
|
X8C24 = 0xfc,
|
||||||
|
PITCH_NO_SWIZZLE = 0xfd,
|
||||||
|
SMSKED_MESSAGE = 0xca,
|
||||||
|
SMHOST_MESSAGE = 0xcb,
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr bool IsPitchKind(PTEKind kind) {
|
||||||
|
return kind == PTEKind::PITCH || kind == PTEKind::PITCH_NO_SWIZZLE;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Tegra
|
Loading…
Reference in a new issue