Merge pull request #178 from Subv/command_buffers

GPU: Added a command processor to decode the GPU pushbuffers and forward the commands to their respective engines
This commit is contained in:
bunnei 2018-02-12 13:51:52 -05:00 committed by GitHub
commit be5ba4d952
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 364 additions and 23 deletions

View file

@ -139,8 +139,6 @@ add_library(core STATIC
hle/service/nvdrv/devices/nvmap.h hle/service/nvdrv/devices/nvmap.h
hle/service/nvdrv/interface.cpp hle/service/nvdrv/interface.cpp
hle/service/nvdrv/interface.h hle/service/nvdrv/interface.h
hle/service/nvdrv/memory_manager.cpp
hle/service/nvdrv/memory_manager.h
hle/service/nvdrv/nvdrv.cpp hle/service/nvdrv/nvdrv.cpp
hle/service/nvdrv/nvdrv.h hle/service/nvdrv/nvdrv.h
hle/service/nvdrv/nvmemp.cpp hle/service/nvdrv/nvmemp.cpp

View file

@ -154,6 +154,8 @@ System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
break; break;
} }
gpu_core = std::make_unique<Tegra::GPU>();
telemetry_session = std::make_unique<Core::TelemetrySession>(); telemetry_session = std::make_unique<Core::TelemetrySession>();
CoreTiming::Init(); CoreTiming::Init();

View file

@ -11,6 +11,7 @@
#include "core/memory.h" #include "core/memory.h"
#include "core/perf_stats.h" #include "core/perf_stats.h"
#include "core/telemetry_session.h" #include "core/telemetry_session.h"
#include "video_core/gpu.h"
class EmuWindow; class EmuWindow;
class ARM_Interface; class ARM_Interface;
@ -102,6 +103,10 @@ public:
return *cpu_core; return *cpu_core;
} }
Tegra::GPU& GPU() {
return *gpu_core;
}
PerfStats perf_stats; PerfStats perf_stats;
FrameLimiter frame_limiter; FrameLimiter frame_limiter;
@ -138,6 +143,8 @@ private:
///< ARM11 CPU core ///< ARM11 CPU core
std::unique_ptr<ARM_Interface> cpu_core; std::unique_ptr<ARM_Interface> cpu_core;
std::unique_ptr<Tegra::GPU> gpu_core;
/// When true, signals that a reschedule should happen /// When true, signals that a reschedule should happen
bool reschedule_pending{}; bool reschedule_pending{};

View file

@ -4,6 +4,7 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
#include "core/hle/service/nvdrv/devices/nvmap.h" #include "core/hle/service/nvdrv/devices/nvmap.h"
@ -44,11 +45,12 @@ u32 nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<u8>&
LOG_DEBUG(Service_NVDRV, "called, pages=%x, page_size=%x, flags=%x", params.pages, LOG_DEBUG(Service_NVDRV, "called, pages=%x, page_size=%x, flags=%x", params.pages,
params.page_size, params.flags); params.page_size, params.flags);
auto& gpu = Core::System::GetInstance().GPU();
const u64 size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)}; const u64 size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)};
if (params.flags & 1) { if (params.flags & 1) {
params.offset = memory_manager->AllocateSpace(params.offset, size, 1); params.offset = gpu.memory_manager->AllocateSpace(params.offset, size, 1);
} else { } else {
params.offset = memory_manager->AllocateSpace(size, params.align); params.offset = gpu.memory_manager->AllocateSpace(size, params.align);
} }
std::memcpy(output.data(), &params, output.size()); std::memcpy(output.data(), &params, output.size());
@ -71,10 +73,12 @@ u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& ou
auto object = nvmap_dev->GetObject(params.nvmap_handle); auto object = nvmap_dev->GetObject(params.nvmap_handle);
ASSERT(object); ASSERT(object);
auto& gpu = Core::System::GetInstance().GPU();
if (params.flags & 1) { if (params.flags & 1) {
params.offset = memory_manager->MapBufferEx(object->addr, params.offset, object->size); params.offset = gpu.memory_manager->MapBufferEx(object->addr, params.offset, object->size);
} else { } else {
params.offset = memory_manager->MapBufferEx(object->addr, object->size); params.offset = gpu.memory_manager->MapBufferEx(object->addr, object->size);
} }
std::memcpy(output.data(), &params, output.size()); std::memcpy(output.data(), &params, output.size());

View file

@ -10,7 +10,6 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "common/swap.h" #include "common/swap.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h" #include "core/hle/service/nvdrv/devices/nvdevice.h"
#include "core/hle/service/nvdrv/memory_manager.h"
namespace Service { namespace Service {
namespace Nvidia { namespace Nvidia {
@ -20,9 +19,7 @@ class nvmap;
class nvhost_as_gpu final : public nvdevice { class nvhost_as_gpu final : public nvdevice {
public: public:
nvhost_as_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvdevice(), nvmap_dev(std::move(nvmap_dev)) { nvhost_as_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {}
memory_manager = std::make_shared<MemoryManager>();
}
~nvhost_as_gpu() override = default; ~nvhost_as_gpu() override = default;
u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
@ -101,7 +98,6 @@ private:
u32 GetVARegions(const std::vector<u8>& input, std::vector<u8>& output); u32 GetVARegions(const std::vector<u8>& input, std::vector<u8>& output);
std::shared_ptr<nvmap> nvmap_dev; std::shared_ptr<nvmap> nvmap_dev;
std::shared_ptr<MemoryManager> memory_manager;
}; };
} // namespace Devices } // namespace Devices

View file

@ -5,6 +5,7 @@
#include <map> #include <map>
#include "common/assert.h" #include "common/assert.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h" #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
namespace Service { namespace Service {
@ -131,7 +132,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
params.num_entries * sizeof(IoctlGpfifoEntry)); params.num_entries * sizeof(IoctlGpfifoEntry));
for (auto entry : entries) { for (auto entry : entries) {
VAddr va_addr = entry.Address(); VAddr va_addr = entry.Address();
// TODO(ogniK): Process these Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz);
} }
params.fence_out.id = 0; params.fence_out.id = 0;
params.fence_out.value = 0; params.fence_out.value = 0;

View file

@ -4,6 +4,7 @@
#pragma once #pragma once
#include <memory>
#include <vector> #include <vector>
#include "common/common_types.h" #include "common/common_types.h"
#include "common/swap.h" #include "common/swap.h"
@ -12,12 +13,14 @@
namespace Service { namespace Service {
namespace Nvidia { namespace Nvidia {
namespace Devices { namespace Devices {
class nvmap;
constexpr u32 NVGPU_IOCTL_MAGIC('H'); constexpr u32 NVGPU_IOCTL_MAGIC('H');
constexpr u32 NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO(0x8); constexpr u32 NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO(0x8);
class nvhost_gpu final : public nvdevice { class nvhost_gpu final : public nvdevice {
public: public:
nvhost_gpu() = default; nvhost_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {}
~nvhost_gpu() override = default; ~nvhost_gpu() override = default;
u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
@ -132,6 +135,8 @@ private:
u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output); u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output);
u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output); u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output);
u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output); u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output);
std::shared_ptr<nvmap> nvmap_dev;
}; };
} // namespace Devices } // namespace Devices

View file

@ -32,11 +32,11 @@ void InstallInterfaces(SM::ServiceManager& service_manager) {
Module::Module() { Module::Module() {
auto nvmap_dev = std::make_shared<Devices::nvmap>(); auto nvmap_dev = std::make_shared<Devices::nvmap>();
devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(nvmap_dev); devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(nvmap_dev);
devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(nvmap_dev);
devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(); devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>();
devices["/dev/nvmap"] = nvmap_dev; devices["/dev/nvmap"] = nvmap_dev;
devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(nvmap_dev); devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(nvmap_dev);
devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(); devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>();
devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>();
} }
u32 Module::Open(std::string device_name) { u32 Module::Open(std::string device_name) {

View file

@ -1,4 +1,15 @@
add_library(video_core STATIC add_library(video_core STATIC
command_processor.cpp
command_processor.h
engines/fermi_2d.cpp
engines/fermi_2d.h
engines/maxwell_3d.cpp
engines/maxwell_3d.h
engines/maxwell_compute.cpp
engines/maxwell_compute.h
gpu.h
memory_manager.cpp
memory_manager.h
renderer_base.cpp renderer_base.cpp
renderer_base.h renderer_base.h
renderer_opengl/gl_resource_manager.h renderer_opengl/gl_resource_manager.h

View file

@ -0,0 +1,119 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <cstddef>
#include <memory>
#include <utility>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/vector_math.h"
#include "core/memory.h"
#include "core/tracer/recorder.h"
#include "video_core/command_processor.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_compute.h"
#include "video_core/gpu.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
namespace Tegra {
/// Method ids that GPU::WriteReg handles itself instead of forwarding them to a bound engine.
enum class BufferMethods {
/// Binds the subchannel the method was submitted on to the engine id carried in the value.
BindObject = 0,
/// Exclusive upper bound of the special method range: every method below this value is
/// consumed by GPU::WriteReg and never reaches an engine.
CountBufferMethods = 0x100,
};
/// Handles one decoded method write.
///
/// Methods below BufferMethods::CountBufferMethods are consumed here: BindObject records which
/// engine id the subchannel is bound to, every other method in that range is only reported as
/// unimplemented. All remaining methods are forwarded to the engine bound to the subchannel.
///
/// @param method     Method (register) id taken from the command header.
/// @param subchannel Subchannel the method was submitted on.
/// @param value      Argument of the method.
void GPU::WriteReg(u32 method, u32 subchannel, u32 value) {
    LOG_WARNING(HW_GPU, "Processing method %08X on subchannel %u value %08X", method, subchannel,
                value);

    const u32 bind_method = static_cast<u32>(BufferMethods::BindObject);
    const u32 special_method_count = static_cast<u32>(BufferMethods::CountBufferMethods);

    if (method == bind_method) {
        // The value of a bind is the engine id; binding a subchannel that already has an engine
        // trips the assertion.
        LOG_DEBUG(HW_GPU, "Binding subchannel %u to engine %u", subchannel, value);
        ASSERT(bound_engines.find(subchannel) == bound_engines.end());
        bound_engines[subchannel] = static_cast<EngineID>(value);
    } else if (method < special_method_count) {
        // TODO(Subv): Research and implement these methods.
        LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
    } else {
        // Engine methods require a previous bind on this subchannel.
        ASSERT(bound_engines.find(subchannel) != bound_engines.end());

        switch (bound_engines[subchannel]) {
        case EngineID::FERMI_TWOD_A:
            fermi_2d->WriteReg(method, value);
            break;
        case EngineID::MAXWELL_B:
            maxwell_3d->WriteReg(method, value);
            break;
        case EngineID::MAXWELL_COMPUTE_B:
            maxwell_compute->WriteReg(method, value);
            break;
        default:
            UNIMPLEMENTED();
        }
    }
}
/// Decodes a command list and issues one WriteReg call per decoded method write.
///
/// The list is a sequence of 32-bit command headers, each followed by its arguments (except for
/// inline headers, which carry their single value inside the header itself).
///
/// @param address GPU virtual address of the first command header.
/// @param size    Length of the list in 32-bit words (headers and arguments together).
void GPU::ProcessCommandList(GPUVAddr address, u32 size) {
    // TODO(Subv): PhysicalToVirtualAddress is a misnomer, it converts a GPU VAddr into an
    // application VAddr.
    const VAddr list_begin = memory_manager->PhysicalToVirtualAddress(address);
    const VAddr list_end = list_begin + size * sizeof(CommandHeader);
    VAddr cursor = list_begin;

    // Reads the 32-bit word under the cursor and moves the cursor past it.
    const auto next_word = [&cursor]() -> u32 {
        const u32 word = Memory::Read32(cursor);
        cursor += sizeof(u32);
        return word;
    };

    while (cursor < list_end) {
        const CommandHeader header = {next_word()};
        const u32 arg_count = header.arg_count.Value();

        switch (header.mode.Value()) {
        case SubmissionMode::IncreasingOld:
        case SubmissionMode::Increasing:
            // Argument number `index` is written to method + index.
            for (u32 index = 0; index < arg_count; ++index) {
                WriteReg(header.method + index, header.subchannel, next_word());
            }
            break;
        case SubmissionMode::NonIncreasingOld:
        case SubmissionMode::NonIncreasing:
            // Every argument is written to the method named in the header.
            for (u32 index = 0; index < arg_count; ++index) {
                WriteReg(header.method, header.subchannel, next_word());
            }
            break;
        case SubmissionMode::IncreaseOnce:
            ASSERT(header.arg_count.Value() >= 1);
            // The first argument goes to the method named in the header, all following arguments
            // go to the method right after it.
            WriteReg(header.method, header.subchannel, next_word());
            for (u32 index = 1; index < arg_count; ++index) {
                WriteReg(header.method + 1, header.subchannel, next_word());
            }
            break;
        case SubmissionMode::Inline:
            // The register value is stored in the bits 16-28 as an immediate
            WriteReg(header.method, header.subchannel, header.inline_data);
            break;
        default:
            UNIMPLEMENTED();
        }
    }
}
} // namespace Tegra

View file

@ -0,0 +1,39 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <type_traits>
#include "common/bit_field.h"
#include "common/common_types.h"
namespace Tegra {
/// Value of the `mode` field of a CommandHeader. It selects how GPU::ProcessCommandList maps the
/// arguments that follow a header onto methods.
enum class SubmissionMode : u32 {
// Handled exactly like Increasing by the command processor.
IncreasingOld = 0,
// Argument i is written to method + i.
Increasing = 1,
// Handled exactly like NonIncreasing by the command processor.
NonIncreasingOld = 2,
// Every argument is written to the same method.
NonIncreasing = 3,
// No arguments follow; the value is the header's inline_data field.
Inline = 4,
// The first argument is written to method, all further arguments to method + 1.
IncreaseOnce = 5
};
/// One 32-bit command header of a command list, viewed either as the raw word or as its fields.
union CommandHeader {
// Raw header word. As the first member it is what `CommandHeader{word}` initializes.
u32 hex;
// Bits 0-12: method (register) id the arguments are written to.
BitField<0, 13, u32> method;
// Bits 13-15: subchannel the method is submitted on.
BitField<13, 3, u32> subchannel;
// Bits 16-28: number of 32-bit arguments following the header (all modes except Inline).
BitField<16, 13, u32> arg_count;
// Bits 16-28: immediate value; same bits as arg_count, used when mode is Inline.
BitField<16, 13, u32> inline_data;
// Bits 29-31: how the arguments are mapped onto methods.
BitField<29, 3, SubmissionMode> mode;
};
// The header is read straight from guest memory as a u32, so it must stay a standard-layout type
// of exactly that size.
static_assert(std::is_standard_layout<CommandHeader>::value == true,
"CommandHeader does not use standard layout");
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
// NOTE(review): free-function declaration taking an application VAddr. The command processor
// implementation visible alongside this header only defines the member function
// GPU::ProcessCommandList(GPUVAddr, u32) - confirm whether a definition of this free function
// exists anywhere, otherwise this declaration looks like a leftover.
void ProcessCommandList(VAddr address, u32 size);
} // namespace Tegra

View file

@ -0,0 +1,13 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/engines/fermi_2d.h"
namespace Tegra {
namespace Engines {
/// Placeholder implementation: the write is accepted and discarded, no register state is kept.
void Fermi2D::WriteReg(u32 method, u32 value) {}
} // namespace Engines
} // namespace Tegra

View file

@ -0,0 +1,22 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
namespace Tegra {
namespace Engines {
/// 2D engine. The GPU command processor forwards method writes of subchannels bound to
/// EngineID::FERMI_TWOD_A to an instance of this class.
class Fermi2D final {
public:
Fermi2D() = default;
~Fermi2D() = default;
/// Write the value to the register identified by method.
/// @param method Method (register) id being written.
/// @param value  Value to write.
void WriteReg(u32 method, u32 value);
};
} // namespace Engines
} // namespace Tegra

View file

@ -0,0 +1,13 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/engines/maxwell_3d.h"
namespace Tegra {
namespace Engines {
/// Placeholder implementation: the write is accepted and discarded, no register state is kept.
void Maxwell3D::WriteReg(u32 method, u32 value) {}
} // namespace Engines
} // namespace Tegra

View file

@ -0,0 +1,22 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
namespace Tegra {
namespace Engines {
/// 3D engine. The GPU command processor forwards method writes of subchannels bound to
/// EngineID::MAXWELL_B to an instance of this class.
class Maxwell3D final {
public:
Maxwell3D() = default;
~Maxwell3D() = default;
/// Write the value to the register identified by method.
/// @param method Method (register) id being written.
/// @param value  Value to write.
void WriteReg(u32 method, u32 value);
};
} // namespace Engines
} // namespace Tegra

View file

@ -0,0 +1,13 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/engines/maxwell_compute.h"
namespace Tegra {
namespace Engines {
/// Placeholder implementation: the write is accepted and discarded, no register state is kept.
void MaxwellCompute::WriteReg(u32 method, u32 value) {}
} // namespace Engines
} // namespace Tegra

View file

@ -0,0 +1,22 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
namespace Tegra {
namespace Engines {
/// Compute engine. The GPU command processor forwards method writes of subchannels bound to
/// EngineID::MAXWELL_COMPUTE_B to an instance of this class.
class MaxwellCompute final {
public:
MaxwellCompute() = default;
~MaxwellCompute() = default;
/// Write the value to the register identified by method.
/// @param method Method (register) id being written.
/// @param value  Value to write.
void WriteReg(u32 method, u32 value);
};
} // namespace Engines
} // namespace Tegra

55
src/video_core/gpu.h Normal file
View file

@ -0,0 +1,55 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <unordered_map>
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_compute.h"
#include "video_core/memory_manager.h"
namespace Tegra {
/// Engine ids a command subchannel can be bound to. GPU::WriteReg casts the raw value of a bind
/// method to this type and later uses it to pick the engine that receives a register write.
enum class EngineID {
FERMI_TWOD_A = 0x902D, // 2D Engine
MAXWELL_B = 0xB197, // 3D Engine
MAXWELL_COMPUTE_B = 0xB1C0, // Compute engine
// The ids below have no engine object in the GPU class; register writes on a subchannel bound
// to one of them end up in the UNIMPLEMENTED() branch of GPU::WriteReg.
KEPLER_INLINE_TO_MEMORY_B = 0xA140,
MAXWELL_DMA_COPY_A = 0xB0B5,
};
/// Emulated GPU: owns the GPU memory manager and one instance of each supported engine, and
/// turns command lists into register writes on those engines.
class GPU final {
public:
    /// Creates the memory manager and the 3D, 2D and compute engines.
    GPU()
        : memory_manager{std::make_unique<MemoryManager>()},
          maxwell_3d{std::make_unique<Engines::Maxwell3D>()},
          fermi_2d{std::make_unique<Engines::Fermi2D>()},
          maxwell_compute{std::make_unique<Engines::MaxwellCompute>()} {}
    ~GPU() = default;

    /// Processes a command list stored at the specified address in GPU memory.
    void ProcessCommandList(GPUVAddr address, u32 size);

    /// Manager of the GPU address space; public so that other components can reach it through
    /// the GPU instance.
    std::unique_ptr<MemoryManager> memory_manager;

private:
    /// Writes a single register in the engine bound to the specified subchannel
    void WriteReg(u32 method, u32 subchannel, u32 value);

    /// Mapping of command subchannels to their bound engine ids.
    std::unordered_map<u32, EngineID> bound_engines;

    /// 3D engine
    std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
    /// 2D engine
    std::unique_ptr<Engines::Fermi2D> fermi_2d;
    /// Compute engine
    std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
};
} // namespace Tegra

View file

@ -3,10 +3,9 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include "common/assert.h" #include "common/assert.h"
#include "core/hle/service/nvdrv/memory_manager.h" #include "video_core/memory_manager.h"
namespace Service { namespace Tegra {
namespace Nvidia {
PAddr MemoryManager::AllocateSpace(u64 size, u64 align) { PAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
boost::optional<PAddr> paddr = FindFreeBlock(size, align); boost::optional<PAddr> paddr = FindFreeBlock(size, align);
@ -108,5 +107,4 @@ VAddr& MemoryManager::PageSlot(PAddr paddr) {
return (*block)[(paddr >> Memory::PAGE_BITS) & PAGE_BLOCK_MASK]; return (*block)[(paddr >> Memory::PAGE_BITS) & PAGE_BLOCK_MASK];
} }
} // namespace Nvidia } // namespace Tegra
} // namespace Service

View file

@ -9,8 +9,10 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "core/memory.h" #include "core/memory.h"
namespace Service { namespace Tegra {
namespace Nvidia {
/// Virtual addresses in the GPU's memory map are 64 bit.
using GPUVAddr = u64;
class MemoryManager final { class MemoryManager final {
public: public:
@ -44,5 +46,4 @@ private:
std::array<std::unique_ptr<PageBlock>, PAGE_TABLE_SIZE> page_table{}; std::array<std::unique_ptr<PageBlock>, PAGE_TABLE_SIZE> page_table{};
}; };
} // namespace Nvidia } // namespace Tegra
} // namespace Service