mirror of
https://git.suyu.dev/suyu/suyu.git
synced 2024-12-23 08:50:57 +01:00
hle service: nvdrv: nvhost_gpu: Update to use SyncpointManager and other improvements.
- Refactor so that SubmitGPFIFO and KickoffPB use shared functionality.
- Implement add_wait and add_increment flags.
This commit is contained in:
parent
c6e1c46ac7
commit
e67b8678f8
3 changed files with 108 additions and 48 deletions
|
@ -7,14 +7,17 @@
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
|
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
|
||||||
|
#include "core/hle/service/nvdrv/syncpoint_manager.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
|
|
||||||
namespace Service::Nvidia::Devices {
|
namespace Service::Nvidia::Devices {
|
||||||
|
|
||||||
nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
|
nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
|
||||||
: nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
|
SyncpointManager& syncpoint_manager)
|
||||||
|
: nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager{syncpoint_manager} {}
|
||||||
|
|
||||||
nvhost_gpu::~nvhost_gpu() = default;
|
nvhost_gpu::~nvhost_gpu() = default;
|
||||||
|
|
||||||
u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
|
u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
|
||||||
|
@ -126,10 +129,9 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou
|
||||||
params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
|
params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
|
||||||
params.unk3);
|
params.unk3);
|
||||||
|
|
||||||
auto& gpu = system.GPU();
|
params.fence_out.id = syncpoint_manager.AllocateSyncpoint();
|
||||||
params.fence_out.id = assigned_syncpoints;
|
params.fence_out.value = syncpoint_manager.RefreshSyncpoint(params.fence_out.id);
|
||||||
params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints);
|
|
||||||
assigned_syncpoints++;
|
|
||||||
std::memcpy(output.data(), &params, output.size());
|
std::memcpy(output.data(), &params, output.size());
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -145,37 +147,95 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds the four command words that make the GPU wait on `fence`:
/// a FenceValue method header, the fence's value as its argument, a FenceAction method
/// header, and an Acquire action targeting the fence's syncpoint id.
static std::vector<Tegra::CommandHeader> BuildWaitCommandList(Fence fence) {
    std::vector<Tegra::CommandHeader> commands;

    // Word 0/1: write the value to wait for.
    commands.push_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
                                                 Tegra::SubmissionMode::Increasing));
    commands.push_back({fence.value});

    // Word 2/3: acquire (wait on) the syncpoint identified by the fence.
    commands.push_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
                                                 Tegra::SubmissionMode::Increasing));
    commands.push_back(
        Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, fence.id));

    return commands;
}
|
||||||
|
|
||||||
|
/// Builds a command list that increments the syncpoint of `fence` `add_increment` times.
/// The list starts with a FenceValue method header and a zero argument, followed by one
/// (FenceAction header, Increment action) pair per requested increment.
static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(Fence fence, u32 add_increment) {
    std::vector<Tegra::CommandHeader> commands;

    commands.push_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
                                                 Tegra::SubmissionMode::Increasing));
    // Value-initialized word: the FenceValue argument is left at zero.
    commands.push_back(Tegra::CommandHeader{});

    // Both words of an increment pair are the same for every repetition, so build them once.
    const Tegra::CommandHeader action_header = Tegra::BuildCommandHeader(
        Tegra::BufferMethods::FenceAction, 1, Tegra::SubmissionMode::Increasing);
    const Tegra::CommandHeader increment_action =
        Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Increment, fence.id);

    for (u32 remaining = add_increment; remaining != 0; --remaining) {
        commands.push_back(action_header);
        commands.push_back(increment_action);
    }

    return commands;
}
|
||||||
|
|
||||||
|
/// Builds the same increment sequence as BuildIncrementCommandList, prefixed with a
/// WaitForInterrupt method header and a zero argument.
static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(Fence fence,
                                                                          u32 add_increment) {
    std::vector<Tegra::CommandHeader> commands;

    commands.push_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1,
                                                 Tegra::SubmissionMode::Increasing));
    // Value-initialized word: the WaitForInterrupt argument is left at zero.
    commands.push_back(Tegra::CommandHeader{});

    // Append the plain increment sequence after the WFI prefix.
    for (const Tegra::CommandHeader& word : BuildIncrementCommandList(fence, add_increment)) {
        commands.push_back(word);
    }

    return commands;
}
|
||||||
|
|
||||||
|
/// Shared submission path used by SubmitGPFIFO and KickoffPB.
///
/// @param params  Decoded ioctl parameters; `fence_out.value` is updated in place and the
///                whole structure is copied back into `output`.
/// @param output  Destination buffer for the updated parameters. It must be able to hold at
///                least sizeof(IoctlSubmitGpfifo) bytes; this is not checked here.
/// @param entries Command list to submit; it is moved into the GPU.
/// @return Always 0.
u32 nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
                                 Tegra::CommandList&& entries) {
    LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
              params.num_entries, params.flags.raw);

    auto& gpu = system.GPU();

    // add_wait: queue a wait on the incoming fence before the submitted commands, unless the
    // syncpoint manager already reports that fence as expired.
    if (params.flags.add_wait.Value() &&
        !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
        gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
    }

    // Compute the fence value returned to the caller. With add_increment and/or increment set,
    // the syncpoint is increased by AddIncrementValue() plus, when `increment` is set, the
    // caller-supplied fence value. Otherwise the syncpoint's current maximum is reported.
    if (params.flags.add_increment.Value() || params.flags.increment.Value()) {
        const u32 increment_value = params.flags.increment.Value() ? params.fence_out.value : 0;
        params.fence_out.value = syncpoint_manager.IncreaseSyncpoint(
            params.fence_out.id, params.AddIncrementValue() + increment_value);
    } else {
        params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id);
    }

    entries.RefreshIntegrityChecks(gpu);
    gpu.PushGPUEntries(std::move(entries));

    // add_increment: queue the syncpoint increments after the submitted commands, preceded by
    // a wait-for-interrupt unless the caller asked to suppress it.
    if (params.flags.add_increment.Value()) {
        if (params.flags.suppress_wfi.Value()) {
            gpu.PushGPUEntries(Tegra::CommandList{
                BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())});
        } else {
            gpu.PushGPUEntries(Tegra::CommandList{
                BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())});
        }
    }

    std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
    return 0;
}
|
||||||
|
|
||||||
u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
|
u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
|
||||||
if (input.size() < sizeof(IoctlSubmitGpfifo)) {
|
if (input.size() < sizeof(IoctlSubmitGpfifo)) {
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
}
|
}
|
||||||
IoctlSubmitGpfifo params{};
|
IoctlSubmitGpfifo params{};
|
||||||
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
|
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
|
||||||
LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
|
|
||||||
params.num_entries, params.flags.raw);
|
|
||||||
|
|
||||||
ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
|
|
||||||
params.num_entries * sizeof(Tegra::CommandListHeader),
|
|
||||||
"Incorrect input size");
|
|
||||||
|
|
||||||
Tegra::CommandList entries(params.num_entries);
|
Tegra::CommandList entries(params.num_entries);
|
||||||
std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
|
std::memcpy(entries.command_lists.data(), &input[sizeof(IoctlSubmitGpfifo)],
|
||||||
params.num_entries * sizeof(Tegra::CommandListHeader));
|
params.num_entries * sizeof(Tegra::CommandListHeader));
|
||||||
|
|
||||||
UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
|
return SubmitGPFIFOImpl(params, output, std::move(entries));
|
||||||
UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
|
|
||||||
|
|
||||||
auto& gpu = system.GPU();
|
|
||||||
u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
|
|
||||||
if (params.flags.increment.Value()) {
|
|
||||||
params.fence_out.value += current_syncpoint_value;
|
|
||||||
} else {
|
|
||||||
params.fence_out.value = current_syncpoint_value;
|
|
||||||
}
|
|
||||||
gpu.PushGPUEntries(std::move(entries));
|
|
||||||
|
|
||||||
std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
|
u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
|
||||||
|
@ -185,31 +245,17 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
|
||||||
}
|
}
|
||||||
IoctlSubmitGpfifo params{};
|
IoctlSubmitGpfifo params{};
|
||||||
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
|
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
|
||||||
LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
|
|
||||||
params.num_entries, params.flags.raw);
|
|
||||||
|
|
||||||
Tegra::CommandList entries(params.num_entries);
|
Tegra::CommandList entries(params.num_entries);
|
||||||
if (version == IoctlVersion::Version2) {
|
if (version == IoctlVersion::Version2) {
|
||||||
std::memcpy(entries.data(), input2.data(),
|
std::memcpy(entries.command_lists.data(), input2.data(),
|
||||||
params.num_entries * sizeof(Tegra::CommandListHeader));
|
params.num_entries * sizeof(Tegra::CommandListHeader));
|
||||||
} else {
|
} else {
|
||||||
system.Memory().ReadBlock(params.address, entries.data(),
|
system.Memory().ReadBlock(params.address, entries.command_lists.data(),
|
||||||
params.num_entries * sizeof(Tegra::CommandListHeader));
|
params.num_entries * sizeof(Tegra::CommandListHeader));
|
||||||
}
|
}
|
||||||
UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
|
|
||||||
UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
|
|
||||||
|
|
||||||
auto& gpu = system.GPU();
|
return SubmitGPFIFOImpl(params, output, std::move(entries));
|
||||||
u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
|
|
||||||
if (params.flags.increment.Value()) {
|
|
||||||
params.fence_out.value += current_syncpoint_value;
|
|
||||||
} else {
|
|
||||||
params.fence_out.value = current_syncpoint_value;
|
|
||||||
}
|
|
||||||
gpu.PushGPUEntries(std::move(entries));
|
|
||||||
|
|
||||||
std::memcpy(output.data(), &params, output.size());
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
|
u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
|
||||||
|
|
|
@ -11,6 +11,11 @@
|
||||||
#include "common/swap.h"
|
#include "common/swap.h"
|
||||||
#include "core/hle/service/nvdrv/devices/nvdevice.h"
|
#include "core/hle/service/nvdrv/devices/nvdevice.h"
|
||||||
#include "core/hle/service/nvdrv/nvdata.h"
|
#include "core/hle/service/nvdrv/nvdata.h"
|
||||||
|
#include "video_core/dma_pusher.h"
|
||||||
|
|
||||||
|
namespace Service::Nvidia {
|
||||||
|
class SyncpointManager;
|
||||||
|
}
|
||||||
|
|
||||||
namespace Service::Nvidia::Devices {
|
namespace Service::Nvidia::Devices {
|
||||||
|
|
||||||
|
@ -21,7 +26,8 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b);
|
||||||
|
|
||||||
class nvhost_gpu final : public nvdevice {
|
class nvhost_gpu final : public nvdevice {
|
||||||
public:
|
public:
|
||||||
explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
|
explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
|
||||||
|
SyncpointManager& syncpoint_manager);
|
||||||
~nvhost_gpu() override;
|
~nvhost_gpu() override;
|
||||||
|
|
||||||
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
|
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
|
||||||
|
@ -162,10 +168,15 @@ private:
|
||||||
u32_le raw;
|
u32_le raw;
|
||||||
BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list
|
BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list
|
||||||
BitField<1, 1, u32_le> add_increment; // append an increment to the list
|
BitField<1, 1, u32_le> add_increment; // append an increment to the list
|
||||||
BitField<2, 1, u32_le> new_hw_format; // Mostly ignored
|
BitField<2, 1, u32_le> new_hw_format; // mostly ignored
|
||||||
|
BitField<4, 1, u32_le> suppress_wfi; // suppress wait for interrupt
|
||||||
BitField<8, 1, u32_le> increment; // increment the returned fence
|
BitField<8, 1, u32_le> increment; // increment the returned fence
|
||||||
} flags;
|
} flags;
|
||||||
Fence fence_out; // returned new fence object for others to wait on
|
Fence fence_out; // returned new fence object for others to wait on
|
||||||
|
|
||||||
|
/// Returns the add_increment flag bit shifted left by one: 2 when the flag is set, 0 otherwise.
u32 AddIncrementValue() const {
    const auto add_increment_bit = flags.add_increment.Value();
    return add_increment_bit << 1;
}
|
||||||
};
|
};
|
||||||
static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence),
|
static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence),
|
||||||
"IoctlSubmitGpfifo is incorrect size");
|
"IoctlSubmitGpfifo is incorrect size");
|
||||||
|
@ -190,6 +201,8 @@ private:
|
||||||
u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output);
|
u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output);
|
||||||
u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output);
|
u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output);
|
||||||
u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output);
|
u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output);
|
||||||
|
u32 SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
|
||||||
|
Tegra::CommandList&& entries);
|
||||||
u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output);
|
u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output);
|
||||||
u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
|
u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
|
||||||
const std::vector<u8>& input2, IoctlVersion version);
|
const std::vector<u8>& input2, IoctlVersion version);
|
||||||
|
@ -198,7 +211,7 @@ private:
|
||||||
u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output);
|
u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output);
|
||||||
|
|
||||||
std::shared_ptr<nvmap> nvmap_dev;
|
std::shared_ptr<nvmap> nvmap_dev;
|
||||||
u32 assigned_syncpoints{};
|
SyncpointManager& syncpoint_manager;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Service::Nvidia::Devices
|
} // namespace Service::Nvidia::Devices
|
||||||
|
|
|
@ -47,7 +47,8 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
|
||||||
}
|
}
|
||||||
auto nvmap_dev = std::make_shared<Devices::nvmap>(system);
|
auto nvmap_dev = std::make_shared<Devices::nvmap>(system);
|
||||||
devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev);
|
devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev);
|
||||||
devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev);
|
devices["/dev/nvhost-gpu"] =
|
||||||
|
std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev, syncpoint_manager);
|
||||||
devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system);
|
devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system);
|
||||||
devices["/dev/nvmap"] = nvmap_dev;
|
devices["/dev/nvmap"] = nvmap_dev;
|
||||||
devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
|
devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
|
||||||
|
|
Loading…
Reference in a new issue