nvdrv: rework to remove memcpy

This commit is contained in:
Liam 2023-10-25 00:34:40 -04:00
parent 94b7ac50bb
commit 723df0f368
16 changed files with 243 additions and 225 deletions

View file

@ -11,97 +11,149 @@
namespace Service::Nvidia::Devices {
struct Ioctl1Traits {
template <typename T, typename R, typename A>
static T GetClassImpl(R (T::*)(A));
template <typename T, typename R, typename A>
static A GetArgImpl(R (T::*)(A));
struct IoctlOneArgTraits {
template <typename T, typename R, typename A, typename... B>
static A GetFirstArgImpl(R (T::*)(A, B...));
};
struct Ioctl23Traits {
template <typename T, typename R, typename A, typename B>
static T GetClassImpl(R (T::*)(A, B));
struct IoctlTwoArgTraits {
template <typename T, typename R, typename A, typename B, typename... C>
static A GetFirstArgImpl(R (T::*)(A, B, C...));
template <typename T, typename R, typename A, typename B>
static A GetArgImpl(R (T::*)(A, B));
template <typename T, typename R, typename A, typename B, typename... C>
static B GetSecondArgImpl(R (T::*)(A, B, C...));
};
template <typename T>
struct ContainerType {
using ValueType = T;
};
// Empty tag type used as a WrapGeneric template argument to mark an argument slot
// (fixed, variable, inline-in or inline-out) as unused; WrapGeneric detects it with
// std::is_same_v and skips the corresponding copy steps.
struct Null {};
template <Common::IsContiguousContainer T>
struct ContainerType<T> {
using ValueType = T::value_type;
};
// clang-format off
template <typename InnerArg, typename F, typename Self, typename... Rest>
NvResult Wrap(std::span<const u8> input, std::span<u8> output, Self* self, F&& callable,
Rest&&... rest) {
using Arg = ContainerType<InnerArg>::ValueType;
constexpr bool ArgumentIsContainer = Common::IsContiguousContainer<InnerArg>;
template <typename FixedArg, typename VarArg, typename InlInVarArg, typename InlOutVarArg, typename F>
NvResult WrapGeneric(F&& callable, std::span<const u8> input, std::span<const u8> inline_input, std::span<u8> output, std::span<u8> inline_output) {
constexpr bool HasFixedArg = !std::is_same_v<FixedArg, Null>;
constexpr bool HasVarArg = !std::is_same_v<VarArg, Null>;
constexpr bool HasInlInVarArg = !std::is_same_v<InlInVarArg, Null>;
constexpr bool HasInlOutVarArg = !std::is_same_v<InlOutVarArg, Null>;
// Verify that the input and output sizes are valid.
const size_t in_params = input.size() / sizeof(Arg);
const size_t out_params = output.size() / sizeof(Arg);
if (in_params * sizeof(Arg) != input.size()) {
return NvResult::InvalidSize;
}
if (out_params * sizeof(Arg) != output.size()) {
return NvResult::InvalidSize;
}
if (in_params == 0 && out_params == 0 && !ArgumentIsContainer) {
return NvResult::InvalidSize;
// Declare the fixed-size input value.
FixedArg fixed{};
size_t var_offset = 0;
if constexpr (HasFixedArg) {
// Read the fixed-size input value.
var_offset = std::min(sizeof(FixedArg), input.size());
if (var_offset > 0) {
std::memcpy(&fixed, input.data(), var_offset);
}
}
// Copy inputs, if needed.
std::vector<Arg> params(std::max(in_params, out_params));
if (in_params > 0) {
std::memcpy(params.data(), input.data(), input.size());
// Read the variable-sized inputs.
const size_t num_var_args = HasVarArg ? ((input.size() - var_offset) / sizeof(VarArg)) : 0;
std::vector<VarArg> var_args(num_var_args);
if constexpr (HasVarArg) {
if (num_var_args > 0) {
std::memcpy(var_args.data(), input.data() + var_offset, num_var_args * sizeof(VarArg));
}
}
const size_t num_inl_in_var_args = HasInlInVarArg ? (inline_input.size() / sizeof(InlInVarArg)) : 0;
std::vector<InlInVarArg> inl_in_var_args(num_inl_in_var_args);
if constexpr (HasInlInVarArg) {
if (num_inl_in_var_args > 0) {
std::memcpy(inl_in_var_args.data(), inline_input.data(), num_inl_in_var_args * sizeof(InlInVarArg));
}
}
// Construct inline output data.
const size_t num_inl_out_var_args = HasInlOutVarArg ? (inline_output.size() / sizeof(InlOutVarArg)) : 0;
std::vector<InlOutVarArg> inl_out_var_args(num_inl_out_var_args);
// Perform the call.
NvResult result;
if constexpr (ArgumentIsContainer) {
result = (self->*callable)(params, std::forward<Rest>(rest)...);
} else {
result = (self->*callable)(params.front(), std::forward<Rest>(rest)...);
NvResult result = callable(fixed, var_args, inl_in_var_args, inl_out_var_args);
// Copy outputs.
if constexpr (HasFixedArg) {
if (output.size() > 0) {
std::memcpy(output.data(), &fixed, std::min(output.size(), sizeof(FixedArg)));
}
}
// Copy outputs, if needed.
if (out_params > 0) {
std::memcpy(output.data(), params.data(), output.size());
if constexpr (HasVarArg) {
if (num_var_args > 0 && output.size() > var_offset) {
const size_t max_var_size = output.size() - var_offset;
std::memcpy(output.data() + var_offset, var_args.data(), std::min(max_var_size, num_var_args * sizeof(VarArg)));
}
}
// Copy inline outputs.
if constexpr (HasInlOutVarArg) {
if (num_inl_out_var_args > 0) {
std::memcpy(inline_output.data(), inl_out_var_args.data(), num_inl_out_var_args * sizeof(InlOutVarArg));
}
}
// We're done.
return result;
}
template <typename F>
NvResult nvdevice::Wrap1(F&& callable, std::span<const u8> input, std::span<u8> output) {
using Self = decltype(Ioctl1Traits::GetClassImpl(callable));
using InnerArg = std::remove_reference_t<decltype(Ioctl1Traits::GetArgImpl(callable))>;
template <typename Self, typename F, typename... Rest>
NvResult WrapFixed(Self* self, F&& callable, std::span<const u8> input, std::span<u8> output, Rest&&... rest) {
using FixedArg = typename std::remove_reference_t<decltype(IoctlOneArgTraits::GetFirstArgImpl(callable))>;
return Wrap<InnerArg>(input, output, static_cast<Self*>(this), callable);
const auto Callable = [&](auto& fixed, auto& var, auto& inl_in, auto& inl_out) -> NvResult {
return (self->*callable)(fixed, std::forward<Rest>(rest)...);
};
return WrapGeneric<FixedArg, Null, Null, Null>(std::move(Callable), input, {}, output, {});
}
template <typename F>
NvResult nvdevice::Wrap2(F&& callable, std::span<const u8> input, std::span<const u8> inline_input,
std::span<u8> output) {
using Self = decltype(Ioctl23Traits::GetClassImpl(callable));
using InnerArg = std::remove_reference_t<decltype(Ioctl23Traits::GetArgImpl(callable))>;
template <typename Self, typename F, typename... Rest>
NvResult WrapFixedInlOut(Self* self, F&& callable, std::span<const u8> input, std::span<u8> output, std::span<u8> inline_output, Rest&&... rest) {
using FixedArg = typename std::remove_reference_t<decltype(IoctlTwoArgTraits::GetFirstArgImpl(callable))>;
using InlOutVarArg = typename std::remove_reference_t<decltype(IoctlTwoArgTraits::GetSecondArgImpl(callable))>::value_type;
return Wrap<InnerArg>(input, output, static_cast<Self*>(this), callable, inline_input);
const auto Callable = [&](auto& fixed, auto& var, auto& inl_in, auto& inl_out) -> NvResult {
return (self->*callable)(fixed, inl_out, std::forward<Rest>(rest)...);
};
return WrapGeneric<FixedArg, Null, Null, InlOutVarArg>(std::move(Callable), input, {}, output, inline_output);
}
template <typename F>
NvResult nvdevice::Wrap3(F&& callable, std::span<const u8> input, std::span<u8> output,
std::span<u8> inline_output) {
using Self = decltype(Ioctl23Traits::GetClassImpl(callable));
using InnerArg = std::remove_reference_t<decltype(Ioctl23Traits::GetArgImpl(callable))>;
template <typename Self, typename F, typename... Rest>
NvResult WrapVariable(Self* self, F&& callable, std::span<const u8> input, std::span<u8> output, Rest&&... rest) {
using VarArg = typename std::remove_reference_t<decltype(IoctlOneArgTraits::GetFirstArgImpl(callable))>::value_type;
return Wrap<InnerArg>(input, output, static_cast<Self*>(this), callable, inline_output);
const auto Callable = [&](auto& fixed, auto& var, auto& inl_in, auto& inl_out) -> NvResult {
return (self->*callable)(var, std::forward<Rest>(rest)...);
};
return WrapGeneric<Null, VarArg, Null, Null>(std::move(Callable), input, {}, output, {});
}
/// Adapts an ioctl handler that takes a fixed-size argument followed by a variable-length
/// argument so that it can be driven by WrapGeneric.
///
/// The fixed argument type is deduced from the handler's first parameter and the element type
/// of the variable-length argument from the `value_type` of its second parameter. WrapGeneric
/// decodes both from `input`, invokes the handler, and copies both back into `output`.
///
/// @param self     Object on which the member-function handler is invoked.
/// @param callable Pointer to the member-function handler.
/// @param input    Raw input bytes: the fixed argument followed by the variable-length elements.
/// @param output   Raw output bytes that receive the (possibly modified) arguments.
/// @param rest     Extra trailing arguments passed through to the handler unchanged.
/// @return The NvResult returned by the handler via WrapGeneric.
template <typename Self, typename F, typename... Rest>
NvResult WrapFixedVariable(Self* self, F&& callable, std::span<const u8> input, std::span<u8> output, Rest&&... rest) {
    using FixedArg = typename std::remove_reference_t<decltype(IoctlTwoArgTraits::GetFirstArgImpl(callable))>;
    using VarArg = typename std::remove_reference_t<decltype(IoctlTwoArgTraits::GetSecondArgImpl(callable))>::value_type;

    // The inline-input and inline-output slots are not used by this wrapper, so the
    // corresponding lambda parameters are intentionally left unnamed.
    const auto invoke = [&](auto& fixed, auto& var, auto&, auto&) -> NvResult {
        return (self->*callable)(fixed, var, std::forward<Rest>(rest)...);
    };

    // The lambda is const, so std::move would merely yield a const rvalue and move nothing;
    // hand it to WrapGeneric as a plain lvalue instead.
    return WrapGeneric<FixedArg, VarArg, Null, Null>(invoke, input, {}, output, {});
}
/// Adapts an ioctl handler that takes a fixed-size argument followed by an inline-input
/// buffer so that it can be driven by WrapGeneric.
///
/// The fixed argument type is deduced from the handler's first parameter and the element type
/// of the inline input from the `value_type` of its second parameter. WrapGeneric decodes the
/// fixed argument from `input` and the inline elements from `inline_input`, invokes the
/// handler, and copies the fixed argument back into `output`.
///
/// @param self         Object on which the member-function handler is invoked.
/// @param callable     Pointer to the member-function handler.
/// @param input        Raw input bytes holding the fixed argument.
/// @param inline_input Raw bytes holding the inline input elements.
/// @param output       Raw output bytes that receive the (possibly modified) fixed argument.
/// @param rest         Extra trailing arguments passed through to the handler unchanged.
/// @return The NvResult returned by the handler via WrapGeneric.
template <typename Self, typename F, typename... Rest>
NvResult WrapFixedInlIn(Self* self, F&& callable, std::span<const u8> input, std::span<const u8> inline_input, std::span<u8> output, Rest&&... rest) {
    using FixedArg = typename std::remove_reference_t<decltype(IoctlTwoArgTraits::GetFirstArgImpl(callable))>;
    using InlInVarArg = typename std::remove_reference_t<decltype(IoctlTwoArgTraits::GetSecondArgImpl(callable))>::value_type;

    // The variable-argument and inline-output slots are not used by this wrapper, so the
    // corresponding lambda parameters are intentionally left unnamed.
    const auto invoke = [&](auto& fixed, auto&, auto& inl_in, auto&) -> NvResult {
        return (self->*callable)(fixed, inl_in, std::forward<Rest>(rest)...);
    };

    // The lambda is const, so std::move would merely yield a const rvalue and move nothing;
    // hand it to WrapGeneric as a plain lvalue instead.
    return WrapGeneric<FixedArg, Null, InlInVarArg, Null>(invoke, input, inline_input, output, {});
}
// clang-format on
} // namespace Service::Nvidia::Devices

View file

@ -74,18 +74,6 @@ public:
return nullptr;
}
protected:
template <typename F>
NvResult Wrap1(F&& callable, std::span<const u8> input, std::span<u8> output);
template <typename F>
NvResult Wrap2(F&& callable, std::span<const u8> input, std::span<const u8> inline_input,
std::span<u8> output);
template <typename F>
NvResult Wrap3(F&& callable, std::span<const u8> input, std::span<u8> output,
std::span<u8> inline_output);
protected:
Core::System& system;
};

View file

@ -34,21 +34,21 @@ NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> i
case 'A':
switch (command.cmd) {
case 0x1:
return Wrap1(&nvhost_as_gpu::BindChannel, input, output);
return WrapFixed(this, &nvhost_as_gpu::BindChannel, input, output);
case 0x2:
return Wrap1(&nvhost_as_gpu::AllocateSpace, input, output);
return WrapFixed(this, &nvhost_as_gpu::AllocateSpace, input, output);
case 0x3:
return Wrap1(&nvhost_as_gpu::FreeSpace, input, output);
return WrapFixed(this, &nvhost_as_gpu::FreeSpace, input, output);
case 0x5:
return Wrap1(&nvhost_as_gpu::UnmapBuffer, input, output);
return WrapFixed(this, &nvhost_as_gpu::UnmapBuffer, input, output);
case 0x6:
return Wrap1(&nvhost_as_gpu::MapBufferEx, input, output);
return WrapFixed(this, &nvhost_as_gpu::MapBufferEx, input, output);
case 0x8:
return Wrap1(&nvhost_as_gpu::GetVARegions1, input, output);
return WrapFixed(this, &nvhost_as_gpu::GetVARegions1, input, output);
case 0x9:
return Wrap1(&nvhost_as_gpu::AllocAsEx, input, output);
return WrapFixed(this, &nvhost_as_gpu::AllocAsEx, input, output);
case 0x14:
return Wrap1(&nvhost_as_gpu::Remap, input, output);
return WrapVariable(this, &nvhost_as_gpu::Remap, input, output);
default:
break;
}
@ -73,7 +73,8 @@ NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> i
case 'A':
switch (command.cmd) {
case 0x8:
return Wrap3(&nvhost_as_gpu::GetVARegions3, input, output, inline_output);
return WrapFixedInlOut(this, &nvhost_as_gpu::GetVARegions3, input, output,
inline_output);
default:
break;
}
@ -482,7 +483,7 @@ NvResult nvhost_as_gpu::GetVARegions1(IoctlGetVaRegions& params) {
return NvResult::Success;
}
NvResult nvhost_as_gpu::GetVARegions3(IoctlGetVaRegions& params, std::span<u8> inline_output) {
NvResult nvhost_as_gpu::GetVARegions3(IoctlGetVaRegions& params, std::span<VaRegion> regions) {
LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
params.buf_size);
@ -494,7 +495,10 @@ NvResult nvhost_as_gpu::GetVARegions3(IoctlGetVaRegions& params, std::span<u8> i
GetVARegionsImpl(params);
std::memcpy(inline_output.data(), params.regions.data(), 2 * sizeof(VaRegion));
const size_t num_regions = std::min(params.regions.size(), regions.size());
for (size_t i = 0; i < num_regions; i++) {
regions[i] = params.regions[i];
}
return NvResult::Success;
}

View file

@ -149,7 +149,7 @@ private:
void GetVARegionsImpl(IoctlGetVaRegions& params);
NvResult GetVARegions1(IoctlGetVaRegions& params);
NvResult GetVARegions3(IoctlGetVaRegions& params, std::span<u8> inline_output);
NvResult GetVARegions3(IoctlGetVaRegions& params, std::span<VaRegion> regions);
void FreeMappingLocked(u64 offset);

View file

@ -41,19 +41,19 @@ NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inp
case 0x0:
switch (command.cmd) {
case 0x1b:
return Wrap1(&nvhost_ctrl::NvOsGetConfigU32, input, output);
return WrapFixed(this, &nvhost_ctrl::NvOsGetConfigU32, input, output);
case 0x1c:
return Wrap1(&nvhost_ctrl::IocCtrlClearEventWait, input, output);
return WrapFixed(this, &nvhost_ctrl::IocCtrlClearEventWait, input, output);
case 0x1d:
return Wrap1(&nvhost_ctrl::IocCtrlEventWaitWithAllocation, input, output);
return WrapFixed(this, &nvhost_ctrl::IocCtrlEventWait, input, output, true);
case 0x1e:
return Wrap1(&nvhost_ctrl::IocCtrlEventWaitNotAllocation, input, output);
return WrapFixed(this, &nvhost_ctrl::IocCtrlEventWait, input, output, false);
case 0x1f:
return Wrap1(&nvhost_ctrl::IocCtrlEventRegister, input, output);
return WrapFixed(this, &nvhost_ctrl::IocCtrlEventRegister, input, output);
case 0x20:
return Wrap1(&nvhost_ctrl::IocCtrlEventUnregister, input, output);
return WrapFixed(this, &nvhost_ctrl::IocCtrlEventUnregister, input, output);
case 0x21:
return Wrap1(&nvhost_ctrl::IocCtrlEventUnregisterBatch, input, output);
return WrapFixed(this, &nvhost_ctrl::IocCtrlEventUnregisterBatch, input, output);
}
break;
default:
@ -86,7 +86,7 @@ NvResult nvhost_ctrl::NvOsGetConfigU32(IocGetConfigParams& params) {
return NvResult::ConfigVarNotFound; // Returns error on production mode
}
NvResult nvhost_ctrl::IocCtrlEventWaitImpl(IocCtrlEventWaitParams& params, bool is_allocation) {
NvResult nvhost_ctrl::IocCtrlEventWait(IocCtrlEventWaitParams& params, bool is_allocation) {
LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_allocation={}",
params.fence.id, params.fence.value, params.timeout, is_allocation);

View file

@ -190,20 +190,11 @@ private:
NvResult IocCtrlEventRegister(IocCtrlEventRegisterParams& params);
NvResult IocCtrlEventUnregister(IocCtrlEventUnregisterParams& params);
NvResult IocCtrlEventUnregisterBatch(IocCtrlEventUnregisterBatchParams& params);
NvResult IocCtrlEventWait(IocCtrlEventWaitParams& params, bool is_allocation);
NvResult IocCtrlClearEventWait(IocCtrlEventClearParams& params);
NvResult FreeEvent(u32 slot);
// TODO: these are not the correct names
NvResult IocCtrlEventWaitNotAllocation(IocCtrlEventWaitParams& params) {
return this->IocCtrlEventWaitImpl(params, false);
}
NvResult IocCtrlEventWaitWithAllocation(IocCtrlEventWaitParams& params) {
return this->IocCtrlEventWaitImpl(params, true);
}
NvResult IocCtrlEventWaitImpl(IocCtrlEventWaitParams& params, bool is_allocation);
EventInterface& events_interface;
NvCore::Container& core;
NvCore::SyncpointManager& syncpoint_manager;

View file

@ -28,23 +28,23 @@ NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8>
case 'G':
switch (command.cmd) {
case 0x1:
return Wrap1(&nvhost_ctrl_gpu::ZCullGetCtxSize, input, output);
return WrapFixed(this, &nvhost_ctrl_gpu::ZCullGetCtxSize, input, output);
case 0x2:
return Wrap1(&nvhost_ctrl_gpu::ZCullGetInfo, input, output);
return WrapFixed(this, &nvhost_ctrl_gpu::ZCullGetInfo, input, output);
case 0x3:
return Wrap1(&nvhost_ctrl_gpu::ZBCSetTable, input, output);
return WrapFixed(this, &nvhost_ctrl_gpu::ZBCSetTable, input, output);
case 0x4:
return Wrap1(&nvhost_ctrl_gpu::ZBCQueryTable, input, output);
return WrapFixed(this, &nvhost_ctrl_gpu::ZBCQueryTable, input, output);
case 0x5:
return Wrap1(&nvhost_ctrl_gpu::GetCharacteristics1, input, output);
return WrapFixed(this, &nvhost_ctrl_gpu::GetCharacteristics1, input, output);
case 0x6:
return Wrap1(&nvhost_ctrl_gpu::GetTPCMasks1, input, output);
return WrapFixed(this, &nvhost_ctrl_gpu::GetTPCMasks1, input, output);
case 0x7:
return Wrap1(&nvhost_ctrl_gpu::FlushL2, input, output);
return WrapFixed(this, &nvhost_ctrl_gpu::FlushL2, input, output);
case 0x14:
return Wrap1(&nvhost_ctrl_gpu::GetActiveSlotMask, input, output);
return WrapFixed(this, &nvhost_ctrl_gpu::GetActiveSlotMask, input, output);
case 0x1c:
return Wrap1(&nvhost_ctrl_gpu::GetGpuTime, input, output);
return WrapFixed(this, &nvhost_ctrl_gpu::GetGpuTime, input, output);
default:
break;
}
@ -66,9 +66,11 @@ NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8>
case 'G':
switch (command.cmd) {
case 0x5:
return Wrap3(&nvhost_ctrl_gpu::GetCharacteristics3, input, output, inline_output);
return WrapFixedInlOut(this, &nvhost_ctrl_gpu::GetCharacteristics3, input, output,
inline_output);
case 0x6:
return Wrap3(&nvhost_ctrl_gpu::GetTPCMasks3, input, output, inline_output);
return WrapFixedInlOut(this, &nvhost_ctrl_gpu::GetTPCMasks3, input, output,
inline_output);
default:
break;
}
@ -125,8 +127,8 @@ NvResult nvhost_ctrl_gpu::GetCharacteristics1(IoctlCharacteristics& params) {
return NvResult::Success;
}
NvResult nvhost_ctrl_gpu::GetCharacteristics3(IoctlCharacteristics& params,
std::span<u8> inline_output) {
NvResult nvhost_ctrl_gpu::GetCharacteristics3(
IoctlCharacteristics& params, std::span<IoctlGpuCharacteristics> gpu_characteristics) {
LOG_DEBUG(Service_NVDRV, "called");
params.gc.arch = 0x120;
@ -166,8 +168,9 @@ NvResult nvhost_ctrl_gpu::GetCharacteristics3(IoctlCharacteristics& params,
params.gc.gr_compbit_store_base_hw = 0x0;
params.gpu_characteristics_buf_size = 0xA0;
params.gpu_characteristics_buf_addr = 0xdeadbeef; // Cannot be 0 (UNUSED)
std::memcpy(inline_output.data(), &params.gc,
std::min(sizeof(params.gc), inline_output.size()));
if (!gpu_characteristics.empty()) {
gpu_characteristics.front() = params.gc;
}
return NvResult::Success;
}
@ -179,14 +182,14 @@ NvResult nvhost_ctrl_gpu::GetTPCMasks1(IoctlGpuGetTpcMasksArgs& params) {
return NvResult::Success;
}
NvResult nvhost_ctrl_gpu::GetTPCMasks3(IoctlGpuGetTpcMasksArgs& params,
std::span<u8> inline_output) {
NvResult nvhost_ctrl_gpu::GetTPCMasks3(IoctlGpuGetTpcMasksArgs& params, std::span<u32> tpc_mask) {
LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
if (params.mask_buffer_size != 0) {
params.tcp_mask = 3;
}
std::memcpy(inline_output.data(), &params.tcp_mask,
std::min(sizeof(params.tcp_mask), inline_output.size()));
if (!tpc_mask.empty()) {
tpc_mask.front() = params.tcp_mask;
}
return NvResult::Success;
}

View file

@ -152,10 +152,11 @@ private:
static_assert(sizeof(IoctlGetGpuTime) == 0x10, "IoctlGetGpuTime is incorrect size");
NvResult GetCharacteristics1(IoctlCharacteristics& params);
NvResult GetCharacteristics3(IoctlCharacteristics& params, std::span<u8> inline_output);
NvResult GetCharacteristics3(IoctlCharacteristics& params,
std::span<IoctlGpuCharacteristics> gpu_characteristics);
NvResult GetTPCMasks1(IoctlGpuGetTpcMasksArgs& params);
NvResult GetTPCMasks3(IoctlGpuGetTpcMasksArgs& params, std::span<u8> inline_output);
NvResult GetTPCMasks3(IoctlGpuGetTpcMasksArgs& params, std::span<u32> tpc_mask);
NvResult GetActiveSlotMask(IoctlActiveSlotMask& params);
NvResult ZCullGetCtxSize(IoctlZcullGetCtxSize& params);

View file

@ -53,7 +53,7 @@ NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
case 0x0:
switch (command.cmd) {
case 0x3:
return Wrap1(&nvhost_gpu::GetWaitbase, input, output);
return WrapFixed(this, &nvhost_gpu::GetWaitbase, input, output);
default:
break;
}
@ -61,25 +61,25 @@ NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
case 'H':
switch (command.cmd) {
case 0x1:
return Wrap1(&nvhost_gpu::SetNVMAPfd, input, output);
return WrapFixed(this, &nvhost_gpu::SetNVMAPfd, input, output);
case 0x3:
return Wrap1(&nvhost_gpu::ChannelSetTimeout, input, output);
return WrapFixed(this, &nvhost_gpu::ChannelSetTimeout, input, output);
case 0x8:
return SubmitGPFIFOBase1(input, false);
return WrapFixedVariable(this, &nvhost_gpu::SubmitGPFIFOBase1, input, output, false);
case 0x9:
return Wrap1(&nvhost_gpu::AllocateObjectContext, input, output);
return WrapFixed(this, &nvhost_gpu::AllocateObjectContext, input, output);
case 0xb:
return Wrap1(&nvhost_gpu::ZCullBind, input, output);
return WrapFixed(this, &nvhost_gpu::ZCullBind, input, output);
case 0xc:
return Wrap1(&nvhost_gpu::SetErrorNotifier, input, output);
return WrapFixed(this, &nvhost_gpu::SetErrorNotifier, input, output);
case 0xd:
return Wrap1(&nvhost_gpu::SetChannelPriority, input, output);
return WrapFixed(this, &nvhost_gpu::SetChannelPriority, input, output);
case 0x1a:
return Wrap1(&nvhost_gpu::AllocGPFIFOEx2, input, output);
return WrapFixed(this, &nvhost_gpu::AllocGPFIFOEx2, input, output);
case 0x1b:
return SubmitGPFIFOBase1(input, true);
return WrapFixedVariable(this, &nvhost_gpu::SubmitGPFIFOBase1, input, output, true);
case 0x1d:
return Wrap1(&nvhost_gpu::ChannelSetTimeslice, input, output);
return WrapFixed(this, &nvhost_gpu::ChannelSetTimeslice, input, output);
default:
break;
}
@ -87,9 +87,9 @@ NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
case 'G':
switch (command.cmd) {
case 0x14:
return Wrap1(&nvhost_gpu::SetClientData, input, output);
return WrapFixed(this, &nvhost_gpu::SetClientData, input, output);
case 0x15:
return Wrap1(&nvhost_gpu::GetClientData, input, output);
return WrapFixed(this, &nvhost_gpu::GetClientData, input, output);
default:
break;
}
@ -105,7 +105,8 @@ NvResult nvhost_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> inpu
case 'H':
switch (command.cmd) {
case 0x1b:
return SubmitGPFIFOBase2(input, inline_input);
return WrapFixedInlIn(this, &nvhost_gpu::SubmitGPFIFOBase2, input, inline_input,
output);
}
break;
}
@ -271,36 +272,35 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, Tegra::CommandL
return NvResult::Success;
}
NvResult nvhost_gpu::SubmitGPFIFOBase1(std::span<const u8> input, bool kickoff) {
if (input.size() < sizeof(IoctlSubmitGpfifo)) {
NvResult nvhost_gpu::SubmitGPFIFOBase1(IoctlSubmitGpfifo& params,
std::span<Tegra::CommandListHeader> commands, bool kickoff) {
if (params.num_entries > commands.size()) {
UNIMPLEMENTED();
return NvResult::InvalidSize;
}
IoctlSubmitGpfifo params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
Tegra::CommandList entries(params.num_entries);
Tegra::CommandList entries(params.num_entries);
if (kickoff) {
system.ApplicationMemory().ReadBlock(params.address, entries.command_lists.data(),
params.num_entries * sizeof(Tegra::CommandListHeader));
} else {
std::memcpy(entries.command_lists.data(), &input[sizeof(IoctlSubmitGpfifo)],
std::memcpy(entries.command_lists.data(), commands.data(),
params.num_entries * sizeof(Tegra::CommandListHeader));
}
return SubmitGPFIFOImpl(params, std::move(entries));
}
NvResult nvhost_gpu::SubmitGPFIFOBase2(std::span<const u8> input,
std::span<const u8> input_inline) {
if (input.size() < sizeof(IoctlSubmitGpfifo)) {
NvResult nvhost_gpu::SubmitGPFIFOBase2(IoctlSubmitGpfifo& params,
std::span<const Tegra::CommandListHeader> commands) {
if (params.num_entries > commands.size()) {
UNIMPLEMENTED();
return NvResult::InvalidSize;
}
IoctlSubmitGpfifo params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
Tegra::CommandList entries(params.num_entries);
std::memcpy(entries.command_lists.data(), input_inline.data(), input_inline.size());
std::memcpy(entries.command_lists.data(), commands.data(),
params.num_entries * sizeof(Tegra::CommandListHeader));
return SubmitGPFIFOImpl(params, std::move(entries));
}

View file

@ -196,8 +196,10 @@ private:
NvResult AllocateObjectContext(IoctlAllocObjCtx& params);
NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, Tegra::CommandList&& entries);
NvResult SubmitGPFIFOBase1(std::span<const u8> input, bool kickoff = false);
NvResult SubmitGPFIFOBase2(std::span<const u8> input, std::span<const u8> input_inline);
NvResult SubmitGPFIFOBase1(IoctlSubmitGpfifo& params,
std::span<Tegra::CommandListHeader> commands, bool kickoff = false);
NvResult SubmitGPFIFOBase2(IoctlSubmitGpfifo& params,
std::span<const Tegra::CommandListHeader> commands);
NvResult GetWaitbase(IoctlGetWaitbase& params);
NvResult ChannelSetTimeout(IoctlChannelSetTimeout& params);

View file

@ -26,18 +26,18 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
if (!host1x_file.fd_to_id.contains(fd)) {
host1x_file.fd_to_id[fd] = host1x_file.nvdec_next_id++;
}
return Submit(fd, input, output);
return WrapFixedVariable(this, &nvhost_nvdec::Submit, input, output, fd);
}
case 0x2:
return Wrap1(&nvhost_nvdec::GetSyncpoint, input, output);
return WrapFixed(this, &nvhost_nvdec::GetSyncpoint, input, output);
case 0x3:
return Wrap1(&nvhost_nvdec::GetWaitbase, input, output);
return WrapFixed(this, &nvhost_nvdec::GetWaitbase, input, output);
case 0x7:
return Wrap1(&nvhost_nvdec::SetSubmitTimeout, input, output);
return WrapFixed(this, &nvhost_nvdec::SetSubmitTimeout, input, output);
case 0x9:
return MapBuffer(input, output);
return WrapFixedVariable(this, &nvhost_nvdec::MapBuffer, input, output);
case 0xa:
return UnmapBuffer(input, output);
return WrapFixedVariable(this, &nvhost_nvdec::UnmapBuffer, input, output);
default:
break;
}
@ -45,7 +45,7 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
case 'H':
switch (command.cmd) {
case 0x1:
return Wrap1(&nvhost_nvdec::SetNVMAPfd, input, output);
return WrapFixed(this, &nvhost_nvdec::SetNVMAPfd, input, output);
default:
break;
}

View file

@ -76,13 +76,7 @@ NvResult nvhost_nvdec_common::SetNVMAPfd(IoctlSetNvmapFD& params) {
return NvResult::Success;
}
NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input, std::span<u8> output) {
if (input.size() < sizeof(IoctlSubmit) || output.size() < sizeof(IoctlSubmit)) {
UNIMPLEMENTED();
return NvResult::InvalidSize;
}
IoctlSubmit params{};
std::memcpy(&params, input.data(), std::min(input.size(), sizeof(IoctlSubmit)));
NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, DeviceFD fd) {
LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count);
// Instantiate param buffers
@ -93,12 +87,12 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input, std
std::vector<u32> fence_thresholds(params.fence_count);
// Slice input into their respective buffers
std::size_t offset = sizeof(IoctlSubmit);
offset += SliceVectors(input, command_buffers, params.cmd_buffer_count, offset);
offset += SliceVectors(input, relocs, params.relocation_count, offset);
offset += SliceVectors(input, reloc_shifts, params.relocation_count, offset);
offset += SliceVectors(input, syncpt_increments, params.syncpoint_count, offset);
offset += SliceVectors(input, fence_thresholds, params.fence_count, offset);
std::size_t offset = 0;
offset += SliceVectors(data, command_buffers, params.cmd_buffer_count, offset);
offset += SliceVectors(data, relocs, params.relocation_count, offset);
offset += SliceVectors(data, reloc_shifts, params.relocation_count, offset);
offset += SliceVectors(data, syncpt_increments, params.syncpoint_count, offset);
offset += SliceVectors(data, fence_thresholds, params.fence_count, offset);
auto& gpu = system.GPU();
if (gpu.UseNvdec()) {
@ -116,14 +110,13 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input, std
cmdlist.size() * sizeof(u32));
gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist);
}
std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
// Some games expect command_buffers to be written back
offset = sizeof(IoctlSubmit);
offset += WriteVectors(output, command_buffers, offset);
offset += WriteVectors(output, relocs, offset);
offset += WriteVectors(output, reloc_shifts, offset);
offset += WriteVectors(output, syncpt_increments, offset);
offset += WriteVectors(output, fence_thresholds, offset);
offset = 0;
offset += WriteVectors(data, command_buffers, offset);
offset += WriteVectors(data, relocs, offset);
offset += WriteVectors(data, reloc_shifts, offset);
offset += WriteVectors(data, syncpt_increments, offset);
offset += WriteVectors(data, fence_thresholds, offset);
return NvResult::Success;
}
@ -140,40 +133,24 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) {
return NvResult::Success;
}
NvResult nvhost_nvdec_common::MapBuffer(std::span<const u8> input, std::span<u8> output) {
IoctlMapBuffer params{};
std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));
for (auto& cmd_buffer : cmd_buffer_handles) {
cmd_buffer.map_address = nvmap.PinHandle(cmd_buffer.map_handle);
NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries) {
const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size()));
for (size_t i = 0; i < num_entries; i++) {
entries[i].map_address = nvmap.PinHandle(entries[i].map_handle);
}
if (output.size() <
sizeof(IoctlMapBuffer) + cmd_buffer_handles.size() * sizeof(MapBufferEntry)) {
return NvResult::InvalidSize;
}
std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer));
std::memcpy(output.data() + sizeof(IoctlMapBuffer), cmd_buffer_handles.data(),
cmd_buffer_handles.size() * sizeof(MapBufferEntry));
return NvResult::Success;
}
NvResult nvhost_nvdec_common::UnmapBuffer(std::span<const u8> input, std::span<u8> output) {
IoctlMapBuffer params{};
std::memcpy(&params, input.data(), std::min(input.size(), sizeof(IoctlMapBuffer)));
std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));
for (auto& cmd_buffer : cmd_buffer_handles) {
nvmap.UnpinHandle(cmd_buffer.map_handle);
NvResult nvhost_nvdec_common::UnmapBuffer(IoctlMapBuffer& params,
std::span<MapBufferEntry> entries) {
const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size()));
for (size_t i = 0; i < num_entries; i++) {
nvmap.UnpinHandle(entries[i].map_handle);
entries[i] = {};
}
std::memset(output.data(), 0, output.size());
params = {};
return NvResult::Success;
}

View file

@ -108,11 +108,11 @@ protected:
/// Ioctl command implementations
NvResult SetNVMAPfd(IoctlSetNvmapFD&);
NvResult Submit(DeviceFD fd, std::span<const u8> input, std::span<u8> output);
NvResult Submit(IoctlSubmit& params, std::span<u8> input, DeviceFD fd);
NvResult GetSyncpoint(IoctlGetSyncpoint& params);
NvResult GetWaitbase(IoctlGetWaitbase& params);
NvResult MapBuffer(std::span<const u8> input, std::span<u8> output);
NvResult UnmapBuffer(std::span<const u8> input, std::span<u8> output);
NvResult MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries);
NvResult UnmapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries);
NvResult SetSubmitTimeout(u32 timeout);
Kernel::KEvent* QueryEvent(u32 event_id) override;

View file

@ -19,7 +19,7 @@ NvResult nvhost_nvjpg::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
case 'H':
switch (command.cmd) {
case 0x1:
return Wrap1(&nvhost_nvjpg::SetNVMAPfd, input, output);
return WrapFixed(this, &nvhost_nvjpg::SetNVMAPfd, input, output);
default:
break;
}

View file

@ -26,16 +26,16 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
if (!host1x_file.fd_to_id.contains(fd)) {
host1x_file.fd_to_id[fd] = host1x_file.vic_next_id++;
}
return Submit(fd, input, output);
return WrapFixedVariable(this, &nvhost_vic::Submit, input, output, fd);
}
case 0x2:
return Wrap1(&nvhost_vic::GetSyncpoint, input, output);
return WrapFixed(this, &nvhost_vic::GetSyncpoint, input, output);
case 0x3:
return Wrap1(&nvhost_vic::GetWaitbase, input, output);
return WrapFixed(this, &nvhost_vic::GetWaitbase, input, output);
case 0x9:
return MapBuffer(input, output);
return WrapFixedVariable(this, &nvhost_vic::MapBuffer, input, output);
case 0xa:
return UnmapBuffer(input, output);
return WrapFixedVariable(this, &nvhost_vic::UnmapBuffer, input, output);
default:
break;
}
@ -43,7 +43,7 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
case 'H':
switch (command.cmd) {
case 0x1:
return Wrap1(&nvhost_vic::SetNVMAPfd, input, output);
return WrapFixed(this, &nvhost_vic::SetNVMAPfd, input, output);
default:
break;
}

View file

@ -32,17 +32,17 @@ NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
case 0x1:
switch (command.cmd) {
case 0x1:
return Wrap1(&nvmap::IocCreate, input, output);
return WrapFixed(this, &nvmap::IocCreate, input, output);
case 0x3:
return Wrap1(&nvmap::IocFromId, input, output);
return WrapFixed(this, &nvmap::IocFromId, input, output);
case 0x4:
return Wrap1(&nvmap::IocAlloc, input, output);
return WrapFixed(this, &nvmap::IocAlloc, input, output);
case 0x5:
return Wrap1(&nvmap::IocFree, input, output);
return WrapFixed(this, &nvmap::IocFree, input, output);
case 0x9:
return Wrap1(&nvmap::IocParam, input, output);
return WrapFixed(this, &nvmap::IocParam, input, output);
case 0xe:
return Wrap1(&nvmap::IocGetId, input, output);
return WrapFixed(this, &nvmap::IocGetId, input, output);
default:
break;
}