nvdrv: rework to remove memcpy

This commit is contained in:
Liam 2023-10-25 00:34:40 -04:00
parent 94b7ac50bb
commit 723df0f368
16 changed files with 243 additions and 225 deletions

View file

@ -11,97 +11,149 @@
namespace Service::Nvidia::Devices { namespace Service::Nvidia::Devices {
struct Ioctl1Traits { struct IoctlOneArgTraits {
template <typename T, typename R, typename A> template <typename T, typename R, typename A, typename... B>
static T GetClassImpl(R (T::*)(A)); static A GetFirstArgImpl(R (T::*)(A, B...));
template <typename T, typename R, typename A>
static A GetArgImpl(R (T::*)(A));
}; };
struct Ioctl23Traits { struct IoctlTwoArgTraits {
template <typename T, typename R, typename A, typename B> template <typename T, typename R, typename A, typename B, typename... C>
static T GetClassImpl(R (T::*)(A, B)); static A GetFirstArgImpl(R (T::*)(A, B, C...));
template <typename T, typename R, typename A, typename B> template <typename T, typename R, typename A, typename B, typename... C>
static A GetArgImpl(R (T::*)(A, B)); static B GetSecondArgImpl(R (T::*)(A, B, C...));
}; };
template <typename T> struct Null {};
struct ContainerType {
using ValueType = T;
};
template <Common::IsContiguousContainer T> // clang-format off
struct ContainerType<T> {
using ValueType = T::value_type;
};
template <typename InnerArg, typename F, typename Self, typename... Rest> template <typename FixedArg, typename VarArg, typename InlInVarArg, typename InlOutVarArg, typename F>
NvResult Wrap(std::span<const u8> input, std::span<u8> output, Self* self, F&& callable, NvResult WrapGeneric(F&& callable, std::span<const u8> input, std::span<const u8> inline_input, std::span<u8> output, std::span<u8> inline_output) {
Rest&&... rest) { constexpr bool HasFixedArg = !std::is_same_v<FixedArg, Null>;
using Arg = ContainerType<InnerArg>::ValueType; constexpr bool HasVarArg = !std::is_same_v<VarArg, Null>;
constexpr bool ArgumentIsContainer = Common::IsContiguousContainer<InnerArg>; constexpr bool HasInlInVarArg = !std::is_same_v<InlInVarArg, Null>;
constexpr bool HasInlOutVarArg = !std::is_same_v<InlOutVarArg, Null>;
// Verify that the input and output sizes are valid. // Declare the fixed-size input value.
const size_t in_params = input.size() / sizeof(Arg); FixedArg fixed{};
const size_t out_params = output.size() / sizeof(Arg); size_t var_offset = 0;
if (in_params * sizeof(Arg) != input.size()) {
return NvResult::InvalidSize; if constexpr (HasFixedArg) {
} // Read the fixed-size input value.
if (out_params * sizeof(Arg) != output.size()) { var_offset = std::min(sizeof(FixedArg), input.size());
return NvResult::InvalidSize; if (var_offset > 0) {
} std::memcpy(&fixed, input.data(), var_offset);
if (in_params == 0 && out_params == 0 && !ArgumentIsContainer) { }
return NvResult::InvalidSize;
} }
// Copy inputs, if needed. // Read the variable-sized inputs.
std::vector<Arg> params(std::max(in_params, out_params)); const size_t num_var_args = HasVarArg ? ((input.size() - var_offset) / sizeof(VarArg)) : 0;
if (in_params > 0) { std::vector<VarArg> var_args(num_var_args);
std::memcpy(params.data(), input.data(), input.size()); if constexpr (HasVarArg) {
if (num_var_args > 0) {
std::memcpy(var_args.data(), input.data() + var_offset, num_var_args * sizeof(VarArg));
}
} }
const size_t num_inl_in_var_args = HasInlInVarArg ? (inline_input.size() / sizeof(InlInVarArg)) : 0;
std::vector<InlInVarArg> inl_in_var_args(num_inl_in_var_args);
if constexpr (HasInlInVarArg) {
if (num_inl_in_var_args > 0) {
std::memcpy(inl_in_var_args.data(), inline_input.data(), num_inl_in_var_args * sizeof(InlInVarArg));
}
}
// Construct inline output data.
const size_t num_inl_out_var_args = HasInlOutVarArg ? (inline_output.size() / sizeof(InlOutVarArg)) : 0;
std::vector<InlOutVarArg> inl_out_var_args(num_inl_out_var_args);
// Perform the call. // Perform the call.
NvResult result; NvResult result = callable(fixed, var_args, inl_in_var_args, inl_out_var_args);
if constexpr (ArgumentIsContainer) {
result = (self->*callable)(params, std::forward<Rest>(rest)...); // Copy outputs.
} else { if constexpr (HasFixedArg) {
result = (self->*callable)(params.front(), std::forward<Rest>(rest)...); if (output.size() > 0) {
std::memcpy(output.data(), &fixed, std::min(output.size(), sizeof(FixedArg)));
}
} }
// Copy outputs, if needed. if constexpr (HasVarArg) {
if (out_params > 0) { if (num_var_args > 0 && output.size() > var_offset) {
std::memcpy(output.data(), params.data(), output.size()); const size_t max_var_size = output.size() - var_offset;
std::memcpy(output.data() + var_offset, var_args.data(), std::min(max_var_size, num_var_args * sizeof(VarArg)));
}
} }
// Copy inline outputs.
if constexpr (HasInlOutVarArg) {
if (num_inl_out_var_args > 0) {
std::memcpy(inline_output.data(), inl_out_var_args.data(), num_inl_out_var_args * sizeof(InlOutVarArg));
}
}
// We're done.
return result; return result;
} }
template <typename F> template <typename Self, typename F, typename... Rest>
NvResult nvdevice::Wrap1(F&& callable, std::span<const u8> input, std::span<u8> output) { NvResult WrapFixed(Self* self, F&& callable, std::span<const u8> input, std::span<u8> output, Rest&&... rest) {
using Self = decltype(Ioctl1Traits::GetClassImpl(callable)); using FixedArg = typename std::remove_reference_t<decltype(IoctlOneArgTraits::GetFirstArgImpl(callable))>;
using InnerArg = std::remove_reference_t<decltype(Ioctl1Traits::GetArgImpl(callable))>;
return Wrap<InnerArg>(input, output, static_cast<Self*>(this), callable); const auto Callable = [&](auto& fixed, auto& var, auto& inl_in, auto& inl_out) -> NvResult {
return (self->*callable)(fixed, std::forward<Rest>(rest)...);
};
return WrapGeneric<FixedArg, Null, Null, Null>(std::move(Callable), input, {}, output, {});
} }
template <typename F> template <typename Self, typename F, typename... Rest>
NvResult nvdevice::Wrap2(F&& callable, std::span<const u8> input, std::span<const u8> inline_input, NvResult WrapFixedInlOut(Self* self, F&& callable, std::span<const u8> input, std::span<u8> output, std::span<u8> inline_output, Rest&&... rest) {
std::span<u8> output) { using FixedArg = typename std::remove_reference_t<decltype(IoctlTwoArgTraits::GetFirstArgImpl(callable))>;
using Self = decltype(Ioctl23Traits::GetClassImpl(callable)); using InlOutVarArg = typename std::remove_reference_t<decltype(IoctlTwoArgTraits::GetSecondArgImpl(callable))>::value_type;
using InnerArg = std::remove_reference_t<decltype(Ioctl23Traits::GetArgImpl(callable))>;
return Wrap<InnerArg>(input, output, static_cast<Self*>(this), callable, inline_input); const auto Callable = [&](auto& fixed, auto& var, auto& inl_in, auto& inl_out) -> NvResult {
return (self->*callable)(fixed, inl_out, std::forward<Rest>(rest)...);
};
return WrapGeneric<FixedArg, Null, Null, InlOutVarArg>(std::move(Callable), input, {}, output, inline_output);
} }
template <typename F> template <typename Self, typename F, typename... Rest>
NvResult nvdevice::Wrap3(F&& callable, std::span<const u8> input, std::span<u8> output, NvResult WrapVariable(Self* self, F&& callable, std::span<const u8> input, std::span<u8> output, Rest&&... rest) {
std::span<u8> inline_output) { using VarArg = typename std::remove_reference_t<decltype(IoctlOneArgTraits::GetFirstArgImpl(callable))>::value_type;
using Self = decltype(Ioctl23Traits::GetClassImpl(callable));
using InnerArg = std::remove_reference_t<decltype(Ioctl23Traits::GetArgImpl(callable))>;
return Wrap<InnerArg>(input, output, static_cast<Self*>(this), callable, inline_output); const auto Callable = [&](auto& fixed, auto& var, auto& inl_in, auto& inl_out) -> NvResult {
return (self->*callable)(var, std::forward<Rest>(rest)...);
};
return WrapGeneric<Null, VarArg, Null, Null>(std::move(Callable), input, {}, output, {});
} }
// Dispatches an ioctl whose payload is a fixed-size argument plus a variable-length array to the
// member function `callable`, invoked on `self`.
//
// The fixed argument type is the (reference-stripped) first parameter of `callable`; the array
// element type is the `value_type` of its second parameter. Decoding `input` and writing back
// to `output` is delegated to WrapGeneric. No inline buffers are involved, so empty spans are
// passed for both inline slots.
//
// self:     object on which `callable` is invoked.
// callable: pointer to member function called as (fixed, variable, rest...).
// input:    raw ioctl input bytes handed to WrapGeneric.
// output:   raw ioctl output bytes handed to WrapGeneric.
// rest:     extra arguments forwarded to `callable` after the two decoded arguments.
//
// Returns whatever WrapGeneric returns for this call.
template <typename Self, typename F, typename... Rest>
NvResult WrapFixedVariable(Self* self, F&& callable, std::span<const u8> input, std::span<u8> output, Rest&&... rest) {
    using FixedArg = std::remove_reference_t<decltype(IoctlTwoArgTraits::GetFirstArgImpl(callable))>;
    using VarArg = typename std::remove_reference_t<decltype(IoctlTwoArgTraits::GetSecondArgImpl(callable))>::value_type;

    // The two inline slots are unused for this ioctl shape, so those parameters stay unnamed.
    const auto invoke = [&](auto& fixed, auto& var, auto&, auto&) -> NvResult {
        return (self->*callable)(fixed, var, std::forward<Rest>(rest)...);
    };

    // Passed as an lvalue: the closure is const, so std::move would only yield a const rvalue
    // and never actually move anything.
    return WrapGeneric<FixedArg, VarArg, Null, Null>(invoke, input, {}, output, {});
}
// Dispatches an ioctl whose payload is a fixed-size argument plus an inline input array to the
// member function `callable`, invoked on `self`.
//
// The fixed argument type is the (reference-stripped) first parameter of `callable`; the inline
// input element type is the `value_type` of its second parameter. Decoding `input` and
// `inline_input` and writing back to `output` is delegated to WrapGeneric. There is no inline
// output, so an empty span is passed for that slot.
//
// self:         object on which `callable` is invoked.
// callable:     pointer to member function called as (fixed, inline_input_items, rest...).
// input:        raw ioctl input bytes handed to WrapGeneric.
// inline_input: raw inline input bytes handed to WrapGeneric.
// output:       raw ioctl output bytes handed to WrapGeneric.
// rest:         extra arguments forwarded to `callable` after the two decoded arguments.
//
// Returns whatever WrapGeneric returns for this call.
template <typename Self, typename F, typename... Rest>
NvResult WrapFixedInlIn(Self* self, F&& callable, std::span<const u8> input, std::span<const u8> inline_input, std::span<u8> output, Rest&&... rest) {
    using FixedArg = std::remove_reference_t<decltype(IoctlTwoArgTraits::GetFirstArgImpl(callable))>;
    using InlInVarArg = typename std::remove_reference_t<decltype(IoctlTwoArgTraits::GetSecondArgImpl(callable))>::value_type;

    // The variable-argument and inline-output slots are unused here, so they stay unnamed.
    const auto invoke = [&](auto& fixed, auto&, auto& inl_in, auto&) -> NvResult {
        return (self->*callable)(fixed, inl_in, std::forward<Rest>(rest)...);
    };

    // Passed as an lvalue: the closure is const, so std::move would only yield a const rvalue
    // and never actually move anything.
    return WrapGeneric<FixedArg, Null, InlInVarArg, Null>(invoke, input, inline_input, output, {});
}
// clang-format on
} // namespace Service::Nvidia::Devices } // namespace Service::Nvidia::Devices

View file

@ -74,18 +74,6 @@ public:
return nullptr; return nullptr;
} }
protected:
// Ioctl dispatch helpers. At the call sites visible in this diff, `callable` is a pointer to a
// member function of the concrete device class (e.g. &nvhost_as_gpu::BindChannel).

// Dispatches an ioctl that carries only the regular `input` and `output` buffers.
template <typename F>
NvResult Wrap1(F&& callable, std::span<const u8> input, std::span<u8> output);
// Like Wrap1, but also hands `inline_input` to the handler as an extra argument.
template <typename F>
NvResult Wrap2(F&& callable, std::span<const u8> input, std::span<const u8> inline_input,
std::span<u8> output);
// Like Wrap1, but also hands `inline_output` to the handler as an extra argument.
template <typename F>
NvResult Wrap3(F&& callable, std::span<const u8> input, std::span<u8> output,
std::span<u8> inline_output);
protected: protected:
Core::System& system; Core::System& system;
}; };

View file

@ -34,21 +34,21 @@ NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> i
case 'A': case 'A':
switch (command.cmd) { switch (command.cmd) {
case 0x1: case 0x1:
return Wrap1(&nvhost_as_gpu::BindChannel, input, output); return WrapFixed(this, &nvhost_as_gpu::BindChannel, input, output);
case 0x2: case 0x2:
return Wrap1(&nvhost_as_gpu::AllocateSpace, input, output); return WrapFixed(this, &nvhost_as_gpu::AllocateSpace, input, output);
case 0x3: case 0x3:
return Wrap1(&nvhost_as_gpu::FreeSpace, input, output); return WrapFixed(this, &nvhost_as_gpu::FreeSpace, input, output);
case 0x5: case 0x5:
return Wrap1(&nvhost_as_gpu::UnmapBuffer, input, output); return WrapFixed(this, &nvhost_as_gpu::UnmapBuffer, input, output);
case 0x6: case 0x6:
return Wrap1(&nvhost_as_gpu::MapBufferEx, input, output); return WrapFixed(this, &nvhost_as_gpu::MapBufferEx, input, output);
case 0x8: case 0x8:
return Wrap1(&nvhost_as_gpu::GetVARegions1, input, output); return WrapFixed(this, &nvhost_as_gpu::GetVARegions1, input, output);
case 0x9: case 0x9:
return Wrap1(&nvhost_as_gpu::AllocAsEx, input, output); return WrapFixed(this, &nvhost_as_gpu::AllocAsEx, input, output);
case 0x14: case 0x14:
return Wrap1(&nvhost_as_gpu::Remap, input, output); return WrapVariable(this, &nvhost_as_gpu::Remap, input, output);
default: default:
break; break;
} }
@ -73,7 +73,8 @@ NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> i
case 'A': case 'A':
switch (command.cmd) { switch (command.cmd) {
case 0x8: case 0x8:
return Wrap3(&nvhost_as_gpu::GetVARegions3, input, output, inline_output); return WrapFixedInlOut(this, &nvhost_as_gpu::GetVARegions3, input, output,
inline_output);
default: default:
break; break;
} }
@ -482,7 +483,7 @@ NvResult nvhost_as_gpu::GetVARegions1(IoctlGetVaRegions& params) {
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_as_gpu::GetVARegions3(IoctlGetVaRegions& params, std::span<u8> inline_output) { NvResult nvhost_as_gpu::GetVARegions3(IoctlGetVaRegions& params, std::span<VaRegion> regions) {
LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr, LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
params.buf_size); params.buf_size);
@ -494,7 +495,10 @@ NvResult nvhost_as_gpu::GetVARegions3(IoctlGetVaRegions& params, std::span<u8> i
GetVARegionsImpl(params); GetVARegionsImpl(params);
std::memcpy(inline_output.data(), params.regions.data(), 2 * sizeof(VaRegion)); const size_t num_regions = std::min(params.regions.size(), regions.size());
for (size_t i = 0; i < num_regions; i++) {
regions[i] = params.regions[i];
}
return NvResult::Success; return NvResult::Success;
} }

View file

@ -149,7 +149,7 @@ private:
void GetVARegionsImpl(IoctlGetVaRegions& params); void GetVARegionsImpl(IoctlGetVaRegions& params);
NvResult GetVARegions1(IoctlGetVaRegions& params); NvResult GetVARegions1(IoctlGetVaRegions& params);
NvResult GetVARegions3(IoctlGetVaRegions& params, std::span<u8> inline_output); NvResult GetVARegions3(IoctlGetVaRegions& params, std::span<VaRegion> regions);
void FreeMappingLocked(u64 offset); void FreeMappingLocked(u64 offset);

View file

@ -41,19 +41,19 @@ NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inp
case 0x0: case 0x0:
switch (command.cmd) { switch (command.cmd) {
case 0x1b: case 0x1b:
return Wrap1(&nvhost_ctrl::NvOsGetConfigU32, input, output); return WrapFixed(this, &nvhost_ctrl::NvOsGetConfigU32, input, output);
case 0x1c: case 0x1c:
return Wrap1(&nvhost_ctrl::IocCtrlClearEventWait, input, output); return WrapFixed(this, &nvhost_ctrl::IocCtrlClearEventWait, input, output);
case 0x1d: case 0x1d:
return Wrap1(&nvhost_ctrl::IocCtrlEventWaitWithAllocation, input, output); return WrapFixed(this, &nvhost_ctrl::IocCtrlEventWait, input, output, true);
case 0x1e: case 0x1e:
return Wrap1(&nvhost_ctrl::IocCtrlEventWaitNotAllocation, input, output); return WrapFixed(this, &nvhost_ctrl::IocCtrlEventWait, input, output, false);
case 0x1f: case 0x1f:
return Wrap1(&nvhost_ctrl::IocCtrlEventRegister, input, output); return WrapFixed(this, &nvhost_ctrl::IocCtrlEventRegister, input, output);
case 0x20: case 0x20:
return Wrap1(&nvhost_ctrl::IocCtrlEventUnregister, input, output); return WrapFixed(this, &nvhost_ctrl::IocCtrlEventUnregister, input, output);
case 0x21: case 0x21:
return Wrap1(&nvhost_ctrl::IocCtrlEventUnregisterBatch, input, output); return WrapFixed(this, &nvhost_ctrl::IocCtrlEventUnregisterBatch, input, output);
} }
break; break;
default: default:
@ -86,7 +86,7 @@ NvResult nvhost_ctrl::NvOsGetConfigU32(IocGetConfigParams& params) {
return NvResult::ConfigVarNotFound; // Returns error on production mode return NvResult::ConfigVarNotFound; // Returns error on production mode
} }
NvResult nvhost_ctrl::IocCtrlEventWaitImpl(IocCtrlEventWaitParams& params, bool is_allocation) { NvResult nvhost_ctrl::IocCtrlEventWait(IocCtrlEventWaitParams& params, bool is_allocation) {
LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_allocation={}", LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_allocation={}",
params.fence.id, params.fence.value, params.timeout, is_allocation); params.fence.id, params.fence.value, params.timeout, is_allocation);

View file

@ -190,20 +190,11 @@ private:
NvResult IocCtrlEventRegister(IocCtrlEventRegisterParams& params); NvResult IocCtrlEventRegister(IocCtrlEventRegisterParams& params);
NvResult IocCtrlEventUnregister(IocCtrlEventUnregisterParams& params); NvResult IocCtrlEventUnregister(IocCtrlEventUnregisterParams& params);
NvResult IocCtrlEventUnregisterBatch(IocCtrlEventUnregisterBatchParams& params); NvResult IocCtrlEventUnregisterBatch(IocCtrlEventUnregisterBatchParams& params);
NvResult IocCtrlEventWait(IocCtrlEventWaitParams& params, bool is_allocation);
NvResult IocCtrlClearEventWait(IocCtrlEventClearParams& params); NvResult IocCtrlClearEventWait(IocCtrlEventClearParams& params);
NvResult FreeEvent(u32 slot); NvResult FreeEvent(u32 slot);
// TODO: these are not the correct names
// Event-wait entry point that forwards to IocCtrlEventWaitImpl with is_allocation = false.
NvResult IocCtrlEventWaitNotAllocation(IocCtrlEventWaitParams& params) {
    constexpr bool is_allocation = false;
    return IocCtrlEventWaitImpl(params, is_allocation);
}
// Event-wait entry point that forwards to IocCtrlEventWaitImpl with is_allocation = true.
NvResult IocCtrlEventWaitWithAllocation(IocCtrlEventWaitParams& params) {
    constexpr bool is_allocation = true;
    return IocCtrlEventWaitImpl(params, is_allocation);
}
// Shared implementation behind the two event-wait wrappers; `is_allocation` is the flag
// each wrapper supplies (false for NotAllocation, true for WithAllocation).
NvResult IocCtrlEventWaitImpl(IocCtrlEventWaitParams& params, bool is_allocation);
EventInterface& events_interface; EventInterface& events_interface;
NvCore::Container& core; NvCore::Container& core;
NvCore::SyncpointManager& syncpoint_manager; NvCore::SyncpointManager& syncpoint_manager;

View file

@ -28,23 +28,23 @@ NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8>
case 'G': case 'G':
switch (command.cmd) { switch (command.cmd) {
case 0x1: case 0x1:
return Wrap1(&nvhost_ctrl_gpu::ZCullGetCtxSize, input, output); return WrapFixed(this, &nvhost_ctrl_gpu::ZCullGetCtxSize, input, output);
case 0x2: case 0x2:
return Wrap1(&nvhost_ctrl_gpu::ZCullGetInfo, input, output); return WrapFixed(this, &nvhost_ctrl_gpu::ZCullGetInfo, input, output);
case 0x3: case 0x3:
return Wrap1(&nvhost_ctrl_gpu::ZBCSetTable, input, output); return WrapFixed(this, &nvhost_ctrl_gpu::ZBCSetTable, input, output);
case 0x4: case 0x4:
return Wrap1(&nvhost_ctrl_gpu::ZBCQueryTable, input, output); return WrapFixed(this, &nvhost_ctrl_gpu::ZBCQueryTable, input, output);
case 0x5: case 0x5:
return Wrap1(&nvhost_ctrl_gpu::GetCharacteristics1, input, output); return WrapFixed(this, &nvhost_ctrl_gpu::GetCharacteristics1, input, output);
case 0x6: case 0x6:
return Wrap1(&nvhost_ctrl_gpu::GetTPCMasks1, input, output); return WrapFixed(this, &nvhost_ctrl_gpu::GetTPCMasks1, input, output);
case 0x7: case 0x7:
return Wrap1(&nvhost_ctrl_gpu::FlushL2, input, output); return WrapFixed(this, &nvhost_ctrl_gpu::FlushL2, input, output);
case 0x14: case 0x14:
return Wrap1(&nvhost_ctrl_gpu::GetActiveSlotMask, input, output); return WrapFixed(this, &nvhost_ctrl_gpu::GetActiveSlotMask, input, output);
case 0x1c: case 0x1c:
return Wrap1(&nvhost_ctrl_gpu::GetGpuTime, input, output); return WrapFixed(this, &nvhost_ctrl_gpu::GetGpuTime, input, output);
default: default:
break; break;
} }
@ -66,9 +66,11 @@ NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8>
case 'G': case 'G':
switch (command.cmd) { switch (command.cmd) {
case 0x5: case 0x5:
return Wrap3(&nvhost_ctrl_gpu::GetCharacteristics3, input, output, inline_output); return WrapFixedInlOut(this, &nvhost_ctrl_gpu::GetCharacteristics3, input, output,
inline_output);
case 0x6: case 0x6:
return Wrap3(&nvhost_ctrl_gpu::GetTPCMasks3, input, output, inline_output); return WrapFixedInlOut(this, &nvhost_ctrl_gpu::GetTPCMasks3, input, output,
inline_output);
default: default:
break; break;
} }
@ -125,8 +127,8 @@ NvResult nvhost_ctrl_gpu::GetCharacteristics1(IoctlCharacteristics& params) {
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_ctrl_gpu::GetCharacteristics3(IoctlCharacteristics& params, NvResult nvhost_ctrl_gpu::GetCharacteristics3(
std::span<u8> inline_output) { IoctlCharacteristics& params, std::span<IoctlGpuCharacteristics> gpu_characteristics) {
LOG_DEBUG(Service_NVDRV, "called"); LOG_DEBUG(Service_NVDRV, "called");
params.gc.arch = 0x120; params.gc.arch = 0x120;
@ -166,8 +168,9 @@ NvResult nvhost_ctrl_gpu::GetCharacteristics3(IoctlCharacteristics& params,
params.gc.gr_compbit_store_base_hw = 0x0; params.gc.gr_compbit_store_base_hw = 0x0;
params.gpu_characteristics_buf_size = 0xA0; params.gpu_characteristics_buf_size = 0xA0;
params.gpu_characteristics_buf_addr = 0xdeadbeef; // Cannot be 0 (UNUSED) params.gpu_characteristics_buf_addr = 0xdeadbeef; // Cannot be 0 (UNUSED)
std::memcpy(inline_output.data(), &params.gc, if (!gpu_characteristics.empty()) {
std::min(sizeof(params.gc), inline_output.size())); gpu_characteristics.front() = params.gc;
}
return NvResult::Success; return NvResult::Success;
} }
@ -179,14 +182,14 @@ NvResult nvhost_ctrl_gpu::GetTPCMasks1(IoctlGpuGetTpcMasksArgs& params) {
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_ctrl_gpu::GetTPCMasks3(IoctlGpuGetTpcMasksArgs& params, NvResult nvhost_ctrl_gpu::GetTPCMasks3(IoctlGpuGetTpcMasksArgs& params, std::span<u32> tpc_mask) {
std::span<u8> inline_output) {
LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size); LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
if (params.mask_buffer_size != 0) { if (params.mask_buffer_size != 0) {
params.tcp_mask = 3; params.tcp_mask = 3;
} }
std::memcpy(inline_output.data(), &params.tcp_mask, if (!tpc_mask.empty()) {
std::min(sizeof(params.tcp_mask), inline_output.size())); tpc_mask.front() = params.tcp_mask;
}
return NvResult::Success; return NvResult::Success;
} }

View file

@ -152,10 +152,11 @@ private:
static_assert(sizeof(IoctlGetGpuTime) == 0x10, "IoctlGetGpuTime is incorrect size"); static_assert(sizeof(IoctlGetGpuTime) == 0x10, "IoctlGetGpuTime is incorrect size");
NvResult GetCharacteristics1(IoctlCharacteristics& params); NvResult GetCharacteristics1(IoctlCharacteristics& params);
NvResult GetCharacteristics3(IoctlCharacteristics& params, std::span<u8> inline_output); NvResult GetCharacteristics3(IoctlCharacteristics& params,
std::span<IoctlGpuCharacteristics> gpu_characteristics);
NvResult GetTPCMasks1(IoctlGpuGetTpcMasksArgs& params); NvResult GetTPCMasks1(IoctlGpuGetTpcMasksArgs& params);
NvResult GetTPCMasks3(IoctlGpuGetTpcMasksArgs& params, std::span<u8> inline_output); NvResult GetTPCMasks3(IoctlGpuGetTpcMasksArgs& params, std::span<u32> tpc_mask);
NvResult GetActiveSlotMask(IoctlActiveSlotMask& params); NvResult GetActiveSlotMask(IoctlActiveSlotMask& params);
NvResult ZCullGetCtxSize(IoctlZcullGetCtxSize& params); NvResult ZCullGetCtxSize(IoctlZcullGetCtxSize& params);

View file

@ -53,7 +53,7 @@ NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
case 0x0: case 0x0:
switch (command.cmd) { switch (command.cmd) {
case 0x3: case 0x3:
return Wrap1(&nvhost_gpu::GetWaitbase, input, output); return WrapFixed(this, &nvhost_gpu::GetWaitbase, input, output);
default: default:
break; break;
} }
@ -61,25 +61,25 @@ NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
case 'H': case 'H':
switch (command.cmd) { switch (command.cmd) {
case 0x1: case 0x1:
return Wrap1(&nvhost_gpu::SetNVMAPfd, input, output); return WrapFixed(this, &nvhost_gpu::SetNVMAPfd, input, output);
case 0x3: case 0x3:
return Wrap1(&nvhost_gpu::ChannelSetTimeout, input, output); return WrapFixed(this, &nvhost_gpu::ChannelSetTimeout, input, output);
case 0x8: case 0x8:
return SubmitGPFIFOBase1(input, false); return WrapFixedVariable(this, &nvhost_gpu::SubmitGPFIFOBase1, input, output, false);
case 0x9: case 0x9:
return Wrap1(&nvhost_gpu::AllocateObjectContext, input, output); return WrapFixed(this, &nvhost_gpu::AllocateObjectContext, input, output);
case 0xb: case 0xb:
return Wrap1(&nvhost_gpu::ZCullBind, input, output); return WrapFixed(this, &nvhost_gpu::ZCullBind, input, output);
case 0xc: case 0xc:
return Wrap1(&nvhost_gpu::SetErrorNotifier, input, output); return WrapFixed(this, &nvhost_gpu::SetErrorNotifier, input, output);
case 0xd: case 0xd:
return Wrap1(&nvhost_gpu::SetChannelPriority, input, output); return WrapFixed(this, &nvhost_gpu::SetChannelPriority, input, output);
case 0x1a: case 0x1a:
return Wrap1(&nvhost_gpu::AllocGPFIFOEx2, input, output); return WrapFixed(this, &nvhost_gpu::AllocGPFIFOEx2, input, output);
case 0x1b: case 0x1b:
return SubmitGPFIFOBase1(input, true); return WrapFixedVariable(this, &nvhost_gpu::SubmitGPFIFOBase1, input, output, true);
case 0x1d: case 0x1d:
return Wrap1(&nvhost_gpu::ChannelSetTimeslice, input, output); return WrapFixed(this, &nvhost_gpu::ChannelSetTimeslice, input, output);
default: default:
break; break;
} }
@ -87,9 +87,9 @@ NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
case 'G': case 'G':
switch (command.cmd) { switch (command.cmd) {
case 0x14: case 0x14:
return Wrap1(&nvhost_gpu::SetClientData, input, output); return WrapFixed(this, &nvhost_gpu::SetClientData, input, output);
case 0x15: case 0x15:
return Wrap1(&nvhost_gpu::GetClientData, input, output); return WrapFixed(this, &nvhost_gpu::GetClientData, input, output);
default: default:
break; break;
} }
@ -105,7 +105,8 @@ NvResult nvhost_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> inpu
case 'H': case 'H':
switch (command.cmd) { switch (command.cmd) {
case 0x1b: case 0x1b:
return SubmitGPFIFOBase2(input, inline_input); return WrapFixedInlIn(this, &nvhost_gpu::SubmitGPFIFOBase2, input, inline_input,
output);
} }
break; break;
} }
@ -271,36 +272,35 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, Tegra::CommandL
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_gpu::SubmitGPFIFOBase1(std::span<const u8> input, bool kickoff) { NvResult nvhost_gpu::SubmitGPFIFOBase1(IoctlSubmitGpfifo& params,
if (input.size() < sizeof(IoctlSubmitGpfifo)) { std::span<Tegra::CommandListHeader> commands, bool kickoff) {
if (params.num_entries > commands.size()) {
UNIMPLEMENTED(); UNIMPLEMENTED();
return NvResult::InvalidSize; return NvResult::InvalidSize;
} }
IoctlSubmitGpfifo params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
Tegra::CommandList entries(params.num_entries);
Tegra::CommandList entries(params.num_entries);
if (kickoff) { if (kickoff) {
system.ApplicationMemory().ReadBlock(params.address, entries.command_lists.data(), system.ApplicationMemory().ReadBlock(params.address, entries.command_lists.data(),
params.num_entries * sizeof(Tegra::CommandListHeader)); params.num_entries * sizeof(Tegra::CommandListHeader));
} else { } else {
std::memcpy(entries.command_lists.data(), &input[sizeof(IoctlSubmitGpfifo)], std::memcpy(entries.command_lists.data(), commands.data(),
params.num_entries * sizeof(Tegra::CommandListHeader)); params.num_entries * sizeof(Tegra::CommandListHeader));
} }
return SubmitGPFIFOImpl(params, std::move(entries)); return SubmitGPFIFOImpl(params, std::move(entries));
} }
NvResult nvhost_gpu::SubmitGPFIFOBase2(std::span<const u8> input, NvResult nvhost_gpu::SubmitGPFIFOBase2(IoctlSubmitGpfifo& params,
std::span<const u8> input_inline) { std::span<const Tegra::CommandListHeader> commands) {
if (input.size() < sizeof(IoctlSubmitGpfifo)) { if (params.num_entries > commands.size()) {
UNIMPLEMENTED(); UNIMPLEMENTED();
return NvResult::InvalidSize; return NvResult::InvalidSize;
} }
IoctlSubmitGpfifo params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
Tegra::CommandList entries(params.num_entries); Tegra::CommandList entries(params.num_entries);
std::memcpy(entries.command_lists.data(), input_inline.data(), input_inline.size()); std::memcpy(entries.command_lists.data(), commands.data(),
params.num_entries * sizeof(Tegra::CommandListHeader));
return SubmitGPFIFOImpl(params, std::move(entries)); return SubmitGPFIFOImpl(params, std::move(entries));
} }

View file

@ -196,8 +196,10 @@ private:
NvResult AllocateObjectContext(IoctlAllocObjCtx& params); NvResult AllocateObjectContext(IoctlAllocObjCtx& params);
NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, Tegra::CommandList&& entries); NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, Tegra::CommandList&& entries);
NvResult SubmitGPFIFOBase1(std::span<const u8> input, bool kickoff = false); NvResult SubmitGPFIFOBase1(IoctlSubmitGpfifo& params,
NvResult SubmitGPFIFOBase2(std::span<const u8> input, std::span<const u8> input_inline); std::span<Tegra::CommandListHeader> commands, bool kickoff = false);
NvResult SubmitGPFIFOBase2(IoctlSubmitGpfifo& params,
std::span<const Tegra::CommandListHeader> commands);
NvResult GetWaitbase(IoctlGetWaitbase& params); NvResult GetWaitbase(IoctlGetWaitbase& params);
NvResult ChannelSetTimeout(IoctlChannelSetTimeout& params); NvResult ChannelSetTimeout(IoctlChannelSetTimeout& params);

View file

@ -26,18 +26,18 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
if (!host1x_file.fd_to_id.contains(fd)) { if (!host1x_file.fd_to_id.contains(fd)) {
host1x_file.fd_to_id[fd] = host1x_file.nvdec_next_id++; host1x_file.fd_to_id[fd] = host1x_file.nvdec_next_id++;
} }
return Submit(fd, input, output); return WrapFixedVariable(this, &nvhost_nvdec::Submit, input, output, fd);
} }
case 0x2: case 0x2:
return Wrap1(&nvhost_nvdec::GetSyncpoint, input, output); return WrapFixed(this, &nvhost_nvdec::GetSyncpoint, input, output);
case 0x3: case 0x3:
return Wrap1(&nvhost_nvdec::GetWaitbase, input, output); return WrapFixed(this, &nvhost_nvdec::GetWaitbase, input, output);
case 0x7: case 0x7:
return Wrap1(&nvhost_nvdec::SetSubmitTimeout, input, output); return WrapFixed(this, &nvhost_nvdec::SetSubmitTimeout, input, output);
case 0x9: case 0x9:
return MapBuffer(input, output); return WrapFixedVariable(this, &nvhost_nvdec::MapBuffer, input, output);
case 0xa: case 0xa:
return UnmapBuffer(input, output); return WrapFixedVariable(this, &nvhost_nvdec::UnmapBuffer, input, output);
default: default:
break; break;
} }
@ -45,7 +45,7 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
case 'H': case 'H':
switch (command.cmd) { switch (command.cmd) {
case 0x1: case 0x1:
return Wrap1(&nvhost_nvdec::SetNVMAPfd, input, output); return WrapFixed(this, &nvhost_nvdec::SetNVMAPfd, input, output);
default: default:
break; break;
} }

View file

@ -76,13 +76,7 @@ NvResult nvhost_nvdec_common::SetNVMAPfd(IoctlSetNvmapFD& params) {
return NvResult::Success; return NvResult::Success;
} }
NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input, std::span<u8> output) { NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, DeviceFD fd) {
if (input.size() < sizeof(IoctlSubmit) || output.size() < sizeof(IoctlSubmit)) {
UNIMPLEMENTED();
return NvResult::InvalidSize;
}
IoctlSubmit params{};
std::memcpy(&params, input.data(), std::min(input.size(), sizeof(IoctlSubmit)));
LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count); LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count);
// Instantiate param buffers // Instantiate param buffers
@ -93,12 +87,12 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input, std
std::vector<u32> fence_thresholds(params.fence_count); std::vector<u32> fence_thresholds(params.fence_count);
// Slice input into their respective buffers // Slice input into their respective buffers
std::size_t offset = sizeof(IoctlSubmit); std::size_t offset = 0;
offset += SliceVectors(input, command_buffers, params.cmd_buffer_count, offset); offset += SliceVectors(data, command_buffers, params.cmd_buffer_count, offset);
offset += SliceVectors(input, relocs, params.relocation_count, offset); offset += SliceVectors(data, relocs, params.relocation_count, offset);
offset += SliceVectors(input, reloc_shifts, params.relocation_count, offset); offset += SliceVectors(data, reloc_shifts, params.relocation_count, offset);
offset += SliceVectors(input, syncpt_increments, params.syncpoint_count, offset); offset += SliceVectors(data, syncpt_increments, params.syncpoint_count, offset);
offset += SliceVectors(input, fence_thresholds, params.fence_count, offset); offset += SliceVectors(data, fence_thresholds, params.fence_count, offset);
auto& gpu = system.GPU(); auto& gpu = system.GPU();
if (gpu.UseNvdec()) { if (gpu.UseNvdec()) {
@ -116,14 +110,13 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input, std
cmdlist.size() * sizeof(u32)); cmdlist.size() * sizeof(u32));
gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist);
} }
std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
// Some games expect command_buffers to be written back // Some games expect command_buffers to be written back
offset = sizeof(IoctlSubmit); offset = 0;
offset += WriteVectors(output, command_buffers, offset); offset += WriteVectors(data, command_buffers, offset);
offset += WriteVectors(output, relocs, offset); offset += WriteVectors(data, relocs, offset);
offset += WriteVectors(output, reloc_shifts, offset); offset += WriteVectors(data, reloc_shifts, offset);
offset += WriteVectors(output, syncpt_increments, offset); offset += WriteVectors(data, syncpt_increments, offset);
offset += WriteVectors(output, fence_thresholds, offset); offset += WriteVectors(data, fence_thresholds, offset);
return NvResult::Success; return NvResult::Success;
} }
@ -140,40 +133,24 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) {
return NvResult::Success; return NvResult::Success;
} }
// MapBuffer, rendered as side-by-side diff rows: each row holds the pre-change
// text followed by the post-change text; rows that exist on one side only
// contain just that side.
//
// Pre-change (raw byte spans): copies an IoctlMapBuffer header out of `input`
// with memcpy, calls SliceVectors to obtain params.num_entries MapBufferEntry
// records starting at offset sizeof(IoctlMapBuffer) (presumably a bounds-aware
// copy out of `input` -- SliceVectors is not visible here, confirm), stores
// nvmap.PinHandle(map_handle) into each entry's map_address, returns
// NvResult::InvalidSize when `output` cannot hold header + entries, and
// otherwise memcpy's the header and the entries into `output`.
// NOTE(review): the pre-change header memcpy reads sizeof(IoctlMapBuffer) bytes
// without consulting input.size().
//
// Post-change (typed arguments): receives `params` and `entries` directly and
// pins handles in place for the first
// min(params.num_entries, static_cast<u32>(entries.size())) entries.
// Both versions end by returning NvResult::Success.
NvResult nvhost_nvdec_common::MapBuffer(std::span<const u8> input, std::span<u8> output) { NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries) {
IoctlMapBuffer params{}; const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size()));
std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); for (size_t i = 0; i < num_entries; i++) {
std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); entries[i].map_address = nvmap.PinHandle(entries[i].map_handle);
SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));
for (auto& cmd_buffer : cmd_buffer_handles) {
cmd_buffer.map_address = nvmap.PinHandle(cmd_buffer.map_handle);
} }
// Pre-change only: size check and write-back into the raw output span.
if (output.size() <
sizeof(IoctlMapBuffer) + cmd_buffer_handles.size() * sizeof(MapBufferEntry)) {
return NvResult::InvalidSize;
}
std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer));
std::memcpy(output.data() + sizeof(IoctlMapBuffer), cmd_buffer_handles.data(),
cmd_buffer_handles.size() * sizeof(MapBufferEntry));
return NvResult::Success; return NvResult::Success;
} }
// UnmapBuffer, rendered as side-by-side diff rows: each row holds the
// pre-change text followed by the post-change text; rows that exist on one
// side only contain just that side.
//
// Pre-change (raw byte spans): copies at most
// min(input.size(), sizeof(IoctlMapBuffer)) bytes of `input` into a local
// IoctlMapBuffer, calls SliceVectors to obtain params.num_entries
// MapBufferEntry records starting at offset sizeof(IoctlMapBuffer)
// (SliceVectors is not visible here -- presumably copies out of `input`,
// confirm), calls nvmap.UnpinHandle on every entry's map_handle, then zeroes
// the whole `output` span with memset.
//
// Post-change (typed arguments): for the first
// min(params.num_entries, static_cast<u32>(entries.size())) entries, calls
// nvmap.UnpinHandle(map_handle) and value-resets the entry; afterwards
// value-resets `params`.
// Both versions end by returning NvResult::Success.
NvResult nvhost_nvdec_common::UnmapBuffer(std::span<const u8> input, std::span<u8> output) { NvResult nvhost_nvdec_common::UnmapBuffer(IoctlMapBuffer& params,
IoctlMapBuffer params{}; std::span<MapBufferEntry> entries) {
std::memcpy(&params, input.data(), std::min(input.size(), sizeof(IoctlMapBuffer))); const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size()));
std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); for (size_t i = 0; i < num_entries; i++) {
nvmap.UnpinHandle(entries[i].map_handle);
SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer)); entries[i] = {};
for (auto& cmd_buffer : cmd_buffer_handles) {
nvmap.UnpinHandle(cmd_buffer.map_handle);
} }
// Final row pair: the old code zeroes the raw output bytes, the new code resets `params` instead.
std::memset(output.data(), 0, output.size()); params = {};
return NvResult::Success; return NvResult::Success;
} }

View file

@ -108,11 +108,11 @@ protected:
// Declarations shown as side-by-side diff rows: the first declaration on each
// row is the pre-change signature, the second is the post-change signature.
// SetNVMAPfd, GetSyncpoint, GetWaitbase, SetSubmitTimeout and QueryEvent are
// identical on both sides. Submit, MapBuffer and UnmapBuffer move from raw
// (input, output) byte spans to a typed params reference plus a span.
/// Ioctl command implementations /// Ioctl command implementations
NvResult SetNVMAPfd(IoctlSetNvmapFD&); NvResult SetNVMAPfd(IoctlSetNvmapFD&);
// NOTE(review): the post-change declaration names the span `input`, while the
// post-change Submit body rows in this diff read from and write to `data`; the
// definition's own parameter list is not visible here -- confirm the names agree.
NvResult Submit(DeviceFD fd, std::span<const u8> input, std::span<u8> output); NvResult Submit(IoctlSubmit& params, std::span<u8> input, DeviceFD fd);
NvResult GetSyncpoint(IoctlGetSyncpoint& params); NvResult GetSyncpoint(IoctlGetSyncpoint& params);
NvResult GetWaitbase(IoctlGetWaitbase& params); NvResult GetWaitbase(IoctlGetWaitbase& params);
// MapBuffer and UnmapBuffer share one post-change shape: (IoctlMapBuffer&, std::span<MapBufferEntry>).
NvResult MapBuffer(std::span<const u8> input, std::span<u8> output); NvResult MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries);
NvResult UnmapBuffer(std::span<const u8> input, std::span<u8> output); NvResult UnmapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries);
NvResult SetSubmitTimeout(u32 timeout); NvResult SetSubmitTimeout(u32 timeout);
Kernel::KEvent* QueryEvent(u32 event_id) override; Kernel::KEvent* QueryEvent(u32 event_id) override;

View file

@ -19,7 +19,7 @@ NvResult nvhost_nvjpg::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
case 'H': case 'H':
switch (command.cmd) { switch (command.cmd) {
case 0x1: case 0x1:
return Wrap1(&nvhost_nvjpg::SetNVMAPfd, input, output); return WrapFixed(this, &nvhost_nvjpg::SetNVMAPfd, input, output);
default: default:
break; break;
} }

View file

@ -26,16 +26,16 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
if (!host1x_file.fd_to_id.contains(fd)) { if (!host1x_file.fd_to_id.contains(fd)) {
host1x_file.fd_to_id[fd] = host1x_file.vic_next_id++; host1x_file.fd_to_id[fd] = host1x_file.vic_next_id++;
} }
return Submit(fd, input, output); return WrapFixedVariable(this, &nvhost_vic::Submit, input, output, fd);
} }
case 0x2: case 0x2:
return Wrap1(&nvhost_vic::GetSyncpoint, input, output); return WrapFixed(this, &nvhost_vic::GetSyncpoint, input, output);
case 0x3: case 0x3:
return Wrap1(&nvhost_vic::GetWaitbase, input, output); return WrapFixed(this, &nvhost_vic::GetWaitbase, input, output);
case 0x9: case 0x9:
return MapBuffer(input, output); return WrapFixedVariable(this, &nvhost_vic::MapBuffer, input, output);
case 0xa: case 0xa:
return UnmapBuffer(input, output); return WrapFixedVariable(this, &nvhost_vic::UnmapBuffer, input, output);
default: default:
break; break;
} }
@ -43,7 +43,7 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
case 'H': case 'H':
switch (command.cmd) { switch (command.cmd) {
case 0x1: case 0x1:
return Wrap1(&nvhost_vic::SetNVMAPfd, input, output); return WrapFixed(this, &nvhost_vic::SetNVMAPfd, input, output);
default: default:
break; break;
} }

View file

@ -32,17 +32,17 @@ NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
case 0x1: case 0x1:
switch (command.cmd) { switch (command.cmd) {
case 0x1: case 0x1:
return Wrap1(&nvmap::IocCreate, input, output); return WrapFixed(this, &nvmap::IocCreate, input, output);
case 0x3: case 0x3:
return Wrap1(&nvmap::IocFromId, input, output); return WrapFixed(this, &nvmap::IocFromId, input, output);
case 0x4: case 0x4:
return Wrap1(&nvmap::IocAlloc, input, output); return WrapFixed(this, &nvmap::IocAlloc, input, output);
case 0x5: case 0x5:
return Wrap1(&nvmap::IocFree, input, output); return WrapFixed(this, &nvmap::IocFree, input, output);
case 0x9: case 0x9:
return Wrap1(&nvmap::IocParam, input, output); return WrapFixed(this, &nvmap::IocParam, input, output);
case 0xe: case 0xe:
return Wrap1(&nvmap::IocGetId, input, output); return WrapFixed(this, &nvmap::IocGetId, input, output);
default: default:
break; break;
} }