Merge pull request #6846 from ameerj/nvdec-gpu-decode

nvdec: Add GPU video decoding for all capable drivers and platforms
This commit is contained in:
Fernando S 2021-09-11 23:11:32 +02:00 committed by GitHub
commit be4e192903
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 337 additions and 135 deletions

View file

@ -518,6 +518,10 @@ set(FFmpeg_COMPONENTS
avutil
swscale)
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
Include(FindPkgConfig REQUIRED)
pkg_check_modules(LIBVA libva)
endif()
if (NOT YUZU_USE_BUNDLED_FFMPEG)
# Use system installed FFmpeg
find_package(FFmpeg QUIET COMPONENTS ${FFmpeg_COMPONENTS})
@ -540,6 +544,9 @@ endif()
if (YUZU_USE_BUNDLED_FFMPEG)
if (NOT WIN32)
# TODO(lat9nq): Move this to externals/ffmpeg/CMakeLists.txt (and move externals/ffmpeg to
# externals/ffmpeg/ffmpeg)
# Build FFmpeg from externals
message(STATUS "Using FFmpeg from externals")
@ -579,20 +586,23 @@ if (YUZU_USE_BUNDLED_FFMPEG)
CACHE PATH "Paths to FFmpeg libraries" FORCE)
endforeach()
set(FFmpeg_INCLUDE_DIR
"${FFmpeg_PREFIX};${FFmpeg_BUILD_DIR}"
CACHE PATH "Path to FFmpeg headers" FORCE)
Include(FindPkgConfig REQUIRED)
pkg_check_modules(LIBVA libva)
pkg_check_modules(CUDA cuda)
pkg_check_modules(FFNVCODEC ffnvcodec)
pkg_check_modules(VDPAU vdpau)
set(FFmpeg_HWACCEL_LIBRARIES)
set(FFmpeg_HWACCEL_FLAGS)
set(FFmpeg_HWACCEL_INCLUDE_DIRS)
set(FFmpeg_HWACCEL_LDFLAGS)
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
Include(FindPkgConfig REQUIRED)
pkg_check_modules(LIBVA libva)
endif()
if(LIBVA_FOUND)
pkg_check_modules(LIBDRM libdrm REQUIRED)
find_package(X11 REQUIRED)
pkg_check_modules(LIBVA-DRM libva-drm REQUIRED)
pkg_check_modules(LIBVA-X11 libva-x11 REQUIRED)
set(FFmpeg_LIBVA_LIBRARIES
list(APPEND FFmpeg_HWACCEL_LIBRARIES
${LIBDRM_LIBRARIES}
${X11_LIBRARIES}
${LIBVA-DRM_LIBRARIES}
@ -602,11 +612,56 @@ if (YUZU_USE_BUNDLED_FFMPEG)
--enable-hwaccel=h264_vaapi
--enable-hwaccel=vp9_vaapi
--enable-libdrm)
list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS
${LIBDRM_INCLUDE_DIRS}
${X11_INCLUDE_DIRS}
${LIBVA-DRM_INCLUDE_DIRS}
${LIBVA-X11_INCLUDE_DIRS}
${LIBVA_INCLUDE_DIRS}
)
message(STATUS "VA-API found")
else()
set(FFmpeg_HWACCEL_FLAGS --disable-vaapi)
endif()
if (FFNVCODEC_FOUND AND CUDA_FOUND)
list(APPEND FFmpeg_HWACCEL_FLAGS
--enable-cuvid
--enable-ffnvcodec
--enable-nvdec
--enable-hwaccel=h264_nvdec
--enable-hwaccel=vp9_nvdec
--extra-cflags=-I${CUDA_INCLUDE_DIRS}
)
list(APPEND FFmpeg_HWACCEL_LIBRARIES
${FFNVCODEC_LIBRARIES}
${CUDA_LIBRARIES}
)
list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS
${FFNVCODEC_INCLUDE_DIRS}
${CUDA_INCLUDE_DIRS}
)
list(APPEND FFmpeg_HWACCEL_LDFLAGS
${FFNVCODEC_LDFLAGS}
${CUDA_LDFLAGS}
)
message(STATUS "ffnvcodec libraries version ${FFNVCODEC_VERSION} found")
endif()
if (VDPAU_FOUND)
list(APPEND FFmpeg_HWACCEL_FLAGS
--enable-vdpau
--enable-hwaccel=h264_vdpau
--enable-hwaccel=vp9_vdpau
)
list(APPEND FFmpeg_HWACCEL_LIBRARIES ${VDPAU_LIBRARIES})
list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS ${VDPAU_INCLUDE_DIRS})
list(APPEND FFmpeg_HWACCEL_LDFLAGS ${VDPAU_LDFLAGS})
message(STATUS "vdpau libraries version ${VDPAU_VERSION} found")
else()
list(APPEND FFmpeg_HWACCEL_FLAGS --disable-vdpau)
endif()
# `configure` parameters builds only exactly what yuzu needs from FFmpeg
# `--disable-vdpau` is needed to avoid linking issues
add_custom_command(
@ -624,7 +679,6 @@ if (YUZU_USE_BUNDLED_FFMPEG)
--disable-network
--disable-postproc
--disable-swresample
--disable-vdpau
--enable-decoder=h264
--enable-decoder=vp9
--cc="${CMAKE_C_COMPILER}"
@ -653,15 +707,26 @@ if (YUZU_USE_BUNDLED_FFMPEG)
${FFmpeg_BUILD_DIR}
)
set(FFmpeg_INCLUDE_DIR
"${FFmpeg_PREFIX};${FFmpeg_BUILD_DIR};${FFmpeg_HWACCEL_INCLUDE_DIRS}"
CACHE PATH "Path to FFmpeg headers" FORCE)
set(FFmpeg_LDFLAGS
"${FFmpeg_HWACCEL_LDFLAGS}"
CACHE STRING "FFmpeg linker flags" FORCE)
# ALL makes this custom target build every time
# but it won't actually build if the DEPENDS parameter is up to date
add_custom_target(ffmpeg-configure ALL DEPENDS ${FFmpeg_MAKEFILE})
add_custom_target(ffmpeg-build ALL DEPENDS ${FFmpeg_BUILD_LIBRARIES} ffmpeg-configure)
link_libraries(${FFmpeg_LIBVA_LIBRARIES})
set(FFmpeg_LIBRARIES ${FFmpeg_LIBVA_LIBRARIES} ${FFmpeg_BUILD_LIBRARIES}
set(FFmpeg_LIBRARIES ${FFmpeg_BUILD_LIBRARIES} ${FFmpeg_HWACCEL_LIBRARIES}
CACHE PATH "Paths to FFmpeg libraries" FORCE)
unset(FFmpeg_BUILD_LIBRARIES)
unset(FFmpeg_LIBVA_LIBRARIES)
unset(FFmpeg_HWACCEL_FLAGS)
unset(FFmpeg_HWACCEL_INCLUDE_DIRS)
unset(FFmpeg_HWACCEL_LDFLAGS)
unset(FFmpeg_HWACCEL_LIBRARIES)
if (FFmpeg_FOUND)
message(STATUS "Found FFmpeg version ${FFmpeg_VERSION}")
@ -670,12 +735,13 @@ if (YUZU_USE_BUNDLED_FFMPEG)
endif()
else() # WIN32
# Use yuzu FFmpeg binaries
set(FFmpeg_EXT_NAME "ffmpeg-4.3.1")
set(FFmpeg_EXT_NAME "ffmpeg-4.4")
set(FFmpeg_PATH "${CMAKE_BINARY_DIR}/externals/${FFmpeg_EXT_NAME}")
download_bundled_external("ffmpeg/" ${FFmpeg_EXT_NAME} "")
set(FFmpeg_FOUND YES)
set(FFmpeg_INCLUDE_DIR "${FFmpeg_PATH}/include" CACHE PATH "Path to FFmpeg headers" FORCE)
set(FFmpeg_LIBRARY_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg library directory" FORCE)
set(FFmpeg_LDFLAGS "" CACHE STRING "FFmpeg linker flags" FORCE)
set(FFmpeg_DLL_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg dll's" FORCE)
set(FFmpeg_LIBRARIES
${FFmpeg_LIBRARY_DIR}/swscale.lib

View file

@ -54,7 +54,7 @@ void LogSettings() {
log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue());
log_setting("Renderer_UseAsynchronousGpuEmulation",
values.use_asynchronous_gpu_emulation.GetValue());
log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue());
log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue());
log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
@ -136,7 +136,7 @@ void RestoreGlobalState(bool is_powered_on) {
values.use_disk_shader_cache.SetGlobal(true);
values.gpu_accuracy.SetGlobal(true);
values.use_asynchronous_gpu_emulation.SetGlobal(true);
values.use_nvdec_emulation.SetGlobal(true);
values.nvdec_emulation.SetGlobal(true);
values.accelerate_astc.SetGlobal(true);
values.use_vsync.SetGlobal(true);
values.shader_backend.SetGlobal(true);

View file

@ -48,6 +48,12 @@ enum class FullscreenMode : u32 {
Exclusive = 1,
};
enum class NvdecEmulation : u32 {
Off = 0,
CPU = 1,
GPU = 2,
};
/** The BasicSetting class is a simple resource manager. It defines a label and default value
* alongside the actual value of the setting for simpler and less-error prone use with frontend
* configurations. Setting a default value and label is required, though subclasses may deviate from
@ -466,7 +472,7 @@ struct Values {
RangedSetting<GPUAccuracy> gpu_accuracy{GPUAccuracy::High, GPUAccuracy::Normal,
GPUAccuracy::Extreme, "gpu_accuracy"};
Setting<bool> use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"};
Setting<bool> use_nvdec_emulation{true, "use_nvdec_emulation"};
Setting<NvdecEmulation> nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"};
Setting<bool> accelerate_astc{true, "accelerate_astc"};
Setting<bool> use_vsync{true, "use_vsync"};
BasicRangedSetting<u16> fps_cap{1000, 1, 1000, "fps_cap"};

View file

@ -72,6 +72,18 @@ static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) {
return "Unknown";
}
static const char* TranslateNvdecEmulation(Settings::NvdecEmulation backend) {
switch (backend) {
case Settings::NvdecEmulation::Off:
return "Off";
case Settings::NvdecEmulation::CPU:
return "CPU";
case Settings::NvdecEmulation::GPU:
return "GPU";
}
return "Unknown";
}
u64 GetTelemetryId() {
u64 telemetry_id{};
const auto filename = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir) / "telemetry_id";
@ -229,8 +241,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue()));
AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
Settings::values.use_asynchronous_gpu_emulation.GetValue());
AddField(field_type, "Renderer_UseNvdecEmulation",
Settings::values.use_nvdec_emulation.GetValue());
AddField(field_type, "Renderer_NvdecEmulation",
TranslateNvdecEmulation(Settings::values.nvdec_emulation.GetValue()));
AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue());
AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue());
AddField(field_type, "Renderer_ShaderBackend",

View file

@ -231,6 +231,7 @@ endif()
target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR})
target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES})
target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS})
add_dependencies(video_core host_shaders)
target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})

View file

@ -5,6 +5,7 @@
#include <fstream>
#include <vector>
#include "common/assert.h"
#include "common/settings.h"
#include "video_core/command_classes/codecs/codec.h"
#include "video_core/command_classes/codecs/h264.h"
#include "video_core/command_classes/codecs/vp9.h"
@ -16,44 +17,28 @@ extern "C" {
}
namespace Tegra {
#if defined(LIBVA_FOUND)
// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c originally under MIT license
namespace {
constexpr std::array<const char*, 2> VAAPI_DRIVERS = {
"i915",
"amdgpu",
};
constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12;
constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P;
AVPixelFormat GetHwFormat(AVCodecContext*, const AVPixelFormat* pix_fmts) {
void AVPacketDeleter(AVPacket* ptr) {
av_packet_free(&ptr);
}
using AVPacketPtr = std::unique_ptr<AVPacket, decltype(&AVPacketDeleter)>;
AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) {
for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
if (*p == AV_PIX_FMT_VAAPI) {
return AV_PIX_FMT_VAAPI;
if (*p == av_codec_ctx->pix_fmt) {
return av_codec_ctx->pix_fmt;
}
}
LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU");
return *pix_fmts;
}
bool CreateVaapiHwdevice(AVBufferRef** av_hw_device) {
AVDictionary* hwdevice_options = nullptr;
av_dict_set(&hwdevice_options, "connection_type", "drm", 0);
for (const auto& driver : VAAPI_DRIVERS) {
av_dict_set(&hwdevice_options, "kernel_driver", driver, 0);
const int hwdevice_error = av_hwdevice_ctx_create(av_hw_device, AV_HWDEVICE_TYPE_VAAPI,
nullptr, hwdevice_options, 0);
if (hwdevice_error >= 0) {
LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver);
av_dict_free(&hwdevice_options);
return true;
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error);
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers");
av_dict_free(&hwdevice_options);
return false;
av_buffer_unref(&av_codec_ctx->hw_device_ctx);
av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT;
return PREFERRED_CPU_FMT;
}
} // namespace
#endif
void AVFrameDeleter(AVFrame* ptr) {
av_frame_free(&ptr);
@ -68,56 +53,110 @@ Codec::~Codec() {
return;
}
// Free libav memory
avcodec_send_packet(av_codec_ctx, nullptr);
AVFrame* av_frame = av_frame_alloc();
avcodec_receive_frame(av_codec_ctx, av_frame);
avcodec_flush_buffers(av_codec_ctx);
av_frame_free(&av_frame);
avcodec_close(av_codec_ctx);
av_buffer_unref(&av_hw_device);
avcodec_free_context(&av_codec_ctx);
av_buffer_unref(&av_gpu_decoder);
}
void Codec::InitializeHwdec() {
// Prioritize integrated GPU to mitigate bandwidth bottlenecks
bool Codec::CreateGpuAvDevice() {
#if defined(LIBVA_FOUND)
if (CreateVaapiHwdevice(&av_hw_device)) {
const auto hw_device_ctx = av_buffer_ref(av_hw_device);
ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
av_codec_ctx->hw_device_ctx = hw_device_ctx;
av_codec_ctx->get_format = GetHwFormat;
static constexpr std::array<const char*, 3> VAAPI_DRIVERS = {
"i915",
"iHD",
"amdgpu",
};
AVDictionary* hwdevice_options = nullptr;
av_dict_set(&hwdevice_options, "connection_type", "drm", 0);
for (const auto& driver : VAAPI_DRIVERS) {
av_dict_set(&hwdevice_options, "kernel_driver", driver, 0);
const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI,
nullptr, hwdevice_options, 0);
if (hwdevice_error >= 0) {
LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver);
av_dict_free(&hwdevice_options);
av_codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI;
return true;
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error);
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers");
av_dict_free(&hwdevice_options);
#endif
static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
static constexpr std::array GPU_DECODER_TYPES{
AV_HWDEVICE_TYPE_CUDA,
#ifdef _WIN32
AV_HWDEVICE_TYPE_D3D11VA,
#else
AV_HWDEVICE_TYPE_VDPAU,
#endif
};
for (const auto& type : GPU_DECODER_TYPES) {
const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
if (hwdevice_res < 0) {
LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
av_hwdevice_get_type_name(type), hwdevice_res);
continue;
}
for (int i = 0;; i++) {
const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i);
if (!config) {
LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.",
av_codec->name, av_hwdevice_get_type_name(type));
break;
}
if (config->methods & HW_CONFIG_METHOD && config->device_type == type) {
av_codec_ctx->pix_fmt = config->pix_fmt;
LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
return true;
}
}
}
return false;
}
void Codec::InitializeAvCodecContext() {
av_codec_ctx = avcodec_alloc_context3(av_codec);
av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
}
void Codec::InitializeGpuDecoder() {
if (!CreateGpuAvDevice()) {
av_buffer_unref(&av_gpu_decoder);
return;
}
#endif
// TODO more GPU accelerated decoders
auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder);
ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
av_codec_ctx->hw_device_ctx = hw_device_ctx;
av_codec_ctx->get_format = GetGpuFormat;
}
void Codec::Initialize() {
AVCodecID codec;
switch (current_codec) {
case NvdecCommon::VideoCodec::H264:
codec = AV_CODEC_ID_H264;
break;
case NvdecCommon::VideoCodec::Vp9:
codec = AV_CODEC_ID_VP9;
break;
default:
UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
const AVCodecID codec = [&] {
switch (current_codec) {
case NvdecCommon::VideoCodec::H264:
return AV_CODEC_ID_H264;
case NvdecCommon::VideoCodec::Vp9:
return AV_CODEC_ID_VP9;
default:
UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
return AV_CODEC_ID_NONE;
}
}();
av_codec = avcodec_find_decoder(codec);
InitializeAvCodecContext();
if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::GPU) {
InitializeGpuDecoder();
}
if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) {
LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res);
avcodec_free_context(&av_codec_ctx);
av_buffer_unref(&av_gpu_decoder);
return;
}
av_codec = avcodec_find_decoder(codec);
av_codec_ctx = avcodec_alloc_context3(av_codec);
av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
InitializeHwdec();
if (!av_codec_ctx->hw_device_ctx) {
LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding");
}
const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
if (av_error < 0) {
LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
avcodec_close(av_codec_ctx);
av_buffer_unref(&av_hw_device);
return;
}
initialized = true;
}
@ -133,6 +172,9 @@ void Codec::Decode() {
if (is_first_frame) {
Initialize();
}
if (!initialized) {
return;
}
bool vp9_hidden_frame = false;
std::vector<u8> frame_data;
if (current_codec == NvdecCommon::VideoCodec::H264) {
@ -141,50 +183,48 @@ void Codec::Decode() {
frame_data = vp9_decoder->ComposeFrameHeader(state);
vp9_hidden_frame = vp9_decoder->WasFrameHidden();
}
AVPacket packet{};
av_init_packet(&packet);
packet.data = frame_data.data();
packet.size = static_cast<s32>(frame_data.size());
if (const int ret = avcodec_send_packet(av_codec_ctx, &packet); ret) {
LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", ret);
AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter};
if (!packet) {
LOG_ERROR(Service_NVDRV, "av_packet_alloc failed");
return;
}
packet->data = frame_data.data();
packet->size = static_cast<s32>(frame_data.size());
if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) {
LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res);
return;
}
// Only receive/store visible frames
if (vp9_hidden_frame) {
return;
}
AVFrame* hw_frame = av_frame_alloc();
AVFrame* sw_frame = hw_frame;
ASSERT_MSG(hw_frame, "av_frame_alloc hw_frame failed");
if (const int ret = avcodec_receive_frame(av_codec_ctx, hw_frame); ret) {
AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter};
AVFramePtr final_frame{nullptr, AVFrameDeleter};
ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed");
if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) {
LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret);
av_frame_free(&hw_frame);
return;
}
if (!hw_frame->width || !hw_frame->height) {
if (initial_frame->width == 0 || initial_frame->height == 0) {
LOG_WARNING(Service_NVDRV, "Zero width or height in frame");
av_frame_free(&hw_frame);
return;
}
#if defined(LIBVA_FOUND)
// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c under MIT license
if (hw_frame->format == AV_PIX_FMT_VAAPI) {
sw_frame = av_frame_alloc();
ASSERT_MSG(sw_frame, "av_frame_alloc sw_frame failed");
if (av_codec_ctx->hw_device_ctx) {
final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed");
// Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp
// because Intel drivers crash unless using AV_PIX_FMT_NV12
sw_frame->format = AV_PIX_FMT_NV12;
const int transfer_data_ret = av_hwframe_transfer_data(sw_frame, hw_frame, 0);
ASSERT_MSG(!transfer_data_ret, "av_hwframe_transfer_data error {}", transfer_data_ret);
av_frame_free(&hw_frame);
final_frame->format = PREFERRED_GPU_FMT;
const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0);
ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret);
} else {
final_frame = std::move(initial_frame);
}
#endif
if (sw_frame->format != AV_PIX_FMT_YUV420P && sw_frame->format != AV_PIX_FMT_NV12) {
UNIMPLEMENTED_MSG("Unexpected video format from host graphics: {}", sw_frame->format);
av_frame_free(&sw_frame);
if (final_frame->format != PREFERRED_CPU_FMT && final_frame->format != PREFERRED_GPU_FMT) {
UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format);
return;
}
av_frames.push(AVFramePtr{sw_frame, AVFrameDeleter});
av_frames.push(std::move(final_frame));
if (av_frames.size() > 10) {
LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame");
av_frames.pop();

View file

@ -50,18 +50,23 @@ public:
/// Returns the value of current_codec
[[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
/// Return name of the current codec
[[nodiscard]] std::string_view GetCurrentCodecName() const;
private:
void InitializeHwdec();
void InitializeAvCodecContext();
void InitializeGpuDecoder();
bool CreateGpuAvDevice();
bool initialized{};
NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};
AVCodec* av_codec{nullptr};
AVBufferRef* av_hw_device{nullptr};
AVCodecContext* av_codec_ctx{nullptr};
AVBufferRef* av_gpu_decoder{nullptr};
GPU& gpu;
const NvdecCommon::NvdecRegisters& state;

View file

@ -95,7 +95,8 @@ const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegister
const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units /
(context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
writer.WriteUe(16);
// TODO (ameerj): Where do we get this number, it seems to be particular for each stream
writer.WriteUe(6); // Max number of reference frames
writer.WriteBit(false);
writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
writer.WriteUe(pic_height - 1);

View file

@ -37,7 +37,8 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
namespace VideoCore {
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
const auto nvdec_value = Settings::values.nvdec_emulation.GetValue();
const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off;
const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec);
auto context = emu_window.CreateSharedContext();

View file

@ -812,7 +812,7 @@ void Config::ReadRendererValues() {
ReadGlobalSetting(Settings::values.use_disk_shader_cache);
ReadGlobalSetting(Settings::values.gpu_accuracy);
ReadGlobalSetting(Settings::values.use_asynchronous_gpu_emulation);
ReadGlobalSetting(Settings::values.use_nvdec_emulation);
ReadGlobalSetting(Settings::values.nvdec_emulation);
ReadGlobalSetting(Settings::values.accelerate_astc);
ReadGlobalSetting(Settings::values.use_vsync);
ReadGlobalSetting(Settings::values.shader_backend);
@ -1349,7 +1349,10 @@ void Config::SaveRendererValues() {
static_cast<u32>(Settings::values.gpu_accuracy.GetDefault()),
Settings::values.gpu_accuracy.UsingGlobal());
WriteGlobalSetting(Settings::values.use_asynchronous_gpu_emulation);
WriteGlobalSetting(Settings::values.use_nvdec_emulation);
WriteSetting(QString::fromStdString(Settings::values.nvdec_emulation.GetLabel()),
static_cast<u32>(Settings::values.nvdec_emulation.GetValue(global)),
static_cast<u32>(Settings::values.nvdec_emulation.GetDefault()),
Settings::values.nvdec_emulation.UsingGlobal());
WriteGlobalSetting(Settings::values.accelerate_astc);
WriteGlobalSetting(Settings::values.use_vsync);
WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()),

View file

@ -182,5 +182,6 @@ private:
Q_DECLARE_METATYPE(Settings::CPUAccuracy);
Q_DECLARE_METATYPE(Settings::GPUAccuracy);
Q_DECLARE_METATYPE(Settings::FullscreenMode);
Q_DECLARE_METATYPE(Settings::NvdecEmulation);
Q_DECLARE_METATYPE(Settings::RendererBackend);
Q_DECLARE_METATYPE(Settings::ShaderBackend);

View file

@ -88,24 +88,30 @@ void ConfigureGraphics::SetConfiguration() {
ui->api_widget->setEnabled(runtime_lock);
ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock);
ui->use_disk_shader_cache->setEnabled(runtime_lock);
ui->use_nvdec_emulation->setEnabled(runtime_lock);
ui->nvdec_emulation_widget->setEnabled(runtime_lock);
ui->accelerate_astc->setEnabled(runtime_lock);
ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue());
ui->use_asynchronous_gpu_emulation->setChecked(
Settings::values.use_asynchronous_gpu_emulation.GetValue());
ui->use_nvdec_emulation->setChecked(Settings::values.use_nvdec_emulation.GetValue());
ui->accelerate_astc->setChecked(Settings::values.accelerate_astc.GetValue());
if (Settings::IsConfiguringGlobal()) {
ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend.GetValue()));
ui->fullscreen_mode_combobox->setCurrentIndex(
static_cast<int>(Settings::values.fullscreen_mode.GetValue()));
ui->nvdec_emulation->setCurrentIndex(
static_cast<int>(Settings::values.nvdec_emulation.GetValue()));
ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue());
} else {
ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend);
ConfigurationShared::SetHighlight(ui->api_widget,
!Settings::values.renderer_backend.UsingGlobal());
ConfigurationShared::SetPerGameSetting(ui->nvdec_emulation,
&Settings::values.nvdec_emulation);
ConfigurationShared::SetHighlight(ui->nvdec_emulation_widget,
!Settings::values.nvdec_emulation.UsingGlobal());
ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox,
&Settings::values.fullscreen_mode);
ConfigurationShared::SetHighlight(ui->fullscreen_mode_label,
@ -137,8 +143,6 @@ void ConfigureGraphics::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation,
ui->use_asynchronous_gpu_emulation,
use_asynchronous_gpu_emulation);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_nvdec_emulation,
ui->use_nvdec_emulation, use_nvdec_emulation);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.accelerate_astc, ui->accelerate_astc,
accelerate_astc);
@ -147,6 +151,9 @@ void ConfigureGraphics::ApplyConfiguration() {
if (Settings::values.renderer_backend.UsingGlobal()) {
Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend());
}
if (Settings::values.nvdec_emulation.UsingGlobal()) {
Settings::values.nvdec_emulation.SetValue(GetCurrentNvdecEmulation());
}
if (Settings::values.shader_backend.UsingGlobal()) {
Settings::values.shader_backend.SetValue(shader_backend);
}
@ -180,6 +187,13 @@ void ConfigureGraphics::ApplyConfiguration() {
}
}
if (ui->nvdec_emulation->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
Settings::values.nvdec_emulation.SetGlobal(true);
} else {
Settings::values.nvdec_emulation.SetGlobal(false);
Settings::values.nvdec_emulation.SetValue(GetCurrentNvdecEmulation());
}
if (ui->bg_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
Settings::values.bg_red.SetGlobal(true);
Settings::values.bg_green.SetGlobal(true);
@ -278,6 +292,20 @@ Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const {
ConfigurationShared::USE_GLOBAL_OFFSET);
}
Settings::NvdecEmulation ConfigureGraphics::GetCurrentNvdecEmulation() const {
if (Settings::IsConfiguringGlobal()) {
return static_cast<Settings::NvdecEmulation>(ui->nvdec_emulation->currentIndex());
}
if (ui->nvdec_emulation->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
Settings::values.nvdec_emulation.SetGlobal(true);
return Settings::values.nvdec_emulation.GetValue();
}
Settings::values.nvdec_emulation.SetGlobal(false);
return static_cast<Settings::NvdecEmulation>(ui->nvdec_emulation->currentIndex() -
ConfigurationShared::USE_GLOBAL_OFFSET);
}
void ConfigureGraphics::SetupPerGameUI() {
if (Settings::IsConfiguringGlobal()) {
ui->api->setEnabled(Settings::values.renderer_backend.UsingGlobal());
@ -286,7 +314,7 @@ void ConfigureGraphics::SetupPerGameUI() {
ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal());
ui->use_asynchronous_gpu_emulation->setEnabled(
Settings::values.use_asynchronous_gpu_emulation.UsingGlobal());
ui->use_nvdec_emulation->setEnabled(Settings::values.use_nvdec_emulation.UsingGlobal());
ui->nvdec_emulation->setEnabled(Settings::values.nvdec_emulation.UsingGlobal());
ui->accelerate_astc->setEnabled(Settings::values.accelerate_astc.UsingGlobal());
ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal());
ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal());
@ -301,8 +329,6 @@ void ConfigureGraphics::SetupPerGameUI() {
ConfigurationShared::SetColoredTristate(
ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache);
ConfigurationShared::SetColoredTristate(
ui->use_nvdec_emulation, Settings::values.use_nvdec_emulation, use_nvdec_emulation);
ConfigurationShared::SetColoredTristate(ui->accelerate_astc, Settings::values.accelerate_astc,
accelerate_astc);
ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation,
@ -316,4 +342,6 @@ void ConfigureGraphics::SetupPerGameUI() {
static_cast<int>(Settings::values.fullscreen_mode.GetValue(true)));
ConfigurationShared::InsertGlobalItem(
ui->api, static_cast<int>(Settings::values.renderer_backend.GetValue(true)));
ConfigurationShared::InsertGlobalItem(
ui->nvdec_emulation, static_cast<int>(Settings::values.nvdec_emulation.GetValue(true)));
}

View file

@ -43,6 +43,7 @@ private:
void SetupPerGameUI();
Settings::RendererBackend GetCurrentGraphicsBackend() const;
Settings::NvdecEmulation GetCurrentNvdecEmulation() const;
std::unique_ptr<Ui::ConfigureGraphics> ui;
QColor bg_color;

View file

@ -167,13 +167,6 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="use_nvdec_emulation">
<property name="text">
<string>Use NVDEC emulation</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="accelerate_astc">
<property name="text">
@ -181,6 +174,50 @@
</property>
</widget>
</item>
<item>
<widget class="QWidget" name="nvdec_emulation_widget" native="true">
<layout class="QHBoxLayout" name="nvdec_emulation_layout">
<property name="leftMargin">
<number>0</number>
</property>
<property name="topMargin">
<number>0</number>
</property>
<property name="rightMargin">
<number>0</number>
</property>
<property name="bottomMargin">
<number>0</number>
</property>
<item>
<widget class="QLabel" name="nvdec_emulation_label">
<property name="text">
<string>NVDEC emulation:</string>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="nvdec_emulation">
<item>
<property name="text">
<string>Disabled</string>
</property>
</item>
<item>
<property name="text">
<string>CPU Decoding</string>
</property>
</item>
<item>
<property name="text">
<string>GPU Decoding</string>
</property>
</item>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QWidget" name="fullscreen_mode_layout" native="true">
<layout class="QHBoxLayout" name="horizontalLayout_1">

View file

@ -465,7 +465,7 @@ void Config::ReadValues() {
ReadSetting("Renderer", Settings::values.disable_fps_limit);
ReadSetting("Renderer", Settings::values.shader_backend);
ReadSetting("Renderer", Settings::values.use_asynchronous_shaders);
ReadSetting("Renderer", Settings::values.use_nvdec_emulation);
ReadSetting("Renderer", Settings::values.nvdec_emulation);
ReadSetting("Renderer", Settings::values.accelerate_astc);
ReadSetting("Renderer", Settings::values.use_fast_gpu_time);

View file

@ -261,9 +261,9 @@ shader_backend =
# 0 (default): Off, 1: On
use_asynchronous_shaders =
# Enable NVDEC emulation.
# 0: Off, 1 (default): On
use_nvdec_emulation =
# NVDEC emulation.
# 0: Disabled, 1: CPU Decoding, 2 (default): GPU Decoding
nvdec_emulation =
# Accelerate ASTC texture decoding.
# 0: Off, 1 (default): On