early-access version 1829
This commit is contained in:
parent
230a0aa491
commit
d2ffa9ae83
10 changed files with 93 additions and 67 deletions
|
@ -1,7 +1,7 @@
|
|||
yuzu emulator early access
|
||||
=============
|
||||
|
||||
This is the source code for early-access 1826.
|
||||
This is the source code for early-access 1829.
|
||||
|
||||
## Legal Notice
|
||||
|
||||
|
|
|
@ -19,29 +19,26 @@
|
|||
namespace Service::Nvidia::Devices {
|
||||
|
||||
namespace {
|
||||
// Copies count amount of type T from the input vector into the dst vector.
|
||||
// Returns the number of bytes written into dst.
|
||||
// Splice vectors will copy count amount of type T from the input vector into the dst vector.
|
||||
template <typename T>
|
||||
std::size_t SpliceVectors(const std::vector<u8>& input, std::vector<T>& dst, std::size_t count,
|
||||
std::size_t offset) {
|
||||
if (dst.empty()) {
|
||||
return 0;
|
||||
if (!dst.empty()) {
|
||||
std::memcpy(dst.data(), input.data() + offset, count * sizeof(T));
|
||||
}
|
||||
const size_t bytes_copied = count * sizeof(T);
|
||||
std::memcpy(dst.data(), input.data() + offset, bytes_copied);
|
||||
return bytes_copied;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Writes the data in src to an offset into the dst vector. The offset is specified in bytes
|
||||
// Returns the number of bytes written into dst.
|
||||
// Write vectors will write data to the output buffer
|
||||
template <typename T>
|
||||
std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::size_t offset) {
|
||||
if (src.empty()) {
|
||||
return 0;
|
||||
} else {
|
||||
std::memcpy(dst.data() + offset, src.data(), src.size() * sizeof(T));
|
||||
offset += src.size() * sizeof(T);
|
||||
return offset;
|
||||
}
|
||||
const size_t bytes_copied = src.size() * sizeof(T);
|
||||
std::memcpy(dst.data() + offset, src.data(), bytes_copied);
|
||||
return bytes_copied;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
|
@ -65,6 +62,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
|
|||
LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count);
|
||||
|
||||
// Instantiate param buffers
|
||||
std::size_t offset = sizeof(IoctlSubmit);
|
||||
std::vector<CommandBuffer> command_buffers(params.cmd_buffer_count);
|
||||
std::vector<Reloc> relocs(params.relocation_count);
|
||||
std::vector<u32> reloc_shifts(params.relocation_count);
|
||||
|
@ -73,13 +71,12 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
|
|||
std::vector<Fence> fences(params.fence_count);
|
||||
|
||||
// Splice input into their respective buffers
|
||||
std::size_t offset = sizeof(IoctlSubmit);
|
||||
offset += SpliceVectors(input, command_buffers, params.cmd_buffer_count, offset);
|
||||
offset += SpliceVectors(input, relocs, params.relocation_count, offset);
|
||||
offset += SpliceVectors(input, reloc_shifts, params.relocation_count, offset);
|
||||
offset += SpliceVectors(input, syncpt_increments, params.syncpoint_count, offset);
|
||||
offset += SpliceVectors(input, wait_checks, params.syncpoint_count, offset);
|
||||
offset += SpliceVectors(input, fences, params.fence_count, offset);
|
||||
offset = SpliceVectors(input, command_buffers, params.cmd_buffer_count, offset);
|
||||
offset = SpliceVectors(input, relocs, params.relocation_count, offset);
|
||||
offset = SpliceVectors(input, reloc_shifts, params.relocation_count, offset);
|
||||
offset = SpliceVectors(input, syncpt_increments, params.syncpoint_count, offset);
|
||||
offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset);
|
||||
offset = SpliceVectors(input, fences, params.fence_count, offset);
|
||||
|
||||
auto& gpu = system.GPU();
|
||||
if (gpu.UseNvdec()) {
|
||||
|
@ -91,7 +88,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
|
|||
}
|
||||
}
|
||||
for (const auto& cmd_buffer : command_buffers) {
|
||||
const auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
|
||||
auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
|
||||
ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
|
||||
const auto map = FindBufferMap(object->dma_map_addr);
|
||||
if (!map) {
|
||||
|
@ -105,19 +102,21 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
|
|||
gpu.PushCommandBuffer(cmdlist);
|
||||
}
|
||||
if (gpu.UseNvdec()) {
|
||||
|
||||
fences[0].value = syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1);
|
||||
|
||||
Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}};
|
||||
gpu.PushCommandBuffer(cmdlist);
|
||||
}
|
||||
std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmit));
|
||||
// Some games expect command_buffers to be written back
|
||||
offset = sizeof(IoctlSubmit);
|
||||
offset += WriteVectors(output, command_buffers, offset);
|
||||
offset += WriteVectors(output, relocs, offset);
|
||||
offset += WriteVectors(output, reloc_shifts, offset);
|
||||
offset += WriteVectors(output, syncpt_increments, offset);
|
||||
offset += WriteVectors(output, wait_checks, offset);
|
||||
offset += WriteVectors(output, fences, offset);
|
||||
offset = WriteVectors(output, command_buffers, offset);
|
||||
offset = WriteVectors(output, relocs, offset);
|
||||
offset = WriteVectors(output, reloc_shifts, offset);
|
||||
offset = WriteVectors(output, syncpt_increments, offset);
|
||||
offset = WriteVectors(output, wait_checks, offset);
|
||||
offset = WriteVectors(output, fences, offset);
|
||||
|
||||
return NvResult::Success;
|
||||
}
|
||||
|
@ -153,10 +152,10 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto
|
|||
|
||||
auto& gpu = system.GPU();
|
||||
|
||||
for (auto& cmd_buffer : cmd_buffer_handles) {
|
||||
auto object{nvmap_dev->GetObject(cmd_buffer.map_handle)};
|
||||
for (auto& cmf_buff : cmd_buffer_handles) {
|
||||
auto object{nvmap_dev->GetObject(cmf_buff.map_handle)};
|
||||
if (!object) {
|
||||
LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmd_buffer.map_handle);
|
||||
LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle);
|
||||
std::memcpy(output.data(), ¶ms, output.size());
|
||||
return NvResult::InvalidState;
|
||||
}
|
||||
|
@ -171,7 +170,7 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto
|
|||
if (!object->dma_map_addr) {
|
||||
LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size);
|
||||
} else {
|
||||
cmd_buffer.map_address = object->dma_map_addr;
|
||||
cmf_buff.map_address = object->dma_map_addr;
|
||||
AddBufferMap(object->dma_map_addr, object->size, object->addr,
|
||||
object->status == nvmap::Object::Status::Allocated);
|
||||
}
|
||||
|
@ -191,10 +190,10 @@ NvResult nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vec
|
|||
|
||||
auto& gpu = system.GPU();
|
||||
|
||||
for (auto& cmd_buffer : cmd_buffer_handles) {
|
||||
const auto object{nvmap_dev->GetObject(cmd_buffer.map_handle)};
|
||||
for (auto& cmf_buff : cmd_buffer_handles) {
|
||||
const auto object{nvmap_dev->GetObject(cmf_buff.map_handle)};
|
||||
if (!object) {
|
||||
LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmd_buffer.map_handle);
|
||||
LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle);
|
||||
std::memcpy(output.data(), ¶ms, output.size());
|
||||
return NvResult::InvalidState;
|
||||
}
|
||||
|
|
|
@ -293,6 +293,7 @@ endif()
|
|||
if (MSVC)
|
||||
target_compile_options(video_core PRIVATE
|
||||
/we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data
|
||||
/we4244 # 'var' : conversion from integer to 'type', possible loss of data
|
||||
/we4456 # Declaration of 'identifier' hides previous local declaration
|
||||
/we4457 # Declaration of 'identifier' hides function parameter
|
||||
/we4458 # Declaration of 'identifier' hides class member
|
||||
|
|
|
@ -99,7 +99,7 @@ class BufferCache {
|
|||
};
|
||||
|
||||
public:
|
||||
static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = 4_KiB;
|
||||
static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
|
||||
|
||||
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
|
@ -109,8 +109,6 @@ public:
|
|||
|
||||
void TickFrame();
|
||||
|
||||
void RunGarbageCollector();
|
||||
|
||||
void WriteMemory(VAddr cpu_addr, u64 size);
|
||||
|
||||
void CachedWriteMemory(VAddr cpu_addr, u64 size);
|
||||
|
@ -197,6 +195,8 @@ private:
|
|||
((cpu_addr + size) & ~Core::Memory::PAGE_MASK);
|
||||
}
|
||||
|
||||
void RunGarbageCollector();
|
||||
|
||||
void BindHostIndexBuffer();
|
||||
|
||||
void BindHostVertexBuffers();
|
||||
|
@ -416,8 +416,9 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
|
|||
|
||||
template <class P>
|
||||
void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
|
||||
ForEachBufferInRange(cpu_addr, size,
|
||||
[&](BufferId, Buffer& buffer) { DownloadBufferMemory(buffer); });
|
||||
ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
|
||||
DownloadBufferMemory(buffer, cpu_addr, size);
|
||||
});
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
|
|
@ -14,10 +14,18 @@ extern "C" {
|
|||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wconversion"
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4242) // conversion from 'type' to 'type', possible loss of data
|
||||
#pragma warning(disable : 4244) // conversion from 'type' to 'type', possible loss of data
|
||||
#endif
|
||||
#include <libavcodec/avcodec.h>
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
|
|
|
@ -3,7 +3,28 @@
|
|||
// Refer to the license.txt file included.
|
||||
|
||||
#include <array>
|
||||
|
||||
extern "C" {
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wconversion"
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4244) // conversion from 'type' to 'type', possible loss of data
|
||||
#pragma warning(push)
|
||||
#endif
|
||||
#include <libswscale/swscale.h>
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
}
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
|
||||
#include "video_core/command_classes/nvdec.h"
|
||||
#include "video_core/command_classes/vic.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
|
@ -11,10 +32,6 @@
|
|||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
extern "C" {
|
||||
#include <libswscale/swscale.h>
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
|
||||
|
|
|
@ -55,8 +55,9 @@ size_t BytesPerIndex(VkIndexType index_type) {
|
|||
template <typename T>
|
||||
std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
|
||||
std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
|
||||
std::ranges::transform(indices, indices.begin(),
|
||||
[quad, first](u32 index) { return first + index + quad * 4; });
|
||||
for (T& index : indices) {
|
||||
index = static_cast<T>(first + index + quad * 4);
|
||||
}
|
||||
return indices;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
|
|
@ -111,9 +111,6 @@ public:
|
|||
/// Notify the cache that a new frame has been queued
|
||||
void TickFrame();
|
||||
|
||||
/// Runs the Garbage Collector.
|
||||
void RunGarbageCollector();
|
||||
|
||||
/// Return a constant reference to the given image view id
|
||||
[[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
|
||||
|
||||
|
@ -226,6 +223,9 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
/// Runs the Garbage Collector.
|
||||
void RunGarbageCollector();
|
||||
|
||||
/// Fills image_view_ids in the image views in indices
|
||||
void FillImageViews(DescriptorTable<TICEntry>& table,
|
||||
std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
|
||||
|
|
|
@ -842,9 +842,9 @@ void Device::CollectTelemetryParameters() {
|
|||
|
||||
void Device::CollectPhysicalMemoryInfo() {
|
||||
const auto mem_properties = physical.GetMemoryProperties();
|
||||
const std::size_t num_properties = mem_properties.memoryHeapCount;
|
||||
const size_t num_properties = mem_properties.memoryHeapCount;
|
||||
device_access_memory = 0;
|
||||
for (std::size_t element = 0; element < num_properties; element++) {
|
||||
for (size_t element = 0; element < num_properties; ++element) {
|
||||
if ((mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) {
|
||||
device_access_memory += mem_properties.memoryHeaps[element].size;
|
||||
}
|
||||
|
|
|
@ -267,21 +267,22 @@ private:
|
|||
bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
|
||||
FormatType format_type) const;
|
||||
|
||||
VkInstance instance; ///< Vulkan instance.
|
||||
vk::DeviceDispatch dld; ///< Device function pointers.
|
||||
vk::PhysicalDevice physical; ///< Physical device.
|
||||
VkPhysicalDeviceProperties properties; ///< Device properties.
|
||||
vk::Device logical; ///< Logical device.
|
||||
vk::Queue graphics_queue; ///< Main graphics queue.
|
||||
vk::Queue present_queue; ///< Main present queue.
|
||||
u32 instance_version{}; ///< Vulkan onstance version.
|
||||
u32 graphics_family{}; ///< Main graphics queue family index.
|
||||
u32 present_family{}; ///< Main present queue family index.
|
||||
VkDriverIdKHR driver_id{}; ///< Driver ID.
|
||||
VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed
|
||||
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
|
||||
bool is_float16_supported{}; ///< Support for float16 arithmetics.
|
||||
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
|
||||
VkInstance instance; ///< Vulkan instance.
|
||||
vk::DeviceDispatch dld; ///< Device function pointers.
|
||||
vk::PhysicalDevice physical; ///< Physical device.
|
||||
VkPhysicalDeviceProperties properties; ///< Device properties.
|
||||
vk::Device logical; ///< Logical device.
|
||||
vk::Queue graphics_queue; ///< Main graphics queue.
|
||||
vk::Queue present_queue; ///< Main present queue.
|
||||
u32 instance_version{}; ///< Vulkan onstance version.
|
||||
u32 graphics_family{}; ///< Main graphics queue family index.
|
||||
u32 present_family{}; ///< Main present queue family index.
|
||||
VkDriverIdKHR driver_id{}; ///< Driver ID.
|
||||
VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
|
||||
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
|
||||
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
|
||||
bool is_float16_supported{}; ///< Support for float16 arithmetics.
|
||||
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
|
||||
bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
|
||||
bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images.
|
||||
bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
|
||||
|
@ -312,8 +313,6 @@ private:
|
|||
|
||||
/// Nsight Aftermath GPU crash tracker
|
||||
std::unique_ptr<NsightAftermathTracker> nsight_aftermath_tracker;
|
||||
|
||||
u64 device_access_memory;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
Loading…
Reference in a new issue