early-access version 1792
This commit is contained in:
parent
353c0bee63
commit
d3ce8c0322
17 changed files with 124 additions and 50 deletions
|
@ -1,7 +1,7 @@
|
|||
yuzu emulator early access
|
||||
=============
|
||||
|
||||
This is the source code for early-access 1791.
|
||||
This is the source code for early-access 1792.
|
||||
|
||||
## Legal Notice
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ enum : u64 {
|
|||
Size_128_MB = 128ULL * Size_1_MB,
|
||||
Size_448_MB = 448ULL * Size_1_MB,
|
||||
Size_507_MB = 507ULL * Size_1_MB,
|
||||
Size_512_MB = 512ULL * Size_1_MB,
|
||||
Size_562_MB = 562ULL * Size_1_MB,
|
||||
Size_1554_MB = 1554ULL * Size_1_MB,
|
||||
Size_2048_MB = 2048ULL * Size_1_MB,
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "common/common_sizes.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "common/microprofile.h"
|
||||
|
@ -65,8 +66,8 @@ class BufferCache {
|
|||
|
||||
static constexpr BufferId NULL_BUFFER_ID{0};
|
||||
|
||||
static constexpr u64 expected_memory = 512ULL * 1024ULL * 1024ULL;
|
||||
static constexpr u64 critical_memory = 1024ULL * 1024ULL * 1024ULL;
|
||||
static constexpr u64 EXPECTED_MEMORY = Common::Size_512_MB;
|
||||
static constexpr u64 CRITICAL_MEMORY = Common::Size_1_GB;
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
|
@ -105,6 +106,8 @@ public:
|
|||
|
||||
void TickFrame();
|
||||
|
||||
void RunGarbageCollector();
|
||||
|
||||
void WriteMemory(VAddr cpu_addr, u64 size);
|
||||
|
||||
void CachedWriteMemory(VAddr cpu_addr, u64 size);
|
||||
|
@ -349,30 +352,8 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
|||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::TickFrame() {
|
||||
const bool enabled_gc = Settings::values.use_caches_gc.GetValue();
|
||||
SCOPE_EXIT({
|
||||
++frame_tick;
|
||||
delayed_destruction_ring.Tick();
|
||||
});
|
||||
// Calculate hits and shots and move hit bits to the right
|
||||
const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end());
|
||||
const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end());
|
||||
std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1,
|
||||
uniform_cache_hits.begin() + 1);
|
||||
std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1,
|
||||
uniform_cache_shots.begin() + 1);
|
||||
uniform_cache_hits[0] = 0;
|
||||
uniform_cache_shots[0] = 0;
|
||||
|
||||
const bool skip_preferred = hits * 256 < shots * 251;
|
||||
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
|
||||
|
||||
const bool activate_gc = enabled_gc && total_used_memory >= expected_memory;
|
||||
if (!activate_gc) {
|
||||
return;
|
||||
}
|
||||
const bool aggressive_gc = total_used_memory >= critical_memory;
|
||||
void BufferCache<P>::RunGarbageCollector() {
|
||||
const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY;
|
||||
const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
|
||||
int num_iterations = aggressive_gc ? 64 : 32;
|
||||
for (; num_iterations > 0; --num_iterations) {
|
||||
|
@ -391,6 +372,28 @@ void BufferCache<P>::TickFrame() {
|
|||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::TickFrame() {
|
||||
// Calculate hits and shots and move hit bits to the right
|
||||
const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end());
|
||||
const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end());
|
||||
std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1,
|
||||
uniform_cache_hits.begin() + 1);
|
||||
std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1,
|
||||
uniform_cache_shots.begin() + 1);
|
||||
uniform_cache_hits[0] = 0;
|
||||
uniform_cache_shots[0] = 0;
|
||||
|
||||
const bool skip_preferred = hits * 256 < shots * 251;
|
||||
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
|
||||
|
||||
if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) {
|
||||
RunGarbageCollector();
|
||||
}
|
||||
++frame_tick;
|
||||
delayed_destruction_ring.Tick();
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
|
||||
ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
|
||||
|
|
|
@ -235,6 +235,7 @@ struct TextureCacheParams {
|
|||
static constexpr bool ENABLE_VALIDATION = true;
|
||||
static constexpr bool FRAMEBUFFER_BLITS = true;
|
||||
static constexpr bool HAS_EMULATED_COPIES = true;
|
||||
static constexpr bool HAS_DEVICE_MEMORY_INFO = false;
|
||||
|
||||
using Runtime = OpenGL::TextureCacheRuntime;
|
||||
using Image = OpenGL::Image;
|
||||
|
|
|
@ -818,6 +818,10 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
|
|||
});
|
||||
}
|
||||
|
||||
u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
|
||||
return device.GetDeviceLocalMemory();
|
||||
}
|
||||
|
||||
Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_,
|
||||
VAddr cpu_addr_)
|
||||
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler},
|
||||
|
|
|
@ -97,6 +97,8 @@ struct TextureCacheRuntime {
|
|||
// All known Vulkan drivers can natively handle BGR textures
|
||||
return true;
|
||||
}
|
||||
|
||||
u64 GetDeviceLocalMemory() const;
|
||||
};
|
||||
|
||||
class Image : public VideoCommon::ImageBase {
|
||||
|
@ -257,6 +259,7 @@ struct TextureCacheParams {
|
|||
static constexpr bool ENABLE_VALIDATION = true;
|
||||
static constexpr bool FRAMEBUFFER_BLITS = false;
|
||||
static constexpr bool HAS_EMULATED_COPIES = false;
|
||||
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
|
||||
|
||||
using Runtime = Vulkan::TextureCacheRuntime;
|
||||
using Image = Vulkan::Image;
|
||||
|
|
|
@ -283,4 +283,11 @@ std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
|
|||
return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
|
||||
}
|
||||
|
||||
u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format) {
|
||||
constexpr u64 RGBA8_PIXEL_SIZE = 4;
|
||||
const u64 base_block_size = static_cast<u64>(DefaultBlockWidth(format)) *
|
||||
static_cast<u64>(DefaultBlockHeight(format)) * RGBA8_PIXEL_SIZE;
|
||||
return (base_size * base_block_size) / BytesPerBlock(format);
|
||||
}
|
||||
|
||||
} // namespace VideoCore::Surface
|
||||
|
|
|
@ -462,4 +462,6 @@ bool IsPixelFormatSRGB(PixelFormat format);
|
|||
|
||||
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
|
||||
|
||||
u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format);
|
||||
|
||||
} // namespace VideoCore::Surface
|
||||
|
|
|
@ -79,7 +79,7 @@ public:
|
|||
Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept
|
||||
: slot_vector{slot_vector_}, id{id_} {}
|
||||
|
||||
bool IsValid(const u64* bitset) noexcept {
|
||||
bool IsValid(const u64* bitset) const noexcept {
|
||||
return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
|
||||
#include "common/alignment.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_sizes.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/settings.h"
|
||||
|
@ -70,14 +71,16 @@ class TextureCache {
|
|||
static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
|
||||
/// True when some copies have to be emulated
|
||||
static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
|
||||
/// True when the API can provide info about the memory of the device.
|
||||
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
|
||||
|
||||
/// Image view ID for null descriptors
|
||||
static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
|
||||
/// Sampler ID for bugged sampler ids
|
||||
static constexpr SamplerId NULL_SAMPLER_ID{0};
|
||||
|
||||
static constexpr u64 expected_memory = 1024ULL * 1024ULL * 1024ULL;
|
||||
static constexpr u64 critical_memory = 2 * 1024ULL * 1024ULL * 1024ULL;
|
||||
static constexpr u64 DEFAULT_EXPECTED_MEMORY = Common::Size_1_GB;
|
||||
static constexpr u64 DEFAULT_CRITICAL_MEMORY = Common::Size_2_GB;
|
||||
|
||||
using Runtime = typename P::Runtime;
|
||||
using Image = typename P::Image;
|
||||
|
@ -107,6 +110,9 @@ public:
|
|||
/// Notify the cache that a new frame has been queued
|
||||
void TickFrame();
|
||||
|
||||
/// Runs the Garbage Collector.
|
||||
void RunGarbageCollector();
|
||||
|
||||
/// Return a constant reference to the given image view id
|
||||
[[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
|
||||
|
||||
|
@ -338,6 +344,8 @@ private:
|
|||
|
||||
bool has_deleted_images = false;
|
||||
u64 total_used_memory = 0;
|
||||
u64 expected_memory;
|
||||
u64 critical_memory;
|
||||
|
||||
SlotVector<Image> slot_images;
|
||||
SlotVector<ImageView> slot_image_views;
|
||||
|
@ -381,19 +389,21 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
|
|||
void(slot_samplers.insert(runtime, sampler_descriptor));
|
||||
|
||||
deletion_iterator = slot_images.begin();
|
||||
|
||||
if constexpr (HAS_DEVICE_MEMORY_INFO) {
|
||||
const auto device_memory = runtime.GetDeviceLocalMemory();
|
||||
const u64 possible_expected_memory = (device_memory * 3) / 10;
|
||||
const u64 possible_critical_memory = (device_memory * 6) / 10;
|
||||
expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY);
|
||||
critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY);
|
||||
} else {
|
||||
expected_memory = DEFAULT_EXPECTED_MEMORY;
|
||||
critical_memory = DEFAULT_CRITICAL_MEMORY;
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::TickFrame() {
|
||||
const bool enabled_gc = Settings::values.use_caches_gc.GetValue();
|
||||
if (!enabled_gc) {
|
||||
// @Note(Blinkhawk): compile error with SCOPE_EXIT on msvc.
|
||||
sentenced_images.Tick();
|
||||
sentenced_framebuffers.Tick();
|
||||
sentenced_image_view.Tick();
|
||||
++frame_tick;
|
||||
return;
|
||||
}
|
||||
void TextureCache<P>::RunGarbageCollector() {
|
||||
const bool high_priority_mode = total_used_memory >= expected_memory;
|
||||
const bool aggressive_mode = total_used_memory >= critical_memory;
|
||||
const u64 ticks_to_destroy = high_priority_mode ? 60 : 100;
|
||||
|
@ -450,11 +460,18 @@ void TextureCache<P>::TickFrame() {
|
|||
UnregisterImage(image_id);
|
||||
DeleteImage(image_id);
|
||||
if (is_bad_overlap) {
|
||||
num_iterations++;
|
||||
++num_iterations;
|
||||
}
|
||||
}
|
||||
++deletion_iterator;
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::TickFrame() {
|
||||
if (Settings::values.use_caches_gc.GetValue()) {
|
||||
RunGarbageCollector();
|
||||
}
|
||||
sentenced_images.Tick();
|
||||
sentenced_framebuffers.Tick();
|
||||
sentenced_image_view.Tick();
|
||||
|
@ -1276,8 +1293,13 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
|
|||
image.flags |= ImageFlagBits::Registered;
|
||||
ForEachPage(image.cpu_addr, image.guest_size_bytes,
|
||||
[this, image_id](u64 page) { page_table[page].push_back(image_id); });
|
||||
total_used_memory +=
|
||||
Common::AlignUp(std::max(image.guest_size_bytes, image.unswizzled_size_bytes), 1024);
|
||||
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
|
||||
if ((IsPixelFormatASTC(image.info.format) &&
|
||||
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
|
||||
True(image.flags & ImageFlagBits::Converted)) {
|
||||
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
|
||||
}
|
||||
total_used_memory += Common::AlignUp(tentative_size, 1024);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
@ -1287,8 +1309,13 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
|
|||
"Trying to unregister an already registered image");
|
||||
image.flags &= ~ImageFlagBits::Registered;
|
||||
image.flags &= ~ImageFlagBits::BadOverlap;
|
||||
total_used_memory -=
|
||||
Common::AlignUp(std::max(image.guest_size_bytes, image.unswizzled_size_bytes), 1024);
|
||||
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
|
||||
if ((IsPixelFormatASTC(image.info.format) &&
|
||||
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
|
||||
True(image.flags & ImageFlagBits::Converted)) {
|
||||
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
|
||||
}
|
||||
total_used_memory -= Common::AlignUp(tentative_size, 1024);
|
||||
ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
|
||||
const auto page_it = page_table.find(page);
|
||||
if (page_it == page_table.end()) {
|
||||
|
|
|
@ -408,6 +408,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
|||
}
|
||||
logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld);
|
||||
|
||||
CollectPhysicalMemoryInfo();
|
||||
CollectTelemetryParameters();
|
||||
CollectToolingInfo();
|
||||
|
||||
|
@ -818,6 +819,19 @@ void Device::CollectTelemetryParameters() {
|
|||
}
|
||||
}
|
||||
|
||||
void Device::CollectPhysicalMemoryInfo() {
|
||||
const auto mem_properties = physical.GetMemoryProperties();
|
||||
const std::size_t num_properties = mem_properties.memoryTypeCount;
|
||||
device_access_memory = 0;
|
||||
for (std::size_t element = 0; element < num_properties; element++) {
|
||||
if ((mem_properties.memoryTypes[element].propertyFlags &
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
|
||||
const std::size_t heap_index = mem_properties.memoryTypes[element].heapIndex;
|
||||
device_access_memory += mem_properties.memoryHeaps[heap_index].size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Device::CollectToolingInfo() {
|
||||
if (!ext_tooling_info) {
|
||||
return;
|
||||
|
|
|
@ -225,6 +225,10 @@ public:
|
|||
return use_asynchronous_shaders;
|
||||
}
|
||||
|
||||
u64 GetDeviceLocalMemory() const {
|
||||
return device_access_memory;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Checks if the physical device is suitable.
|
||||
void CheckSuitability(bool requires_swapchain) const;
|
||||
|
@ -244,6 +248,9 @@ private:
|
|||
/// Collects information about attached tools.
|
||||
void CollectToolingInfo();
|
||||
|
||||
/// Collects information about the device's local memory.
|
||||
void CollectPhysicalMemoryInfo();
|
||||
|
||||
/// Returns a list of queue initialization descriptors.
|
||||
std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
|
||||
|
||||
|
@ -302,6 +309,8 @@ private:
|
|||
|
||||
/// Nsight Aftermath GPU crash tracker
|
||||
std::unique_ptr<NsightAftermathTracker> nsight_aftermath_tracker;
|
||||
|
||||
u64 device_access_memory;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -221,7 +221,7 @@ const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> Config::default
|
|||
// This must be in alphabetical order according to action name as it must have the same order as
|
||||
// UISetting::values.shortcuts, which is alphabetically ordered.
|
||||
// clang-format off
|
||||
const std::array<UISettings::Shortcut, 17> Config::default_hotkeys{{
|
||||
const std::array<UISettings::Shortcut, 18> Config::default_hotkeys{{
|
||||
{QStringLiteral("Capture Screenshot"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::WidgetWithChildrenShortcut}},
|
||||
{QStringLiteral("Change Docked Mode"), QStringLiteral("Main Window"), {QStringLiteral("F10"), Qt::ApplicationShortcut}},
|
||||
{QStringLiteral("Continue/Pause Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F4"), Qt::WindowShortcut}},
|
||||
|
@ -238,6 +238,7 @@ const std::array<UISettings::Shortcut, 17> Config::default_hotkeys{{
|
|||
{QStringLiteral("Toggle Filter Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F"), Qt::WindowShortcut}},
|
||||
{QStringLiteral("Toggle Mouse Panning"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F9"), Qt::ApplicationShortcut}},
|
||||
{QStringLiteral("Toggle Speed Limit"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+Z"), Qt::ApplicationShortcut}},
|
||||
{QStringLiteral("Toggle Frame Limiter"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+U"), Qt::ApplicationShortcut}},
|
||||
{QStringLiteral("Toggle Status Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+S"), Qt::WindowShortcut}},
|
||||
}};
|
||||
// clang-format on
|
||||
|
|
|
@ -42,7 +42,7 @@ public:
|
|||
default_mouse_buttons;
|
||||
static const std::array<int, Settings::NativeKeyboard::NumKeyboardKeys> default_keyboard_keys;
|
||||
static const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> default_keyboard_mods;
|
||||
static const std::array<UISettings::Shortcut, 17> default_hotkeys;
|
||||
static const std::array<UISettings::Shortcut, 18> default_hotkeys;
|
||||
|
||||
private:
|
||||
void Initialize(const std::string& config_name);
|
||||
|
|
|
@ -109,7 +109,7 @@
|
|||
<string>Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4 GB by flushing the least used textures/buffers. May cause issues in a few games.</string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Enable GPU caches garbage collection (unsafe)</string>
|
||||
<string>Enable GPU cache garbage collection (unsafe)</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
|
|
|
@ -1025,7 +1025,9 @@ void GMainWindow::InitializeHotkeys() {
|
|||
connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Mute Audio"), this),
|
||||
&QShortcut::activated, this,
|
||||
[] { Settings::values.audio_muted = !Settings::values.audio_muted; });
|
||||
|
||||
connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Toggle Frame Limiter"), this),
|
||||
&QShortcut::activated, this,
|
||||
[] { Settings::values.unlimit_fps = !Settings::values.unlimit_fps; });
|
||||
connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Toggle Mouse Panning"), this),
|
||||
&QShortcut::activated, this, [&] {
|
||||
Settings::values.mouse_panning = !Settings::values.mouse_panning;
|
||||
|
|
|
@ -227,7 +227,7 @@ use_asynchronous_gpu_emulation =
|
|||
# 0: Off, 1 (default): On
|
||||
use_vsync =
|
||||
|
||||
# Whether to use garbage collection or not.
|
||||
# Whether to use garbage collection or not for GPU caches.
|
||||
# 0 (default): Off, 1: On
|
||||
use_caches_gc =
|
||||
|
||||
|
|
Loading…
Reference in a new issue