From d15d58f409fd8c74171a423129b27d378f8f602d Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Sun, 3 Apr 2022 03:41:15 +0200 Subject: [PATCH] early-access version 2639 --- README.md | 2 +- externals/CMakeLists.txt | 3 - src/audio_core/CMakeLists.txt | 3 - src/audio_core/cubeb_sink.cpp | 25 +---- src/audio_core/sdl2_sink.cpp | 4 +- src/common/atomic_ops.h | 73 +++++++++++++ src/common/settings.h | 3 +- src/common/x64/native_clock.cpp | 10 +- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 50 +++++---- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 50 +++++---- .../nvflinger/buffer_queue_consumer.cpp | 44 ++++++-- .../service/nvflinger/buffer_queue_consumer.h | 4 +- .../service/nvflinger/buffer_queue_core.cpp | 1 - .../hle/service/nvflinger/buffer_queue_core.h | 2 - .../nvflinger/buffer_queue_producer.cpp | 103 +++++++----------- .../service/nvflinger/buffer_queue_producer.h | 2 +- .../hle/service/nvflinger/consumer_base.cpp | 29 ++--- .../hle/service/nvflinger/consumer_base.h | 3 +- src/core/hle/service/nvflinger/nvflinger.cpp | 4 +- src/video_core/memory_manager.cpp | 26 ++--- src/video_core/memory_manager.h | 1 - src/video_core/texture_cache/texture_cache.h | 19 ++-- .../texture_cache/texture_cache_base.h | 3 + src/yuzu/configuration/configure_cpu.ui | 5 + src/yuzu_cmd/default_ini.h | 6 - 25 files changed, 272 insertions(+), 203 deletions(-) diff --git a/README.md b/README.md index 049dfb30e..fd9807a9a 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 2637. +This is the source code for early-access 2639. ## Legal Notice diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 82e8ef18c..64361de5f 100755 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -68,9 +68,6 @@ if (YUZU_USE_EXTERNAL_SDL2) add_library(SDL2 ALIAS SDL2-static) endif() -# SoundTouch -add_subdirectory(soundtouch) - # Cubeb if(ENABLE_CUBEB) set(BUILD_TESTS OFF CACHE BOOL "") diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt index 090dd19b1..e553b8203 100755 --- a/src/audio_core/CMakeLists.txt +++ b/src/audio_core/CMakeLists.txt @@ -36,8 +36,6 @@ add_library(audio_core STATIC splitter_context.h stream.cpp stream.h - time_stretch.cpp - time_stretch.h voice_context.cpp voice_context.h @@ -63,7 +61,6 @@ if (NOT MSVC) endif() target_link_libraries(audio_core PUBLIC common core) -target_link_libraries(audio_core PRIVATE SoundTouch) if(ENABLE_CUBEB) target_link_libraries(audio_core PRIVATE cubeb) diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp index 93c35e785..13de3087c 100755 --- a/src/audio_core/cubeb_sink.cpp +++ b/src/audio_core/cubeb_sink.cpp @@ -7,7 +7,6 @@ #include #include "audio_core/cubeb_sink.h" #include "audio_core/stream.h" -#include "audio_core/time_stretch.h" #include "common/assert.h" #include "common/logging/log.h" #include "common/ring_buffer.h" @@ -23,8 +22,7 @@ class CubebSinkStream final : public SinkStream { public: CubebSinkStream(cubeb* ctx_, u32 sample_rate, u32 num_channels_, cubeb_devid output_device, const std::string& name) - : ctx{ctx_}, num_channels{std::min(num_channels_, 6u)}, time_stretch{sample_rate, - num_channels} { + : ctx{ctx_}, num_channels{std::min(num_channels_, 6u)} { cubeb_stream_params params{}; params.rate = sample_rate; @@ -131,7 +129,6 @@ private: Common::RingBuffer queue; std::array last_frame{}; std::atomic should_flush{}; - TimeStretcher time_stretch; static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer, void* output_buffer, long num_frames); @@ -205,25 +202,7 @@ long CubebSinkStream::DataCallback([[maybe_unused]] cubeb_stream* stream, void* const std::size_t num_channels = impl->GetNumChannels(); const std::size_t samples_to_write = num_channels * num_frames; - std::size_t samples_written; - - /* - if (Settings::values.enable_audio_stretching.GetValue()) { - const std::vector in{impl->queue.Pop()}; - const std::size_t num_in{in.size() / num_channels}; - s16* const out{reinterpret_cast(buffer)}; - const std::size_t out_frames = - impl->time_stretch.Process(in.data(), num_in, out, num_frames); - samples_written = out_frames * num_channels; - - if (impl->should_flush) { - impl->time_stretch.Flush(); - impl->should_flush = false; - } - } else { - samples_written = impl->queue.Pop(buffer, samples_to_write); - }*/ - samples_written = impl->queue.Pop(buffer, samples_to_write); + const std::size_t samples_written = impl->queue.Pop(buffer, samples_to_write); if (samples_written >= num_channels) { std::memcpy(&impl->last_frame[0], buffer + (samples_written - num_channels) * sizeof(s16), diff --git a/src/audio_core/sdl2_sink.cpp b/src/audio_core/sdl2_sink.cpp index 62d3716a6..2d14ce2cb 100755 --- a/src/audio_core/sdl2_sink.cpp +++ b/src/audio_core/sdl2_sink.cpp @@ -7,7 +7,6 @@ #include #include "audio_core/sdl2_sink.h" #include "audio_core/stream.h" -#include "audio_core/time_stretch.h" #include "common/assert.h" #include "common/logging/log.h" //#include "common/settings.h" @@ -27,7 +26,7 @@ namespace AudioCore { class SDLSinkStream final : public SinkStream { public: SDLSinkStream(u32 sample_rate, u32 num_channels_, const std::string& output_device) - : num_channels{std::min(num_channels_, 6u)}, time_stretch{sample_rate, num_channels} { + : num_channels{std::min(num_channels_, 6u)} { SDL_AudioSpec spec; spec.freq = sample_rate; @@ -116,7 +115,6 @@ private: SDL_AudioDeviceID dev = 0; u32 num_channels{}; std::atomic should_flush{}; - TimeStretcher time_stretch; }; SDLSink::SDLSink(std::string_view target_device_name) { diff --git a/src/common/atomic_ops.h b/src/common/atomic_ops.h index b963e7b99..69fde8421 100755 --- a/src/common/atomic_ops.h +++ b/src/common/atomic_ops.h @@ -46,6 +46,43 @@ namespace Common { reinterpret_cast<__int64*>(expected.data())) != 0; } +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected, + u8& actual) { + actual = + _InterlockedCompareExchange8(reinterpret_cast(pointer), value, expected); + return actual == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected, + u16& actual) { + actual = + _InterlockedCompareExchange16(reinterpret_cast(pointer), value, expected); + return actual == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected, + u32& actual) { + actual = + _InterlockedCompareExchange(reinterpret_cast(pointer), value, expected); + return actual == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected, + u64& actual) { + actual = _InterlockedCompareExchange64(reinterpret_cast(pointer), value, + expected); + return actual == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected, + u128& actual) { + const bool result = + _InterlockedCompareExchange128(reinterpret_cast(pointer), value[1], + value[0], reinterpret_cast<__int64*>(expected.data())) != 0; + actual = expected; + return result; +} + [[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) { u128 result{}; _InterlockedCompareExchange128(reinterpret_cast(pointer), result[1], @@ -79,6 +116,42 @@ namespace Common { return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a); } +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected, + u8& actual) { + actual = __sync_val_compare_and_swap(pointer, expected, value); + return actual == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected, + u16& actual) { + actual = __sync_val_compare_and_swap(pointer, expected, value); + return actual == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected, + u32& actual) { + actual = __sync_val_compare_and_swap(pointer, expected, value); + return actual == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected, + u64& actual) { + actual = __sync_val_compare_and_swap(pointer, expected, value); + return actual == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected, + u128& actual) { + unsigned __int128 value_a; + unsigned __int128 expected_a; + unsigned __int128 actual_a; + std::memcpy(&value_a, value.data(), sizeof(u128)); + std::memcpy(&expected_a, expected.data(), sizeof(u128)); + actual_a = __sync_val_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a); + std::memcpy(actual.data(), &actual_a, sizeof(u128)); + return actual_a == expected_a; +} + [[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) { unsigned __int128 zeros_a = 0; unsigned __int128 result_a = diff --git a/src/common/settings.h b/src/common/settings.h index 3de4ba1a7..3b7be63b3 100755 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -38,6 +38,7 @@ enum class CPUAccuracy : u32 { Auto = 0, Accurate = 1, Unsafe = 2, + Paranoid = 3, }; enum class FullscreenMode : u32 { @@ -470,7 +471,7 @@ struct Values { // Cpu RangedSetting cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto, - CPUAccuracy::Unsafe, "cpu_accuracy"}; + CPUAccuracy::Paranoid, "cpu_accuracy"}; // TODO: remove cpu_accuracy_first_time, migration setting added 8 July 2021 BasicSetting cpu_accuracy_first_time{true, "cpu_accuracy_first_time"}; BasicSetting cpu_debug_mode{false, "cpu_debug_mode"}; diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 2a2664e5d..7a3f21dcf 100755 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -55,8 +55,9 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen u64 NativeClock::GetRTSC() { TimePoint new_time_point{}; TimePoint current_time_point{}; + + current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); do { - current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); _mm_mfence(); const u64 current_measure = __rdtsc(); u64 diff = current_measure - current_time_point.inner.last_measure; @@ -66,7 +67,7 @@ u64 NativeClock::GetRTSC() { : current_time_point.inner.last_measure; new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, - current_time_point.pack)); + current_time_point.pack, current_time_point.pack)); /// The clock cannot be more precise than the guest timer, remove the lower bits return new_time_point.inner.accumulated_ticks & inaccuracy_mask; } @@ -75,13 +76,14 @@ void NativeClock::Pause(bool is_paused) { if (!is_paused) { TimePoint current_time_point{}; TimePoint new_time_point{}; + + current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); do { - current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); new_time_point.pack = current_time_point.pack; _mm_mfence(); new_time_point.inner.last_measure = __rdtsc(); } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, - current_time_point.pack)); + current_time_point.pack, current_time_point.pack)); } } diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 286976623..d4cbd0c20 100755 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -186,35 +186,41 @@ std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable* if (!Settings::values.cpuopt_recompile_exclusives) { config.recompile_on_exclusive_fastmem_failure = false; } - } + } else { + // Unsafe optimizations + if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) { + config.unsafe_optimizations = true; + if (Settings::values.cpuopt_unsafe_unfuse_fma) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; + } + if (Settings::values.cpuopt_unsafe_reduce_fp_error) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP; + } + if (Settings::values.cpuopt_unsafe_ignore_standard_fpcr) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue; + } + if (Settings::values.cpuopt_unsafe_inaccurate_nan) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; + } + if (Settings::values.cpuopt_unsafe_ignore_global_monitor) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; + } + } - // Unsafe optimizations - if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) { - config.unsafe_optimizations = true; - if (Settings::values.cpuopt_unsafe_unfuse_fma) { + // Curated optimizations + if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Auto) { + config.unsafe_optimizations = true; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; - } - if (Settings::values.cpuopt_unsafe_reduce_fp_error) { - config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP; - } - if (Settings::values.cpuopt_unsafe_ignore_standard_fpcr) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue; - } - if (Settings::values.cpuopt_unsafe_inaccurate_nan) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; - } - if (Settings::values.cpuopt_unsafe_ignore_global_monitor) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; } - } - // Curated optimizations - if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Auto) { - config.unsafe_optimizations = true; - config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; - config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue; - config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; - config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; + // Paranoia mode for debugging optimizations + if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Paranoid) { + config.unsafe_optimizations = false; + config.optimizations = Dynarmic::no_optimizations; + } } return std::make_unique(config); diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 24107f9f6..6d5a1ecfd 100755 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -248,35 +248,41 @@ std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable* if (!Settings::values.cpuopt_recompile_exclusives) { config.recompile_on_exclusive_fastmem_failure = false; } - } + } else { + // Unsafe optimizations + if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) { + config.unsafe_optimizations = true; + if (Settings::values.cpuopt_unsafe_unfuse_fma) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; + } + if (Settings::values.cpuopt_unsafe_reduce_fp_error) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP; + } + if (Settings::values.cpuopt_unsafe_inaccurate_nan) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; + } + if (Settings::values.cpuopt_unsafe_fastmem_check) { + config.fastmem_address_space_bits = 64; + } + if (Settings::values.cpuopt_unsafe_ignore_global_monitor) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; + } + } - // Unsafe optimizations - if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) { - config.unsafe_optimizations = true; - if (Settings::values.cpuopt_unsafe_unfuse_fma) { + // Curated optimizations + if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Auto) { + config.unsafe_optimizations = true; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; - } - if (Settings::values.cpuopt_unsafe_reduce_fp_error) { - config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP; - } - if (Settings::values.cpuopt_unsafe_inaccurate_nan) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; - } - if (Settings::values.cpuopt_unsafe_fastmem_check) { config.fastmem_address_space_bits = 64; - } - if (Settings::values.cpuopt_unsafe_ignore_global_monitor) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; } - } - // Curated optimizations - if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Auto) { - config.unsafe_optimizations = true; - config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; - config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; - config.fastmem_address_space_bits = 64; - config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; + // Paranoia mode for debugging optimizations + if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Paranoid) { + config.unsafe_optimizations = false; + config.optimizations = Dynarmic::no_optimizations; + } } return std::make_shared(config); diff --git a/src/core/hle/service/nvflinger/buffer_queue_consumer.cpp b/src/core/hle/service/nvflinger/buffer_queue_consumer.cpp index 41fbba219..c527c577e 100755 --- a/src/core/hle/service/nvflinger/buffer_queue_consumer.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue_consumer.cpp @@ -18,8 +18,7 @@ BufferQueueConsumer::BufferQueueConsumer(std::shared_ptr core_) BufferQueueConsumer::~BufferQueueConsumer() = default; Status BufferQueueConsumer::AcquireBuffer(BufferItem* out_buffer, - std::chrono::nanoseconds expected_present, - u64 max_frame_number) { + std::chrono::nanoseconds expected_present) { std::scoped_lock lock(core->mutex); // Check that the consumer doesn't currently have the maximum number of buffers acquired. @@ -50,12 +49,6 @@ Status BufferQueueConsumer::AcquireBuffer(BufferItem* out_buffer, while (core->queue.size() > 1 && !core->queue[0].is_auto_timestamp) { const auto& buffer_item{core->queue[1]}; - // If dropping entry[0] would leave us with a buffer that the consumer is not yet ready - // for, don't drop it. - if (max_frame_number && buffer_item.frame_number > max_frame_number) { - break; - } - // If entry[1] is timely, drop entry[0] (and repeat). const auto desired_present = buffer_item.timestamp; if (desired_present < expected_present.count() - MAX_REASONABLE_NSEC || @@ -200,4 +193,39 @@ Status BufferQueueConsumer::Connect(std::shared_ptr consumer_ return Status::NoError; } +Status BufferQueueConsumer::GetReleasedBuffers(u64* out_slot_mask) { + if (out_slot_mask == nullptr) { + LOG_ERROR(Service_NVFlinger, "out_slot_mask may not be nullptr"); + return Status::BadValue; + } + + std::scoped_lock lock(core->mutex); + + if (core->is_abandoned) { + LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned"); + return Status::NoInit; + } + + u64 mask = 0; + for (int s = 0; s < BufferQueueDefs::NUM_BUFFER_SLOTS; ++s) { + if (!slots[s].acquire_called) { + mask |= (1ULL << s); + } + } + + // Remove from the mask queued buffers for which acquire has been called, since the consumer + // will not receive their buffer addresses and so must retain their cached information + auto current(core->queue.begin()); + while (current != core->queue.end()) { + if (current->acquire_called) { + mask &= ~(1ULL << current->slot); + } + ++current; + } + + LOG_DEBUG(Service_NVFlinger, "returning mask {}", mask); + *out_slot_mask = mask; + return Status::NoError; +} + } // namespace Service::android diff --git a/src/core/hle/service/nvflinger/buffer_queue_consumer.h b/src/core/hle/service/nvflinger/buffer_queue_consumer.h index f22854394..8a047fe06 100755 --- a/src/core/hle/service/nvflinger/buffer_queue_consumer.h +++ b/src/core/hle/service/nvflinger/buffer_queue_consumer.h @@ -24,10 +24,10 @@ public: explicit BufferQueueConsumer(std::shared_ptr core_); ~BufferQueueConsumer(); - Status AcquireBuffer(BufferItem* out_buffer, std::chrono::nanoseconds expected_present, - u64 max_frame_number = 0); + Status AcquireBuffer(BufferItem* out_buffer, std::chrono::nanoseconds expected_present); Status ReleaseBuffer(s32 slot, u64 frame_number, const Fence& release_fence); Status Connect(std::shared_ptr consumer_listener, bool controlled_by_app); + Status GetReleasedBuffers(u64* out_slot_mask); private: std::shared_ptr core; diff --git a/src/core/hle/service/nvflinger/buffer_queue_core.cpp b/src/core/hle/service/nvflinger/buffer_queue_core.cpp index 6082610e0..3a0481786 100755 --- a/src/core/hle/service/nvflinger/buffer_queue_core.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue_core.cpp @@ -95,7 +95,6 @@ void BufferQueueCore::FreeBufferLocked(s32 slot) { } void BufferQueueCore::FreeAllBuffersLocked() { - queue.clear(); buffer_has_been_queued = false; for (s32 slot = 0; slot < BufferQueueDefs::NUM_BUFFER_SLOTS; ++slot) { diff --git a/src/core/hle/service/nvflinger/buffer_queue_core.h b/src/core/hle/service/nvflinger/buffer_queue_core.h index 4dfd53387..e4e0937cb 100755 --- a/src/core/hle/service/nvflinger/buffer_queue_core.h +++ b/src/core/hle/service/nvflinger/buffer_queue_core.h @@ -73,8 +73,6 @@ private: u32 transform_hint{}; bool is_allocating{}; mutable std::condition_variable_any is_allocating_condition; - bool allow_allocation{true}; - u64 buffer_age{}; bool is_shutting_down{}; }; diff --git a/src/core/hle/service/nvflinger/buffer_queue_producer.cpp b/src/core/hle/service/nvflinger/buffer_queue_producer.cpp index 0833be57a..3d6e990c3 100755 --- a/src/core/hle/service/nvflinger/buffer_queue_producer.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue_producer.cpp @@ -62,11 +62,12 @@ Status BufferQueueProducer::RequestBuffer(s32 slot, std::shared_ptr listener; + std::shared_ptr listener; { std::scoped_lock lock(core->mutex); core->WaitWhileAllocatingLocked(); + if (core->is_abandoned) { LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned"); return Status::NoInit; @@ -120,7 +121,7 @@ Status BufferQueueProducer::SetBufferCount(s32 buffer_count) { } Status BufferQueueProducer::WaitForFreeSlotThenRelock(bool async, s32* found, - Status* returnFlags) const { + Status* return_flags) const { bool try_again = true; while (try_again) { @@ -142,10 +143,12 @@ Status BufferQueueProducer::WaitForFreeSlotThenRelock(bool async, s32* found, ASSERT(slots[s].buffer_state == BufferState::Free); if (slots[s].graphic_buffer != nullptr) { core->FreeBufferLocked(s); - *returnFlags |= Status::ReleaseAllBuffers; + *return_flags |= Status::ReleaseAllBuffers; } } + // Look for a free buffer to give to the client + *found = BufferQueueCore::INVALID_BUFFER_SLOT; s32 dequeued_count{}; s32 acquired_count{}; for (s32 s{}; s < max_buffer_count; ++s) { @@ -235,68 +238,50 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool { std::scoped_lock lock(core->mutex); core->WaitWhileAllocatingLocked(); + if (format == PixelFormat::NoFormat) { format = core->default_buffer_format; } // Enable the usage bits the consumer requested usage |= core->consumer_usage_bit; + + s32 found{}; + Status status = WaitForFreeSlotThenRelock(async, &found, &return_flags); + if (status != Status::NoError) { + return status; + } + + // This should not happen + if (found == BufferQueueCore::INVALID_BUFFER_SLOT) { + LOG_ERROR(Service_NVFlinger, "no available buffer slots"); + return Status::Busy; + } + + *out_slot = found; + + attached_by_consumer = slots[found].attached_by_consumer; + const bool use_default_size = !width && !height; if (use_default_size) { width = core->default_width; height = core->default_height; } - s32 found = BufferItem::INVALID_BUFFER_SLOT; - while (found == BufferItem::INVALID_BUFFER_SLOT) { - Status status = WaitForFreeSlotThenRelock(async, &found, &return_flags); - if (status != Status::NoError) { - return status; - } - - // This should not happen - if (found == BufferQueueCore::INVALID_BUFFER_SLOT) { - LOG_DEBUG(Service_NVFlinger, "no available buffer slots"); - return Status::Busy; - } - - const std::shared_ptr& buffer(slots[found].graphic_buffer); - - // If we are not allowed to allocate new buffers, WaitForFreeSlotThenRelock must have - // returned a slot containing a buffer. If this buffer would require reallocation to - // meet the requested attributes, we free it and attempt to get another one. - if (!core->allow_allocation) { - if (buffer->NeedsReallocation(width, height, format, usage)) { - core->FreeBufferLocked(found); - found = BufferItem::INVALID_BUFFER_SLOT; - continue; - } - } - } - - *out_slot = found; - attached_by_consumer = slots[found].attached_by_consumer; slots[found].buffer_state = BufferState::Dequeued; const std::shared_ptr& buffer(slots[found].graphic_buffer); - - if ((buffer == nullptr) || buffer->NeedsReallocation(width, height, format, usage)) { + if ((buffer == nullptr) || (buffer->Width() != width) || (buffer->Height() != height) || + (buffer->Format() != format) || ((buffer->Usage() & usage) != usage)) { slots[found].acquire_called = false; slots[found].graphic_buffer = nullptr; slots[found].request_buffer_called = false; slots[found].fence = Fence::NoFence(); - core->buffer_age = 0; + return_flags |= Status::BufferNeedsReallocation; - } else { - // We add 1 because that will be the frame number when this buffer - // is queued - core->buffer_age = core->frame_counter + 1 - slots[found].frame_number; } - LOG_DEBUG(Service_NVFlinger, "setting buffer age to {}", core->buffer_age); - *out_fence = slots[found].fence; - slots[found].fence = Fence::NoFence(); } @@ -311,6 +296,7 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool { std::scoped_lock lock(core->mutex); + if (core->is_abandoned) { LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned"); return Status::NoInit; @@ -327,6 +313,7 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool LOG_DEBUG(Service_NVFlinger, "returning slot={} frame={}, flags={}", *out_slot, slots[*out_slot].frame_number, return_flags); + return return_flags; } @@ -334,6 +321,7 @@ Status BufferQueueProducer::DetachBuffer(s32 slot) { LOG_DEBUG(Service_NVFlinger, "slot {}", slot); std::scoped_lock lock(core->mutex); + if (core->is_abandoned) { LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned"); return Status::NoInit; @@ -369,7 +357,6 @@ Status BufferQueueProducer::DetachNextBuffer(std::shared_ptr* out } std::scoped_lock lock(core->mutex); - core->WaitWhileAllocatingLocked(); if (core->is_abandoned) { @@ -423,6 +410,7 @@ Status BufferQueueProducer::AttachBuffer(s32* out_slot, return status; } + // This should not happen if (found == BufferQueueCore::INVALID_BUFFER_SLOT) { LOG_ERROR(Service_NVFlinger, "No available buffer slots"); return Status::Busy; @@ -466,8 +454,8 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input, return Status::BadValue; } - std::shared_ptr frameAvailableListener; - std::shared_ptr frameReplacedListener; + std::shared_ptr frame_available_listener; + std::shared_ptr frame_replaced_listener; s32 callback_ticket{}; BufferItem item; @@ -541,12 +529,13 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input, item.fence = fence; item.is_droppable = core->dequeue_buffer_cannot_block || async; item.swap_interval = swap_interval; + sticky_transform = sticky_transform_; if (core->queue.empty()) { // When the queue is empty, we can simply queue this buffer core->queue.push_back(item); - frameAvailableListener = core->consumer_listener; + frame_available_listener = core->consumer_listener; } else { // When the queue is not empty, we need to look at the front buffer // state to see if we need to replace it @@ -563,10 +552,10 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input, } // Overwrite the droppable buffer with the incoming one *front = item; - frameReplacedListener = core->consumer_listener; + frame_replaced_listener = core->consumer_listener; } else { core->queue.push_back(item); - frameAvailableListener = core->consumer_listener; + frame_available_listener = core->consumer_listener; } } @@ -592,10 +581,10 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input, callback_condition.wait(callback_mutex); } - if (frameAvailableListener != nullptr) { - frameAvailableListener->OnFrameAvailable(item); - } else if (frameReplacedListener != nullptr) { - frameReplacedListener->OnFrameReplaced(item); + if (frame_available_listener != nullptr) { + frame_available_listener->OnFrameAvailable(item); + } else if (frame_replaced_listener != nullptr) { + frame_replaced_listener->OnFrameReplaced(item); } ++current_callback_ticket; @@ -669,13 +658,6 @@ Status BufferQueueProducer::Query(NativeWindow what, s32* out_value) { case NativeWindow::ConsumerUsageBits: value = core->consumer_usage_bit; break; - case NativeWindow::BufferAge: - if (core->buffer_age > INT32_MAX) { - value = 0; - } else { - value = static_cast(core->buffer_age); - } - break; default: UNREACHABLE(); return Status::BadValue; @@ -737,7 +719,6 @@ Status BufferQueueProducer::Connect(const std::shared_ptr& li core->buffer_has_been_queued = false; core->dequeue_buffer_cannot_block = core->consumer_controlled_by_app && producer_controlled_by_app; - core->allow_allocation = true; return status; } @@ -770,7 +751,7 @@ Status BufferQueueProducer::Disconnect(NativeWindowApi api) { core->SignalDequeueCondition(); buffer_wait_event->GetWritableEvent().Signal(); listener = core->consumer_listener; - } else if (core->connected_api != NativeWindowApi::NoConnectedApi) { + } else { LOG_ERROR(Service_NVFlinger, "still connected to another api (cur = {} req = {})", core->connected_api, api); status = Status::BadValue; diff --git a/src/core/hle/service/nvflinger/buffer_queue_producer.h b/src/core/hle/service/nvflinger/buffer_queue_producer.h index 77fdcae8e..c4ca68fd3 100755 --- a/src/core/hle/service/nvflinger/buffer_queue_producer.h +++ b/src/core/hle/service/nvflinger/buffer_queue_producer.h @@ -66,7 +66,7 @@ public: private: BufferQueueProducer(const BufferQueueProducer&) = delete; - Status WaitForFreeSlotThenRelock(bool async, s32* found, Status* returnFlags) const; + Status WaitForFreeSlotThenRelock(bool async, s32* found, Status* return_flags) const; Kernel::KEvent* buffer_wait_event{}; Service::KernelHelpers::ServiceContext& service_context; diff --git a/src/core/hle/service/nvflinger/consumer_base.cpp b/src/core/hle/service/nvflinger/consumer_base.cpp index be65a3f88..c2c80832c 100755 --- a/src/core/hle/service/nvflinger/consumer_base.cpp +++ b/src/core/hle/service/nvflinger/consumer_base.cpp @@ -36,38 +36,41 @@ void ConsumerBase::FreeBufferLocked(s32 slot_index) { } void ConsumerBase::OnFrameAvailable(const BufferItem& item) { - std::scoped_lock lock(mutex); LOG_DEBUG(Service_NVFlinger, "called"); } void ConsumerBase::OnFrameReplaced(const BufferItem& item) { - std::scoped_lock lock(mutex); LOG_DEBUG(Service_NVFlinger, "called"); } void ConsumerBase::OnBuffersReleased() { std::scoped_lock lock(mutex); + LOG_DEBUG(Service_NVFlinger, "called"); + + if (is_abandoned) { + // Nothing to do if we're already abandoned. + return; + } + + u64 mask = 0; + consumer->GetReleasedBuffers(&mask); + for (int i = 0; i < BufferQueueDefs::NUM_BUFFER_SLOTS; i++) { + if (mask & (1ULL << i)) { + FreeBufferLocked(i); + } + } } void ConsumerBase::OnSidebandStreamChanged() {} -Status ConsumerBase::AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when, - u64 max_frame_number) { - if (is_abandoned) { - LOG_ERROR(Service_NVFlinger, "consumer is abandoned!"); - return Status::NoInit; - } - - Status err = consumer->AcquireBuffer(item, present_when, max_frame_number); +Status ConsumerBase::AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when) { + Status err = consumer->AcquireBuffer(item, present_when); if (err != Status::NoError) { return err; } if (item->graphic_buffer != nullptr) { - if (slots[item->slot].graphic_buffer != nullptr) { - FreeBufferLocked(item->slot); - } slots[item->slot].graphic_buffer = item->graphic_buffer; } diff --git a/src/core/hle/service/nvflinger/consumer_base.h b/src/core/hle/service/nvflinger/consumer_base.h index 9ab949420..736080e3a 100755 --- a/src/core/hle/service/nvflinger/consumer_base.h +++ b/src/core/hle/service/nvflinger/consumer_base.h @@ -35,8 +35,7 @@ protected: virtual void OnSidebandStreamChanged() override; void FreeBufferLocked(s32 slot_index); - Status AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when, - u64 max_frame_number = 0); + Status AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when); Status ReleaseBufferLocked(s32 slot, const std::shared_ptr graphic_buffer); bool StillTracking(s32 slot, const std::shared_ptr graphic_buffer) const; Status AddReleaseFenceLocked(s32 slot, const std::shared_ptr graphic_buffer, diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 76ce1fbfd..6fb2cdff1 100755 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -104,7 +104,7 @@ void NVFlinger::SetNVDrvInstance(std::shared_ptr instance) { std::optional NVFlinger::OpenDisplay(std::string_view name) { const auto lock_guard = Lock(); - LOG_DEBUG(Service, "Opening \"{}\" display", name); + LOG_DEBUG(Service_NVFlinger, "Opening \"{}\" display", name); const auto itr = std::find_if(displays.begin(), displays.end(), @@ -219,7 +219,7 @@ VI::Layer* NVFlinger::FindOrCreateLayer(u64 display_id, u64 layer_id) { auto* layer = display->FindLayer(layer_id); if (layer == nullptr) { - LOG_DEBUG(Service, "Layer at id {} not found. Trying to create it.", layer_id); + LOG_DEBUG(Service_NVFlinger, "Layer at id {} not found. Trying to create it.", layer_id); CreateLayerAtId(*display, layer_id); return display->FindLayer(layer_id); } diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index d7011b3b4..65a0d826c 100755 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -39,21 +39,9 @@ GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std return gpu_addr; } -void MemoryManager::UnmapSubmappedRanges(GPUVAddr gpu_addr, std::size_t size) { - const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); - for (const auto& [map_addr, map_size] : submapped_ranges) { - // Flush and invalidate through the GPU interface, to be asynchronous if possible. - const std::optional cpu_vaddr = GpuToCpuAddress(map_addr); - if (!cpu_vaddr) { - continue; - } - rasterizer->UnmapMemory(*cpu_vaddr, map_size); - } -} - GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) { - // Unmap any pre-existing rasterizer memory in this range - UnmapSubmappedRanges(gpu_addr, size); + // Mark any pre-existing rasterizer memory in this range as remapped + rasterizer->ModifyGPUMemory(gpu_addr, size); const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first); if (it != map_ranges.end() && it->first == gpu_addr) { @@ -85,8 +73,16 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { } else { UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); } + const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); + + for (const auto& [map_addr, map_size] : submapped_ranges) { + // Flush and invalidate through the GPU interface, to be asynchronous if possible. + const std::optional cpu_addr = GpuToCpuAddress(map_addr); + ASSERT(cpu_addr); + + rasterizer->UnmapMemory(*cpu_addr, map_size); + } - UnmapSubmappedRanges(gpu_addr, size); UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); } diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index ebeea7a40..61bfe47c7 100755 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -149,7 +149,6 @@ private: [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const; void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size); GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size); - void UnmapSubmappedRanges(GPUVAddr gpu_addr, std::size_t size); [[nodiscard]] std::optional FindFreeRange(std::size_t size, std::size_t align, bool start_32bit_address = false) const; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8fef74117..77450ddfe 100755 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -454,15 +454,20 @@ void TextureCache

::DownloadMemory(VAddr cpu_addr, size_t size) { return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; }); for (const ImageId image_id : images) { - Image& image = slot_images[image_id]; - auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); + DownloadImage(image_id); } } +template +void TextureCache

::DownloadImage(ImageId image_id) { + Image& image = slot_images[image_id]; + auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); +} + template void TextureCache

::UnmapMemory(VAddr cpu_addr, size_t size) { std::vector deleted_images; @@ -1058,7 +1063,7 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA for (const ImageId overlap_id : ignore_textures) { Image& overlap = slot_images[overlap_id]; if (True(overlap.flags & ImageFlagBits::GpuModified)) { - UNIMPLEMENTED(); + DownloadImage(overlap_id); } if (True(overlap.flags & ImageFlagBits::Tracked)) { UntrackImage(overlap, overlap_id); diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index b1324edf3..7d9b9e5e9 100755 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -139,6 +139,9 @@ public: /// Download contents of host images to guest memory in a region void DownloadMemory(VAddr cpu_addr, size_t size); + /// Download contents of host images to guest memory + void DownloadImage(ImageId image_id); + /// Remove images in a region void UnmapMemory(VAddr cpu_addr, size_t size); diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui index 5d80a8c91..8ae569ee6 100755 --- a/src/yuzu/configuration/configure_cpu.ui +++ b/src/yuzu/configuration/configure_cpu.ui @@ -52,6 +52,11 @@ Unsafe + + + Paranoid (disables most optimizations) + + diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 34782c378..f34d6b728 100755 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -342,12 +342,6 @@ fps_cap = # null: No audio output output_engine = -# Whether or not to enable the audio-stretching post-processing effect. -# This effect adjusts audio speed to match emulation speed and helps prevent audio stutter, -# at the cost of increasing audio latency. -# 0: No, 1 (default): Yes -enable_audio_stretching = - # Which audio device to use. # auto (default): Auto-select output_device =