early-access version 2639

This commit is contained in:
pineappleEA 2022-04-03 03:41:15 +02:00
parent 0397c1ff98
commit d15d58f409
25 changed files with 272 additions and 203 deletions

View file

@ -1,7 +1,7 @@
yuzu emulator early access yuzu emulator early access
============= =============
This is the source code for early-access 2637. This is the source code for early-access 2639.
## Legal Notice ## Legal Notice

View file

@ -68,9 +68,6 @@ if (YUZU_USE_EXTERNAL_SDL2)
add_library(SDL2 ALIAS SDL2-static) add_library(SDL2 ALIAS SDL2-static)
endif() endif()
# SoundTouch
add_subdirectory(soundtouch)
# Cubeb # Cubeb
if(ENABLE_CUBEB) if(ENABLE_CUBEB)
set(BUILD_TESTS OFF CACHE BOOL "") set(BUILD_TESTS OFF CACHE BOOL "")

View file

@ -36,8 +36,6 @@ add_library(audio_core STATIC
splitter_context.h splitter_context.h
stream.cpp stream.cpp
stream.h stream.h
time_stretch.cpp
time_stretch.h
voice_context.cpp voice_context.cpp
voice_context.h voice_context.h
@ -63,7 +61,6 @@ if (NOT MSVC)
endif() endif()
target_link_libraries(audio_core PUBLIC common core) target_link_libraries(audio_core PUBLIC common core)
target_link_libraries(audio_core PRIVATE SoundTouch)
if(ENABLE_CUBEB) if(ENABLE_CUBEB)
target_link_libraries(audio_core PRIVATE cubeb) target_link_libraries(audio_core PRIVATE cubeb)

View file

@ -7,7 +7,6 @@
#include <cstring> #include <cstring>
#include "audio_core/cubeb_sink.h" #include "audio_core/cubeb_sink.h"
#include "audio_core/stream.h" #include "audio_core/stream.h"
#include "audio_core/time_stretch.h"
#include "common/assert.h" #include "common/assert.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/ring_buffer.h" #include "common/ring_buffer.h"
@ -23,8 +22,7 @@ class CubebSinkStream final : public SinkStream {
public: public:
CubebSinkStream(cubeb* ctx_, u32 sample_rate, u32 num_channels_, cubeb_devid output_device, CubebSinkStream(cubeb* ctx_, u32 sample_rate, u32 num_channels_, cubeb_devid output_device,
const std::string& name) const std::string& name)
: ctx{ctx_}, num_channels{std::min(num_channels_, 6u)}, time_stretch{sample_rate, : ctx{ctx_}, num_channels{std::min(num_channels_, 6u)} {
num_channels} {
cubeb_stream_params params{}; cubeb_stream_params params{};
params.rate = sample_rate; params.rate = sample_rate;
@ -131,7 +129,6 @@ private:
Common::RingBuffer<s16, 0x10000> queue; Common::RingBuffer<s16, 0x10000> queue;
std::array<s16, 2> last_frame{}; std::array<s16, 2> last_frame{};
std::atomic<bool> should_flush{}; std::atomic<bool> should_flush{};
TimeStretcher time_stretch;
static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer, static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
void* output_buffer, long num_frames); void* output_buffer, long num_frames);
@ -205,25 +202,7 @@ long CubebSinkStream::DataCallback([[maybe_unused]] cubeb_stream* stream, void*
const std::size_t num_channels = impl->GetNumChannels(); const std::size_t num_channels = impl->GetNumChannels();
const std::size_t samples_to_write = num_channels * num_frames; const std::size_t samples_to_write = num_channels * num_frames;
std::size_t samples_written; const std::size_t samples_written = impl->queue.Pop(buffer, samples_to_write);
/*
if (Settings::values.enable_audio_stretching.GetValue()) {
const std::vector<s16> in{impl->queue.Pop()};
const std::size_t num_in{in.size() / num_channels};
s16* const out{reinterpret_cast<s16*>(buffer)};
const std::size_t out_frames =
impl->time_stretch.Process(in.data(), num_in, out, num_frames);
samples_written = out_frames * num_channels;
if (impl->should_flush) {
impl->time_stretch.Flush();
impl->should_flush = false;
}
} else {
samples_written = impl->queue.Pop(buffer, samples_to_write);
}*/
samples_written = impl->queue.Pop(buffer, samples_to_write);
if (samples_written >= num_channels) { if (samples_written >= num_channels) {
std::memcpy(&impl->last_frame[0], buffer + (samples_written - num_channels) * sizeof(s16), std::memcpy(&impl->last_frame[0], buffer + (samples_written - num_channels) * sizeof(s16),

View file

@ -7,7 +7,6 @@
#include <cstring> #include <cstring>
#include "audio_core/sdl2_sink.h" #include "audio_core/sdl2_sink.h"
#include "audio_core/stream.h" #include "audio_core/stream.h"
#include "audio_core/time_stretch.h"
#include "common/assert.h" #include "common/assert.h"
#include "common/logging/log.h" #include "common/logging/log.h"
//#include "common/settings.h" //#include "common/settings.h"
@ -27,7 +26,7 @@ namespace AudioCore {
class SDLSinkStream final : public SinkStream { class SDLSinkStream final : public SinkStream {
public: public:
SDLSinkStream(u32 sample_rate, u32 num_channels_, const std::string& output_device) SDLSinkStream(u32 sample_rate, u32 num_channels_, const std::string& output_device)
: num_channels{std::min(num_channels_, 6u)}, time_stretch{sample_rate, num_channels} { : num_channels{std::min(num_channels_, 6u)} {
SDL_AudioSpec spec; SDL_AudioSpec spec;
spec.freq = sample_rate; spec.freq = sample_rate;
@ -116,7 +115,6 @@ private:
SDL_AudioDeviceID dev = 0; SDL_AudioDeviceID dev = 0;
u32 num_channels{}; u32 num_channels{};
std::atomic<bool> should_flush{}; std::atomic<bool> should_flush{};
TimeStretcher time_stretch;
}; };
SDLSink::SDLSink(std::string_view target_device_name) { SDLSink::SDLSink(std::string_view target_device_name) {

View file

@ -46,6 +46,43 @@ namespace Common {
reinterpret_cast<__int64*>(expected.data())) != 0; reinterpret_cast<__int64*>(expected.data())) != 0;
} }
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected,
u8& actual) {
actual =
_InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected,
u16& actual) {
actual =
_InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected,
u32& actual) {
actual =
_InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected,
u64& actual) {
actual = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer), value,
expected);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected,
u128& actual) {
const bool result =
_InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1],
value[0], reinterpret_cast<__int64*>(expected.data())) != 0;
actual = expected;
return result;
}
[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) { [[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
u128 result{}; u128 result{};
_InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), result[1], _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), result[1],
@ -79,6 +116,42 @@ namespace Common {
return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a); return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
} }
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected,
u8& actual) {
actual = __sync_val_compare_and_swap(pointer, expected, value);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected,
u16& actual) {
actual = __sync_val_compare_and_swap(pointer, expected, value);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected,
u32& actual) {
actual = __sync_val_compare_and_swap(pointer, expected, value);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected,
u64& actual) {
actual = __sync_val_compare_and_swap(pointer, expected, value);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected,
u128& actual) {
unsigned __int128 value_a;
unsigned __int128 expected_a;
unsigned __int128 actual_a;
std::memcpy(&value_a, value.data(), sizeof(u128));
std::memcpy(&expected_a, expected.data(), sizeof(u128));
actual_a = __sync_val_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
std::memcpy(actual.data(), &actual_a, sizeof(u128));
return actual_a == expected_a;
}
[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) { [[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
unsigned __int128 zeros_a = 0; unsigned __int128 zeros_a = 0;
unsigned __int128 result_a = unsigned __int128 result_a =

View file

@ -38,6 +38,7 @@ enum class CPUAccuracy : u32 {
Auto = 0, Auto = 0,
Accurate = 1, Accurate = 1,
Unsafe = 2, Unsafe = 2,
Paranoid = 3,
}; };
enum class FullscreenMode : u32 { enum class FullscreenMode : u32 {
@ -470,7 +471,7 @@ struct Values {
// Cpu // Cpu
RangedSetting<CPUAccuracy> cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto, RangedSetting<CPUAccuracy> cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto,
CPUAccuracy::Unsafe, "cpu_accuracy"}; CPUAccuracy::Paranoid, "cpu_accuracy"};
// TODO: remove cpu_accuracy_first_time, migration setting added 8 July 2021 // TODO: remove cpu_accuracy_first_time, migration setting added 8 July 2021
BasicSetting<bool> cpu_accuracy_first_time{true, "cpu_accuracy_first_time"}; BasicSetting<bool> cpu_accuracy_first_time{true, "cpu_accuracy_first_time"};
BasicSetting<bool> cpu_debug_mode{false, "cpu_debug_mode"}; BasicSetting<bool> cpu_debug_mode{false, "cpu_debug_mode"};

View file

@ -55,8 +55,9 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
u64 NativeClock::GetRTSC() { u64 NativeClock::GetRTSC() {
TimePoint new_time_point{}; TimePoint new_time_point{};
TimePoint current_time_point{}; TimePoint current_time_point{};
do {
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
do {
_mm_mfence(); _mm_mfence();
const u64 current_measure = __rdtsc(); const u64 current_measure = __rdtsc();
u64 diff = current_measure - current_time_point.inner.last_measure; u64 diff = current_measure - current_time_point.inner.last_measure;
@ -66,7 +67,7 @@ u64 NativeClock::GetRTSC() {
: current_time_point.inner.last_measure; : current_time_point.inner.last_measure;
new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff;
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
current_time_point.pack)); current_time_point.pack, current_time_point.pack));
/// The clock cannot be more precise than the guest timer, remove the lower bits /// The clock cannot be more precise than the guest timer, remove the lower bits
return new_time_point.inner.accumulated_ticks & inaccuracy_mask; return new_time_point.inner.accumulated_ticks & inaccuracy_mask;
} }
@ -75,13 +76,14 @@ void NativeClock::Pause(bool is_paused) {
if (!is_paused) { if (!is_paused) {
TimePoint current_time_point{}; TimePoint current_time_point{};
TimePoint new_time_point{}; TimePoint new_time_point{};
do {
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
do {
new_time_point.pack = current_time_point.pack; new_time_point.pack = current_time_point.pack;
_mm_mfence(); _mm_mfence();
new_time_point.inner.last_measure = __rdtsc(); new_time_point.inner.last_measure = __rdtsc();
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
current_time_point.pack)); current_time_point.pack, current_time_point.pack));
} }
} }

View file

@ -186,8 +186,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
if (!Settings::values.cpuopt_recompile_exclusives) { if (!Settings::values.cpuopt_recompile_exclusives) {
config.recompile_on_exclusive_fastmem_failure = false; config.recompile_on_exclusive_fastmem_failure = false;
} }
} } else {
// Unsafe optimizations // Unsafe optimizations
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) { if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) {
config.unsafe_optimizations = true; config.unsafe_optimizations = true;
@ -217,6 +216,13 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
} }
// Paranoia mode for debugging optimizations
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Paranoid) {
config.unsafe_optimizations = false;
config.optimizations = Dynarmic::no_optimizations;
}
}
return std::make_unique<Dynarmic::A32::Jit>(config); return std::make_unique<Dynarmic::A32::Jit>(config);
} }

View file

@ -248,8 +248,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
if (!Settings::values.cpuopt_recompile_exclusives) { if (!Settings::values.cpuopt_recompile_exclusives) {
config.recompile_on_exclusive_fastmem_failure = false; config.recompile_on_exclusive_fastmem_failure = false;
} }
} } else {
// Unsafe optimizations // Unsafe optimizations
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) { if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) {
config.unsafe_optimizations = true; config.unsafe_optimizations = true;
@ -279,6 +278,13 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
} }
// Paranoia mode for debugging optimizations
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Paranoid) {
config.unsafe_optimizations = false;
config.optimizations = Dynarmic::no_optimizations;
}
}
return std::make_shared<Dynarmic::A64::Jit>(config); return std::make_shared<Dynarmic::A64::Jit>(config);
} }

View file

@ -18,8 +18,7 @@ BufferQueueConsumer::BufferQueueConsumer(std::shared_ptr<BufferQueueCore> core_)
BufferQueueConsumer::~BufferQueueConsumer() = default; BufferQueueConsumer::~BufferQueueConsumer() = default;
Status BufferQueueConsumer::AcquireBuffer(BufferItem* out_buffer, Status BufferQueueConsumer::AcquireBuffer(BufferItem* out_buffer,
std::chrono::nanoseconds expected_present, std::chrono::nanoseconds expected_present) {
u64 max_frame_number) {
std::scoped_lock lock(core->mutex); std::scoped_lock lock(core->mutex);
// Check that the consumer doesn't currently have the maximum number of buffers acquired. // Check that the consumer doesn't currently have the maximum number of buffers acquired.
@ -50,12 +49,6 @@ Status BufferQueueConsumer::AcquireBuffer(BufferItem* out_buffer,
while (core->queue.size() > 1 && !core->queue[0].is_auto_timestamp) { while (core->queue.size() > 1 && !core->queue[0].is_auto_timestamp) {
const auto& buffer_item{core->queue[1]}; const auto& buffer_item{core->queue[1]};
// If dropping entry[0] would leave us with a buffer that the consumer is not yet ready
// for, don't drop it.
if (max_frame_number && buffer_item.frame_number > max_frame_number) {
break;
}
// If entry[1] is timely, drop entry[0] (and repeat). // If entry[1] is timely, drop entry[0] (and repeat).
const auto desired_present = buffer_item.timestamp; const auto desired_present = buffer_item.timestamp;
if (desired_present < expected_present.count() - MAX_REASONABLE_NSEC || if (desired_present < expected_present.count() - MAX_REASONABLE_NSEC ||
@ -200,4 +193,39 @@ Status BufferQueueConsumer::Connect(std::shared_ptr<IConsumerListener> consumer_
return Status::NoError; return Status::NoError;
} }
Status BufferQueueConsumer::GetReleasedBuffers(u64* out_slot_mask) {
if (out_slot_mask == nullptr) {
LOG_ERROR(Service_NVFlinger, "out_slot_mask may not be nullptr");
return Status::BadValue;
}
std::scoped_lock lock(core->mutex);
if (core->is_abandoned) {
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
return Status::NoInit;
}
u64 mask = 0;
for (int s = 0; s < BufferQueueDefs::NUM_BUFFER_SLOTS; ++s) {
if (!slots[s].acquire_called) {
mask |= (1ULL << s);
}
}
// Remove from the mask queued buffers for which acquire has been called, since the consumer
// will not receive their buffer addresses and so must retain their cached information
auto current(core->queue.begin());
while (current != core->queue.end()) {
if (current->acquire_called) {
mask &= ~(1ULL << current->slot);
}
++current;
}
LOG_DEBUG(Service_NVFlinger, "returning mask {}", mask);
*out_slot_mask = mask;
return Status::NoError;
}
} // namespace Service::android } // namespace Service::android

View file

@ -24,10 +24,10 @@ public:
explicit BufferQueueConsumer(std::shared_ptr<BufferQueueCore> core_); explicit BufferQueueConsumer(std::shared_ptr<BufferQueueCore> core_);
~BufferQueueConsumer(); ~BufferQueueConsumer();
Status AcquireBuffer(BufferItem* out_buffer, std::chrono::nanoseconds expected_present, Status AcquireBuffer(BufferItem* out_buffer, std::chrono::nanoseconds expected_present);
u64 max_frame_number = 0);
Status ReleaseBuffer(s32 slot, u64 frame_number, const Fence& release_fence); Status ReleaseBuffer(s32 slot, u64 frame_number, const Fence& release_fence);
Status Connect(std::shared_ptr<IConsumerListener> consumer_listener, bool controlled_by_app); Status Connect(std::shared_ptr<IConsumerListener> consumer_listener, bool controlled_by_app);
Status GetReleasedBuffers(u64* out_slot_mask);
private: private:
std::shared_ptr<BufferQueueCore> core; std::shared_ptr<BufferQueueCore> core;

View file

@ -95,7 +95,6 @@ void BufferQueueCore::FreeBufferLocked(s32 slot) {
} }
void BufferQueueCore::FreeAllBuffersLocked() { void BufferQueueCore::FreeAllBuffersLocked() {
queue.clear();
buffer_has_been_queued = false; buffer_has_been_queued = false;
for (s32 slot = 0; slot < BufferQueueDefs::NUM_BUFFER_SLOTS; ++slot) { for (s32 slot = 0; slot < BufferQueueDefs::NUM_BUFFER_SLOTS; ++slot) {

View file

@ -73,8 +73,6 @@ private:
u32 transform_hint{}; u32 transform_hint{};
bool is_allocating{}; bool is_allocating{};
mutable std::condition_variable_any is_allocating_condition; mutable std::condition_variable_any is_allocating_condition;
bool allow_allocation{true};
u64 buffer_age{};
bool is_shutting_down{}; bool is_shutting_down{};
}; };

View file

@ -62,11 +62,12 @@ Status BufferQueueProducer::RequestBuffer(s32 slot, std::shared_ptr<GraphicBuffe
Status BufferQueueProducer::SetBufferCount(s32 buffer_count) { Status BufferQueueProducer::SetBufferCount(s32 buffer_count) {
LOG_DEBUG(Service_NVFlinger, "count = {}", buffer_count); LOG_DEBUG(Service_NVFlinger, "count = {}", buffer_count);
std::shared_ptr<IConsumerListener> listener;
std::shared_ptr<IConsumerListener> listener;
{ {
std::scoped_lock lock(core->mutex); std::scoped_lock lock(core->mutex);
core->WaitWhileAllocatingLocked(); core->WaitWhileAllocatingLocked();
if (core->is_abandoned) { if (core->is_abandoned) {
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned"); LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
return Status::NoInit; return Status::NoInit;
@ -120,7 +121,7 @@ Status BufferQueueProducer::SetBufferCount(s32 buffer_count) {
} }
Status BufferQueueProducer::WaitForFreeSlotThenRelock(bool async, s32* found, Status BufferQueueProducer::WaitForFreeSlotThenRelock(bool async, s32* found,
Status* returnFlags) const { Status* return_flags) const {
bool try_again = true; bool try_again = true;
while (try_again) { while (try_again) {
@ -142,10 +143,12 @@ Status BufferQueueProducer::WaitForFreeSlotThenRelock(bool async, s32* found,
ASSERT(slots[s].buffer_state == BufferState::Free); ASSERT(slots[s].buffer_state == BufferState::Free);
if (slots[s].graphic_buffer != nullptr) { if (slots[s].graphic_buffer != nullptr) {
core->FreeBufferLocked(s); core->FreeBufferLocked(s);
*returnFlags |= Status::ReleaseAllBuffers; *return_flags |= Status::ReleaseAllBuffers;
} }
} }
// Look for a free buffer to give to the client
*found = BufferQueueCore::INVALID_BUFFER_SLOT;
s32 dequeued_count{}; s32 dequeued_count{};
s32 acquired_count{}; s32 acquired_count{};
for (s32 s{}; s < max_buffer_count; ++s) { for (s32 s{}; s < max_buffer_count; ++s) {
@ -235,20 +238,15 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
{ {
std::scoped_lock lock(core->mutex); std::scoped_lock lock(core->mutex);
core->WaitWhileAllocatingLocked(); core->WaitWhileAllocatingLocked();
if (format == PixelFormat::NoFormat) { if (format == PixelFormat::NoFormat) {
format = core->default_buffer_format; format = core->default_buffer_format;
} }
// Enable the usage bits the consumer requested // Enable the usage bits the consumer requested
usage |= core->consumer_usage_bit; usage |= core->consumer_usage_bit;
const bool use_default_size = !width && !height;
if (use_default_size) {
width = core->default_width;
height = core->default_height;
}
s32 found = BufferItem::INVALID_BUFFER_SLOT; s32 found{};
while (found == BufferItem::INVALID_BUFFER_SLOT) {
Status status = WaitForFreeSlotThenRelock(async, &found, &return_flags); Status status = WaitForFreeSlotThenRelock(async, &found, &return_flags);
if (status != Status::NoError) { if (status != Status::NoError) {
return status; return status;
@ -256,47 +254,34 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
// This should not happen // This should not happen
if (found == BufferQueueCore::INVALID_BUFFER_SLOT) { if (found == BufferQueueCore::INVALID_BUFFER_SLOT) {
LOG_DEBUG(Service_NVFlinger, "no available buffer slots"); LOG_ERROR(Service_NVFlinger, "no available buffer slots");
return Status::Busy; return Status::Busy;
} }
const std::shared_ptr<GraphicBuffer>& buffer(slots[found].graphic_buffer);
// If we are not allowed to allocate new buffers, WaitForFreeSlotThenRelock must have
// returned a slot containing a buffer. If this buffer would require reallocation to
// meet the requested attributes, we free it and attempt to get another one.
if (!core->allow_allocation) {
if (buffer->NeedsReallocation(width, height, format, usage)) {
core->FreeBufferLocked(found);
found = BufferItem::INVALID_BUFFER_SLOT;
continue;
}
}
}
*out_slot = found; *out_slot = found;
attached_by_consumer = slots[found].attached_by_consumer; attached_by_consumer = slots[found].attached_by_consumer;
const bool use_default_size = !width && !height;
if (use_default_size) {
width = core->default_width;
height = core->default_height;
}
slots[found].buffer_state = BufferState::Dequeued; slots[found].buffer_state = BufferState::Dequeued;
const std::shared_ptr<GraphicBuffer>& buffer(slots[found].graphic_buffer); const std::shared_ptr<GraphicBuffer>& buffer(slots[found].graphic_buffer);
if ((buffer == nullptr) || (buffer->Width() != width) || (buffer->Height() != height) ||
if ((buffer == nullptr) || buffer->NeedsReallocation(width, height, format, usage)) { (buffer->Format() != format) || ((buffer->Usage() & usage) != usage)) {
slots[found].acquire_called = false; slots[found].acquire_called = false;
slots[found].graphic_buffer = nullptr; slots[found].graphic_buffer = nullptr;
slots[found].request_buffer_called = false; slots[found].request_buffer_called = false;
slots[found].fence = Fence::NoFence(); slots[found].fence = Fence::NoFence();
core->buffer_age = 0;
return_flags |= Status::BufferNeedsReallocation; return_flags |= Status::BufferNeedsReallocation;
} else {
// We add 1 because that will be the frame number when this buffer
// is queued
core->buffer_age = core->frame_counter + 1 - slots[found].frame_number;
} }
LOG_DEBUG(Service_NVFlinger, "setting buffer age to {}", core->buffer_age);
*out_fence = slots[found].fence; *out_fence = slots[found].fence;
slots[found].fence = Fence::NoFence(); slots[found].fence = Fence::NoFence();
} }
@ -311,6 +296,7 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
{ {
std::scoped_lock lock(core->mutex); std::scoped_lock lock(core->mutex);
if (core->is_abandoned) { if (core->is_abandoned) {
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned"); LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
return Status::NoInit; return Status::NoInit;
@ -327,6 +313,7 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
LOG_DEBUG(Service_NVFlinger, "returning slot={} frame={}, flags={}", *out_slot, LOG_DEBUG(Service_NVFlinger, "returning slot={} frame={}, flags={}", *out_slot,
slots[*out_slot].frame_number, return_flags); slots[*out_slot].frame_number, return_flags);
return return_flags; return return_flags;
} }
@ -334,6 +321,7 @@ Status BufferQueueProducer::DetachBuffer(s32 slot) {
LOG_DEBUG(Service_NVFlinger, "slot {}", slot); LOG_DEBUG(Service_NVFlinger, "slot {}", slot);
std::scoped_lock lock(core->mutex); std::scoped_lock lock(core->mutex);
if (core->is_abandoned) { if (core->is_abandoned) {
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned"); LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
return Status::NoInit; return Status::NoInit;
@ -369,7 +357,6 @@ Status BufferQueueProducer::DetachNextBuffer(std::shared_ptr<GraphicBuffer>* out
} }
std::scoped_lock lock(core->mutex); std::scoped_lock lock(core->mutex);
core->WaitWhileAllocatingLocked(); core->WaitWhileAllocatingLocked();
if (core->is_abandoned) { if (core->is_abandoned) {
@ -423,6 +410,7 @@ Status BufferQueueProducer::AttachBuffer(s32* out_slot,
return status; return status;
} }
// This should not happen
if (found == BufferQueueCore::INVALID_BUFFER_SLOT) { if (found == BufferQueueCore::INVALID_BUFFER_SLOT) {
LOG_ERROR(Service_NVFlinger, "No available buffer slots"); LOG_ERROR(Service_NVFlinger, "No available buffer slots");
return Status::Busy; return Status::Busy;
@ -466,8 +454,8 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
return Status::BadValue; return Status::BadValue;
} }
std::shared_ptr<IConsumerListener> frameAvailableListener; std::shared_ptr<IConsumerListener> frame_available_listener;
std::shared_ptr<IConsumerListener> frameReplacedListener; std::shared_ptr<IConsumerListener> frame_replaced_listener;
s32 callback_ticket{}; s32 callback_ticket{};
BufferItem item; BufferItem item;
@ -541,12 +529,13 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
item.fence = fence; item.fence = fence;
item.is_droppable = core->dequeue_buffer_cannot_block || async; item.is_droppable = core->dequeue_buffer_cannot_block || async;
item.swap_interval = swap_interval; item.swap_interval = swap_interval;
sticky_transform = sticky_transform_; sticky_transform = sticky_transform_;
if (core->queue.empty()) { if (core->queue.empty()) {
// When the queue is empty, we can simply queue this buffer // When the queue is empty, we can simply queue this buffer
core->queue.push_back(item); core->queue.push_back(item);
frameAvailableListener = core->consumer_listener; frame_available_listener = core->consumer_listener;
} else { } else {
// When the queue is not empty, we need to look at the front buffer // When the queue is not empty, we need to look at the front buffer
// state to see if we need to replace it // state to see if we need to replace it
@ -563,10 +552,10 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
} }
// Overwrite the droppable buffer with the incoming one // Overwrite the droppable buffer with the incoming one
*front = item; *front = item;
frameReplacedListener = core->consumer_listener; frame_replaced_listener = core->consumer_listener;
} else { } else {
core->queue.push_back(item); core->queue.push_back(item);
frameAvailableListener = core->consumer_listener; frame_available_listener = core->consumer_listener;
} }
} }
@ -592,10 +581,10 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
callback_condition.wait(callback_mutex); callback_condition.wait(callback_mutex);
} }
if (frameAvailableListener != nullptr) { if (frame_available_listener != nullptr) {
frameAvailableListener->OnFrameAvailable(item); frame_available_listener->OnFrameAvailable(item);
} else if (frameReplacedListener != nullptr) { } else if (frame_replaced_listener != nullptr) {
frameReplacedListener->OnFrameReplaced(item); frame_replaced_listener->OnFrameReplaced(item);
} }
++current_callback_ticket; ++current_callback_ticket;
@ -669,13 +658,6 @@ Status BufferQueueProducer::Query(NativeWindow what, s32* out_value) {
case NativeWindow::ConsumerUsageBits: case NativeWindow::ConsumerUsageBits:
value = core->consumer_usage_bit; value = core->consumer_usage_bit;
break; break;
case NativeWindow::BufferAge:
if (core->buffer_age > INT32_MAX) {
value = 0;
} else {
value = static_cast<u32>(core->buffer_age);
}
break;
default: default:
UNREACHABLE(); UNREACHABLE();
return Status::BadValue; return Status::BadValue;
@ -737,7 +719,6 @@ Status BufferQueueProducer::Connect(const std::shared_ptr<IProducerListener>& li
core->buffer_has_been_queued = false; core->buffer_has_been_queued = false;
core->dequeue_buffer_cannot_block = core->dequeue_buffer_cannot_block =
core->consumer_controlled_by_app && producer_controlled_by_app; core->consumer_controlled_by_app && producer_controlled_by_app;
core->allow_allocation = true;
return status; return status;
} }
@ -770,7 +751,7 @@ Status BufferQueueProducer::Disconnect(NativeWindowApi api) {
core->SignalDequeueCondition(); core->SignalDequeueCondition();
buffer_wait_event->GetWritableEvent().Signal(); buffer_wait_event->GetWritableEvent().Signal();
listener = core->consumer_listener; listener = core->consumer_listener;
} else if (core->connected_api != NativeWindowApi::NoConnectedApi) { } else {
LOG_ERROR(Service_NVFlinger, "still connected to another api (cur = {} req = {})", LOG_ERROR(Service_NVFlinger, "still connected to another api (cur = {} req = {})",
core->connected_api, api); core->connected_api, api);
status = Status::BadValue; status = Status::BadValue;

View file

@ -66,7 +66,7 @@ public:
private: private:
BufferQueueProducer(const BufferQueueProducer&) = delete; BufferQueueProducer(const BufferQueueProducer&) = delete;
Status WaitForFreeSlotThenRelock(bool async, s32* found, Status* returnFlags) const; Status WaitForFreeSlotThenRelock(bool async, s32* found, Status* return_flags) const;
Kernel::KEvent* buffer_wait_event{}; Kernel::KEvent* buffer_wait_event{};
Service::KernelHelpers::ServiceContext& service_context; Service::KernelHelpers::ServiceContext& service_context;

View file

@ -36,38 +36,41 @@ void ConsumerBase::FreeBufferLocked(s32 slot_index) {
} }
void ConsumerBase::OnFrameAvailable(const BufferItem& item) { void ConsumerBase::OnFrameAvailable(const BufferItem& item) {
std::scoped_lock lock(mutex);
LOG_DEBUG(Service_NVFlinger, "called"); LOG_DEBUG(Service_NVFlinger, "called");
} }
void ConsumerBase::OnFrameReplaced(const BufferItem& item) { void ConsumerBase::OnFrameReplaced(const BufferItem& item) {
std::scoped_lock lock(mutex);
LOG_DEBUG(Service_NVFlinger, "called"); LOG_DEBUG(Service_NVFlinger, "called");
} }
void ConsumerBase::OnBuffersReleased() { void ConsumerBase::OnBuffersReleased() {
std::scoped_lock lock(mutex); std::scoped_lock lock(mutex);
LOG_DEBUG(Service_NVFlinger, "called"); LOG_DEBUG(Service_NVFlinger, "called");
if (is_abandoned) {
// Nothing to do if we're already abandoned.
return;
}
u64 mask = 0;
consumer->GetReleasedBuffers(&mask);
for (int i = 0; i < BufferQueueDefs::NUM_BUFFER_SLOTS; i++) {
if (mask & (1ULL << i)) {
FreeBufferLocked(i);
}
}
} }
void ConsumerBase::OnSidebandStreamChanged() {} void ConsumerBase::OnSidebandStreamChanged() {}
Status ConsumerBase::AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when, Status ConsumerBase::AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when) {
u64 max_frame_number) { Status err = consumer->AcquireBuffer(item, present_when);
if (is_abandoned) {
LOG_ERROR(Service_NVFlinger, "consumer is abandoned!");
return Status::NoInit;
}
Status err = consumer->AcquireBuffer(item, present_when, max_frame_number);
if (err != Status::NoError) { if (err != Status::NoError) {
return err; return err;
} }
if (item->graphic_buffer != nullptr) { if (item->graphic_buffer != nullptr) {
if (slots[item->slot].graphic_buffer != nullptr) {
FreeBufferLocked(item->slot);
}
slots[item->slot].graphic_buffer = item->graphic_buffer; slots[item->slot].graphic_buffer = item->graphic_buffer;
} }

View file

@ -35,8 +35,7 @@ protected:
virtual void OnSidebandStreamChanged() override; virtual void OnSidebandStreamChanged() override;
void FreeBufferLocked(s32 slot_index); void FreeBufferLocked(s32 slot_index);
Status AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when, Status AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when);
u64 max_frame_number = 0);
Status ReleaseBufferLocked(s32 slot, const std::shared_ptr<GraphicBuffer> graphic_buffer); Status ReleaseBufferLocked(s32 slot, const std::shared_ptr<GraphicBuffer> graphic_buffer);
bool StillTracking(s32 slot, const std::shared_ptr<GraphicBuffer> graphic_buffer) const; bool StillTracking(s32 slot, const std::shared_ptr<GraphicBuffer> graphic_buffer) const;
Status AddReleaseFenceLocked(s32 slot, const std::shared_ptr<GraphicBuffer> graphic_buffer, Status AddReleaseFenceLocked(s32 slot, const std::shared_ptr<GraphicBuffer> graphic_buffer,

View file

@ -104,7 +104,7 @@ void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) { std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
const auto lock_guard = Lock(); const auto lock_guard = Lock();
LOG_DEBUG(Service, "Opening \"{}\" display", name); LOG_DEBUG(Service_NVFlinger, "Opening \"{}\" display", name);
const auto itr = const auto itr =
std::find_if(displays.begin(), displays.end(), std::find_if(displays.begin(), displays.end(),
@ -219,7 +219,7 @@ VI::Layer* NVFlinger::FindOrCreateLayer(u64 display_id, u64 layer_id) {
auto* layer = display->FindLayer(layer_id); auto* layer = display->FindLayer(layer_id);
if (layer == nullptr) { if (layer == nullptr) {
LOG_DEBUG(Service, "Layer at id {} not found. Trying to create it.", layer_id); LOG_DEBUG(Service_NVFlinger, "Layer at id {} not found. Trying to create it.", layer_id);
CreateLayerAtId(*display, layer_id); CreateLayerAtId(*display, layer_id);
return display->FindLayer(layer_id); return display->FindLayer(layer_id);
} }

View file

@ -39,21 +39,9 @@ GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std
return gpu_addr; return gpu_addr;
} }
void MemoryManager::UnmapSubmappedRanges(GPUVAddr gpu_addr, std::size_t size) {
const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
for (const auto& [map_addr, map_size] : submapped_ranges) {
// Flush and invalidate through the GPU interface, to be asynchronous if possible.
const std::optional<VAddr> cpu_vaddr = GpuToCpuAddress(map_addr);
if (!cpu_vaddr) {
continue;
}
rasterizer->UnmapMemory(*cpu_vaddr, map_size);
}
}
GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) { GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) {
// Unmap any pre-existing rasterizer memory in this range // Mark any pre-existing rasterizer memory in this range as remapped
UnmapSubmappedRanges(gpu_addr, size); rasterizer->ModifyGPUMemory(gpu_addr, size);
const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first); const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first);
if (it != map_ranges.end() && it->first == gpu_addr) { if (it != map_ranges.end() && it->first == gpu_addr) {
@ -85,8 +73,16 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
} else { } else {
UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
} }
const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
for (const auto& [map_addr, map_size] : submapped_ranges) {
// Flush and invalidate through the GPU interface, to be asynchronous if possible.
const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr);
ASSERT(cpu_addr);
rasterizer->UnmapMemory(*cpu_addr, map_size);
}
UnmapSubmappedRanges(gpu_addr, size);
UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
} }

View file

@ -149,7 +149,6 @@ private:
[[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const; [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size); void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size); GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size);
void UnmapSubmappedRanges(GPUVAddr gpu_addr, std::size_t size);
[[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align, [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align,
bool start_32bit_address = false) const; bool start_32bit_address = false) const;

View file

@ -454,13 +454,18 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
}); });
for (const ImageId image_id : images) { for (const ImageId image_id : images) {
DownloadImage(image_id);
}
}
template <class P>
void TextureCache<P>::DownloadImage(ImageId image_id) {
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
const auto copies = FullDownloadCopies(image.info); const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies); image.DownloadMemory(map, copies);
runtime.Finish(); runtime.Finish();
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
}
} }
template <class P> template <class P>
@ -1058,7 +1063,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
for (const ImageId overlap_id : ignore_textures) { for (const ImageId overlap_id : ignore_textures) {
Image& overlap = slot_images[overlap_id]; Image& overlap = slot_images[overlap_id];
if (True(overlap.flags & ImageFlagBits::GpuModified)) { if (True(overlap.flags & ImageFlagBits::GpuModified)) {
UNIMPLEMENTED(); DownloadImage(overlap_id);
} }
if (True(overlap.flags & ImageFlagBits::Tracked)) { if (True(overlap.flags & ImageFlagBits::Tracked)) {
UntrackImage(overlap, overlap_id); UntrackImage(overlap, overlap_id);

View file

@ -139,6 +139,9 @@ public:
/// Download contents of host images to guest memory in a region /// Download contents of host images to guest memory in a region
void DownloadMemory(VAddr cpu_addr, size_t size); void DownloadMemory(VAddr cpu_addr, size_t size);
/// Download contents of host images to guest memory
void DownloadImage(ImageId image_id);
/// Remove images in a region /// Remove images in a region
void UnmapMemory(VAddr cpu_addr, size_t size); void UnmapMemory(VAddr cpu_addr, size_t size);

View file

@ -52,6 +52,11 @@
<string>Unsafe</string> <string>Unsafe</string>
</property> </property>
</item> </item>
<item>
<property name="text">
<string>Paranoid (disables most optimizations)</string>
</property>
</item>
</widget> </widget>
</item> </item>
</layout> </layout>

View file

@ -342,12 +342,6 @@ fps_cap =
# null: No audio output # null: No audio output
output_engine = output_engine =
# Whether or not to enable the audio-stretching post-processing effect.
# This effect adjusts audio speed to match emulation speed and helps prevent audio stutter,
# at the cost of increasing audio latency.
# 0: No, 1 (default): Yes
enable_audio_stretching =
# Which audio device to use. # Which audio device to use.
# auto (default): Auto-select # auto (default): Auto-select
output_device = output_device =