early-access version 1786
This commit is contained in:
parent
53c119c6c4
commit
0155a19876
27 changed files with 473 additions and 216 deletions
|
@ -1,7 +1,7 @@
|
||||||
yuzu emulator early access
|
yuzu emulator early access
|
||||||
=============
|
=============
|
||||||
|
|
||||||
This is the source code for early-access 1785.
|
This is the source code for early-access 1786.
|
||||||
|
|
||||||
## Legal Notice
|
## Legal Notice
|
||||||
|
|
||||||
|
|
|
@ -55,9 +55,11 @@ void LogSettings() {
|
||||||
log_setting("Renderer_UseAsynchronousGpuEmulation",
|
log_setting("Renderer_UseAsynchronousGpuEmulation",
|
||||||
values.use_asynchronous_gpu_emulation.GetValue());
|
values.use_asynchronous_gpu_emulation.GetValue());
|
||||||
log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue());
|
log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue());
|
||||||
|
log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
|
||||||
log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
|
log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
|
||||||
log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue());
|
log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue());
|
||||||
log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue());
|
log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue());
|
||||||
|
log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue());
|
||||||
log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue());
|
log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue());
|
||||||
log_setting("Audio_OutputEngine", values.sink_id);
|
log_setting("Audio_OutputEngine", values.sink_id);
|
||||||
log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue());
|
log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue());
|
||||||
|
@ -135,10 +137,12 @@ void RestoreGlobalState(bool is_powered_on) {
|
||||||
values.gpu_accuracy.SetGlobal(true);
|
values.gpu_accuracy.SetGlobal(true);
|
||||||
values.use_asynchronous_gpu_emulation.SetGlobal(true);
|
values.use_asynchronous_gpu_emulation.SetGlobal(true);
|
||||||
values.use_nvdec_emulation.SetGlobal(true);
|
values.use_nvdec_emulation.SetGlobal(true);
|
||||||
|
values.accelerate_astc.SetGlobal(true);
|
||||||
values.use_vsync.SetGlobal(true);
|
values.use_vsync.SetGlobal(true);
|
||||||
values.use_assembly_shaders.SetGlobal(true);
|
values.use_assembly_shaders.SetGlobal(true);
|
||||||
values.use_asynchronous_shaders.SetGlobal(true);
|
values.use_asynchronous_shaders.SetGlobal(true);
|
||||||
values.use_fast_gpu_time.SetGlobal(true);
|
values.use_fast_gpu_time.SetGlobal(true);
|
||||||
|
values.use_caches_gc.SetGlobal(true);
|
||||||
values.bg_red.SetGlobal(true);
|
values.bg_red.SetGlobal(true);
|
||||||
values.bg_green.SetGlobal(true);
|
values.bg_green.SetGlobal(true);
|
||||||
values.bg_blue.SetGlobal(true);
|
values.bg_blue.SetGlobal(true);
|
||||||
|
|
|
@ -147,10 +147,12 @@ struct Values {
|
||||||
Setting<GPUAccuracy> gpu_accuracy;
|
Setting<GPUAccuracy> gpu_accuracy;
|
||||||
Setting<bool> use_asynchronous_gpu_emulation;
|
Setting<bool> use_asynchronous_gpu_emulation;
|
||||||
Setting<bool> use_nvdec_emulation;
|
Setting<bool> use_nvdec_emulation;
|
||||||
|
Setting<bool> accelerate_astc;
|
||||||
Setting<bool> use_vsync;
|
Setting<bool> use_vsync;
|
||||||
Setting<bool> use_assembly_shaders;
|
Setting<bool> use_assembly_shaders;
|
||||||
Setting<bool> use_asynchronous_shaders;
|
Setting<bool> use_asynchronous_shaders;
|
||||||
Setting<bool> use_fast_gpu_time;
|
Setting<bool> use_fast_gpu_time;
|
||||||
|
Setting<bool> use_caches_gc;
|
||||||
|
|
||||||
Setting<float> bg_red;
|
Setting<float> bg_red;
|
||||||
Setting<float> bg_green;
|
Setting<float> bg_green;
|
||||||
|
|
|
@ -230,6 +230,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
|
||||||
Settings::values.use_asynchronous_gpu_emulation.GetValue());
|
Settings::values.use_asynchronous_gpu_emulation.GetValue());
|
||||||
AddField(field_type, "Renderer_UseNvdecEmulation",
|
AddField(field_type, "Renderer_UseNvdecEmulation",
|
||||||
Settings::values.use_nvdec_emulation.GetValue());
|
Settings::values.use_nvdec_emulation.GetValue());
|
||||||
|
AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue());
|
||||||
AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue());
|
AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue());
|
||||||
AddField(field_type, "Renderer_UseAssemblyShaders",
|
AddField(field_type, "Renderer_UseAssemblyShaders",
|
||||||
Settings::values.use_assembly_shaders.GetValue());
|
Settings::values.use_assembly_shaders.GetValue());
|
||||||
|
|
|
@ -237,6 +237,7 @@ add_library(video_core STATIC
|
||||||
texture_cache/util.cpp
|
texture_cache/util.cpp
|
||||||
texture_cache/util.h
|
texture_cache/util.h
|
||||||
textures/astc.h
|
textures/astc.h
|
||||||
|
textures/astc.cpp
|
||||||
textures/decoders.cpp
|
textures/decoders.cpp
|
||||||
textures/decoders.h
|
textures/decoders.h
|
||||||
textures/texture.cpp
|
textures/texture.cpp
|
||||||
|
|
|
@ -256,6 +256,16 @@ public:
|
||||||
stream_score += score;
|
stream_score += score;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Sets the new frame tick
|
||||||
|
void SetFrameTick(u64 new_frame_tick) noexcept {
|
||||||
|
frame_tick = new_frame_tick;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the frame tick last set on this buffer
|
||||||
|
[[nodiscard]] u64 FrameTick() const noexcept {
|
||||||
|
return frame_tick;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the likeliness of this being a stream buffer
|
/// Returns the likeliness of this being a stream buffer
|
||||||
[[nodiscard]] int StreamScore() const noexcept {
|
[[nodiscard]] int StreamScore() const noexcept {
|
||||||
return stream_score;
|
return stream_score;
|
||||||
|
@ -586,6 +596,7 @@ private:
|
||||||
RasterizerInterface* rasterizer = nullptr;
|
RasterizerInterface* rasterizer = nullptr;
|
||||||
VAddr cpu_addr = 0;
|
VAddr cpu_addr = 0;
|
||||||
Words words;
|
Words words;
|
||||||
|
u64 frame_tick = 0;
|
||||||
BufferFlagBits flags{};
|
BufferFlagBits flags{};
|
||||||
int stream_score = 0;
|
int stream_score = 0;
|
||||||
};
|
};
|
||||||
|
|
|
@ -65,6 +65,9 @@ class BufferCache {
|
||||||
|
|
||||||
static constexpr BufferId NULL_BUFFER_ID{0};
|
static constexpr BufferId NULL_BUFFER_ID{0};
|
||||||
|
|
||||||
|
static constexpr u64 expected_memory = 512ULL * 1024ULL * 1024ULL;
|
||||||
|
static constexpr u64 critical_memory = 1024ULL * 1024ULL * 1024ULL;
|
||||||
|
|
||||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||||
|
|
||||||
using Runtime = typename P::Runtime;
|
using Runtime = typename P::Runtime;
|
||||||
|
@ -243,6 +246,8 @@ private:
|
||||||
template <bool insert>
|
template <bool insert>
|
||||||
void ChangeRegister(BufferId buffer_id);
|
void ChangeRegister(BufferId buffer_id);
|
||||||
|
|
||||||
|
void TouchBuffer(Buffer& buffer) const noexcept;
|
||||||
|
|
||||||
bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
|
bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
|
||||||
|
|
||||||
bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
|
bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
|
||||||
|
@ -255,6 +260,10 @@ private:
|
||||||
|
|
||||||
void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
|
void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
|
||||||
|
|
||||||
|
void DownloadBufferMemory(Buffer& buffer_id);
|
||||||
|
|
||||||
|
void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size);
|
||||||
|
|
||||||
void DeleteBuffer(BufferId buffer_id);
|
void DeleteBuffer(BufferId buffer_id);
|
||||||
|
|
||||||
void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id);
|
void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id);
|
||||||
|
@ -319,6 +328,10 @@ private:
|
||||||
size_t immediate_buffer_capacity = 0;
|
size_t immediate_buffer_capacity = 0;
|
||||||
std::unique_ptr<u8[]> immediate_buffer_alloc;
|
std::unique_ptr<u8[]> immediate_buffer_alloc;
|
||||||
|
|
||||||
|
typename SlotVector<Buffer>::Iterator deletion_iterator;
|
||||||
|
u64 frame_tick = 0;
|
||||||
|
u64 total_used_memory = 0;
|
||||||
|
|
||||||
std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
|
std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -332,10 +345,16 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||||
gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} {
|
gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} {
|
||||||
// Ensure the first slot is used for the null buffer
|
// Ensure the first slot is used for the null buffer
|
||||||
void(slot_buffers.insert(runtime, NullBufferParams{}));
|
void(slot_buffers.insert(runtime, NullBufferParams{}));
|
||||||
|
deletion_iterator = slot_buffers.end();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::TickFrame() {
|
void BufferCache<P>::TickFrame() {
|
||||||
|
const bool enabled_gc = Settings::values.use_caches_gc.GetValue();
|
||||||
|
SCOPE_EXIT({
|
||||||
|
++frame_tick;
|
||||||
|
delayed_destruction_ring.Tick();
|
||||||
|
});
|
||||||
// Calculate hits and shots and move hit bits to the right
|
// Calculate hits and shots and move hit bits to the right
|
||||||
const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end());
|
const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end());
|
||||||
const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end());
|
const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end());
|
||||||
|
@ -349,7 +368,27 @@ void BufferCache<P>::TickFrame() {
|
||||||
const bool skip_preferred = hits * 256 < shots * 251;
|
const bool skip_preferred = hits * 256 < shots * 251;
|
||||||
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
|
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
|
||||||
|
|
||||||
delayed_destruction_ring.Tick();
|
const bool activate_gc = enabled_gc && total_used_memory >= expected_memory;
|
||||||
|
if (!activate_gc) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const bool aggressive_gc = total_used_memory >= critical_memory;
|
||||||
|
const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
|
||||||
|
int num_iterations = aggressive_gc ? 64 : 32;
|
||||||
|
for (; num_iterations > 0; --num_iterations) {
|
||||||
|
if (deletion_iterator == slot_buffers.end()) {
|
||||||
|
deletion_iterator = slot_buffers.begin();
|
||||||
|
}
|
||||||
|
++deletion_iterator;
|
||||||
|
if (deletion_iterator == slot_buffers.end()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
const auto [buffer_id, buffer] = *deletion_iterator;
|
||||||
|
if (buffer->FrameTick() + ticks_to_destroy < frame_tick) {
|
||||||
|
DownloadBufferMemory(*buffer);
|
||||||
|
DeleteBuffer(buffer_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
@ -371,50 +410,8 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
|
void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
|
||||||
ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
|
ForEachBufferInRange(cpu_addr, size,
|
||||||
boost::container::small_vector<BufferCopy, 1> copies;
|
[&](BufferId, Buffer& buffer) { DownloadBufferMemory(buffer); });
|
||||||
u64 total_size_bytes = 0;
|
|
||||||
u64 largest_copy = 0;
|
|
||||||
buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
|
|
||||||
copies.push_back(BufferCopy{
|
|
||||||
.src_offset = range_offset,
|
|
||||||
.dst_offset = total_size_bytes,
|
|
||||||
.size = range_size,
|
|
||||||
});
|
|
||||||
total_size_bytes += range_size;
|
|
||||||
largest_copy = std::max(largest_copy, range_size);
|
|
||||||
});
|
|
||||||
if (total_size_bytes == 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
MICROPROFILE_SCOPE(GPU_DownloadMemory);
|
|
||||||
|
|
||||||
if constexpr (USE_MEMORY_MAPS) {
|
|
||||||
auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
|
|
||||||
const u8* const mapped_memory = download_staging.mapped_span.data();
|
|
||||||
const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
|
|
||||||
for (BufferCopy& copy : copies) {
|
|
||||||
// Modify copies to have the staging offset in mind
|
|
||||||
copy.dst_offset += download_staging.offset;
|
|
||||||
}
|
|
||||||
runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
|
|
||||||
runtime.Finish();
|
|
||||||
for (const BufferCopy& copy : copies) {
|
|
||||||
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
|
|
||||||
// Undo the modified offset
|
|
||||||
const u64 dst_offset = copy.dst_offset - download_staging.offset;
|
|
||||||
const u8* copy_mapped_memory = mapped_memory + dst_offset;
|
|
||||||
cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
|
|
||||||
for (const BufferCopy& copy : copies) {
|
|
||||||
buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
|
|
||||||
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
|
|
||||||
cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
@ -640,6 +637,7 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::BindHostIndexBuffer() {
|
void BufferCache<P>::BindHostIndexBuffer() {
|
||||||
Buffer& buffer = slot_buffers[index_buffer.buffer_id];
|
Buffer& buffer = slot_buffers[index_buffer.buffer_id];
|
||||||
|
TouchBuffer(buffer);
|
||||||
const u32 offset = buffer.Offset(index_buffer.cpu_addr);
|
const u32 offset = buffer.Offset(index_buffer.cpu_addr);
|
||||||
const u32 size = index_buffer.size;
|
const u32 size = index_buffer.size;
|
||||||
SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
|
SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
|
||||||
|
@ -658,6 +656,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
|
||||||
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
|
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
|
||||||
const Binding& binding = vertex_buffers[index];
|
const Binding& binding = vertex_buffers[index];
|
||||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||||
|
TouchBuffer(buffer);
|
||||||
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
|
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
|
||||||
if (!flags[Dirty::VertexBuffer0 + index]) {
|
if (!flags[Dirty::VertexBuffer0 + index]) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -693,6 +692,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||||
const VAddr cpu_addr = binding.cpu_addr;
|
const VAddr cpu_addr = binding.cpu_addr;
|
||||||
const u32 size = binding.size;
|
const u32 size = binding.size;
|
||||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||||
|
TouchBuffer(buffer);
|
||||||
const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
|
const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
|
||||||
size <= uniform_buffer_skip_cache_size &&
|
size <= uniform_buffer_skip_cache_size &&
|
||||||
!buffer.IsRegionGpuModified(cpu_addr, size);
|
!buffer.IsRegionGpuModified(cpu_addr, size);
|
||||||
|
@ -744,6 +744,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
|
||||||
ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
|
ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
|
||||||
const Binding& binding = storage_buffers[stage][index];
|
const Binding& binding = storage_buffers[stage][index];
|
||||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||||
|
TouchBuffer(buffer);
|
||||||
const u32 size = binding.size;
|
const u32 size = binding.size;
|
||||||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||||
|
|
||||||
|
@ -766,6 +767,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
|
||||||
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
|
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
|
||||||
const Binding& binding = transform_feedback_buffers[index];
|
const Binding& binding = transform_feedback_buffers[index];
|
||||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||||
|
TouchBuffer(buffer);
|
||||||
const u32 size = binding.size;
|
const u32 size = binding.size;
|
||||||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||||
|
|
||||||
|
@ -784,6 +786,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
|
||||||
ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
|
ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
|
||||||
const Binding& binding = compute_uniform_buffers[index];
|
const Binding& binding = compute_uniform_buffers[index];
|
||||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||||
|
TouchBuffer(buffer);
|
||||||
const u32 size = binding.size;
|
const u32 size = binding.size;
|
||||||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||||
|
|
||||||
|
@ -803,6 +806,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
|
||||||
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
|
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
|
||||||
const Binding& binding = compute_storage_buffers[index];
|
const Binding& binding = compute_storage_buffers[index];
|
||||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||||
|
TouchBuffer(buffer);
|
||||||
const u32 size = binding.size;
|
const u32 size = binding.size;
|
||||||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||||
|
|
||||||
|
@ -1101,6 +1105,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
|
||||||
const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
|
const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
|
||||||
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
|
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
|
||||||
const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
|
const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
|
||||||
|
TouchBuffer(slot_buffers[new_buffer_id]);
|
||||||
for (const BufferId overlap_id : overlap.ids) {
|
for (const BufferId overlap_id : overlap.ids) {
|
||||||
JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
|
JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
|
||||||
}
|
}
|
||||||
|
@ -1122,8 +1127,14 @@ template <class P>
|
||||||
template <bool insert>
|
template <bool insert>
|
||||||
void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
|
void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
|
||||||
const Buffer& buffer = slot_buffers[buffer_id];
|
const Buffer& buffer = slot_buffers[buffer_id];
|
||||||
|
const auto size = buffer.SizeBytes();
|
||||||
|
if (insert) {
|
||||||
|
total_used_memory += Common::AlignUp(size, 1024);
|
||||||
|
} else {
|
||||||
|
total_used_memory -= Common::AlignUp(size, 1024);
|
||||||
|
}
|
||||||
const VAddr cpu_addr_begin = buffer.CpuAddr();
|
const VAddr cpu_addr_begin = buffer.CpuAddr();
|
||||||
const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes();
|
const VAddr cpu_addr_end = cpu_addr_begin + size;
|
||||||
const u64 page_begin = cpu_addr_begin / PAGE_SIZE;
|
const u64 page_begin = cpu_addr_begin / PAGE_SIZE;
|
||||||
const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE);
|
const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE);
|
||||||
for (u64 page = page_begin; page != page_end; ++page) {
|
for (u64 page = page_begin; page != page_end; ++page) {
|
||||||
|
@ -1135,6 +1146,11 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept {
|
||||||
|
buffer.SetFrameTick(frame_tick);
|
||||||
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
|
bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
|
||||||
if (buffer.CpuAddr() == 0) {
|
if (buffer.CpuAddr() == 0) {
|
||||||
|
@ -1211,6 +1227,57 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
|
||||||
runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
|
runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
|
||||||
|
DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) {
|
||||||
|
boost::container::small_vector<BufferCopy, 1> copies;
|
||||||
|
u64 total_size_bytes = 0;
|
||||||
|
u64 largest_copy = 0;
|
||||||
|
buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
|
||||||
|
copies.push_back(BufferCopy{
|
||||||
|
.src_offset = range_offset,
|
||||||
|
.dst_offset = total_size_bytes,
|
||||||
|
.size = range_size,
|
||||||
|
});
|
||||||
|
total_size_bytes += range_size;
|
||||||
|
largest_copy = std::max(largest_copy, range_size);
|
||||||
|
});
|
||||||
|
if (total_size_bytes == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
MICROPROFILE_SCOPE(GPU_DownloadMemory);
|
||||||
|
|
||||||
|
if constexpr (USE_MEMORY_MAPS) {
|
||||||
|
auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
|
||||||
|
const u8* const mapped_memory = download_staging.mapped_span.data();
|
||||||
|
const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
|
||||||
|
for (BufferCopy& copy : copies) {
|
||||||
|
// Modify copies to have the staging offset in mind
|
||||||
|
copy.dst_offset += download_staging.offset;
|
||||||
|
}
|
||||||
|
runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
|
||||||
|
runtime.Finish();
|
||||||
|
for (const BufferCopy& copy : copies) {
|
||||||
|
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
|
||||||
|
// Undo the modified offset
|
||||||
|
const u64 dst_offset = copy.dst_offset - download_staging.offset;
|
||||||
|
const u8* copy_mapped_memory = mapped_memory + dst_offset;
|
||||||
|
cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
|
||||||
|
for (const BufferCopy& copy : copies) {
|
||||||
|
buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
|
||||||
|
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
|
||||||
|
cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
|
void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
|
||||||
const auto scalar_replace = [buffer_id](Binding& binding) {
|
const auto scalar_replace = [buffer_id](Binding& binding) {
|
||||||
|
@ -1236,6 +1303,7 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
|
||||||
|
|
||||||
Unregister(buffer_id);
|
Unregister(buffer_id);
|
||||||
delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
|
delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
|
||||||
|
slot_buffers.erase(buffer_id);
|
||||||
|
|
||||||
NotifyBufferDeletion();
|
NotifyBufferDeletion();
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,6 +9,8 @@
|
||||||
|
|
||||||
#include <glad/glad.h>
|
#include <glad/glad.h>
|
||||||
|
|
||||||
|
#include "common/settings.h"
|
||||||
|
|
||||||
#include "video_core/renderer_opengl/gl_device.h"
|
#include "video_core/renderer_opengl/gl_device.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||||
|
@ -307,7 +309,9 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
|
||||||
|
|
||||||
[[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime,
|
[[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime,
|
||||||
const VideoCommon::ImageInfo& info) {
|
const VideoCommon::ImageInfo& info) {
|
||||||
return !runtime.HasNativeASTC() && IsPixelFormatASTC(info.format);
|
if (IsPixelFormatASTC(info.format)) {
|
||||||
|
return !runtime.HasNativeASTC() && Settings::values.accelerate_astc.GetValue();
|
||||||
|
}
|
||||||
// Disable other accelerated uploads for now as they don't implement swizzled uploads
|
// Disable other accelerated uploads for now as they don't implement swizzled uploads
|
||||||
return false;
|
return false;
|
||||||
switch (info.type) {
|
switch (info.type) {
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "common/bit_cast.h"
|
#include "common/bit_cast.h"
|
||||||
|
#include "common/settings.h"
|
||||||
|
|
||||||
#include "video_core/engines/fermi_2d.h"
|
#include "video_core/engines/fermi_2d.h"
|
||||||
#include "video_core/renderer_vulkan/blit_image.h"
|
#include "video_core/renderer_vulkan/blit_image.h"
|
||||||
|
@ -828,7 +829,11 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
|
||||||
commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
|
commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
|
||||||
}
|
}
|
||||||
if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
|
if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
|
||||||
flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
|
if (Settings::values.accelerate_astc.GetValue()) {
|
||||||
|
flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
|
||||||
|
} else {
|
||||||
|
flags |= VideoCommon::ImageFlagBits::Converted;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (runtime.device.HasDebuggingToolAttached()) {
|
if (runtime.device.HasDebuggingToolAttached()) {
|
||||||
if (image) {
|
if (image) {
|
||||||
|
|
|
@ -113,6 +113,43 @@ void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_vie
|
||||||
image_view_ids.push_back(image_view_id);
|
image_view_ids.push_back(image_view_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ImageBase::IsSafeDownload() const noexcept {
|
||||||
|
// Skip images that were not modified from the GPU
|
||||||
|
if (False(flags & ImageFlagBits::GpuModified)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Skip images that .are. modified from the CPU
|
||||||
|
// We don't want to write sensitive data from the guest
|
||||||
|
if (True(flags & ImageFlagBits::CpuModified)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (info.num_samples > 1) {
|
||||||
|
LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ImageBase::CheckBadOverlapState() {
|
||||||
|
if (False(flags & ImageFlagBits::BadOverlap)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!overlapping_images.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
flags &= ~ImageFlagBits::BadOverlap;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ImageBase::CheckAliasState() {
|
||||||
|
if (False(flags & ImageFlagBits::Alias)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!aliased_images.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
flags &= ~ImageFlagBits::Alias;
|
||||||
|
}
|
||||||
|
|
||||||
void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
|
void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
|
||||||
static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
|
static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
|
||||||
ASSERT(lhs.info.type == rhs.info.type);
|
ASSERT(lhs.info.type == rhs.info.type);
|
||||||
|
|
|
@ -25,6 +25,12 @@ enum class ImageFlagBits : u32 {
|
||||||
Strong = 1 << 5, ///< Exists in the image table, the dimensions can be trusted
|
Strong = 1 << 5, ///< Exists in the image table, the dimensions can be trusted
|
||||||
Registered = 1 << 6, ///< True when the image is registered
|
Registered = 1 << 6, ///< True when the image is registered
|
||||||
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
|
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
|
||||||
|
|
||||||
|
// Garbage Collection Flags
|
||||||
|
BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher
|
||||||
|
///< garbage collection priority
|
||||||
|
Alias = 1 << 9, ///< This image has aliases and has priority on garbage
|
||||||
|
///< collection
|
||||||
};
|
};
|
||||||
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
|
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
|
||||||
|
|
||||||
|
@ -44,11 +50,16 @@ struct ImageBase {
|
||||||
|
|
||||||
void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id);
|
void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id);
|
||||||
|
|
||||||
|
[[nodiscard]] bool IsSafeDownload() const noexcept;
|
||||||
|
|
||||||
[[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
|
[[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
|
||||||
const VAddr overlap_end = overlap_cpu_addr + overlap_size;
|
const VAddr overlap_end = overlap_cpu_addr + overlap_size;
|
||||||
return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
|
return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CheckBadOverlapState();
|
||||||
|
void CheckAliasState();
|
||||||
|
|
||||||
ImageInfo info;
|
ImageInfo info;
|
||||||
|
|
||||||
u32 guest_size_bytes = 0;
|
u32 guest_size_bytes = 0;
|
||||||
|
@ -72,6 +83,7 @@ struct ImageBase {
|
||||||
std::vector<SubresourceBase> slice_subresources;
|
std::vector<SubresourceBase> slice_subresources;
|
||||||
|
|
||||||
std::vector<AliasedImage> aliased_images;
|
std::vector<AliasedImage> aliased_images;
|
||||||
|
std::vector<ImageId> overlapping_images;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ImageAllocBase {
|
struct ImageAllocBase {
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
|
#include <bit>
|
||||||
#include <concepts>
|
#include <concepts>
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
@ -32,6 +33,60 @@ template <class T>
|
||||||
requires std::is_nothrow_move_assignable_v<T>&&
|
requires std::is_nothrow_move_assignable_v<T>&&
|
||||||
std::is_nothrow_move_constructible_v<T> class SlotVector {
|
std::is_nothrow_move_constructible_v<T> class SlotVector {
|
||||||
public:
|
public:
|
||||||
|
class Iterator {
|
||||||
|
friend SlotVector<T>;
|
||||||
|
|
||||||
|
public:
|
||||||
|
constexpr Iterator() = default;
|
||||||
|
|
||||||
|
Iterator& operator++() noexcept {
|
||||||
|
const u64* const bitset = slot_vector->stored_bitset.data();
|
||||||
|
const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64;
|
||||||
|
if (id.index < size) {
|
||||||
|
do {
|
||||||
|
++id.index;
|
||||||
|
} while (id.index < size && !IsValid(bitset));
|
||||||
|
if (id.index == size) {
|
||||||
|
id.index = SlotId::INVALID_INDEX;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
Iterator operator++(int) noexcept {
|
||||||
|
const Iterator copy{*this};
|
||||||
|
++*this;
|
||||||
|
return copy;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator==(const Iterator& other) const noexcept {
|
||||||
|
return id.index == other.id.index;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator!=(const Iterator& other) const noexcept {
|
||||||
|
return id.index != other.id.index;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::pair<SlotId, T*> operator*() const noexcept {
|
||||||
|
return {id, std::addressof((*slot_vector)[id])};
|
||||||
|
}
|
||||||
|
|
||||||
|
T* operator->() const noexcept {
|
||||||
|
return std::addressof((*slot_vector)[id]);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept
|
||||||
|
: slot_vector{slot_vector_}, id{id_} {}
|
||||||
|
|
||||||
|
bool IsValid(const u64* bitset) noexcept {
|
||||||
|
return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
SlotVector<T>* slot_vector;
|
||||||
|
SlotId id;
|
||||||
|
};
|
||||||
|
|
||||||
~SlotVector() noexcept {
|
~SlotVector() noexcept {
|
||||||
size_t index = 0;
|
size_t index = 0;
|
||||||
for (u64 bits : stored_bitset) {
|
for (u64 bits : stored_bitset) {
|
||||||
|
@ -70,6 +125,20 @@ public:
|
||||||
ResetStorageBit(id.index);
|
ResetStorageBit(id.index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns an iterator to the first occupied slot, or end() when no bit is set in
/// stored_bitset.
[[nodiscard]] Iterator begin() noexcept {
    const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; });
    if (it == stored_bitset.end()) {
        return end();
    }
    // std::distance(first, last) counts the steps from first to last, so the container start
    // has to come first. With the arguments swapped the result is negative and wraps around
    // when converted to u32, yielding a bogus slot id for any word other than word 0.
    const u32 word_index = static_cast<u32>(std::distance(stored_bitset.begin(), it));
    // Each word covers 64 slots; the lowest set bit is the first occupied slot of that word
    const SlotId first_id{word_index * 64 + static_cast<u32>(std::countr_zero(*it))};
    return Iterator(this, first_id);
}
|
||||||
|
|
||||||
|
/// Returns the past-the-end iterator, which carries SlotId::INVALID_INDEX as its index.
[[nodiscard]] Iterator end() noexcept {
    const SlotId sentinel{SlotId::INVALID_INDEX};
    return Iterator(this, sentinel);
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct NonTrivialDummy {
|
struct NonTrivialDummy {
|
||||||
NonTrivialDummy() noexcept {}
|
NonTrivialDummy() noexcept {}
|
||||||
|
@ -140,7 +209,6 @@ private:
|
||||||
|
|
||||||
Entry* values = nullptr;
|
Entry* values = nullptr;
|
||||||
size_t values_capacity = 0;
|
size_t values_capacity = 0;
|
||||||
size_t values_size = 0;
|
|
||||||
|
|
||||||
std::vector<u64> stored_bitset;
|
std::vector<u64> stored_bitset;
|
||||||
std::vector<u32> free_list;
|
std::vector<u32> free_list;
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
#include "common/common_funcs.h"
|
#include "common/common_funcs.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
|
#include "common/settings.h"
|
||||||
#include "video_core/compatible_formats.h"
|
#include "video_core/compatible_formats.h"
|
||||||
#include "video_core/delayed_destruction_ring.h"
|
#include "video_core/delayed_destruction_ring.h"
|
||||||
#include "video_core/dirty_flags.h"
|
#include "video_core/dirty_flags.h"
|
||||||
|
@ -75,6 +76,9 @@ class TextureCache {
|
||||||
/// Sampler ID for bugged sampler ids
|
/// Sampler ID for bugged sampler ids
|
||||||
static constexpr SamplerId NULL_SAMPLER_ID{0};
|
static constexpr SamplerId NULL_SAMPLER_ID{0};
|
||||||
|
|
||||||
|
static constexpr u64 expected_memory = 1024ULL * 1024ULL * 1024ULL;
|
||||||
|
static constexpr u64 critical_memory = 2 * 1024ULL * 1024ULL * 1024ULL;
|
||||||
|
|
||||||
using Runtime = typename P::Runtime;
|
using Runtime = typename P::Runtime;
|
||||||
using Image = typename P::Image;
|
using Image = typename P::Image;
|
||||||
using ImageAlloc = typename P::ImageAlloc;
|
using ImageAlloc = typename P::ImageAlloc;
|
||||||
|
@ -333,6 +337,7 @@ private:
|
||||||
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
|
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
|
||||||
|
|
||||||
bool has_deleted_images = false;
|
bool has_deleted_images = false;
|
||||||
|
u64 total_used_memory = 0;
|
||||||
|
|
||||||
SlotVector<Image> slot_images;
|
SlotVector<Image> slot_images;
|
||||||
SlotVector<ImageView> slot_image_views;
|
SlotVector<ImageView> slot_image_views;
|
||||||
|
@ -353,6 +358,7 @@ private:
|
||||||
|
|
||||||
u64 modification_tick = 0;
|
u64 modification_tick = 0;
|
||||||
u64 frame_tick = 0;
|
u64 frame_tick = 0;
|
||||||
|
typename SlotVector<Image>::Iterator deletion_iterator;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
@ -373,11 +379,82 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
|
||||||
// This way the null resource becomes a compile time constant
|
// This way the null resource becomes a compile time constant
|
||||||
void(slot_image_views.insert(runtime, NullImageParams{}));
|
void(slot_image_views.insert(runtime, NullImageParams{}));
|
||||||
void(slot_samplers.insert(runtime, sampler_descriptor));
|
void(slot_samplers.insert(runtime, sampler_descriptor));
|
||||||
|
|
||||||
|
deletion_iterator = slot_images.begin();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void TextureCache<P>::TickFrame() {
|
void TextureCache<P>::TickFrame() {
|
||||||
// Tick sentenced resources in this order to ensure they are destroyed in the right order
|
const bool enabled_gc = Settings::values.use_caches_gc.GetValue();
|
||||||
|
if (!enabled_gc) {
|
||||||
|
// @Note(Blinkhawk): compile error with SCOPE_EXIT on msvc.
|
||||||
|
sentenced_images.Tick();
|
||||||
|
sentenced_framebuffers.Tick();
|
||||||
|
sentenced_image_view.Tick();
|
||||||
|
++frame_tick;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const bool high_priority_mode = total_used_memory >= expected_memory;
|
||||||
|
const bool aggressive_mode = total_used_memory >= critical_memory;
|
||||||
|
const u64 ticks_to_destroy = high_priority_mode ? 60 : 100;
|
||||||
|
int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64);
|
||||||
|
for (; num_iterations > 0; --num_iterations) {
|
||||||
|
if (deletion_iterator == slot_images.end()) {
|
||||||
|
deletion_iterator = slot_images.begin();
|
||||||
|
if (deletion_iterator == slot_images.end()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto [image_id, image_tmp] = *deletion_iterator;
|
||||||
|
Image* image = image_tmp; // fix clang error.
|
||||||
|
const bool is_alias = True(image->flags & ImageFlagBits::Alias);
|
||||||
|
const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap);
|
||||||
|
const bool must_download = image->IsSafeDownload();
|
||||||
|
bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download);
|
||||||
|
const u64 ticks_needed =
|
||||||
|
is_bad_overlap
|
||||||
|
? ticks_to_destroy >> 4
|
||||||
|
: ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy);
|
||||||
|
should_care |= aggressive_mode;
|
||||||
|
if (should_care && image->frame_tick + ticks_needed < frame_tick) {
|
||||||
|
if (is_bad_overlap) {
|
||||||
|
const bool overlap_check = std::ranges::all_of(
|
||||||
|
image->overlapping_images, [&, image](const ImageId& overlap_id) {
|
||||||
|
auto& overlap = slot_images[overlap_id];
|
||||||
|
return overlap.frame_tick >= image->frame_tick;
|
||||||
|
});
|
||||||
|
if (!overlap_check) {
|
||||||
|
++deletion_iterator;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!is_bad_overlap && must_download) {
|
||||||
|
const bool alias_check = std::ranges::none_of(
|
||||||
|
image->aliased_images, [&, image](const AliasedImage& alias) {
|
||||||
|
auto& alias_image = slot_images[alias.id];
|
||||||
|
return (alias_image.frame_tick < image->frame_tick) ||
|
||||||
|
(alias_image.modification_tick < image->modification_tick);
|
||||||
|
});
|
||||||
|
|
||||||
|
if (alias_check) {
|
||||||
|
auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes);
|
||||||
|
const auto copies = FullDownloadCopies(image->info);
|
||||||
|
image->DownloadMemory(map, copies);
|
||||||
|
runtime.Finish();
|
||||||
|
SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (True(image->flags & ImageFlagBits::Tracked)) {
|
||||||
|
UntrackImage(*image);
|
||||||
|
}
|
||||||
|
UnregisterImage(image_id);
|
||||||
|
DeleteImage(image_id);
|
||||||
|
if (is_bad_overlap) {
|
||||||
|
num_iterations++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
++deletion_iterator;
|
||||||
|
}
|
||||||
sentenced_images.Tick();
|
sentenced_images.Tick();
|
||||||
sentenced_framebuffers.Tick();
|
sentenced_framebuffers.Tick();
|
||||||
sentenced_image_view.Tick();
|
sentenced_image_view.Tick();
|
||||||
|
@ -568,17 +645,7 @@ template <class P>
|
||||||
void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
|
void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
|
||||||
std::vector<ImageId> images;
|
std::vector<ImageId> images;
|
||||||
ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
|
ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
|
||||||
// Skip images that were not modified from the GPU
|
if (!image.IsSafeDownload()) {
|
||||||
if (False(image.flags & ImageFlagBits::GpuModified)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// Skip images that .are. modified from the CPU
|
|
||||||
// We don't want to write sensitive data from the guest
|
|
||||||
if (True(image.flags & ImageFlagBits::CpuModified)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (image.info.num_samples > 1) {
|
|
||||||
LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
image.flags &= ~ImageFlagBits::GpuModified;
|
image.flags &= ~ImageFlagBits::GpuModified;
|
||||||
|
@ -967,6 +1034,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
std::vector<ImageId> overlap_ids;
|
std::vector<ImageId> overlap_ids;
|
||||||
std::vector<ImageId> left_aliased_ids;
|
std::vector<ImageId> left_aliased_ids;
|
||||||
std::vector<ImageId> right_aliased_ids;
|
std::vector<ImageId> right_aliased_ids;
|
||||||
|
std::vector<ImageId> bad_overlap_ids;
|
||||||
ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
|
ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
|
||||||
if (info.type != overlap.info.type) {
|
if (info.type != overlap.info.type) {
|
||||||
return;
|
return;
|
||||||
|
@ -992,9 +1060,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
|
const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
|
||||||
if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
|
if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
|
||||||
left_aliased_ids.push_back(overlap_id);
|
left_aliased_ids.push_back(overlap_id);
|
||||||
|
overlap.flags |= ImageFlagBits::Alias;
|
||||||
} else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
|
} else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
|
||||||
broken_views, native_bgr)) {
|
broken_views, native_bgr)) {
|
||||||
right_aliased_ids.push_back(overlap_id);
|
right_aliased_ids.push_back(overlap_id);
|
||||||
|
overlap.flags |= ImageFlagBits::Alias;
|
||||||
|
} else {
|
||||||
|
bad_overlap_ids.push_back(overlap_id);
|
||||||
|
overlap.flags |= ImageFlagBits::BadOverlap;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
|
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
|
||||||
|
@ -1022,10 +1095,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
for (const ImageId aliased_id : right_aliased_ids) {
|
for (const ImageId aliased_id : right_aliased_ids) {
|
||||||
ImageBase& aliased = slot_images[aliased_id];
|
ImageBase& aliased = slot_images[aliased_id];
|
||||||
AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
|
AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
|
||||||
|
new_image.flags |= ImageFlagBits::Alias;
|
||||||
}
|
}
|
||||||
for (const ImageId aliased_id : left_aliased_ids) {
|
for (const ImageId aliased_id : left_aliased_ids) {
|
||||||
ImageBase& aliased = slot_images[aliased_id];
|
ImageBase& aliased = slot_images[aliased_id];
|
||||||
AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
|
AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
|
||||||
|
new_image.flags |= ImageFlagBits::Alias;
|
||||||
|
}
|
||||||
|
for (const ImageId aliased_id : bad_overlap_ids) {
|
||||||
|
ImageBase& aliased = slot_images[aliased_id];
|
||||||
|
aliased.overlapping_images.push_back(new_image_id);
|
||||||
|
new_image.overlapping_images.push_back(aliased_id);
|
||||||
|
new_image.flags |= ImageFlagBits::BadOverlap;
|
||||||
}
|
}
|
||||||
RegisterImage(new_image_id);
|
RegisterImage(new_image_id);
|
||||||
return new_image_id;
|
return new_image_id;
|
||||||
|
@ -1195,6 +1276,8 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
|
||||||
image.flags |= ImageFlagBits::Registered;
|
image.flags |= ImageFlagBits::Registered;
|
||||||
ForEachPage(image.cpu_addr, image.guest_size_bytes,
|
ForEachPage(image.cpu_addr, image.guest_size_bytes,
|
||||||
[this, image_id](u64 page) { page_table[page].push_back(image_id); });
|
[this, image_id](u64 page) { page_table[page].push_back(image_id); });
|
||||||
|
total_used_memory +=
|
||||||
|
Common::AlignUp(std::max(image.guest_size_bytes, image.unswizzled_size_bytes), 1024);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
@ -1203,6 +1286,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
|
||||||
ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
|
ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
|
||||||
"Trying to unregister an already registered image");
|
"Trying to unregister an already registered image");
|
||||||
image.flags &= ~ImageFlagBits::Registered;
|
image.flags &= ~ImageFlagBits::Registered;
|
||||||
|
image.flags &= ~ImageFlagBits::BadOverlap;
|
||||||
|
total_used_memory -=
|
||||||
|
Common::AlignUp(std::max(image.guest_size_bytes, image.unswizzled_size_bytes), 1024);
|
||||||
ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
|
ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
|
||||||
const auto page_it = page_table.find(page);
|
const auto page_it = page_table.find(page);
|
||||||
if (page_it == page_table.end()) {
|
if (page_it == page_table.end()) {
|
||||||
|
@ -1276,9 +1362,19 @@ void TextureCache<P>::DeleteImage(ImageId image_id) {
|
||||||
std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
|
std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
|
||||||
return other_alias.id == image_id;
|
return other_alias.id == image_id;
|
||||||
});
|
});
|
||||||
|
other_image.CheckAliasState();
|
||||||
ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
|
ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
|
||||||
num_removed_aliases);
|
num_removed_aliases);
|
||||||
}
|
}
|
||||||
|
for (const ImageId overlap_id : image.overlapping_images) {
|
||||||
|
ImageBase& other_image = slot_images[overlap_id];
|
||||||
|
[[maybe_unused]] const size_t num_removed_overlaps = std::erase_if(
|
||||||
|
other_image.overlapping_images,
|
||||||
|
[image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; });
|
||||||
|
other_image.CheckBadOverlapState();
|
||||||
|
ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}",
|
||||||
|
num_removed_overlaps);
|
||||||
|
}
|
||||||
for (const ImageViewId image_view_id : image_view_ids) {
|
for (const ImageViewId image_view_id : image_view_ids) {
|
||||||
sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
|
sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
|
||||||
slot_image_views.erase(image_view_id);
|
slot_image_views.erase(image_view_id);
|
||||||
|
|
|
@ -47,6 +47,7 @@
|
||||||
#include "video_core/texture_cache/formatter.h"
|
#include "video_core/texture_cache/formatter.h"
|
||||||
#include "video_core/texture_cache/samples_helper.h"
|
#include "video_core/texture_cache/samples_helper.h"
|
||||||
#include "video_core/texture_cache/util.h"
|
#include "video_core/texture_cache/util.h"
|
||||||
|
#include "video_core/textures/astc.h"
|
||||||
#include "video_core/textures/decoders.h"
|
#include "video_core/textures/decoders.h"
|
||||||
|
|
||||||
namespace VideoCommon {
|
namespace VideoCommon {
|
||||||
|
@ -580,6 +581,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
|
||||||
|
|
||||||
for (s32 layer = 0; layer < info.resources.layers; ++layer) {
|
for (s32 layer = 0; layer < info.resources.layers; ++layer) {
|
||||||
const std::span<const u8> src = input.subspan(host_offset);
|
const std::span<const u8> src = input.subspan(host_offset);
|
||||||
|
gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
|
||||||
|
|
||||||
SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
|
SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
|
||||||
num_tiles.depth, block.height, block.depth);
|
num_tiles.depth, block.height, block.depth);
|
||||||
|
|
||||||
|
@ -884,8 +887,16 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
|
||||||
ASSERT(copy.image_extent == mip_size);
|
ASSERT(copy.image_extent == mip_size);
|
||||||
ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width));
|
ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width));
|
||||||
ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height));
|
ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height));
|
||||||
DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent,
|
if (IsPixelFormatASTC(info.format)) {
|
||||||
output.subspan(output_offset));
|
ASSERT(copy.image_extent.depth == 1);
|
||||||
|
Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset),
|
||||||
|
copy.image_extent.width, copy.image_extent.height,
|
||||||
|
copy.image_subresource.num_layers, tile_size.width,
|
||||||
|
tile_size.height, output.subspan(output_offset));
|
||||||
|
} else {
|
||||||
|
DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent,
|
||||||
|
output.subspan(output_offset));
|
||||||
|
}
|
||||||
copy.buffer_offset = output_offset;
|
copy.buffer_offset = output_offset;
|
||||||
copy.buffer_row_length = mip_size.width;
|
copy.buffer_row_length = mip_size.width;
|
||||||
copy.buffer_image_height = mip_size.height;
|
copy.buffer_image_height = mip_size.height;
|
||||||
|
|
|
@ -24,22 +24,8 @@
|
||||||
#include <boost/container/static_vector.hpp>
|
#include <boost/container/static_vector.hpp>
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
|
||||||
#include "video_core/textures/astc.h"
|
#include "video_core/textures/astc.h"
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
/// Count the number of bits set in a number.
|
|
||||||
constexpr u32 Popcnt(u32 n) {
|
|
||||||
u32 c = 0;
|
|
||||||
for (; n; c++) {
|
|
||||||
n &= n - 1;
|
|
||||||
}
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // Anonymous namespace
|
|
||||||
|
|
||||||
class InputBitStream {
|
class InputBitStream {
|
||||||
public:
|
public:
|
||||||
constexpr explicit InputBitStream(std::span<const u8> data, size_t start_offset = 0)
|
constexpr explicit InputBitStream(std::span<const u8> data, size_t start_offset = 0)
|
||||||
|
@ -165,37 +151,7 @@ private:
|
||||||
const IntType& m_Bits;
|
const IntType& m_Bits;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class IntegerEncoding { JustBits, Qus32, Trit };
|
namespace Tegra::Texture::ASTC {
|
||||||
|
|
||||||
struct IntegerEncodedValue {
|
|
||||||
constexpr IntegerEncodedValue() = default;
|
|
||||||
|
|
||||||
constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
|
|
||||||
: encoding{encoding_}, num_bits{num_bits_} {}
|
|
||||||
|
|
||||||
constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
|
|
||||||
return encoding == other.encoding && num_bits == other.num_bits;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns the number of bits required to encode nVals values.
|
|
||||||
u32 GetBitLength(u32 nVals) const {
|
|
||||||
u32 totalBits = num_bits * nVals;
|
|
||||||
if (encoding == IntegerEncoding::Trit) {
|
|
||||||
totalBits += (nVals * 8 + 4) / 5;
|
|
||||||
} else if (encoding == IntegerEncoding::Qus32) {
|
|
||||||
totalBits += (nVals * 7 + 2) / 3;
|
|
||||||
}
|
|
||||||
return totalBits;
|
|
||||||
}
|
|
||||||
|
|
||||||
IntegerEncoding encoding{};
|
|
||||||
u32 num_bits = 0;
|
|
||||||
u32 bit_value = 0;
|
|
||||||
union {
|
|
||||||
u32 qus32_value = 0;
|
|
||||||
u32 trit_value;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
using IntegerEncodedVector = boost::container::static_vector<
|
using IntegerEncodedVector = boost::container::static_vector<
|
||||||
IntegerEncodedValue, 256,
|
IntegerEncodedValue, 256,
|
||||||
boost::container::static_vector_options<
|
boost::container::static_vector_options<
|
||||||
|
@ -260,7 +216,7 @@ static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void DecodeQus32Block(InputBitStream& bits, IntegerEncodedVector& result,
|
static void DecodeQuintBlock(InputBitStream& bits, IntegerEncodedVector& result,
|
||||||
u32 nBitsPerValue) {
|
u32 nBitsPerValue) {
|
||||||
// Implement the algorithm in section C.2.12
|
// Implement the algorithm in section C.2.12
|
||||||
u32 m[3];
|
u32 m[3];
|
||||||
|
@ -301,50 +257,12 @@ static void DecodeQus32Block(InputBitStream& bits, IntegerEncodedVector& result,
|
||||||
}
|
}
|
||||||
|
|
||||||
for (std::size_t i = 0; i < 3; ++i) {
|
for (std::size_t i = 0; i < 3; ++i) {
|
||||||
IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Qus32, nBitsPerValue);
|
IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Quint, nBitsPerValue);
|
||||||
val.bit_value = m[i];
|
val.bit_value = m[i];
|
||||||
val.qus32_value = q[i];
|
val.quint_value = q[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns a new instance of this struct that corresponds to the
|
|
||||||
// can take no more than maxval values
|
|
||||||
static constexpr IntegerEncodedValue CreateEncoding(u32 maxVal) {
|
|
||||||
while (maxVal > 0) {
|
|
||||||
u32 check = maxVal + 1;
|
|
||||||
|
|
||||||
// Is maxVal a power of two?
|
|
||||||
if (!(check & (check - 1))) {
|
|
||||||
return IntegerEncodedValue(IntegerEncoding::JustBits, Popcnt(maxVal));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Is maxVal of the type 3*2^n - 1?
|
|
||||||
if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
|
|
||||||
return IntegerEncodedValue(IntegerEncoding::Trit, Popcnt(check / 3 - 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Is maxVal of the type 5*2^n - 1?
|
|
||||||
if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
|
|
||||||
return IntegerEncodedValue(IntegerEncoding::Qus32, Popcnt(check / 5 - 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apparently it can't be represented with a bounded integer sequence...
|
|
||||||
// just iterate.
|
|
||||||
maxVal--;
|
|
||||||
}
|
|
||||||
return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
|
|
||||||
std::array<IntegerEncodedValue, 256> encodings{};
|
|
||||||
for (std::size_t i = 0; i < encodings.size(); ++i) {
|
|
||||||
encodings[i] = CreateEncoding(static_cast<u32>(i));
|
|
||||||
}
|
|
||||||
return encodings;
|
|
||||||
}
|
|
||||||
|
|
||||||
static constexpr std::array EncodingsValues = MakeEncodedValues();
|
|
||||||
|
|
||||||
// Fills result with the values that are encoded in the given
|
// Fills result with the values that are encoded in the given
|
||||||
// bitstream. We must know beforehand what the maximum possible
|
// bitstream. We must know beforehand what the maximum possible
|
||||||
// value is, and how many values we're decoding.
|
// value is, and how many values we're decoding.
|
||||||
|
@ -357,8 +275,8 @@ static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream&
|
||||||
u32 nValsDecoded = 0;
|
u32 nValsDecoded = 0;
|
||||||
while (nValsDecoded < nValues) {
|
while (nValsDecoded < nValues) {
|
||||||
switch (val.encoding) {
|
switch (val.encoding) {
|
||||||
case IntegerEncoding::Qus32:
|
case IntegerEncoding::Quint:
|
||||||
DecodeQus32Block(bits, result, val.num_bits);
|
DecodeQuintBlock(bits, result, val.num_bits);
|
||||||
nValsDecoded += 3;
|
nValsDecoded += 3;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -376,8 +294,6 @@ static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream&
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace ASTCC {
|
|
||||||
|
|
||||||
struct TexelWeightParams {
|
struct TexelWeightParams {
|
||||||
u32 m_Width = 0;
|
u32 m_Width = 0;
|
||||||
u32 m_Height = 0;
|
u32 m_Height = 0;
|
||||||
|
@ -635,48 +551,6 @@ static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)]
|
|
||||||
// is the same as [(numBits - 1):0] and repeats all the way down.
|
|
||||||
template <typename IntType>
|
|
||||||
static constexpr IntType Replicate(IntType val, u32 numBits, u32 toBit) {
|
|
||||||
if (numBits == 0) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if (toBit == 0) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
const IntType v = val & static_cast<IntType>((1 << numBits) - 1);
|
|
||||||
IntType res = v;
|
|
||||||
u32 reslen = numBits;
|
|
||||||
while (reslen < toBit) {
|
|
||||||
u32 comp = 0;
|
|
||||||
if (numBits > toBit - reslen) {
|
|
||||||
u32 newshift = toBit - reslen;
|
|
||||||
comp = numBits - newshift;
|
|
||||||
numBits = newshift;
|
|
||||||
}
|
|
||||||
res = static_cast<IntType>(res << numBits);
|
|
||||||
res = static_cast<IntType>(res | (v >> comp));
|
|
||||||
reslen += numBits;
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns 2^num_bits, the number of distinct values representable with num_bits bits.
static constexpr std::size_t NumReplicateEntries(u32 num_bits) {
    constexpr std::size_t one = 1;
    return one << num_bits;
}
|
|
||||||
|
|
||||||
template <typename IntType, u32 num_bits, u32 to_bit>
|
|
||||||
static constexpr auto MakeReplicateTable() {
|
|
||||||
std::array<IntType, NumReplicateEntries(num_bits)> table{};
|
|
||||||
for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) {
|
|
||||||
table[value] = Replicate(value, num_bits, to_bit);
|
|
||||||
}
|
|
||||||
return table;
|
|
||||||
}
|
|
||||||
|
|
||||||
static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
|
|
||||||
/// Looks up entry `value` of REPLICATE_BYTE_TO_16_TABLE. No bounds check is performed.
static constexpr u32 ReplicateByteTo16(std::size_t value) {
    const auto& table = REPLICATE_BYTE_TO_16_TABLE;
    return table[value];
}
|
||||||
|
@ -696,9 +570,6 @@ static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>
|
||||||
static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>();
|
static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>();
|
||||||
static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>();
|
static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>();
|
||||||
static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>();
|
static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>();
|
||||||
static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
|
|
||||||
static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
|
|
||||||
static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
|
|
||||||
/// Use a precompiled table with the most common usages, if it's not in the expected range, fallback
|
/// Use a precompiled table with the most common usages, if it's not in the expected range, fallback
|
||||||
/// to the runtime implementation
|
/// to the runtime implementation
|
||||||
static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) {
|
static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) {
|
||||||
|
@ -982,9 +853,9 @@ static void DecodeColorValues(u32* out, std::span<u8> data, const u32* modes, co
|
||||||
} // case IntegerEncoding::Trit
|
} // case IntegerEncoding::Trit
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case IntegerEncoding::Qus32: {
|
case IntegerEncoding::Quint: {
|
||||||
|
|
||||||
D = val.qus32_value;
|
D = val.quint_value;
|
||||||
|
|
||||||
switch (bitlen) {
|
switch (bitlen) {
|
||||||
case 1: {
|
case 1: {
|
||||||
|
@ -1023,7 +894,7 @@ static void DecodeColorValues(u32* out, std::span<u8> data, const u32* modes, co
|
||||||
assert(false && "Unsupported quint encoding for color values!");
|
assert(false && "Unsupported quint encoding for color values!");
|
||||||
break;
|
break;
|
||||||
} // switch(bitlen)
|
} // switch(bitlen)
|
||||||
} // case IntegerEncoding::Qus32
|
} // case IntegerEncoding::Quint
|
||||||
break;
|
break;
|
||||||
} // switch(val.encoding)
|
} // switch(val.encoding)
|
||||||
|
|
||||||
|
@ -1086,8 +957,8 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) {
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
case IntegerEncoding::Qus32: {
|
case IntegerEncoding::Quint: {
|
||||||
D = val.qus32_value;
|
D = val.quint_value;
|
||||||
assert(D < 5);
|
assert(D < 5);
|
||||||
|
|
||||||
switch (bitlen) {
|
switch (bitlen) {
|
||||||
|
@ -1675,10 +1546,6 @@ static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace ASTCC
|
|
||||||
|
|
||||||
namespace Tegra::Texture::ASTC {
|
|
||||||
|
|
||||||
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
|
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
|
||||||
uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) {
|
uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) {
|
||||||
u32 block_index = 0;
|
u32 block_index = 0;
|
||||||
|
@ -1690,7 +1557,7 @@ void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height,
|
||||||
|
|
||||||
// Blocks can be at most 12x12
|
// Blocks can be at most 12x12
|
||||||
std::array<u32, 12 * 12> uncompData;
|
std::array<u32, 12 * 12> uncompData;
|
||||||
ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
|
DecompressBlock(blockPtr, block_width, block_height, uncompData);
|
||||||
|
|
||||||
u32 decompWidth = std::min(block_width, width - x);
|
u32 decompWidth = std::min(block_width, width - x);
|
||||||
u32 decompHeight = std::min(block_height, height - y);
|
u32 decompHeight = std::min(block_height, height - y);
|
||||||
|
|
|
@ -129,4 +129,7 @@ struct AstcBufferData {
|
||||||
decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE;
|
decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE;
|
||||||
} constexpr ASTC_BUFFER_DATA;
|
} constexpr ASTC_BUFFER_DATA;
|
||||||
|
|
||||||
|
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
|
||||||
|
uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
|
||||||
|
|
||||||
} // namespace Tegra::Texture::ASTC
|
} // namespace Tegra::Texture::ASTC
|
||||||
|
|
|
@ -69,10 +69,10 @@ constexpr VkExportMemoryAllocateInfo EXPORT_ALLOCATE_INFO{
|
||||||
|
|
||||||
class MemoryAllocation {
|
class MemoryAllocation {
|
||||||
public:
|
public:
|
||||||
explicit MemoryAllocation(vk::DeviceMemory memory_, VkMemoryPropertyFlags properties,
|
explicit MemoryAllocation(MemoryAllocator* const allocator_, vk::DeviceMemory memory_,
|
||||||
u64 allocation_size_, u32 type)
|
VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type)
|
||||||
: memory{std::move(memory_)}, allocation_size{allocation_size_}, property_flags{properties},
|
: allocator{allocator_}, memory{std::move(memory_)}, allocation_size{allocation_size_},
|
||||||
shifted_memory_type{1U << type} {}
|
property_flags{properties}, shifted_memory_type{1U << type} {}
|
||||||
|
|
||||||
#if defined(_WIN32) || defined(__unix__)
|
#if defined(_WIN32) || defined(__unix__)
|
||||||
~MemoryAllocation() {
|
~MemoryAllocation() {
|
||||||
|
@ -106,6 +106,10 @@ public:
|
||||||
const auto it = std::ranges::find(commits, begin, &Range::begin);
|
const auto it = std::ranges::find(commits, begin, &Range::begin);
|
||||||
ASSERT_MSG(it != commits.end(), "Invalid commit");
|
ASSERT_MSG(it != commits.end(), "Invalid commit");
|
||||||
commits.erase(it);
|
commits.erase(it);
|
||||||
|
if (commits.empty()) {
|
||||||
|
// Do not call any code involving 'this' after this call; the object will be destroyed
|
||||||
|
allocator->ReleaseMemory(this);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] std::span<u8> Map() {
|
[[nodiscard]] std::span<u8> Map() {
|
||||||
|
@ -171,6 +175,7 @@ private:
|
||||||
return candidate;
|
return candidate;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MemoryAllocator* const allocator; ///< Parent memory allocator.
|
||||||
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
|
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
|
||||||
const u64 allocation_size; ///< Size of this allocation.
|
const u64 allocation_size; ///< Size of this allocation.
|
||||||
const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
|
const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
|
||||||
|
@ -275,10 +280,17 @@ bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
allocations.push_back(std::make_unique<MemoryAllocation>(std::move(memory), flags, size, type));
|
allocations.push_back(
|
||||||
|
std::make_unique<MemoryAllocation>(this, std::move(memory), flags, size, type));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MemoryAllocator::ReleaseMemory(MemoryAllocation* alloc) {
|
||||||
|
const auto it = std::ranges::find(allocations, alloc, &std::unique_ptr<MemoryAllocation>::get);
|
||||||
|
ASSERT(it != allocations.end());
|
||||||
|
allocations.erase(it);
|
||||||
|
}
|
||||||
|
|
||||||
std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
|
std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
|
||||||
VkMemoryPropertyFlags flags) {
|
VkMemoryPropertyFlags flags) {
|
||||||
for (auto& allocation : allocations) {
|
for (auto& allocation : allocations) {
|
||||||
|
|
|
@ -69,6 +69,8 @@ private:
|
||||||
/// Memory allocator container.
|
/// Memory allocator container.
|
||||||
/// Allocates and releases memory allocations on demand.
|
/// Allocates and releases memory allocations on demand.
|
||||||
class MemoryAllocator {
|
class MemoryAllocator {
|
||||||
|
friend MemoryAllocation;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
* Construct memory allocator
|
* Construct memory allocator
|
||||||
|
@ -104,6 +106,9 @@ private:
|
||||||
/// Tries to allocate a chunk of memory.
|
/// Tries to allocate a chunk of memory.
|
||||||
bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size);
|
bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size);
|
||||||
|
|
||||||
|
/// Releases a chunk of memory.
|
||||||
|
void ReleaseMemory(MemoryAllocation* alloc);
|
||||||
|
|
||||||
/// Tries to allocate a memory commit.
|
/// Tries to allocate a memory commit.
|
||||||
std::optional<MemoryCommit> TryCommit(const VkMemoryRequirements& requirements,
|
std::optional<MemoryCommit> TryCommit(const VkMemoryRequirements& requirements,
|
||||||
VkMemoryPropertyFlags flags);
|
VkMemoryPropertyFlags flags);
|
||||||
|
|
|
@ -809,6 +809,7 @@ void Config::ReadRendererValues() {
|
||||||
QStringLiteral("use_asynchronous_gpu_emulation"), true);
|
QStringLiteral("use_asynchronous_gpu_emulation"), true);
|
||||||
ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"),
|
ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"),
|
||||||
true);
|
true);
|
||||||
|
ReadSettingGlobal(Settings::values.accelerate_astc, QStringLiteral("accelerate_astc"), true);
|
||||||
ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true);
|
ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true);
|
||||||
ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"),
|
ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"),
|
||||||
false);
|
false);
|
||||||
|
@ -816,6 +817,7 @@ void Config::ReadRendererValues() {
|
||||||
QStringLiteral("use_asynchronous_shaders"), false);
|
QStringLiteral("use_asynchronous_shaders"), false);
|
||||||
ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"),
|
ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"),
|
||||||
true);
|
true);
|
||||||
|
ReadSettingGlobal(Settings::values.use_caches_gc, QStringLiteral("use_caches_gc"), false);
|
||||||
ReadSettingGlobal(Settings::values.bg_red, QStringLiteral("bg_red"), 0.0);
|
ReadSettingGlobal(Settings::values.bg_red, QStringLiteral("bg_red"), 0.0);
|
||||||
ReadSettingGlobal(Settings::values.bg_green, QStringLiteral("bg_green"), 0.0);
|
ReadSettingGlobal(Settings::values.bg_green, QStringLiteral("bg_green"), 0.0);
|
||||||
ReadSettingGlobal(Settings::values.bg_blue, QStringLiteral("bg_blue"), 0.0);
|
ReadSettingGlobal(Settings::values.bg_blue, QStringLiteral("bg_blue"), 0.0);
|
||||||
|
@ -1392,6 +1394,7 @@ void Config::SaveRendererValues() {
|
||||||
Settings::values.use_asynchronous_gpu_emulation, true);
|
Settings::values.use_asynchronous_gpu_emulation, true);
|
||||||
WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation,
|
WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation,
|
||||||
true);
|
true);
|
||||||
|
WriteSettingGlobal(QStringLiteral("accelerate_astc"), Settings::values.accelerate_astc, true);
|
||||||
WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
|
WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
|
||||||
WriteSettingGlobal(QStringLiteral("use_assembly_shaders"),
|
WriteSettingGlobal(QStringLiteral("use_assembly_shaders"),
|
||||||
Settings::values.use_assembly_shaders, false);
|
Settings::values.use_assembly_shaders, false);
|
||||||
|
@ -1399,6 +1402,7 @@ void Config::SaveRendererValues() {
|
||||||
Settings::values.use_asynchronous_shaders, false);
|
Settings::values.use_asynchronous_shaders, false);
|
||||||
WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time,
|
WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time,
|
||||||
true);
|
true);
|
||||||
|
WriteSettingGlobal(QStringLiteral("use_caches_gc"), Settings::values.use_caches_gc, false);
|
||||||
// Cast to double because Qt's written float values are not human-readable
|
// Cast to double because Qt's written float values are not human-readable
|
||||||
WriteSettingGlobal(QStringLiteral("bg_red"), Settings::values.bg_red, 0.0);
|
WriteSettingGlobal(QStringLiteral("bg_red"), Settings::values.bg_red, 0.0);
|
||||||
WriteSettingGlobal(QStringLiteral("bg_green"), Settings::values.bg_green, 0.0);
|
WriteSettingGlobal(QStringLiteral("bg_green"), Settings::values.bg_green, 0.0);
|
||||||
|
|
|
@ -70,10 +70,12 @@ void ConfigureGraphics::SetConfiguration() {
|
||||||
ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock);
|
ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock);
|
||||||
ui->use_disk_shader_cache->setEnabled(runtime_lock);
|
ui->use_disk_shader_cache->setEnabled(runtime_lock);
|
||||||
ui->use_nvdec_emulation->setEnabled(runtime_lock);
|
ui->use_nvdec_emulation->setEnabled(runtime_lock);
|
||||||
|
ui->accelerate_astc->setEnabled(runtime_lock);
|
||||||
ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue());
|
ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue());
|
||||||
ui->use_asynchronous_gpu_emulation->setChecked(
|
ui->use_asynchronous_gpu_emulation->setChecked(
|
||||||
Settings::values.use_asynchronous_gpu_emulation.GetValue());
|
Settings::values.use_asynchronous_gpu_emulation.GetValue());
|
||||||
ui->use_nvdec_emulation->setChecked(Settings::values.use_nvdec_emulation.GetValue());
|
ui->use_nvdec_emulation->setChecked(Settings::values.use_nvdec_emulation.GetValue());
|
||||||
|
ui->accelerate_astc->setChecked(Settings::values.accelerate_astc.GetValue());
|
||||||
|
|
||||||
if (Settings::IsConfiguringGlobal()) {
|
if (Settings::IsConfiguringGlobal()) {
|
||||||
ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend.GetValue()));
|
ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend.GetValue()));
|
||||||
|
@ -118,6 +120,8 @@ void ConfigureGraphics::ApplyConfiguration() {
|
||||||
use_asynchronous_gpu_emulation);
|
use_asynchronous_gpu_emulation);
|
||||||
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_nvdec_emulation,
|
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_nvdec_emulation,
|
||||||
ui->use_nvdec_emulation, use_nvdec_emulation);
|
ui->use_nvdec_emulation, use_nvdec_emulation);
|
||||||
|
ConfigurationShared::ApplyPerGameSetting(&Settings::values.accelerate_astc, ui->accelerate_astc,
|
||||||
|
accelerate_astc);
|
||||||
|
|
||||||
if (Settings::IsConfiguringGlobal()) {
|
if (Settings::IsConfiguringGlobal()) {
|
||||||
// Guard if during game and set to game-specific value
|
// Guard if during game and set to game-specific value
|
||||||
|
@ -254,6 +258,7 @@ void ConfigureGraphics::SetupPerGameUI() {
|
||||||
ui->use_asynchronous_gpu_emulation->setEnabled(
|
ui->use_asynchronous_gpu_emulation->setEnabled(
|
||||||
Settings::values.use_asynchronous_gpu_emulation.UsingGlobal());
|
Settings::values.use_asynchronous_gpu_emulation.UsingGlobal());
|
||||||
ui->use_nvdec_emulation->setEnabled(Settings::values.use_nvdec_emulation.UsingGlobal());
|
ui->use_nvdec_emulation->setEnabled(Settings::values.use_nvdec_emulation.UsingGlobal());
|
||||||
|
ui->accelerate_astc->setEnabled(Settings::values.accelerate_astc.UsingGlobal());
|
||||||
ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal());
|
ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal());
|
||||||
ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal());
|
ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal());
|
||||||
|
|
||||||
|
@ -269,6 +274,8 @@ void ConfigureGraphics::SetupPerGameUI() {
|
||||||
ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache);
|
ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache);
|
||||||
ConfigurationShared::SetColoredTristate(
|
ConfigurationShared::SetColoredTristate(
|
||||||
ui->use_nvdec_emulation, Settings::values.use_nvdec_emulation, use_nvdec_emulation);
|
ui->use_nvdec_emulation, Settings::values.use_nvdec_emulation, use_nvdec_emulation);
|
||||||
|
ConfigurationShared::SetColoredTristate(ui->accelerate_astc, Settings::values.accelerate_astc,
|
||||||
|
accelerate_astc);
|
||||||
ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation,
|
ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation,
|
||||||
Settings::values.use_asynchronous_gpu_emulation,
|
Settings::values.use_asynchronous_gpu_emulation,
|
||||||
use_asynchronous_gpu_emulation);
|
use_asynchronous_gpu_emulation);
|
||||||
|
|
|
@ -47,6 +47,7 @@ private:
|
||||||
QColor bg_color;
|
QColor bg_color;
|
||||||
|
|
||||||
ConfigurationShared::CheckState use_nvdec_emulation;
|
ConfigurationShared::CheckState use_nvdec_emulation;
|
||||||
|
ConfigurationShared::CheckState accelerate_astc;
|
||||||
ConfigurationShared::CheckState use_disk_shader_cache;
|
ConfigurationShared::CheckState use_disk_shader_cache;
|
||||||
ConfigurationShared::CheckState use_asynchronous_gpu_emulation;
|
ConfigurationShared::CheckState use_asynchronous_gpu_emulation;
|
||||||
|
|
||||||
|
|
|
@ -104,6 +104,13 @@
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
<item>
|
||||||
|
<widget class="QCheckBox" name="accelerate_astc">
|
||||||
|
<property name="text">
|
||||||
|
<string>Accelerate ASTC texture decoding</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
<item>
|
<item>
|
||||||
<widget class="QWidget" name="fullscreen_mode_layout" native="true">
|
<widget class="QWidget" name="fullscreen_mode_layout" native="true">
|
||||||
<layout class="QHBoxLayout" name="horizontalLayout_1">
|
<layout class="QHBoxLayout" name="horizontalLayout_1">
|
||||||
|
|
|
@ -30,6 +30,7 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
|
||||||
ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
|
ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
|
||||||
ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue());
|
ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue());
|
||||||
ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
|
ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
|
||||||
|
ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue());
|
||||||
ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());
|
ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());
|
||||||
|
|
||||||
if (Settings::IsConfiguringGlobal()) {
|
if (Settings::IsConfiguringGlobal()) {
|
||||||
|
@ -62,6 +63,8 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
|
||||||
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders,
|
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders,
|
||||||
ui->use_asynchronous_shaders,
|
ui->use_asynchronous_shaders,
|
||||||
use_asynchronous_shaders);
|
use_asynchronous_shaders);
|
||||||
|
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_caches_gc, ui->use_caches_gc,
|
||||||
|
use_caches_gc);
|
||||||
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time,
|
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time,
|
||||||
ui->use_fast_gpu_time, use_fast_gpu_time);
|
ui->use_fast_gpu_time, use_fast_gpu_time);
|
||||||
|
|
||||||
|
@ -101,6 +104,7 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
|
||||||
ui->use_asynchronous_shaders->setEnabled(
|
ui->use_asynchronous_shaders->setEnabled(
|
||||||
Settings::values.use_asynchronous_shaders.UsingGlobal());
|
Settings::values.use_asynchronous_shaders.UsingGlobal());
|
||||||
ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
|
ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
|
||||||
|
ui->use_caches_gc->setEnabled(Settings::values.use_caches_gc.UsingGlobal());
|
||||||
ui->anisotropic_filtering_combobox->setEnabled(
|
ui->anisotropic_filtering_combobox->setEnabled(
|
||||||
Settings::values.max_anisotropy.UsingGlobal());
|
Settings::values.max_anisotropy.UsingGlobal());
|
||||||
|
|
||||||
|
@ -115,6 +119,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
|
||||||
use_asynchronous_shaders);
|
use_asynchronous_shaders);
|
||||||
ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time,
|
ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time,
|
||||||
Settings::values.use_fast_gpu_time, use_fast_gpu_time);
|
Settings::values.use_fast_gpu_time, use_fast_gpu_time);
|
||||||
|
ConfigurationShared::SetColoredTristate(ui->use_caches_gc, Settings::values.use_caches_gc,
|
||||||
|
use_caches_gc);
|
||||||
ConfigurationShared::SetColoredComboBox(
|
ConfigurationShared::SetColoredComboBox(
|
||||||
ui->gpu_accuracy, ui->label_gpu_accuracy,
|
ui->gpu_accuracy, ui->label_gpu_accuracy,
|
||||||
static_cast<int>(Settings::values.gpu_accuracy.GetValue(true)));
|
static_cast<int>(Settings::values.gpu_accuracy.GetValue(true)));
|
||||||
|
|
|
@ -38,4 +38,5 @@ private:
|
||||||
ConfigurationShared::CheckState use_assembly_shaders;
|
ConfigurationShared::CheckState use_assembly_shaders;
|
||||||
ConfigurationShared::CheckState use_asynchronous_shaders;
|
ConfigurationShared::CheckState use_asynchronous_shaders;
|
||||||
ConfigurationShared::CheckState use_fast_gpu_time;
|
ConfigurationShared::CheckState use_fast_gpu_time;
|
||||||
|
ConfigurationShared::CheckState use_caches_gc;
|
||||||
};
|
};
|
||||||
|
|
|
@ -103,6 +103,16 @@
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
<item>
|
||||||
|
<widget class="QCheckBox" name="use_caches_gc">
|
||||||
|
<property name="toolTip">
|
||||||
|
<string>Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4 GB by flushing the least used textures/buffers. May cause issues in a few games.</string>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>Enable GPU caches garbage collection (unsafe)</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
<item>
|
<item>
|
||||||
<widget class="QWidget" name="af_layout" native="true">
|
<widget class="QWidget" name="af_layout" native="true">
|
||||||
<layout class="QHBoxLayout" name="horizontalLayout_1">
|
<layout class="QHBoxLayout" name="horizontalLayout_1">
|
||||||
|
|
|
@ -447,8 +447,10 @@ void Config::ReadValues() {
|
||||||
sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", true));
|
sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", true));
|
||||||
Settings::values.use_asynchronous_shaders.SetValue(
|
Settings::values.use_asynchronous_shaders.SetValue(
|
||||||
sdl2_config->GetBoolean("Renderer", "use_asynchronous_shaders", false));
|
sdl2_config->GetBoolean("Renderer", "use_asynchronous_shaders", false));
|
||||||
Settings::values.use_asynchronous_shaders.SetValue(
|
Settings::values.use_nvdec_emulation.SetValue(
|
||||||
sdl2_config->GetBoolean("Renderer", "use_asynchronous_shaders", false));
|
sdl2_config->GetBoolean("Renderer", "use_nvdec_emulation", true));
|
||||||
|
Settings::values.accelerate_astc.SetValue(
|
||||||
|
sdl2_config->GetBoolean("Renderer", "accelerate_astc", true));
|
||||||
Settings::values.use_fast_gpu_time.SetValue(
|
Settings::values.use_fast_gpu_time.SetValue(
|
||||||
sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true));
|
sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true));
|
||||||
|
|
||||||
|
|
|
@ -194,6 +194,14 @@ use_assembly_shaders =
|
||||||
# 0 (default): Off, 1: On
|
# 0 (default): Off, 1: On
|
||||||
use_asynchronous_shaders =
|
use_asynchronous_shaders =
|
||||||
|
|
||||||
|
# Enable NVDEC emulation.
|
||||||
|
# 0: Off, 1 (default): On
|
||||||
|
use_nvdec_emulation =
|
||||||
|
|
||||||
|
# Accelerate ASTC texture decoding.
|
||||||
|
# 0: Off, 1 (default): On
|
||||||
|
accelerate_astc =
|
||||||
|
|
||||||
# Turns on the frame limiter, which will limit frames output to the target game speed
|
# Turns on the frame limiter, which will limit frames output to the target game speed
|
||||||
# 0: Off, 1: On (default)
|
# 0: Off, 1: On (default)
|
||||||
use_frame_limit =
|
use_frame_limit =
|
||||||
|
@ -219,6 +227,10 @@ use_asynchronous_gpu_emulation =
|
||||||
# 0: Off, 1 (default): On
|
# 0: Off, 1 (default): On
|
||||||
use_vsync =
|
use_vsync =
|
||||||
|
|
||||||
|
# Whether to enable garbage collection for the GPU caches.
|
||||||
|
# 0 (default): Off, 1: On
|
||||||
|
use_caches_gc =
|
||||||
|
|
||||||
# The clear color for the renderer. What shows up on the sides of the bottom screen.
|
# The clear color for the renderer. What shows up on the sides of the bottom screen.
|
||||||
# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
|
# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
|
||||||
bg_red =
|
bg_red =
|
||||||
|
|
Loading…
Reference in a new issue