mirror of
https://git.suyu.dev/suyu/suyu.git
synced 2024-11-26 00:32:48 +01:00
Query Cache: address issues
This commit is contained in:
parent
aa6587d854
commit
282ae8fa51
21 changed files with 270 additions and 214 deletions
|
@ -276,9 +276,8 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(VAddr cpu_addr, u32 size,
|
std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
|
||||||
ObtainBufferSynchronize sync_info,
|
VAddr cpu_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) {
|
||||||
ObtainBufferOperation post_op) {
|
|
||||||
const BufferId buffer_id = FindBuffer(cpu_addr, size);
|
const BufferId buffer_id = FindBuffer(cpu_addr, size);
|
||||||
Buffer& buffer = slot_buffers[buffer_id];
|
Buffer& buffer = slot_buffers[buffer_id];
|
||||||
|
|
||||||
|
|
|
@ -297,8 +297,8 @@ public:
|
||||||
ObtainBufferOperation post_op);
|
ObtainBufferOperation post_op);
|
||||||
|
|
||||||
[[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size,
|
[[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size,
|
||||||
ObtainBufferSynchronize sync_info,
|
ObtainBufferSynchronize sync_info,
|
||||||
ObtainBufferOperation post_op);
|
ObtainBufferOperation post_op);
|
||||||
void FlushCachedWrites();
|
void FlushCachedWrites();
|
||||||
|
|
||||||
/// Return true when there are uncommitted buffers to be downloaded
|
/// Return true when there are uncommitted buffers to be downloaded
|
||||||
|
|
|
@ -596,12 +596,6 @@ void Maxwell3D::ProcessCounterReset() {
|
||||||
case Regs::ClearReport::ZPassPixelCount:
|
case Regs::ClearReport::ZPassPixelCount:
|
||||||
rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64);
|
rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64);
|
||||||
break;
|
break;
|
||||||
case Regs::ClearReport::PrimitivesGenerated:
|
|
||||||
rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount);
|
|
||||||
break;
|
|
||||||
case Regs::ClearReport::VtgPrimitivesOut:
|
|
||||||
rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount);
|
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value);
|
LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -82,7 +82,8 @@ void Puller::ProcessSemaphoreTriggerMethod() {
|
||||||
if (op == GpuSemaphoreOperation::WriteLong) {
|
if (op == GpuSemaphoreOperation::WriteLong) {
|
||||||
const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
|
const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
|
||||||
const u32 payload = regs.semaphore_sequence;
|
const u32 payload = regs.semaphore_sequence;
|
||||||
rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0);
|
rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload,
|
||||||
|
VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0);
|
||||||
} else {
|
} else {
|
||||||
do {
|
do {
|
||||||
const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
|
const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
|
||||||
|
@ -117,7 +118,8 @@ void Puller::ProcessSemaphoreTriggerMethod() {
|
||||||
void Puller::ProcessSemaphoreRelease() {
|
void Puller::ProcessSemaphoreRelease() {
|
||||||
const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
|
const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
|
||||||
const u32 payload = regs.semaphore_release;
|
const u32 payload = regs.semaphore_release;
|
||||||
rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0);
|
rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload,
|
||||||
|
VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Puller::ProcessSemaphoreAcquire() {
|
void Puller::ProcessSemaphoreAcquire() {
|
||||||
|
|
|
@ -55,6 +55,9 @@ public:
|
||||||
|
|
||||||
// Unlike other fences, this one doesn't
|
// Unlike other fences, this one doesn't
|
||||||
void SignalOrdering() {
|
void SignalOrdering() {
|
||||||
|
if constexpr (!can_async_check) {
|
||||||
|
TryReleasePendingFences<false>();
|
||||||
|
}
|
||||||
std::scoped_lock lock{buffer_cache.mutex};
|
std::scoped_lock lock{buffer_cache.mutex};
|
||||||
buffer_cache.AccumulateFlushes();
|
buffer_cache.AccumulateFlushes();
|
||||||
}
|
}
|
||||||
|
@ -104,13 +107,9 @@ public:
|
||||||
SignalFence(std::move(func));
|
SignalFence(std::move(func));
|
||||||
}
|
}
|
||||||
|
|
||||||
void WaitPendingFences(bool force) {
|
void WaitPendingFences([[maybe_unused]] bool force) {
|
||||||
if constexpr (!can_async_check) {
|
if constexpr (!can_async_check) {
|
||||||
if (force) {
|
TryReleasePendingFences<true>();
|
||||||
TryReleasePendingFences<true>();
|
|
||||||
} else {
|
|
||||||
TryReleasePendingFences<false>();
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
if (!force) {
|
if (!force) {
|
||||||
return;
|
return;
|
||||||
|
@ -125,7 +124,8 @@ public:
|
||||||
});
|
});
|
||||||
SignalFence(std::move(func));
|
SignalFence(std::move(func));
|
||||||
std::unique_lock lk(wait_mutex);
|
std::unique_lock lk(wait_mutex);
|
||||||
wait_cv.wait(lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); });
|
wait_cv.wait(
|
||||||
|
lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,21 +7,19 @@
|
||||||
#include <deque>
|
#include <deque>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
|
||||||
namespace VideoCommon {
|
namespace VideoCommon {
|
||||||
|
|
||||||
class BankBase {
|
class BankBase {
|
||||||
protected:
|
protected:
|
||||||
const size_t base_bank_size;
|
const size_t base_bank_size{};
|
||||||
size_t bank_size;
|
size_t bank_size{};
|
||||||
std::atomic<size_t> references;
|
std::atomic<size_t> references{};
|
||||||
size_t current_slot;
|
size_t current_slot{};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
BankBase(size_t bank_size_)
|
explicit BankBase(size_t bank_size_) : base_bank_size{bank_size_}, bank_size(bank_size_) {}
|
||||||
: base_bank_size{bank_size_}, bank_size(bank_size_), references(0), current_slot(0) {}
|
|
||||||
|
|
||||||
virtual ~BankBase() = default;
|
virtual ~BankBase() = default;
|
||||||
|
|
||||||
|
@ -58,11 +56,11 @@ public:
|
||||||
bank_size = current_slot;
|
bank_size = current_slot;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr bool IsClosed() {
|
bool IsClosed() const {
|
||||||
return current_slot >= bank_size;
|
return current_slot >= bank_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsDead() {
|
bool IsDead() const {
|
||||||
return IsClosed() && references == 0;
|
return IsClosed() && references == 0;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -9,28 +9,28 @@
|
||||||
namespace VideoCommon {
|
namespace VideoCommon {
|
||||||
|
|
||||||
enum class QueryFlagBits : u32 {
|
enum class QueryFlagBits : u32 {
|
||||||
HasTimestamp = 1 << 0, ///< Indicates if this query has a tiemstamp.
|
HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp.
|
||||||
IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host
|
IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host
|
||||||
IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host
|
IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host
|
||||||
IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest.
|
IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest.
|
||||||
IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query
|
IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query
|
||||||
IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query
|
IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query
|
||||||
IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified.
|
IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified.
|
||||||
IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
|
IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
|
||||||
IsFence = 1 << 8, ///< Indicates the query is a fence.
|
IsFence = 1 << 8, ///< Indicates the query is a fence.
|
||||||
IsQueuedForAsyncFlush = 1 <<9,///< Indicates that the query can be flushed at any moment
|
IsQueuedForAsyncFlush = 1 << 9, ///< Indicates that the query can be flushed at any moment
|
||||||
};
|
};
|
||||||
DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
|
DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
|
||||||
|
|
||||||
class QueryBase {
|
class QueryBase {
|
||||||
public:
|
public:
|
||||||
VAddr guest_address;
|
VAddr guest_address{};
|
||||||
QueryFlagBits flags;
|
QueryFlagBits flags{};
|
||||||
u64 value;
|
u64 value{};
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
// Default constructor
|
// Default constructor
|
||||||
QueryBase() : guest_address(0), flags{}, value{} {}
|
QueryBase() = default;
|
||||||
|
|
||||||
// Parameterized constructor
|
// Parameterized constructor
|
||||||
QueryBase(VAddr address, QueryFlagBits flags_, u64 value_)
|
QueryBase(VAddr address, QueryFlagBits flags_, u64 value_)
|
||||||
|
@ -51,23 +51,21 @@ public:
|
||||||
class HostQueryBase : public QueryBase {
|
class HostQueryBase : public QueryBase {
|
||||||
public:
|
public:
|
||||||
// Default constructor
|
// Default constructor
|
||||||
HostQueryBase()
|
HostQueryBase() : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0) {}
|
||||||
: QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0), start_bank_id{},
|
|
||||||
size_banks{}, start_slot{}, size_slots{} {}
|
|
||||||
|
|
||||||
// Parameterized constructor
|
// Parameterized constructor
|
||||||
HostQueryBase(bool isLong, VAddr address)
|
HostQueryBase(bool has_timestamp, VAddr address)
|
||||||
: QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{},
|
: QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{},
|
||||||
start_slot{}, size_slots{} {
|
start_slot{}, size_slots{} {
|
||||||
if (isLong) {
|
if (has_timestamp) {
|
||||||
flags |= QueryFlagBits::HasTimestamp;
|
flags |= QueryFlagBits::HasTimestamp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 start_bank_id;
|
u32 start_bank_id{};
|
||||||
u32 size_banks;
|
u32 size_banks{};
|
||||||
size_t start_slot;
|
size_t start_slot{};
|
||||||
size_t size_slots;
|
size_t size_slots{};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace VideoCommon
|
} // namespace VideoCommon
|
|
@ -54,7 +54,7 @@ public:
|
||||||
return new_id;
|
return new_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool HasPendingSync() override {
|
bool HasPendingSync() const override {
|
||||||
return !pending_sync.empty();
|
return !pending_sync.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -71,8 +71,10 @@ public:
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
query.flags |= QueryFlagBits::IsHostSynced;
|
query.flags |= QueryFlagBits::IsHostSynced;
|
||||||
sync_values.emplace_back(query.guest_address, query.value,
|
sync_values.emplace_back(SyncValuesStruct{
|
||||||
True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4);
|
.address = query.guest_address,
|
||||||
|
.value = query.value,
|
||||||
|
.size = static_cast<u64>(True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4)});
|
||||||
}
|
}
|
||||||
pending_sync.clear();
|
pending_sync.clear();
|
||||||
if (sync_values.size() > 0) {
|
if (sync_values.size() > 0) {
|
||||||
|
@ -90,15 +92,20 @@ class StubStreamer : public GuestStreamer<Traits> {
|
||||||
public:
|
public:
|
||||||
using RuntimeType = typename Traits::RuntimeType;
|
using RuntimeType = typename Traits::RuntimeType;
|
||||||
|
|
||||||
StubStreamer(size_t id_, RuntimeType& runtime_) : GuestStreamer<Traits>(id_, runtime_) {}
|
StubStreamer(size_t id_, RuntimeType& runtime_, u32 stub_value_)
|
||||||
|
: GuestStreamer<Traits>(id_, runtime_), stub_value{stub_value_} {}
|
||||||
|
|
||||||
~StubStreamer() override = default;
|
~StubStreamer() override = default;
|
||||||
|
|
||||||
size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value,
|
size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value,
|
||||||
std::optional<u32> subreport = std::nullopt) override {
|
std::optional<u32> subreport = std::nullopt) override {
|
||||||
size_t new_id = GuestStreamer<Traits>::WriteCounter(address, has_timestamp, 1U, subreport);
|
size_t new_id =
|
||||||
|
GuestStreamer<Traits>::WriteCounter(address, has_timestamp, stub_value, subreport);
|
||||||
return new_id;
|
return new_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
u32 stub_value;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Traits>
|
template <typename Traits>
|
||||||
|
@ -113,7 +120,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
|
||||||
for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
|
for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
|
||||||
streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
|
streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
|
||||||
if (streamers[i]) {
|
if (streamers[i]) {
|
||||||
streamer_mask |= 1ULL << i;
|
streamer_mask |= 1ULL << streamers[i]->GetId();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -152,7 +159,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
|
||||||
QueryCacheBase<Traits>* owner;
|
QueryCacheBase<Traits>* owner;
|
||||||
VideoCore::RasterizerInterface& rasterizer;
|
VideoCore::RasterizerInterface& rasterizer;
|
||||||
Core::Memory::Memory& cpu_memory;
|
Core::Memory::Memory& cpu_memory;
|
||||||
Traits::RuntimeType& runtime;
|
RuntimeType& runtime;
|
||||||
Tegra::GPU& gpu;
|
Tegra::GPU& gpu;
|
||||||
std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers;
|
std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers;
|
||||||
u64 streamer_mask;
|
u64 streamer_mask;
|
||||||
|
@ -223,15 +230,11 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
|
||||||
const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence);
|
const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence);
|
||||||
size_t streamer_id = static_cast<size_t>(counter_type);
|
size_t streamer_id = static_cast<size_t>(counter_type);
|
||||||
auto* streamer = impl->streamers[streamer_id];
|
auto* streamer = impl->streamers[streamer_id];
|
||||||
if (!streamer) [[unlikely]] {
|
if (streamer == nullptr) [[unlikely]] {
|
||||||
if (has_timestamp) {
|
counter_type = QueryType::Payload;
|
||||||
u64 timestamp = impl->gpu.GetTicks();
|
payload = 1U;
|
||||||
gpu_memory->Write<u64>(addr + 8, timestamp);
|
streamer_id = static_cast<size_t>(counter_type);
|
||||||
gpu_memory->Write<u64>(addr, 1ULL);
|
streamer = impl->streamers[streamer_id];
|
||||||
} else {
|
|
||||||
gpu_memory->Write<u32>(addr, 1U);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr);
|
auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr);
|
||||||
if (!cpu_addr_opt) [[unlikely]] {
|
if (!cpu_addr_opt) [[unlikely]] {
|
||||||
|
@ -403,12 +406,6 @@ bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() {
|
||||||
impl->runtime.EndHostConditionalRendering();
|
impl->runtime.EndHostConditionalRendering();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
/*if (!Settings::IsGPULevelHigh()) {
|
|
||||||
impl->runtime.EndHostConditionalRendering();
|
|
||||||
return gpu_memory->IsMemoryDirty(regs.render_enable.Address(), 24,
|
|
||||||
VideoCommon::CacheType::BufferCache |
|
|
||||||
VideoCommon::CacheType::QueryCache);
|
|
||||||
}*/
|
|
||||||
const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode);
|
const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode);
|
||||||
const GPUVAddr address = regs.render_enable.Address();
|
const GPUVAddr address = regs.render_enable.Address();
|
||||||
switch (mode) {
|
switch (mode) {
|
||||||
|
@ -442,6 +439,9 @@ bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() {
|
||||||
// Async downloads
|
// Async downloads
|
||||||
template <typename Traits>
|
template <typename Traits>
|
||||||
void QueryCacheBase<Traits>::CommitAsyncFlushes() {
|
void QueryCacheBase<Traits>::CommitAsyncFlushes() {
|
||||||
|
// Make sure to have the results synced in Host.
|
||||||
|
NotifyWFI();
|
||||||
|
|
||||||
u64 mask{};
|
u64 mask{};
|
||||||
{
|
{
|
||||||
std::scoped_lock lk(impl->flush_guard);
|
std::scoped_lock lk(impl->flush_guard);
|
||||||
|
@ -458,8 +458,19 @@ void QueryCacheBase<Traits>::CommitAsyncFlushes() {
|
||||||
if (mask == 0) {
|
if (mask == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
impl->ForEachStreamerIn(mask,
|
u64 ran_mask = ~mask;
|
||||||
[](StreamerInterface* streamer) { streamer->PushUnsyncedQueries(); });
|
while (mask) {
|
||||||
|
impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) {
|
||||||
|
u64 dep_mask = streamer->GetDependentMask();
|
||||||
|
if ((dep_mask & ~ran_mask) != 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
u64 index = streamer->GetId();
|
||||||
|
ran_mask |= (1ULL << index);
|
||||||
|
mask &= ~(1ULL << index);
|
||||||
|
streamer->PushUnsyncedQueries();
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Traits>
|
template <typename Traits>
|
||||||
|
@ -489,13 +500,11 @@ void QueryCacheBase<Traits>::PopAsyncFlushes() {
|
||||||
if (mask == 0) {
|
if (mask == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
u64 ran_mask = 0;
|
u64 ran_mask = ~mask;
|
||||||
u64 next_phase = 0;
|
|
||||||
while (mask) {
|
while (mask) {
|
||||||
impl->ForEachStreamerIn(mask, [&mask, &ran_mask, &next_phase](StreamerInterface* streamer) {
|
impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) {
|
||||||
u64 dep_mask = streamer->GetDependenceMask();
|
u64 dep_mask = streamer->GetDependenceMask();
|
||||||
if ((dep_mask & ~ran_mask) != 0) {
|
if ((dep_mask & ~ran_mask) != 0) {
|
||||||
next_phase |= dep_mask;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
u64 index = streamer->GetId();
|
u64 index = streamer->GetId();
|
||||||
|
@ -503,7 +512,6 @@ void QueryCacheBase<Traits>::PopAsyncFlushes() {
|
||||||
mask &= ~(1ULL << index);
|
mask &= ~(1ULL << index);
|
||||||
streamer->PopUnsyncedQueries();
|
streamer->PopUnsyncedQueries();
|
||||||
});
|
});
|
||||||
ran_mask |= next_phase;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -47,7 +47,7 @@ public:
|
||||||
BitField<0, 27, u32> query_id;
|
BitField<0, 27, u32> query_id;
|
||||||
u32 raw;
|
u32 raw;
|
||||||
|
|
||||||
std::pair<size_t, size_t> unpack() {
|
std::pair<size_t, size_t> unpack() const {
|
||||||
return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())};
|
return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -73,7 +73,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static u64 BuildMask(std::span<QueryType> types) {
|
static u64 BuildMask(std::span<const QueryType> types) {
|
||||||
u64 mask = 0;
|
u64 mask = 0;
|
||||||
for (auto query_type : types) {
|
for (auto query_type : types) {
|
||||||
mask |= 1ULL << (static_cast<u64>(query_type));
|
mask |= 1ULL << (static_cast<u64>(query_type));
|
||||||
|
@ -160,7 +160,7 @@ protected:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
using ContentCache = typename std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>;
|
using ContentCache = std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>;
|
||||||
|
|
||||||
void InvalidateQuery(QueryLocation location);
|
void InvalidateQuery(QueryLocation location);
|
||||||
bool IsQueryDirty(QueryLocation location);
|
bool IsQueryDirty(QueryLocation location);
|
||||||
|
@ -175,7 +175,7 @@ protected:
|
||||||
friend struct QueryCacheBaseImpl;
|
friend struct QueryCacheBaseImpl;
|
||||||
friend RuntimeType;
|
friend RuntimeType;
|
||||||
|
|
||||||
std::unique_ptr<struct QueryCacheBaseImpl> impl;
|
std::unique_ptr<QueryCacheBaseImpl> impl;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace VideoCommon
|
} // namespace VideoCommon
|
|
@ -16,7 +16,7 @@ namespace VideoCommon {
|
||||||
|
|
||||||
class StreamerInterface {
|
class StreamerInterface {
|
||||||
public:
|
public:
|
||||||
StreamerInterface(size_t id_, u64 dependance_mask_ = 0) : id{id_}, dependance_mask{dependance_mask_} {}
|
explicit StreamerInterface(size_t id_) : id{id_}, dependence_mask{}, dependent_mask{} {}
|
||||||
virtual ~StreamerInterface() = default;
|
virtual ~StreamerInterface() = default;
|
||||||
|
|
||||||
virtual QueryBase* GetQuery(size_t id) = 0;
|
virtual QueryBase* GetQuery(size_t id) = 0;
|
||||||
|
@ -37,7 +37,7 @@ public:
|
||||||
/* Do Nothing */
|
/* Do Nothing */
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual bool HasPendingSync() {
|
virtual bool HasPendingSync() const {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -52,7 +52,7 @@ public:
|
||||||
virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
|
virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
|
||||||
std::optional<u32> subreport = std::nullopt) = 0;
|
std::optional<u32> subreport = std::nullopt) = 0;
|
||||||
|
|
||||||
virtual bool HasUnsyncedQueries() {
|
virtual bool HasUnsyncedQueries() const {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -71,18 +71,28 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 GetDependenceMask() const {
|
u64 GetDependenceMask() const {
|
||||||
return dependance_mask;
|
return dependence_mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 GetDependentMask() const {
|
||||||
|
return dependence_mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
void MakeDependent(StreamerInterface* depend_on) {
|
||||||
|
dependence_mask |= 1ULL << depend_on->id;
|
||||||
|
depend_on->dependent_mask |= 1ULL << id;
|
||||||
|
}
|
||||||
|
|
||||||
const size_t id;
|
const size_t id;
|
||||||
const u64 dependance_mask;
|
u64 dependence_mask;
|
||||||
|
u64 dependent_mask;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename QueryType>
|
template <typename QueryType>
|
||||||
class SimpleStreamer : public StreamerInterface {
|
class SimpleStreamer : public StreamerInterface {
|
||||||
public:
|
public:
|
||||||
SimpleStreamer(size_t id_, u64 dependance_mask_ = 0) : StreamerInterface{id_, dependance_mask_} {}
|
explicit SimpleStreamer(size_t id_) : StreamerInterface{id_} {}
|
||||||
virtual ~SimpleStreamer() = default;
|
virtual ~SimpleStreamer() = default;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
|
@ -9,10 +9,10 @@
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/polyfill_thread.h"
|
#include "common/polyfill_thread.h"
|
||||||
#include "video_core/query_cache/types.h"
|
|
||||||
#include "video_core/cache_types.h"
|
#include "video_core/cache_types.h"
|
||||||
#include "video_core/engines/fermi_2d.h"
|
#include "video_core/engines/fermi_2d.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/query_cache/types.h"
|
||||||
#include "video_core/rasterizer_download_area.h"
|
#include "video_core/rasterizer_download_area.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
|
@ -57,7 +57,8 @@ public:
|
||||||
virtual void ResetCounter(VideoCommon::QueryType type) = 0;
|
virtual void ResetCounter(VideoCommon::QueryType type) = 0;
|
||||||
|
|
||||||
/// Records a GPU query and caches it
|
/// Records a GPU query and caches it
|
||||||
virtual void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) = 0;
|
virtual void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
|
||||||
|
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) = 0;
|
||||||
|
|
||||||
/// Signal an uniform buffer binding
|
/// Signal an uniform buffer binding
|
||||||
virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
|
virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
|
||||||
|
|
|
@ -43,7 +43,8 @@ public:
|
||||||
void Clear(u32 layer_count) override;
|
void Clear(u32 layer_count) override;
|
||||||
void DispatchCompute() override;
|
void DispatchCompute() override;
|
||||||
void ResetCounter(VideoCommon::QueryType type) override;
|
void ResetCounter(VideoCommon::QueryType type) override;
|
||||||
void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
|
void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
|
||||||
|
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
|
||||||
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
|
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
|
||||||
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
|
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
|
||||||
void FlushAll() override;
|
void FlushAll() override;
|
||||||
|
|
|
@ -405,8 +405,6 @@ void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) {
|
||||||
void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
|
void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
|
||||||
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) {
|
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) {
|
||||||
if (type == VideoCommon::QueryType::ZPassPixelCount64) {
|
if (type == VideoCommon::QueryType::ZPassPixelCount64) {
|
||||||
std::optional<u64> timestamp{True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)
|
|
||||||
? std::make_optional<u64>(gpu.GetTicks()) : std:: nullopt };
|
|
||||||
if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
|
if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
|
||||||
query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()});
|
query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()});
|
||||||
} else {
|
} else {
|
||||||
|
@ -414,13 +412,23 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
|
if (type != VideoCommon::QueryType::Payload) {
|
||||||
u64 ticks = gpu.GetTicks();
|
payload = 1u;
|
||||||
gpu_memory->Write<u64>(gpu_addr + 8, ticks);
|
|
||||||
gpu_memory->Write<u64>(gpu_addr, static_cast<u64>(payload));
|
|
||||||
} else {
|
|
||||||
gpu_memory->Write<u32>(gpu_addr, payload);
|
|
||||||
}
|
}
|
||||||
|
std::function<void()> func([this, gpu_addr, flags, memory_manager = gpu_memory, payload]() {
|
||||||
|
if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
|
||||||
|
u64 ticks = gpu.GetTicks();
|
||||||
|
memory_manager->Write<u64>(gpu_addr + 8, ticks);
|
||||||
|
memory_manager->Write<u64>(gpu_addr, static_cast<u64>(payload));
|
||||||
|
} else {
|
||||||
|
memory_manager->Write<u32>(gpu_addr, payload);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
if (True(flags & VideoCommon::QueryPropertiesFlags::IsAFence)) {
|
||||||
|
SignalFence(std::move(func));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
func();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
|
void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
|
||||||
|
|
|
@ -87,7 +87,8 @@ public:
|
||||||
void Clear(u32 layer_count) override;
|
void Clear(u32 layer_count) override;
|
||||||
void DispatchCompute() override;
|
void DispatchCompute() override;
|
||||||
void ResetCounter(VideoCommon::QueryType type) override;
|
void ResetCounter(VideoCommon::QueryType type) override;
|
||||||
void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
|
void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
|
||||||
|
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
|
||||||
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
|
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
|
||||||
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
|
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
|
||||||
void FlushAll() override;
|
void FlushAll() override;
|
||||||
|
|
|
@ -303,9 +303,9 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
|
||||||
return {staging.buffer, staging.offset};
|
return {staging.buffer, staging.offset};
|
||||||
}
|
}
|
||||||
|
|
||||||
ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(const Device& device_,
|
ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(
|
||||||
Scheduler& scheduler_,
|
const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
|
||||||
DescriptorPool& descriptor_pool_, ComputePassDescriptorQueue& compute_pass_descriptor_queue_)
|
ComputePassDescriptorQueue& compute_pass_descriptor_queue_)
|
||||||
: ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
|
: ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
|
||||||
INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, nullptr,
|
INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, nullptr,
|
||||||
RESOLVE_CONDITIONAL_RENDER_COMP_SPV),
|
RESOLVE_CONDITIONAL_RENDER_COMP_SPV),
|
||||||
|
|
|
@ -7,8 +7,8 @@
|
||||||
|
|
||||||
#include "video_core/fence_manager.h"
|
#include "video_core/fence_manager.h"
|
||||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
|
||||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
class System;
|
class System;
|
||||||
|
|
|
@ -11,11 +11,9 @@
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <boost/container/small_vector.hpp>
|
|
||||||
#include <boost/icl/interval_set.hpp>
|
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
|
#include "video_core/engines/draw_manager.h"
|
||||||
#include "video_core/query_cache/query_cache.h"
|
#include "video_core/query_cache/query_cache.h"
|
||||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||||
|
@ -30,6 +28,7 @@
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
|
using Tegra::Engines::Maxwell3D;
|
||||||
using VideoCommon::QueryType;
|
using VideoCommon::QueryType;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -37,7 +36,7 @@ class SamplesQueryBank : public VideoCommon::BankBase {
|
||||||
public:
|
public:
|
||||||
static constexpr size_t BANK_SIZE = 256;
|
static constexpr size_t BANK_SIZE = 256;
|
||||||
static constexpr size_t QUERY_SIZE = 8;
|
static constexpr size_t QUERY_SIZE = 8;
|
||||||
SamplesQueryBank(const Device& device_, size_t index_)
|
explicit SamplesQueryBank(const Device& device_, size_t index_)
|
||||||
: BankBase(BANK_SIZE), device{device_}, index{index_} {
|
: BankBase(BANK_SIZE), device{device_}, index{index_} {
|
||||||
const auto& dev = device.GetLogical();
|
const auto& dev = device.GetLogical();
|
||||||
query_pool = dev.CreateQueryPool({
|
query_pool = dev.CreateQueryPool({
|
||||||
|
@ -109,18 +108,19 @@ struct HostSyncValues {
|
||||||
static constexpr bool GeneratesBaseBuffer = false;
|
static constexpr bool GeneratesBaseBuffer = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Traits>
|
|
||||||
class SamplesStreamer : public BaseStreamer {
|
class SamplesStreamer : public BaseStreamer {
|
||||||
public:
|
public:
|
||||||
SamplesStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_,
|
explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_,
|
||||||
Scheduler& scheduler_, const MemoryAllocator& memory_allocator_)
|
Scheduler& scheduler_, const MemoryAllocator& memory_allocator_)
|
||||||
: BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_},
|
: BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_},
|
||||||
memory_allocator{memory_allocator_} {
|
memory_allocator{memory_allocator_} {
|
||||||
BuildResolveBuffer();
|
BuildResolveBuffer();
|
||||||
current_bank = nullptr;
|
current_bank = nullptr;
|
||||||
current_query = nullptr;
|
current_query = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
~SamplesStreamer() = default;
|
||||||
|
|
||||||
void StartCounter() override {
|
void StartCounter() override {
|
||||||
if (has_started) {
|
if (has_started) {
|
||||||
return;
|
return;
|
||||||
|
@ -157,7 +157,7 @@ public:
|
||||||
PauseCounter();
|
PauseCounter();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool HasPendingSync() override {
|
bool HasPendingSync() const override {
|
||||||
return !pending_sync.empty();
|
return !pending_sync.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -198,7 +198,7 @@ public:
|
||||||
}
|
}
|
||||||
resolve_slots_remaining = resolve_slots;
|
resolve_slots_remaining = resolve_slots;
|
||||||
sync_values_stash.emplace_back();
|
sync_values_stash.emplace_back();
|
||||||
sync_values = sync_values = &sync_values_stash.back();
|
sync_values = &sync_values_stash.back();
|
||||||
sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE);
|
sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE);
|
||||||
}
|
}
|
||||||
resolve_slots_remaining--;
|
resolve_slots_remaining--;
|
||||||
|
@ -207,6 +207,7 @@ public:
|
||||||
const size_t base_offset = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE *
|
const size_t base_offset = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE *
|
||||||
(resolve_slots - resolve_slots_remaining - 1);
|
(resolve_slots - resolve_slots_remaining - 1);
|
||||||
VkQueryPool query_pool = bank->GetInnerPool();
|
VkQueryPool query_pool = bank->GetInnerPool();
|
||||||
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
scheduler.Record([start, amount, base_offset, query_pool,
|
scheduler.Record([start, amount, base_offset, query_pool,
|
||||||
buffer = *resolve_buffer](vk::CommandBuffer cmdbuf) {
|
buffer = *resolve_buffer](vk::CommandBuffer cmdbuf) {
|
||||||
size_t final_offset = base_offset + start * SamplesQueryBank::QUERY_SIZE;
|
size_t final_offset = base_offset + start * SamplesQueryBank::QUERY_SIZE;
|
||||||
|
@ -284,7 +285,7 @@ public:
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool HasUnsyncedQueries() override {
|
bool HasUnsyncedQueries() const override {
|
||||||
return !pending_flush_queries.empty();
|
return !pending_flush_queries.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -348,8 +349,8 @@ private:
|
||||||
for (auto q : queries) {
|
for (auto q : queries) {
|
||||||
auto* query = GetQuery(q);
|
auto* query = GetQuery(q);
|
||||||
ApplyBankOp(query, [&indexer](SamplesQueryBank* bank, size_t start, size_t amount) {
|
ApplyBankOp(query, [&indexer](SamplesQueryBank* bank, size_t start, size_t amount) {
|
||||||
auto id = bank->GetIndex();
|
auto id_ = bank->GetIndex();
|
||||||
auto pair = indexer.try_emplace(id, std::numeric_limits<size_t>::max(),
|
auto pair = indexer.try_emplace(id_, std::numeric_limits<size_t>::max(),
|
||||||
std::numeric_limits<size_t>::min());
|
std::numeric_limits<size_t>::min());
|
||||||
auto& current_pair = pair.first->second;
|
auto& current_pair = pair.first->second;
|
||||||
current_pair.first = std::min(current_pair.first, start);
|
current_pair.first = std::min(current_pair.first, start);
|
||||||
|
@ -434,13 +435,14 @@ private:
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
.flags = 0,
|
.flags = 0,
|
||||||
.size = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * resolve_slots,
|
.size = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * resolve_slots,
|
||||||
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
|
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
|
||||||
|
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
|
||||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||||
.queueFamilyIndexCount = 0,
|
.queueFamilyIndexCount = 0,
|
||||||
.pQueueFamilyIndices = nullptr,
|
.pQueueFamilyIndices = nullptr,
|
||||||
};
|
};
|
||||||
resolve_buffers.emplace_back(
|
resolve_buffers.emplace_back(
|
||||||
std::move(memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal)));
|
memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal));
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr size_t resolve_slots = 8;
|
static constexpr size_t resolve_slots = 8;
|
||||||
|
@ -476,7 +478,8 @@ class TFBQueryBank : public VideoCommon::BankBase {
|
||||||
public:
|
public:
|
||||||
static constexpr size_t BANK_SIZE = 1024;
|
static constexpr size_t BANK_SIZE = 1024;
|
||||||
static constexpr size_t QUERY_SIZE = 4;
|
static constexpr size_t QUERY_SIZE = 4;
|
||||||
TFBQueryBank(Scheduler& scheduler_, const MemoryAllocator& memory_allocator, size_t index_)
|
explicit TFBQueryBank(Scheduler& scheduler_, const MemoryAllocator& memory_allocator,
|
||||||
|
size_t index_)
|
||||||
: BankBase(BANK_SIZE), scheduler{scheduler_}, index{index_} {
|
: BankBase(BANK_SIZE), scheduler{scheduler_}, index{index_} {
|
||||||
const VkBufferCreateInfo buffer_ci = {
|
const VkBufferCreateInfo buffer_ci = {
|
||||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||||
|
@ -525,22 +528,21 @@ private:
|
||||||
vk::Buffer buffer;
|
vk::Buffer buffer;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Traits>
|
|
||||||
class PrimitivesSucceededStreamer;
|
class PrimitivesSucceededStreamer;
|
||||||
|
|
||||||
template <typename Traits>
|
|
||||||
class TFBCounterStreamer : public BaseStreamer {
|
class TFBCounterStreamer : public BaseStreamer {
|
||||||
public:
|
public:
|
||||||
TFBCounterStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_,
|
explicit TFBCounterStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_,
|
||||||
Scheduler& scheduler_, const MemoryAllocator& memory_allocator_,
|
Scheduler& scheduler_, const MemoryAllocator& memory_allocator_,
|
||||||
StagingBufferPool& staging_pool_)
|
StagingBufferPool& staging_pool_)
|
||||||
: BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_},
|
: BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_},
|
||||||
memory_allocator{memory_allocator_}, staging_pool{staging_pool_} {
|
memory_allocator{memory_allocator_}, staging_pool{staging_pool_} {
|
||||||
buffers_count = 0;
|
buffers_count = 0;
|
||||||
current_bank = nullptr;
|
current_bank = nullptr;
|
||||||
counter_buffers.fill(VK_NULL_HANDLE);
|
counter_buffers.fill(VK_NULL_HANDLE);
|
||||||
offsets.fill(0);
|
offsets.fill(0);
|
||||||
last_queries.fill(0);
|
last_queries.fill(0);
|
||||||
|
last_queries_stride.fill(1);
|
||||||
const VkBufferCreateInfo buffer_ci = {
|
const VkBufferCreateInfo buffer_ci = {
|
||||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
|
@ -564,6 +566,8 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
~TFBCounterStreamer() = default;
|
||||||
|
|
||||||
void StartCounter() override {
|
void StartCounter() override {
|
||||||
FlushBeginTFB();
|
FlushBeginTFB();
|
||||||
has_started = true;
|
has_started = true;
|
||||||
|
@ -581,15 +585,15 @@ public:
|
||||||
if (has_flushed_end_pending) {
|
if (has_flushed_end_pending) {
|
||||||
FlushEndTFB();
|
FlushEndTFB();
|
||||||
}
|
}
|
||||||
runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) {
|
runtime.View3DRegs([this](Maxwell3D& maxwell3d) {
|
||||||
if (regs.transform_feedback_enabled == 0) {
|
if (maxwell3d.regs.transform_feedback_enabled == 0) {
|
||||||
streams_mask = 0;
|
streams_mask = 0;
|
||||||
has_started = false;
|
has_started = false;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
bool HasPendingSync() override {
|
bool HasPendingSync() const override {
|
||||||
return !pending_sync.empty();
|
return !pending_sync.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -650,14 +654,19 @@ public:
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<VAddr> GetLastQueryStream(size_t stream) {
|
std::optional<std::pair<VAddr, size_t>> GetLastQueryStream(size_t stream) {
|
||||||
if (last_queries[stream] != 0) {
|
if (last_queries[stream] != 0) {
|
||||||
return {last_queries[stream]};
|
std::pair<VAddr, size_t> result(last_queries[stream], last_queries_stride[stream]);
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool HasUnsyncedQueries() override {
|
Maxwell3D::Regs::PrimitiveTopology GetOutputTopology() const {
|
||||||
|
return out_topology;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HasUnsyncedQueries() const override {
|
||||||
return !pending_flush_queries.empty();
|
return !pending_flush_queries.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -762,15 +771,17 @@ private:
|
||||||
|
|
||||||
void UpdateBuffers() {
|
void UpdateBuffers() {
|
||||||
last_queries.fill(0);
|
last_queries.fill(0);
|
||||||
runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) {
|
last_queries_stride.fill(1);
|
||||||
|
runtime.View3DRegs([this](Maxwell3D& maxwell3d) {
|
||||||
buffers_count = 0;
|
buffers_count = 0;
|
||||||
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers;
|
out_topology = maxwell3d.draw_manager->GetDrawState().topology;
|
||||||
i++) {
|
for (size_t i = 0; i < Maxwell3D::Regs::NumTransformFeedbackBuffers; i++) {
|
||||||
const auto& tf = regs.transform_feedback;
|
const auto& tf = maxwell3d.regs.transform_feedback;
|
||||||
if (tf.buffers[i].enable == 0) {
|
if (tf.buffers[i].enable == 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const size_t stream = tf.controls[i].stream;
|
const size_t stream = tf.controls[i].stream;
|
||||||
|
last_queries_stride[stream] = tf.controls[i].stride;
|
||||||
streams_mask |= 1ULL << stream;
|
streams_mask |= 1ULL << stream;
|
||||||
buffers_count = std::max<size_t>(buffers_count, stream + 1);
|
buffers_count = std::max<size_t>(buffers_count, stream + 1);
|
||||||
}
|
}
|
||||||
|
@ -785,7 +796,8 @@ private:
|
||||||
});
|
});
|
||||||
current_bank = &bank_pool.GetBank(current_bank_id);
|
current_bank = &bank_pool.GetBank(current_bank_id);
|
||||||
}
|
}
|
||||||
auto [dont_care, slot] = current_bank->Reserve();
|
auto [dont_care, other] = current_bank->Reserve();
|
||||||
|
const size_t slot = other; // workaround to compile bug.
|
||||||
current_bank->AddReference();
|
current_bank->AddReference();
|
||||||
|
|
||||||
static constexpr VkMemoryBarrier READ_BARRIER{
|
static constexpr VkMemoryBarrier READ_BARRIER{
|
||||||
|
@ -818,11 +830,9 @@ private:
|
||||||
return {current_bank_id, slot};
|
return {current_bank_id, slot};
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Traits>
|
|
||||||
friend class PrimitivesSucceededStreamer;
|
friend class PrimitivesSucceededStreamer;
|
||||||
|
|
||||||
static constexpr size_t NUM_STREAMS = 4;
|
static constexpr size_t NUM_STREAMS = 4;
|
||||||
static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL;
|
|
||||||
|
|
||||||
QueryCacheRuntime& runtime;
|
QueryCacheRuntime& runtime;
|
||||||
const Device& device;
|
const Device& device;
|
||||||
|
@ -851,6 +861,8 @@ private:
|
||||||
std::array<VkBuffer, NUM_STREAMS> counter_buffers{};
|
std::array<VkBuffer, NUM_STREAMS> counter_buffers{};
|
||||||
std::array<VkDeviceSize, NUM_STREAMS> offsets{};
|
std::array<VkDeviceSize, NUM_STREAMS> offsets{};
|
||||||
std::array<VAddr, NUM_STREAMS> last_queries;
|
std::array<VAddr, NUM_STREAMS> last_queries;
|
||||||
|
std::array<size_t, NUM_STREAMS> last_queries_stride;
|
||||||
|
Maxwell3D::Regs::PrimitiveTopology out_topology;
|
||||||
u64 streams_mask;
|
u64 streams_mask;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -858,32 +870,34 @@ class PrimitivesQueryBase : public VideoCommon::QueryBase {
|
||||||
public:
|
public:
|
||||||
// Default constructor
|
// Default constructor
|
||||||
PrimitivesQueryBase()
|
PrimitivesQueryBase()
|
||||||
: VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{},
|
: VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0) {}
|
||||||
dependant_index{}, dependant_manage{} {}
|
|
||||||
|
|
||||||
// Parameterized constructor
|
// Parameterized constructor
|
||||||
PrimitivesQueryBase(bool is_long, VAddr address)
|
PrimitivesQueryBase(bool has_timestamp, VAddr address)
|
||||||
: VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{},
|
: VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0) {
|
||||||
dependant_index{}, dependant_manage{} {
|
if (has_timestamp) {
|
||||||
if (is_long) {
|
|
||||||
flags |= VideoCommon::QueryFlagBits::HasTimestamp;
|
flags |= VideoCommon::QueryFlagBits::HasTimestamp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 stride;
|
u64 stride{};
|
||||||
VAddr dependant_address;
|
VAddr dependant_address{};
|
||||||
size_t dependant_index;
|
Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points};
|
||||||
bool dependant_manage;
|
size_t dependant_index{};
|
||||||
|
bool dependant_manage{};
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Traits>
|
|
||||||
class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<PrimitivesQueryBase> {
|
class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<PrimitivesQueryBase> {
|
||||||
public:
|
public:
|
||||||
PrimitivesSucceededStreamer(size_t id, QueryCacheRuntime& runtime_,
|
explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_,
|
||||||
TFBCounterStreamer<QueryCacheParams>& tfb_streamer_, Core::Memory::Memory& cpu_memory_)
|
TFBCounterStreamer& tfb_streamer_,
|
||||||
: VideoCommon::SimpleStreamer<PrimitivesQueryBase>(
|
Core::Memory::Memory& cpu_memory_)
|
||||||
id, 1ULL << static_cast<u64>(VideoCommon::QueryType::StreamingByteCount)),
|
: VideoCommon::SimpleStreamer<PrimitivesQueryBase>(id_), runtime{runtime_},
|
||||||
runtime{runtime_}, tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} {}
|
tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} {
|
||||||
|
MakeDependent(&tfb_streamer);
|
||||||
|
}
|
||||||
|
|
||||||
|
~PrimitivesSucceededStreamer() = default;
|
||||||
|
|
||||||
size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
|
size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
|
||||||
std::optional<u32> subreport_) override {
|
std::optional<u32> subreport_) override {
|
||||||
|
@ -901,8 +915,11 @@ public:
|
||||||
const size_t subreport = static_cast<size_t>(*subreport_);
|
const size_t subreport = static_cast<size_t>(*subreport_);
|
||||||
auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport);
|
auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport);
|
||||||
bool must_manage_dependance = false;
|
bool must_manage_dependance = false;
|
||||||
|
new_query->topology = tfb_streamer.GetOutputTopology();
|
||||||
if (dependant_address_opt) {
|
if (dependant_address_opt) {
|
||||||
new_query->dependant_address = *dependant_address_opt;
|
auto [dep_address, stride] = *dependant_address_opt;
|
||||||
|
new_query->dependant_address = dep_address;
|
||||||
|
new_query->stride = stride;
|
||||||
} else {
|
} else {
|
||||||
new_query->dependant_index =
|
new_query->dependant_index =
|
||||||
tfb_streamer.WriteCounter(address, has_timestamp, value, subreport_);
|
tfb_streamer.WriteCounter(address, has_timestamp, value, subreport_);
|
||||||
|
@ -917,25 +934,28 @@ public:
|
||||||
}
|
}
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
new_query->stride = 1;
|
||||||
|
runtime.View3DRegs([new_query, subreport](Maxwell3D& maxwell3d) {
|
||||||
|
for (size_t i = 0; i < Maxwell3D::Regs::NumTransformFeedbackBuffers; i++) {
|
||||||
|
const auto& tf = maxwell3d.regs.transform_feedback;
|
||||||
|
if (tf.buffers[i].enable == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (tf.controls[i].stream != subreport) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
new_query->stride = tf.controls[i].stride;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
new_query->dependant_manage = must_manage_dependance;
|
new_query->dependant_manage = must_manage_dependance;
|
||||||
runtime.View3DRegs([new_query, subreport](Tegra::Engines::Maxwell3D::Regs& regs) {
|
|
||||||
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers;
|
|
||||||
i++) {
|
|
||||||
const auto& tf = regs.transform_feedback;
|
|
||||||
if (tf.controls[i].stream != subreport) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
new_query->stride = tf.controls[i].stride;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
pending_flush_queries.push_back(index);
|
pending_flush_queries.push_back(index);
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool HasUnsyncedQueries() override {
|
bool HasUnsyncedQueries() const override {
|
||||||
return !pending_flush_queries.empty();
|
return !pending_flush_queries.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -960,22 +980,49 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
|
query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
|
||||||
|
u64 num_vertices = 0;
|
||||||
if (query->dependant_manage) {
|
if (query->dependant_manage) {
|
||||||
auto* dependant_query = tfb_streamer.GetQuery(query->dependant_index);
|
auto* dependant_query = tfb_streamer.GetQuery(query->dependant_index);
|
||||||
query->value = dependant_query->value / query->stride;
|
num_vertices = dependant_query->value / query->stride;
|
||||||
tfb_streamer.Free(query->dependant_index);
|
tfb_streamer.Free(query->dependant_index);
|
||||||
} else {
|
} else {
|
||||||
u8* pointer = cpu_memory.GetPointer(query->dependant_address);
|
u8* pointer = cpu_memory.GetPointer(query->dependant_address);
|
||||||
u32 result;
|
u32 result;
|
||||||
std::memcpy(&result, pointer, sizeof(u32));
|
std::memcpy(&result, pointer, sizeof(u32));
|
||||||
query->value = static_cast<u64>(result) / query->stride;
|
num_vertices = static_cast<u64>(result) / query->stride;
|
||||||
}
|
}
|
||||||
|
query->value = [&]() -> u64 {
|
||||||
|
switch (query->topology) {
|
||||||
|
case Maxwell3D::Regs::PrimitiveTopology::Points:
|
||||||
|
return num_vertices;
|
||||||
|
case Maxwell3D::Regs::PrimitiveTopology::Lines:
|
||||||
|
return num_vertices / 2;
|
||||||
|
case Maxwell3D::Regs::PrimitiveTopology::LineLoop:
|
||||||
|
return (num_vertices / 2) + 1;
|
||||||
|
case Maxwell3D::Regs::PrimitiveTopology::LineStrip:
|
||||||
|
return num_vertices - 1;
|
||||||
|
case Maxwell3D::Regs::PrimitiveTopology::Patches:
|
||||||
|
case Maxwell3D::Regs::PrimitiveTopology::Triangles:
|
||||||
|
case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
|
||||||
|
return num_vertices / 3;
|
||||||
|
case Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
|
||||||
|
case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
|
||||||
|
case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
|
||||||
|
return num_vertices - 2;
|
||||||
|
case Maxwell3D::Regs::PrimitiveTopology::Quads:
|
||||||
|
return num_vertices / 4;
|
||||||
|
case Maxwell3D::Regs::PrimitiveTopology::Polygon:
|
||||||
|
return 1U;
|
||||||
|
default:
|
||||||
|
return num_vertices;
|
||||||
|
}
|
||||||
|
}();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
QueryCacheRuntime& runtime;
|
QueryCacheRuntime& runtime;
|
||||||
TFBCounterStreamer<QueryCacheParams>& tfb_streamer;
|
TFBCounterStreamer& tfb_streamer;
|
||||||
Core::Memory::Memory& cpu_memory;
|
Core::Memory::Memory& cpu_memory;
|
||||||
|
|
||||||
// syncing queue
|
// syncing queue
|
||||||
|
@ -1005,7 +1052,10 @@ struct QueryCacheRuntimeImpl {
|
||||||
tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device,
|
tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device,
|
||||||
scheduler, memory_allocator, staging_pool),
|
scheduler, memory_allocator, staging_pool),
|
||||||
primitives_succeeded_streamer(
|
primitives_succeeded_streamer(
|
||||||
static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, cpu_memory_),
|
static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer,
|
||||||
|
cpu_memory_),
|
||||||
|
primitives_needed_minus_suceeded_streamer(
|
||||||
|
static_cast<size_t>(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u),
|
||||||
hcr_setup{}, hcr_is_set{}, is_hcr_running{} {
|
hcr_setup{}, hcr_is_set{}, is_hcr_running{} {
|
||||||
|
|
||||||
hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
|
hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
|
||||||
|
@ -1040,9 +1090,10 @@ struct QueryCacheRuntimeImpl {
|
||||||
|
|
||||||
// Streamers
|
// Streamers
|
||||||
VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer;
|
VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer;
|
||||||
SamplesStreamer<QueryCacheParams> sample_streamer;
|
SamplesStreamer sample_streamer;
|
||||||
TFBCounterStreamer<QueryCacheParams> tfb_streamer;
|
TFBCounterStreamer tfb_streamer;
|
||||||
PrimitivesSucceededStreamer<QueryCacheParams> primitives_succeeded_streamer;
|
PrimitivesSucceededStreamer primitives_succeeded_streamer;
|
||||||
|
VideoCommon::StubStreamer<QueryCacheParams> primitives_needed_minus_suceeded_streamer;
|
||||||
|
|
||||||
std::vector<std::pair<VAddr, VAddr>> little_cache;
|
std::vector<std::pair<VAddr, VAddr>> little_cache;
|
||||||
std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to;
|
std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to;
|
||||||
|
@ -1059,7 +1110,7 @@ struct QueryCacheRuntimeImpl {
|
||||||
bool is_hcr_running;
|
bool is_hcr_running;
|
||||||
|
|
||||||
// maxwell3d
|
// maxwell3d
|
||||||
Tegra::Engines::Maxwell3D* maxwell3d;
|
Maxwell3D* maxwell3d;
|
||||||
};
|
};
|
||||||
|
|
||||||
QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer,
|
QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer,
|
||||||
|
@ -1074,13 +1125,13 @@ QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer,
|
||||||
staging_pool_, compute_pass_descriptor_queue, descriptor_pool);
|
staging_pool_, compute_pass_descriptor_queue, descriptor_pool);
|
||||||
}
|
}
|
||||||
|
|
||||||
void QueryCacheRuntime::Bind3DEngine(Tegra::Engines::Maxwell3D* maxwell3d) {
|
void QueryCacheRuntime::Bind3DEngine(Maxwell3D* maxwell3d) {
|
||||||
impl->maxwell3d = maxwell3d;
|
impl->maxwell3d = maxwell3d;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Func>
|
template <typename Func>
|
||||||
void QueryCacheRuntime::View3DRegs(Func&& func) {
|
void QueryCacheRuntime::View3DRegs(Func&& func) {
|
||||||
func(impl->maxwell3d->regs);
|
func(*impl->maxwell3d);
|
||||||
}
|
}
|
||||||
|
|
||||||
void QueryCacheRuntime::EndHostConditionalRendering() {
|
void QueryCacheRuntime::EndHostConditionalRendering() {
|
||||||
|
@ -1240,8 +1291,12 @@ VideoCommon::StreamerInterface* QueryCacheRuntime::GetStreamerInterface(QueryTyp
|
||||||
return &impl->sample_streamer;
|
return &impl->sample_streamer;
|
||||||
case QueryType::StreamingByteCount:
|
case QueryType::StreamingByteCount:
|
||||||
return &impl->tfb_streamer;
|
return &impl->tfb_streamer;
|
||||||
|
case QueryType::StreamingPrimitivesNeeded:
|
||||||
|
case QueryType::VtgPrimitivesOut:
|
||||||
case QueryType::StreamingPrimitivesSucceeded:
|
case QueryType::StreamingPrimitivesSucceeded:
|
||||||
return &impl->primitives_succeeded_streamer;
|
return &impl->primitives_succeeded_streamer;
|
||||||
|
case QueryType::StreamingPrimitivesNeededMinusSucceeded:
|
||||||
|
return &impl->primitives_needed_minus_suceeded_streamer;
|
||||||
default:
|
default:
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,7 +49,8 @@ public:
|
||||||
bool HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool qc_dirty);
|
bool HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool qc_dirty);
|
||||||
|
|
||||||
bool HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1,
|
bool HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1,
|
||||||
VideoCommon::LookupData object_2, bool qc_dirty, bool equal_check);
|
VideoCommon::LookupData object_2, bool qc_dirty,
|
||||||
|
bool equal_check);
|
||||||
|
|
||||||
VideoCommon::StreamerInterface* GetStreamerInterface(VideoCommon::QueryType query_type);
|
VideoCommon::StreamerInterface* GetStreamerInterface(VideoCommon::QueryType query_type);
|
||||||
|
|
||||||
|
@ -66,7 +67,7 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
struct QueryCacheParams {
|
struct QueryCacheParams {
|
||||||
using RuntimeType = Vulkan::QueryCacheRuntime;
|
using RuntimeType = typename Vulkan::QueryCacheRuntime;
|
||||||
};
|
};
|
||||||
|
|
||||||
using QueryCache = VideoCommon::QueryCacheBase<QueryCacheParams>;
|
using QueryCache = VideoCommon::QueryCacheBase<QueryCacheParams>;
|
||||||
|
|
|
@ -194,15 +194,6 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
|
||||||
|
|
||||||
query_cache.NotifySegment(true);
|
query_cache.NotifySegment(true);
|
||||||
|
|
||||||
#if ANDROID
|
|
||||||
if (Settings::IsGPULevelHigh()) {
|
|
||||||
// This is problematic on Android, disable on GPU Normal.
|
|
||||||
// query_cache.UpdateCounters();
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
// query_cache.UpdateCounters();
|
|
||||||
#endif
|
|
||||||
|
|
||||||
GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
|
GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
|
||||||
if (!pipeline) {
|
if (!pipeline) {
|
||||||
return;
|
return;
|
||||||
|
@ -294,15 +285,6 @@ void RasterizerVulkan::DrawTexture() {
|
||||||
|
|
||||||
query_cache.NotifySegment(true);
|
query_cache.NotifySegment(true);
|
||||||
|
|
||||||
#if ANDROID
|
|
||||||
if (Settings::IsGPULevelHigh()) {
|
|
||||||
// This is problematic on Android, disable on GPU Normal.
|
|
||||||
// query_cache.UpdateCounters();
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
// query_cache.UpdateCounters();
|
|
||||||
#endif
|
|
||||||
|
|
||||||
texture_cache.SynchronizeGraphicsDescriptors();
|
texture_cache.SynchronizeGraphicsDescriptors();
|
||||||
texture_cache.UpdateRenderTargets(false);
|
texture_cache.UpdateRenderTargets(false);
|
||||||
|
|
||||||
|
@ -332,15 +314,6 @@ void RasterizerVulkan::Clear(u32 layer_count) {
|
||||||
FlushWork();
|
FlushWork();
|
||||||
gpu_memory->FlushCaching();
|
gpu_memory->FlushCaching();
|
||||||
|
|
||||||
#if ANDROID
|
|
||||||
if (Settings::IsGPULevelHigh()) {
|
|
||||||
// This is problematic on Android, disable on GPU Normal.
|
|
||||||
// query_cache.UpdateCounters();
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
// query_cache.UpdateCounters();
|
|
||||||
#endif
|
|
||||||
|
|
||||||
query_cache.NotifySegment(true);
|
query_cache.NotifySegment(true);
|
||||||
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
|
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
|
||||||
maxwell3d->regs.zpass_pixel_count_enable);
|
maxwell3d->regs.zpass_pixel_count_enable);
|
||||||
|
|
|
@ -85,7 +85,8 @@ public:
|
||||||
void Clear(u32 layer_count) override;
|
void Clear(u32 layer_count) override;
|
||||||
void DispatchCompute() override;
|
void DispatchCompute() override;
|
||||||
void ResetCounter(VideoCommon::QueryType type) override;
|
void ResetCounter(VideoCommon::QueryType type) override;
|
||||||
void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
|
void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
|
||||||
|
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
|
||||||
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
|
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
|
||||||
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
|
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
|
||||||
void FlushAll() override;
|
void FlushAll() override;
|
||||||
|
|
|
@ -15,9 +15,13 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/polyfill_thread.h"
|
#include "common/polyfill_thread.h"
|
||||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
|
||||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||||
|
|
||||||
|
namespace VideoCommon {
|
||||||
|
template <typename Trait>
|
||||||
|
class QueryCacheBase;
|
||||||
|
}
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
class CommandPool;
|
class CommandPool;
|
||||||
|
@ -26,6 +30,8 @@ class Framebuffer;
|
||||||
class GraphicsPipeline;
|
class GraphicsPipeline;
|
||||||
class StateTracker;
|
class StateTracker;
|
||||||
|
|
||||||
|
struct QueryCacheParams;
|
||||||
|
|
||||||
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
|
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
|
||||||
/// OpenGL-like operations on Vulkan command buffers.
|
/// OpenGL-like operations on Vulkan command buffers.
|
||||||
class Scheduler {
|
class Scheduler {
|
||||||
|
@ -63,7 +69,7 @@ public:
|
||||||
void InvalidateState();
|
void InvalidateState();
|
||||||
|
|
||||||
/// Assigns the query cache.
|
/// Assigns the query cache.
|
||||||
void SetQueryCache(QueryCache& query_cache_) {
|
void SetQueryCache(VideoCommon::QueryCacheBase<QueryCacheParams>& query_cache_) {
|
||||||
query_cache = &query_cache_;
|
query_cache = &query_cache_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -219,7 +225,7 @@ private:
|
||||||
std::unique_ptr<MasterSemaphore> master_semaphore;
|
std::unique_ptr<MasterSemaphore> master_semaphore;
|
||||||
std::unique_ptr<CommandPool> command_pool;
|
std::unique_ptr<CommandPool> command_pool;
|
||||||
|
|
||||||
QueryCache* query_cache = nullptr;
|
VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr;
|
||||||
|
|
||||||
vk::CommandBuffer current_cmdbuf;
|
vk::CommandBuffer current_cmdbuf;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue