From c31382ced54c07650ae41fa2f75dc53da894784e Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Tue, 11 Feb 2020 16:02:41 -0300
Subject: [PATCH] query_cache: Abstract OpenGL implementation

Abstract the current OpenGL implementation into the VideoCommon
namespace and reimplement it on top of that. Doing this avoids repeating
code and logic in the Vulkan implementation.
---
 src/video_core/CMakeLists.txt                 |   1 +
 src/video_core/query_cache.h                  | 323 ++++++++++++++++++
 .../renderer_opengl/gl_query_cache.cpp        | 287 +++-------------
 .../renderer_opengl/gl_query_cache.h          | 122 ++-----
 4 files changed, 394 insertions(+), 339 deletions(-)
 create mode 100644 src/video_core/query_cache.h
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 3208f4993d..bb5895e992 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -37,6 +37,7 @@ add_library(video_core STATIC
     memory_manager.h
     morton.cpp
     morton.h
+    query_cache.h
     rasterizer_accelerated.cpp
     rasterizer_accelerated.h
     rasterizer_cache.cpp
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
new file mode 100644
index 0000000000..4c9151ce8c
--- /dev/null
+++ b/src/video_core/query_cache.h
@@ -0,0 +1,323 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <cstring>
+#include <iterator>
+#include <memory>
+#include <optional>
+#include <unordered_map>
+#include <vector>
+
+#include "common/assert.h"
+#include "core/core.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCommon {
+
+template <class QueryCache, class HostCounter>
+class CounterStreamBase { // Stream of chained host counters for a single query type.
+public:
+    explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type)
+        : cache{cache}, type{type} {}
+
+    /// Enables the stream when `enabled` is true and disables it otherwise.
+    void Update(bool enabled) {
+        if (enabled) {
+            Enable();
+        } else {
+            Disable();
+        }
+    }
+
+    /// Resets the stream to zero by dropping the chain. An enabled stream stays enabled.
+    void Reset() {
+        if (current) {
+            current->EndQuery();
+
+            // Immediately start a new, dependency-free query to avoid disabling its state.
+            current = cache.Counter(nullptr, type);
+        }
+        last = nullptr;
+    }
+
+    /// Ends and returns the current counter (nullptr when disabled), chaining a new one to it.
+    std::shared_ptr<HostCounter> Current() {
+        if (!current) {
+            return nullptr;
+        }
+        current->EndQuery();
+        last = std::move(current);
+        current = cache.Counter(last, type);
+        return last;
+    }
+
+    /// Returns true when the counter stream is enabled.
+    bool IsEnabled() const {
+        return static_cast<bool>(current);
+    }
+
+private:
+    /// Enables the stream, chaining the new counter to `last`. No-op when already enabled.
+    void Enable() {
+        if (current) {
+            return;
+        }
+        current = cache.Counter(last, type);
+    }
+
+    /// Disables the stream. No-op when already disabled so the chain kept in `last` survives.
+    void Disable() {
+        if (current) {
+            current->EndQuery();
+            last = std::exchange(current, nullptr);
+        }
+    }
+
+    QueryCache& cache;               ///< Cache that creates the host counters.
+    const VideoCore::QueryType type; ///< Query type of every counter in this stream.
+
+    std::shared_ptr<HostCounter> current; ///< Counter in flight, nullptr while disabled.
+    std::shared_ptr<HostCounter> last;    ///< Most recently ended counter, next dependency.
+};
+
+template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
+class QueryCacheBase { // Backend-agnostic query cache; QueryCache is the deriving class.
+public:
+    explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
+        : system{system}, rasterizer{rasterizer}, streams{{CounterStream{
+                                                      static_cast<QueryCache&>(*this),
+                                                      VideoCore::QueryType::SamplesPassed}}} {}
+
+    void InvalidateRegion(CacheAddr addr, std::size_t size) {
+        FlushAndRemoveRegion(addr, size); // Overlapping queries are flushed, then dropped.
+    }
+
+    void FlushRegion(CacheAddr addr, std::size_t size) {
+        FlushAndRemoveRegion(addr, size); // Flushes are handled the same way as invalidations.
+    }
+
+    /**
+     * Records a query in GPU mapped memory, potentially marked with a timestamp.
+     * @param gpu_addr  GPU address to flush to when the mapped memory is read.
+     * @param type      Query type, e.g. SamplesPassed.
+     * @param timestamp Timestamp, when empty the flushed query is assumed to be short.
+     */
+    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
+        auto& memory_manager = system.GPU().MemoryManager();
+        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+
+        CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
+        if (!query) {
+            const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
+            ASSERT_OR_EXECUTE(cpu_addr, return;);
+
+            query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
+        }
+
+        query->BindCounter(Stream(type).Current(), timestamp); // Slices the stream here.
+    }
+
+    /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
+    void UpdateCounters() {
+        const auto& regs = system.GPU().Maxwell3D().regs;
+        Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
+    }
+
+    /// Resets a counter to zero. It doesn't disable the query after resetting.
+    void ResetCounter(VideoCore::QueryType type) {
+        Stream(type).Reset();
+    }
+
+    /// Returns a new host counter of the given type constructed with `dependency`.
+    std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency,
+                                         VideoCore::QueryType type) {
+        return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency),
+                                             type);
+    }
+
+    /// Returns the counter stream of the specified type (indexed by the enum's numeric value).
+    CounterStream& Stream(VideoCore::QueryType type) {
+        return streams[static_cast<std::size_t>(type)];
+    }
+
+private:
+    /// Flushes every cached query overlapping [addr, addr + size) and removes it from the cache.
+    void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
+        const u64 addr_begin = static_cast<u64>(addr);
+        const u64 addr_end = addr_begin + static_cast<u64>(size);
+        const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
+            const u64 cache_begin = query.CacheAddr();
+            const u64 cache_end = cache_begin + query.SizeInBytes();
+            return cache_begin < addr_end && addr_begin < cache_end; // Half-open overlap test.
+        };
+
+        const u64 page_end = addr_end >> PAGE_SHIFT; // Queries are bucketed by cache address page.
+        for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
+            const auto& it = cached_queries.find(page);
+            if (it == std::end(cached_queries)) {
+                continue;
+            }
+            auto& contents = it->second;
+            for (auto& query : contents) {
+                if (!in_range(query)) {
+                    continue;
+                }
+                rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1);
+                query.Flush();
+            }
+            contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
+                           std::end(contents));
+        }
+    }
+
+    /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
+    CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
+        rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
+        const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT;
+        return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
+                                                  host_ptr);
+    }
+
+    /// Tries to get the cached query stored at exactly `addr`. Returns nullptr on failure.
+    CachedQuery* TryGet(CacheAddr addr) {
+        const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
+        const auto it = cached_queries.find(page);
+        if (it == std::end(cached_queries)) {
+            return nullptr;
+        }
+        auto& contents = it->second;
+        const auto found = std::find_if(std::begin(contents), std::end(contents),
+                                        [addr](auto& query) { return query.CacheAddr() == addr; });
+        return found != std::end(contents) ? &*found : nullptr;
+    }
+
+    static constexpr std::uintptr_t PAGE_SIZE = 4096; // NOTE(review): unused in this class.
+    static constexpr int PAGE_SHIFT = 12;             // log2 of PAGE_SIZE.
+
+    Core::System& system;
+    VideoCore::RasterizerInterface& rasterizer;
+
+    std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; ///< Queries by page.
+
+    std::array<CounterStream, VideoCore::NumQueryTypes> streams; ///< One stream per query type.
+};
+
+template <class QueryCache, class HostCounter>
+class HostCounterBase { // Host query whose value is added on top of an optional dependency.
+public:
+    explicit HostCounterBase(std::shared_ptr<HostCounter> dependency)
+        : dependency{std::move(dependency)} {}
+
+    /// Returns this counter's value plus its dependency's. Blocks on the first call, then caches.
+    u64 Query() {
+        if (result) {
+            return *result;
+        }
+
+        u64 value = BlockingQuery();
+        if (dependency) {
+            value += dependency->Query();
+            dependency.reset(); // The sum is cached below, the chain is no longer needed.
+        }
+        return *(result = value);
+    }
+
+    /// Returns true when flushing this query will potentially wait.
+    bool WaitPending() const noexcept {
+        return !result.has_value(); // Only an unresolved counter has to ask the backend.
+    }
+
+protected:
+    /// Returns the value of query from the backend API blocking as needed.
+    virtual u64 BlockingQuery() const = 0;
+
+private:
+    std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
+    std::optional<u64> result;               ///< Filled with the already returned value.
+};
+
+template <class HostCounter>
+class CachedQueryBase { // Query registered at a guest address, flushed through host_ptr.
+public:
+    explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr)
+        : cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
+
+    CachedQueryBase(CachedQueryBase&& rhs) noexcept
+        : cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, counter{std::move(rhs.counter)},
+          timestamp{rhs.timestamp} {}
+
+    CachedQueryBase(const CachedQueryBase&) = delete;
+
+    CachedQueryBase& operator=(CachedQueryBase&& rhs) noexcept {
+        cpu_addr = rhs.cpu_addr;
+        host_ptr = rhs.host_ptr;
+        counter = std::move(rhs.counter);
+        timestamp = rhs.timestamp;
+        return *this;
+    }
+
+    /// Flushes the query to guest memory: the counter value and, when set, the timestamp.
+    virtual void Flush() {
+        // When counter is nullptr it means that it has just been reset. We are supposed to write
+        // a zero in these cases.
+        const u64 value = counter ? counter->Query() : 0;
+        std::memcpy(host_ptr, &value, sizeof(u64));
+
+        if (timestamp) {
+            std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
+        }
+    }
+
+    /// Binds a counter and timestamp to this query, flushing any previously bound counter first.
+    void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
+        if (counter) {
+            // If there's an old counter set it means the query is being rewritten by the game.
+            // To avoid losing the data forever, flush here.
+            Flush();
+        }
+        counter = std::move(counter_);
+        timestamp = timestamp_;
+    }
+
+    VAddr CpuAddr() const noexcept { // Guest CPU address of this query.
+        return cpu_addr;
+    }
+
+    CacheAddr CacheAddr() const noexcept { // Cache address derived from host_ptr.
+        return ToCacheAddr(host_ptr);
+    }
+
+    u64 SizeInBytes() const noexcept { // Size for the currently bound timestamp state.
+        return SizeInBytes(timestamp.has_value());
+    }
+
+    static u64 SizeInBytes(bool with_timestamp) { // Large layout when a timestamp is written.
+        return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
+    }
+
+protected:
+    /// Returns true when querying the counter may potentially block.
+    bool WaitPending() const noexcept {
+        return counter && counter->WaitPending();
+    }
+
+private:
+    static constexpr std::size_t SMALL_QUERY_SIZE = 8;   // Query size without timestamp.
+    static constexpr std::size_t LARGE_QUERY_SIZE = 16;  // Query size with timestamp.
+    static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query.
+
+    VAddr cpu_addr;                       ///< Guest CPU address.
+    u8* host_ptr;                         ///< Writable host pointer.
+    std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
+    std::optional<u64> timestamp;         ///< Timestamp to flush to guest memory.
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index 74cb732097..7d5a044c71 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -20,211 +20,49 @@
 
 namespace OpenGL {
 
-using VideoCore::QueryType;
-
 namespace {
 
-constexpr std::uintptr_t PAGE_SIZE = 4096;
-constexpr int PAGE_SHIFT = 12;
-
-constexpr std::size_t SMALL_QUERY_SIZE = 8;  // Query size without timestamp
-constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp
-constexpr std::ptrdiff_t TIMESTAMP_OFFSET = 8;
-
 constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
 
-constexpr GLenum GetTarget(QueryType type) {
+constexpr GLenum GetTarget(VideoCore::QueryType type) {
     return QueryTargets[static_cast<std::size_t>(type)];
 }
 
 } // Anonymous namespace
 
-CounterStream::CounterStream(QueryCache& cache, QueryType type)
-    : cache{cache}, type{type}, target{GetTarget(type)} {}
-
-CounterStream::~CounterStream() = default;
-
-void CounterStream::Update(bool enabled, bool any_command_queued) {
-    if (enabled) {
-        Enable();
-    } else {
-        Disable(any_command_queued);
-    }
-}
-
-void CounterStream::Reset(bool any_command_queued) {
-    if (current) {
-        EndQuery(any_command_queued);
-
-        // Immediately start a new query to avoid disabling its state.
-        current = cache.GetHostCounter(nullptr, type);
-    }
-    last = nullptr;
-}
-
-std::shared_ptr<HostCounter> CounterStream::GetCurrent(bool any_command_queued) {
-    if (!current) {
-        return nullptr;
-    }
-    EndQuery(any_command_queued);
-    last = std::move(current);
-    current = cache.GetHostCounter(last, type);
-    return last;
-}
-
-void CounterStream::Enable() {
-    if (current) {
-        return;
-    }
-    current = cache.GetHostCounter(last, type);
-}
-
-void CounterStream::Disable(bool any_command_queued) {
-    if (current) {
-        EndQuery(any_command_queued);
-    }
-    last = std::exchange(current, nullptr);
-}
-
-void CounterStream::EndQuery(bool any_command_queued) {
-    if (!any_command_queued) {
-        // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
-        // having any of these causes a lock. glFlush is considered a command, so we can safely wait
-        // for this. Insert to the OpenGL command stream a flush.
-        glFlush();
-    }
-    glEndQuery(target);
-}
-
-QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer)
-    : system{system}, rasterizer{rasterizer}, streams{{CounterStream{*this,
-                                                                     QueryType::SamplesPassed}}} {}
+QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
+    : VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
+                                  HostCounter>{system, static_cast<VideoCore::RasterizerInterface&>(
+                                                           gl_rasterizer)},
+      gl_rasterizer{gl_rasterizer} {}
 
 QueryCache::~QueryCache() = default;
 
-void QueryCache::InvalidateRegion(CacheAddr addr, std::size_t size) {
-    const u64 addr_begin = static_cast<u64>(addr);
-    const u64 addr_end = addr_begin + static_cast<u64>(size);
-    const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
-        const u64 cache_begin = query.GetCacheAddr();
-        const u64 cache_end = cache_begin + query.GetSizeInBytes();
-        return cache_begin < addr_end && addr_begin < cache_end;
-    };
-
-    const u64 page_end = addr_end >> PAGE_SHIFT;
-    for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
-        const auto& it = cached_queries.find(page);
-        if (it == std::end(cached_queries)) {
-            continue;
-        }
-        auto& contents = it->second;
-        for (auto& query : contents) {
-            if (!in_range(query)) {
-                continue;
-            }
-            rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.GetSizeInBytes(), -1);
-            Flush(query);
-        }
-        contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
-                       std::end(contents));
-    }
-}
-
-void QueryCache::FlushRegion(CacheAddr addr, std::size_t size) {
-    // We can handle flushes in the same way as invalidations.
-    InvalidateRegion(addr, size);
-}
-
-void QueryCache::Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) {
-    auto& memory_manager = system.GPU().MemoryManager();
-    const auto host_ptr = memory_manager.GetPointer(gpu_addr);
-
-    CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
-    if (!query) {
-        const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
-        ASSERT_OR_EXECUTE(cpu_addr, return;);
-
-        query = &Register(CachedQuery(type, *cpu_addr, host_ptr));
-    }
-
-    query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued()), timestamp);
-}
-
-void QueryCache::UpdateCounters() {
-    auto& samples_passed = GetStream(QueryType::SamplesPassed);
-
-    const auto& regs = system.GPU().Maxwell3D().regs;
-    samples_passed.Update(regs.samplecnt_enable, rasterizer.AnyCommandQueued());
-}
-
-void QueryCache::ResetCounter(QueryType type) {
-    GetStream(type).Reset(rasterizer.AnyCommandQueued());
-}
-
-void QueryCache::Reserve(QueryType type, OGLQuery&& query) {
-    reserved_queries[static_cast<std::size_t>(type)].push_back(std::move(query));
-}
-
-std::shared_ptr<HostCounter> QueryCache::GetHostCounter(std::shared_ptr<HostCounter> dependency,
-                                                        QueryType type) {
-    auto& reserve = reserved_queries[static_cast<std::size_t>(type)];
+OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
+    auto& reserve = queries_reserve[static_cast<std::size_t>(type)];
     OGLQuery query;
     if (reserve.empty()) {
         query.Create(GetTarget(type));
-    } else {
-        query = std::move(reserve.back());
-        reserve.pop_back();
+        return query;
     }
 
-    return std::make_shared<HostCounter>(*this, std::move(dependency), type, std::move(query));
+    query = std::move(reserve.back());
+    reserve.pop_back();
+    return query;
 }
 
-CachedQuery& QueryCache::Register(CachedQuery&& cached_query) {
-    const u64 page = static_cast<u64>(cached_query.GetCacheAddr()) >> PAGE_SHIFT;
-    auto& stored_ref = cached_queries[page].emplace_back(std::move(cached_query));
-    rasterizer.UpdatePagesCachedCount(stored_ref.GetCpuAddr(), stored_ref.GetSizeInBytes(), 1);
-    return stored_ref;
+void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
+    queries_reserve[static_cast<std::size_t>(type)].push_back(std::move(query));
 }
 
-CachedQuery* QueryCache::TryGet(CacheAddr addr) {
-    const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
-    const auto it = cached_queries.find(page);
-    if (it == std::end(cached_queries)) {
-        return nullptr;
-    }
-    auto& contents = it->second;
-    const auto found =
-        std::find_if(std::begin(contents), std::end(contents),
-                     [addr](const auto& query) { return query.GetCacheAddr() == addr; });
-    return found != std::end(contents) ? &*found : nullptr;
+bool QueryCache::AnyCommandQueued() const noexcept {
+    return gl_rasterizer.AnyCommandQueued();
 }
 
-void QueryCache::Flush(CachedQuery& cached_query) {
-    auto& stream = GetStream(cached_query.GetType());
-
-    // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
-    // To avoid this disable and re-enable keeping the dependency stream.
-    // But we only have to do this if we have pending waits to be done.
-    const bool slice_counter = stream.IsEnabled() && cached_query.WaitPending();
-    const bool any_command_queued = rasterizer.AnyCommandQueued();
-    if (slice_counter) {
-        stream.Update(false, any_command_queued);
-    }
-
-    cached_query.Flush();
-
-    if (slice_counter) {
-        stream.Update(true, any_command_queued);
-    }
-}
-
-CounterStream& QueryCache::GetStream(QueryType type) {
-    return streams[static_cast<std::size_t>(type)];
-}
-
-HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type,
-                         OGLQuery&& query_)
-    : cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} {
+HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
+                         VideoCore::QueryType type)
+    : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache},
+      type{type}, query{cache.AllocateQuery(type)} {
     glBeginQuery(GetTarget(type), query.handle);
 }
 
@@ -232,81 +70,50 @@ HostCounter::~HostCounter() {
     cache.Reserve(type, std::move(query));
 }
 
-u64 HostCounter::Query() {
-    if (result) {
-        return *result;
+void HostCounter::EndQuery() {
+    if (!cache.AnyCommandQueued()) {
+        // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
+        // having any of these causes a lock. glFlush is considered a command, so we can safely wait
+        // for this. Insert to the OpenGL command stream a flush.
+        glFlush();
     }
-
-    u64 value;
-    glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value);
-    if (dependency) {
-        value += dependency->Query();
-    }
-
-    return *(result = value);
+    glEndQuery(GetTarget(type));
 }
 
-bool HostCounter::WaitPending() const noexcept {
-    return result.has_value();
+u64 HostCounter::BlockingQuery() const {
+    GLint64 value;
+    glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
+    return static_cast<u64>(value);
 }
 
-CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr)
-    : type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
+CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
+    : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {}
 
 CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
-    : type{rhs.type}, cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr},
-      counter{std::move(rhs.counter)}, timestamp{rhs.timestamp} {}
-
-CachedQuery::~CachedQuery() = default;
+    : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
 
 CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
+    VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs));
+    cache = rhs.cache;
     type = rhs.type;
-    cpu_addr = rhs.cpu_addr;
-    host_ptr = rhs.host_ptr;
-    counter = std::move(rhs.counter);
-    timestamp = rhs.timestamp;
     return *this;
 }
 
 void CachedQuery::Flush() {
-    // When counter is nullptr it means that it's just been reseted. We are supposed to write a zero
-    // in these cases.
-    const u64 value = counter ? counter->Query() : 0;
-    std::memcpy(host_ptr, &value, sizeof(u64));
-
-    if (timestamp) {
-        std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
+    // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
+    // To avoid this disable and re-enable keeping the dependency stream.
+    // But we only have to do this if we have pending waits to be done.
+    auto& stream = cache->Stream(type);
+    const bool slice_counter = WaitPending() && stream.IsEnabled();
+    if (slice_counter) {
+        stream.Update(false);
     }
-}
 
-void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
-    if (counter) {
-        // If there's an old counter set it means the query is being rewritten by the game.
-        // To avoid losing the data forever, flush here.
-        Flush();
+    VideoCommon::CachedQueryBase<HostCounter>::Flush();
+
+    if (slice_counter) {
+        stream.Update(true);
     }
-    counter = std::move(counter_);
-    timestamp = timestamp_;
-}
-
-bool CachedQuery::WaitPending() const noexcept {
-    return counter && counter->WaitPending();
-}
-
-QueryType CachedQuery::GetType() const noexcept {
-    return type;
-}
-
-VAddr CachedQuery::GetCpuAddr() const noexcept {
-    return cpu_addr;
-}
-
-CacheAddr CachedQuery::GetCacheAddr() const noexcept {
-    return ToCacheAddr(host_ptr);
-}
-
-u64 CachedQuery::GetSizeInBytes() const noexcept {
-    return timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index d9f22b44d7..20d337f156 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -13,6 +13,7 @@
 #include <glad/glad.h>
 
 #include "common/common_types.h"
+#include "video_core/query_cache.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 
@@ -24,134 +25,57 @@ namespace OpenGL {
 
 class CachedQuery;
 class HostCounter;
-class RasterizerOpenGL;
 class QueryCache;
+class RasterizerOpenGL;
 
-class CounterStream final {
-public:
-    explicit CounterStream(QueryCache& cache, VideoCore::QueryType type);
-    ~CounterStream();
+using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
 
-    void Update(bool enabled, bool any_command_queued);
-
-    void Reset(bool any_command_queued);
-
-    std::shared_ptr<HostCounter> GetCurrent(bool any_command_queued);
-
-    bool IsEnabled() const {
-        return current != nullptr;
-    }
-
-private:
-    void Enable();
-
-    void Disable(bool any_command_queued);
-
-    void EndQuery(bool any_command_queued);
-
-    QueryCache& cache;
-
-    std::shared_ptr<HostCounter> current;
-    std::shared_ptr<HostCounter> last;
-    VideoCore::QueryType type;
-    GLenum target;
-};
-
-class QueryCache final {
+class QueryCache final
+    : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
 public:
     explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
     ~QueryCache();
 
-    void InvalidateRegion(CacheAddr addr, std::size_t size);
-
-    void FlushRegion(CacheAddr addr, std::size_t size);
-
-    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp);
-
-    void UpdateCounters();
-
-    void ResetCounter(VideoCore::QueryType type);
+    OGLQuery AllocateQuery(VideoCore::QueryType type);
 
     void Reserve(VideoCore::QueryType type, OGLQuery&& query);
 
-    std::shared_ptr<HostCounter> GetHostCounter(std::shared_ptr<HostCounter> dependency,
-                                                VideoCore::QueryType type);
+    bool AnyCommandQueued() const noexcept;
 
 private:
-    CachedQuery& Register(CachedQuery&& cached_query);
-
-    CachedQuery* TryGet(CacheAddr addr);
-
-    void Flush(CachedQuery& cached_query);
-
-    CounterStream& GetStream(VideoCore::QueryType type);
-
-    Core::System& system;
-    RasterizerOpenGL& rasterizer;
-
-    std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
-
-    std::array<CounterStream, VideoCore::NumQueryTypes> streams;
-    std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> reserved_queries;
+    RasterizerOpenGL& gl_rasterizer;
+    std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> queries_reserve;
 };
 
-class HostCounter final {
+class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
 public:
     explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
-                         VideoCore::QueryType type, OGLQuery&& query);
+                         VideoCore::QueryType type);
     ~HostCounter();
 
-    /// Returns the current value of the query.
-    u64 Query();
-
-    /// Returns true when querying this counter will potentially wait for OpenGL.
-    bool WaitPending() const noexcept;
+    void EndQuery();
 
 private:
+    u64 BlockingQuery() const override;
+
     QueryCache& cache;
     VideoCore::QueryType type;
-
-    std::shared_ptr<HostCounter> dependency; ///< Counter queued before this one.
-    OGLQuery query;                          ///< OpenGL query.
-    std::optional<u64> result;               ///< Added values of the counter.
+    OGLQuery query;
 };
 
-class CachedQuery final {
+class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
 public:
-    explicit CachedQuery(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr);
-    CachedQuery(CachedQuery&&) noexcept;
-    CachedQuery(const CachedQuery&) = delete;
-    ~CachedQuery();
+    explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
+                         u8* host_ptr);
+    CachedQuery(CachedQuery&& rhs) noexcept;
 
-    CachedQuery& operator=(CachedQuery&&) noexcept;
+    CachedQuery& operator=(CachedQuery&& rhs) noexcept;
 
-    /// Writes the counter value to host memory.
-    void Flush();
-
-    /// Updates the counter this cached query registered in guest memory will write when requested.
-    void SetCounter(std::shared_ptr<HostCounter> counter, std::optional<u64> timestamp);
-
-    /// Returns true when a flushing this query will potentially wait for OpenGL.
-    bool WaitPending() const noexcept;
-
-    /// Returns the query type.
-    VideoCore::QueryType GetType() const noexcept;
-
-    /// Returns the guest CPU address for this query.
-    VAddr GetCpuAddr() const noexcept;
-
-    /// Returns the cache address for this query.
-    CacheAddr GetCacheAddr() const noexcept;
-
-    /// Returns the number of cached bytes.
-    u64 GetSizeInBytes() const noexcept;
+    void Flush() override;
 
 private:
-    VideoCore::QueryType type;            ///< Abstracted query type (e.g. samples passed).
-    VAddr cpu_addr;                       ///< Guest CPU address.
-    u8* host_ptr;                         ///< Writable host pointer.
-    std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
-    std::optional<u64> timestamp;         ///< Timestamp to flush to guest memory.
+    QueryCache* cache;
+    VideoCore::QueryType type;
 };
 
 } // namespace OpenGL