early-access version 1952
This commit is contained in:
parent
0f29a1f4ef
commit
92d13687fe
13 changed files with 223 additions and 52 deletions
|
@ -1,7 +1,7 @@
|
||||||
yuzu emulator early access
|
yuzu emulator early access
|
||||||
=============
|
=============
|
||||||
|
|
||||||
This is the source code for early-access 1951.
|
This is the source code for early-access 1952.
|
||||||
|
|
||||||
## Legal Notice
|
## Legal Notice
|
||||||
|
|
||||||
|
|
|
@ -5,10 +5,8 @@
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/core_timing.h"
|
|
||||||
#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
|
#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
|
||||||
#include "core/hle/service/nvdrv/devices/nvmap.h"
|
#include "core/hle/service/nvdrv/devices/nvmap.h"
|
||||||
#include "core/perf_stats.h"
|
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
#include "video_core/renderer_base.h"
|
#include "video_core/renderer_base.h"
|
||||||
|
|
||||||
|
@ -41,7 +39,7 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {}
|
||||||
|
|
||||||
void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
|
void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
|
||||||
u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
|
u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
|
||||||
const Common::Rectangle<int>& crop_rect) {
|
const Common::Rectangle<int>& crop_rect, const MultiFence& fences) {
|
||||||
VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
|
VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
|
||||||
LOG_TRACE(Service,
|
LOG_TRACE(Service,
|
||||||
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
|
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
|
||||||
|
@ -52,10 +50,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
|
||||||
addr, offset, width, height, stride, static_cast<PixelFormat>(format),
|
addr, offset, width, height, stride, static_cast<PixelFormat>(format),
|
||||||
transform, crop_rect};
|
transform, crop_rect};
|
||||||
|
|
||||||
system.GetPerfStats().EndSystemFrame();
|
system.GPU().QueueFrame(&framebuffer, fences);
|
||||||
system.GPU().SwapBuffers(&framebuffer);
|
|
||||||
system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
|
|
||||||
system.GetPerfStats().BeginSystemFrame();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Service::Nvidia::Devices
|
} // namespace Service::Nvidia::Devices
|
||||||
|
|
|
@ -33,7 +33,7 @@ public:
|
||||||
/// Performs a screen flip, drawing the buffer pointed to by the handle.
|
/// Performs a screen flip, drawing the buffer pointed to by the handle.
|
||||||
void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
|
void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
|
||||||
NVFlinger::BufferQueue::BufferTransformFlags transform,
|
NVFlinger::BufferQueue::BufferTransformFlags transform,
|
||||||
const Common::Rectangle<int>& crop_rect);
|
const Common::Rectangle<int>& crop_rect, const MultiFence& fence);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<nvmap> nvmap_dev;
|
std::shared_ptr<nvmap> nvmap_dev;
|
||||||
|
|
|
@ -88,6 +88,10 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
|
||||||
return buffers[slot].igbp_buffer;
|
return buffers[slot].igbp_buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Gives read-only access to the queue's full bookkeeping entry for a slot.
///
/// @param slot Index into `buffers`; it is used with operator[] as-is.
/// @return Reference to the entry stored at `slot`.
const BufferQueue::Buffer& BufferQueue::AccessBuffer(u32 slot) const {
    const auto& entry = buffers[slot];
    return entry;
}
|
||||||
|
|
||||||
void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
|
void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
|
||||||
const Common::Rectangle<int>& crop_rect, u32 swap_interval,
|
const Common::Rectangle<int>& crop_rect, u32 swap_interval,
|
||||||
Service::Nvidia::MultiFence& multi_fence) {
|
Service::Nvidia::MultiFence& multi_fence) {
|
||||||
|
|
|
@ -107,6 +107,7 @@ public:
|
||||||
void Connect();
|
void Connect();
|
||||||
void Disconnect();
|
void Disconnect();
|
||||||
u32 Query(QueryType type);
|
u32 Query(QueryType type);
|
||||||
|
const Buffer& AccessBuffer(u32 slot) const;
|
||||||
|
|
||||||
u32 GetId() const {
|
u32 GetId() const {
|
||||||
return id;
|
return id;
|
||||||
|
|
|
@ -274,8 +274,6 @@ void NVFlinger::Compose() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto& igbp_buffer = buffer->get().igbp_buffer;
|
|
||||||
|
|
||||||
if (!system.IsPoweredOn()) {
|
if (!system.IsPoweredOn()) {
|
||||||
return; // We are likely shutting down
|
return; // We are likely shutting down
|
||||||
}
|
}
|
||||||
|
@ -289,21 +287,29 @@ void NVFlinger::Compose() {
|
||||||
}
|
}
|
||||||
guard->lock();
|
guard->lock();
|
||||||
|
|
||||||
|
system.GetPerfStats().EndSystemFrame();
|
||||||
MicroProfileFlip();
|
MicroProfileFlip();
|
||||||
|
system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
|
||||||
|
system.GetPerfStats().BeginSystemFrame();
|
||||||
|
|
||||||
|
swap_interval = buffer->get().swap_interval;
|
||||||
|
buffer_queue.ReleaseBuffer(buffer->get().slot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void NVFlinger::PrequeueFrame(u32 buffer_queue_id, u32 slot) {
|
||||||
|
auto& buffer_queue = *FindBufferQueue(buffer_queue_id);
|
||||||
|
const auto& buffer = buffer_queue.AccessBuffer(slot);
|
||||||
|
const auto& igbp_buffer = buffer.igbp_buffer;
|
||||||
|
|
||||||
// Now send the buffer to the GPU for drawing.
|
// Now send the buffer to the GPU for drawing.
|
||||||
// TODO(Subv): Support more than just disp0. The display device selection is probably based
|
// TODO(Subv): Support more than just disp0. The display device selection is probably based
|
||||||
// on which display we're drawing (Default, Internal, External, etc)
|
// on which display we're drawing (Default, Internal, External, etc)
|
||||||
auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>("/dev/nvdisp_disp0");
|
auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>("/dev/nvdisp_disp0");
|
||||||
ASSERT(nvdisp);
|
ASSERT(nvdisp);
|
||||||
|
|
||||||
nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format,
|
nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format,
|
||||||
igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
|
igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, buffer.transform,
|
||||||
buffer->get().transform, buffer->get().crop_rect);
|
buffer.crop_rect, buffer.multi_fence);
|
||||||
|
|
||||||
swap_interval = buffer->get().swap_interval;
|
|
||||||
buffer_queue.ReleaseBuffer(buffer->get().slot);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
s64 NVFlinger::GetNextTicks() const {
|
s64 NVFlinger::GetNextTicks() const {
|
||||||
|
|
|
@ -77,6 +77,8 @@ public:
|
||||||
/// Obtains a buffer queue identified by the ID.
|
/// Obtains a buffer queue identified by the ID.
|
||||||
[[nodiscard]] BufferQueue* FindBufferQueue(u32 id);
|
[[nodiscard]] BufferQueue* FindBufferQueue(u32 id);
|
||||||
|
|
||||||
|
void PrequeueFrame(u32 buffer_queue_id, u32 slot);
|
||||||
|
|
||||||
/// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
|
/// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
|
||||||
/// finished.
|
/// finished.
|
||||||
void Compose();
|
void Compose();
|
||||||
|
|
|
@ -592,6 +592,7 @@ private:
|
||||||
buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
|
buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
|
||||||
request.data.GetCropRect(), request.data.swap_interval,
|
request.data.GetCropRect(), request.data.swap_interval,
|
||||||
request.data.multi_fence);
|
request.data.multi_fence);
|
||||||
|
nv_flinger.PrequeueFrame(id, request.data.slot);
|
||||||
|
|
||||||
IGBPQueueBufferResponseParcel response{1280, 720};
|
IGBPQueueBufferResponseParcel response{1280, 720};
|
||||||
ctx.WriteBuffer(response.Serialize());
|
ctx.WriteBuffer(response.Serialize());
|
||||||
|
|
|
@ -114,10 +114,17 @@ void GPU::WaitFence(u32 syncpoint_id, u32 value) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GPU::IncrementSyncPointGuest(const u32 syncpoint_id) {
|
||||||
|
std::lock_guard lock{pre_sync_mutex};
|
||||||
|
auto& syncpoint = pre_syncpoints.at(syncpoint_id);
|
||||||
|
syncpoint++;
|
||||||
|
ProcessFrameRequests(syncpoint_id, syncpoint);
|
||||||
|
}
|
||||||
|
|
||||||
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
|
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
|
||||||
|
std::lock_guard lock{sync_mutex};
|
||||||
auto& syncpoint = syncpoints.at(syncpoint_id);
|
auto& syncpoint = syncpoints.at(syncpoint_id);
|
||||||
syncpoint++;
|
syncpoint++;
|
||||||
std::lock_guard lock{sync_mutex};
|
|
||||||
sync_cv.notify_all();
|
sync_cv.notify_all();
|
||||||
auto& interrupt = syncpt_interrupts.at(syncpoint_id);
|
auto& interrupt = syncpt_interrupts.at(syncpoint_id);
|
||||||
if (!interrupt.empty()) {
|
if (!interrupt.empty()) {
|
||||||
|
@ -162,25 +169,121 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Blocks the calling thread until the work request identified by `fence` has been processed.
///
/// @param fence Fence value handed out when the request was queued (see RequestFlush /
///              RequestQueueFrame, which return `++last_request_fence`).
void GPU::WaitOnWorkRequest(u64 fence) {
    std::unique_lock lck{work_request_mutex};
    // TickWork stores each finished request's fence into current_request_fence and then
    // notifies request_cv, so this request is done once the stored value has reached `fence`.
    // The comparison must be "current >= fence": the reverse form is already satisfied before
    // any work has run and would make this call return immediately.
    request_cv.wait(lck, [this, fence] {
        return current_request_fence.load(std::memory_order_acquire) >= fence;
    });
}
|
||||||
|
|
||||||
u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
|
u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
|
||||||
std::unique_lock lck{flush_request_mutex};
|
std::unique_lock lck{work_request_mutex};
|
||||||
const u64 fence = ++last_flush_fence;
|
const u64 fence = ++last_request_fence;
|
||||||
flush_requests.emplace_back(fence, addr, size);
|
work_requests.emplace_back(fence, addr, size);
|
||||||
|
return fence;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 GPU::RequestQueueFrame(u64 id) {
|
||||||
|
std::unique_lock lck{work_request_mutex};
|
||||||
|
const u64 fence = ++last_request_fence;
|
||||||
|
work_requests.emplace_back(fence, id);
|
||||||
return fence;
|
return fence;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU::TickWork() {
|
void GPU::TickWork() {
|
||||||
std::unique_lock lck{flush_request_mutex};
|
std::unique_lock lck{work_request_mutex};
|
||||||
while (!flush_requests.empty()) {
|
while (!work_requests.empty()) {
|
||||||
auto& request = flush_requests.front();
|
auto request = work_requests.front();
|
||||||
const u64 fence = request.fence;
|
const u64 fence = request.fence;
|
||||||
const VAddr addr = request.addr;
|
work_requests.pop_front();
|
||||||
const std::size_t size = request.size;
|
work_request_mutex.unlock();
|
||||||
flush_requests.pop_front();
|
switch (request.type) {
|
||||||
flush_request_mutex.unlock();
|
case RequestType::Flush: {
|
||||||
rasterizer->FlushRegion(addr, size);
|
rasterizer->FlushRegion(request.flush.addr, request.flush.size);
|
||||||
current_flush_fence.store(fence);
|
break;
|
||||||
flush_request_mutex.lock();
|
}
|
||||||
|
case RequestType::QueueFrame: {
|
||||||
|
Tegra::FramebufferConfig frame_info;
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(frame_requests_mutex);
|
||||||
|
const u64 searching_id = request.queue_frame.id;
|
||||||
|
auto it = std::find_if(
|
||||||
|
frame_queue_items.begin(), frame_queue_items.end(),
|
||||||
|
[searching_id](const FrameQueue& item) { return item.id == searching_id; });
|
||||||
|
ASSERT(it != frame_queue_items.end());
|
||||||
|
frame_info = it->frame_info;
|
||||||
|
frame_queue_items.erase(it);
|
||||||
|
}
|
||||||
|
renderer->SwapBuffers(&frame_info);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default: {
|
||||||
|
LOG_ERROR(HW_GPU, "Unknown work request type={}", request.type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
current_request_fence.store(fence, std::memory_order_release);
|
||||||
|
work_request_mutex.lock();
|
||||||
|
request_cv.notify_all();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Queues a frame for presentation, holding it back until its fences have been reached.
///
/// Three outcomes are possible:
///  - No fences: the frame is stored in `frame_queue_items` and a QueueFrame work request is
///    issued for it through RequestQueueFrame.
///  - Every fence already reached according to `pre_syncpoints`: the frame is passed straight
///    to `gpu_thread.SwapBuffers`.
///  - Otherwise: one FrameTrigger is registered per pending fence and the frame is kept in
///    `frame_requests` until ProcessFrameRequests has consumed all of its triggers.
///
/// @param framebuffer Frame description; copied whenever the frame has to be stored.
/// @param fences      Fences gating the presentation of this frame.
void GPU::QueueFrame(const Tegra::FramebufferConfig* framebuffer,
                     const Service::Nvidia::MultiFence& fences) {
    std::unique_lock<std::mutex> requests_guard(frame_requests_mutex);

    if (fences.num_fences == 0) {
        const u64 queue_id = frame_queue_ids++;
        frame_queue_items.push_back(FrameQueue{
            .frame_info = *framebuffer,
            .id = queue_id,
        });
        RequestQueueFrame(queue_id);
        return;
    }

    const u64 request_id = frame_request_ids++;
    FrameRequest pending{
        .frame_info = *framebuffer,
        .count = 0,
        .id = request_id,
    };

    std::unique_lock sync_guard{pre_sync_mutex};
    for (size_t index = 0; index < fences.num_fences; ++index) {
        const auto& fence = fences.fences[index];
        // NOTE(review): fence.id indexes pre_syncpoints without a range check (other accessors
        // in this file use at()) - confirm callers can never supply an out-of-range id.
        const bool already_reached =
            pre_syncpoints[fence.id].load(std::memory_order_relaxed) >= fence.value;
        if (already_reached) {
            continue;
        }
        frame_triggers[fence.id].push_back(FrameTrigger{
            .id = request_id,
            .sync_point_value = fence.value,
        });
        ++pending.count;
    }

    if (pending.count != 0) {
        // At least one fence is still outstanding: park the frame until the triggers fire.
        frame_requests.emplace(request_id, pending);
        return;
    }

    // Nothing to wait for. Only pre_sync_mutex is released here; frame_requests_mutex remains
    // held for the duration of the swap.
    sync_guard.unlock();
    gpu_thread.SwapBuffers(framebuffer);
}
|
||||||
|
|
||||||
|
void GPU::ProcessFrameRequests(u32 syncpoint_id, u32 new_value) {
|
||||||
|
auto& list = frame_triggers[syncpoint_id];
|
||||||
|
if (list.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto it = list.begin();
|
||||||
|
while (it != list.end()) {
|
||||||
|
if (it->sync_point_value <= new_value) {
|
||||||
|
auto obj = frame_requests.find(it->id);
|
||||||
|
--obj->second.count;
|
||||||
|
if (obj->second.count == 0) {
|
||||||
|
rasterizer->FlushCommands();
|
||||||
|
renderer->SwapBuffers(&obj->second.frame_info);
|
||||||
|
frame_requests.erase(obj);
|
||||||
|
}
|
||||||
|
it = list.erase(it);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
++it;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -399,7 +502,7 @@ void GPU::ProcessFenceActionMethod() {
|
||||||
WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
|
WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
|
||||||
break;
|
break;
|
||||||
case FenceOperation::Increment:
|
case FenceOperation::Increment:
|
||||||
IncrementSyncPoint(regs.fence_action.syncpoint_id);
|
rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
|
UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
|
||||||
|
|
|
@ -159,11 +159,16 @@ public:
|
||||||
void OnCommandListEnd();
|
void OnCommandListEnd();
|
||||||
|
|
||||||
/// Request a host GPU memory flush from the CPU.
|
/// Request a host GPU memory flush from the CPU.
|
||||||
[[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
|
u64 RequestFlush(VAddr addr, std::size_t size);
|
||||||
|
|
||||||
|
void WaitOnWorkRequest(u64 fence);
|
||||||
|
|
||||||
|
void QueueFrame(const Tegra::FramebufferConfig* framebuffer,
|
||||||
|
const Service::Nvidia::MultiFence& fence);
|
||||||
|
|
||||||
/// Obtains current flush request fence id.
|
/// Obtains current flush request fence id.
|
||||||
[[nodiscard]] u64 CurrentFlushRequestFence() const {
|
[[nodiscard]] u64 CurrentWorkRequestFence() const {
|
||||||
return current_flush_fence.load(std::memory_order_relaxed);
|
return current_request_fence.load(std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Tick pending requests within the GPU.
|
/// Tick pending requests within the GPU.
|
||||||
|
@ -225,6 +230,7 @@ public:
|
||||||
/// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
|
/// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
|
||||||
void WaitFence(u32 syncpoint_id, u32 value);
|
void WaitFence(u32 syncpoint_id, u32 value);
|
||||||
|
|
||||||
|
void IncrementSyncPointGuest(u32 syncpoint_id);
|
||||||
void IncrementSyncPoint(u32 syncpoint_id);
|
void IncrementSyncPoint(u32 syncpoint_id);
|
||||||
|
|
||||||
[[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const;
|
[[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const;
|
||||||
|
@ -365,6 +371,34 @@ private:
|
||||||
/// Determines where the method should be executed.
|
/// Determines where the method should be executed.
|
||||||
[[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
|
[[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
|
||||||
|
|
||||||
|
/// A frame held back by QueueFrame until the syncpoint fences it depends on are reached.
/// Stored in `frame_requests`, keyed by `id`.
struct FrameRequest {
|
||||||
|
/// Copy of the framebuffer configuration that will be handed to the renderer.
Tegra::FramebufferConfig frame_info;
|
||||||
|
/// Number of FrameTriggers still outstanding; ProcessFrameRequests decrements it and
/// presents the frame when it reaches zero.
size_t count;
|
||||||
|
/// Identifier taken from `frame_request_ids`; FrameTrigger::id refers back to it.
u64 id;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// One pending fence of a FrameRequest. Kept in `frame_triggers`, keyed by the fence's
/// syncpoint id.
struct FrameTrigger {
|
||||||
|
/// Identifier of the FrameRequest this trigger belongs to.
u64 id;
|
||||||
|
/// Fence value to reach; ProcessFrameRequests fires the trigger once the syncpoint's new
/// value is greater than or equal to it.
u32 sync_point_value;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// A frame queued by QueueFrame without any fences. Stored in `frame_queue_items` until the
/// matching QueueFrame work request is handled in TickWork.
struct FrameQueue {
|
||||||
|
/// Copy of the framebuffer configuration to present.
Tegra::FramebufferConfig frame_info;
|
||||||
|
/// Identifier taken from `frame_queue_ids`; TickWork looks the entry up by this value.
u64 id;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Request a frame release on the GPU thread
|
||||||
|
u64 RequestQueueFrame(u64 id);
|
||||||
|
|
||||||
|
void ProcessFrameRequests(u32 syncpoint_id, u32 new_value);
|
||||||
|
|
||||||
|
std::mutex frame_requests_mutex;
|
||||||
|
std::unordered_map<u32, std::list<FrameTrigger>> frame_triggers;
|
||||||
|
std::unordered_map<u64, FrameRequest> frame_requests;
|
||||||
|
std::list<FrameQueue> frame_queue_items;
|
||||||
|
u64 frame_queue_ids{};
|
||||||
|
u64 frame_request_ids{};
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
std::unique_ptr<Tegra::MemoryManager> memory_manager;
|
std::unique_ptr<Tegra::MemoryManager> memory_manager;
|
||||||
|
@ -392,27 +426,50 @@ private:
|
||||||
/// When true, we are about to shut down emulation session, so terminate outstanding tasks
|
/// When true, we are about to shut down emulation session, so terminate outstanding tasks
|
||||||
std::atomic_bool shutting_down{};
|
std::atomic_bool shutting_down{};
|
||||||
|
|
||||||
|
std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> pre_syncpoints{};
|
||||||
std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
|
std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
|
||||||
|
|
||||||
std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
|
std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
|
||||||
|
|
||||||
|
std::mutex pre_sync_mutex;
|
||||||
std::mutex sync_mutex;
|
std::mutex sync_mutex;
|
||||||
std::mutex device_mutex;
|
std::mutex device_mutex;
|
||||||
|
|
||||||
std::condition_variable sync_cv;
|
std::condition_variable sync_cv;
|
||||||
|
|
||||||
struct FlushRequest {
|
enum class RequestType : u32 {
|
||||||
explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
|
Flush = 0,
|
||||||
: fence{fence_}, addr{addr_}, size{size_} {}
|
QueueFrame = 1,
|
||||||
u64 fence;
|
|
||||||
VAddr addr;
|
|
||||||
std::size_t size;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
std::list<FlushRequest> flush_requests;
|
struct WorkRequest {
|
||||||
std::atomic<u64> current_flush_fence{};
|
explicit WorkRequest(u64 fence_, VAddr addr_, std::size_t size_)
|
||||||
u64 last_flush_fence{};
|
: fence{fence_}, type{RequestType::Flush} {
|
||||||
std::mutex flush_request_mutex;
|
flush.addr = addr_;
|
||||||
|
flush.size = size_;
|
||||||
|
}
|
||||||
|
|
||||||
|
explicit WorkRequest(u64 fence_, u64 id) : fence{fence_}, type{RequestType::QueueFrame} {
|
||||||
|
queue_frame.id = id;
|
||||||
|
}
|
||||||
|
u64 fence;
|
||||||
|
union {
|
||||||
|
struct {
|
||||||
|
VAddr addr;
|
||||||
|
std::size_t size;
|
||||||
|
} flush;
|
||||||
|
struct {
|
||||||
|
u64 id;
|
||||||
|
} queue_frame;
|
||||||
|
};
|
||||||
|
RequestType type;
|
||||||
|
}; // struct WorkRequest
|
||||||
|
|
||||||
|
std::list<WorkRequest> work_requests;
|
||||||
|
std::atomic<u64> current_request_fence{};
|
||||||
|
u64 last_request_fence{};
|
||||||
|
std::mutex work_request_mutex;
|
||||||
|
std::condition_variable request_cv;
|
||||||
|
|
||||||
const bool is_async;
|
const bool is_async;
|
||||||
|
|
||||||
|
|
|
@ -105,7 +105,7 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
|
||||||
auto& gpu = system.GPU();
|
auto& gpu = system.GPU();
|
||||||
u64 fence = gpu.RequestFlush(addr, size);
|
u64 fence = gpu.RequestFlush(addr, size);
|
||||||
PushCommand(GPUTickCommand(), true);
|
PushCommand(GPUTickCommand(), true);
|
||||||
ASSERT(fence <= gpu.CurrentFlushRequestFence());
|
ASSERT(fence <= gpu.CurrentWorkRequestFence());
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
|
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
|
||||||
|
|
|
@ -214,6 +214,8 @@ void RasterizerOpenGL::Clear() {
|
||||||
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
||||||
|
|
||||||
|
SCOPE_EXIT({ gpu.TickWork(); });
|
||||||
|
|
||||||
query_cache.UpdateCounters();
|
query_cache.UpdateCounters();
|
||||||
|
|
||||||
SyncState();
|
SyncState();
|
||||||
|
@ -269,8 +271,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||||
|
|
||||||
++num_queued_commands;
|
++num_queued_commands;
|
||||||
has_written_global_memory |= pipeline->WritesGlobalMemory();
|
has_written_global_memory |= pipeline->WritesGlobalMemory();
|
||||||
|
|
||||||
gpu.TickWork();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::DispatchCompute() {
|
void RasterizerOpenGL::DispatchCompute() {
|
||||||
|
@ -421,6 +421,7 @@ void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SignalSyncPoint(u32 value) {
|
void RasterizerOpenGL::SignalSyncPoint(u32 value) {
|
||||||
|
gpu.IncrementSyncPointGuest(value);
|
||||||
if (!gpu.IsAsync()) {
|
if (!gpu.IsAsync()) {
|
||||||
gpu.IncrementSyncPoint(value);
|
gpu.IncrementSyncPoint(value);
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -412,6 +412,7 @@ void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::SignalSyncPoint(u32 value) {
|
void RasterizerVulkan::SignalSyncPoint(u32 value) {
|
||||||
|
gpu.IncrementSyncPointGuest(value);
|
||||||
if (!gpu.IsAsync()) {
|
if (!gpu.IsAsync()) {
|
||||||
gpu.IncrementSyncPoint(value);
|
gpu.IncrementSyncPoint(value);
|
||||||
return;
|
return;
|
||||||
|
|
Loading…
Reference in a new issue