From ec14ffe1cda04cd098ce07f3d3ad96c253e91eed Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 22 Jul 2014 22:59:26 -0400 Subject: [PATCH] GSP: Implements preliminary command synchronization via GPU interrupts. Core: Added a comment to explain the logic for the RunLoop iterations. --- src/core/core.cpp | 20 ++++--- src/core/hle/service/gsp.cpp | 111 +++++++++++++++++++++++++++++------ src/core/hle/service/gsp.h | 16 +++++ src/core/hw/gpu.cpp | 22 ++++++- 4 files changed, 140 insertions(+), 29 deletions(-) diff --git a/src/core/core.cpp b/src/core/core.cpp index 7dc0809d0..fc9909377 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -26,21 +26,25 @@ ARM_Interface* g_sys_core = nullptr; ///< ARM11 system (OS) core /// Run the core CPU loop void RunLoop() { for (;;){ - g_app_core->Run(GPU::kFrameTicks); + // This function loops for 100 instructions in the CPU before trying to update hardware. + // This is a little bit faster than SingleStep, and should be pretty much equivalent. The + // number of instructions chosen is fairly arbitrary, however a large number will more + // drastically affect the frequency of GSP interrupts and likely break things. The point of + // this is to just loop in the CPU for more than 1 instruction to reduce overhead and make + // it a little bit faster... + g_app_core->Run(100); HW::Update(); - Kernel::Reschedule(); + if (HLE::g_reschedule) { + Kernel::Reschedule(); + } } } /// Step the CPU one instruction void SingleStep() { g_app_core->Step(); - - // Update and reschedule after approx. 1 frame - u64 current_ticks = Core::g_app_core->GetTicks(); - if ((current_ticks - g_last_ticks) >= GPU::kFrameTicks || HLE::g_reschedule) { - g_last_ticks = current_ticks; - HW::Update(); + HW::Update(); + if (HLE::g_reschedule) { Kernel::Reschedule(); } } diff --git a/src/core/hle/service/gsp.cpp b/src/core/hle/service/gsp.cpp index b20203e27..f3d9fd26d 100644 --- a/src/core/hle/service/gsp.cpp +++ b/src/core/hle/service/gsp.cpp @@ -21,6 +21,27 @@ // Main graphics debugger object - TODO: Here is probably not the best place for this GraphicsDebugger g_debugger; +/// GSP thread interrupt queue header +struct GX_InterruptQueue { + union { + u32 hex; + + // Index of last interrupt in the queue + BitField<0,8,u32> index; + + // Number of interrupts remaining to be processed by the userland code + BitField<8,8,u32> number_interrupts; + + // Error code - zero on success, otherwise an error has occurred + BitField<16,8,u32> error_code; + }; + + u32 unk0; + u32 unk1; + + GSP_GPU::GXInterruptId slot[0x34]; ///< Interrupt ID slots +}; + /// GSP shared memory GX command buffer header union GX_CmdBufferHeader { u32 hex; @@ -45,20 +66,28 @@ namespace GSP_GPU { Handle g_event = 0; Handle g_shared_memory = 0; -u32 g_thread_id = 0; +u32 g_thread_id = 1; /// Gets a pointer to the start (header) of a command buffer in GSP shared memory static inline u8* GX_GetCmdBufferPointer(u32 thread_id, u32 offset=0) { + if (0 == g_shared_memory) return nullptr; + return Kernel::GetSharedMemoryPointer(g_shared_memory, 0x800 + (thread_id * 0x200) + offset); } +/// Gets a pointer to the start (header) of a command buffer in GSP shared memory +static inline GX_InterruptQueue* GetInterruptQueue(u32 thread_id) { + return (GX_InterruptQueue*)Kernel::GetSharedMemoryPointer(g_shared_memory, sizeof(GX_InterruptQueue) * thread_id); +} + /// Finishes execution of a GSP command void GX_FinishCommand(u32 thread_id) { GX_CmdBufferHeader* header = (GX_CmdBufferHeader*)GX_GetCmdBufferPointer(thread_id); g_debugger.GXCommandProcessed(GX_GetCmdBufferPointer(thread_id, 0x20 + (header->index * 0x20))); - header->number_commands = header->number_commands - 1; + header->number_commands = 0; + // TODO: Increment header->index? } @@ -134,33 +163,55 @@ void RegisterInterruptRelayQueue(Service::Interface* self) { u32* cmd_buff = Service::GetCommandBuffer(); u32 flags = cmd_buff[1]; g_event = cmd_buff[3]; + g_shared_memory = Kernel::CreateSharedMemory("GSPSharedMem"); _assert_msg_(GSP, (g_event != 0), "handle is not valid!"); - Kernel::SetEventLocked(g_event, false); + cmd_buff[2] = g_thread_id++; // ThreadID + cmd_buff[4] = g_shared_memory; // GSP shared memory - // Hack - This function will permanently set the state of the GSP event such that GPU command - // synchronization barriers always passthrough. Correct solution would be to set this after the - // GPU as processed all queued up commands, but due to the emulator being single-threaded they - // will always be ready. - Kernel::SetPermanentLock(g_event, true); - - cmd_buff[0] = 0; // Result - no error - cmd_buff[2] = g_thread_id; // ThreadID - cmd_buff[4] = g_shared_memory; // GSP shared memory + Kernel::SignalEvent(GSP_GPU::g_event); // TODO(bunnei): Is this correct? } +/** + * Signals that the specified interrupt type has occurred to userland code + * @param interrupt_id ID of interrupt that is being signalled + */ +void SignalInterrupt(GXInterruptId interrupt_id) { + if (0 == GSP_GPU::g_event) { + WARN_LOG(GSP, "cannot synchronize until GSP event has been created!"); + return; + } + if (0 == g_shared_memory) { + WARN_LOG(GSP, "cannot synchronize until GSP shared memory has been created!"); + return; + } + for (int thread_id = 0; thread_id < 0x4; ++thread_id) { + GX_InterruptQueue* interrupt_queue = GetInterruptQueue(thread_id); + interrupt_queue->number_interrupts = interrupt_queue->number_interrupts + 1; + + u8 next = interrupt_queue->index; + next += interrupt_queue->number_interrupts; + next = next % 0x34; -/// This triggers handling of the GX command written to the command buffer in shared memory. -void TriggerCmdReqQueue(Service::Interface* self) { + interrupt_queue->slot[next] = interrupt_id; + interrupt_queue->error_code = 0x0; // No error + } + Kernel::SignalEvent(GSP_GPU::g_event); +} + +/// Executes the next GSP command +void ExecuteCommand(int thread_id, int command_index) { // Utility function to convert register ID to address auto WriteGPURegister = [](u32 id, u32 data) { GPU::Write(0x1EF00000 + 4 * id, data); }; - GX_CmdBufferHeader* header = (GX_CmdBufferHeader*)GX_GetCmdBufferPointer(g_thread_id); - auto& command = *(const GXCommand*)GX_GetCmdBufferPointer(g_thread_id, 0x20 + (header->index * 0x20)); + GX_CmdBufferHeader* header = (GX_CmdBufferHeader*)GX_GetCmdBufferPointer(thread_id); + auto& command = *(const GXCommand*)GX_GetCmdBufferPointer(thread_id, (command_index + 1) * 0x20); + + NOTICE_LOG(GSP, "decoding command 0x%08X", (int)command.id.Value()); switch (command.id) { @@ -186,6 +237,7 @@ void TriggerCmdReqQueue(Service::Interface* self) { g_debugger.CommandListCalled(params.address, (u32*)Memory::GetPointer(params.address), params.size); + SignalInterrupt(GXInterruptId::P3D); break; } @@ -208,6 +260,16 @@ void TriggerCmdReqQueue(Service::Interface* self) { // TODO: Check if texture copies are implemented correctly.. case GXCommandId::SET_DISPLAY_TRANSFER: + // TODO(bunnei): Signalling all of these interrupts here is totally wrong, but it seems to + // work well enough for running demos. Need to figure out how these all work and trigger + // them correctly. + SignalInterrupt(GXInterruptId::PSC0); + SignalInterrupt(GXInterruptId::PSC1); + SignalInterrupt(GXInterruptId::PPF); + SignalInterrupt(GXInterruptId::P3D); + SignalInterrupt(GXInterruptId::DMA); + break; + case GXCommandId::SET_TEXTURE_COPY: { auto& params = command.image_copy; @@ -233,8 +295,21 @@ void TriggerCmdReqQueue(Service::Interface* self) { default: ERROR_LOG(GSP, "unknown command 0x%08X", (int)command.id.Value()); } +} - GX_FinishCommand(g_thread_id); +/// This triggers handling of the GX command written to the command buffer in shared memory. +void TriggerCmdReqQueue(Service::Interface* self) { + // Iterate through each thread's command queue... + for (int thread_id = 0; thread_id < 0x4; ++thread_id) { + GX_CmdBufferHeader* header = (GX_CmdBufferHeader*)GX_GetCmdBufferPointer(thread_id); + + // Iterate through each command... + for (int command_index = 0; command_index < header->number_commands; ++command_index) { + ExecuteCommand(thread_id, command_index); + } + + GX_FinishCommand(thread_id); + } } const Interface::FunctionInfo FunctionTable[] = { @@ -275,7 +350,7 @@ const Interface::FunctionInfo FunctionTable[] = { Interface::Interface() { Register(FunctionTable, ARRAY_SIZE(FunctionTable)); - g_shared_memory = Kernel::CreateSharedMemory("GSPSharedMem"); + g_shared_memory = 0; } Interface::~Interface() { diff --git a/src/core/hle/service/gsp.h b/src/core/hle/service/gsp.h index a83cb4846..5a649d2df 100644 --- a/src/core/hle/service/gsp.h +++ b/src/core/hle/service/gsp.h @@ -29,6 +29,16 @@ enum class GXCommandId : u32 { SET_COMMAND_LIST_FIRST = 0x05, }; +enum class GXInterruptId : u8 { + PSC0 = 0x00, + PSC1 = 0x01, + PDC0 = 0x02, // Seems called every vertical screen line + PDC1 = 0x03, // Seems called every frame + PPF = 0x04, + P3D = 0x05, + DMA = 0x06, +}; + struct GXCommand { BitField<0, 8, GXCommandId> id; @@ -84,4 +94,10 @@ public: }; +/** + * Signals that the specified interrupt type has occurred to userland code + * @param interrupt_id ID of interrupt that is being signalled + */ +void SignalInterrupt(GXInterruptId interrupt_id); + } // namespace diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index c00be2a83..41976d989 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -7,7 +7,11 @@ #include "core/core.h" #include "core/mem_map.h" + +#include "core/hle/hle.h" #include "core/hle/kernel/thread.h" +#include "core/hle/service/gsp.h" + #include "core/hw/gpu.h" #include "video_core/video_core.h" @@ -17,7 +21,8 @@ namespace GPU { RegisterSet g_regs; -u64 g_last_ticks = 0; ///< Last CPU ticks +u32 g_cur_line = 0; ///< Current vertical screen line +u64 g_last_ticks = 0; ///< Last CPU ticks /** * Sets whether the framebuffers are in the GSP heap (FCRAM) or VRAM @@ -249,17 +254,28 @@ template void Write(u32 addr, const u8 data); void Update() { u64 current_ticks = Core::g_app_core->GetTicks(); - // Fake a vertical blank - if ((current_ticks - g_last_ticks) >= kFrameTicks) { + // Synchronize line... + if ((current_ticks - g_last_ticks) >= GPU::kFrameTicks / 400) { + GSP_GPU::SignalInterrupt(GSP_GPU::GXInterruptId::PDC0); + g_cur_line++; g_last_ticks = current_ticks; + } + + // Synchronize frame... + if (g_cur_line >= 400) { + g_cur_line = 0; + GSP_GPU::SignalInterrupt(GSP_GPU::GXInterruptId::PDC1); VideoCore::g_renderer->SwapBuffers(); Kernel::WaitCurrentThread(WAITTYPE_VBLANK); + HLE::Reschedule(__func__); } } /// Initialize hardware void Init() { + g_cur_line = 0; g_last_ticks = Core::g_app_core->GetTicks(); + // SetFramebufferLocation(FRAMEBUFFER_LOCATION_FCRAM); SetFramebufferLocation(FRAMEBUFFER_LOCATION_VRAM);