From 6d995b1ff654483f830b6c285629545263393d7e Mon Sep 17 00:00:00 2001 From: tfarley Date: Sun, 3 May 2015 15:34:48 -0400 Subject: [PATCH 1/4] INI hw/sw renderer toggle --- src/citra/config.cpp | 2 ++ src/citra/default_ini.h | 4 ++++ src/citra_qt/config.cpp | 4 ++++ src/core/settings.h | 2 ++ 4 files changed, 12 insertions(+) diff --git a/src/citra/config.cpp b/src/citra/config.cpp index ab564559d..846479fd7 100644 --- a/src/citra/config.cpp +++ b/src/citra/config.cpp @@ -70,6 +70,8 @@ void Config::ReadValues() { Settings::values.frame_skip = glfw_config->GetInteger("Core", "frame_skip", 0); // Renderer + Settings::values.use_hw_renderer = glfw_config->GetBoolean("Renderer", "use_hw_renderer", false); + Settings::values.bg_red = (float)glfw_config->GetReal("Renderer", "bg_red", 1.0); Settings::values.bg_green = (float)glfw_config->GetReal("Renderer", "bg_green", 1.0); Settings::values.bg_blue = (float)glfw_config->GetReal("Renderer", "bg_blue", 1.0); diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h index 1dd971926..fd5a90d56 100644 --- a/src/citra/default_ini.h +++ b/src/citra/default_ini.h @@ -42,6 +42,10 @@ gpu_refresh_rate = frame_skip = [Renderer] +# Whether to use software or hardware rendering. +# 0 (default): Software, 1: Hardware +use_hw_renderer = + # The clear color for the renderer. What shows up on the sides of the bottom screen. # Must be in range of 0.0-1.0. Defaults to 1.0 for all. 
bg_red = diff --git a/src/citra_qt/config.cpp b/src/citra_qt/config.cpp index fb85121b3..460f4ec07 100644 --- a/src/citra_qt/config.cpp +++ b/src/citra_qt/config.cpp @@ -54,6 +54,8 @@ void Config::ReadValues() { qt_config->endGroup(); qt_config->beginGroup("Renderer"); + Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", false).toBool(); + Settings::values.bg_red = qt_config->value("bg_red", 1.0).toFloat(); Settings::values.bg_green = qt_config->value("bg_green", 1.0).toFloat(); Settings::values.bg_blue = qt_config->value("bg_blue", 1.0).toFloat(); @@ -105,6 +107,8 @@ void Config::SaveValues() { qt_config->endGroup(); qt_config->beginGroup("Renderer"); + qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer); + // Cast to double because Qt's written float values are not human-readable qt_config->setValue("bg_red", (double)Settings::values.bg_red); qt_config->setValue("bg_green", (double)Settings::values.bg_green); diff --git a/src/core/settings.h b/src/core/settings.h index 0f4700241..54c1023b8 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -45,6 +45,8 @@ struct Values { int region_value; // Renderer + bool use_hw_renderer; + float bg_red; float bg_green; float bg_blue; From 05dc633a8c35221ce8d6abe6ddf027f8b0bab6c2 Mon Sep 17 00:00:00 2001 From: tfarley Date: Mon, 18 May 2015 21:21:33 -0700 Subject: [PATCH 2/4] OpenGL renderer --- src/citra/citra.cpp | 4 + src/citra_qt/main.cpp | 9 + src/citra_qt/main.h | 1 + src/citra_qt/main.ui | 9 + src/common/math_util.h | 4 + src/core/hle/service/gsp_gpu.cpp | 9 + src/core/hw/gpu.cpp | 16 +- src/video_core/CMakeLists.txt | 12 +- src/video_core/command_processor.cpp | 26 +- src/video_core/debug_utils/debug_utils.cpp | 4 + src/video_core/hwrasterizer_base.h | 40 + src/video_core/pica.h | 37 +- src/video_core/rasterizer.cpp | 20 +- src/video_core/renderer_base.h | 4 + .../renderer_opengl/gl_rasterizer.cpp | 879 ++++++++++++++++++ .../renderer_opengl/gl_rasterizer.h | 207 +++++ 
.../renderer_opengl/gl_rasterizer_cache.cpp | 77 ++ .../renderer_opengl/gl_rasterizer_cache.h | 36 + .../renderer_opengl/gl_resource_manager.cpp | 111 +++ .../renderer_opengl/gl_resource_manager.h | 79 ++ src/video_core/renderer_opengl/gl_shaders.h | 288 ++++++ src/video_core/renderer_opengl/gl_state.cpp | 160 ++++ src/video_core/renderer_opengl/gl_state.h | 70 ++ src/video_core/renderer_opengl/pica_to_gl.h | 105 +++ .../renderer_opengl/renderer_opengl.cpp | 61 +- .../renderer_opengl/renderer_opengl.h | 16 +- src/video_core/video_core.cpp | 3 + src/video_core/video_core.h | 5 + 28 files changed, 2245 insertions(+), 47 deletions(-) create mode 100644 src/video_core/hwrasterizer_base.h create mode 100644 src/video_core/renderer_opengl/gl_rasterizer.cpp create mode 100644 src/video_core/renderer_opengl/gl_rasterizer.h create mode 100644 src/video_core/renderer_opengl/gl_rasterizer_cache.cpp create mode 100644 src/video_core/renderer_opengl/gl_rasterizer_cache.h create mode 100644 src/video_core/renderer_opengl/gl_resource_manager.cpp create mode 100644 src/video_core/renderer_opengl/gl_resource_manager.h create mode 100644 src/video_core/renderer_opengl/gl_state.cpp create mode 100644 src/video_core/renderer_opengl/gl_state.h create mode 100644 src/video_core/renderer_opengl/pica_to_gl.h diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp index ca93d5b91..ce8d7dd25 100644 --- a/src/citra/citra.cpp +++ b/src/citra/citra.cpp @@ -18,6 +18,8 @@ #include "citra/config.h" #include "citra/emu_window/emu_window_glfw.h" +#include "video_core/video_core.h" + /// Application entry point int main(int argc, char **argv) { Log::Filter log_filter(Log::Level::Debug); @@ -34,6 +36,8 @@ int main(int argc, char **argv) { std::string boot_filename = argv[1]; EmuWindow_GLFW* emu_window = new EmuWindow_GLFW; + VideoCore::g_hw_renderer_enabled = Settings::values.use_hw_renderer; + System::Init(emu_window); Loader::ResultStatus load_result = Loader::LoadFile(boot_filename); diff --git 
a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp index 24506deab..2ea33ebc5 100644 --- a/src/citra_qt/main.cpp +++ b/src/citra_qt/main.cpp @@ -44,6 +44,8 @@ #include "core/arm/disassembler/load_symbol_map.h" #include "citra_qt/config.h" +#include "video_core/video_core.h" + #include "version.h" GMainWindow::GMainWindow() : emu_thread(nullptr) @@ -123,6 +125,9 @@ GMainWindow::GMainWindow() : emu_thread(nullptr) restoreState(settings.value("state").toByteArray()); render_window->restoreGeometry(settings.value("geometryRenderWindow").toByteArray()); + ui.action_Use_Hardware_Renderer->setChecked(Settings::values.use_hw_renderer); + SetHardwareRendererEnabled(ui.action_Use_Hardware_Renderer->isChecked()); + ui.action_Single_Window_Mode->setChecked(settings.value("singleWindowMode", true).toBool()); ToggleWindowMode(); @@ -135,6 +140,7 @@ GMainWindow::GMainWindow() : emu_thread(nullptr) connect(ui.action_Start, SIGNAL(triggered()), this, SLOT(OnStartGame())); connect(ui.action_Pause, SIGNAL(triggered()), this, SLOT(OnPauseGame())); connect(ui.action_Stop, SIGNAL(triggered()), this, SLOT(OnStopGame())); + connect(ui.action_Use_Hardware_Renderer, SIGNAL(triggered(bool)), this, SLOT(SetHardwareRendererEnabled(bool))); connect(ui.action_Single_Window_Mode, SIGNAL(triggered(bool)), this, SLOT(ToggleWindowMode())); connect(ui.action_Hotkeys, SIGNAL(triggered()), this, SLOT(OnOpenHotkeysDialog())); @@ -300,6 +306,9 @@ void GMainWindow::OnOpenHotkeysDialog() dialog.exec(); } +void GMainWindow::SetHardwareRendererEnabled(bool enabled) { + VideoCore::g_hw_renderer_enabled = enabled; +} void GMainWindow::ToggleWindowMode() { if (ui.action_Single_Window_Mode->isChecked()) { diff --git a/src/citra_qt/main.h b/src/citra_qt/main.h index 3e29534fb..242b08c39 100644 --- a/src/citra_qt/main.h +++ b/src/citra_qt/main.h @@ -69,6 +69,7 @@ private slots: void OnOpenHotkeysDialog(); void OnConfigure(); void OnDisplayTitleBars(bool); + void SetHardwareRendererEnabled(bool); void 
ToggleWindowMode(); private: diff --git a/src/citra_qt/main.ui b/src/citra_qt/main.ui index 689806465..0942c28c8 100644 --- a/src/citra_qt/main.ui +++ b/src/citra_qt/main.ui @@ -52,6 +52,7 @@ + @@ -131,6 +132,14 @@ Configure &Hotkeys ... + + + true + + + Use Hardware Renderer + + Configure ... diff --git a/src/common/math_util.h b/src/common/math_util.h index 0b1400b41..4b0910741 100644 --- a/src/common/math_util.h +++ b/src/common/math_util.h @@ -11,6 +11,10 @@ namespace MathUtil { +inline bool IntervalsIntersect(unsigned start0, unsigned length0, unsigned start1, unsigned length1) { + return (std::max(start0, start1) <= std::min(start0 + length0, start1 + length1)); +} + template inline T Clamp(const T val, const T& min, const T& max) { diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp index c11c5faba..c56475ae4 100644 --- a/src/core/hle/service/gsp_gpu.cpp +++ b/src/core/hle/service/gsp_gpu.cpp @@ -15,6 +15,7 @@ #include "core/hw/lcd.h" #include "video_core/gpu_debugger.h" +#include "video_core/video_core.h" // Main graphics debugger object - TODO: Here is probably not the best place for this GraphicsDebugger g_debugger; @@ -264,6 +265,8 @@ static void FlushDataCache(Service::Interface* self) { u32 size = cmd_buff[2]; u32 process = cmd_buff[4]; + VideoCore::g_renderer->hw_rasterizer->NotifyFlush(Memory::VirtualToPhysicalAddress(address), size); + // TODO(purpasmart96): Verify return header on HW cmd_buff[1] = RESULT_SUCCESS.raw; // No error @@ -352,10 +355,16 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { // GX request DMA - typically used for copying memory from GSP heap to VRAM case CommandId::REQUEST_DMA: + VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(Memory::VirtualToPhysicalAddress(command.dma_request.source_address), + command.dma_request.size); + memcpy(Memory::GetPointer(command.dma_request.dest_address), Memory::GetPointer(command.dma_request.source_address), command.dma_request.size); 
SignalInterrupt(InterruptId::DMA); + + VideoCore::g_renderer->hw_rasterizer->NotifyFlush(Memory::VirtualToPhysicalAddress(command.dma_request.dest_address), + command.dma_request.size); break; // ctrulib homebrew sends all relevant command list data with this command, diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 8ef1f70df..ddc5d647e 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -106,6 +106,8 @@ inline void Write(u32 addr, const T data) { } else { GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1); } + + VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress()); } break; } @@ -129,19 +131,25 @@ inline void Write(u32 addr, const T data) { u32 output_width = config.output_width / horizontal_scale; u32 output_height = config.output_height / vertical_scale; + u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format); + u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); + + VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size); + if (config.raw_copy) { // Raw copies do not perform color conversion nor tiled->linear / linear->tiled conversions // TODO(Subv): Verify if raw copies perform scaling - memcpy(dst_pointer, src_pointer, config.output_width * config.output_height * - GPU::Regs::BytesPerPixel(config.output_format)); + memcpy(dst_pointer, src_pointer, output_size); LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), output format: %x, flags 0x%08X, Raw copy", - config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format), + output_size, config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), config.GetPhysicalOutputAddress(), config.output_width.Value(), config.output_height.Value(), config.output_format.Value(), config.flags); 
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); + + VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size); break; } @@ -247,6 +255,8 @@ inline void Write(u32 addr, const T data) { config.output_format.Value(), config.flags); GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); + + VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size); } break; } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 4c1e6449a..9866078d4 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,7 +1,11 @@ set(SRCS renderer_opengl/generated/gl_3_2_core.c - renderer_opengl/renderer_opengl.cpp + renderer_opengl/gl_rasterizer.cpp + renderer_opengl/gl_rasterizer_cache.cpp + renderer_opengl/gl_resource_manager.cpp renderer_opengl/gl_shader_util.cpp + renderer_opengl/gl_state.cpp + renderer_opengl/renderer_opengl.cpp debug_utils/debug_utils.cpp clipper.cpp command_processor.cpp @@ -15,13 +19,19 @@ set(SRCS set(HEADERS debug_utils/debug_utils.h renderer_opengl/generated/gl_3_2_core.h + renderer_opengl/gl_rasterizer.h + renderer_opengl/gl_rasterizer_cache.h + renderer_opengl/gl_resource_manager.h renderer_opengl/gl_shader_util.h renderer_opengl/gl_shaders.h + renderer_opengl/gl_state.h + renderer_opengl/pica_to_gl.h renderer_opengl/renderer_opengl.h clipper.h color.h command_processor.h gpu_debugger.h + hwrasterizer_base.h math.h pica.h primitive_assembly.h diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 6121df8e3..5c4c04408 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -12,8 +12,10 @@ #include "pica.h" #include "primitive_assembly.h" #include "vertex_shader.h" +#include "video_core.h" #include "core/hle/service/gsp_gpu.h" #include "core/hw/gpu.h" +#include "core/settings.h" #include "debug_utils/debug_utils.h" @@ -107,7 +109,7 @@ static inline void 
WritePicaReg(u32 id, u32 value, u32 mask) { bool index_u16 = index_info.format != 0; DebugUtils::GeometryDumper geometry_dumper; - PrimitiveAssembler clipper_primitive_assembler(registers.triangle_topology.Value()); + PrimitiveAssembler primitive_assembler(registers.triangle_topology.Value()); PrimitiveAssembler dumping_primitive_assembler(registers.triangle_topology.Value()); for (unsigned int index = 0; index < registers.num_vertices; ++index) @@ -185,9 +187,25 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { // TODO: Add processed vertex to vertex cache! } - // Send to triangle clipper - clipper_primitive_assembler.SubmitVertex(output, Clipper::ProcessTriangle); + if (Settings::values.use_hw_renderer) { + // Send to hardware renderer + static auto AddHWTriangle = [](const Pica::VertexShader::OutputVertex& v0, + const Pica::VertexShader::OutputVertex& v1, + const Pica::VertexShader::OutputVertex& v2) { + VideoCore::g_renderer->hw_rasterizer->AddTriangle(v0, v1, v2); + }; + + primitive_assembler.SubmitVertex(output, AddHWTriangle); + } else { + // Send to triangle clipper + primitive_assembler.SubmitVertex(output, Clipper::ProcessTriangle); + } } + + if (Settings::values.use_hw_renderer) { + VideoCore::g_renderer->hw_rasterizer->DrawTriangles(); + } + geometry_dumper.Dump(); if (g_debug_context) @@ -340,6 +358,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { break; } + VideoCore::g_renderer->hw_rasterizer->NotifyPicaRegisterChanged(id); + if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::CommandProcessed, reinterpret_cast(&id)); } diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 883df48a5..9da44ccd6 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -24,6 +24,7 @@ #include "video_core/math.h" #include "video_core/pica.h" #include "video_core/utils.h" +#include "video_core/video_core.h" #include 
"debug_utils.h" @@ -40,6 +41,9 @@ void DebugContext::OnEvent(Event event, void* data) { { std::unique_lock lock(breakpoint_mutex); + // Commit the hardware renderer's framebuffer so it will show on debug widgets + VideoCore::g_renderer->hw_rasterizer->CommitFramebuffer(); + // TODO: Should stop the CPU thread here once we multithread emulation. active_breakpoint = event; diff --git a/src/video_core/hwrasterizer_base.h b/src/video_core/hwrasterizer_base.h new file mode 100644 index 000000000..dec193f8b --- /dev/null +++ b/src/video_core/hwrasterizer_base.h @@ -0,0 +1,40 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/emu_window.h" +#include "video_core/vertex_shader.h" + +class HWRasterizer { +public: + virtual ~HWRasterizer() { + } + + /// Initialize API-specific GPU objects + virtual void InitObjects() = 0; + + /// Reset the rasterizer, such as flushing all caches and updating all state + virtual void Reset() = 0; + + /// Queues the primitive formed by the given vertices for rendering + virtual void AddTriangle(const Pica::VertexShader::OutputVertex& v0, + const Pica::VertexShader::OutputVertex& v1, + const Pica::VertexShader::OutputVertex& v2) = 0; + + /// Draw the current batch of triangles + virtual void DrawTriangles() = 0; + + /// Commit the rasterizer's framebuffer contents immediately to the current 3DS memory framebuffer + virtual void CommitFramebuffer() = 0; + + /// Notify rasterizer that the specified PICA register has been changed + virtual void NotifyPicaRegisterChanged(u32 id) = 0; + + /// Notify rasterizer that the specified 3DS memory region will be read from after this notification + virtual void NotifyPreRead(PAddr addr, u32 size) = 0; + + /// Notify rasterizer that a 3DS memory region has been changed + virtual void NotifyFlush(PAddr addr, u32 size) = 0; +}; diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 
e9bc7fb3b..503c09eca 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -490,20 +490,37 @@ struct Regs { } } - struct { - // Components are laid out in reverse byte order, most significant bits first. - enum ColorFormat : u32 { - RGBA8 = 0, - RGB8 = 1, - RGB5A1 = 2, - RGB565 = 3, - RGBA4 = 4, - }; + // Components are laid out in reverse byte order, most significant bits first. + enum ColorFormat : u32 { + RGBA8 = 0, + RGB8 = 1, + RGB5A1 = 2, + RGB565 = 3, + RGBA4 = 4, + }; + // Returns the number of bytes in the specified color format + static unsigned BytesPerColorPixel(ColorFormat format) { + switch (format) { + case ColorFormat::RGBA8: + return 4; + case ColorFormat::RGB8: + return 3; + case ColorFormat::RGB5A1: + case ColorFormat::RGB565: + case ColorFormat::RGBA4: + return 2; + default: + LOG_CRITICAL(HW_GPU, "Unknown color format %u", format); + UNIMPLEMENTED(); + } + } + + struct { INSERT_PADDING_WORDS(0x6); DepthFormat depth_format; - BitField<16, 3, u32> color_format; + BitField<16, 3, ColorFormat> color_format; INSERT_PADDING_WORDS(0x4); diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 59eff48f9..362efe52e 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -36,23 +36,23 @@ static void DrawPixel(int x, int y, const Math::Vec4& color) { u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; switch (registers.framebuffer.color_format) { - case registers.framebuffer.RGBA8: + case Pica::Regs::ColorFormat::RGBA8: Color::EncodeRGBA8(color, dst_pixel); break; - case registers.framebuffer.RGB8: + case Pica::Regs::ColorFormat::RGB8: Color::EncodeRGB8(color, dst_pixel); break; - case registers.framebuffer.RGB5A1: + case Pica::Regs::ColorFormat::RGB5A1: Color::EncodeRGB5A1(color, dst_pixel); break; - case registers.framebuffer.RGB565: + case Pica::Regs::ColorFormat::RGB565: Color::EncodeRGB565(color, dst_pixel); break; - case registers.framebuffer.RGBA4: + case 
Pica::Regs::ColorFormat::RGBA4: Color::EncodeRGBA4(color, dst_pixel); break; @@ -73,19 +73,19 @@ static const Math::Vec4 GetPixel(int x, int y) { u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; switch (registers.framebuffer.color_format) { - case registers.framebuffer.RGBA8: + case Pica::Regs::ColorFormat::RGBA8: return Color::DecodeRGBA8(src_pixel); - case registers.framebuffer.RGB8: + case Pica::Regs::ColorFormat::RGB8: return Color::DecodeRGB8(src_pixel); - case registers.framebuffer.RGB5A1: + case Pica::Regs::ColorFormat::RGB5A1: return Color::DecodeRGB5A1(src_pixel); - case registers.framebuffer.RGB565: + case Pica::Regs::ColorFormat::RGB565: return Color::DecodeRGB565(src_pixel); - case registers.framebuffer.RGBA4: + case Pica::Regs::ColorFormat::RGBA4: return Color::DecodeRGBA4(src_pixel); default: diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index b62409538..5757ac75d 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -6,6 +6,8 @@ #include "common/common_types.h" +#include "video_core/hwrasterizer_base.h" + class RendererBase : NonCopyable { public: @@ -48,6 +50,8 @@ public: return m_current_frame; } + std::unique_ptr hw_rasterizer; + protected: f32 m_current_fps; ///< Current framerate, should be set by the renderer int m_current_frame; ///< Current frame, should be set by the renderer diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp new file mode 100644 index 000000000..e44375547 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -0,0 +1,879 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "core/settings.h" +#include "core/hw/gpu.h" + +#include "video_core/color.h" +#include "video_core/pica.h" +#include "video_core/utils.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/gl_shaders.h" +#include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/pica_to_gl.h" + +#include "generated/gl_3_2_core.h" + +#include + +static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) { + return (stage.color_op == Pica::Regs::TevStageConfig::Operation::Replace && + stage.alpha_op == Pica::Regs::TevStageConfig::Operation::Replace && + stage.color_source1 == Pica::Regs::TevStageConfig::Source::Previous && + stage.alpha_source1 == Pica::Regs::TevStageConfig::Source::Previous && + stage.color_modifier1 == Pica::Regs::TevStageConfig::ColorModifier::SourceColor && + stage.alpha_modifier1 == Pica::Regs::TevStageConfig::AlphaModifier::SourceAlpha && + stage.GetColorMultiplier() == 1 && + stage.GetAlphaMultiplier() == 1); +} + +RasterizerOpenGL::RasterizerOpenGL() : last_fb_color_addr(0), last_fb_depth_addr(0) { } +RasterizerOpenGL::~RasterizerOpenGL() { } + +void RasterizerOpenGL::InitObjects() { + // Create the hardware shader program and get attrib/uniform locations + shader.Create(GLShaders::g_vertex_shader_hw, GLShaders::g_fragment_shader_hw); + attrib_position = glGetAttribLocation(shader.handle, "vert_position"); + attrib_color = glGetAttribLocation(shader.handle, "vert_color"); + attrib_texcoords = glGetAttribLocation(shader.handle, "vert_texcoords"); + + uniform_alphatest_enabled = glGetUniformLocation(shader.handle, "alphatest_enabled"); + uniform_alphatest_func = glGetUniformLocation(shader.handle, "alphatest_func"); + uniform_alphatest_ref = glGetUniformLocation(shader.handle, "alphatest_ref"); + + uniform_tex = glGetUniformLocation(shader.handle, "tex"); + + uniform_tev_combiner_buffer_color = glGetUniformLocation(shader.handle, "tev_combiner_buffer_color"); 
+ + const auto tev_stages = Pica::registers.GetTevStages(); + for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { + auto& uniform_tev_cfg = uniform_tev_cfgs[tev_stage_index]; + + std::string tev_ref_str = "tev_cfgs[" + std::to_string(tev_stage_index) + "]"; + uniform_tev_cfg.enabled = glGetUniformLocation(shader.handle, (tev_ref_str + ".enabled").c_str()); + uniform_tev_cfg.color_sources = glGetUniformLocation(shader.handle, (tev_ref_str + ".color_sources").c_str()); + uniform_tev_cfg.alpha_sources = glGetUniformLocation(shader.handle, (tev_ref_str + ".alpha_sources").c_str()); + uniform_tev_cfg.color_modifiers = glGetUniformLocation(shader.handle, (tev_ref_str + ".color_modifiers").c_str()); + uniform_tev_cfg.alpha_modifiers = glGetUniformLocation(shader.handle, (tev_ref_str + ".alpha_modifiers").c_str()); + uniform_tev_cfg.color_alpha_op = glGetUniformLocation(shader.handle, (tev_ref_str + ".color_alpha_op").c_str()); + uniform_tev_cfg.color_alpha_multiplier = glGetUniformLocation(shader.handle, (tev_ref_str + ".color_alpha_multiplier").c_str()); + uniform_tev_cfg.const_color = glGetUniformLocation(shader.handle, (tev_ref_str + ".const_color").c_str()); + uniform_tev_cfg.updates_combiner_buffer_color_alpha = glGetUniformLocation(shader.handle, (tev_ref_str + ".updates_combiner_buffer_color_alpha").c_str()); + } + + // Generate VBO and VAO + vertex_buffer.Create(); + vertex_array.Create(); + + // Update OpenGL state + state.draw.vertex_array = vertex_array.handle; + state.draw.vertex_buffer = vertex_buffer.handle; + state.draw.shader_program = shader.handle; + + state.Apply(); + + // Set the texture samplers to correspond to different texture units + glUniform1i(uniform_tex, 0); + glUniform1i(uniform_tex + 1, 1); + glUniform1i(uniform_tex + 2, 2); + + // Set vertex attributes + glVertexAttribPointer(attrib_position, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); + 
glVertexAttribPointer(attrib_color, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, color)); + glVertexAttribPointer(attrib_texcoords, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0)); + glVertexAttribPointer(attrib_texcoords + 1, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord1)); + glVertexAttribPointer(attrib_texcoords + 2, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord2)); + glEnableVertexAttribArray(attrib_position); + glEnableVertexAttribArray(attrib_color); + glEnableVertexAttribArray(attrib_texcoords); + glEnableVertexAttribArray(attrib_texcoords + 1); + glEnableVertexAttribArray(attrib_texcoords + 2); + + // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation + fb_color_texture.texture.Create(); + ReconfigureColorTexture(fb_color_texture, Pica::Regs::ColorFormat::RGBA8, 1, 1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + fb_depth_texture.texture.Create(); + ReconfigureDepthTexture(fb_depth_texture, Pica::Regs::DepthFormat::D16, 1, 1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_FUNC, GL_LEQUAL); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_NONE); + + // Configure OpenGL framebuffer + 
framebuffer.Create(); + + state.draw.framebuffer = framebuffer.handle; + + // Unbind texture to allow binding to framebuffer + state.texture_units[0].enabled_2d = true; + state.texture_units[0].texture_2d = 0; + state.Apply(); + + glActiveTexture(GL_TEXTURE0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0); + + ASSERT_MSG(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE, + "OpenGL rasterizer framebuffer setup failed, status %X", glCheckFramebufferStatus(GL_FRAMEBUFFER)); +} + +void RasterizerOpenGL::Reset() { + SyncCullMode(); + SyncBlendEnabled(); + SyncBlendFuncs(); + SyncBlendColor(); + SyncAlphaTest(); + SyncStencilTest(); + SyncDepthTest(); + + // TEV stage 0 + SyncTevSources(0, Pica::registers.tev_stage0); + SyncTevModifiers(0, Pica::registers.tev_stage0); + SyncTevOps(0, Pica::registers.tev_stage0); + SyncTevColor(0, Pica::registers.tev_stage0); + SyncTevMultipliers(0, Pica::registers.tev_stage0); + + // TEV stage 1 + SyncTevSources(1, Pica::registers.tev_stage1); + SyncTevModifiers(1, Pica::registers.tev_stage1); + SyncTevOps(1, Pica::registers.tev_stage1); + SyncTevColor(1, Pica::registers.tev_stage1); + SyncTevMultipliers(1, Pica::registers.tev_stage1); + + // TEV stage 2 + SyncTevSources(2, Pica::registers.tev_stage2); + SyncTevModifiers(2, Pica::registers.tev_stage2); + SyncTevOps(2, Pica::registers.tev_stage2); + SyncTevColor(2, Pica::registers.tev_stage2); + SyncTevMultipliers(2, Pica::registers.tev_stage2); + + // TEV stage 3 + SyncTevSources(3, Pica::registers.tev_stage3); + SyncTevModifiers(3, Pica::registers.tev_stage3); + SyncTevOps(3, Pica::registers.tev_stage3); + SyncTevColor(3, Pica::registers.tev_stage3); + SyncTevMultipliers(3, Pica::registers.tev_stage3); + + // TEV stage 4 + SyncTevSources(4, Pica::registers.tev_stage4); + SyncTevModifiers(4, 
Pica::registers.tev_stage4); + SyncTevOps(4, Pica::registers.tev_stage4); + SyncTevColor(4, Pica::registers.tev_stage4); + SyncTevMultipliers(4, Pica::registers.tev_stage4); + + // TEV stage 5 + SyncTevSources(5, Pica::registers.tev_stage5); + SyncTevModifiers(5, Pica::registers.tev_stage5); + SyncTevOps(5, Pica::registers.tev_stage5); + SyncTevColor(5, Pica::registers.tev_stage5); + SyncTevMultipliers(5, Pica::registers.tev_stage5); + + SyncCombinerColor(); + SyncCombinerWriteFlags(); + + res_cache.FullFlush(); +} + +void RasterizerOpenGL::AddTriangle(const Pica::VertexShader::OutputVertex& v0, + const Pica::VertexShader::OutputVertex& v1, + const Pica::VertexShader::OutputVertex& v2) { + vertex_batch.push_back(HardwareVertex(v0)); + vertex_batch.push_back(HardwareVertex(v1)); + vertex_batch.push_back(HardwareVertex(v2)); +} + +void RasterizerOpenGL::DrawTriangles() { + SyncFramebuffer(); + SyncDrawState(); + + glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW); + glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); + + vertex_batch.clear(); + + // TODO: Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture +} + +void RasterizerOpenGL::CommitFramebuffer() { + CommitColorBuffer(); + CommitDepthBuffer(); +} + +void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { + if (!Settings::values.use_hw_renderer) + return; + + switch(id) { + // Culling + case PICA_REG_INDEX(cull_mode): + SyncCullMode(); + break; + + // Blending + case PICA_REG_INDEX(output_merger.alphablend_enable): + SyncBlendEnabled(); + break; + case PICA_REG_INDEX(output_merger.alpha_blending): + SyncBlendFuncs(); + break; + case PICA_REG_INDEX(output_merger.blend_const): + SyncBlendColor(); + break; + + // Alpha test + case PICA_REG_INDEX(output_merger.alpha_test): + SyncAlphaTest(); + break; + + // Stencil test + case PICA_REG_INDEX(output_merger.stencil_test): + SyncStencilTest(); + 
break; + + // Depth test + case PICA_REG_INDEX(output_merger.depth_test_enable): + SyncDepthTest(); + break; + + // TEV stage 0 + case PICA_REG_INDEX(tev_stage0.color_source1): + SyncTevSources(0, Pica::registers.tev_stage0); + break; + case PICA_REG_INDEX(tev_stage0.color_modifier1): + SyncTevModifiers(0, Pica::registers.tev_stage0); + break; + case PICA_REG_INDEX(tev_stage0.color_op): + SyncTevOps(0, Pica::registers.tev_stage0); + break; + case PICA_REG_INDEX(tev_stage0.const_r): + SyncTevColor(0, Pica::registers.tev_stage0); + break; + case PICA_REG_INDEX(tev_stage0.color_scale): + SyncTevMultipliers(0, Pica::registers.tev_stage0); + break; + + // TEV stage 1 + case PICA_REG_INDEX(tev_stage1.color_source1): + SyncTevSources(1, Pica::registers.tev_stage1); + break; + case PICA_REG_INDEX(tev_stage1.color_modifier1): + SyncTevModifiers(1, Pica::registers.tev_stage1); + break; + case PICA_REG_INDEX(tev_stage1.color_op): + SyncTevOps(1, Pica::registers.tev_stage1); + break; + case PICA_REG_INDEX(tev_stage1.const_r): + SyncTevColor(1, Pica::registers.tev_stage1); + break; + case PICA_REG_INDEX(tev_stage1.color_scale): + SyncTevMultipliers(1, Pica::registers.tev_stage1); + break; + + // TEV stage 2 + case PICA_REG_INDEX(tev_stage2.color_source1): + SyncTevSources(2, Pica::registers.tev_stage2); + break; + case PICA_REG_INDEX(tev_stage2.color_modifier1): + SyncTevModifiers(2, Pica::registers.tev_stage2); + break; + case PICA_REG_INDEX(tev_stage2.color_op): + SyncTevOps(2, Pica::registers.tev_stage2); + break; + case PICA_REG_INDEX(tev_stage2.const_r): + SyncTevColor(2, Pica::registers.tev_stage2); + break; + case PICA_REG_INDEX(tev_stage2.color_scale): + SyncTevMultipliers(2, Pica::registers.tev_stage2); + break; + + // TEV stage 3 + case PICA_REG_INDEX(tev_stage3.color_source1): + SyncTevSources(3, Pica::registers.tev_stage3); + break; + case PICA_REG_INDEX(tev_stage3.color_modifier1): + SyncTevModifiers(3, Pica::registers.tev_stage3); + break; + case 
PICA_REG_INDEX(tev_stage3.color_op): + SyncTevOps(3, Pica::registers.tev_stage3); + break; + case PICA_REG_INDEX(tev_stage3.const_r): + SyncTevColor(3, Pica::registers.tev_stage3); + break; + case PICA_REG_INDEX(tev_stage3.color_scale): + SyncTevMultipliers(3, Pica::registers.tev_stage3); + break; + + // TEV stage 4 + case PICA_REG_INDEX(tev_stage4.color_source1): + SyncTevSources(4, Pica::registers.tev_stage4); + break; + case PICA_REG_INDEX(tev_stage4.color_modifier1): + SyncTevModifiers(4, Pica::registers.tev_stage4); + break; + case PICA_REG_INDEX(tev_stage4.color_op): + SyncTevOps(4, Pica::registers.tev_stage4); + break; + case PICA_REG_INDEX(tev_stage4.const_r): + SyncTevColor(4, Pica::registers.tev_stage4); + break; + case PICA_REG_INDEX(tev_stage4.color_scale): + SyncTevMultipliers(4, Pica::registers.tev_stage4); + break; + + // TEV stage 5 + case PICA_REG_INDEX(tev_stage5.color_source1): + SyncTevSources(5, Pica::registers.tev_stage5); + break; + case PICA_REG_INDEX(tev_stage5.color_modifier1): + SyncTevModifiers(5, Pica::registers.tev_stage5); + break; + case PICA_REG_INDEX(tev_stage5.color_op): + SyncTevOps(5, Pica::registers.tev_stage5); + break; + case PICA_REG_INDEX(tev_stage5.const_r): + SyncTevColor(5, Pica::registers.tev_stage5); + break; + case PICA_REG_INDEX(tev_stage5.color_scale): + SyncTevMultipliers(5, Pica::registers.tev_stage5); + break; + + // TEV combiner buffer color + case PICA_REG_INDEX(tev_combiner_buffer_color): + SyncCombinerColor(); + break; + + // TEV combiner buffer write flags + case PICA_REG_INDEX(tev_combiner_buffer_input): + SyncCombinerWriteFlags(); + break; + } +} + +void RasterizerOpenGL::NotifyPreRead(PAddr addr, u32 size) { + if (!Settings::values.use_hw_renderer) + return; + + PAddr cur_fb_color_addr = Pica::registers.framebuffer.GetColorBufferPhysicalAddress(); + u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(Pica::registers.framebuffer.color_format) + * Pica::registers.framebuffer.GetWidth() * 
Pica::registers.framebuffer.GetHeight(); + + PAddr cur_fb_depth_addr = Pica::registers.framebuffer.GetDepthBufferPhysicalAddress(); + u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(Pica::registers.framebuffer.depth_format) + * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight(); + + // If source memory region overlaps 3DS framebuffers, commit them before the copy happens + if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) + CommitColorBuffer(); + + if (MathUtil::IntervalsIntersect(addr, size, cur_fb_depth_addr, cur_fb_depth_size)) + CommitDepthBuffer(); +} + +void RasterizerOpenGL::NotifyFlush(PAddr addr, u32 size) { + if (!Settings::values.use_hw_renderer) + return; + + PAddr cur_fb_color_addr = Pica::registers.framebuffer.GetColorBufferPhysicalAddress(); + u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(Pica::registers.framebuffer.color_format) + * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight(); + + PAddr cur_fb_depth_addr = Pica::registers.framebuffer.GetDepthBufferPhysicalAddress(); + u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(Pica::registers.framebuffer.depth_format) + * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight(); + + // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL + if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) + ReloadColorBuffer(); + + if (MathUtil::IntervalsIntersect(addr, size, cur_fb_depth_addr, cur_fb_depth_size)) + ReloadDepthBuffer(); + + // Notify cache of flush in case the region touches a cached resource + res_cache.NotifyFlush(addr, size); +} + +void RasterizerOpenGL::ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height) { + GLint internal_format; + + texture.format = format; + texture.width = width; + texture.height = height; + + switch (format) { + case 
Pica::Regs::ColorFormat::RGBA8: + internal_format = GL_RGBA; + texture.gl_format = GL_RGBA; + texture.gl_type = GL_UNSIGNED_INT_8_8_8_8; + break; + + case Pica::Regs::ColorFormat::RGB8: + // This pixel format uses BGR since GL_UNSIGNED_BYTE specifies byte-order, unlike every + // specific OpenGL type used in this function using native-endian (that is, little-endian + // mostly everywhere) for words or half-words. + // TODO: check how those behave on big-endian processors. + internal_format = GL_RGB; + texture.gl_format = GL_BGR; + texture.gl_type = GL_UNSIGNED_BYTE; + break; + + case Pica::Regs::ColorFormat::RGB5A1: + internal_format = GL_RGBA; + texture.gl_format = GL_RGBA; + texture.gl_type = GL_UNSIGNED_SHORT_5_5_5_1; + break; + + case Pica::Regs::ColorFormat::RGB565: + internal_format = GL_RGB; + texture.gl_format = GL_RGB; + texture.gl_type = GL_UNSIGNED_SHORT_5_6_5; + break; + + case Pica::Regs::ColorFormat::RGBA4: + internal_format = GL_RGBA; + texture.gl_format = GL_RGBA; + texture.gl_type = GL_UNSIGNED_SHORT_4_4_4_4; + break; + + default: + LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture color format %x", format); + UNIMPLEMENTED(); + break; + } + + state.texture_units[0].enabled_2d = true; + state.texture_units[0].texture_2d = texture.texture.handle; + state.Apply(); + + glActiveTexture(GL_TEXTURE0); + glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, + texture.gl_format, texture.gl_type, nullptr); +} + +void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height) { + GLint internal_format; + + texture.format = format; + texture.width = width; + texture.height = height; + + switch (format) { + case Pica::Regs::DepthFormat::D16: + internal_format = GL_DEPTH_COMPONENT16; + texture.gl_format = GL_DEPTH_COMPONENT; + texture.gl_type = GL_UNSIGNED_SHORT; + break; + + case Pica::Regs::DepthFormat::D24: + internal_format = GL_DEPTH_COMPONENT24; + 
texture.gl_format = GL_DEPTH_COMPONENT; + texture.gl_type = GL_UNSIGNED_INT_24_8; + break; + + case Pica::Regs::DepthFormat::D24S8: + internal_format = GL_DEPTH24_STENCIL8; + texture.gl_format = GL_DEPTH_STENCIL; + texture.gl_type = GL_UNSIGNED_INT_24_8; + break; + + default: + LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture depth format %x", format); + UNIMPLEMENTED(); + break; + } + + state.texture_units[0].enabled_2d = true; + state.texture_units[0].texture_2d = texture.texture.handle; + state.Apply(); + + glActiveTexture(GL_TEXTURE0); + glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, + texture.gl_format, texture.gl_type, nullptr); +} + +void RasterizerOpenGL::SyncFramebuffer() { + PAddr cur_fb_color_addr = Pica::registers.framebuffer.GetColorBufferPhysicalAddress(); + Pica::Regs::ColorFormat new_fb_color_format = Pica::registers.framebuffer.color_format; + + PAddr cur_fb_depth_addr = Pica::registers.framebuffer.GetDepthBufferPhysicalAddress(); + Pica::Regs::DepthFormat new_fb_depth_format = Pica::registers.framebuffer.depth_format; + + bool fb_size_changed = fb_color_texture.width != Pica::registers.framebuffer.GetWidth() || + fb_color_texture.height != Pica::registers.framebuffer.GetHeight(); + + bool color_fb_prop_changed = fb_color_texture.format != new_fb_color_format || + fb_size_changed; + + bool depth_fb_prop_changed = fb_depth_texture.format != new_fb_depth_format || + fb_size_changed; + + bool color_fb_modified = last_fb_color_addr != cur_fb_color_addr || + color_fb_prop_changed; + + bool depth_fb_modified = last_fb_depth_addr != cur_fb_depth_addr || + depth_fb_prop_changed; + + // Commit if framebuffer modified in any way + if (color_fb_modified) + CommitColorBuffer(); + + if (depth_fb_modified) + CommitDepthBuffer(); + + // Reconfigure framebuffer textures if any property has changed + if (color_fb_prop_changed) { + ReconfigureColorTexture(fb_color_texture, new_fb_color_format, + 
Pica::registers.framebuffer.GetWidth(), Pica::registers.framebuffer.GetHeight()); + } + + if (depth_fb_prop_changed) { + ReconfigureDepthTexture(fb_depth_texture, new_fb_depth_format, + Pica::registers.framebuffer.GetWidth(), Pica::registers.framebuffer.GetHeight()); + + // Only attach depth buffer as stencil if it supports stencil + switch (new_fb_depth_format) { + case Pica::Regs::DepthFormat::D16: + case Pica::Regs::DepthFormat::D24: + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + break; + + case Pica::Regs::DepthFormat::D24S8: + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0); + break; + + default: + LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer depth format %x", new_fb_depth_format); + UNIMPLEMENTED(); + break; + } + } + + // Load buffer data again if fb modified in any way + if (color_fb_modified) { + last_fb_color_addr = cur_fb_color_addr; + + ReloadColorBuffer(); + } + + if (depth_fb_modified) { + last_fb_depth_addr = cur_fb_depth_addr; + + ReloadDepthBuffer(); + } +} + +void RasterizerOpenGL::SyncCullMode() { + switch (Pica::registers.cull_mode) { + case Pica::Regs::CullMode::KeepAll: + state.cull.enabled = false; + break; + + case Pica::Regs::CullMode::KeepClockWise: + state.cull.enabled = true; + state.cull.mode = GL_BACK; + break; + + case Pica::Regs::CullMode::KeepCounterClockWise: + state.cull.enabled = true; + state.cull.mode = GL_FRONT; + break; + + default: + LOG_CRITICAL(Render_OpenGL, "Unknown cull mode %d", Pica::registers.cull_mode.Value()); + UNIMPLEMENTED(); + break; + } +} + +void RasterizerOpenGL::SyncBlendEnabled() { + state.blend.enabled = Pica::registers.output_merger.alphablend_enable; +} + +void RasterizerOpenGL::SyncBlendFuncs() { + state.blend.src_rgb_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_source_rgb); + state.blend.dst_rgb_func = 
PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_dest_rgb); + state.blend.src_a_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_source_a); + state.blend.dst_a_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_dest_a); +} + +void RasterizerOpenGL::SyncBlendColor() { + auto blend_color = PicaToGL::ColorRGBA8((u8*)&Pica::registers.output_merger.blend_const.r); + state.blend.color.red = blend_color[0]; + state.blend.color.green = blend_color[1]; + state.blend.color.blue = blend_color[2]; + state.blend.color.alpha = blend_color[3]; +} + +void RasterizerOpenGL::SyncAlphaTest() { + glUniform1i(uniform_alphatest_enabled, Pica::registers.output_merger.alpha_test.enable); + glUniform1i(uniform_alphatest_func, Pica::registers.output_merger.alpha_test.func); + glUniform1f(uniform_alphatest_ref, Pica::registers.output_merger.alpha_test.ref / 255.0f); +} + +void RasterizerOpenGL::SyncStencilTest() { + // TODO: Implement stencil test, mask, and op +} + +void RasterizerOpenGL::SyncDepthTest() { + state.depth.test_enabled = Pica::registers.output_merger.depth_test_enable; + state.depth.test_func = PicaToGL::CompareFunc(Pica::registers.output_merger.depth_test_func); + state.depth.write_mask = Pica::registers.output_merger.depth_write_enable ? 
GL_TRUE : GL_FALSE; +} + +void RasterizerOpenGL::SyncTevSources(unsigned stage_index, const Pica::Regs::TevStageConfig& config) { + GLint color_srcs[3] = { (GLint)config.color_source1.Value(), + (GLint)config.color_source2.Value(), + (GLint)config.color_source3.Value() }; + GLint alpha_srcs[3] = { (GLint)config.alpha_source1.Value(), + (GLint)config.alpha_source2.Value(), + (GLint)config.alpha_source3.Value() }; + + glUniform3iv(uniform_tev_cfgs[stage_index].color_sources, 1, color_srcs); + glUniform3iv(uniform_tev_cfgs[stage_index].alpha_sources, 1, alpha_srcs); +} + +void RasterizerOpenGL::SyncTevModifiers(unsigned stage_index, const Pica::Regs::TevStageConfig& config) { + GLint color_mods[3] = { (GLint)config.color_modifier1.Value(), + (GLint)config.color_modifier2.Value(), + (GLint)config.color_modifier3.Value() }; + GLint alpha_mods[3] = { (GLint)config.alpha_modifier1.Value(), + (GLint)config.alpha_modifier2.Value(), + (GLint)config.alpha_modifier3.Value() }; + + glUniform3iv(uniform_tev_cfgs[stage_index].color_modifiers, 1, color_mods); + glUniform3iv(uniform_tev_cfgs[stage_index].alpha_modifiers, 1, alpha_mods); +} + +void RasterizerOpenGL::SyncTevOps(unsigned stage_index, const Pica::Regs::TevStageConfig& config) { + glUniform2i(uniform_tev_cfgs[stage_index].color_alpha_op, (GLint)config.color_op.Value(), (GLint)config.alpha_op.Value()); +} + +void RasterizerOpenGL::SyncTevColor(unsigned stage_index, const Pica::Regs::TevStageConfig& config) { + auto const_color = PicaToGL::ColorRGBA8((u8*)&config.const_r); + glUniform4fv(uniform_tev_cfgs[stage_index].const_color, 1, const_color.data()); +} + +void RasterizerOpenGL::SyncTevMultipliers(unsigned stage_index, const Pica::Regs::TevStageConfig& config) { + glUniform2i(uniform_tev_cfgs[stage_index].color_alpha_multiplier, config.GetColorMultiplier(), config.GetAlphaMultiplier()); +} + +void RasterizerOpenGL::SyncCombinerColor() { + auto combiner_color = 
PicaToGL::ColorRGBA8((u8*)&Pica::registers.tev_combiner_buffer_color.r); + glUniform4fv(uniform_tev_combiner_buffer_color, 1, combiner_color.data()); +} + +void RasterizerOpenGL::SyncCombinerWriteFlags() { + const auto tev_stages = Pica::registers.GetTevStages(); + for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { + glUniform2i(uniform_tev_cfgs[tev_stage_index].updates_combiner_buffer_color_alpha, + Pica::registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index), + Pica::registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)); + } +} + +void RasterizerOpenGL::SyncDrawState() { + // Sync the viewport + GLsizei viewport_width = (GLsizei)Pica::float24::FromRawFloat24(Pica::registers.viewport_size_x).ToFloat32() * 2; + GLsizei viewport_height = (GLsizei)Pica::float24::FromRawFloat24(Pica::registers.viewport_size_y).ToFloat32() * 2; + + // OpenGL uses different y coordinates, so negate corner offset and flip origin + // TODO: Ensure viewport_corner.x should not be negated or origin flipped + // TODO: Use floating-point viewports for accuracy if supported + glViewport((GLsizei)static_cast(Pica::registers.viewport_corner.x), + -(GLsizei)static_cast(Pica::registers.viewport_corner.y) + + Pica::registers.framebuffer.GetHeight() - viewport_height, + viewport_width, viewport_height); + + // Sync bound texture(s), upload if not cached + const auto pica_textures = Pica::registers.GetTextures(); + for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { + const auto& texture = pica_textures[texture_index]; + + if (texture.enabled) { + state.texture_units[texture_index].enabled_2d = true; + res_cache.LoadAndBindTexture(state, texture_index, texture); + } else { + state.texture_units[texture_index].enabled_2d = false; + } + } + + // Skip processing TEV stages that simply pass the previous stage results through + const auto tev_stages = 
Pica::registers.GetTevStages(); + for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { + glUniform1i(uniform_tev_cfgs[tev_stage_index].enabled, !IsPassThroughTevStage(tev_stages[tev_stage_index])); + } + + state.Apply(); +} + +void RasterizerOpenGL::ReloadColorBuffer() { + u8* color_buffer = Memory::GetPhysicalPointer(Pica::registers.framebuffer.GetColorBufferPhysicalAddress()); + + if (color_buffer == nullptr) + return; + + u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format); + + std::unique_ptr temp_fb_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]); + + // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary. + for (int y = 0; y < fb_color_texture.height; ++y) { + for (int x = 0; x < fb_color_texture.width; ++x) { + const u32 coarse_y = y & ~7; + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel; + u32 gl_px_idx = x * bytes_per_pixel + y * fb_color_texture.width * bytes_per_pixel; + + u8* pixel = color_buffer + dst_offset; + memcpy(&temp_fb_color_buffer[gl_px_idx], pixel, bytes_per_pixel); + } + } + + state.texture_units[0].enabled_2d = true; + state.texture_units[0].texture_2d = fb_color_texture.texture.handle; + state.Apply(); + + glActiveTexture(GL_TEXTURE0); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_color_texture.width, fb_color_texture.height, + fb_color_texture.gl_format, fb_color_texture.gl_type, temp_fb_color_buffer.get()); +} + +void RasterizerOpenGL::ReloadDepthBuffer() { + // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil + u8* depth_buffer = Memory::GetPhysicalPointer(Pica::registers.framebuffer.GetDepthBufferPhysicalAddress()); + + if (depth_buffer == nullptr) { + return; + } + + u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format); + + // OpenGL needs 4 bpp 
alignment for D24 + u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel; + + std::unique_ptr temp_fb_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]); + + for (int y = 0; y < fb_depth_texture.height; ++y) { + for (int x = 0; x < fb_depth_texture.width; ++x) { + const u32 coarse_y = y & ~7; + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; + u32 gl_px_idx = x + y * fb_depth_texture.width; + + switch (fb_depth_texture.format) { + case Pica::Regs::DepthFormat::D16: + ((u16*)temp_fb_depth_buffer.get())[gl_px_idx] = Color::DecodeD16(depth_buffer + dst_offset); + break; + case Pica::Regs::DepthFormat::D24: + ((u32*)temp_fb_depth_buffer.get())[gl_px_idx] = Color::DecodeD24(depth_buffer + dst_offset); + break; + case Pica::Regs::DepthFormat::D24S8: + { + Math::Vec2 depth_stencil = Color::DecodeD24S8(depth_buffer + dst_offset); + ((u32*)temp_fb_depth_buffer.get())[gl_px_idx] = (depth_stencil.x << 8) | depth_stencil.y; + break; + } + default: + LOG_CRITICAL(Render_OpenGL, "Unknown memory framebuffer depth format %x", fb_depth_texture.format); + UNIMPLEMENTED(); + break; + } + } + } + + state.texture_units[0].enabled_2d = true; + state.texture_units[0].texture_2d = fb_depth_texture.texture.handle; + state.Apply(); + + glActiveTexture(GL_TEXTURE0); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height, + fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get()); +} + +void RasterizerOpenGL::CommitColorBuffer() { + if (last_fb_color_addr != 0) { + u8* color_buffer = Memory::GetPhysicalPointer(last_fb_color_addr); + + if (color_buffer != nullptr) { + u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format); + + std::unique_ptr temp_gl_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]); + + state.texture_units[0].enabled_2d = true; + 
state.texture_units[0].texture_2d = fb_color_texture.texture.handle; + state.Apply(); + + glActiveTexture(GL_TEXTURE0); + glGetTexImage(GL_TEXTURE_2D, 0, fb_color_texture.gl_format, fb_color_texture.gl_type, temp_gl_color_buffer.get()); + + // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary. + for (int y = 0; y < fb_color_texture.height; ++y) { + for (int x = 0; x < fb_color_texture.width; ++x) { + const u32 coarse_y = y & ~7; + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel; + u32 gl_px_idx = x * bytes_per_pixel + y * fb_color_texture.width * bytes_per_pixel; + + u8* pixel = color_buffer + dst_offset; + memcpy(pixel, &temp_gl_color_buffer[gl_px_idx], bytes_per_pixel); + } + } + } + } +} + +void RasterizerOpenGL::CommitDepthBuffer() { + if (last_fb_depth_addr != 0) { + // TODO: Output seems correct visually, but doesn't quite match sw renderer output. One of them is wrong. + u8* depth_buffer = Memory::GetPhysicalPointer(last_fb_depth_addr); + + if (depth_buffer != nullptr) { + u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format); + + // OpenGL needs 4 bpp alignment for D24 + u32 gl_bpp = bytes_per_pixel == 3 ? 
4 : bytes_per_pixel; + + std::unique_ptr temp_gl_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]); + + state.texture_units[0].enabled_2d = true; + state.texture_units[0].texture_2d = fb_depth_texture.texture.handle; + state.Apply(); + + glActiveTexture(GL_TEXTURE0); + glGetTexImage(GL_TEXTURE_2D, 0, fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_gl_depth_buffer.get()); + + for (int y = 0; y < fb_depth_texture.height; ++y) { + for (int x = 0; x < fb_depth_texture.width; ++x) { + const u32 coarse_y = y & ~7; + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; + u32 gl_px_idx = x + y * fb_depth_texture.width; + + switch (fb_depth_texture.format) { + case Pica::Regs::DepthFormat::D16: + Color::EncodeD16(((u16*)temp_gl_depth_buffer.get())[gl_px_idx], depth_buffer + dst_offset); + break; + case Pica::Regs::DepthFormat::D24: + Color::EncodeD24(((u32*)temp_gl_depth_buffer.get())[gl_px_idx], depth_buffer + dst_offset); + break; + case Pica::Regs::DepthFormat::D24S8: + { + u32 depth_stencil = ((u32*)temp_gl_depth_buffer.get())[gl_px_idx]; + Color::EncodeD24S8((depth_stencil >> 8), depth_stencil & 0xFF, depth_buffer + dst_offset); + break; + } + default: + LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer depth format %x", fb_depth_texture.format); + UNIMPLEMENTED(); + break; + } + } + } + } + } +} diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h new file mode 100644 index 000000000..9896f8d04 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -0,0 +1,207 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "video_core/hwrasterizer_base.h" + +#include "gl_state.h" +#include "gl_rasterizer_cache.h" + +class RasterizerOpenGL : public HWRasterizer { +public: + + RasterizerOpenGL(); + ~RasterizerOpenGL() override; + + /// Initialize API-specific GPU objects + void InitObjects() override; + + /// Reset the rasterizer, such as flushing all caches and updating all state + void Reset() override; + + /// Queues the primitive formed by the given vertices for rendering + void AddTriangle(const Pica::VertexShader::OutputVertex& v0, + const Pica::VertexShader::OutputVertex& v1, + const Pica::VertexShader::OutputVertex& v2) override; + + /// Draw the current batch of triangles + void DrawTriangles() override; + + /// Commit the rasterizer's framebuffer contents immediately to the current 3DS memory framebuffer + void CommitFramebuffer() override; + + /// Notify rasterizer that the specified PICA register has been changed + void NotifyPicaRegisterChanged(u32 id) override; + + /// Notify rasterizer that the specified 3DS memory region will be read from after this notification + void NotifyPreRead(PAddr addr, u32 size) override; + + /// Notify rasterizer that a 3DS memory region has been changed + void NotifyFlush(PAddr addr, u32 size) override; + +private: + /// Structure used for managing texture environment states + struct TEVConfigUniforms { + GLuint enabled; + GLuint color_sources; + GLuint alpha_sources; + GLuint color_modifiers; + GLuint alpha_modifiers; + GLuint color_alpha_op; + GLuint color_alpha_multiplier; + GLuint const_color; + GLuint updates_combiner_buffer_color_alpha; + }; + + /// Structure used for storing information about color textures + struct TextureInfo { + OGLTexture texture; + GLsizei width; + GLsizei height; + Pica::Regs::ColorFormat format; + GLenum gl_format; + GLenum gl_type; + }; + + /// Structure used for storing information about depth textures + struct DepthTextureInfo { + OGLTexture texture; + GLsizei width; + GLsizei 
height; + Pica::Regs::DepthFormat format; + GLenum gl_format; + GLenum gl_type; + }; + + /// Structure that the hardware rendered vertices are composed of + struct HardwareVertex { + HardwareVertex(const Pica::VertexShader::OutputVertex& v) { + position[0] = v.pos.x.ToFloat32(); + position[1] = v.pos.y.ToFloat32(); + position[2] = v.pos.z.ToFloat32(); + position[3] = v.pos.w.ToFloat32(); + color[0] = v.color.x.ToFloat32(); + color[1] = v.color.y.ToFloat32(); + color[2] = v.color.z.ToFloat32(); + color[3] = v.color.w.ToFloat32(); + tex_coord0[0] = v.tc0.x.ToFloat32(); + tex_coord0[1] = v.tc0.y.ToFloat32(); + tex_coord1[0] = v.tc1.x.ToFloat32(); + tex_coord1[1] = v.tc1.y.ToFloat32(); + tex_coord2[0] = v.tc2.x.ToFloat32(); + tex_coord2[1] = v.tc2.y.ToFloat32(); + } + + GLfloat position[4]; + GLfloat color[4]; + GLfloat tex_coord0[2]; + GLfloat tex_coord1[2]; + GLfloat tex_coord2[2]; + }; + + /// Reconfigure the OpenGL color texture to use the given format and dimensions + void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height); + + /// Reconfigure the OpenGL depth texture to use the given format and dimensions + void ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height); + + /// Syncs the state and contents of the OpenGL framebuffer to match the current PICA framebuffer + void SyncFramebuffer(); + + /// Syncs the cull mode to match the PICA register + void SyncCullMode(); + + /// Syncs the blend enabled status to match the PICA register + void SyncBlendEnabled(); + + /// Syncs the blend functions to match the PICA register + void SyncBlendFuncs(); + + /// Syncs the blend color to match the PICA register + void SyncBlendColor(); + + /// Syncs the alpha test states to match the PICA register + void SyncAlphaTest(); + + /// Syncs the stencil test states to match the PICA register + void SyncStencilTest(); + + /// Syncs the depth test states to match the PICA register + 
void SyncDepthTest(); + + /// Syncs the specified TEV stage's color and alpha sources to match the PICA register + void SyncTevSources(unsigned stage_index, const Pica::Regs::TevStageConfig& config); + + /// Syncs the specified TEV stage's color and alpha modifiers to match the PICA register + void SyncTevModifiers(unsigned stage_index, const Pica::Regs::TevStageConfig& config); + + /// Syncs the specified TEV stage's color and alpha combiner operations to match the PICA register + void SyncTevOps(unsigned stage_index, const Pica::Regs::TevStageConfig& config); + + /// Syncs the specified TEV stage's constant color to match the PICA register + void SyncTevColor(unsigned stage_index, const Pica::Regs::TevStageConfig& config); + + /// Syncs the specified TEV stage's color and alpha multipliers to match the PICA register + void SyncTevMultipliers(unsigned stage_index, const Pica::Regs::TevStageConfig& config); + + /// Syncs the TEV combiner color buffer to match the PICA register + void SyncCombinerColor(); + + /// Syncs the TEV combiner write flags to match the PICA register + void SyncCombinerWriteFlags(); + + /// Syncs the remaining OpenGL drawing state to match the current PICA state + void SyncDrawState(); + + /// Copies the 3DS color framebuffer into the OpenGL color framebuffer texture + void ReloadColorBuffer(); + + /// Copies the 3DS depth framebuffer into the OpenGL depth framebuffer texture + void ReloadDepthBuffer(); + + /** + * Save the current OpenGL color framebuffer to the current PICA framebuffer in 3DS memory + * Loads the OpenGL framebuffer textures into temporary buffers + * Then copies into the 3DS framebuffer using proper Morton order + */ + void CommitColorBuffer(); + + /** + * Save the current OpenGL depth framebuffer to the current PICA framebuffer in 3DS memory + * Loads the OpenGL framebuffer textures into temporary buffers + * Then copies into the 3DS framebuffer using proper Morton order + */ + void CommitDepthBuffer(); + + 
RasterizerCacheOpenGL res_cache; + + std::vector vertex_batch; + + OpenGLState state; + + PAddr last_fb_color_addr; + PAddr last_fb_depth_addr; + + // Hardware rasterizer + TextureInfo fb_color_texture; + DepthTextureInfo fb_depth_texture; + OGLShader shader; + OGLVertexArray vertex_array; + OGLBuffer vertex_buffer; + OGLFramebuffer framebuffer; + + // Hardware vertex shader + GLuint attrib_position; + GLuint attrib_color; + GLuint attrib_texcoords; + + // Hardware fragment shader + GLuint uniform_alphatest_enabled; + GLuint uniform_alphatest_func; + GLuint uniform_alphatest_ref; + GLuint uniform_tex; + GLuint uniform_tev_combiner_buffer_color; + TEVConfigUniforms uniform_tev_cfgs[6]; +}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp new file mode 100644 index 000000000..6f88a8b21 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -0,0 +1,77 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/make_unique.h" +#include "common/math_util.h" + +#include "core/memory.h" + +#include "video_core/renderer_opengl/gl_rasterizer_cache.h" +#include "video_core/renderer_opengl/pica_to_gl.h" +#include "video_core/debug_utils/debug_utils.h" +#include "video_core/math.h" + +RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { + FullFlush(); +} + +void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config) { + PAddr texture_addr = config.config.GetPhysicalAddress(); + + const auto cached_texture = texture_cache.find(texture_addr); + + if (cached_texture != texture_cache.end()) { + state.texture_units[texture_unit].texture_2d = cached_texture->second->texture.handle; + state.Apply(); + } else { + std::unique_ptr new_texture = Common::make_unique(); + + new_texture->texture.Create(); + state.texture_units[texture_unit].texture_2d = new_texture->texture.handle; + state.Apply(); + + // TODO: Need to choose filters that correspond to PICA once register is declared + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, PicaToGL::WrapMode(config.config.wrap_s)); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, PicaToGL::WrapMode(config.config.wrap_t)); + + const auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format); + + new_texture->width = info.width; + new_texture->height = info.height; + new_texture->size = info.width * info.height * Pica::Regs::NibblesPerPixel(info.format); + + u8* texture_src_data = Memory::GetPhysicalPointer(texture_addr); + std::unique_ptr[]> temp_texture_buffer_rgba(new Math::Vec4[info.width * info.height]); + + for (int y = 0; y < info.height; ++y) { + for (int x = 0; x < info.width; ++x) { + temp_texture_buffer_rgba[x + info.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, 
info.height - 1 - y, info); + } + } + + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, info.width, info.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, temp_texture_buffer_rgba.get()); + + texture_cache.emplace(texture_addr, std::move(new_texture)); + } +} + +void RasterizerCacheOpenGL::NotifyFlush(PAddr addr, u32 size) { + // Flush any texture that falls in the flushed region + // TODO: Optimize by also inserting upper bound (addr + size) of each texture into the same map and also narrow using lower_bound + auto cache_upper_bound = texture_cache.upper_bound(addr + size); + for (auto it = texture_cache.begin(); it != cache_upper_bound;) { + if (MathUtil::IntervalsIntersect(addr, size, it->first, it->second->size)) { + it = texture_cache.erase(it); + } else { + ++it; + } + } +} + +void RasterizerCacheOpenGL::FullFlush() { + texture_cache.clear(); +} diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h new file mode 100644 index 000000000..96f3a925c --- /dev/null +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -0,0 +1,36 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#pragma once
+
+#include "gl_state.h"
+#include "gl_resource_manager.h"
+#include "video_core/pica.h"
+
+#include <map>
+#include <memory>
+
+/// Cache of OpenGL textures created from emulated memory, keyed by physical address.
+class RasterizerCacheOpenGL : NonCopyable {
+public:
+    ~RasterizerCacheOpenGL();
+
+    /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
+    void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config);
+
+    /// Flush any cached resource that touches the flushed region
+    void NotifyFlush(PAddr addr, u32 size);
+
+    /// Flush all cached OpenGL resources tracked by this cache manager
+    void FullFlush();
+
+private:
+    struct CachedTexture {
+        OGLTexture texture; // owning wrapper around the GL texture name
+        GLuint width;
+        GLuint height;
+        u32 size;           // length of the source region, compared against flushed ranges
+    };
+
+    // Ordered by physical address so that NotifyFlush can bound its scan with upper_bound
+    std::map<PAddr, std::unique_ptr<CachedTexture>> texture_cache;
+};
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
new file mode 100644
index 000000000..8f4ae28a4
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -0,0 +1,111 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
+
+// Every wrapper below follows the same contract: handle == 0 means "no GL object owned".
+// Create() is a no-op when an object is already owned, and Release() is a no-op when nothing is
+// owned, so a wrapper that was never created does not call into OpenGL when it is destroyed.
+
+// Textures
+OGLTexture::OGLTexture() : handle(0) {
+}
+
+OGLTexture::~OGLTexture() {
+    Release();
+}
+
+void OGLTexture::Create() {
+    if (handle != 0) {
+        return;
+    }
+
+    glGenTextures(1, &handle);
+}
+
+void OGLTexture::Release() {
+    if (handle == 0) {
+        return;
+    }
+
+    glDeleteTextures(1, &handle);
+    handle = 0;
+}
+
+// Shaders
+OGLShader::OGLShader() : handle(0) {
+}
+
+OGLShader::~OGLShader() {
+    Release();
+}
+
+void OGLShader::Create(const char* vert_shader, const char* frag_shader) {
+    if (handle != 0) {
+        return;
+    }
+
+    handle = ShaderUtil::LoadShaders(vert_shader, frag_shader);
+}
+
+void OGLShader::Release() {
+    if (handle == 0) {
+        return;
+    }
+
+    glDeleteProgram(handle);
+    handle = 0;
+}
+
+// Buffer objects
+OGLBuffer::OGLBuffer() : handle(0) {
+}
+
+OGLBuffer::~OGLBuffer() {
+    Release();
+}
+
+void OGLBuffer::Create() {
+    if (handle != 0) {
+        return;
+    }
+
+    glGenBuffers(1, &handle);
+}
+
+void OGLBuffer::Release() {
+    if (handle == 0) {
+        return;
+    }
+
+    glDeleteBuffers(1, &handle);
+    handle = 0;
+}
+
+// Vertex array objects
+OGLVertexArray::OGLVertexArray() : handle(0) {
+}
+
+OGLVertexArray::~OGLVertexArray() {
+    Release();
+}
+
+void OGLVertexArray::Create() {
+    if (handle != 0) {
+        return;
+    }
+
+    glGenVertexArrays(1, &handle);
+}
+
+void OGLVertexArray::Release() {
+    if (handle == 0) {
+        return;
+    }
+
+    glDeleteVertexArrays(1, &handle);
+    handle = 0;
+}
+
+// Framebuffers
+OGLFramebuffer::OGLFramebuffer() : handle(0) {
+}
+
+OGLFramebuffer::~OGLFramebuffer() {
+    Release();
+}
+
+void OGLFramebuffer::Create() {
+    if (handle != 0) {
+        return;
+    }
+
+    glGenFramebuffers(1, &handle);
+}
+
+void OGLFramebuffer::Release() {
+    if (handle == 0) {
+        return;
+    }
+
+    glDeleteFramebuffers(1, &handle);
+    handle = 0;
+}
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
new file mode 100644
index 000000000..975720d0a
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -0,0 +1,79 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+#include "generated/gl_3_2_core.h"
+
+class OGLTexture : public NonCopyable {
+public:
+    OGLTexture();
+    ~OGLTexture();
+
+    /// Creates a new internal OpenGL resource and stores the handle
+    void Create();
+
+    /// Deletes the internal OpenGL resource
+    void Release();
+
+    GLuint handle;
+};
+
+class OGLShader : public NonCopyable {
+public:
+    OGLShader();
+    ~OGLShader();
+
+    /// Creates a new internal OpenGL resource and stores the handle
+    void Create(const char* vert_shader, const char* frag_shader);
+
+    /// Deletes the internal OpenGL resource
+    void Release();
+
+    GLuint handle;
+};
+
+class OGLBuffer : public NonCopyable {
+public:
+    OGLBuffer();
+    ~OGLBuffer();
+
+    /// Creates a new internal OpenGL resource and stores the handle
+    void Create();
+
+    /// Deletes the internal OpenGL resource
+    void Release();
+
+    GLuint handle;
+};
+
+class OGLVertexArray : public NonCopyable {
+public:
+    OGLVertexArray();
+    ~OGLVertexArray();
+
+    /// Creates a new internal OpenGL resource and stores the handle
+    void Create();
+
+    /// Deletes the internal OpenGL resource
+    void Release();
+
+    GLuint handle;
+};
+
+class OGLFramebuffer : public NonCopyable {
+public:
+    OGLFramebuffer();
+    ~OGLFramebuffer();
+
+    /// Creates a new internal OpenGL resource and stores the handle
+    void Create();
+
+    /// Deletes the internal OpenGL resource
+    void Release();
+
+    GLuint handle;
+};
diff --git a/src/video_core/renderer_opengl/gl_shaders.h b/src/video_core/renderer_opengl/gl_shaders.h
index 746a37afe..8f0941230 100644
--- a/src/video_core/renderer_opengl/gl_shaders.h
+++ b/src/video_core/renderer_opengl/gl_shaders.h
@@ -42,4 +42,292 @@ void main() {
 }
 )";
 
+// Vertex shader for the hardware renderer: forwards color and the three texcoord sets in the
+// o[] array slots the fragment shader reads (2, 3 and 5) and flips the Y and Z axes of the position.
+const char g_vertex_shader_hw[] = R"(
+#version 150 core
+
+#define NUM_VTX_ATTR 7
+
+in vec4 vert_position;
+in vec4 vert_color;
+in vec2 vert_texcoords[3];
+
+out vec4 o[NUM_VTX_ATTR];
+
+void main() {
+    o[2] = vert_color;
+    o[3] = vec4(vert_texcoords[0].xy, vert_texcoords[1].xy);
+    o[5] = vec4(0.0, 0.0, vert_texcoords[2].xy);
+
+    gl_Position = vec4(vert_position.x, -vert_position.y, -vert_position.z, vert_position.w);
+}
+)";
+
+// Fragment shader for the hardware renderer: evaluates up to NUM_TEV_STAGES texture combiner
+// stages driven by the tev_cfgs uniforms, then applies the optional alpha test.
+// TODO: Create a shader constructor and cache that builds this program with minimal conditionals instead of using tev_cfg uniforms
+const char g_fragment_shader_hw[] = R"(
+#version 150 core
+
+#define NUM_VTX_ATTR 7
+#define NUM_TEV_STAGES 6
+
+#define SOURCE_PRIMARYCOLOR 0x0
+#define SOURCE_PRIMARYFRAGMENTCOLOR 0x1
+#define SOURCE_TEXTURE0 0x3
+#define SOURCE_TEXTURE1 0x4
+#define SOURCE_TEXTURE2 0x5
+#define SOURCE_TEXTURE3 0x6
+#define SOURCE_PREVIOUSBUFFER 0xd
+#define SOURCE_CONSTANT 0xe
+#define SOURCE_PREVIOUS 0xf
+
+#define COLORMODIFIER_SOURCECOLOR 0x0
+#define COLORMODIFIER_ONEMINUSSOURCECOLOR 0x1
+#define COLORMODIFIER_SOURCEALPHA 0x2
+#define COLORMODIFIER_ONEMINUSSOURCEALPHA 0x3
+#define COLORMODIFIER_SOURCERED 0x4
+#define COLORMODIFIER_ONEMINUSSOURCERED 0x5
+#define COLORMODIFIER_SOURCEGREEN 0x8
+#define COLORMODIFIER_ONEMINUSSOURCEGREEN 0x9
+#define COLORMODIFIER_SOURCEBLUE 0xc
+#define COLORMODIFIER_ONEMINUSSOURCEBLUE 0xd
+
+#define ALPHAMODIFIER_SOURCEALPHA 0x0
+#define ALPHAMODIFIER_ONEMINUSSOURCEALPHA 0x1
+#define ALPHAMODIFIER_SOURCERED 0x2
+#define ALPHAMODIFIER_ONEMINUSSOURCERED 0x3
+#define ALPHAMODIFIER_SOURCEGREEN 0x4
+#define ALPHAMODIFIER_ONEMINUSSOURCEGREEN 0x5
+#define ALPHAMODIFIER_SOURCEBLUE 0x6
+#define ALPHAMODIFIER_ONEMINUSSOURCEBLUE 0x7
+
+#define OPERATION_REPLACE 0
+#define OPERATION_MODULATE 1
+#define OPERATION_ADD 2
+#define OPERATION_ADDSIGNED 3
+#define OPERATION_LERP 4
+#define OPERATION_SUBTRACT 5
+#define OPERATION_MULTIPLYTHENADD 8
+#define OPERATION_ADDTHENMULTIPLY 9
+
+#define COMPAREFUNC_NEVER 0
+#define COMPAREFUNC_ALWAYS 1
+#define COMPAREFUNC_EQUAL 2
+#define COMPAREFUNC_NOTEQUAL 3
+#define COMPAREFUNC_LESSTHAN 4
+#define COMPAREFUNC_LESSTHANOREQUAL 5
+#define COMPAREFUNC_GREATERTHAN 6
+#define COMPAREFUNC_GREATERTHANOREQUAL 7
+
+in vec4 o[NUM_VTX_ATTR];
+out vec4 color;
+
+uniform bool alphatest_enabled;
+uniform int alphatest_func;
+uniform float alphatest_ref;
+
+uniform sampler2D tex[3];
+
+uniform vec4 tev_combiner_buffer_color;
+
+struct TEVConfig
+{
+    bool enabled;
+    ivec3 color_sources;
+    ivec3 alpha_sources;
+    ivec3 color_modifiers;
+    ivec3 alpha_modifiers;
+    ivec2 color_alpha_op;
+    ivec2 color_alpha_multiplier;
+    vec4 const_color;
+    bvec2 updates_combiner_buffer_color_alpha;
+};
+
+uniform TEVConfig tev_cfgs[NUM_TEV_STAGES];
+
+vec4 g_combiner_buffer;
+vec4 g_last_tex_env_out;
+vec4 g_const_color;
+
+vec4 GetSource(int source) {
+    if (source == SOURCE_PRIMARYCOLOR) {
+        return o[2];
+    } else if (source == SOURCE_PRIMARYFRAGMENTCOLOR) {
+        // HACK: Uses color value, but should really use fragment lighting output
+        return o[2];
+    } else if (source == SOURCE_TEXTURE0) {
+        return texture(tex[0], o[3].xy);
+    } else if (source == SOURCE_TEXTURE1) {
+        return texture(tex[1], o[3].zw);
+    } else if (source == SOURCE_TEXTURE2) {
+        // TODO: Unverified
+        return texture(tex[2], o[5].zw);
+    } else if (source == SOURCE_TEXTURE3) {
+        // TODO: no 4th texture?
+    } else if (source == SOURCE_PREVIOUSBUFFER) {
+        return g_combiner_buffer;
+    } else if (source == SOURCE_CONSTANT) {
+        return g_const_color;
+    } else if (source == SOURCE_PREVIOUS) {
+        return g_last_tex_env_out;
+    }
+
+    return vec4(0.0);
+}
+
+vec3 GetColorModifier(int factor, vec4 color) {
+    if (factor == COLORMODIFIER_SOURCECOLOR) {
+        return color.rgb;
+    } else if (factor == COLORMODIFIER_ONEMINUSSOURCECOLOR) {
+        return vec3(1.0) - color.rgb;
+    } else if (factor == COLORMODIFIER_SOURCEALPHA) {
+        return color.aaa;
+    } else if (factor == COLORMODIFIER_ONEMINUSSOURCEALPHA) {
+        return vec3(1.0) - color.aaa;
+    } else if (factor == COLORMODIFIER_SOURCERED) {
+        return color.rrr;
+    } else if (factor == COLORMODIFIER_ONEMINUSSOURCERED) {
+        return vec3(1.0) - color.rrr;
+    } else if (factor == COLORMODIFIER_SOURCEGREEN) {
+        return color.ggg;
+    } else if (factor == COLORMODIFIER_ONEMINUSSOURCEGREEN) {
+        return vec3(1.0) - color.ggg;
+    } else if (factor == COLORMODIFIER_SOURCEBLUE) {
+        return color.bbb;
+    } else if (factor == COLORMODIFIER_ONEMINUSSOURCEBLUE) {
+        return vec3(1.0) - color.bbb;
+    }
+
+    return vec3(0.0);
+}
+
+float GetAlphaModifier(int factor, vec4 color) {
+    if (factor == ALPHAMODIFIER_SOURCEALPHA) {
+        return color.a;
+    } else if (factor == ALPHAMODIFIER_ONEMINUSSOURCEALPHA) {
+        return 1.0 - color.a;
+    } else if (factor == ALPHAMODIFIER_SOURCERED) {
+        return color.r;
+    } else if (factor == ALPHAMODIFIER_ONEMINUSSOURCERED) {
+        return 1.0 - color.r;
+    } else if (factor == ALPHAMODIFIER_SOURCEGREEN) {
+        return color.g;
+    } else if (factor == ALPHAMODIFIER_ONEMINUSSOURCEGREEN) {
+        return 1.0 - color.g;
+    } else if (factor == ALPHAMODIFIER_SOURCEBLUE) {
+        return color.b;
+    } else if (factor == ALPHAMODIFIER_ONEMINUSSOURCEBLUE) {
+        return 1.0 - color.b;
+    }
+
+    return 0.0;
+}
+
+vec3 ColorCombine(int op, vec3 color[3]) {
+    if (op == OPERATION_REPLACE) {
+        return color[0];
+    } else if (op == OPERATION_MODULATE) {
+        return color[0] * color[1];
+    } else if (op == OPERATION_ADD) {
+        return min(color[0] + color[1], 1.0);
+    } else if (op == OPERATION_ADDSIGNED) {
+        return clamp(color[0] + color[1] - vec3(0.5), 0.0, 1.0);
+    } else if (op == OPERATION_LERP) {
+        return color[0] * color[2] + color[1] * (vec3(1.0) - color[2]);
+    } else if (op == OPERATION_SUBTRACT) {
+        return max(color[0] - color[1], 0.0);
+    } else if (op == OPERATION_MULTIPLYTHENADD) {
+        return min(color[0] * color[1] + color[2], 1.0);
+    } else if (op == OPERATION_ADDTHENMULTIPLY) {
+        return min(color[0] + color[1], 1.0) * color[2];
+    }
+
+    return vec3(0.0);
+}
+
+float AlphaCombine(int op, float alpha[3]) {
+    if (op == OPERATION_REPLACE) {
+        return alpha[0];
+    } else if (op == OPERATION_MODULATE) {
+        return alpha[0] * alpha[1];
+    } else if (op == OPERATION_ADD) {
+        return min(alpha[0] + alpha[1], 1.0);
+    } else if (op == OPERATION_ADDSIGNED) {
+        return clamp(alpha[0] + alpha[1] - 0.5, 0.0, 1.0);
+    } else if (op == OPERATION_LERP) {
+        return alpha[0] * alpha[2] + alpha[1] * (1.0 - alpha[2]);
+    } else if (op == OPERATION_SUBTRACT) {
+        return max(alpha[0] - alpha[1], 0.0);
+    } else if (op == OPERATION_MULTIPLYTHENADD) {
+        return min(alpha[0] * alpha[1] + alpha[2], 1.0);
+    } else if (op == OPERATION_ADDTHENMULTIPLY) {
+        return min(alpha[0] + alpha[1], 1.0) * alpha[2];
+    }
+
+    return 0.0;
+}
+
+void main(void) {
+    g_combiner_buffer = tev_combiner_buffer_color;
+    // Globals start out undefined in GLSL; give the "previous stage" value a defined start in case
+    // it is read before any enabled stage has written it
+    g_last_tex_env_out = vec4(0.0);
+
+    for (int tex_env_idx = 0; tex_env_idx < NUM_TEV_STAGES; ++tex_env_idx) {
+        if (tev_cfgs[tex_env_idx].enabled) {
+            g_const_color = tev_cfgs[tex_env_idx].const_color;
+
+            vec3 color_results[3] = vec3[3](GetColorModifier(tev_cfgs[tex_env_idx].color_modifiers.x, GetSource(tev_cfgs[tex_env_idx].color_sources.x)),
+                                            GetColorModifier(tev_cfgs[tex_env_idx].color_modifiers.y, GetSource(tev_cfgs[tex_env_idx].color_sources.y)),
+                                            GetColorModifier(tev_cfgs[tex_env_idx].color_modifiers.z, GetSource(tev_cfgs[tex_env_idx].color_sources.z)));
+            vec3 color_output = ColorCombine(tev_cfgs[tex_env_idx].color_alpha_op.x, color_results);
+
+            float alpha_results[3] = float[3](GetAlphaModifier(tev_cfgs[tex_env_idx].alpha_modifiers.x, GetSource(tev_cfgs[tex_env_idx].alpha_sources.x)),
+                                              GetAlphaModifier(tev_cfgs[tex_env_idx].alpha_modifiers.y, GetSource(tev_cfgs[tex_env_idx].alpha_sources.y)),
+                                              GetAlphaModifier(tev_cfgs[tex_env_idx].alpha_modifiers.z, GetSource(tev_cfgs[tex_env_idx].alpha_sources.z)));
+            float alpha_output = AlphaCombine(tev_cfgs[tex_env_idx].color_alpha_op.y, alpha_results);
+
+            g_last_tex_env_out = vec4(min(color_output * tev_cfgs[tex_env_idx].color_alpha_multiplier.x, 1.0), min(alpha_output * tev_cfgs[tex_env_idx].color_alpha_multiplier.y, 1.0));
+        }
+
+        if (tev_cfgs[tex_env_idx].updates_combiner_buffer_color_alpha.x) {
+            g_combiner_buffer.rgb = g_last_tex_env_out.rgb;
+        }
+
+        if (tev_cfgs[tex_env_idx].updates_combiner_buffer_color_alpha.y) {
+            g_combiner_buffer.a = g_last_tex_env_out.a;
+        }
+    }
+
+    if (alphatest_enabled) {
+        if (alphatest_func == COMPAREFUNC_NEVER) {
+            discard;
+        } else if (alphatest_func == COMPAREFUNC_ALWAYS) {
+
+        } else if (alphatest_func == COMPAREFUNC_EQUAL) {
+            if (g_last_tex_env_out.a != alphatest_ref) {
+                discard;
+            }
+        } else if (alphatest_func == COMPAREFUNC_NOTEQUAL) {
+            if (g_last_tex_env_out.a == alphatest_ref) {
+                discard;
+            }
+        } else if (alphatest_func == COMPAREFUNC_LESSTHAN) {
+            if (g_last_tex_env_out.a >= alphatest_ref) {
+                discard;
+            }
+        } else if (alphatest_func == COMPAREFUNC_LESSTHANOREQUAL) {
+            if (g_last_tex_env_out.a > alphatest_ref) {
+                discard;
+            }
+        } else if (alphatest_func == COMPAREFUNC_GREATERTHAN) {
+            if (g_last_tex_env_out.a <= alphatest_ref) {
+                discard;
+            }
+        } else if (alphatest_func == COMPAREFUNC_GREATERTHANOREQUAL) {
+            if (g_last_tex_env_out.a < alphatest_ref) {
+                discard;
+            }
+        }
+    }
+
+    color = g_last_tex_env_out;
+}
+)";
+
 }
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
new file mode 100644
index 000000000..ca8a371e7
--- /dev/null
+++
b/src/video_core/renderer_opengl/gl_state.cpp
@@ -0,0 +1,160 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/pica.h"
+
+// Shadow copy of the state most recently pushed to OpenGL through Apply()
+OpenGLState OpenGLState::cur_state;
+
+OpenGLState::OpenGLState() {
+    // These all match default OpenGL values
+    cull.enabled = false;
+    cull.mode = GL_BACK;
+
+    depth.test_enabled = false;
+    depth.test_func = GL_LESS;
+    depth.write_mask = GL_TRUE;
+
+    stencil.test_enabled = false;
+    stencil.test_func = GL_ALWAYS;
+    stencil.test_ref = 0;
+    stencil.test_mask = -1;
+    stencil.write_mask = -1;
+
+    blend.enabled = false;
+    blend.src_rgb_func = GL_ONE;
+    blend.dst_rgb_func = GL_ZERO;
+    blend.src_a_func = GL_ONE;
+    blend.dst_a_func = GL_ZERO;
+    blend.color.red = 0.0f;
+    blend.color.green = 0.0f;
+    blend.color.blue = 0.0f;
+    blend.color.alpha = 0.0f;
+
+    for (auto& texture_unit : texture_units) {
+        texture_unit.enabled_2d = false;
+        texture_unit.texture_2d = 0;
+    }
+
+    draw.framebuffer = 0;
+    draw.vertex_array = 0;
+    draw.vertex_buffer = 0;
+    draw.shader_program = 0;
+}
+
+/**
+ * Pushes this state to OpenGL, issuing a GL call only for each value that differs from
+ * cur_state, and then records this state as the current one.
+ */
+void OpenGLState::Apply() {
+    // Culling
+    if (cull.enabled != cur_state.cull.enabled) {
+        if (cull.enabled) {
+            glEnable(GL_CULL_FACE);
+        } else {
+            glDisable(GL_CULL_FACE);
+        }
+    }
+
+    if (cull.mode != cur_state.cull.mode) {
+        glCullFace(cull.mode);
+    }
+
+    // Depth test
+    if (depth.test_enabled != cur_state.depth.test_enabled) {
+        if (depth.test_enabled) {
+            glEnable(GL_DEPTH_TEST);
+        } else {
+            glDisable(GL_DEPTH_TEST);
+        }
+    }
+
+    if (depth.test_func != cur_state.depth.test_func) {
+        glDepthFunc(depth.test_func);
+    }
+
+    // Depth mask
+    if (depth.write_mask != cur_state.depth.write_mask) {
+        glDepthMask(depth.write_mask);
+    }
+
+    // Stencil test
+    if (stencil.test_enabled != cur_state.stencil.test_enabled) {
+        if (stencil.test_enabled) {
+            glEnable(GL_STENCIL_TEST);
+        } else {
+            glDisable(GL_STENCIL_TEST);
+        }
+    }
+
+    if (stencil.test_func != cur_state.stencil.test_func ||
+        stencil.test_ref != cur_state.stencil.test_ref ||
+        stencil.test_mask != cur_state.stencil.test_mask) {
+        glStencilFunc(stencil.test_func, stencil.test_ref, stencil.test_mask);
+    }
+
+    // Stencil mask
+    if (stencil.write_mask != cur_state.stencil.write_mask) {
+        glStencilMask(stencil.write_mask);
+    }
+
+    // Blending
+    if (blend.enabled != cur_state.blend.enabled) {
+        if (blend.enabled) {
+            glEnable(GL_BLEND);
+        } else {
+            glDisable(GL_BLEND);
+        }
+    }
+
+    if (blend.color.red != cur_state.blend.color.red ||
+        blend.color.green != cur_state.blend.color.green ||
+        blend.color.blue != cur_state.blend.color.blue ||
+        blend.color.alpha != cur_state.blend.color.alpha) {
+        glBlendColor(blend.color.red, blend.color.green, blend.color.blue, blend.color.alpha);
+    }
+
+    if (blend.src_rgb_func != cur_state.blend.src_rgb_func ||
+        blend.dst_rgb_func != cur_state.blend.dst_rgb_func ||
+        blend.src_a_func != cur_state.blend.src_a_func ||
+        blend.dst_a_func != cur_state.blend.dst_a_func) {
+        glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, blend.src_a_func, blend.dst_a_func);
+    }
+
+    // Textures
+    for (unsigned texture_index = 0; texture_index < ARRAY_SIZE(texture_units); ++texture_index) {
+        // enabled_2d is tracked but deliberately not forwarded to GL. The check that used to sit
+        // here compared the field with itself and therefore never fired, and GL_TEXTURE_2D is not
+        // a glEnable capability in core-profile contexts (which the gl_3_2_core header suggests is
+        // the target), so comparing against cur_state instead would only raise GL_INVALID_ENUM.
+        if (texture_units[texture_index].texture_2d != cur_state.texture_units[texture_index].texture_2d) {
+            glActiveTexture(GL_TEXTURE0 + texture_index);
+            glBindTexture(GL_TEXTURE_2D, texture_units[texture_index].texture_2d);
+        }
+    }
+
+    // Framebuffer
+    if (draw.framebuffer != cur_state.draw.framebuffer) {
+        glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer);
+    }
+
+    // Vertex array
+    if (draw.vertex_array != cur_state.draw.vertex_array) {
+        glBindVertexArray(draw.vertex_array);
+    }
+
+    // Vertex buffer
+    if (draw.vertex_buffer != cur_state.draw.vertex_buffer) {
+        glBindBuffer(GL_ARRAY_BUFFER, draw.vertex_buffer);
+    }
+
+    // Shader program
+    if (draw.shader_program != cur_state.draw.shader_program) {
+        glUseProgram(draw.shader_program);
+    }
+
+    cur_state = *this;
+}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
new file mode 100644
index 000000000..a56d31371
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -0,0 +1,70 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "generated/gl_3_2_core.h"
+
+/// Value-type snapshot of the OpenGL state this renderer manages; Apply() makes it current.
+class OpenGLState {
+public:
+    struct {
+        bool enabled; // GL_CULL_FACE
+        GLenum mode; // GL_CULL_FACE_MODE
+    } cull;
+
+    struct {
+        bool test_enabled; // GL_DEPTH_TEST
+        GLenum test_func; // GL_DEPTH_FUNC
+        GLboolean write_mask; // GL_DEPTH_WRITEMASK
+    } depth;
+
+    struct {
+        bool test_enabled; // GL_STENCIL_TEST
+        GLenum test_func; // GL_STENCIL_FUNC
+        GLint test_ref; // GL_STENCIL_REF
+        GLuint test_mask; // GL_STENCIL_VALUE_MASK
+        GLuint write_mask; // GL_STENCIL_WRITEMASK
+    } stencil;
+
+    struct {
+        bool enabled; // GL_BLEND
+        GLenum src_rgb_func; // GL_BLEND_SRC_RGB
+        GLenum dst_rgb_func; // GL_BLEND_DST_RGB
+        GLenum src_a_func; // GL_BLEND_SRC_ALPHA
+        GLenum dst_a_func; // GL_BLEND_DST_ALPHA
+
+        struct {
+            GLclampf red;
+            GLclampf green;
+            GLclampf blue;
+            GLclampf alpha;
+        } color; // GL_BLEND_COLOR
+    } blend;
+
+    // 3 texture units - one for each that is used in PICA fragment shader emulation
+    struct {
+        bool enabled_2d; // GL_TEXTURE_2D
+        GLuint texture_2d; // GL_TEXTURE_BINDING_2D
+    } texture_units[3];
+
+    struct {
+        GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
+        GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING
+        GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING
+        GLuint shader_program; // GL_CURRENT_PROGRAM
+    } draw;
+
+    OpenGLState();
+
+    /// Get the currently active OpenGL state
+    static const OpenGLState& GetCurState() {
+        return cur_state;
+    }
+
+    /// Apply this state as the current OpenGL state
+    void Apply();
+
+private:
+    static OpenGLState cur_state;
+};
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
new file mode 100644
index 000000000..8369c649e
--- /dev/null
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -0,0 +1,105 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+#include "video_core/pica.h"
+
+#include "generated/gl_3_2_core.h"
+
+namespace PicaToGL {
+
+/// Maps a PICA texture wrap mode to the GL wrap enum; logs and falls back to GL_CLAMP_TO_EDGE for unknown values.
+inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) {
+    static const GLenum wrap_mode_table[] = {
+        GL_CLAMP_TO_EDGE,  // WrapMode::ClampToEdge
+        0,                 // Unknown
+        GL_REPEAT,         // WrapMode::Repeat
+        GL_MIRRORED_REPEAT // WrapMode::MirroredRepeat
+    };
+
+    // Range check table for input
+    if (mode >= ARRAY_SIZE(wrap_mode_table)) {
+        LOG_CRITICAL(Render_OpenGL, "Unknown texture wrap mode %d", mode);
+        UNREACHABLE();
+
+        return GL_CLAMP_TO_EDGE;
+    }
+
+    GLenum gl_mode = wrap_mode_table[mode];
+
+    // Check for dummy values indicating an unknown mode
+    if (gl_mode == 0) {
+        LOG_CRITICAL(Render_OpenGL, "Unknown texture wrap mode %d", mode);
+        UNIMPLEMENTED();
+
+        return GL_CLAMP_TO_EDGE;
+    }
+
+    return gl_mode;
+}
+
+/// Maps a PICA blend factor index to the GL blend function enum; logs and falls back to GL_ONE when out of range.
+inline GLenum BlendFunc(u32 factor) {
+    static const GLenum blend_func_table[] = {
+        GL_ZERO,                     // BlendFactor::Zero
+        GL_ONE,                      // BlendFactor::One
+        GL_SRC_COLOR,                // BlendFactor::SourceColor
+        GL_ONE_MINUS_SRC_COLOR,      // BlendFactor::OneMinusSourceColor
+        GL_DST_COLOR,                // BlendFactor::DestColor
+        GL_ONE_MINUS_DST_COLOR,      // BlendFactor::OneMinusDestColor
+        GL_SRC_ALPHA,                // BlendFactor::SourceAlpha
+        GL_ONE_MINUS_SRC_ALPHA,      // BlendFactor::OneMinusSourceAlpha
+        GL_DST_ALPHA,                // BlendFactor::DestAlpha
+        GL_ONE_MINUS_DST_ALPHA,      // BlendFactor::OneMinusDestAlpha
+        GL_CONSTANT_COLOR,           // BlendFactor::ConstantColor
+        GL_ONE_MINUS_CONSTANT_COLOR, // BlendFactor::OneMinusConstantColor
+        GL_CONSTANT_ALPHA,           // BlendFactor::ConstantAlpha
+        GL_ONE_MINUS_CONSTANT_ALPHA, // BlendFactor::OneMinusConstantAlpha
+        GL_SRC_ALPHA_SATURATE,       // BlendFactor::SourceAlphaSaturate
+    };
+
+    // Range check table for input
+    if (factor >= ARRAY_SIZE(blend_func_table)) {
+        LOG_CRITICAL(Render_OpenGL, "Unknown blend factor %d", factor);
+        UNREACHABLE();
+
+        return GL_ONE;
+    }
+
+    return blend_func_table[factor];
+}
+
+/// Maps a PICA compare function index to the GL comparison enum; logs and falls back to GL_ALWAYS when out of range.
+inline GLenum CompareFunc(u32 func) {
+    static const GLenum compare_func_table[] = {
+        GL_NEVER,    // CompareFunc::Never
+        GL_ALWAYS,   // CompareFunc::Always
+        GL_EQUAL,    // CompareFunc::Equal
+        GL_NOTEQUAL, // CompareFunc::NotEqual
+        GL_LESS,     // CompareFunc::LessThan
+        GL_LEQUAL,   // CompareFunc::LessThanOrEqual
+        GL_GREATER,  // CompareFunc::GreaterThan
+        GL_GEQUAL,   // CompareFunc::GreaterThanOrEqual
+    };
+
+    // Range check table for input
+    if (func >= ARRAY_SIZE(compare_func_table)) {
+        LOG_CRITICAL(Render_OpenGL, "Unknown compare function %d", func);
+        UNREACHABLE();
+
+        return GL_ALWAYS;
+    }
+
+    return compare_func_table[func];
+}
+
+/// Converts four consecutive bytes into float components by dividing each by 255.
+inline std::array<GLfloat, 4> ColorRGBA8(const u8* bytes) {
+    return { { bytes[0] / 255.0f,
+               bytes[1] / 255.0f,
+               bytes[2] / 255.0f,
+               bytes[3] / 255.0f
+           } };
+}
+
+} // namespace
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 71ceb021b..16cf92e20 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -53,6 +53,7 @@ static std::array MakeOrthographicMatrix(const float width, const
 
 /// RendererOpenGL constructor
 RendererOpenGL::RendererOpenGL() {
+    hw_rasterizer.reset(new RasterizerOpenGL());
     resolution_width = std::max(VideoCore::kScreenTopWidth, VideoCore::kScreenBottomWidth);
     resolution_height =
VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight; } @@ -63,7 +64,9 @@ RendererOpenGL::~RendererOpenGL() { /// Swap buffers (render frame) void RendererOpenGL::SwapBuffers() { - render_window->MakeCurrent(); + // Maintain the rasterizer's state as a priority + OpenGLState prev_state = OpenGLState::GetCurState(); + state.Apply(); for(int i : {0, 1}) { const auto& framebuffer = GPU::g_regs.framebuffer_config[i]; @@ -110,7 +113,19 @@ void RendererOpenGL::SwapBuffers() { render_window->PollEvents(); render_window->SwapBuffers(); + prev_state.Apply(); + profiler.BeginFrame(); + + bool hw_renderer_enabled = VideoCore::g_hw_renderer_enabled; + if (Settings::values.use_hw_renderer != hw_renderer_enabled) { + // TODO: Save new setting value to config file for next startup + Settings::values.use_hw_renderer = hw_renderer_enabled; + + if (Settings::values.use_hw_renderer) { + hw_rasterizer->Reset(); + } + } } /** @@ -139,7 +154,11 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& // only allows rows to have a memory alignement of 4. 
ASSERT(pixel_stride % 4 == 0); - glBindTexture(GL_TEXTURE_2D, texture.handle); + state.texture_units[0].enabled_2d = true; + state.texture_units[0].texture_2d = texture.handle; + state.Apply(); + + glActiveTexture(GL_TEXTURE0); glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride); // Update existing texture @@ -151,7 +170,6 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& texture.gl_format, texture.gl_type, framebuffer_data); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - glBindTexture(GL_TEXTURE_2D, 0); } /** @@ -161,13 +179,15 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& */ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture) { - glBindTexture(GL_TEXTURE_2D, texture.handle); + state.texture_units[0].enabled_2d = true; + state.texture_units[0].texture_2d = texture.handle; + state.Apply(); + glActiveTexture(GL_TEXTURE0); u8 framebuffer_data[3] = { color_r, color_g, color_b }; // Update existing texture glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data); - glBindTexture(GL_TEXTURE_2D, 0); } /** @@ -175,7 +195,6 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color */ void RendererOpenGL::InitOpenGLObjects() { glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); - glDisable(GL_DEPTH_TEST); // Link shaders and get variable locations program_id = ShaderUtil::LoadShaders(GLShaders::g_vertex_shader, GLShaders::g_fragment_shader); @@ -189,10 +208,12 @@ void RendererOpenGL::InitOpenGLObjects() { // Generate VAO glGenVertexArrays(1, &vertex_array_handle); - glBindVertexArray(vertex_array_handle); + + state.draw.vertex_array = vertex_array_handle; + state.draw.vertex_buffer = vertex_buffer_handle; + state.Apply(); // Attach vertex data to VAO - glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_handle); glBufferData(GL_ARRAY_BUFFER, 
sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); glVertexAttribPointer(attrib_position, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex), (GLvoid*)offsetof(ScreenRectVertex, position)); glVertexAttribPointer(attrib_tex_coord, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex), (GLvoid*)offsetof(ScreenRectVertex, tex_coord)); @@ -206,14 +227,19 @@ void RendererOpenGL::InitOpenGLObjects() { // Allocation of storage is deferred until the first frame, when we // know the framebuffer size. - glBindTexture(GL_TEXTURE_2D, texture.handle); + state.texture_units[0].enabled_2d = true; + state.texture_units[0].texture_2d = texture.handle; + state.Apply(); + + glActiveTexture(GL_TEXTURE0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); } - glBindTexture(GL_TEXTURE_2D, 0); + + hw_rasterizer->InitObjects(); } void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, @@ -264,7 +290,11 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, UNIMPLEMENTED(); } - glBindTexture(GL_TEXTURE_2D, texture.handle); + state.texture_units[0].enabled_2d = true; + state.texture_units[0].texture_2d = texture.handle; + state.Apply(); + + glActiveTexture(GL_TEXTURE0); glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, texture.gl_format, texture.gl_type, nullptr); } @@ -280,8 +310,10 @@ void RendererOpenGL::DrawSingleScreenRotated(const TextureInfo& texture, float x ScreenRectVertex(x+w, y+h, 0.f, 1.f), }; - glBindTexture(GL_TEXTURE_2D, texture.handle); - glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_handle); + state.texture_units[0].enabled_2d = true; + state.texture_units[0].texture_2d = texture.handle; + state.Apply(); + glBufferSubData(GL_ARRAY_BUFFER, 0, 
sizeof(vertices), vertices.data()); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); } @@ -295,7 +327,8 @@ void RendererOpenGL::DrawScreens() { glViewport(0, 0, layout.width, layout.height); glClear(GL_COLOR_BUFFER_BIT); - glUseProgram(program_id); + state.draw.shader_program = program_id; + state.Apply(); // Set projection matrix std::array ortho_matrix = MakeOrthographicMatrix((float)layout.width, diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index cd782428e..2ec2e14ca 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -13,6 +13,8 @@ #include "core/hw/gpu.h" #include "video_core/renderer_base.h" +#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" class EmuWindow; @@ -49,18 +51,18 @@ private: }; void InitOpenGLObjects(); - static void ConfigureFramebufferTexture(TextureInfo& texture, - const GPU::Regs::FramebufferConfig& framebuffer); + void ConfigureFramebufferTexture(TextureInfo& texture, + const GPU::Regs::FramebufferConfig& framebuffer); void DrawScreens(); void DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h); void UpdateFramerate(); // Loads framebuffer from emulated memory into the active OpenGL texture. - static void LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, - const TextureInfo& texture); + void LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, + const TextureInfo& texture); // Fills active OpenGL texture with the given RGB color. 
- static void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, - const TextureInfo& texture); + void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, + const TextureInfo& texture); /// Computes the viewport rectangle MathUtil::Rectangle GetViewportExtent(); @@ -71,6 +73,8 @@ private: int resolution_width; ///< Current resolution width int resolution_height; ///< Current resolution height + OpenGLState state; + // OpenGL object IDs GLuint vertex_array_handle; GLuint vertex_buffer_handle; diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 42e3bdd5b..d4d907d5e 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -6,6 +6,7 @@ #include "common/emu_window.h" #include "core/core.h" +#include "core/settings.h" #include "video_core/video_core.h" #include "video_core/renderer_base.h" @@ -19,6 +20,8 @@ namespace VideoCore { EmuWindow* g_emu_window = nullptr; ///< Frontend emulator window RendererBase* g_renderer = nullptr; ///< Renderer plugin +std::atomic g_hw_renderer_enabled; + /// Initialize the video core void Init(EmuWindow* emu_window) { g_emu_window = emu_window; diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index f885bec21..3f24df7bd 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -8,6 +8,8 @@ #include "renderer_base.h" +#include + //////////////////////////////////////////////////////////////////////////////////////////////////// // Video Core namespace @@ -31,6 +33,9 @@ static const int kScreenBottomHeight = 240; ///< 3DS bottom screen height extern RendererBase* g_renderer; ///< Renderer plugin extern EmuWindow* g_emu_window; ///< Emu window +// TODO: Wrap this in a user settings struct along with any other graphics settings (often set from qt ui) +extern std::atomic g_hw_renderer_enabled; + /// Start the video core void Start(); From 16fbba3c2a21bf92e9dd6d17c89da9bccf339691 Mon Sep 17 00:00:00 2001 From: tfarley Date: Mon, 
18 May 2015 21:24:43 -0700 Subject: [PATCH 3/4] MakeCurrent race condition fix --- src/citra_qt/bootmanager.cpp | 4 ++-- src/citra_qt/main.cpp | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp index ab9403007..ea46bb1da 100644 --- a/src/citra_qt/bootmanager.cpp +++ b/src/citra_qt/bootmanager.cpp @@ -28,11 +28,11 @@ EmuThread::EmuThread(GRenderWindow* render_window) : exec_step(false), running(false), stop_run(false), render_window(render_window) { - - connect(this, SIGNAL(started()), render_window, SLOT(moveContext())); } void EmuThread::run() { + render_window->MakeCurrent(); + stop_run = false; // holds whether the cpu was running during the last iteration, diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp index 2ea33ebc5..f6010459a 100644 --- a/src/citra_qt/main.cpp +++ b/src/citra_qt/main.cpp @@ -216,6 +216,7 @@ void GMainWindow::BootGame(std::string filename) { // Create and start the emulation thread emu_thread = Common::make_unique(render_window); emit EmulationStarting(emu_thread.get()); + render_window->moveContext(); emu_thread->start(); // BlockingQueuedConnection is important here, it makes sure we've finished refreshing our views before the CPU continues From 62668688e1055dbfcbff824acfd9c9f970fc2c09 Mon Sep 17 00:00:00 2001 From: tfarley Date: Fri, 22 May 2015 14:49:42 -0700 Subject: [PATCH 4/4] Flush for y2r (moflex) --- src/core/hle/service/y2r_u.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp index ce822e990..15987e028 100644 --- a/src/core/hle/service/y2r_u.cpp +++ b/src/core/hle/service/y2r_u.cpp @@ -9,7 +9,11 @@ #include "core/hle/hle.h" #include "core/hle/kernel/event.h" #include "core/hle/service/y2r_u.h" +#include "core/mem_map.h" +#include "core/memory.h" + #include "video_core/utils.h" +#include "video_core/video_core.h" 
//////////////////////////////////////////////////////////////////////////////////////////////////// // Namespace Y2R_U @@ -260,6 +264,13 @@ static void StartConversion(Service::Interface* self) { break; } } + + // dst_image_size would seem to be perfect for this, but it doesn't include the stride :( + u32 total_output_size = conversion_params.input_lines * + (conversion_params.dst_transfer_unit + conversion_params.dst_stride); + VideoCore::g_renderer->hw_rasterizer->NotifyFlush( + Memory::VirtualToPhysicalAddress(conversion_params.dst_address), total_output_size); + LOG_DEBUG(Service_Y2R, "called"); completion_event->Signal();