diff --git a/src/android/.gitignore b/src/android/.gitignore index 5edb4eeb0..4423a0b45 100644 --- a/src/android/.gitignore +++ b/src/android/.gitignore @@ -8,3 +8,21 @@ /build /captures .externalNativeBuild + +# CXX compile cache +app/.cxx + +# Google Services (e.g. APIs or Firebase) +google-services.json + +# Freeline +freeline.py +freeline/ +freeline_project_description.json + +# fastlane +fastlane/report.xml +fastlane/Preview.html +fastlane/screenshots +fastlane/test_output +fastlane/readme.md diff --git a/src/audio_core/hle/source.cpp b/src/audio_core/hle/source.cpp index 604b0593d..24bbbbf6f 100644 --- a/src/audio_core/hle/source.cpp +++ b/src/audio_core/hle/source.cpp @@ -345,7 +345,6 @@ void Source::GenerateFrame() { break; case InterpolationMode::Polyphase: // TODO(merry): Implement polyphase interpolation - LOG_DEBUG(Audio_DSP, "Polyphase interpolation unimplemented; falling back to linear"); AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier, current_frame, frame_position); break; diff --git a/src/citra_qt/applets/mii_selector.cpp b/src/citra_qt/applets/mii_selector.cpp index 3ee25805f..2099e675e 100644 --- a/src/citra_qt/applets/mii_selector.cpp +++ b/src/citra_qt/applets/mii_selector.cpp @@ -8,11 +8,7 @@ #include #include #include "citra_qt/applets/mii_selector.h" -#include "common/file_util.h" #include "common/string_util.h" -#include "core/file_sys/archive_extsavedata.h" -#include "core/file_sys/file_backend.h" -#include "core/hle/service/ptm/ptm.h" QtMiiSelectorDialog::QtMiiSelectorDialog(QWidget* parent, QtMiiSelector* mii_selector_) : QDialog(parent), mii_selector(mii_selector_) { @@ -33,37 +29,9 @@ QtMiiSelectorDialog::QtMiiSelectorDialog(QWidget* parent, QtMiiSelector* mii_sel miis.push_back(HLE::Applets::MiiSelector::GetStandardMiiResult().selected_mii_data); combobox->addItem(tr("Standard Mii")); - - std::string nand_directory{FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)}; - 
FileSys::ArchiveFactory_ExtSaveData extdata_archive_factory(nand_directory, true); - - auto archive_result = extdata_archive_factory.Open(Service::PTM::ptm_shared_extdata_id, 0); - if (archive_result.Succeeded()) { - auto archive = std::move(archive_result).Unwrap(); - - FileSys::Path file_path = "/CFL_DB.dat"; - FileSys::Mode mode{}; - mode.read_flag.Assign(1); - - auto file_result = archive->OpenFile(file_path, mode); - if (file_result.Succeeded()) { - auto file = std::move(file_result).Unwrap(); - - u32 saved_miis_offset = 0x8; - // The Mii Maker has a 100 Mii limit on the 3ds - for (int i = 0; i < 100; ++i) { - HLE::Applets::MiiData mii; - std::array mii_raw; - file->Read(saved_miis_offset, sizeof(mii), mii_raw.data()); - std::memcpy(&mii, mii_raw.data(), sizeof(mii)); - if (mii.mii_id != 0) { - std::string name = Common::UTF16BufferToUTF8(mii.mii_name); - miis.push_back(mii); - combobox->addItem(QString::fromStdString(name)); - } - saved_miis_offset += sizeof(mii); - } - } + for (const auto& mii : Frontend::LoadMiis()) { + miis.push_back(mii); + combobox->addItem(QString::fromStdString(Common::UTF16BufferToUTF8(mii.mii_name))); } if (combobox->count() > static_cast(config.initially_selected_mii_index)) { diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 90608157e..fdc702521 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -26,6 +26,10 @@ namespace Log { +Filter filter; +void SetGlobalFilter(const Filter& f) { + filter = f; +} /** * Static state as a singleton. 
*/ @@ -58,14 +62,6 @@ public: backends.erase(it, backends.end()); } - const Filter& GetGlobalFilter() const { - return filter; - } - - void SetGlobalFilter(const Filter& f) { - filter = f; - } - Backend* GetBackend(std::string_view backend_name) { const auto it = std::find_if(backends.begin(), backends.end(), @@ -144,6 +140,10 @@ void ColorConsoleBackend::Write(const Entry& entry) { PrintColoredMessage(entry); } +void LogcatBackend::Write(const Entry& entry) { + PrintMessageToLogcat(entry); +} + FileBackend::FileBackend(const std::string& filename) : bytes_written(0) { if (FileUtil::Exists(filename + ".old.txt")) { FileUtil::Delete(filename + ".old.txt"); @@ -283,10 +283,6 @@ const char* GetLevelName(Level log_level) { return "Invalid"; } -void SetGlobalFilter(const Filter& filter) { - Impl::Instance().SetGlobalFilter(filter); -} - void AddBackend(std::unique_ptr backend) { Impl::Instance().AddBackend(std::move(backend)); } @@ -303,10 +299,6 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename, unsigned int line_num, const char* function, const char* format, const fmt::format_args& args) { auto& instance = Impl::Instance(); - const auto& filter = instance.GetGlobalFilter(); - if (!filter.CheckMessage(log_class, log_level)) - return; - instance.PushEntry(log_class, log_level, filename, line_num, function, fmt::vformat(format, args)); } diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h index a6714ffd0..907c6a297 100644 --- a/src/common/logging/backend.h +++ b/src/common/logging/backend.h @@ -14,8 +14,6 @@ namespace Log { -class Filter; - /** * A log entry. Log entries are store in a structured format to permit more varied output * formatting on different frontends, as well as facilitating filtering and aggregation. 
@@ -83,6 +81,21 @@ public: void Write(const Entry& entry) override; }; +/** + * Backend that writes to the Android logcat + */ +class LogcatBackend : public Backend { +public: + static const char* Name() { + return "logcat"; + } + + const char* GetName() const override { + return Name(); + } + void Write(const Entry& entry) override; +}; + /** * Backend that writes to a file passed into the constructor */ @@ -136,10 +149,4 @@ const char* GetLogClassName(Class log_class); */ const char* GetLevelName(Level log_level); -/** - * The global filter will prevent any messages from even being processed if they are filtered. Each - * backend can have a filter, but if the level is lower than the global filter, the backend will - * never get the message - */ -void SetGlobalFilter(const Filter& filter); } // namespace Log diff --git a/src/common/logging/filter.h b/src/common/logging/filter.h index bbadbcba1..058c7b345 100644 --- a/src/common/logging/filter.h +++ b/src/common/logging/filter.h @@ -9,43 +9,4 @@ #include #include "common/logging/log.h" -namespace Log { - -/** - * Implements a log message filter which allows different log classes to have different minimum - * severity levels. The filter can be changed at runtime and can be parsed from a string to allow - * editing via the interface or loading from a configuration file. - */ -class Filter { -public: - /// Initializes the filter with all classes having `default_level` as the minimum level. - explicit Filter(Level default_level = Level::Info); - - /// Resets the filter so that all classes have `level` as the minimum displayed level. - void ResetAll(Level level); - /// Sets the minimum level of `log_class` (and not of its subclasses) to `level`. - void SetClassLevel(Class log_class, Level level); - - /** - * Parses a filter string and applies it to this filter. - * - * A filter string consists of a space-separated list of filter rules, each of the format - * `:`. 
`` is a log class name, with subclasses separated using periods. - * `*` is allowed as a class name and will reset all filters to the specified level. `` - * a severity level name which will be set as the minimum logging level of the matched classes. - * Rules are applied left to right, with each rule overriding previous ones in the sequence. - * - * A few examples of filter rules: - * - `*:Info` -- Resets the level of all classes to Info. - * - `Service:Info` -- Sets the level of Service to Info. - * - `Service.FS:Trace` -- Sets the level of the Service.FS class to Trace. - */ - void ParseFilterString(std::string_view filter_view); - - /// Matches class/level combination against the filter, returning true if it passed. - bool CheckMessage(Class log_class, Level level) const; - -private: - std::array(Class::Count)> class_levels; -}; -} // namespace Log +namespace Log {} // namespace Log diff --git a/src/common/logging/log.h b/src/common/logging/log.h index a14e2ff37..3b3810851 100644 --- a/src/common/logging/log.h +++ b/src/common/logging/log.h @@ -4,13 +4,14 @@ #pragma once +#include #include #include "common/common_types.h" namespace Log { // trims up to and including the last of ../, ..\, src/, src\ in a string -constexpr const char* TrimSourcePath(std::string_view source) { +inline const char* TrimSourcePath(std::string_view source) { const auto rfind = [source](const std::string_view match) { return source.rfind(match) == source.npos ? 0 : (source.rfind(match) + match.size()); }; @@ -113,6 +114,47 @@ enum class Class : ClassType { Count ///< Total number of logging classes }; +/** + * Implements a log message filter which allows different log classes to have different minimum + * severity levels. The filter can be changed at runtime and can be parsed from a string to allow + * editing via the interface or loading from a configuration file. + */ +class Filter { +public: + /// Initializes the filter with all classes having `default_level` as the minimum level. 
+ explicit Filter(Level default_level = Level::Info); + + /// Resets the filter so that all classes have `level` as the minimum displayed level. + void ResetAll(Level level); + /// Sets the minimum level of `log_class` (and not of its subclasses) to `level`. + void SetClassLevel(Class log_class, Level level); + + /** + * Parses a filter string and applies it to this filter. + * + * A filter string consists of a space-separated list of filter rules, each of the format + * `:`. `` is a log class name, with subclasses separated using periods. + * `*` is allowed as a class name and will reset all filters to the specified level. `` + * a severity level name which will be set as the minimum logging level of the matched classes. + * Rules are applied left to right, with each rule overriding previous ones in the sequence. + * + * A few examples of filter rules: + * - `*:Info` -- Resets the level of all classes to Info. + * - `Service:Info` -- Sets the level of Service to Info. + * - `Service.FS:Trace` -- Sets the level of the Service.FS class to Trace. + */ + void ParseFilterString(std::string_view filter_view); + + /// Matches class/level combination against the filter, returning true if it passed. + bool CheckMessage(Class log_class, Level level) const; + +private: + std::array(Class::Count)> class_levels; +}; +extern Filter filter; + +void SetGlobalFilter(const Filter& f); + /// Logs a message to the global logger, using fmt void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename, unsigned int line_num, const char* function, const char* format, @@ -121,6 +163,9 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename, template void FmtLogMessage(Class log_class, Level log_level, const char* filename, unsigned int line_num, const char* function, const char* format, const Args&... 
args) { + if (!filter.CheckMessage(log_class, log_level)) + return; + FmtLogMessageImpl(log_class, log_level, filename, line_num, function, format, fmt::make_format_args(args...)); } diff --git a/src/common/logging/text_formatter.cpp b/src/common/logging/text_formatter.cpp index aa0dbd0c6..3d919ab10 100644 --- a/src/common/logging/text_formatter.cpp +++ b/src/common/logging/text_formatter.cpp @@ -34,13 +34,7 @@ std::string FormatLogMessage(const Entry& entry) { void PrintMessage(const Entry& entry) { const auto str = FormatLogMessage(entry).append(1, '\n'); -#ifdef ANDROID - // Android's log level enum are offset by '2' - const int android_log_level = static_cast(entry.log_level) + 2; - __android_log_print(android_log_level, "CitraNative", "%s", str.c_str()); -#else fputs(str.c_str(), stderr); -#endif } void PrintColoredMessage(const Entry& entry) { @@ -78,7 +72,7 @@ void PrintColoredMessage(const Entry& entry) { } SetConsoleTextAttribute(console_handle, color); -#elif !defined(ANDROID) +#else #define ESC "\x1b" const char* color = ""; switch (entry.log_level) { @@ -111,9 +105,40 @@ void PrintColoredMessage(const Entry& entry) { #ifdef _WIN32 SetConsoleTextAttribute(console_handle, original_info.wAttributes); -#elif !defined(ANDROID) +#else fputs(ESC "[0m", stderr); #undef ESC #endif } + +void PrintMessageToLogcat(const Entry& entry) { +#ifdef ANDROID + const auto str = FormatLogMessage(entry); + + android_LogPriority android_log_priority; + switch (entry.log_level) { + case Level::Trace: + android_log_priority = ANDROID_LOG_VERBOSE; + break; + case Level::Debug: + android_log_priority = ANDROID_LOG_DEBUG; + break; + case Level::Info: + android_log_priority = ANDROID_LOG_INFO; + break; + case Level::Warning: + android_log_priority = ANDROID_LOG_WARN; + break; + case Level::Error: + android_log_priority = ANDROID_LOG_ERROR; + break; + case Level::Critical: + android_log_priority = ANDROID_LOG_FATAL; + break; + case Level::Count: + UNREACHABLE(); + } + 
__android_log_print(android_log_priority, "CitraNative", "%s", str.c_str()); +#endif +} } // namespace Log diff --git a/src/common/logging/text_formatter.h b/src/common/logging/text_formatter.h index b6d9e57c8..13430951d 100644 --- a/src/common/logging/text_formatter.h +++ b/src/common/logging/text_formatter.h @@ -17,4 +17,6 @@ std::string FormatLogMessage(const Entry& entry); void PrintMessage(const Entry& entry); /// Prints the same message as `PrintMessage`, but colored according to the severity level. void PrintColoredMessage(const Entry& entry); +/// Formats and prints a log entry to the android logcat. +void PrintMessageToLogcat(const Entry& entry); } // namespace Log diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 6ee110869..6b98a0d28 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -108,8 +108,8 @@ add_library(core STATIC frontend/framebuffer_layout.h frontend/image_interface.h frontend/input.h - frontend/mic.h frontend/mic.cpp + frontend/mic.h frontend/scope_acquire_context.cpp frontend/scope_acquire_context.h gdbstub/gdbstub.cpp diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp index 7ba67320b..72e1ebe06 100644 --- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp +++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp @@ -953,6 +953,9 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) { #define INC_PC(l) ptr += sizeof(arm_inst) + l #define INC_PC_STUB ptr += sizeof(arm_inst) +#ifdef ANDROID +#define GDB_BP_CHECK +#else #define GDB_BP_CHECK \ cpu->Cpsr &= ~(1 << 5); \ cpu->Cpsr |= cpu->TFlag << 5; \ @@ -965,6 +968,7 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) { goto END; \ } \ } +#endif // GCC and Clang have a C++ extension to support a lookup table of labels. Otherwise, fallback to a // clunky switch statement. 
@@ -1652,11 +1656,13 @@ DISPATCH : { goto END; } +#ifndef ANDROID // Find breakpoint if one exists within the block if (GDBStub::IsConnected()) { breakpoint_data = GDBStub::GetNextBreakpointFromAddress(cpu->Reg[15], GDBStub::BreakpointType::Execute); } +#endif inst_base = (arm_inst*)&trans_cache_buf[ptr]; GOTO_NEXT_INST; diff --git a/src/core/arm/skyeye_common/armstate.cpp b/src/core/arm/skyeye_common/armstate.cpp index 775618a8b..5e773b0e3 100644 --- a/src/core/arm/skyeye_common/armstate.cpp +++ b/src/core/arm/skyeye_common/armstate.cpp @@ -182,13 +182,16 @@ void ARMul_State::ResetMPCoreCP15Registers() { CP15[CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE] = 0x00000000; CP15[CP15_TLB_DEBUG_CONTROL] = 0x00000000; } - +#ifdef ANDROID +static void CheckMemoryBreakpoint(u32 address, GDBStub::BreakpointType type) {} +#else static void CheckMemoryBreakpoint(u32 address, GDBStub::BreakpointType type) { if (GDBStub::IsServerEnabled() && GDBStub::CheckBreakpoint(address, type)) { LOG_DEBUG(Debug, "Found memory breakpoint @ {:08x}", address); GDBStub::Break(true); } } +#endif u8 ARMul_State::ReadMemory8(u32 address) const { CheckMemoryBreakpoint(address, GDBStub::BreakpointType::Read); diff --git a/src/core/frontend/applets/mii_selector.cpp b/src/core/frontend/applets/mii_selector.cpp index 2ca23f1db..2fdfe3049 100644 --- a/src/core/frontend/applets/mii_selector.cpp +++ b/src/core/frontend/applets/mii_selector.cpp @@ -2,7 +2,12 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include "common/file_util.h" +#include "common/string_util.h" +#include "core/file_sys/archive_extsavedata.h" +#include "core/file_sys/file_backend.h" #include "core/frontend/applets/mii_selector.h" +#include "core/hle/service/ptm/ptm.h" namespace Frontend { @@ -10,6 +15,42 @@ void MiiSelector::Finalize(u32 return_code, HLE::Applets::MiiData mii) { data = {return_code, mii}; } +std::vector LoadMiis() { + std::vector miis; + + std::string nand_directory{FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)}; + FileSys::ArchiveFactory_ExtSaveData extdata_archive_factory(nand_directory, true); + + auto archive_result = extdata_archive_factory.Open(Service::PTM::ptm_shared_extdata_id, 0); + if (archive_result.Succeeded()) { + auto archive = std::move(archive_result).Unwrap(); + + FileSys::Path file_path = "/CFL_DB.dat"; + FileSys::Mode mode{}; + mode.read_flag.Assign(1); + + auto file_result = archive->OpenFile(file_path, mode); + if (file_result.Succeeded()) { + auto file = std::move(file_result).Unwrap(); + + u32 saved_miis_offset = 0x8; + // The Mii Maker has a 100 Mii limit on the 3ds + for (int i = 0; i < 100; ++i) { + HLE::Applets::MiiData mii; + std::array mii_raw; + file->Read(saved_miis_offset, sizeof(mii), mii_raw.data()); + std::memcpy(&mii, mii_raw.data(), sizeof(mii)); + if (mii.mii_id != 0) { + miis.push_back(mii); + } + saved_miis_offset += sizeof(mii); + } + } + } + + return miis; +} + void DefaultMiiSelector::Setup(const Frontend::MiiSelectorConfig& config) { MiiSelector::Setup(config); Finalize(0, HLE::Applets::MiiSelector::GetStandardMiiResult().selected_mii_data); diff --git a/src/core/frontend/applets/mii_selector.h b/src/core/frontend/applets/mii_selector.h index 53578282b..3a5633a52 100644 --- a/src/core/frontend/applets/mii_selector.h +++ b/src/core/frontend/applets/mii_selector.h @@ -50,6 +50,8 @@ protected: MiiSelectorData data; }; +std::vector LoadMiis(); + class DefaultMiiSelector final : public MiiSelector { public: void Setup(const 
MiiSelectorConfig& config) override; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index a6f9860eb..bb81f117c 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -54,6 +54,8 @@ add_library(video_core STATIC renderer_opengl/post_processing_opengl.h renderer_opengl/renderer_opengl.cpp renderer_opengl/renderer_opengl.h + renderer_opengl/texture_downloader_es.cpp + renderer_opengl/texture_downloader_es.h renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.cpp renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.h renderer_opengl/texture_filters/bicubic/bicubic.cpp @@ -99,11 +101,12 @@ add_library(video_core STATIC ) set(SHADER_FILES + renderer_opengl/depth_to_color.frag + renderer_opengl/depth_to_color.vert + renderer_opengl/ds_to_color.frag renderer_opengl/texture_filters/anime4k/refine.frag - renderer_opengl/texture_filters/anime4k/refine.vert renderer_opengl/texture_filters/anime4k/x_gradient.frag renderer_opengl/texture_filters/anime4k/y_gradient.frag - renderer_opengl/texture_filters/anime4k/y_gradient.vert renderer_opengl/texture_filters/bicubic/bicubic.frag renderer_opengl/texture_filters/scale_force/scale_force.frag renderer_opengl/texture_filters/tex_coord.vert @@ -121,7 +124,7 @@ endforeach() add_custom_target(shaders BYPRODUCTS ${SHADER_HEADERS} - COMMAND cmake -P ${CMAKE_CURRENT_SOURCE_DIR}/generate_shaders.cmake + COMMAND "${CMAKE_COMMAND}" -P ${CMAKE_CURRENT_SOURCE_DIR}/generate_shaders.cmake SOURCES ${SHADER_FILES} ) add_dependencies(video_core shaders) diff --git a/src/video_core/renderer_opengl/depth_to_color.frag b/src/video_core/renderer_opengl/depth_to_color.frag new file mode 100644 index 000000000..e69bed890 --- /dev/null +++ b/src/video_core/renderer_opengl/depth_to_color.frag @@ -0,0 +1,10 @@ +//? 
#version 320 es + +out highp uint color; + +uniform highp sampler2D depth; +uniform int lod; + +void main() { + color = uint(texelFetch(depth, ivec2(gl_FragCoord.xy), lod).x * (exp2(32.0) - 1.0)); +} diff --git a/src/video_core/renderer_opengl/texture_filters/anime4k/y_gradient.vert b/src/video_core/renderer_opengl/depth_to_color.vert similarity index 60% rename from src/video_core/renderer_opengl/texture_filters/anime4k/y_gradient.vert rename to src/video_core/renderer_opengl/depth_to_color.vert index 376a67b79..866d43b46 100644 --- a/src/video_core/renderer_opengl/texture_filters/anime4k/y_gradient.vert +++ b/src/video_core/renderer_opengl/depth_to_color.vert @@ -1,12 +1,8 @@ -//? #version 330 -out vec2 input_max; - -uniform sampler2D tex_size; +//? #version 320 es const vec2 vertices[4] = vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); void main() { gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); - input_max = textureSize(tex_size, 0) * 2 - 1; } diff --git a/src/video_core/renderer_opengl/ds_to_color.frag b/src/video_core/renderer_opengl/ds_to_color.frag new file mode 100644 index 000000000..954217064 --- /dev/null +++ b/src/video_core/renderer_opengl/ds_to_color.frag @@ -0,0 +1,9 @@ +//? 
#version 320 es +#extension GL_ARM_shader_framebuffer_fetch_depth_stencil : enable + +out highp uint color; + +void main() { + color = uint(gl_LastFragDepthARM * (exp2(24.0) - 1.0)) << 8; + color |= uint(gl_LastFragStencilARM); +} diff --git a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp index 2175c62bd..ee842a859 100644 --- a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp +++ b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp @@ -220,9 +220,175 @@ private: GLint d24s8_abgr_viewport_u_id; }; +class ShaderD24S8toRGBA8 final : public FormatReinterpreterBase { +public: + ShaderD24S8toRGBA8() { + constexpr std::string_view vs_source = R"( +out vec2 dst_coord; + +uniform mediump ivec2 dst_size; + +const vec2 vertices[4] = + vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); + +void main() { + gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); + dst_coord = (vertices[gl_VertexID] / 2.0 + 0.5) * vec2(dst_size); +} +)"; + + constexpr std::string_view fs_source = R"( +in mediump vec2 dst_coord; + +out lowp vec4 frag_color; + +uniform highp sampler2D depth; +uniform lowp usampler2D stencil; +uniform mediump ivec2 dst_size; +uniform mediump ivec2 src_size; +uniform mediump ivec2 src_offset; + +void main() { + mediump ivec2 tex_coord; + if (src_size == dst_size) { + tex_coord = ivec2(dst_coord); + } else { + highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x); + mediump int y = tex_index / src_size.x; + tex_coord = ivec2(tex_index - y * src_size.x, y); + } + tex_coord -= src_offset; + + highp uint depth_val = + uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0)); + lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x; + highp uvec4 components = + uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu); + frag_color = vec4(components) / (exp2(8.0) - 1.0); +} +)"; + + program.Create(vs_source.data(), 
fs_source.data()); + dst_size_loc = glGetUniformLocation(program.handle, "dst_size"); + src_size_loc = glGetUniformLocation(program.handle, "src_size"); + src_offset_loc = glGetUniformLocation(program.handle, "src_offset"); + vao.Create(); + + auto state = OpenGLState::GetCurState(); + auto cur_program = state.draw.shader_program; + state.draw.shader_program = program.handle; + state.Apply(); + glUniform1i(glGetUniformLocation(program.handle, "stencil"), 1); + state.draw.shader_program = cur_program; + state.Apply(); + + // OES_texture_view doesn't seem to support D24S8 views, at least on adreno + // so instead it will do an intermediate copy before running through the shader + if (GLAD_GL_ARB_texture_view) { + texture_view_func = glTextureView; + } else { + LOG_INFO(Render_OpenGL, + "Texture views are unsupported, reinterpretation will do intermediate copy"); + temp_tex.Create(); + } + } + + void Reinterpret(GLuint src_tex, const Common::Rectangle& src_rect, GLuint read_fb_handle, + GLuint dst_tex, const Common::Rectangle& dst_rect, + GLuint draw_fb_handle) override { + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state; + state.texture_units[0].texture_2d = src_tex; + + if (texture_view_func) { + temp_tex.Create(); + glActiveTexture(GL_TEXTURE1); + texture_view_func(temp_tex.handle, GL_TEXTURE_2D, src_tex, GL_DEPTH24_STENCIL8, 0, 1, 0, + 1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + } else if (src_rect.top > temp_rect.top || src_rect.right > temp_rect.right) { + temp_tex.Release(); + temp_tex.Create(); + state.texture_units[1].texture_2d = temp_tex.handle; + state.Apply(); + glActiveTexture(GL_TEXTURE1); + glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, src_rect.right, src_rect.top); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, 
GL_TEXTURE_MIN_FILTER, GL_NEAREST); + temp_rect = src_rect; + } + + state.texture_units[1].texture_2d = temp_tex.handle; + state.draw.draw_framebuffer = draw_fb_handle; + state.draw.shader_program = program.handle; + state.draw.vertex_array = vao.handle; + state.viewport = {static_cast(dst_rect.left), static_cast(dst_rect.bottom), + static_cast(dst_rect.GetWidth()), + static_cast(dst_rect.GetHeight())}; + state.Apply(); + + glActiveTexture(GL_TEXTURE1); + if (!texture_view_func) { + glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, + temp_tex.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, + src_rect.GetWidth(), src_rect.GetHeight(), 1); + } + glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, + 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight()); + glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight()); + glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + + if (texture_view_func) { + temp_tex.Release(); + } + } + +private: + decltype(glTextureView) texture_view_func = nullptr; + OGLProgram program{}; + GLint dst_size_loc{-1}, src_size_loc{-1}, src_offset_loc{-1}; + OGLVertexArray vao{}; + OGLTexture temp_tex{}; + Common::Rectangle temp_rect{0, 0, 0, 0}; +}; + +class CopyImageSubData final : public FormatReinterpreterBase { + void Reinterpret(GLuint src_tex, const Common::Rectangle& src_rect, GLuint read_fb_handle, + GLuint dst_tex, const Common::Rectangle& dst_rect, + GLuint draw_fb_handle) override { + glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, dst_tex, + GL_TEXTURE_2D, 0, dst_rect.left, dst_rect.bottom, 0, src_rect.GetWidth(), + src_rect.GetHeight(), 1); + } +}; + 
FormatReinterpreterOpenGL::FormatReinterpreterOpenGL() { - reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8}, - std::make_unique()); + std::string_view vendor{reinterpret_cast(glGetString(GL_VENDOR))}; + if (vendor.find("NVIDIA") != vendor.npos) { + reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8}, + std::make_unique()); + // Nvidia bends the spec and allows direct copies between color and depth formats + // might as well take advantage of it + LOG_INFO(Render_OpenGL, "Using glCopyImageSubData for D24S8 to RGBA8 reinterpretation"); + } else if ((GLAD_GL_ARB_stencil_texturing && GLAD_GL_ARB_texture_storage) || GLES) { + reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8}, + std::make_unique()); + LOG_INFO(Render_OpenGL, "Using shader for D24S8 to RGBA8 reinterpretation"); + } else { + reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8}, + std::make_unique()); + LOG_INFO(Render_OpenGL, "Using pbo for D24S8 to RGBA8 reinterpretation"); + } reinterpreters.emplace(PixelFormatPair{PixelFormat::RGB5A1, PixelFormat::RGBA4}, std::make_unique()); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 4097d0cee..4a331c630 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -52,16 +52,17 @@ RasterizerOpenGL::RasterizerOpenGL() : is_amd(IsVendorAmd()), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd), uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE, false), index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE, false), - texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false) { + texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false), + texture_lf_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false) { - allow_shadow = GLAD_GL_ARB_shader_image_load_store && GLAD_GL_ARB_shader_image_size && - 
GLAD_GL_ARB_framebuffer_no_attachments; + allow_shadow = GLES || (GLAD_GL_ARB_shader_image_load_store && GLAD_GL_ARB_shader_image_size && + GLAD_GL_ARB_framebuffer_no_attachments); if (!allow_shadow) { LOG_WARNING(Render_OpenGL, "Shadow might not be able to render because of unsupported OpenGL extensions."); } - if (!GLAD_GL_ARB_copy_image) { + if (!GLAD_GL_ARB_copy_image && !GLES) { LOG_WARNING(Render_OpenGL, "ARB_copy_image not supported. Some games might produce artifacts."); } @@ -149,11 +150,15 @@ RasterizerOpenGL::RasterizerOpenGL() framebuffer.Create(); // Allocate and bind texture buffer lut textures + texture_buffer_lut_lf.Create(); texture_buffer_lut_rg.Create(); texture_buffer_lut_rgba.Create(); + state.texture_buffer_lut_lf.texture_buffer = texture_buffer_lut_lf.handle; state.texture_buffer_lut_rg.texture_buffer = texture_buffer_lut_rg.handle; state.texture_buffer_lut_rgba.texture_buffer = texture_buffer_lut_rgba.handle; state.Apply(); + glActiveTexture(TextureUnits::TextureBufferLUT_LF.Enum()); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_lf_buffer.GetHandle()); glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum()); glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_buffer.GetHandle()); glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum()); @@ -777,7 +782,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { } OGLTexture temp_tex; - if (need_duplicate_texture && GLAD_GL_ARB_copy_image) { + if (need_duplicate_texture && (GLAD_GL_ARB_copy_image || GLES)) { // The game is trying to use a surface as a texture and framebuffer at the same time // which causes unpredictable behavior on the host. // Making a copy to sample from eliminates this issue and seems to be fairly cheap. 
@@ -821,6 +826,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { // Sync the LUTs within the texture buffer SyncAndUploadLUTs(); + SyncAndUploadLUTsLF(); // Sync the uniform data UploadUniforms(accelerate); @@ -942,6 +948,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { // Blending case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable): + if (GLES) { + // With GLES, we need this in the fragment shader to emulate logic operations + shader_dirty = true; + } SyncBlendEnabled(); break; case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending): @@ -1062,6 +1072,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { // Logic op case PICA_REG_INDEX(framebuffer.output_merger.logic_op): + if (GLES) { + // With GLES, we need this in the fragment shader to emulate logic operations + shader_dirty = true; + } SyncLogicOp(); break; @@ -1816,11 +1830,31 @@ void RasterizerOpenGL::SyncAlphaTest() { } void RasterizerOpenGL::SyncLogicOp() { - state.logic_op = PicaToGL::LogicOp(Pica::g_state.regs.framebuffer.output_merger.logic_op); + const auto& regs = Pica::g_state.regs; + state.logic_op = PicaToGL::LogicOp(regs.framebuffer.output_merger.logic_op); + + if (GLES) { + if (!regs.framebuffer.output_merger.alphablend_enable) { + if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) { + // Color output is disabled by logic operation. We use color write mask to skip + // color but allow depth write. + state.color_mask = {}; + } + } + } } void RasterizerOpenGL::SyncColorWriteMask() { const auto& regs = Pica::g_state.regs; + if (GLES) { + if (!regs.framebuffer.output_merger.alphablend_enable) { + if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) { + // Color output is disabled by logic operation. We use color write mask to skip + // color but allow depth write. Return early to avoid overwriting this. 
+ return; + } + } + } auto IsColorWriteEnabled = [&](u32 value) { return (regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0) ? GL_TRUE @@ -2005,18 +2039,11 @@ void RasterizerOpenGL::SyncShadowTextureBias() { } } -void RasterizerOpenGL::SyncAndUploadLUTs() { - constexpr std::size_t max_size = sizeof(GLvec2) * 256 * Pica::LightingRegs::NumLightingSampler + - sizeof(GLvec2) * 128 + // fog - sizeof(GLvec2) * 128 * 3 + // proctex: noise + color + alpha - sizeof(GLvec4) * 256 + // proctex - sizeof(GLvec4) * 256; // proctex diff +void RasterizerOpenGL::SyncAndUploadLUTsLF() { + constexpr std::size_t max_size = + sizeof(GLvec2) * 256 * Pica::LightingRegs::NumLightingSampler + sizeof(GLvec2) * 128; // fog - if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty && - !uniform_block_data.proctex_noise_lut_dirty && - !uniform_block_data.proctex_color_map_dirty && - !uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty && - !uniform_block_data.proctex_diff_lut_dirty) { + if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty) { return; } @@ -2024,8 +2051,8 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { GLintptr offset; bool invalidate; std::size_t bytes_used = 0; - glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.GetHandle()); - std::tie(buffer, offset, invalidate) = texture_buffer.Map(max_size, sizeof(GLvec4)); + glBindBuffer(GL_TEXTURE_BUFFER, texture_lf_buffer.GetHandle()); + std::tie(buffer, offset, invalidate) = texture_lf_buffer.Map(max_size, sizeof(GLvec4)); // Sync the lighting luts if (uniform_block_data.lighting_lut_dirty_any || invalidate) { @@ -2050,8 +2077,8 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { uniform_block_data.lighting_lut_dirty[index] = false; } } + uniform_block_data.lighting_lut_dirty_any = false; } - uniform_block_data.lighting_lut_dirty_any = false; // Sync the fog lut if (uniform_block_data.fog_lut_dirty || invalidate) { @@ -2073,6 +2100,28 @@ 
void RasterizerOpenGL::SyncAndUploadLUTs() { uniform_block_data.fog_lut_dirty = false; } + texture_lf_buffer.Unmap(bytes_used); +} + +void RasterizerOpenGL::SyncAndUploadLUTs() { + constexpr std::size_t max_size = sizeof(GLvec2) * 128 * 3 + // proctex: noise + color + alpha + sizeof(GLvec4) * 256 + // proctex + sizeof(GLvec4) * 256; // proctex diff + + if (!uniform_block_data.proctex_noise_lut_dirty && + !uniform_block_data.proctex_color_map_dirty && + !uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty && + !uniform_block_data.proctex_diff_lut_dirty) { + return; + } + + u8* buffer; + GLintptr offset; + bool invalidate; + std::size_t bytes_used = 0; + glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.GetHandle()); + std::tie(buffer, offset, invalidate) = texture_buffer.Map(max_size, sizeof(GLvec4)); + // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap auto SyncProcTexValueLUT = [this, buffer, offset, invalidate, &bytes_used]( const std::array& lut, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index a28e9bda1..4748655d5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -233,6 +233,7 @@ private: /// Syncs and uploads the lighting, fog and proctex LUTs void SyncAndUploadLUTs(); + void SyncAndUploadLUTsLF(); /// Upload the uniform blocks to the uniform buffer object void UploadUniforms(bool accelerate_draw); @@ -303,6 +304,7 @@ private: OGLStreamBuffer uniform_buffer; OGLStreamBuffer index_buffer; OGLStreamBuffer texture_buffer; + OGLStreamBuffer texture_lf_buffer; OGLFramebuffer framebuffer; GLint uniform_buffer_alignment; std::size_t uniform_size_aligned_vs; @@ -310,6 +312,7 @@ private: SamplerInfo texture_cube_sampler; + OGLTexture texture_buffer_lut_lf; OGLTexture texture_buffer_lut_rg; OGLTexture texture_buffer_lut_rgba; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 
b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index d7112126d..25e345098 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,7 @@ #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_vars.h" +#include "video_core/renderer_opengl/texture_downloader_es.h" #include "video_core/renderer_opengl/texture_filters/texture_filterer.h" #include "video_core/utils.h" #include "video_core/video_core.h" @@ -64,13 +66,6 @@ static constexpr std::array fb_format_tuples_oes = {{ {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 }}; -static constexpr std::array depth_format_tuples = {{ - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16 - {}, - {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24 - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8 -}}; - const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); if (type == SurfaceType::Color) { @@ -87,79 +82,6 @@ const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { return tex_tuple; } -/** - * OpenGL ES does not support glGetTexImage. Obtain the pixels by attaching the - * texture to a framebuffer. 
- * Originally from https://github.com/apitrace/apitrace/blob/master/retrace/glstate_images.cpp - */ -static void GetTexImageOES(GLenum target, GLint level, GLenum format, GLenum type, GLint height, - GLint width, GLint depth, GLubyte* pixels, std::size_t size) { - memset(pixels, 0x80, size); - - OpenGLState cur_state = OpenGLState::GetCurState(); - OpenGLState state; - - GLenum texture_binding = GL_NONE; - switch (target) { - case GL_TEXTURE_2D: - texture_binding = GL_TEXTURE_BINDING_2D; - break; - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - texture_binding = GL_TEXTURE_BINDING_CUBE_MAP; - break; - case GL_TEXTURE_3D_OES: - texture_binding = GL_TEXTURE_BINDING_3D_OES; - default: - return; - } - - GLint texture = 0; - glGetIntegerv(texture_binding, &texture); - if (!texture) { - return; - } - - OGLFramebuffer fbo; - fbo.Create(); - state.draw.read_framebuffer = fbo.handle; - state.Apply(); - - switch (target) { - case GL_TEXTURE_2D: - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture, - level); - GLenum status = glCheckFramebufferStatus(GL_READ_FRAMEBUFFER); - if (status != GL_FRAMEBUFFER_COMPLETE) { - LOG_DEBUG(Render_OpenGL, "Framebuffer is incomplete, status: {:X}", status); - } - glReadPixels(0, 0, width, height, format, type, pixels); - break; - } - case GL_TEXTURE_3D_OES: - for (int i = 0; i < depth; i++) { - glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_3D, - texture, level, i); - glReadPixels(0, 0, width, height, format, type, pixels + 4 * i * width * height); - } - 
break; - } - - cur_state.Apply(); - - fbo.Release(); -} - template static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { return boost::make_iterator_range(map.equal_range(interval)); @@ -329,8 +251,14 @@ OGLTexture RasterizerCacheOpenGL::AllocateSurfaceTexture(const FormatTuple& form cur_state.Apply(); glActiveTexture(GL_TEXTURE0); - glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0, - format_tuple.format, format_tuple.type, nullptr); + if (GL_ARB_texture_storage) { + // Allocate all possible mipmap levels upfront + auto levels = std::log2(std::max(width, height)) + 1; + glTexStorage2D(GL_TEXTURE_2D, levels, format_tuple.internal_format, width, height); + } else { + glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0, + format_tuple.format, format_tuple.type, nullptr); + } glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); @@ -352,17 +280,22 @@ static void AllocateTextureCube(GLuint texture, const FormatTuple& format_tuple, cur_state.texture_cube_unit.texture_cube = texture; cur_state.Apply(); glActiveTexture(TextureUnits::TextureCube.Enum()); - - for (auto faces : { - GL_TEXTURE_CUBE_MAP_POSITIVE_X, - GL_TEXTURE_CUBE_MAP_POSITIVE_Y, - GL_TEXTURE_CUBE_MAP_POSITIVE_Z, - GL_TEXTURE_CUBE_MAP_NEGATIVE_X, - GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, - GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, - }) { - glTexImage2D(faces, 0, format_tuple.internal_format, width, width, 0, format_tuple.format, - format_tuple.type, nullptr); + if (GL_ARB_texture_storage) { + // Allocate all possible mipmap levels in case the game uses them later + auto levels = std::log2(width) + 1; + glTexStorage2D(GL_TEXTURE_CUBE_MAP, levels, format_tuple.internal_format, width, width); + } else { + for (auto faces : { + GL_TEXTURE_CUBE_MAP_POSITIVE_X, + GL_TEXTURE_CUBE_MAP_POSITIVE_Y, + GL_TEXTURE_CUBE_MAP_POSITIVE_Z, + GL_TEXTURE_CUBE_MAP_NEGATIVE_X, + 
GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, + GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, + }) { + glTexImage2D(faces, 0, format_tuple.internal_format, width, width, 0, + format_tuple.format, format_tuple.type, nullptr); + } } // Restore previous texture bindings @@ -775,23 +708,28 @@ void CachedSurface::DumpTexture(GLuint target_tex, u64 tex_hash) { LOG_INFO(Render_OpenGL, "Dumping texture to {}", dump_path); std::vector decoded_texture; decoded_texture.resize(width * height * 4); - glBindTexture(GL_TEXTURE_2D, target_tex); + OpenGLState state = OpenGLState::GetCurState(); + GLuint old_texture = state.texture_units[0].texture_2d; + state.Apply(); /* GetTexImageOES is used even if not using OpenGL ES to work around a small issue that happens if using custom textures with texture dumping at the same. Let's say there's 2 textures that are both 32x32 and one of them gets replaced with a - higher quality 256x256 texture. If the 256x256 texture is displayed first and the 32x32 - texture gets uploaded to the same underlying OpenGL texture, the 32x32 texture will - appear in the corner of the 256x256 texture. - If texture dumping is enabled and the 32x32 is undumped, Citra will attempt to dump it. - Since the underlying OpenGL texture is still 256x256, Citra crashes because it thinks the - texture is only 32x32. + higher quality 256x256 texture. If the 256x256 texture is displayed first and the + 32x32 texture gets uploaded to the same underlying OpenGL texture, the 32x32 texture + will appear in the corner of the 256x256 texture. If texture dumping is enabled and + the 32x32 is undumped, Citra will attempt to dump it. Since the underlying OpenGL + texture is still 256x256, Citra crashes because it thinks the texture is only 32x32. GetTexImageOES conveniently only dumps the specified region, and works on both desktop and ES. 
*/ - GetTexImageOES(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, height, width, 0, - &decoded_texture[0], decoded_texture.size()); - glBindTexture(GL_TEXTURE_2D, 0); + // if the backend isn't OpenGL ES, this won't be initialized yet + if (!owner.texture_downloader_es) + owner.texture_downloader_es = std::make_unique(false); + owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, + height, width, &decoded_texture[0]); + state.texture_units[0].texture_2d = old_texture; + state.Apply(); Common::FlipRGBA8Texture(decoded_texture, width, height); if (!image_interface->EncodePNG(dump_path, decoded_texture, width, height)) LOG_ERROR(Render_OpenGL, "Failed to save decoded texture"); @@ -901,8 +839,9 @@ void CachedSurface::UploadGLTexture(Common::Rectangle rect, GLuint read_fb_ MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64)); void CachedSurface::DownloadGLTexture(const Common::Rectangle& rect, GLuint read_fb_handle, GLuint draw_fb_handle) { - if (type == SurfaceType::Fill) + if (type == SurfaceType::Fill) { return; + } MICROPROFILE_SCOPE(OpenGL_TextureDL); @@ -941,9 +880,9 @@ void CachedSurface::DownloadGLTexture(const Common::Rectangle& rect, GLuint glActiveTexture(GL_TEXTURE0); if (GLES) { - GetTexImageOES(GL_TEXTURE_2D, 0, tuple.format, tuple.type, rect.GetHeight(), - rect.GetWidth(), 0, &gl_buffer[buffer_offset], - gl_buffer.size() - buffer_offset); + owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, + rect.GetHeight(), rect.GetWidth(), + &gl_buffer[buffer_offset]); } else { glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]); } @@ -967,6 +906,20 @@ void CachedSurface::DownloadGLTexture(const Common::Rectangle& rect, GLuint glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, texture.handle, 0); } + switch (glCheckFramebufferStatus(GL_FRAMEBUFFER)) { + case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT: + 
LOG_WARNING(Render_OpenGL, "Framebuffer incomplete attachment"); + break; + case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS: + LOG_WARNING(Render_OpenGL, "Framebuffer incomplete dimensions"); + break; + case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT: + LOG_WARNING(Render_OpenGL, "Framebuffer incomplete missing attachment"); + break; + case GL_FRAMEBUFFER_UNSUPPORTED: + LOG_WARNING(Render_OpenGL, "Framebuffer unsupported"); + break; + } glReadPixels(static_cast(rect.left), static_cast(rect.bottom), static_cast(rect.GetWidth()), static_cast(rect.GetHeight()), tuple.format, tuple.type, &gl_buffer[buffer_offset]); @@ -1083,13 +1036,18 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() { texture_filterer = std::make_unique(Settings::values.texture_filter_name, resolution_scale_factor); format_reinterpreter = std::make_unique(); + if (GLES) + texture_downloader_es = std::make_unique(false); read_framebuffer.Create(); draw_framebuffer.Create(); } RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { +#ifndef ANDROID + // This is for switching renderers, which is unsupported on Android, and costly on shutdown ClearAll(false); +#endif } MICROPROFILE_DEFINE(OpenGL_BlitSurface, "OpenGL", "BlitSurface", MP_RGB(128, 192, 64)); @@ -1304,9 +1262,14 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Pica::Texture::TextureInf width = surface->GetScaledWidth(); height = surface->GetScaledHeight(); } - for (u32 level = surface->max_level + 1; level <= max_level; ++level) { - glTexImage2D(GL_TEXTURE_2D, level, format_tuple.internal_format, width >> level, - height >> level, 0, format_tuple.format, format_tuple.type, nullptr); + // If we are using ARB_texture_storage then we've already allocated all of the mipmap + // levels + if (!GL_ARB_texture_storage) { + for (u32 level = surface->max_level + 1; level <= max_level; ++level) { + glTexImage2D(GL_TEXTURE_2D, level, format_tuple.internal_format, width >> level, + height >> level, 0, format_tuple.format, format_tuple.type, + nullptr); + 
} } if (surface->is_custom || !texture_filterer->IsNull()) { // TODO: proper mipmap support for custom textures @@ -1806,6 +1769,8 @@ void RasterizerCacheOpenGL::ClearAll(bool flush) { } void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, Surface flush_surface) { + std::lock_guard lock{mutex}; + if (size == 0) return; @@ -1842,6 +1807,8 @@ void RasterizerCacheOpenGL::FlushAll() { } void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner) { + std::lock_guard lock{mutex}; + if (size == 0) return; @@ -1917,6 +1884,8 @@ Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { } void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { + std::lock_guard lock{mutex}; + if (surface->registered) { return; } @@ -1926,6 +1895,8 @@ void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { } void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { + std::lock_guard lock{mutex}; + if (!surface->registered) { return; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 108aa7bad..da795a968 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #ifdef __GNUC__ @@ -170,6 +171,8 @@ private: bool valid = false; }; +class RasterizerCacheOpenGL; + struct CachedSurface : SurfaceParams, std::enable_shared_from_this { CachedSurface(RasterizerCacheOpenGL& owner) : owner{owner} {} ~CachedSurface(); @@ -266,6 +269,15 @@ struct CachedTextureCube { std::shared_ptr nz; }; +static constexpr std::array depth_format_tuples = {{ + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16 + {}, + {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24 + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8 +}}; + +class 
TextureDownloaderES; + class RasterizerCacheOpenGL : NonCopyable { public: RasterizerCacheOpenGL(); @@ -365,11 +377,14 @@ private: std::unordered_map texture_cube_cache; + std::recursive_mutex mutex; + public: OGLTexture AllocateSurfaceTexture(const FormatTuple& format_tuple, u32 width, u32 height); std::unique_ptr texture_filterer; std::unique_ptr format_reinterpreter; + std::unique_ptr texture_downloader_es; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index c4a429ccb..f1516c4e3 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -514,11 +514,21 @@ private: } case OpCode::Id::RCP: { + if (!sanitize_mul) { + // When accurate multiplication is OFF, NaN are not really handled. This is a + // workaround to cheaply avoid NaN. Fixes graphical issues in Ocarina of Time. + shader.AddLine("if ({}.x != 0.0)", src1); + } SetDest(swizzle, dest_reg, fmt::format("(1.0 / {}.x)", src1), 4, 1); break; } case OpCode::Id::RSQ: { + if (!sanitize_mul) { + // When accurate multiplication is OFF, NaN are not really handled. This is a + // workaround to cheaply avoid NaN. Fixes graphical issues in Ocarina of Time. 
+ shader.AddLine("if ({}.x > 0.0)", src1); + } SetDest(swizzle, dest_reg, fmt::format("inversesqrt({}.x)", src1), 4, 1); break; } @@ -807,6 +817,13 @@ private: void Generate() { if (sanitize_mul) { +#ifdef ANDROID + // Use a cheaper sanitize_mul on Android, as mobile GPUs struggle here + // This seems to be sufficient at least for Ocarina of Time and Attack on Titan accurate + // multiplication bugs + shader.AddLine( + "#define sanitize_mul(lhs, rhs) mix(lhs * rhs, vec4(0.0), isnan(lhs * rhs))"); +#else shader.AddLine("vec4 sanitize_mul(vec4 lhs, vec4 rhs) {{"); ++shader.scope; shader.AddLine("vec4 product = lhs * rhs;"); @@ -814,6 +831,7 @@ private: "isnan(lhs)), isnan(product));"); --shader.scope; shader.AddLine("}}\n"); +#endif } // Add declarations for registers diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index ee4cea9f2..8e1110b02 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -102,7 +102,9 @@ static std::string GetVertexInterfaceDeclaration(bool is_output, bool separable_ out += R"( out gl_PerVertex { vec4 gl_Position; +#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance) float gl_ClipDistance[2]; +#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance) }; )"; } @@ -127,6 +129,17 @@ PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) { state.texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0; + if (GLES) { + // With GLES, we need this in the fragment shader to emulate logic operations + state.alphablend_enable = + Pica::g_state.regs.framebuffer.output_merger.alphablend_enable == 1; + state.logic_op = regs.framebuffer.output_merger.logic_op; + } else { + // We don't need these otherwise, reset them to avoid unnecessary shader generation + state.alphablend_enable = {}; + state.logic_op = {}; + } + // Copy relevant tev stages fields. 
// We don't sync const_color here because of the high variance, it is a // shader uniform instead. @@ -607,13 +620,15 @@ static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned if (!IsPassThroughTevStage(stage)) { const std::string index_name = std::to_string(index); - out += fmt::format("vec3 color_results_{}[3] = vec3[3](", index_name); + out += fmt::format("vec3 color_results_{}_1 = ", index_name); AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name); - out += ", "; + out += fmt::format(";\nvec3 color_results_{}_2 = ", index_name); AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name); - out += ", "; + out += fmt::format(";\nvec3 color_results_{}_3 = ", index_name); AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name); - out += ");\n"; + out += fmt::format(";\nvec3 color_results_{}[3] = vec3[3](color_results_{}_1, " + "color_results_{}_2, color_results_{}_3);\n", + index_name, index_name, index_name, index_name); // Round the output of each TEV stage to maintain the PICA's 8 bits of precision out += fmt::format("vec3 color_output_{} = byteround(", index_name); @@ -1216,14 +1231,21 @@ float ProcTexNoiseCoef(vec2 x) { ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) { const auto& state = config.state; + std::string out; - std::string out = R"( + if (GLES) { + out += R"( +#define ALLOW_SHADOW (defined(CITRA_GLES)) +)"; + } else { + out += R"( #extension GL_ARB_shader_image_load_store : enable #extension GL_ARB_shader_image_size : enable #define ALLOW_SHADOW (defined(GL_ARB_shader_image_load_store) && defined(GL_ARB_shader_image_size)) )"; + } - if (separable_shader) { + if (separable_shader && !GLES) { out += "#extension GL_ARB_separate_shader_objects : enable\n"; } @@ -1244,6 +1266,7 @@ uniform sampler2D tex0; uniform sampler2D tex1; uniform sampler2D tex2; uniform samplerCube 
tex_cube; +uniform samplerBuffer texture_buffer_lut_lf; uniform samplerBuffer texture_buffer_lut_rg; uniform samplerBuffer texture_buffer_lut_rgba; @@ -1267,7 +1290,7 @@ vec3 quaternion_rotate(vec4 q, vec3 v) { } float LookupLightingLUT(int lut_index, int index, float delta) { - vec2 entry = texelFetch(texture_buffer_lut_rg, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg; + vec2 entry = texelFetch(texture_buffer_lut_lf, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg; return entry.r + entry.g * delta; } @@ -1519,7 +1542,7 @@ vec4 secondary_fragment_color = vec4(0.0); // Generate clamped fog factor from LUT for given fog index out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n" "float fog_f = fog_index - fog_i;\n" - "vec2 fog_lut_entry = texelFetch(texture_buffer_lut_rg, int(fog_i) + " + "vec2 fog_lut_entry = texelFetch(texture_buffer_lut_lf, int(fog_i) + " "fog_lut_offset).rg;\n" "float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n" "fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; @@ -1537,8 +1560,8 @@ vec4 secondary_fragment_color = vec4(0.0); if (state.shadow_rendering) { out += R"( #if ALLOW_SHADOW -uint d = uint(clamp(depth, 0.0, 1.0) * 0xFFFFFF); -uint s = uint(last_tex_env_out.g * 0xFF); +uint d = uint(clamp(depth, 0.0, 1.0) * float(0xFFFFFF)); +uint s = uint(last_tex_env_out.g * float(0xFF)); ivec2 image_coord = ivec2(gl_FragCoord.xy); uint old = imageLoad(shadow_buffer, image_coord).x; @@ -1567,6 +1590,32 @@ do { out += "color = byteround(last_tex_env_out);\n"; } + if (GLES) { + if (!state.alphablend_enable) { + switch (state.logic_op) { + case FramebufferRegs::LogicOp::Clear: + out += "color = vec4(0);\n"; + break; + case FramebufferRegs::LogicOp::Set: + out += "color = vec4(1);\n"; + break; + case FramebufferRegs::LogicOp::Copy: + // Take the color output as-is + break; + case FramebufferRegs::LogicOp::CopyInverted: + out += "color = ~color;\n"; + break; + case FramebufferRegs::LogicOp::NoOp: + // We need 
to discard the color, but not necessarily the depth. This is not possible + // with fragment shader alone, so we emulate this behavior on GLES with glColorMask. + break; + default: + LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", static_cast(state.logic_op)); + UNIMPLEMENTED(); + } + } + } + out += '}'; return {std::move(out)}; @@ -1574,7 +1623,7 @@ do { ShaderDecompiler::ProgramResult GenerateTrivialVertexShader(bool separable_shader) { std::string out; - if (separable_shader) { + if (separable_shader && !GLES) { out += "#extension GL_ARB_separate_shader_objects : enable\n"; } @@ -1617,8 +1666,8 @@ void main() { std::optional GenerateVertexShader( const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, bool separable_shader) { - std::string out = ""; - if (separable_shader) { + std::string out; + if (separable_shader && !GLES) { out += "#extension GL_ARB_separate_shader_objects : enable\n"; } @@ -1767,8 +1816,8 @@ void EmitPrim(Vertex vtx0, Vertex vtx1, Vertex vtx2) { ShaderDecompiler::ProgramResult GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) { - std::string out = ""; - if (separable_shader) { + std::string out; + if (separable_shader && !GLES) { out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; } diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 3b11fa88c..eb0e4cc23 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -61,6 +61,8 @@ struct PicaFSConfigState { Pica::RasterizerRegs::DepthBuffering depthmap_enable; Pica::TexturingRegs::FogMode fog_mode; bool fog_flip; + bool alphablend_enable; + Pica::FramebufferRegs::LogicOp logic_op; struct { struct { diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 1452c84d7..a5664b1ff 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ 
b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -123,6 +123,7 @@ static void SetShaderSamplerBindings(GLuint shader) { SetShaderSamplerBinding(shader, "tex_cube", TextureUnits::TextureCube); // Set the texture samplers to correspond to different lookup table texture units + SetShaderSamplerBinding(shader, "texture_buffer_lut_lf", TextureUnits::TextureBufferLUT_LF); SetShaderSamplerBinding(shader, "texture_buffer_lut_rg", TextureUnits::TextureBufferLUT_RG); SetShaderSamplerBinding(shader, "texture_buffer_lut_rgba", TextureUnits::TextureBufferLUT_RGBA); @@ -176,7 +177,10 @@ public: OGLProgram& program = boost::get(shader_or_program); program.Create(true, {shader.handle}); SetShaderUniformBlockBindings(program.handle); - SetShaderSamplerBindings(program.handle); + + if (type == GL_FRAGMENT_SHADER) { + SetShaderSamplerBindings(program.handle); + } } } diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 9ed5e8cc7..a51ad443b 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -14,7 +14,7 @@ namespace OpenGL { GLuint LoadShader(const char* source, GLenum type) { - const std::string version = GLES ? R"(#version 310 es + const std::string version = GLES ? R"(#version 320 es #define CITRA_GLES diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 1871403f9..fef6139d3 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -12,11 +12,15 @@ namespace OpenGL { // High precision may or may not supported in GLES3. If it isn't, use medium precision instead. 
static constexpr char fragment_shader_precision_OES[] = R"( #ifdef GL_FRAGMENT_PRECISION_HIGH - precision highp float; +precision highp int; +precision highp float; precision highp samplerBuffer; +precision highp uimage2D; #else - precision mediump float; +precision mediump int; +precision mediump float; precision mediump samplerBuffer; +precision mediump uimage2D; #endif // GL_FRAGMENT_PRECISION_HIGH )"; diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 95ab8525a..89944f80e 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -58,6 +58,7 @@ OpenGLState::OpenGLState() { texture_cube_unit.texture_cube = 0; texture_cube_unit.sampler = 0; + texture_buffer_lut_lf.texture_buffer = 0; texture_buffer_lut_rg.texture_buffer = 0; texture_buffer_lut_rgba.texture_buffer = 0; @@ -169,10 +170,17 @@ void OpenGLState::Apply() const { if (blend.enabled != cur_state.blend.enabled) { if (blend.enabled) { glEnable(GL_BLEND); - glDisable(GL_COLOR_LOGIC_OP); } else { glDisable(GL_BLEND); - glEnable(GL_COLOR_LOGIC_OP); + } + + // GLES does not support glLogicOp + if (!GLES) { + if (blend.enabled) { + glDisable(GL_COLOR_LOGIC_OP); + } else { + glEnable(GL_COLOR_LOGIC_OP); + } } } @@ -196,13 +204,11 @@ void OpenGLState::Apply() const { glBlendEquationSeparate(blend.rgb_equation, blend.a_equation); } - // GLES3 does not support glLogicOp + // GLES does not support glLogicOp if (!GLES) { if (logic_op != cur_state.logic_op) { glLogicOp(logic_op); } - } else { - LOG_TRACE(Render_OpenGL, "glLogicOps are unimplemented..."); } // Textures @@ -224,6 +230,12 @@ void OpenGLState::Apply() const { glBindSampler(TextureUnits::TextureCube.id, texture_cube_unit.sampler); } + // Texture buffer LUTs + if (texture_buffer_lut_lf.texture_buffer != cur_state.texture_buffer_lut_lf.texture_buffer) { + glActiveTexture(TextureUnits::TextureBufferLUT_LF.Enum()); + glBindTexture(GL_TEXTURE_BUFFER, 
texture_buffer_lut_lf.texture_buffer); + } + // Texture buffer LUTs if (texture_buffer_lut_rg.texture_buffer != cur_state.texture_buffer_lut_rg.texture_buffer) { glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum()); @@ -354,6 +366,8 @@ OpenGLState& OpenGLState::ResetTexture(GLuint handle) { } if (texture_cube_unit.texture_cube == handle) texture_cube_unit.texture_cube = 0; + if (texture_buffer_lut_lf.texture_buffer == handle) + texture_buffer_lut_lf.texture_buffer = 0; if (texture_buffer_lut_rg.texture_buffer == handle) texture_buffer_lut_rg.texture_buffer = 0; if (texture_buffer_lut_rgba.texture_buffer == handle) diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 3fa585b04..e3b85a297 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -22,7 +22,8 @@ constexpr TextureUnit PicaTexture(int unit) { return TextureUnit{unit}; } -constexpr TextureUnit TextureCube{3}; +constexpr TextureUnit TextureCube{6}; +constexpr TextureUnit TextureBufferLUT_LF{3}; constexpr TextureUnit TextureBufferLUT_RG{4}; constexpr TextureUnit TextureBufferLUT_RGBA{5}; @@ -101,6 +102,10 @@ public: GLuint sampler; // GL_SAMPLER_BINDING } texture_cube_unit; + struct { + GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER + } texture_buffer_lut_lf; + struct { GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER } texture_buffer_lut_rg; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 113c9aa0a..0d048e8c5 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -29,6 +29,7 @@ #include "core/tracer/recorder.h" #include "video_core/debug_utils/debug_utils.h" #include "video_core/rasterizer_interface.h" +#include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_vars.h" #include
"video_core/renderer_opengl/post_processing_opengl.h" #include "video_core/renderer_opengl/renderer_opengl.h" @@ -39,7 +40,12 @@ namespace OpenGL { // If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have // to wait on available presentation frames. There doesn't seem to be much of a downside to a larger // number but 9 swap textures at 60FPS presentation allows for 800% speed so thats probably fine +#ifdef ANDROID +// Reduce the size of swap_chain, since the UI only allows up to 200% speed. +constexpr std::size_t SWAP_CHAIN_SIZE = 6; +#else constexpr std::size_t SWAP_CHAIN_SIZE = 9; +#endif class OGLTextureMailboxException : public std::runtime_error { public: @@ -96,7 +102,7 @@ public: frame->color.Create(); state.renderbuffer = frame->color.handle; state.Apply(); - glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA, width, height); + glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, width, height); // Recreate the FBO for the render target frame->render.Release(); @@ -1197,14 +1203,18 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum /// Initialize the renderer VideoCore::ResultStatus RendererOpenGL::Init() { +#ifndef ANDROID if (!gladLoadGL()) { return VideoCore::ResultStatus::ErrorBelowGL33; } + // Qualcomm has some spammy info messages that are marked as errors but not important + // https://developer.qualcomm.com/comment/11845 if (GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); glDebugMessageCallback(DebugHandler, nullptr); } +#endif const char* gl_version{reinterpret_cast(glGetString(GL_VERSION))}; const char* gpu_vendor{reinterpret_cast(glGetString(GL_VENDOR))}; diff --git a/src/video_core/renderer_opengl/texture_downloader_es.cpp b/src/video_core/renderer_opengl/texture_downloader_es.cpp new file mode 100644 index 000000000..11663512e --- /dev/null +++ b/src/video_core/renderer_opengl/texture_downloader_es.cpp @@ -0,0 +1,254 @@ +// Copyright 2020 Citra Emulator Project +// Licensed
under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include + +#include "common/logging/log.h" +#include "video_core/renderer_opengl/gl_rasterizer_cache.h" +#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_vars.h" +#include "video_core/renderer_opengl/texture_downloader_es.h" + +#include "shaders/depth_to_color.frag" +#include "shaders/depth_to_color.vert" +#include "shaders/ds_to_color.frag" + +namespace OpenGL { + +/** + * Self tests for the texture downloader + */ +void TextureDownloaderES::Test() { + auto cur_state = OpenGLState::GetCurState(); + OpenGLState state; + + { + GLint range[2]; + GLint precision; +#define PRECISION_TEST(type) \ + glGetShaderPrecisionFormat(GL_FRAGMENT_SHADER, type, range, &precision); \ + LOG_INFO(Render_OpenGL, #type " range: [{}, {}], precision: {}", range[0], range[1], precision); + PRECISION_TEST(GL_LOW_INT); + PRECISION_TEST(GL_MEDIUM_INT); + PRECISION_TEST(GL_HIGH_INT); + PRECISION_TEST(GL_LOW_FLOAT); + PRECISION_TEST(GL_MEDIUM_FLOAT); + PRECISION_TEST(GL_HIGH_FLOAT); +#undef PRECISION_TEST + } + glActiveTexture(GL_TEXTURE0); + + const auto test = [this, &state](FormatTuple tuple, auto original_data, std::size_t tex_size, + auto data_generator) { + OGLTexture texture; + texture.Create(); + state.texture_units[0].texture_2d = texture.handle; + state.Apply(); + + original_data.resize(tex_size * tex_size); + for (std::size_t idx = 0; idx < original_data.size(); ++idx) + original_data[idx] = data_generator(idx); + glTexStorage2D(GL_TEXTURE_2D, 1, tuple.internal_format, tex_size, tex_size); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, tex_size, tex_size, tuple.format, tuple.type, + original_data.data()); + + decltype(original_data) new_data(original_data.size()); + glFinish(); + auto start = std::chrono::high_resolution_clock::now(); + GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, tex_size, tex_size, + new_data.data()); + glFinish(); + 
auto time = std::chrono::high_resolution_clock::now() - start; + LOG_INFO(Render_OpenGL, "test took {}", std::chrono::duration(time)); + + int diff = 0; + for (std::size_t idx = 0; idx < original_data.size(); ++idx) + if (new_data[idx] - original_data[idx] != diff) { + diff = new_data[idx] - original_data[idx]; + // every time the error between the real and expected value changes, log it + // some error is expected in D24 due to floating point precision + LOG_WARNING(Render_OpenGL, "difference changed at {:#X}: {:#X} -> {:#X}", idx, + original_data[idx], new_data[idx]); + } + }; + LOG_INFO(Render_OpenGL, "GL_DEPTH24_STENCIL8 download test starting"); + test(depth_format_tuples[3], std::vector{}, 4096, + [](std::size_t idx) { return static_cast((idx << 8) | (idx & 0xFF)); }); + LOG_INFO(Render_OpenGL, "GL_DEPTH_COMPONENT24 download test starting"); + test(depth_format_tuples[2], std::vector{}, 4096, + [](std::size_t idx) { return static_cast(idx << 8); }); + LOG_INFO(Render_OpenGL, "GL_DEPTH_COMPONENT16 download test starting"); + test(depth_format_tuples[0], std::vector{}, 256, + [](std::size_t idx) { return static_cast(idx); }); + + cur_state.Apply(); +} + +TextureDownloaderES::TextureDownloaderES(bool enable_depth_stencil) { + vao.Create(); + read_fbo_generic.Create(); + + depth32_fbo.Create(); + r32ui_renderbuffer.Create(); + depth16_fbo.Create(); + r16_renderbuffer.Create(); + + const auto init_program = [](ConversionShader& converter, std::string_view frag) { + converter.program.Create(depth_to_color_vert.data(), frag.data()); + converter.lod_location = glGetUniformLocation(converter.program.handle, "lod"); + }; + + // xperia64: The depth stencil shader currently uses a GLES extension that is not supported + // across all devices. Reportedly broken on Tegra devices and the Nexus 6P, so enabling it can be + // toggled + if (enable_depth_stencil) { + init_program(d24s8_r32ui_conversion_shader, ds_to_color_frag); + } + + init_program(d24_r32ui_conversion_shader,
depth_to_color_frag); + init_program(d16_r16_conversion_shader, R"( +out highp float color; + +uniform highp sampler2D depth; +uniform int lod; + +void main(){ + color = texelFetch(depth, ivec2(gl_FragCoord.xy), lod).x; +} +)"); + + sampler.Create(); + glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glSamplerParameteri(sampler.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + + auto cur_state = OpenGLState::GetCurState(); + auto state = cur_state; + + state.draw.shader_program = d24s8_r32ui_conversion_shader.program.handle; + state.draw.draw_framebuffer = depth32_fbo.handle; + state.renderbuffer = r32ui_renderbuffer.handle; + state.Apply(); + glRenderbufferStorage(GL_RENDERBUFFER, GL_R32UI, max_size, max_size); + glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, + r32ui_renderbuffer.handle); + glUniform1i(glGetUniformLocation(d24s8_r32ui_conversion_shader.program.handle, "depth"), 1); + + state.draw.draw_framebuffer = depth16_fbo.handle; + state.renderbuffer = r16_renderbuffer.handle; + state.Apply(); + glRenderbufferStorage(GL_RENDERBUFFER, GL_R16, max_size, max_size); + glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, + r16_renderbuffer.handle); + + cur_state.Apply(); +} + +/** + * OpenGL ES does not support glReadBuffer for depth/stencil formats + * This gets around it by converting to a Red surface before downloading + */ +GLuint TextureDownloaderES::ConvertDepthToColor(GLuint level, GLenum& format, GLenum& type, + GLint height, GLint width) { + ASSERT(width <= max_size && height <= max_size); + const OpenGLState cur_state = OpenGLState::GetCurState(); + OpenGLState state; + state.texture_units[0] = {cur_state.texture_units[0].texture_2d, sampler.handle}; + state.draw.vertex_array = vao.handle; + + OGLTexture texture_view; + const ConversionShader* converter; + switch (type) { + case GL_UNSIGNED_SHORT: + state.draw.draw_framebuffer = depth16_fbo.handle; + converter = 
&d16_r16_conversion_shader; + format = GL_RED; + break; + case GL_UNSIGNED_INT: + state.draw.draw_framebuffer = depth32_fbo.handle; + converter = &d24_r32ui_conversion_shader; + format = GL_RED_INTEGER; + break; + case GL_UNSIGNED_INT_24_8: + state.draw.draw_framebuffer = depth32_fbo.handle; + converter = &d24s8_r32ui_conversion_shader; + format = GL_RED_INTEGER; + type = GL_UNSIGNED_INT; + break; + default: + UNREACHABLE_MSG("Destination type not recognized"); + } + state.draw.shader_program = converter->program.handle; + state.viewport = {0, 0, width, height}; + state.Apply(); + if (converter->program.handle == d24s8_r32ui_conversion_shader.program.handle) { + // TODO BreadFish64: the ARM framebuffer reading extension is probably not the most optimal + // way to do this, search for another solution + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + state.texture_units[0].texture_2d, level); + } + + glUniform1i(converter->lod_location, level); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + if (texture_view.handle) { + glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_DEPTH_COMPONENT); + } + return state.draw.draw_framebuffer; +} + +/** + * OpenGL ES does not support glGetTexImage. Obtain the pixels by attaching the + * texture to a framebuffer. 
+ * Originally from https://github.com/apitrace/apitrace/blob/master/retrace/glstate_images.cpp + * Depth texture download assumes that the texture's format tuple matches what is found in + * OpenGL::depth_format_tuples + */ +void TextureDownloaderES::GetTexImage(GLenum target, GLuint level, GLenum format, GLenum type, + GLint height, GLint width, void* pixels) { + OpenGLState state = OpenGLState::GetCurState(); + GLuint texture; + const GLuint old_read_buffer = state.draw.read_framebuffer; + switch (target) { + case GL_TEXTURE_2D: + texture = state.texture_units[0].texture_2d; + break; + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + texture = state.texture_cube_unit.texture_cube; + break; + default: + UNIMPLEMENTED_MSG("Unexpected target {:x}", target); + } + + switch (format) { + case GL_DEPTH_COMPONENT: + case GL_DEPTH_STENCIL: + // unfortunately, the accurate way is too slow for release + return; + state.draw.read_framebuffer = ConvertDepthToColor(level, format, type, height, width); + state.Apply(); + break; + default: + state.draw.read_framebuffer = read_fbo_generic.handle; + state.Apply(); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture, + level); + } + GLenum status = glCheckFramebufferStatus(GL_READ_FRAMEBUFFER); + if (status != GL_FRAMEBUFFER_COMPLETE) { + LOG_DEBUG(Render_OpenGL, "Framebuffer is incomplete, status: {:X}", status); + } + glReadPixels(0, 0, width, height, format, type, pixels); + + state.draw.read_framebuffer = old_read_buffer; + state.Apply(); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/texture_downloader_es.h b/src/video_core/renderer_opengl/texture_downloader_es.h new file mode 100644 index 000000000..66c27dde1 --- /dev/null +++ b/src/video_core/renderer_opengl/texture_downloader_es.h
@@ -0,0 +1,36 @@ +// Copyright 2020 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace OpenGL { +class OpenGLState; + +class TextureDownloaderES { + static constexpr u16 max_size = 1024; + + OGLVertexArray vao; + OGLFramebuffer read_fbo_generic; + OGLFramebuffer depth32_fbo, depth16_fbo; + OGLRenderbuffer r32ui_renderbuffer, r16_renderbuffer; + struct ConversionShader { + OGLProgram program; + GLint lod_location{-1}; + } d24_r32ui_conversion_shader, d16_r16_conversion_shader, d24s8_r32ui_conversion_shader; + OGLSampler sampler; + + void Test(); + GLuint ConvertDepthToColor(GLuint level, GLenum& format, GLenum& type, GLint height, + GLint width); + +public: + TextureDownloaderES(bool enable_depth_stencil); + + void GetTexImage(GLenum target, GLuint level, GLenum format, const GLenum type, GLint height, + GLint width, void* pixels); +}; +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.cpp b/src/video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.cpp index b70cc14f4..69fda08a9 100644 --- a/src/video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.cpp +++ b/src/video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.cpp @@ -34,30 +34,14 @@ #include "video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.h" #include "shaders/refine.frag" -#include "shaders/refine.vert" #include "shaders/tex_coord.vert" #include "shaders/x_gradient.frag" #include "shaders/y_gradient.frag" -#include "shaders/y_gradient.vert" namespace OpenGL { Anime4kUltrafast::Anime4kUltrafast(u16 scale_factor) : TextureFilterBase(scale_factor) { const OpenGLState cur_state = OpenGLState::GetCurState(); - const auto setup_temp_tex = [this](TempTex& texture, GLint internal_format, GLint format) { - 
texture.fbo.Create(); - texture.tex.Create(); - state.draw.draw_framebuffer = texture.fbo.handle; - state.Apply(); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_RECTANGLE, texture.tex.handle); - glTexImage2D(GL_TEXTURE_RECTANGLE, 0, internal_format, 1024 * internal_scale_factor, - 1024 * internal_scale_factor, 0, format, GL_HALF_FLOAT, nullptr); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_RECTANGLE, - texture.tex.handle, 0); - }; - setup_temp_tex(LUMAD, GL_R16F, GL_RED); - setup_temp_tex(XY, GL_RG16F, GL_RG); vao.Create(); @@ -65,17 +49,17 @@ Anime4kUltrafast::Anime4kUltrafast(u16 scale_factor) : TextureFilterBase(scale_f samplers[idx].Create(); state.texture_units[idx].sampler = samplers[idx].handle; glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_MIN_FILTER, - idx == 0 ? GL_LINEAR : GL_NEAREST); + idx != 2 ? GL_LINEAR : GL_NEAREST); glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_MAG_FILTER, - idx == 0 ? GL_LINEAR : GL_NEAREST); + idx != 2 ? 
GL_LINEAR : GL_NEAREST); glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); } state.draw.vertex_array = vao.handle; gradient_x_program.Create(tex_coord_vert.data(), x_gradient_frag.data()); - gradient_y_program.Create(y_gradient_vert.data(), y_gradient_frag.data()); - refine_program.Create(refine_vert.data(), refine_frag.data()); + gradient_y_program.Create(tex_coord_vert.data(), y_gradient_frag.data()); + refine_program.Create(tex_coord_vert.data(), refine_frag.data()); state.draw.shader_program = gradient_y_program.handle; state.Apply(); @@ -84,8 +68,6 @@ Anime4kUltrafast::Anime4kUltrafast(u16 scale_factor) : TextureFilterBase(scale_f state.draw.shader_program = refine_program.handle; state.Apply(); glUniform1i(glGetUniformLocation(refine_program.handle, "LUMAD"), 1); - glUniform1f(glGetUniformLocation(refine_program.handle, "final_scale"), - static_cast(internal_scale_factor) / scale_factor); cur_state.Apply(); } @@ -95,20 +77,48 @@ void Anime4kUltrafast::Filter(GLuint src_tex, const Common::Rectangle& src_ GLuint read_fb_handle, GLuint draw_fb_handle) { const OpenGLState cur_state = OpenGLState::GetCurState(); + // These will have handles from the previous texture that was filtered, reset them to avoid + // binding invalid textures. 
+ state.texture_units[0].texture_2d = 0; + state.texture_units[1].texture_2d = 0; + state.texture_units[2].texture_2d = 0; + + const auto setup_temp_tex = [this, &src_rect](GLint internal_format, GLint format) { + TempTex texture; + texture.fbo.Create(); + texture.tex.Create(); + state.texture_units[0].texture_2d = texture.tex.handle; + state.draw.draw_framebuffer = texture.fbo.handle; + state.Apply(); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, texture.tex.handle); + if (GL_ARB_texture_storage) { + glTexStorage2D(GL_TEXTURE_2D, 1, internal_format, + src_rect.GetWidth() * internal_scale_factor, + src_rect.GetHeight() * internal_scale_factor); + } else { + glTexImage2D( + GL_TEXTURE_2D, 0, internal_format, src_rect.GetWidth() * internal_scale_factor, + src_rect.GetHeight() * internal_scale_factor, 0, format, GL_HALF_FLOAT, nullptr); + } + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + texture.tex.handle, 0); + return texture; + }; + auto XY = setup_temp_tex(GL_RG16F, GL_RG); + auto LUMAD = setup_temp_tex(GL_R16F, GL_RED); + state.viewport = {static_cast(src_rect.left * internal_scale_factor), static_cast(src_rect.bottom * internal_scale_factor), static_cast(src_rect.GetWidth() * internal_scale_factor), static_cast(src_rect.GetHeight() * internal_scale_factor)}; state.texture_units[0].texture_2d = src_tex; + state.texture_units[1].texture_2d = LUMAD.tex.handle; + state.texture_units[2].texture_2d = XY.tex.handle; state.draw.draw_framebuffer = XY.fbo.handle; state.draw.shader_program = gradient_x_program.handle; state.Apply(); - glActiveTexture(GL_TEXTURE1); - glBindTexture(GL_TEXTURE_RECTANGLE, LUMAD.tex.handle); - glActiveTexture(GL_TEXTURE2); - glBindTexture(GL_TEXTURE_RECTANGLE, XY.tex.handle); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); // gradient y pass diff --git a/src/video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.h 
b/src/video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.h index 9e89da816..8175ed390 100644 --- a/src/video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.h +++ b/src/video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.h @@ -30,8 +30,6 @@ private: OGLTexture tex; OGLFramebuffer fbo; }; - TempTex LUMAD; - TempTex XY; std::array samplers; diff --git a/src/video_core/renderer_opengl/texture_filters/anime4k/refine.frag b/src/video_core/renderer_opengl/texture_filters/anime4k/refine.frag index 4417b96f6..569f30078 100644 --- a/src/video_core/renderer_opengl/texture_filters/anime4k/refine.frag +++ b/src/video_core/renderer_opengl/texture_filters/anime4k/refine.frag @@ -1,14 +1,12 @@ //? #version 330 +precision mediump float; + in vec2 tex_coord; -in vec2 input_max; out vec4 frag_color; uniform sampler2D HOOKED; -uniform sampler2DRect LUMAD; -uniform sampler2DRect LUMAG; - -uniform float final_scale; +uniform sampler2D LUMAD; const float LINE_DETECT_THRESHOLD = 0.4; const float STRENGTH = 0.6; @@ -21,12 +19,12 @@ struct RGBAL { }; vec4 getAverage(vec4 cc, vec4 a, vec4 b, vec4 c) { - return cc * (1 - STRENGTH) + ((a + b + c) / 3) * STRENGTH; + return cc * (1.0 - STRENGTH) + ((a + b + c) / 3.0) * STRENGTH; } -#define GetRGBAL(offset) \ - RGBAL(textureOffset(HOOKED, tex_coord, offset), \ - texture(LUMAD, clamp((gl_FragCoord.xy + offset) * final_scale, vec2(0.0), input_max)).x) +#define GetRGBAL(x_offset, y_offset) \ + RGBAL(textureLodOffset(HOOKED, tex_coord, 0.0, ivec2(x_offset, y_offset)), \ + textureLodOffset(LUMAD, tex_coord, 0.0, ivec2(x_offset, y_offset)).x) float min3v(float a, float b, float c) { return min(min(a, b), c); @@ -37,23 +35,23 @@ float max3v(float a, float b, float c) { } vec4 Compute() { - RGBAL cc = GetRGBAL(ivec2(0)); + RGBAL cc = GetRGBAL(0, 0); if (cc.l > LINE_DETECT_THRESHOLD) { return cc.c; } - RGBAL tl = GetRGBAL(ivec2(-1, -1)); - RGBAL t = GetRGBAL(ivec2(0, -1)); - RGBAL tr = GetRGBAL(ivec2(1, 
-1)); + RGBAL tl = GetRGBAL(-1, -1); + RGBAL t = GetRGBAL(0, -1); + RGBAL tr = GetRGBAL(1, -1); - RGBAL l = GetRGBAL(ivec2(-1, 0)); + RGBAL l = GetRGBAL(-1, 0); - RGBAL r = GetRGBAL(ivec2(1, 0)); + RGBAL r = GetRGBAL(1, 0); - RGBAL bl = GetRGBAL(ivec2(-1, 1)); - RGBAL b = GetRGBAL(ivec2(0, 1)); - RGBAL br = GetRGBAL(ivec2(1, 1)); + RGBAL bl = GetRGBAL(-1, 1); + RGBAL b = GetRGBAL(0, 1); + RGBAL br = GetRGBAL(1, 1); // Kernel 0 and 4 float maxDark = max3v(br.l, b.l, bl.l); diff --git a/src/video_core/renderer_opengl/texture_filters/anime4k/refine.vert b/src/video_core/renderer_opengl/texture_filters/anime4k/refine.vert deleted file mode 100644 index 552a218fb..000000000 --- a/src/video_core/renderer_opengl/texture_filters/anime4k/refine.vert +++ /dev/null @@ -1,14 +0,0 @@ -//? #version 330 -out vec2 tex_coord; -out vec2 input_max; - -uniform sampler2D HOOKED; - -const vec2 vertices[4] = - vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); - -void main() { - gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); - tex_coord = (vertices[gl_VertexID] + 1.0) / 2.0; - input_max = textureSize(HOOKED, 0) * 2.0 - 1.0; -} diff --git a/src/video_core/renderer_opengl/texture_filters/anime4k/x_gradient.frag b/src/video_core/renderer_opengl/texture_filters/anime4k/x_gradient.frag index 49502fac7..8103cb77c 100644 --- a/src/video_core/renderer_opengl/texture_filters/anime4k/x_gradient.frag +++ b/src/video_core/renderer_opengl/texture_filters/anime4k/x_gradient.frag @@ -1,4 +1,6 @@ //? 
#version 330 +precision mediump float; + in vec2 tex_coord; out vec2 frag_color; @@ -7,7 +9,7 @@ uniform sampler2D tex_input; const vec3 K = vec3(0.2627, 0.6780, 0.0593); // TODO: improve handling of alpha channel -#define GetLum(xoffset) dot(K, textureOffset(tex_input, tex_coord, ivec2(xoffset, 0)).rgb) +#define GetLum(xoffset) dot(K, textureLodOffset(tex_input, tex_coord, 0.0, ivec2(xoffset, 0)).rgb) void main() { float l = GetLum(-1); diff --git a/src/video_core/renderer_opengl/texture_filters/anime4k/y_gradient.frag b/src/video_core/renderer_opengl/texture_filters/anime4k/y_gradient.frag index a0e820001..81e0d0f6e 100644 --- a/src/video_core/renderer_opengl/texture_filters/anime4k/y_gradient.frag +++ b/src/video_core/renderer_opengl/texture_filters/anime4k/y_gradient.frag @@ -1,16 +1,18 @@ //? #version 330 -in vec2 input_max; +precision mediump float; + +in vec2 tex_coord; out float frag_color; -uniform sampler2DRect tex_input; +uniform sampler2D tex_input; void main() { - vec2 t = texture(tex_input, min(gl_FragCoord.xy + vec2(0.0, 1.0), input_max)).xy; - vec2 c = texture(tex_input, gl_FragCoord.xy).xy; - vec2 b = texture(tex_input, max(gl_FragCoord.xy - vec2(0.0, 1.0), vec2(0.0))).xy; + vec2 t = textureLodOffset(tex_input, tex_coord, 0.0, ivec2(0, 1)).xy; + vec2 c = textureLod(tex_input, tex_coord, 0.0).xy; + vec2 b = textureLodOffset(tex_input, tex_coord, 0.0, ivec2(0, -1)).xy; - vec2 grad = vec2(t.x + 2 * c.x + b.x, b.y - t.y); + vec2 grad = vec2(t.x + 2.0 * c.x + b.x, b.y - t.y); - frag_color = 1 - length(grad); + frag_color = 1.0 - length(grad); } diff --git a/src/video_core/renderer_opengl/texture_filters/bicubic/bicubic.frag b/src/video_core/renderer_opengl/texture_filters/bicubic/bicubic.frag index 2bdab3cf6..f384c7864 100644 --- a/src/video_core/renderer_opengl/texture_filters/bicubic/bicubic.frag +++ b/src/video_core/renderer_opengl/texture_filters/bicubic/bicubic.frag @@ -1,4 +1,6 @@ //? 
#version 330 +precision mediump float; + in vec2 tex_coord; out vec4 frag_color; @@ -18,7 +20,7 @@ vec4 cubic(float v) { vec4 textureBicubic(sampler2D sampler, vec2 texCoords) { - vec2 texSize = textureSize(sampler, 0); + vec2 texSize = vec2(textureSize(sampler, 0)); vec2 invTexSize = 1.0 / texSize; texCoords = texCoords * texSize - 0.5; diff --git a/src/video_core/renderer_opengl/texture_filters/xbrz/xbrz_freescale.frag b/src/video_core/renderer_opengl/texture_filters/xbrz/xbrz_freescale.frag index 4868d18f7..84f1b3503 100644 --- a/src/video_core/renderer_opengl/texture_filters/xbrz/xbrz_freescale.frag +++ b/src/video_core/renderer_opengl/texture_filters/xbrz/xbrz_freescale.frag @@ -1,4 +1,6 @@ //? #version 330 +precision mediump float; + in vec2 tex_coord; in vec2 source_size; in vec2 output_size; @@ -6,7 +8,7 @@ in vec2 output_size; out vec4 frag_color; uniform sampler2D tex; -uniform float scale; +uniform lowp float scale; const int BLEND_NONE = 0; const int BLEND_NORMAL = 1; @@ -42,12 +44,12 @@ float GetLeftRatio(vec2 center, vec2 origin, vec2 direction) { return smoothstep(-sqrt(2.0) / 2.0, sqrt(2.0) / 2.0, v); } -vec2 pos = fract(tex_coord * source_size) - vec2(0.5, 0.5); -vec2 coord = tex_coord - pos / source_size; - #define P(x, y) textureOffset(tex, coord, ivec2(x, y)) void main() { + vec2 pos = fract(tex_coord * source_size) - vec2(0.5, 0.5); + vec2 coord = tex_coord - pos / source_size; + //--------------------------------------- // Input Pixel Mapping: -|x|x|x|- // x|A|B|C|x @@ -142,15 +144,15 @@ void main() { (IsPixEqual(G, H) && IsPixEqual(H, I) && IsPixEqual(I, F) && IsPixEqual(F, C) && !IsPixEqual(E, I)))); vec2 origin = vec2(0.0, 1.0 / sqrt(2.0)); - ivec2 direction = ivec2(1, -1); + vec2 direction = vec2(1.0, -1.0); if (doLineBlend) { bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_F_G <= dist_H_C) && E != G && D != G; bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_H_C <= dist_F_G) && E != C && B != C; origin = haveShallowLine ? 
vec2(0.0, 0.25) : vec2(0.0, 0.5); - direction.x += haveShallowLine ? 1 : 0; - direction.y -= haveSteepLine ? 1 : 0; + direction.x += haveShallowLine ? 1.0 : 0.0; + direction.y -= haveSteepLine ? 1.0 : 0.0; } vec4 blendPix = mix(H, F, step(ColorDist(E, F), ColorDist(E, H))); res = mix(res, blendPix, GetLeftRatio(pos, origin, direction)); @@ -169,15 +171,15 @@ void main() { (IsPixEqual(A, D) && IsPixEqual(D, G) && IsPixEqual(G, H) && IsPixEqual(H, I) && !IsPixEqual(E, G)))); vec2 origin = vec2(-1.0 / sqrt(2.0), 0.0); - ivec2 direction = ivec2(1, 1); + vec2 direction = vec2(1.0, 1.0); if (doLineBlend) { bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_H_A <= dist_D_I) && E != A && B != A; bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_D_I <= dist_H_A) && E != I && F != I; origin = haveShallowLine ? vec2(-0.25, 0.0) : vec2(-0.5, 0.0); - direction.y += haveShallowLine ? 1 : 0; - direction.x += haveSteepLine ? 1 : 0; + direction.y += haveShallowLine ? 1.0 : 0.0; + direction.x += haveSteepLine ? 1.0 : 0.0; } origin = origin; direction = direction; @@ -198,15 +200,15 @@ void main() { (IsPixEqual(I, F) && IsPixEqual(F, C) && IsPixEqual(C, B) && IsPixEqual(B, A) && !IsPixEqual(E, C)))); vec2 origin = vec2(1.0 / sqrt(2.0), 0.0); - ivec2 direction = ivec2(-1, -1); + vec2 direction = vec2(-1.0, -1.0); if (doLineBlend) { bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_B_I <= dist_F_A) && E != I && H != I; bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_F_A <= dist_B_I) && E != A && D != A; origin = haveShallowLine ? vec2(0.25, 0.0) : vec2(0.5, 0.0); - direction.y -= haveShallowLine ? 1 : 0; - direction.x -= haveSteepLine ? 1 : 0; + direction.y -= haveShallowLine ? 1.0 : 0.0; + direction.x -= haveSteepLine ? 
1.0 : 0.0; } vec4 blendPix = mix(F, B, step(ColorDist(E, B), ColorDist(E, F))); res = mix(res, blendPix, GetLeftRatio(pos, origin, direction)); @@ -225,15 +227,15 @@ void main() { (IsPixEqual(C, B) && IsPixEqual(B, A) && IsPixEqual(A, D) && IsPixEqual(D, G) && !IsPixEqual(E, A)))); vec2 origin = vec2(0.0, -1.0 / sqrt(2.0)); - ivec2 direction = ivec2(-1, 1); + vec2 direction = vec2(-1.0, 1.0); if (doLineBlend) { bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_D_C <= dist_B_G) && E != C && F != C; bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_B_G <= dist_D_C) && E != G && H != G; origin = haveShallowLine ? vec2(0.0, -0.25) : vec2(0.0, -0.5); - direction.x -= haveShallowLine ? 1 : 0; - direction.y += haveSteepLine ? 1 : 0; + direction.x -= haveShallowLine ? 1.0 : 0.0; + direction.y += haveSteepLine ? 1.0 : 0.0; } vec4 blendPix = mix(D, B, step(ColorDist(E, B), ColorDist(E, D))); res = mix(res, blendPix, GetLeftRatio(pos, origin, direction)); diff --git a/src/video_core/renderer_opengl/texture_filters/xbrz/xbrz_freescale.vert b/src/video_core/renderer_opengl/texture_filters/xbrz/xbrz_freescale.vert index adf45d564..63905075f 100644 --- a/src/video_core/renderer_opengl/texture_filters/xbrz/xbrz_freescale.vert +++ b/src/video_core/renderer_opengl/texture_filters/xbrz/xbrz_freescale.vert @@ -4,7 +4,7 @@ out vec2 source_size; out vec2 output_size; uniform sampler2D tex; -uniform float scale; +uniform lowp float scale; const vec2 vertices[4] = vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); @@ -12,6 +12,6 @@ const vec2 vertices[4] = void main() { gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); tex_coord = (vertices[gl_VertexID] + 1.0) / 2.0; - source_size = textureSize(tex, 0); + source_size = vec2(textureSize(tex, 0)); output_size = source_size * scale; }