early-access version 1896

2021-07-20 11:00:32 +02:00 · 2021-07-20 11:00:32 +02:00 · 83d2501797
commit 83d2501797
parent 4c75967a9d
10 changed files with 65 additions and 41 deletions
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@ -48,15 +48,6 @@ if (BUILD_REPOSITORY)
  endif()
 endif()

-# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
-set(VIDEO_CORE "${SRC_DIR}/src/video_core")
-set(HASH_FILES
-    # ...
-)
-set(COMBINED "")
-foreach (F IN LISTS HASH_FILES)
-    file(READ ${F} TMP)
-    set(COMBINED "${COMBINED}${TMP}")
-endforeach()
-string(MD5 SHADER_CACHE_VERSION "${COMBINED}")
+# The variable SRC_DIR must be passed into the script
+# (since it uses the current build directory for all values of CMAKE_*_DIR)
 configure_file("${SRC_DIR}/src/common/scm_rev.cpp.in" "scm_rev.cpp" @ONLY)
--- a/README.md
+++ b/README.md
@ -1,7 +1,7 @@
 yuzu emulator early access
 =============

-This is the source code for early-access 1893.
+This is the source code for early-access 1896.

 ## Legal Notice

--- a/src/core/hle/service/am/applets/applet_controller.h
+++ b/src/core/hle/service/am/applets/applet_controller.h
@ -80,7 +80,7 @@ struct ControllerSupportArgOld {
 static_assert(sizeof(ControllerSupportArgOld) == 0x21C,
              "ControllerSupportArgOld has incorrect size.");

-// LibraryAppletVersion 0x7
+// LibraryAppletVersion 0x7, 0x8
 struct ControllerSupportArgNew {
    ControllerSupportArgHeader header{};
    std::array<IdentificationColor, 8> identification_colors{};
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@ -180,11 +180,9 @@ Device::Device() {
        LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
        shader_backend = Settings::ShaderBackend::GLSL;
    }
-    // Completely disable async shaders for now, as it causes graphical glitches
-    use_asynchronous_shaders = false;
    // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation.
-    // use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
-    //                            !(is_amd || (is_intel && !is_linux));
+    use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
+                               !(is_amd || (is_intel && !is_linux));
    use_driver_cache = is_nvidia;

    LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@ -237,10 +237,12 @@ GraphicsPipeline::GraphicsPipeline(
    if (key.xfb_enabled && device.UseAssemblyShaders()) {
        GenerateTransformFeedbackState();
    }
-    auto func{[this, device, sources, sources_spirv,
-               shader_notify](ShaderContext::Context*) mutable {
+    const bool in_parallel = thread_worker != nullptr;
+    const auto backend = device.GetShaderBackend();
+    auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv),
+               shader_notify, backend, in_parallel](ShaderContext::Context*) mutable {
        for (size_t stage = 0; stage < 5; ++stage) {
-            switch (device.GetShaderBackend()) {
+            switch (backend) {
            case Settings::ShaderBackend::GLSL:
                if (!sources[stage].empty()) {
                    source_programs[stage] = CreateProgram(sources[stage], Stage(stage));
@ -249,6 +251,10 @@ GraphicsPipeline::GraphicsPipeline(
            case Settings::ShaderBackend::GLASM:
                if (!sources[stage].empty()) {
                    assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage));
+                    if (in_parallel) {
+                        // Make sure program is built before continuing when building in parallel
+                        glGetString(GL_PROGRAM_ERROR_STRING_NV);
+                    }
                }
                break;
            case Settings::ShaderBackend::SPIRV:
@ -258,10 +264,20 @@ GraphicsPipeline::GraphicsPipeline(
                break;
            }
        }
+        if (in_parallel && backend != Settings::ShaderBackend::GLASM) {
+            // Make sure programs have built if we are building shaders in parallel
+            for (OGLProgram& program : source_programs) {
+                if (program.handle != 0) {
+                    GLint status{};
+                    glGetProgramiv(program.handle, GL_LINK_STATUS, &status);
+                }
+            }
+        }
        if (shader_notify) {
            shader_notify->MarkShaderComplete();
        }
        is_built = true;
+        built_condvar.notify_one();
    }};
    if (thread_worker) {
        thread_worker->QueueWork(std::move(func));
@ -434,6 +450,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
    buffer_cache.UpdateGraphicsBuffers(is_indexed);
    buffer_cache.BindHostGeometryBuffers(is_indexed);

+    if (!is_built.load(std::memory_order::relaxed)) {
+        WaitForBuild();
+    }
    if (assembly_programs[0].handle != 0) {
        program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask);
    } else {
@ -545,4 +564,9 @@ void GraphicsPipeline::GenerateTransformFeedbackState() {
    num_xfb_strides = static_cast<GLsizei>(current_stream - xfb_streams.data());
 }

+/// Blocks the calling thread until the queued build job has set is_built.
+///
+/// ConfigureImpl calls this when is_built is still false, right before it binds the
+/// assembly or source programs.
+void GraphicsPipeline::WaitForBuild() {
+    std::unique_lock lock{built_mutex};
+    // Acquire pairs with the builder's store to is_built, so the program handles written by
+    // the build job are visible to this thread once the wait returns. A relaxed load gives
+    // no such ordering, and the builder does not take built_mutex either.
+    // NOTE(review): the builder stores is_built and calls notify_one() without holding
+    // built_mutex, so a notification issued between this predicate check and the actual
+    // block could be missed - confirm whether that store should happen under the lock.
+    built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::acquire); });
+}
+
 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@ -119,6 +119,8 @@ private:

    void GenerateTransformFeedbackState();

+    void WaitForBuild();
+
    TextureCache& texture_cache;
    BufferCache& buffer_cache;
    Tegra::MemoryManager& gpu_memory;
@ -143,13 +145,16 @@ private:

    bool use_storage_buffers{};
    bool writes_global_memory{};
-    std::atomic_bool is_built{false};

    static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
    GLsizei num_xfb_attribs{};
    GLsizei num_xfb_strides{};
    std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{};
    std::array<GLint, Maxwell::NumTransformFeedbackBuffers> xfb_streams{};
+
+    std::mutex built_mutex;
+    std::condition_variable built_condvar;
+    std::atomic_bool is_built{false};
 };

 } // namespace OpenGL
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@ -212,8 +212,8 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat
    }
    if (vk_buffer == VK_NULL_HANDLE) {
        // Vulkan doesn't support null index buffers. Replace it with our own null buffer.
-        ReserveNullIndexBuffer();
-        vk_buffer = *null_index_buffer;
+        ReserveNullBuffer();
+        vk_buffer = *null_buffer;
    }
    scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) {
        cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type);
@ -221,16 +221,14 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat
 }

 void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
-    const u32 total_indices = first + count;
-    if (total_indices == 0) {
-        ReserveNullIndexBuffer();
-        scheduler.Record([buffer = *null_index_buffer,
-                          index_type = quad_array_lut_index_type](vk::CommandBuffer cmdbuf) {
-            cmdbuf.BindIndexBuffer(buffer, 0, index_type);
+    if (count == 0) {
+        ReserveNullBuffer();
+        scheduler.Record([this](vk::CommandBuffer cmdbuf) {
+            cmdbuf.BindIndexBuffer(*null_buffer, 0, VK_INDEX_TYPE_UINT32);
        });
        return;
    }
-    ReserveQuadArrayLUT(total_indices, true);
+    ReserveQuadArrayLUT(first + count, true);

    // The LUT has the indices 0, 1, 2, and 3 copied as an array
    // To apply these 'first' offsets we can apply an offset based on the modulus.
@ -264,6 +262,14 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer,
        // Already logged in the rasterizer
        return;
    }
+    if (buffer == VK_NULL_HANDLE) {
+        // Vulkan doesn't support null transform feedback buffers.
+        // Replace it with our own null buffer.
+        ReserveNullBuffer();
+        buffer = *null_buffer;
+        offset = 0;
+        size = 0;
+    }
    scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) {
        const VkDeviceSize vk_offset = offset;
        const VkDeviceSize vk_size = size;
@ -348,11 +354,11 @@ void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle
    });
 }

-void BufferCacheRuntime::ReserveNullIndexBuffer() {
-    if (null_index_buffer) {
+void BufferCacheRuntime::ReserveNullBuffer() {
+    if (null_buffer) {
        return;
    }
-    null_index_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+    null_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
@ -363,12 +369,12 @@ void BufferCacheRuntime::ReserveNullIndexBuffer() {
        .pQueueFamilyIndices = nullptr,
    });
    if (device.HasDebuggingToolAttached()) {
-        null_index_buffer.SetObjectNameEXT("Null index buffer");
+        null_buffer.SetObjectNameEXT("Null index buffer");
    }
-    null_index_buffer_commit = memory_allocator.Commit(null_index_buffer, MemoryUsage::DeviceLocal);
+    null_buffer_commit = memory_allocator.Commit(null_buffer, MemoryUsage::DeviceLocal);

    scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([buffer = *null_index_buffer](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) {
        cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0);
    });
 }
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@ -111,7 +111,7 @@ private:

    void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);

-    void ReserveNullIndexBuffer();
+    void ReserveNullBuffer();

    const Device& device;
    MemoryAllocator& memory_allocator;
@ -124,8 +124,8 @@ private:
    VkIndexType quad_array_lut_index_type{};
    u32 current_num_indices = 0;

-    vk::Buffer null_index_buffer;
-    MemoryCommit null_index_buffer_commit;
+    vk::Buffer null_buffer;
+    MemoryCommit null_buffer_commit;

    Uint8Pass uint8_pass;
    QuadIndexedPass quad_index_pass;
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@ -271,7 +271,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw
      device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
      update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
      buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_},
-      use_asynchronous_shaders{false},
+      use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
      workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"),
      serialization_thread(1, "yuzu:PipelineSerialization") {
    const auto& float_control{device.FloatControlProperties()};
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@ -82,7 +82,7 @@
           <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string>
          </property>
          <property name="text">
-           <string>Use asynchronous shader building (experimental)</string>
+           <string>Use asynchronous shader building</string>
          </property>
         </widget>
        </item>