From 382852418be0fd14fb0290d7b0ff86644685b59b Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 23 Aug 2018 15:38:57 -0400 Subject: [PATCH 1/3] video_core: Add RasterizerCache class for common cache management code. --- src/video_core/CMakeLists.txt | 1 + src/video_core/rasterizer_cache.h | 116 ++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 src/video_core/rasterizer_cache.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c6431e722c..c9822f0d8d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -18,6 +18,7 @@ add_library(video_core STATIC macro_interpreter.h memory_manager.cpp memory_manager.h + rasterizer_cache.h rasterizer_interface.h renderer_base.cpp renderer_base.h diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h new file mode 100644 index 0000000000..7a0492a4e0 --- /dev/null +++ b/src/video_core/rasterizer_cache.h @@ -0,0 +1,116 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/common_types.h" +#include "core/memory.h" +#include "video_core/memory_manager.h" + +template +class RasterizerCache : NonCopyable { +public: + /// Mark the specified region as being invalidated + void InvalidateRegion(Tegra::GPUVAddr region_addr, size_t region_size) { + for (auto iter = cached_objects.cbegin(); iter != cached_objects.cend();) { + const auto& object{iter->second}; + + ++iter; + + if (object->GetAddr() <= (region_addr + region_size) && + region_addr <= (object->GetAddr() + object->GetSizeInBytes())) { + // Regions overlap, so invalidate + Unregister(object); + } + } + } + +protected: + /// Tries to get an object from the cache with the specified address + T TryGet(Tegra::GPUVAddr addr) const { + const auto& search{cached_objects.find(addr)}; + if (search != cached_objects.end()) { + return search->second; + } + + return nullptr; + } + + /// Gets a reference to the cache + const std::unordered_map& GetCache() const { + return cached_objects; + } + + /// Register an object into the cache + void Register(const T& object) { + const auto& search{cached_objects.find(object->GetAddr())}; + if (search != cached_objects.end()) { + // Registered already + return; + } + + cached_objects[object->GetAddr()] = object; + UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1); + } + + /// Unregisters an object from the cache + void Unregister(const T& object) { + const auto& search{cached_objects.find(object->GetAddr())}; + if (search == cached_objects.end()) { + // Unregistered already + return; + } + + UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1); + cached_objects.erase(search); + } + +private: + using PageMap = boost::icl::interval_map; + + template + constexpr auto RangeFromInterval(Map& map, const Interval& interval) { + return boost::make_iterator_range(map.equal_range(interval)); + } + + /// Increase/decrease the number of object in pages touching the specified region + void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { + const u64 page_start{addr >> Tegra::MemoryManager::PAGE_BITS}; + const u64 page_end{(addr + size) >> Tegra::MemoryManager::PAGE_BITS}; + + // Interval maps will erase segments if count reaches 0, so if delta is negative we have to + // subtract after iterating + const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); + if (delta > 0) + cached_pages.add({pages_interval, delta}); + + for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { + const auto interval = pair.first & pages_interval; + const int count = pair.second; + + const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval) + << Tegra::MemoryManager::PAGE_BITS; + const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval) + << Tegra::MemoryManager::PAGE_BITS; + const u64 interval_size = interval_end_addr - interval_start_addr; + + if (delta > 0 && count == delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); + else if (delta < 0 && count == -delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + else + ASSERT(count >= 0); + } + + if (delta < 0) + cached_pages.add({pages_interval, delta}); + } + + std::unordered_map cached_objects; + PageMap cached_pages; +}; From a0e1566dc500e1d6fcfd047463c52507b29287fa Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 23 Aug 2018 15:44:41 -0400 Subject: [PATCH 2/3] gl_rasterizer_cache: Update to use RasterizerCache base class. --- .../renderer_opengl/gl_rasterizer.cpp | 5 +- .../renderer_opengl/gl_rasterizer_cache.cpp | 110 ++---------------- .../renderer_opengl/gl_rasterizer_cache.h | 37 ++---- 3 files changed, 20 insertions(+), 132 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 96851ccb5d..41a58598bf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -571,12 +571,10 @@ void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {} void RasterizerOpenGL::FlushAll() { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); } void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { @@ -586,8 +584,7 @@ void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.FlushRegion(addr, size); - res_cache.InvalidateRegion(addr, size); + InvalidateRegion(addr, size); } bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 83d8d3d94c..65305000c6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -677,12 +677,6 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() { draw_framebuffer.Create(); } -RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { - while (!surface_cache.empty()) { - UnregisterSurface(surface_cache.begin()->second); - } -} - Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { return GetSurface(SurfaceParams::CreateForTexture(config)); } @@ -766,27 +760,25 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres return {}; // Look up surface in the cache based on address - const auto& search{surface_cache.find(params.addr)}; - Surface surface; - if (search != surface_cache.end()) { - surface = search->second; + Surface surface{TryGet(params.addr)}; + if (surface) { if (Settings::values.use_accurate_framebuffers) { // If use_accurate_framebuffers is enabled, always load from memory FlushSurface(surface); - UnregisterSurface(surface); + Unregister(surface); } else if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { // Use the cached surface as-is return surface; } else if (preserve_contents) { // If surface parameters changed and we care about keeping the previous data, recreate // the surface from the old one - UnregisterSurface(surface); + Unregister(surface); Surface new_surface{RecreateSurface(surface, params)}; - RegisterSurface(new_surface); + Register(new_surface); return new_surface; } else { // Delete the old surface before creating a new one to prevent collisions. - UnregisterSurface(surface); + Unregister(surface); } } @@ -797,7 +789,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres if (!surface) { surface = std::make_shared(params); ReserveSurface(surface); - RegisterSurface(surface); + Register(surface); } // Only load surface from memory if we care about the contents @@ -894,7 +886,7 @@ Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const { // framebuffer overlaps surfaces. std::vector surfaces; - for (const auto& surface : surface_cache) { + for (const auto& surface : GetCache()) { const auto& params = surface.second->GetSurfaceParams(); const VAddr surface_cpu_addr = params.GetCpuAddr(); if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + params.size_in_bytes)) { @@ -912,51 +904,6 @@ Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const { return surfaces[0]; } -void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*/) { - // TODO(bunnei): This is unused in the current implementation of the rasterizer cache. We should - // probably implement this in the future, but for now, the `use_accurate_framebufers` setting - // can be used to always flush. -} - -void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) { - for (auto iter = surface_cache.cbegin(); iter != surface_cache.cend();) { - const auto& surface{iter->second}; - const auto& params{surface->GetSurfaceParams()}; - - ++iter; - - if (params.IsOverlappingRegion(addr, size)) { - UnregisterSurface(surface); - } - } -} - -void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { - const auto& params{surface->GetSurfaceParams()}; - const auto& search{surface_cache.find(params.addr)}; - - if (search != surface_cache.end()) { - // Registered already - return; - } - - surface_cache[params.addr] = surface; - UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1); -} - -void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { - const auto& params{surface->GetSurfaceParams()}; - const auto& search{surface_cache.find(params.addr)}; - - if (search == surface_cache.end()) { - // Unregistered already - return; - } - - UpdatePagesCachedCount(params.addr, params.size_in_bytes, -1); - surface_cache.erase(search); -} - void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { const auto& surface_reserve_key{SurfaceReserveKey::Create(surface->GetSurfaceParams())}; surface_reserve[surface_reserve_key] = surface; @@ -966,49 +913,10 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params const auto& surface_reserve_key{SurfaceReserveKey::Create(params)}; auto search{surface_reserve.find(surface_reserve_key)}; if (search != surface_reserve.end()) { - RegisterSurface(search->second); + Register(search->second); return search->second; } return {}; } -template -constexpr auto RangeFromInterval(Map& map, const Interval& interval) { - return boost::make_iterator_range(map.equal_range(interval)); -} - -void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { - const u64 num_pages = ((addr + size - 1) >> Tegra::MemoryManager::PAGE_BITS) - - (addr >> Tegra::MemoryManager::PAGE_BITS) + 1; - const u64 page_start = addr >> Tegra::MemoryManager::PAGE_BITS; - const u64 page_end = page_start + num_pages; - - // Interval maps will erase segments if count reaches 0, so if delta is negative we have to - // subtract after iterating - const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); - if (delta > 0) - cached_pages.add({pages_interval, delta}); - - for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { - const auto interval = pair.first & pages_interval; - const int count = pair.second; - - const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval) - << Tegra::MemoryManager::PAGE_BITS; - const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval) - << Tegra::MemoryManager::PAGE_BITS; - const u64 interval_size = interval_end_addr - interval_start_addr; - - if (delta > 0 && count == delta) - Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); - else if (delta < 0 && count == -delta) - Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); - else - ASSERT(count >= 0); - } - - if (delta < 0) - cached_pages.add({pages_interval, delta}); -} - } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index c8c615df22..8a6ca2a4b4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -8,12 +8,12 @@ #include #include #include -#include #include "common/common_types.h" #include "common/hash.h" #include "common/math_util.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/textures/texture.h" @@ -22,7 +22,6 @@ namespace OpenGL { class CachedSurface; using Surface = std::shared_ptr; using SurfaceSurfaceRect_Tuple = std::tuple>; -using PageMap = boost::icl::interval_map; struct SurfaceParams { enum class PixelFormat { @@ -632,11 +631,6 @@ struct SurfaceParams { /// Returns the CPU virtual address for this surface VAddr GetCpuAddr() const; - /// Returns true if the specified region overlaps with this surface's region in Switch memory - bool IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const { - return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes); - } - /// Creates SurfaceParams from a texture configuration static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); @@ -708,6 +702,14 @@ class CachedSurface final { public: CachedSurface(const SurfaceParams& params); + Tegra::GPUVAddr GetAddr() const { + return params.addr; + } + + size_t GetSizeInBytes() const { + return params.size_in_bytes; + } + const OGLTexture& Texture() const { return texture; } @@ -737,10 +739,9 @@ private: SurfaceParams params; }; -class RasterizerCacheOpenGL final : NonCopyable { +class RasterizerCacheOpenGL final : public RasterizerCache { public: RasterizerCacheOpenGL(); - ~RasterizerCacheOpenGL(); /// Get a surface based on the texture configuration Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); @@ -755,12 +756,6 @@ public: /// Tries to find a framebuffer GPU address based on the provided CPU address Surface TryFindFramebufferSurface(VAddr cpu_addr) const; - /// Write any cached resources overlapping the region back to memory (if dirty) - void FlushRegion(Tegra::GPUVAddr addr, size_t size); - - /// Mark the specified region as being invalidated - void InvalidateRegion(Tegra::GPUVAddr addr, size_t size); - private: void LoadSurface(const Surface& surface); Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); @@ -768,24 +763,12 @@ private: /// Recreates a surface with new parameters Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params); - /// Register surface into the cache - void RegisterSurface(const Surface& surface); - - /// Remove surface from the cache - void UnregisterSurface(const Surface& surface); - /// Reserves a unique surface that can be reused later void ReserveSurface(const Surface& surface); /// Tries to get a reserved surface for the specified parameters Surface TryGetReservedSurface(const SurfaceParams& params); - /// Increase/decrease the number of surface in pages touching the specified region - void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); - - std::unordered_map surface_cache; - PageMap cached_pages; - /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. From b55d8111e66ab909ec932850284a7afd13827303 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 23 Aug 2018 17:30:27 -0400 Subject: [PATCH 3/3] renderer_opengl: Implement a new shader cache. --- src/video_core/CMakeLists.txt | 2 + .../renderer_opengl/gl_rasterizer.cpp | 84 ++++------- .../renderer_opengl/gl_rasterizer.h | 15 +- .../renderer_opengl/gl_shader_cache.cpp | 131 ++++++++++++++++++ .../renderer_opengl/gl_shader_cache.h | 69 +++++++++ .../renderer_opengl/gl_shader_gen.cpp | 4 +- .../renderer_opengl/gl_shader_gen.h | 75 +--------- .../renderer_opengl/gl_shader_manager.cpp | 29 ---- .../renderer_opengl/gl_shader_manager.h | 126 ++--------------- 9 files changed, 250 insertions(+), 285 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_shader_cache.cpp create mode 100644 src/video_core/renderer_opengl/gl_shader_cache.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c9822f0d8d..aa5bc3bbee 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -27,6 +27,8 @@ add_library(video_core STATIC renderer_opengl/gl_rasterizer_cache.cpp renderer_opengl/gl_rasterizer_cache.h renderer_opengl/gl_resource_manager.h + renderer_opengl/gl_shader_cache.cpp + renderer_opengl/gl_shader_cache.h renderer_opengl/gl_shader_decompiler.cpp renderer_opengl/gl_shader_decompiler.h renderer_opengl/gl_shader_gen.cpp diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 41a58598bf..9951d8178b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -178,19 +178,6 @@ std::pair RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, return {array_ptr, buffer_offset}; } -static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program) { - auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); - - // Fetch program code from memory - GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH); - auto& shader_config = gpu.regs.shader_config[static_cast(program)]; - const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; - const boost::optional cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)}; - Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64)); - - return program_code; -} - std::pair RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); @@ -224,31 +211,17 @@ std::pair RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_ptr += sizeof(ubo); buffer_offset += sizeof(ubo); - GLShader::ShaderSetup setup{GetShaderProgramCode(program)}; - GLShader::ShaderEntries shader_resources; + const Tegra::GPUVAddr addr{gpu.regs.code_address.CodeAddress() + shader_config.offset}; + Shader shader{shader_cache.GetStageProgram(program)}; switch (program) { - case Maxwell::ShaderProgram::VertexA: { - // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. - // Conventional HW does not support this, so we combine VertexA and VertexB into one - // stage here. - setup.SetProgramB(GetShaderProgramCode(Maxwell::ShaderProgram::VertexB)); - GLShader::MaxwellVSConfig vs_config{setup}; - shader_resources = - shader_program_manager->UseProgrammableVertexShader(vs_config, setup); - break; - } - + case Maxwell::ShaderProgram::VertexA: case Maxwell::ShaderProgram::VertexB: { - GLShader::MaxwellVSConfig vs_config{setup}; - shader_resources = - shader_program_manager->UseProgrammableVertexShader(vs_config, setup); + shader_program_manager->UseProgrammableVertexShader(shader->GetProgramHandle()); break; } case Maxwell::ShaderProgram::Fragment: { - GLShader::MaxwellFSConfig fs_config{setup}; - shader_resources = - shader_program_manager->UseProgrammableFragmentShader(fs_config, setup); + shader_program_manager->UseProgrammableFragmentShader(shader->GetProgramHandle()); break; } default: @@ -257,18 +230,14 @@ std::pair RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr UNREACHABLE(); } - GLuint gl_stage_program = shader_program_manager->GetCurrentProgramStage( - static_cast(stage)); - // Configure the const buffers for this shader stage. - std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) = SetupConstBuffers( - buffer_ptr, buffer_offset, static_cast(stage), gl_stage_program, - current_constbuffer_bindpoint, shader_resources.const_buffer_entries); + std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) = + SetupConstBuffers(buffer_ptr, buffer_offset, static_cast(stage), + shader, current_constbuffer_bindpoint); // Configure the textures for this shader stage. - current_texture_bindpoint = - SetupTextures(static_cast(stage), gl_stage_program, - current_texture_bindpoint, shader_resources.texture_samplers); + current_texture_bindpoint = SetupTextures(static_cast(stage), shader, + current_texture_bindpoint); // When VertexA is enabled, we have dual vertex shaders if (program == Maxwell::ShaderProgram::VertexA) { @@ -580,6 +549,7 @@ void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.InvalidateRegion(addr, size); + shader_cache.InvalidateRegion(addr, size); } void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { @@ -669,15 +639,17 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr } } -std::tuple RasterizerOpenGL::SetupConstBuffers( - u8* buffer_ptr, GLintptr buffer_offset, Maxwell::ShaderStage stage, GLuint program, - u32 current_bindpoint, const std::vector& entries) { +std::tuple RasterizerOpenGL::SetupConstBuffers(u8* buffer_ptr, + GLintptr buffer_offset, + Maxwell::ShaderStage stage, + Shader& shader, + u32 current_bindpoint) { const auto& gpu = Core::System::GetInstance().GPU(); const auto& maxwell3d = gpu.Maxwell3D(); + const auto& shader_stage = maxwell3d.state.shader_stages[static_cast(stage)]; + const auto& entries = shader->GetShaderEntries().const_buffer_entries; // Upload only the enabled buffers from the 16 constbuffers of each shader stage - const auto& shader_stage = maxwell3d.state.shader_stages[static_cast(stage)]; - for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& used_buffer = entries[bindpoint]; const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; @@ -716,12 +688,9 @@ std::tuple RasterizerOpenGL::SetupConstBuffers( stream_buffer.GetHandle(), const_buffer_offset, size); // Now configure the bindpoint of the buffer inside the shader - const std::string buffer_name = used_buffer.GetName(); - const GLuint index = - glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str()); - if (index != GL_INVALID_INDEX) { - glUniformBlockBinding(program, index, current_bindpoint + bindpoint); - } + glUniformBlockBinding(shader->GetProgramHandle(), + shader->GetProgramResourceIndex(used_buffer.GetName()), + current_bindpoint + bindpoint); } state.Apply(); @@ -729,10 +698,10 @@ std::tuple RasterizerOpenGL::SetupConstBuffers( return {buffer_ptr, buffer_offset, current_bindpoint + static_cast(entries.size())}; } -u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit, - const std::vector& entries) { +u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, u32 current_unit) { const auto& gpu = Core::System::GetInstance().GPU(); const auto& maxwell3d = gpu.Maxwell3D(); + const auto& entries = shader->GetShaderEntries().texture_samplers; ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units), "Exceeded the number of active textures."); @@ -742,12 +711,9 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_bindpoint = current_unit + bindpoint; // Bind the uniform to the sampler. - GLint uniform = glGetUniformLocation(program, entry.GetName().c_str()); - if (uniform == -1) { - continue; - } - glProgramUniform1i(program, uniform, current_bindpoint); + glProgramUniform1i(shader->GetProgramHandle(), shader->GetUniformLocation(entry.GetName()), + current_bindpoint); const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset()); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 531b040463..7dd329efee 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -17,6 +17,7 @@ #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" @@ -99,26 +100,23 @@ private: /* * Configures the current constbuffers to use for the draw command. * @param stage The shader stage to configure buffers for. - * @param program The OpenGL program object that contains the specified stage. + * @param shader The shader object that contains the specified stage. * @param current_bindpoint The offset at which to start counting new buffer bindpoints. - * @param entries Vector describing the buffers that are actually used in the guest shader. * @returns The next available bindpoint for use in the next shader stage. */ std::tuple SetupConstBuffers( u8* buffer_ptr, GLintptr buffer_offset, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - GLuint program, u32 current_bindpoint, - const std::vector& entries); + Shader& shader, u32 current_bindpoint); /* * Configures the current textures to use for the draw command. * @param stage The shader stage to configure textures for. - * @param program The OpenGL program object that contains the specified stage. + * @param shader The shader object that contains the specified stage. * @param current_unit The offset at which to start counting unused texture units. - * @param entries Vector describing the textures that are actually used in the guest shader. * @returns The next available bindpoint for use in the next shader stage. */ - u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program, - u32 current_unit, const std::vector& entries); + u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader, + u32 current_unit); /// Syncs the viewport to match the guest state void SyncViewport(const MathUtil::Rectangle& surfaces_rect); @@ -157,6 +155,7 @@ private: OpenGLState state; RasterizerCacheOpenGL res_cache; + ShaderCacheOpenGL shader_cache; Core::Frontend::EmuWindow& emu_window; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp new file mode 100644 index 0000000000..3c3d1d35ef --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -0,0 +1,131 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "core/core.h" +#include "core/memory.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace OpenGL { + +/// Gets the address for the specified shader stage program +static Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) { + auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); + + GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH); + auto& shader_config = gpu.regs.shader_config[static_cast(program)]; + return gpu.regs.code_address.CodeAddress() + shader_config.offset; +} + +/// Gets the shader program code from memory for the specified address +static GLShader::ProgramCode GetShaderCode(Tegra::GPUVAddr addr) { + auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); + + GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH); + const boost::optional cpu_address{gpu.memory_manager.GpuToCpuAddress(addr)}; + Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64)); + + return program_code; +} + +/// Helper function to set shader uniform block bindings for a single shader stage +static void SetShaderUniformBlockBinding(GLuint shader, const char* name, + Maxwell::ShaderStage binding, size_t expected_size) { + const GLuint ub_index = glGetUniformBlockIndex(shader, name); + if (ub_index == GL_INVALID_INDEX) { + return; + } + + GLint ub_size = 0; + glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); + ASSERT_MSG(static_cast(ub_size) == expected_size, + "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size); + glUniformBlockBinding(shader, ub_index, static_cast(binding)); +} + +/// Sets shader uniform block bindings for an entire shader program +static void SetShaderUniformBlockBindings(GLuint shader) { + SetShaderUniformBlockBinding(shader, "vs_config", Maxwell::ShaderStage::Vertex, + sizeof(GLShader::MaxwellUniformData)); + SetShaderUniformBlockBinding(shader, "gs_config", Maxwell::ShaderStage::Geometry, + sizeof(GLShader::MaxwellUniformData)); + SetShaderUniformBlockBinding(shader, "fs_config", Maxwell::ShaderStage::Fragment, + sizeof(GLShader::MaxwellUniformData)); +} + +CachedShader::CachedShader(Tegra::GPUVAddr addr, Maxwell::ShaderProgram program_type) + : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} { + + GLShader::ProgramResult program_result; + GLenum gl_type{}; + + switch (program_type) { + case Maxwell::ShaderProgram::VertexA: + // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. + // Conventional HW does not support this, so we combine VertexA and VertexB into one + // stage here. + setup.SetProgramB(GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); + case Maxwell::ShaderProgram::VertexB: + program_result = GLShader::GenerateVertexShader(setup); + gl_type = GL_VERTEX_SHADER; + break; + case Maxwell::ShaderProgram::Fragment: + program_result = GLShader::GenerateFragmentShader(setup); + gl_type = GL_FRAGMENT_SHADER; + break; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast(program_type)); + UNREACHABLE(); + return; + } + + entries = program_result.second; + + OGLShader shader; + shader.Create(program_result.first.c_str(), gl_type); + program.Create(true, shader.handle); + SetShaderUniformBlockBindings(program.handle); +} + +GLuint CachedShader::GetProgramResourceIndex(const std::string& name) { + auto search{resource_cache.find(name)}; + if (search == resource_cache.end()) { + const GLuint index{ + glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, name.c_str())}; + resource_cache[name] = index; + return index; + } + + return search->second; +} + +GLint CachedShader::GetUniformLocation(const std::string& name) { + auto search{uniform_cache.find(name)}; + if (search == uniform_cache.end()) { + const GLint index{glGetUniformLocation(program.handle, name.c_str())}; + uniform_cache[name] = index; + return index; + } + + return search->second; +} + +Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { + const Tegra::GPUVAddr program_addr{GetShaderAddress(program)}; + + // Look up shader in the cache based on address + Shader shader{TryGet(program_addr)}; + + if (!shader) { + // No shader found - create a new one + shader = std::make_shared(program_addr, program); + Register(shader); + } + + return shader; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h new file mode 100644 index 0000000000..44156dcab1 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -0,0 +1,69 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "common/common_types.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_gen.h" + +namespace OpenGL { + +class CachedShader; +using Shader = std::shared_ptr; +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +class CachedShader final { +public: + CachedShader(Tegra::GPUVAddr addr, Maxwell::ShaderProgram program_type); + + /// Gets the address of the shader in guest memory, required for cache management + Tegra::GPUVAddr GetAddr() const { + return addr; + } + + /// Gets the size of the shader in guest memory, required for cache management + size_t GetSizeInBytes() const { + return sizeof(GLShader::ProgramCode); + } + + /// Gets the shader entries for the shader + const GLShader::ShaderEntries& GetShaderEntries() const { + return entries; + } + + /// Gets the GL program handle for the shader + GLuint GetProgramHandle() const { + return program.handle; + } + + /// Gets the GL program resource location for the specified resource, caching as needed + GLuint GetProgramResourceIndex(const std::string& name); + + /// Gets the GL uniform location for the specified resource, caching as needed + GLint GetUniformLocation(const std::string& name); + +private: + Tegra::GPUVAddr addr; + Maxwell::ShaderProgram program_type; + GLShader::ShaderSetup setup; + GLShader::ShaderEntries entries; + OGLProgram program; + + std::unordered_map resource_cache; + std::unordered_map uniform_cache; +}; + +class ShaderCacheOpenGL final : public RasterizerCache { +public: + /// Gets the current specified shader stage program + Shader GetStageProgram(Maxwell::ShaderProgram program); +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 0677317bc5..6ca05945e9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -13,7 +13,7 @@ using Tegra::Engines::Maxwell3D; static constexpr u32 PROGRAM_OFFSET{10}; -ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config) { +ProgramResult GenerateVertexShader(const ShaderSetup& setup) { std::string out = "#version 430 core\n"; out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += Decompiler::GetCommonDeclarations(); @@ -75,7 +75,7 @@ void main() { return {out, program.second}; } -ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config) { +ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { std::string out = "#version 430 core\n"; out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += Decompiler::GetCommonDeclarations(); diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 4e5a6f1308..c788099d4e 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -6,12 +6,9 @@ #include #include -#include -#include #include -#include + #include "common/common_types.h" -#include "common/hash.h" namespace OpenGL::GLShader { @@ -124,18 +121,8 @@ struct ShaderSetup { ProgramCode code_b; // Used for dual vertex shaders } program; - bool program_code_hash_dirty = true; - - u64 GetProgramCodeHash() { - if (program_code_hash_dirty) { - program_code_hash = GetNewHash(); - program_code_hash_dirty = false; - } - return program_code_hash; - } - /// Used in scenarios where we have a dual vertex shaders - void SetProgramB(ProgramCode program_b) { + void SetProgramB(ProgramCode&& program_b) { program.code_b = std::move(program_b); has_program_b = true; } @@ -145,73 +132,19 @@ struct ShaderSetup { } private: - u64 GetNewHash() const { - size_t hash = 0; - - const u64 hash_a = Common::ComputeHash64(program.code.data(), program.code.size()); - boost::hash_combine(hash, hash_a); - - if (has_program_b) { - // Compute hash over dual shader programs - const u64 hash_b = Common::ComputeHash64(program.code_b.data(), program.code_b.size()); - boost::hash_combine(hash, hash_b); - } - - return hash; - } - - u64 program_code_hash{}; bool has_program_b{}; }; -struct MaxwellShaderConfigCommon { - void Init(ShaderSetup& setup) { - program_hash = setup.GetProgramCodeHash(); - } - - u64 program_hash; -}; - -struct MaxwellVSConfig : Common::HashableStruct { - explicit MaxwellVSConfig(ShaderSetup& setup) { - state.Init(setup); - } -}; - -struct MaxwellFSConfig : Common::HashableStruct { - explicit MaxwellFSConfig(ShaderSetup& setup) { - state.Init(setup); - } -}; - /** * Generates the GLSL vertex shader program source code for the given VS program * @returns String of the shader source code */ -ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config); +ProgramResult GenerateVertexShader(const ShaderSetup& setup); /** * Generates the GLSL fragment shader program source code for the given FS program * @returns String of the shader source code */ -ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config); +ProgramResult GenerateFragmentShader(const ShaderSetup& setup); } // namespace OpenGL::GLShader - -namespace std { - -template <> -struct hash { - size_t operator()(const OpenGL::GLShader::MaxwellVSConfig& k) const { - return k.Hash(); - } -}; - -template <> -struct hash { - size_t operator()(const OpenGL::GLShader::MaxwellFSConfig& k) const { - return k.Hash(); - } -}; - -} // namespace std diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 8960afef5f..022d32a86e 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -3,39 +3,10 @@ // Refer to the license.txt file included. #include "core/core.h" -#include "core/hle/kernel/process.h" -#include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_shader_manager.h" namespace OpenGL::GLShader { -namespace Impl { -static void SetShaderUniformBlockBinding(GLuint shader, const char* name, - Maxwell3D::Regs::ShaderStage binding, - size_t expected_size) { - const GLuint ub_index = glGetUniformBlockIndex(shader, name); - if (ub_index == GL_INVALID_INDEX) { - return; - } - - GLint ub_size = 0; - glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); - ASSERT_MSG(static_cast(ub_size) == expected_size, - "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size); - glUniformBlockBinding(shader, ub_index, static_cast(binding)); -} - -void SetShaderUniformBlockBindings(GLuint shader) { - SetShaderUniformBlockBinding(shader, "vs_config", Maxwell3D::Regs::ShaderStage::Vertex, - sizeof(MaxwellUniformData)); - SetShaderUniformBlockBinding(shader, "gs_config", Maxwell3D::Regs::ShaderStage::Geometry, - sizeof(MaxwellUniformData)); - SetShaderUniformBlockBinding(shader, "fs_config", Maxwell3D::Regs::ShaderStage::Fragment, - sizeof(MaxwellUniformData)); -} - -} // namespace Impl - void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); const auto& regs = gpu.regs; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 0e70857765..533e42caa8 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -4,12 +4,9 @@ #pragma once -#include -#include -#include #include + #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" namespace OpenGL::GLShader { @@ -19,10 +16,6 @@ static constexpr size_t NumTextureSamplers = 32; using Tegra::Engines::Maxwell3D; -namespace Impl { -void SetShaderUniformBlockBindings(GLuint shader); -} // namespace Impl - /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned // NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. @@ -36,102 +29,22 @@ static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure si static_assert(sizeof(MaxwellUniformData) < 16384, "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); -class OGLShaderStage { -public: - OGLShaderStage() = default; - - void Create(const ProgramResult& program_result, GLenum type) { - OGLShader shader; - shader.Create(program_result.first.c_str(), type); - program.Create(true, shader.handle); - Impl::SetShaderUniformBlockBindings(program.handle); - entries = program_result.second; - } - GLuint GetHandle() const { - return program.handle; - } - - ShaderEntries GetEntries() const { - return entries; - } - -private: - OGLProgram program; - ShaderEntries entries; -}; - -// TODO(wwylele): beautify this doc -// This is a shader cache designed for translating PICA shader to GLSL shader. -// The double cache is needed because diffent KeyConfigType, which includes a hash of the code -// region (including its leftover unused code) can generate the same GLSL code. -template -class ShaderCache { -public: - ShaderCache() = default; - - using Result = std::pair; - - Result Get(const KeyConfigType& key, const ShaderSetup& setup) { - auto map_it = shader_map.find(key); - if (map_it == shader_map.end()) { - ProgramResult program = CodeGenerator(setup, key); - - auto [iter, new_shader] = shader_cache.emplace(program.first, OGLShaderStage{}); - OGLShaderStage& cached_shader = iter->second; - if (new_shader) { - cached_shader.Create(program, ShaderType); - } - shader_map[key] = &cached_shader; - return {cached_shader.GetHandle(), program.second}; - } else { - return {map_it->second->GetHandle(), map_it->second->GetEntries()}; - } - } - -private: - std::unordered_map shader_map; - std::unordered_map shader_cache; -}; - -using VertexShaders = ShaderCache; - -using FragmentShaders = ShaderCache; - class ProgramManager { public: ProgramManager() { pipeline.Create(); } - ShaderEntries UseProgrammableVertexShader(const MaxwellVSConfig& config, - const ShaderSetup& setup) { - ShaderEntries result; - std::tie(current.vs, result) = vertex_shaders.Get(config, setup); - return result; + void UseProgrammableVertexShader(GLuint program) { + vs = program; } - ShaderEntries UseProgrammableFragmentShader(const MaxwellFSConfig& config, - const ShaderSetup& setup) { - ShaderEntries result; - std::tie(current.fs, result) = fragment_shaders.Get(config, setup); - return result; - } - - GLuint GetCurrentProgramStage(Maxwell3D::Regs::ShaderStage stage) const { - switch (stage) { - case Maxwell3D::Regs::ShaderStage::Vertex: - return current.vs; - case Maxwell3D::Regs::ShaderStage::Fragment: - return current.fs; - } - - UNREACHABLE(); + void UseProgrammableFragmentShader(GLuint program) { + fs = program; } void UseTrivialGeometryShader() { - current.gs = 0; + gs = 0; } void ApplyTo(OpenGLState& state) { @@ -140,35 +53,16 @@ public: GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, 0); - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current.vs); - glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current.gs); - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current.fs); + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vs); + glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, gs); + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fs); state.draw.shader_program = 0; state.draw.program_pipeline = pipeline.handle; } private: - struct ShaderTuple { - GLuint vs = 0, gs = 0, fs = 0; - bool operator==(const ShaderTuple& rhs) const { - return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs); - } - struct Hash { - std::size_t operator()(const ShaderTuple& tuple) const { - std::size_t hash = 0; - boost::hash_combine(hash, tuple.vs); - boost::hash_combine(hash, tuple.gs); - boost::hash_combine(hash, tuple.fs); - return hash; - } - }; - }; - ShaderTuple current; - VertexShaders vertex_shaders; - FragmentShaders fragment_shaders; - - std::unordered_map program_cache; OGLPipeline pipeline; + GLuint vs{}, fs{}, gs{}; }; } // namespace OpenGL::GLShader