From b46aaf17eea05a04d6c7f980219b58a17d8e569e Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Sat, 30 Oct 2021 03:43:57 +0200 Subject: [PATCH] early-access version 2167 --- README.md | 2 +- .../hle/service/nvdrv/devices/nvhost_ctrl.cpp | 40 +++++++------------ .../ir_opt/rescaling_pass.cpp | 36 ++++++++++++++++- .../host_shaders/present_gaussian.frag | 4 +- .../renderer_opengl/gl_rasterizer.cpp | 5 ++- .../renderer_opengl/gl_texture_cache.cpp | 9 ++++- .../renderer_vulkan/vk_texture_cache.cpp | 9 ++++- src/video_core/texture_cache/image_base.h | 1 - src/video_core/texture_cache/image_info.cpp | 10 +++-- src/video_core/texture_cache/texture_cache.h | 18 +-------- src/video_core/textures/texture.cpp | 19 +-------- .../configure_graphics_advanced.ui | 8 ++-- 12 files changed, 85 insertions(+), 76 deletions(-) diff --git a/README.md b/README.md index 3ae60b138..2214b601a 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 2166. +This is the source code for early-access 2167. ## Legal Notice diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index f9b82b504..31a26c500 100755 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp @@ -106,30 +106,28 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector& input, std::vector auto& event = events_interface.events[event_id]; auto& gpu = system.GPU(); - - // This is mostly to take into account unimplemented features. As synced - // gpu is always synced. - if (!gpu.IsAsync()) { - event.event->GetWritableEvent().Signal(); - return NvResult::Success; - } const u32 current_syncpoint_value = event.fence.value; const s32 diff = current_syncpoint_value - params.threshold; - if (diff >= 0) { - event.event->GetWritableEvent().Signal(); - params.value = current_syncpoint_value; - std::memcpy(output.data(), ¶ms, sizeof(params)); - events_interface.failed[event_id] = false; - return NvResult::Success; - } - const u32 target_value = current_syncpoint_value - diff; + const u32 target_value = params.value; if (!is_async) { params.value = 0; } + const auto check_failing = [&]() { + if (events_interface.failed[event_id]) { + gpu.WaitFence(params.syncpt_id, target_value); + std::memcpy(output.data(), ¶ms, sizeof(params)); + events_interface.failed[event_id] = false; + return true; + } + return false; + }; + if (params.timeout == 0) { - std::memcpy(output.data(), ¶ms, sizeof(params)); + if (check_failing()) { + return NvResult::Success; + } return NvResult::Timeout; } @@ -148,15 +146,7 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector& input, std::vector params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; } params.value |= event_id; - event.event->GetWritableEvent().Clear(); - if (events_interface.failed[event_id]) { - { - auto lk = system.StallCPU(); - gpu.WaitFence(params.syncpt_id, target_value); - system.UnstallCPU(); - } - std::memcpy(output.data(), ¶ms, sizeof(params)); - events_interface.failed[event_id] = false; + if (check_failing()) { return NvResult::Success; } gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index a5fa4ee83..c28500dd1 100755 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -30,7 +30,7 @@ namespace { return false; } -void VisitMark(const IR::Inst& inst) { +void VisitMark(IR::Block& block, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::ShuffleIndex: case IR::Opcode::ShuffleUp: @@ -49,19 +49,30 @@ void VisitMark(const IR::Inst& inst) { break; } IR::Inst* const bitcast_inst{bitcast_arg.InstRecursive()}; + bool must_patch_outside = false; if (bitcast_inst->GetOpcode() == IR::Opcode::GetAttribute) { const IR::Attribute attr{bitcast_inst->Arg(0).Attribute()}; switch (attr) { case IR::Attribute::PositionX: case IR::Attribute::PositionY: bitcast_inst->SetFlags(0xDEADBEEF); + must_patch_outside = true; break; default: break; } } + if (must_patch_outside) { + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::F32 new_inst{&*block.PrependNewInst(it, inst)}; + const IR::F32 up_factor{ir.FPRecip(ir.ResolutionDownFactor())}; + const IR::Value converted{ir.FPMul(new_inst, up_factor)}; + inst.ReplaceUsesWith(converted); + } break; } + default: break; } @@ -75,6 +86,14 @@ void PatchFragCoord(IR::Block& block, IR::Inst& inst) { inst.ReplaceUsesWith(downscaled_frag_coord); } +void PatchPointSize(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::F32 point_value{inst.Arg(1)}; + const IR::F32 up_factor{ir.FPRecip(ir.ResolutionDownFactor())}; + const IR::F32 upscaled_point_value{ir.FPMul(point_value, up_factor)}; + inst.SetArg(1, upscaled_point_value); +} + [[nodiscard]] IR::U32 Scale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) { IR::U32 scaled_value{value}; if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) { @@ -253,6 +272,19 @@ void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) { } break; } + case IR::Opcode::SetAttribute: { + const IR::Attribute attr{inst.Arg(0).Attribute()}; + switch (attr) { + case IR::Attribute::PointSize: + if (inst.Flags() != 0xDEADBEEF) { + PatchPointSize(block, inst); + } + break; + default: + break; + } + break; + } case IR::Opcode::ImageQueryDimensions: PatchImageQueryDimensions(block, inst); break; @@ -281,7 +313,7 @@ void RescalingPass(IR::Program& program) { if (is_fragment_shader) { for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { - VisitMark(inst); + VisitMark(*block, inst); } } } diff --git a/src/video_core/host_shaders/present_gaussian.frag b/src/video_core/host_shaders/present_gaussian.frag index 72a300dac..66fed3238 100755 --- a/src/video_core/host_shaders/present_gaussian.frag +++ b/src/video_core/host_shaders/present_gaussian.frag @@ -63,8 +63,8 @@ void main() { // TODO(Blinkhawk): This code can be optimized through shader group instructions. vec3 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset).rgb; vec3 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb; - vec3 diagonalA = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb; - vec3 diagonalB = blurVertical(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb; + vec3 diagonalA = blurDiagonal(color_texture, frag_tex_coord, tex_offset).rgb; + vec3 diagonalB = blurDiagonal(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb; vec3 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f); color = vec4(combination + base, 1.0f); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 696173acc..3938c8c0e 100755 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -978,8 +978,9 @@ void RasterizerOpenGL::SyncPointState() { oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable); - - glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); + const bool is_rescaling{texture_cache.IsRescaling()}; + const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; + glPointSize(std::max(1.0f, maxwell3d.regs.point_size * scale)); } void RasterizerOpenGL::SyncLineState() { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 00610ea2c..c2668fee6 100755 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1201,7 +1201,14 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { - glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy()); + const f32 setting_anisotropic = + static_cast(1U << Settings::values.max_anisotropy.GetValue()); + const f32 game_anisotropic = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f); + const bool aument_anisotropic = + game_anisotropic > 1.0f || config.mipmap_filter == TextureMipmapFilter::Linear; + const f32 max_anisotropy = + aument_anisotropic ? std::max(game_anisotropic, setting_anisotropic) : game_anisotropic; + glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropy); } else { LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 1c0741250..7db561ca0 100755 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1448,7 +1448,14 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t LOG_WARNING(Render_Vulkan, "VK_EXT_sampler_filter_minmax is required"); } // Some games have samplers with garbage. Sanitize them here. - const float max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); + const f32 setting_anisotropic = + static_cast(1U << Settings::values.max_anisotropy.GetValue()); + const f32 game_anisotropic = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); + const bool aument_anisotropic = + game_anisotropic > 1.0f || tsc.mipmap_filter == TextureMipmapFilter::Linear; + const f32 max_anisotropy = + aument_anisotropic ? std::max(game_anisotropic, setting_anisotropic) : game_anisotropic; + sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .pNext = pnext, diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 02c669766..89c111c00 100755 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -38,7 +38,6 @@ enum class ImageFlagBits : u32 { Rescaled = 1 << 12, CheckingRescalable = 1 << 13, IsRescalable = 1 << 14, - Blacklisted = 1 << 15, }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index d8e414247..afb94082b 100755 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -16,6 +16,7 @@ namespace VideoCommon { using Tegra::Texture::TextureType; using Tegra::Texture::TICEntry; using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::SurfaceType; ImageInfo::ImageInfo(const TICEntry& config) noexcept { format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, @@ -102,6 +103,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { layer_stride = CalculateLayerStride(*this); maybe_unaligned_layer_stride = CalculateLayerSize(*this); rescaleable &= (block.depth == 0) && resources.levels == 1; + rescaleable &= size.height > 256 || GetFormatType(format) != SurfaceType::ColorTexture; downscaleable = size.height > 512; } } @@ -135,7 +137,8 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) type = ImageType::e3D; size.depth = rt.depth; } else { - rescaleable = block.depth == 0 && size.height > 256; + rescaleable = block.depth == 0; + rescaleable &= size.height > 256; downscaleable = size.height > 512; type = ImageType::e2D; resources.layers = rt.depth; @@ -165,7 +168,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { type = ImageType::e3D; size.depth = regs.zeta_depth; } else { - rescaleable = block.depth == 0 && size.height > 256; + rescaleable = block.depth == 0; downscaleable = size.height > 512; type = ImageType::e2D; resources.layers = regs.zeta_depth; @@ -199,7 +202,8 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { .height = config.height, .depth = 1, }; - rescaleable = block.depth == 0 && size.height > 256; + rescaleable = block.depth == 0; + rescaleable &= size.height > 256; downscaleable = size.height > 512; } } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c8031b695..aec130a32 100755 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -53,8 +53,8 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& const auto device_memory = runtime.GetDeviceLocalMemory(); const u64 possible_expected_memory = (device_memory * 4) / 10; const u64 possible_critical_memory = (device_memory * 7) / 10; - expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); - critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); + expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB); + critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB); minimum_memory = 0; } else { // On OpenGL we can be more conservatives as the driver takes care. @@ -355,7 +355,6 @@ void TextureCache

::FillImageViews(DescriptorTable& table, if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) { const ImageViewBase& image_view{slot_image_views[view.id]}; auto& image = slot_images[image_view.image_id]; - image.flags |= ImageFlagBits::Blacklisted; has_blacklisted |= ScaleDown(image); image.scale_rating = 0; } @@ -985,7 +984,6 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA bool can_rescale = info.rescaleable; bool any_rescaled = false; - bool any_blacklisted = false; for (const ImageId sibling_id : all_siblings) { if (!can_rescale) { break; @@ -993,7 +991,6 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA Image& sibling = slot_images[sibling_id]; can_rescale &= ImageCanRescale(sibling); any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled); - any_blacklisted |= True(sibling.flags & ImageFlagBits::Blacklisted); } can_rescale &= any_rescaled; @@ -1007,9 +1004,6 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA for (const ImageId sibling_id : all_siblings) { Image& sibling = slot_images[sibling_id]; ScaleDown(sibling); - if (any_blacklisted) { - sibling.flags |= ImageFlagBits::Blacklisted; - } } } @@ -1644,7 +1638,6 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { boost::container::small_vector aliased_images; Image& image = slot_images[image_id]; bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); - bool any_blacklisted = True(image.flags & ImageFlagBits::Blacklisted); u64 most_recent_tick = image.modification_tick; for (const AliasedImage& aliased : image.aliased_images) { ImageBase& aliased_image = slot_images[aliased.id]; @@ -1652,7 +1645,6 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); aliased_images.push_back(&aliased); any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); - any_blacklisted |= True(aliased_image.flags & ImageFlagBits::Blacklisted); } } if (aliased_images.empty()) { @@ -1664,9 +1656,6 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { ScaleUp(image); } else { ScaleDown(image); - if (any_blacklisted) { - image.flags |= ImageFlagBits::Blacklisted; - } } } image.modification_tick = most_recent_tick; @@ -1684,9 +1673,6 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { Image& aliased_image = slot_images[aliased->id]; if (!can_rescale) { ScaleDown(aliased_image); - if (any_blacklisted) { - aliased_image.flags |= ImageFlagBits::Blacklisted; - } CopyImage(image_id, aliased->id, aliased->copies); continue; } diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp index a552543ed..b2d5bb03e 100755 --- a/src/video_core/textures/texture.cpp +++ b/src/video_core/textures/texture.cpp @@ -6,7 +6,6 @@ #include #include "common/cityhash.h" -#include "common/settings.h" #include "video_core/textures/texture.h" using Tegra::Texture::TICEntry; @@ -51,22 +50,6 @@ constexpr std::array SRGB_CONVERSION_LUT = { 0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f, }; -unsigned SettingsMinimumAnisotropy() noexcept { - switch (static_cast(Settings::values.max_anisotropy.GetValue())) { - default: - case Anisotropy::Default: - return 1U; - case Anisotropy::Filter2x: - return 2U; - case Anisotropy::Filter4x: - return 4U; - case Anisotropy::Filter8x: - return 8U; - case Anisotropy::Filter16x: - return 16U; - } -} - } // Anonymous namespace std::array TSCEntry::BorderColor() const noexcept { @@ -78,7 +61,7 @@ std::array TSCEntry::BorderColor() const noexcept { } float TSCEntry::MaxAnisotropy() const noexcept { - return static_cast(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); + return static_cast(1U << max_anisotropy); } } // namespace Tegra::Texture diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index d06b45f17..cbbcd45a0 100755 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -130,22 +130,22 @@ - 2x (WILL BREAK THINGS) + 2x - 4x (WILL BREAK THINGS) + 4x - 8x (WILL BREAK THINGS) + 8x - 16x (WILL BREAK THINGS) + 16x