mirror of
https://git.suyu.dev/suyu/suyu.git
synced 2024-12-21 16:00:58 +01:00
Merge pull request #12066 from ameerj/nvidia-nsanity
shader_recompiler: add byteswap pattern workaround for Nvidia
This commit is contained in:
commit
4458920799
4 changed files with 82 additions and 0 deletions
|
@ -231,6 +231,7 @@ add_library(shader_recompiler STATIC
|
||||||
ir_opt/rescaling_pass.cpp
|
ir_opt/rescaling_pass.cpp
|
||||||
ir_opt/ssa_rewrite_pass.cpp
|
ir_opt/ssa_rewrite_pass.cpp
|
||||||
ir_opt/texture_pass.cpp
|
ir_opt/texture_pass.cpp
|
||||||
|
ir_opt/vendor_workaround_pass.cpp
|
||||||
ir_opt/verification_pass.cpp
|
ir_opt/verification_pass.cpp
|
||||||
object_pool.h
|
object_pool.h
|
||||||
precompiled_headers.h
|
precompiled_headers.h
|
||||||
|
|
|
@ -310,6 +310,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
||||||
}
|
}
|
||||||
Optimization::CollectShaderInfoPass(env, program);
|
Optimization::CollectShaderInfoPass(env, program);
|
||||||
Optimization::LayerPass(program, host_info);
|
Optimization::LayerPass(program, host_info);
|
||||||
|
Optimization::VendorWorkaroundPass(program);
|
||||||
|
|
||||||
CollectInterpolationInfo(env, program);
|
CollectInterpolationInfo(env, program);
|
||||||
AddNVNStorageBuffers(program);
|
AddNVNStorageBuffers(program);
|
||||||
|
|
|
@ -26,6 +26,7 @@ void SsaRewritePass(IR::Program& program);
|
||||||
void PositionPass(Environment& env, IR::Program& program);
|
void PositionPass(Environment& env, IR::Program& program);
|
||||||
void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info);
|
void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info);
|
||||||
void LayerPass(IR::Program& program, const HostTranslateInfo& host_info);
|
void LayerPass(IR::Program& program, const HostTranslateInfo& host_info);
|
||||||
|
void VendorWorkaroundPass(IR::Program& program);
|
||||||
void VerificationPass(const IR::Program& program);
|
void VerificationPass(const IR::Program& program);
|
||||||
|
|
||||||
// Dual Vertex
|
// Dual Vertex
|
||||||
|
|
79
src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
Normal file
79
src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
Normal file
|
@ -0,0 +1,79 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include "shader_recompiler/frontend/ir/basic_block.h"
|
||||||
|
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||||
|
#include "shader_recompiler/frontend/ir/value.h"
|
||||||
|
#include "shader_recompiler/ir_opt/passes.h"
|
||||||
|
|
||||||
|
namespace Shader::Optimization {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
void AddingByteSwapsWorkaround(IR::Block& block, IR::Inst& inst) {
    // Workaround for an NVIDIA bug seen in Super Mario RPG.
    //
    // `inst` is an IAdd32 (the caller dispatches on that opcode). The shape
    // being matched is:
    //
    //     %lhs_bfe = BitFieldUExtract %factor_a, #0, #16
    //     %lhs_mul = IMul32 %lhs_bfe, %factor_b        // may be absent
    //     %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
    //     %rhs_bfe = BitFieldUExtract %factor_a, #16, #16
    //     %result  = IAdd32 %lhs_shl, %rhs_bfe
    //
    // When it matches, every use of the IAdd32 is redirected to
    //
    //     %result  = BitwiseOr32 %lhs_shl, %rhs_bfe
    //
    // Only the operand order shown above is recognised, and the value operands
    // of the two extracts (%factor_a) are not compared against each other.
    const IR::Value zero{0U};
    const IR::Value sixteen{16U};

    // Both addends have to be produced by instructions.
    IR::Inst* const shifted{inst.Arg(0).TryInstRecursive()};
    IR::Inst* const upper_half{inst.Arg(1).TryInstRecursive()};
    if (shifted == nullptr || upper_half == nullptr) {
        return;
    }

    // Left addend: a logical left shift by exactly 16.
    if (shifted->GetOpcode() != IR::Opcode::ShiftLeftLogical32) {
        return;
    }
    if (shifted->Arg(1) != sixteen) {
        return;
    }

    // Right addend: an unsigned extract with offset 16 and count 16.
    if (upper_half->GetOpcode() != IR::Opcode::BitFieldUExtract) {
        return;
    }
    if (upper_half->Arg(1) != sixteen || upper_half->Arg(2) != sixteen) {
        return;
    }

    // The shifted value is either the low-half extract itself, or an IMul32
    // whose first operand is that extract.
    IR::Inst* const shift_source{shifted->Arg(0).TryInstRecursive()};
    if (shift_source == nullptr) {
        return;
    }
    IR::Inst* lower_half{nullptr};
    switch (shift_source->GetOpcode()) {
    case IR::Opcode::BitFieldUExtract:
        lower_half = shift_source;
        break;
    case IR::Opcode::IMul32:
        lower_half = shift_source->Arg(0).TryInstRecursive();
        break;
    default:
        return;
    }
    if (lower_half == nullptr) {
        return;
    }

    // Low-half extract: unsigned extract with offset 0 and count 16.
    if (lower_half->GetOpcode() != IR::Opcode::BitFieldUExtract) {
        return;
    }
    if (lower_half->Arg(1) != zero || lower_half->Arg(2) != sixteen) {
        return;
    }

    // Emit the OR at the position of the add and reroute the add's users to it.
    IR::IREmitter emitter{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto merged = emitter.BitwiseOr(IR::U32{inst.Arg(0)}, IR::U32{inst.Arg(1)});
    inst.ReplaceUsesWith(merged);
}
|
||||||
|
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
// Walks every instruction of every block in program.post_order_blocks and
// applies the vendor-specific rewrites defined in this file. At present the
// only rewrite is the byte-swap add workaround, attempted on each IAdd32.
void VendorWorkaroundPass(IR::Program& program) {
    for (IR::Block* const block : program.post_order_blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            if (inst.GetOpcode() == IR::Opcode::IAdd32) {
                AddingByteSwapsWorkaround(*block, inst);
            }
        }
    }
}
|
||||||
|
|
||||||
|
} // namespace Shader::Optimization
|
Loading…
Reference in a new issue