From 2e0a9f66a0d41dd81bfaa7078aad9b53bedb547e Mon Sep 17 00:00:00 2001 From: Subv Date: Fri, 20 Apr 2018 09:02:28 -0500 Subject: [PATCH 1/5] ShaderGen: Ignore the 'sched' instruction when generating shaders. The 'sched' instruction has a very convoluted encoding, but fortunately it seems to only appear on a fixed interval (once every 4 instructions). --- .../renderer_opengl/gl_shader_decompiler.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index de137558d0..c23f590cd8 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -276,6 +276,18 @@ private: shader.AddLine(dest + " = " + src + ";"); } + /* + * Returns whether the instruction at the specified offset is a 'sched' instruction. + * Sched instructions always appear before a sequence of 3 instructions. + */ + bool IsSchedInstruction(u32 offset) const { + // sched instructions appear once every 4 instructions. + static constexpr size_t SchedPeriod = 4; + u32 absolute_offset = offset - main_offset; + + return (absolute_offset % SchedPeriod) == 0; + } + /** * Compiles a single instruction from Tegra to GLSL. * @param offset the offset of the Tegra shader instruction. @@ -283,6 +295,10 @@ private: * + 1. If the current instruction always terminates the program, returns PROGRAM_END. */ u32 CompileInstr(u32 offset) { + // Ignore sched instructions when generating code. + if (IsSchedInstruction(offset)) + return offset + 1; + const Instruction instr = {program_code[offset]}; shader.AddLine("// " + std::to_string(offset) + ": " + OpCode::GetInfo(instr.opcode).name); From d03fc774756306aa8fd89abd5522c928b46336c7 Mon Sep 17 00:00:00 2001 From: Subv Date: Fri, 20 Apr 2018 09:04:54 -0500 Subject: [PATCH 2/5] ShaderGen: Register id 255 is special and is hardcoded to return 0 (SR_ZERO). --- src/video_core/engines/shader_bytecode.h | 3 +++ src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 2 ++ 2 files changed, 5 insertions(+) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 7cd125f05c..b0da805dba 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -13,6 +13,9 @@ namespace Tegra { namespace Shader { struct Register { + // Register 255 is special cased to always be 0 + static constexpr size_t ZeroIndex = 255; + constexpr Register() = default; constexpr Register(u64 value) : value(value) {} diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index c23f590cd8..6db0b7d396 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -220,6 +220,8 @@ private: /// Generates code representing a temporary (GPR) register. std::string GetRegister(const Register& reg, unsigned elem = 0) { + if (reg == Register::ZeroIndex) + return "0"; if (stage == Maxwell3D::Regs::ShaderStage::Fragment && reg < 4) { // GPRs 0-3 are output color for the fragment shader return std::string{"color."} + "rgba"[(reg + elem) & 3]; From 0a5e01b710b66b9264ceb469903e8b8f16faf516 Mon Sep 17 00:00:00 2001 From: Subv Date: Fri, 20 Apr 2018 09:09:50 -0500 Subject: [PATCH 3/5] ShaderGen: Implemented the fsetp instruction. Predicate variables are now added to the generated shader code in the form of 'pX' where X is the predicate id. These predicate variables are initialized to false on shader startup and are set via the fsetp instructions. TODO: * Not all the comparison types are implemented. * Only the single-predicate version is implemented. --- src/video_core/engines/shader_bytecode.h | 43 ++++++++++- .../renderer_opengl/gl_shader_decompiler.cpp | 72 +++++++++++++++++++ 2 files changed, 112 insertions(+), 3 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index b0da805dba..fb639a4170 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -109,6 +109,8 @@ union OpCode { FSETP_R = 0x5BB, FSETP_C = 0x4BB, + FSETP_IMM = 0x36B, + FSETP_NEG_IMM = 0x37B, EXIT = 0xE30, KIL = 0xE33, @@ -124,6 +126,7 @@ union OpCode { Ffma, Flow, Memory, + FloatPredicate, Unknown, }; @@ -164,6 +167,9 @@ union OpCode { case Id::FSETP_C: case Id::KIL: return op4; + case Id::FSETP_IMM: + case Id::FSETP_NEG_IMM: + return Id::FSETP_IMM; } switch (op5) { @@ -241,8 +247,9 @@ union OpCode { info_table[Id::FMUL_C] = {Type::Arithmetic, "fmul_c"}; info_table[Id::FMUL_IMM] = {Type::Arithmetic, "fmul_imm"}; info_table[Id::FMUL32_IMM] = {Type::Arithmetic, "fmul32_imm"}; - info_table[Id::FSETP_C] = {Type::Arithmetic, "fsetp_c"}; - info_table[Id::FSETP_R] = {Type::Arithmetic, "fsetp_r"}; + info_table[Id::FSETP_C] = {Type::FloatPredicate, "fsetp_c"}; + info_table[Id::FSETP_R] = {Type::FloatPredicate, "fsetp_r"}; + info_table[Id::FSETP_IMM] = {Type::FloatPredicate, "fsetp_imm"}; info_table[Id::EXIT] = {Type::Trivial, "exit"}; info_table[Id::IPA] = {Type::Trivial, "ipa"}; info_table[Id::KIL] = {Type::Flow, "kil"}; @@ -286,7 +293,23 @@ namespace Shader { enum class Pred : u64 { UnusedIndex = 0x7, - NeverExecute = 0xf, + NeverExecute = 0xF, +}; + +enum class PredCondition : u64 { + LessThan = 1, + Equal = 2, + LessEqual = 3, + GreaterThan = 4, + NotEqual = 5, + GreaterEqual = 6, + // TODO(Subv): Other condition types +}; + +enum class PredOperation : u64 { + And = 0, + Or = 1, + Xor = 2, }; enum class SubOp : u64 { @@ -346,6 +369,20 @@ union Instruction { BitField<49, 1, u64> negate_c; } ffma; + union { + BitField<0, 3, u64> pred0; + BitField<3, 3, u64> pred3; + BitField<7, 1, u64> abs_a; + BitField<39, 3, u64> pred39; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, PredOperation> op; + BitField<47, 1, u64> ftz; + BitField<48, 4, PredCondition> cond; + BitField<56, 1, u64> neg_b; + } fsetp; + BitField<61, 1, u64> is_b_imm; BitField<60, 1, u64> is_b_gpr; BitField<59, 1, u64> is_c_gpr; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 6db0b7d396..2e0203a680 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -278,6 +278,21 @@ private: shader.AddLine(dest + " = " + src + ";"); } + /* + * Writes code that assigns a predicate boolean variable. + * @param pred The id of the predicate to write to. + * @param value The expression value to assign to the predicate. + */ + void SetPredicate(u64 pred, const std::string& value) { + using Tegra::Shader::Pred; + // Can't assign to the constant predicate. + ASSERT(pred != static_cast(Pred::UnusedIndex)); + + std::string variable = 'p' + std::to_string(pred); + shader.AddLine(variable + " = " + value + ';'); + declr_predicates.insert(std::move(variable)); + } + /* * Returns whether the instruction at the specified offset is a 'sched' instruction. * Sched instructions always appear before a sequence of 3 instructions. @@ -468,7 +483,57 @@ private: } break; } + case OpCode::Type::FloatPredicate: { + std::string op_a = instr.fsetp.neg_a ? "-" : ""; + op_a += GetRegister(instr.gpr8); + if (instr.fsetp.abs_a) { + op_a = "abs(" + op_a + ')'; + } + + std::string op_b{}; + + if (instr.is_b_imm) { + if (instr.fsetp.neg_b) { + // Only the immediate version of fsetp has a neg_b bit. + op_b += '-'; + } + op_b += '(' + GetImmediate19(instr) + ')'; + } else { + if (instr.is_b_gpr) { + op_b += GetRegister(instr.gpr20); + } else { + op_b += GetUniform(instr.uniform); + } + } + + if (instr.fsetp.abs_b) { + op_b = "abs(" + op_b + ')'; + } + + using Tegra::Shader::Pred; + ASSERT_MSG(instr.fsetp.pred0 == static_cast(Pred::UnusedIndex) && + instr.fsetp.pred39 == static_cast(Pred::UnusedIndex), + "Compound predicates are not implemented"); + + // We can't use the constant predicate as destination. + ASSERT(instr.fsetp.pred3 != static_cast(Pred::UnusedIndex)); + + using Tegra::Shader::PredCondition; + switch (instr.fsetp.cond) { + case PredCondition::LessThan: + SetPredicate(instr.fsetp.pred3, '(' + op_a + ") < (" + op_b + ')'); + break; + case PredCondition::Equal: + SetPredicate(instr.fsetp.pred3, '(' + op_a + ") == (" + op_b + ')'); + break; + default: + NGLOG_CRITICAL(HW_GPU, "Unhandled predicate condition: {} (a: {}, b: {})", + static_cast(instr.fsetp.cond.Value()), op_a, op_b); + UNREACHABLE(); + } + break; + } default: { switch (instr.opcode.EffectiveOpCode()) { case OpCode::Id::EXIT: { @@ -623,6 +688,12 @@ private: declarations.AddNewLine(); ++const_buffer_layout; } + + declarations.AddNewLine(); + for (const auto& pred : declr_predicates) { + declarations.AddLine("bool " + pred + " = false;"); + } + declarations.AddNewLine(); } private: @@ -636,6 +707,7 @@ private: // Declarations std::set declr_register; + std::set declr_predicates; std::set declr_input_attribute; std::set declr_output_attribute; std::array declr_const_buffers; From c3a8ea76f180fbaf2d58d0454e7adc2bb1f30009 Mon Sep 17 00:00:00 2001 From: Subv Date: Fri, 20 Apr 2018 09:16:55 -0500 Subject: [PATCH 4/5] ShaderGen: Implemented predicated instruction execution. Each predicated instruction will be wrapped in an `if (predicate) { instruction_body; }` in the GLSL, where `predicate` is one of the predicate boolean variables previously set by fsetp. --- src/video_core/engines/shader_bytecode.h | 6 +++- .../renderer_opengl/gl_shader_decompiler.cpp | 35 +++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index fb639a4170..e6c2fd3679 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -331,7 +331,11 @@ union Instruction { OpCode opcode; BitField<0, 8, Register> gpr0; BitField<8, 8, Register> gpr8; - BitField<16, 4, Pred> pred; + union { + BitField<16, 4, Pred> full_pred; + BitField<16, 3, u64> pred_index; + } pred; + BitField<19, 1, u64> negate_pred; BitField<20, 8, Register> gpr20; BitField<20, 7, SubOp> sub_op; BitField<28, 8, Register> gpr28; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 2e0203a680..7aaee9464e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -293,6 +293,25 @@ private: declr_predicates.insert(std::move(variable)); } + /* + * Returns the condition to use in the 'if' for a predicated instruction. + * @param instr Instruction to generate the if condition for. + * @returns string containing the predicate condition. + */ + std::string GetPredicateCondition(Instruction instr) const { + using Tegra::Shader::Pred; + ASSERT(instr.pred.pred_index != static_cast(Pred::UnusedIndex)); + + std::string variable = + 'p' + std::to_string(static_cast(instr.pred.pred_index.Value())); + + if (instr.negate_pred) { + return "!(" + variable + ')'; + } + + return variable; + } + /* * Returns whether the instruction at the specified offset is a 'sched' instruction. * Sched instructions always appear before a sequence of 3 instructions. @@ -320,6 +339,16 @@ private: shader.AddLine("// " + std::to_string(offset) + ": " + OpCode::GetInfo(instr.opcode).name); + using Tegra::Shader::Pred; + ASSERT_MSG(instr.pred.full_pred != Pred::NeverExecute, + "NeverExecute predicate not implemented"); + + if (instr.pred.pred_index != static_cast(Pred::UnusedIndex)) { + shader.AddLine("if (" + GetPredicateCondition(instr) + ')'); + shader.AddLine('{'); + ++shader.scope; + } + switch (OpCode::GetInfo(instr.opcode).type) { case OpCode::Type::Arithmetic: { std::string dest = GetRegister(instr.gpr0); @@ -559,6 +588,12 @@ private: } } + // Close the predicate condition scope. + if (instr.pred != Pred::UnusedIndex) { + --shader.scope; + shader.AddLine('}'); + } + return offset + 1; } From 17a0ef1e1eb65ceb41232e694f779e1645e2b2d7 Mon Sep 17 00:00:00 2001 From: Subv Date: Fri, 20 Apr 2018 09:17:39 -0500 Subject: [PATCH 5/5] ShaderGen: Implemented the KIL instruction, which is equivalent to 'discard'. --- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 7aaee9464e..2395945c3b 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -566,10 +566,16 @@ private: default: { switch (instr.opcode.EffectiveOpCode()) { case OpCode::Id::EXIT: { + ASSERT_MSG(instr.pred.pred_index == static_cast(Pred::UnusedIndex), + "Predicated exits not implemented"); shader.AddLine("return true;"); offset = PROGRAM_END - 1; break; } + case OpCode::Id::KIL: { + shader.AddLine("discard;"); + break; + } case OpCode::Id::IPA: { const auto& attribute = instr.attribute.fmt28; std::string dest = GetRegister(instr.gpr0); @@ -589,7 +595,7 @@ private: } // Close the predicate condition scope. - if (instr.pred != Pred::UnusedIndex) { + if (instr.pred.pred_index != static_cast(Pred::UnusedIndex)) { --shader.scope; shader.AddLine('}'); }