From 6c4985edc9e05dbd9a93ba670625daee98c14bcf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 14 May 2019 17:16:23 -0300 Subject: [PATCH 1/2] shader/half_set_predicate: Implement missing HSETP2 variants --- src/video_core/engines/shader_bytecode.h | 26 +++++++++--- .../shader/decode/half_set_predicate.cpp | 42 +++++++++++++------ 2 files changed, 49 insertions(+), 19 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 79d469b88b..8520a01433 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -931,8 +931,6 @@ union Instruction { } csetp; union { - BitField<35, 4, PredCondition> cond; - BitField<49, 1, u64> h_and; BitField<6, 1, u64> ftz; BitField<45, 2, PredOperation> op; BitField<3, 3, u64> pred3; @@ -940,9 +938,21 @@ union Instruction { BitField<43, 1, u64> negate_a; BitField<44, 1, u64> abs_a; BitField<47, 2, HalfType> type_a; - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; + union { + BitField<35, 4, PredCondition> cond; + BitField<49, 1, u64> h_and; + BitField<31, 1, u64> negate_b; + BitField<30, 1, u64> abs_b; + BitField<28, 2, HalfType> type_b; + } reg; + union { + BitField<56, 1, u64> negate_b; + BitField<54, 1, u64> abs_b; + } cbuf; + union { + BitField<49, 4, PredCondition> cond; + BitField<53, 1, u64> h_and; + } cbuf_and_imm; BitField<42, 1, u64> neg_pred; BitField<39, 3, u64> pred39; } hsetp2; @@ -1548,7 +1558,9 @@ public: HFMA2_RC, HFMA2_RR, HFMA2_IMM_R, + HSETP2_C, HSETP2_R, + HSETP2_IMM, HSET2_R, POPC_C, POPC_R, @@ -1831,7 +1843,9 @@ private: INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), - INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"), + INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), + INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), + INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index d59d15bd8a..ff41fb2b5b 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -23,17 +23,33 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); - Node op_b = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::HSETP2_R: - return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, - instr.hsetp2.negate_b); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); + Tegra::Shader::PredCondition cond{}; + bool h_and{}; + Node op_b{}; + switch (opcode->get().GetId()) { + case OpCode::Id::HSETP2_C: + cond = instr.hsetp2.cbuf_and_imm.cond; + h_and = instr.hsetp2.cbuf_and_imm.h_and; + op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), + instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); + break; + case OpCode::Id::HSETP2_IMM: + cond = instr.hsetp2.cbuf_and_imm.cond; + h_and = instr.hsetp2.cbuf_and_imm.h_and; + op_b = UnpackHalfImmediate(instr, true); + break; + case OpCode::Id::HSETP2_R: + cond = instr.hsetp2.reg.cond; + h_and = instr.hsetp2.reg.h_and; + op_b = + UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b, + instr.hsetp2.reg.negate_b), + instr.hsetp2.reg.type_b); + break; + default: + UNREACHABLE(); + op_b = Immediate(0); + } // We can't use the constant predicate as destination. ASSERT(instr.hsetp2.pred3 != static_cast(Pred::UnusedIndex)); @@ -42,9 +58,9 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); const OperationCode pair_combiner = - instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; + h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; - const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b); + const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); const Node first_pred = Operation(pair_combiner, comparison); // Set the primary predicate to the result of Predicate OP SecondPredicate From 45c162444d357e18b333e94973a39b9d71c7604d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Jul 2019 22:20:34 -0300 Subject: [PATCH 2/2] shader/half_set_predicate: Fix HSETP2 implementation --- .../renderer_opengl/gl_shader_decompiler.cpp | 16 +++------- .../renderer_vulkan/vk_shader_decompiler.cpp | 17 +++------- .../shader/decode/half_set_predicate.cpp | 31 +++++++++---------- src/video_core/shader/node.h | 3 +- 4 files changed, 23 insertions(+), 44 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 119073776a..e19d502bc1 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -254,10 +254,6 @@ public: } private: - using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation); - using OperationDecompilersArray = - std::array(OperationCode::Amount)>; - void DeclareVertex() { if (stage != ShaderStage::Vertex) return; @@ -1400,14 +1396,10 @@ private: return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); } - std::string LogicalAll2(Operation operation) { + std::string LogicalAnd2(Operation operation) { return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); } - std::string LogicalAny2(Operation operation) { - return GenerateUnary(operation, "any", Type::Bool, Type::Bool2); - } - template std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, @@ -1714,7 +1706,7 @@ private: return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; } - static constexpr OperationDecompilersArray operation_decompilers = { + static constexpr std::array operation_decompilers = { &GLSLDecompiler::Assign, &GLSLDecompiler::Select, @@ -1798,8 +1790,7 @@ private: &GLSLDecompiler::LogicalXor, &GLSLDecompiler::LogicalNegate, &GLSLDecompiler::LogicalPick2, - &GLSLDecompiler::LogicalAll2, - &GLSLDecompiler::LogicalAny2, + &GLSLDecompiler::LogicalAnd2, &GLSLDecompiler::LogicalLessThan, &GLSLDecompiler::LogicalEqual, @@ -1863,6 +1854,7 @@ private: &GLSLDecompiler::WorkGroupId<1>, &GLSLDecompiler::WorkGroupId<2>, }; + static_assert(operation_decompilers.size() == static_cast(OperationCode::Amount)); std::string GetRegister(u32 index) const { return GetDeclarationWithSuffix(index, "gpr"); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 9b2d8e987d..d267712c9f 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -205,10 +205,6 @@ public: } private: - using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation); - using OperationDecompilersArray = - std::array(OperationCode::Amount)>; - static constexpr auto INTERNAL_FLAGS_COUNT = static_cast(InternalFlag::Amount); void AllocateBindings() { @@ -804,12 +800,7 @@ private: return {}; } - Id LogicalAll2(Operation operation) { - UNIMPLEMENTED(); - return {}; - } - - Id LogicalAny2(Operation operation) { + Id LogicalAnd2(Operation operation) { UNIMPLEMENTED(); return {}; } @@ -1206,7 +1197,7 @@ private: return {}; } - static constexpr OperationDecompilersArray operation_decompilers = { + static constexpr std::array operation_decompilers = { &SPIRVDecompiler::Assign, &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, @@ -1291,8 +1282,7 @@ private: &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, &SPIRVDecompiler::LogicalPick2, - &SPIRVDecompiler::LogicalAll2, - &SPIRVDecompiler::LogicalAny2, + &SPIRVDecompiler::LogicalAnd2, &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, @@ -1357,6 +1347,7 @@ private: &SPIRVDecompiler::WorkGroupId<1>, &SPIRVDecompiler::WorkGroupId<2>, }; + static_assert(operation_decompilers.size() == static_cast(OperationCode::Amount)); const VKDevice& device; const ShaderIR& ir; diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index ff41fb2b5b..ad180d6dfd 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -51,26 +51,23 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { op_b = Immediate(0); } - // We can't use the constant predicate as destination. - ASSERT(instr.hsetp2.pred3 != static_cast(Pred::UnusedIndex)); - - const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); - const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); - const OperationCode pair_combiner = - h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; + const Node pred39 = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); + + const auto Write = [&](u64 dest, Node src) { + SetPredicate(bb, dest, Operation(combiner, std::move(src), pred39)); + }; const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); - const Node first_pred = Operation(pair_combiner, comparison); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - const Node value = Operation(combiner, first_pred, second_pred); - SetPredicate(bb, instr.hsetp2.pred3, value); - - if (instr.hsetp2.pred0 != static_cast(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled - const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred); - SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred)); + const u64 first = instr.hsetp2.pred0; + const u64 second = instr.hsetp2.pred3; + if (h_and) { + const Node joined = Operation(OperationCode::LogicalAnd2, comparison); + Write(first, joined); + Write(second, Operation(OperationCode::LogicalNegate, joined)); + } else { + Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u))); + Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u))); } return pc; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 7427ed896d..715184d67a 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -101,8 +101,7 @@ enum class OperationCode { LogicalXor, /// (bool a, bool b) -> bool LogicalNegate, /// (bool a) -> bool LogicalPick2, /// (bool2 pair, uint index) -> bool - LogicalAll2, /// (bool2 a) -> bool - LogicalAny2, /// (bool2 a) -> bool + LogicalAnd2, /// (bool2 a) -> bool LogicalFLessThan, /// (float a, float b) -> bool LogicalFEqual, /// (float a, float b) -> bool