suyu/src/video_core/shader/decode/other.cpp
ReinUsesLisp 2339fe199f shader_decompiler: Remove FragCoord.w hack and change IPA implementation
Credits go to gdkchan and Ryujinx. The pull request used for this can
be found here: https://github.com/Ryujinx/Ryujinx/pull/1082

yuzu was already using the header for interpolation, but it was missing
the FragCoord.w multiplication described in the linked pull request.
This commit finally removes the FragCoord.w == 1.0f hack from the shader
decompiler.

While we are at it, this commit renames some enumerations to match
Nvidia's documentation (linked below) and fixes component declaration
order in the shader program header (z and w were swapped).

https://github.com/NVIDIA/open-gpu-doc/blob/master/Shader-Program-Header/Shader-Program-Header.html
2020-04-01 21:48:55 -03:00

286 lines
12 KiB
C++

// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using std::move;
using Tegra::Shader::ConditionCode;
using Tegra::Shader::Instruction;
using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::OpCode;
using Tegra::Shader::PixelImap;
using Tegra::Shader::Register;
using Tegra::Shader::SystemVariable;
using Index = Tegra::Shader::Attribute::Index;
u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
switch (opcode->get().GetId()) {
case OpCode::Id::NOP: {
UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T);
UNIMPLEMENTED_IF(instr.nop.trigger != 0);
// With the previous preconditions, this instruction is a no-operation.
break;
}
case OpCode::Id::EXIT: {
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}",
static_cast<u32>(cc));
switch (instr.flow.cond) {
case Tegra::Shader::FlowCondition::Always:
bb.push_back(Operation(OperationCode::Exit));
if (instr.pred.pred_index == static_cast<u64>(Tegra::Shader::Pred::UnusedIndex)) {
// If this is an unconditional exit then just end processing here,
// otherwise we have to account for the possibility of the condition
// not being met, so continue processing the next instruction.
pc = MAX_PROGRAM_LENGTH - 1;
}
break;
case Tegra::Shader::FlowCondition::Fcsm_Tr:
// TODO(bunnei): What is this used for? If we assume this conditon is not
// satisifed, dual vertex shaders in Farming Simulator make more sense
UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
break;
default:
UNIMPLEMENTED_MSG("Unhandled flow condition: {}",
static_cast<u32>(instr.flow.cond.Value()));
}
break;
}
case OpCode::Id::KIL: {
UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "KIL condition code used: {}",
static_cast<u32>(cc));
bb.push_back(Operation(OperationCode::Discard));
break;
}
case OpCode::Id::MOV_SYS: {
const Node value = [this, instr] {
switch (instr.sys20) {
case SystemVariable::LaneId:
LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete");
return Immediate(0U);
case SystemVariable::InvocationId:
return Operation(OperationCode::InvocationId);
case SystemVariable::Ydirection:
return Operation(OperationCode::YNegate);
case SystemVariable::InvocationInfo:
LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
return Immediate(0U);
case SystemVariable::Tid: {
Node value = Immediate(0);
value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9);
value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdY), 16, 9);
value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdZ), 26, 5);
return value;
}
case SystemVariable::TidX:
return Operation(OperationCode::LocalInvocationIdX);
case SystemVariable::TidY:
return Operation(OperationCode::LocalInvocationIdY);
case SystemVariable::TidZ:
return Operation(OperationCode::LocalInvocationIdZ);
case SystemVariable::CtaIdX:
return Operation(OperationCode::WorkGroupIdX);
case SystemVariable::CtaIdY:
return Operation(OperationCode::WorkGroupIdY);
case SystemVariable::CtaIdZ:
return Operation(OperationCode::WorkGroupIdZ);
default:
UNIMPLEMENTED_MSG("Unhandled system move: {}",
static_cast<u32>(instr.sys20.Value()));
return Immediate(0u);
}
}();
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::BRA: {
Node branch;
if (instr.bra.constant_buffer == 0) {
const u32 target = pc + instr.bra.GetBranchTarget();
branch = Operation(OperationCode::Branch, Immediate(target));
} else {
const u32 target = pc + 1;
const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
PRECISE, op_a, Immediate(3));
const Node operand =
Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
branch = Operation(OperationCode::BranchIndirect, operand);
}
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
if (cc != Tegra::Shader::ConditionCode::T) {
bb.push_back(Conditional(GetConditionCode(cc), {branch}));
} else {
bb.push_back(branch);
}
break;
}
case OpCode::Id::BRX: {
Node operand;
if (instr.brx.constant_buffer != 0) {
const s32 target = pc + 1;
const Node index = GetRegister(instr.gpr8);
const Node op_a =
GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
PRECISE, op_a, Immediate(3));
operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
} else {
const s32 target = pc + instr.brx.GetBranchExtend();
const Node op_a = GetRegister(instr.gpr8);
const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
PRECISE, op_a, Immediate(3));
operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
}
const Node branch = Operation(OperationCode::BranchIndirect, operand);
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
if (cc != Tegra::Shader::ConditionCode::T) {
bb.push_back(Conditional(GetConditionCode(cc), {branch}));
} else {
bb.push_back(branch);
}
break;
}
case OpCode::Id::SSY: {
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
"Constant buffer flow is not supported");
if (disable_flow_stack) {
break;
}
// The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
const u32 target = pc + instr.bra.GetBranchTarget();
bb.push_back(
Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target)));
break;
}
case OpCode::Id::PBK: {
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
"Constant buffer PBK is not supported");
if (disable_flow_stack) {
break;
}
// PBK pushes to a stack the address where BRK will jump to.
const u32 target = pc + instr.bra.GetBranchTarget();
bb.push_back(
Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target)));
break;
}
case OpCode::Id::SYNC: {
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
static_cast<u32>(cc));
if (decompiled) {
break;
}
// The SYNC opcode jumps to the address previously set by the SSY opcode
bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
break;
}
case OpCode::Id::BRK: {
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
static_cast<u32>(cc));
if (decompiled) {
break;
}
// The BRK opcode jumps to the address previously set by the PBK opcode
bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
break;
}
case OpCode::Id::IPA: {
const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
const auto attribute = instr.attribute.fmt28;
const Index index = attribute.index;
Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
: GetInputAttribute(index, attribute.element);
// Code taken from Ryujinx.
if (index >= Index::Attribute_0 && index <= Index::Attribute_31) {
const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0);
if (header.ps.GetPixelImap(location) == PixelImap::Perspective) {
Node position_w = GetInputAttribute(Index::Position, 3);
value = Operation(OperationCode::FMul, move(value), move(position_w));
}
}
if (instr.ipa.interp_mode == IpaInterpMode::Multiply) {
value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20));
}
value = GetSaturatedFloat(move(value), instr.ipa.saturate);
SetRegister(bb, instr.gpr0, move(value));
break;
}
case OpCode::Id::OUT_R: {
UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
"Stream buffer is not supported");
if (instr.out.emit) {
// gpr0 is used to store the next address and gpr8 contains the address to emit.
// Hardware uses pointers here but we just ignore it
bb.push_back(Operation(OperationCode::EmitVertex));
SetRegister(bb, instr.gpr0, Immediate(0));
}
if (instr.out.cut) {
bb.push_back(Operation(OperationCode::EndPrimitive));
}
break;
}
case OpCode::Id::ISBERD: {
UNIMPLEMENTED_IF(instr.isberd.o != 0);
UNIMPLEMENTED_IF(instr.isberd.skew != 0);
UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
break;
}
case OpCode::Id::MEMBAR: {
UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL);
UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
bb.push_back(Operation(OperationCode::MemoryBarrierGL));
break;
}
case OpCode::Id::DEPBAR: {
LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
}
return pc;
}
} // namespace VideoCommon::Shader