mirror of
https://git.suyu.dev/suyu/suyu.git
synced 2025-01-10 17:51:01 +01:00
Shaders: Implemented multiple-word loads and stores to and from attribute memory.
This seems to be an optimization performed by nouveau.
This commit is contained in:
parent
df5a44a40b
commit
c878a819d7
2 changed files with 58 additions and 7 deletions
|
@ -67,6 +67,13 @@ private:
|
||||||
u64 value{};
|
u64 value{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum class AttributeSize : u64 {
|
||||||
|
Word = 0,
|
||||||
|
DoubleWord = 1,
|
||||||
|
TripleWord = 2,
|
||||||
|
QuadWord = 3,
|
||||||
|
};
|
||||||
|
|
||||||
union Attribute {
|
union Attribute {
|
||||||
Attribute() = default;
|
Attribute() = default;
|
||||||
|
|
||||||
|
@ -87,9 +94,10 @@ union Attribute {
|
||||||
};
|
};
|
||||||
|
|
||||||
union {
|
union {
|
||||||
|
BitField<20, 10, u64> immediate;
|
||||||
BitField<22, 2, u64> element;
|
BitField<22, 2, u64> element;
|
||||||
BitField<24, 6, Index> index;
|
BitField<24, 6, Index> index;
|
||||||
BitField<47, 3, u64> size;
|
BitField<47, 3, AttributeSize> size;
|
||||||
} fmt20;
|
} fmt20;
|
||||||
|
|
||||||
union {
|
union {
|
||||||
|
|
|
@ -1772,13 +1772,34 @@ private:
|
||||||
case OpCode::Type::Memory: {
|
case OpCode::Type::Memory: {
|
||||||
switch (opcode->GetId()) {
|
switch (opcode->GetId()) {
|
||||||
case OpCode::Id::LD_A: {
|
case OpCode::Id::LD_A: {
|
||||||
ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
|
|
||||||
// Note: Shouldn't this be interp mode flat? As in no interpolation made.
|
// Note: Shouldn't this be interp mode flat? As in no interpolation made.
|
||||||
|
ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
|
||||||
|
"Indirect attribute loads are not supported");
|
||||||
|
ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
|
||||||
|
"Unaligned attribute loads are not supported");
|
||||||
|
|
||||||
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
|
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
|
||||||
Tegra::Shader::IpaSampleMode::Default};
|
Tegra::Shader::IpaSampleMode::Default};
|
||||||
regs.SetRegisterToInputAttibute(instr.gpr0, instr.attribute.fmt20.element,
|
|
||||||
instr.attribute.fmt20.index, input_mode);
|
u32 next_element = instr.attribute.fmt20.element;
|
||||||
|
u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value());
|
||||||
|
|
||||||
|
const auto LoadNextElement = [&](u32 reg_offset) {
|
||||||
|
regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element,
|
||||||
|
static_cast<Attribute::Index>(next_index),
|
||||||
|
input_mode);
|
||||||
|
|
||||||
|
// Load the next attribute element into the following register. If the element
|
||||||
|
// to load goes beyond the vec4 size, load the first element of the next
|
||||||
|
// attribute.
|
||||||
|
next_element = (next_element + 1) % 4;
|
||||||
|
next_index = next_index + (next_element == 0 ? 1 : 0);
|
||||||
|
};
|
||||||
|
|
||||||
|
const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
|
||||||
|
for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
|
||||||
|
LoadNextElement(reg_offset);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case OpCode::Id::LD_C: {
|
case OpCode::Id::LD_C: {
|
||||||
|
@ -1820,9 +1841,31 @@ private:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case OpCode::Id::ST_A: {
|
case OpCode::Id::ST_A: {
|
||||||
ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
|
ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
|
||||||
regs.SetOutputAttributeToRegister(instr.attribute.fmt20.index,
|
"Indirect attribute loads are not supported");
|
||||||
instr.attribute.fmt20.element, instr.gpr0);
|
ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
|
||||||
|
"Unaligned attribute loads are not supported");
|
||||||
|
|
||||||
|
u32 next_element = instr.attribute.fmt20.element;
|
||||||
|
u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value());
|
||||||
|
|
||||||
|
const auto StoreNextElement = [&](u32 reg_offset) {
|
||||||
|
regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index),
|
||||||
|
next_element,
|
||||||
|
instr.gpr0.Value() + reg_offset);
|
||||||
|
|
||||||
|
// Store the next register into the following attribute element. If the element
|
||||||
|
// to store goes beyond the vec4 size, store into the first element of the next
|
||||||
|
// attribute.
|
||||||
|
next_element = (next_element + 1) % 4;
|
||||||
|
next_index = next_index + (next_element == 0 ? 1 : 0);
|
||||||
|
};
|
||||||
|
|
||||||
|
const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
|
||||||
|
for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
|
||||||
|
StoreNextElement(reg_offset);
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case OpCode::Id::TEX: {
|
case OpCode::Id::TEX: {
|
||||||
|
|
Loading…
Reference in a new issue