commit
ba0bfe7d82
4 changed files with 118 additions and 20 deletions
2
externals/nihstro
vendored
2
externals/nihstro
vendored
|
@ -1 +1 @@
|
||||||
Subproject commit 4a78588b308564f7ebae193e0ae00d9a0d5741d5
|
Subproject commit 81f1804a43f625e3a1a20752c0db70a413410380
|
|
@ -226,7 +226,8 @@ struct Regs {
|
||||||
Texture1 = 0x4,
|
Texture1 = 0x4,
|
||||||
Texture2 = 0x5,
|
Texture2 = 0x5,
|
||||||
Texture3 = 0x6,
|
Texture3 = 0x6,
|
||||||
// 0x7-0xc = primary color??
|
|
||||||
|
PreviousBuffer = 0xd,
|
||||||
Constant = 0xe,
|
Constant = 0xe,
|
||||||
Previous = 0xf,
|
Previous = 0xf,
|
||||||
};
|
};
|
||||||
|
@ -299,7 +300,18 @@ struct Regs {
|
||||||
BitField<24, 8, u32> const_a;
|
BitField<24, 8, u32> const_a;
|
||||||
};
|
};
|
||||||
|
|
||||||
INSERT_PADDING_WORDS(0x1);
|
union {
|
||||||
|
BitField< 0, 2, u32> color_scale;
|
||||||
|
BitField<16, 2, u32> alpha_scale;
|
||||||
|
};
|
||||||
|
|
||||||
|
inline unsigned GetColorMultiplier() const {
|
||||||
|
return (color_scale < 3) ? (1 << color_scale) : 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline unsigned GetAlphaMultiplier() const {
|
||||||
|
return (alpha_scale < 3) ? (1 << alpha_scale) : 1;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TevStageConfig tev_stage0;
|
TevStageConfig tev_stage0;
|
||||||
|
@ -309,11 +321,36 @@ struct Regs {
|
||||||
TevStageConfig tev_stage2;
|
TevStageConfig tev_stage2;
|
||||||
INSERT_PADDING_WORDS(0x3);
|
INSERT_PADDING_WORDS(0x3);
|
||||||
TevStageConfig tev_stage3;
|
TevStageConfig tev_stage3;
|
||||||
INSERT_PADDING_WORDS(0x13);
|
INSERT_PADDING_WORDS(0x3);
|
||||||
|
|
||||||
|
union {
|
||||||
|
// Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in
|
||||||
|
// these masks are set
|
||||||
|
BitField< 8, 4, u32> update_mask_rgb;
|
||||||
|
BitField<12, 4, u32> update_mask_a;
|
||||||
|
|
||||||
|
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
|
||||||
|
return (stage_index < 4) && (update_mask_rgb & (1 << stage_index));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
|
||||||
|
return (stage_index < 4) && (update_mask_a & (1 << stage_index));
|
||||||
|
}
|
||||||
|
} tev_combiner_buffer_input;
|
||||||
|
|
||||||
|
INSERT_PADDING_WORDS(0xf);
|
||||||
TevStageConfig tev_stage4;
|
TevStageConfig tev_stage4;
|
||||||
INSERT_PADDING_WORDS(0x3);
|
INSERT_PADDING_WORDS(0x3);
|
||||||
TevStageConfig tev_stage5;
|
TevStageConfig tev_stage5;
|
||||||
INSERT_PADDING_WORDS(0x3);
|
|
||||||
|
union {
|
||||||
|
BitField< 0, 8, u32> r;
|
||||||
|
BitField< 8, 8, u32> g;
|
||||||
|
BitField<16, 8, u32> b;
|
||||||
|
BitField<24, 8, u32> a;
|
||||||
|
} tev_combiner_buffer_color;
|
||||||
|
|
||||||
|
INSERT_PADDING_WORDS(0x2);
|
||||||
|
|
||||||
const std::array<Regs::TevStageConfig,6> GetTevStages() const {
|
const std::array<Regs::TevStageConfig,6> GetTevStages() const {
|
||||||
return { tev_stage0, tev_stage1,
|
return { tev_stage0, tev_stage1,
|
||||||
|
@ -426,9 +463,7 @@ struct Regs {
|
||||||
D24S8 = 3
|
D24S8 = 3
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
// Returns the number of bytes in the specified depth format
|
||||||
* Returns the number of bytes in the specified depth format
|
|
||||||
*/
|
|
||||||
static u32 BytesPerDepthPixel(DepthFormat format) {
|
static u32 BytesPerDepthPixel(DepthFormat format) {
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case DepthFormat::D16:
|
case DepthFormat::D16:
|
||||||
|
@ -443,6 +478,20 @@ struct Regs {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the number of bits per depth component of the specified depth format
|
||||||
|
static u32 DepthBitsPerPixel(DepthFormat format) {
|
||||||
|
switch (format) {
|
||||||
|
case DepthFormat::D16:
|
||||||
|
return 16;
|
||||||
|
case DepthFormat::D24:
|
||||||
|
case DepthFormat::D24S8:
|
||||||
|
return 24;
|
||||||
|
default:
|
||||||
|
LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format);
|
||||||
|
UNIMPLEMENTED();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
// Components are laid out in reverse byte order, most significant bits first.
|
// Components are laid out in reverse byte order, most significant bits first.
|
||||||
enum ColorFormat : u32 {
|
enum ColorFormat : u32 {
|
||||||
|
@ -784,8 +833,10 @@ struct Regs {
|
||||||
ADD_FIELD(tev_stage1);
|
ADD_FIELD(tev_stage1);
|
||||||
ADD_FIELD(tev_stage2);
|
ADD_FIELD(tev_stage2);
|
||||||
ADD_FIELD(tev_stage3);
|
ADD_FIELD(tev_stage3);
|
||||||
|
ADD_FIELD(tev_combiner_buffer_input);
|
||||||
ADD_FIELD(tev_stage4);
|
ADD_FIELD(tev_stage4);
|
||||||
ADD_FIELD(tev_stage5);
|
ADD_FIELD(tev_stage5);
|
||||||
|
ADD_FIELD(tev_combiner_buffer_color);
|
||||||
ADD_FIELD(output_merger);
|
ADD_FIELD(output_merger);
|
||||||
ADD_FIELD(framebuffer);
|
ADD_FIELD(framebuffer);
|
||||||
ADD_FIELD(vertex_attributes);
|
ADD_FIELD(vertex_attributes);
|
||||||
|
@ -859,8 +910,10 @@ ASSERT_REG_POSITION(tev_stage0, 0xc0);
|
||||||
ASSERT_REG_POSITION(tev_stage1, 0xc8);
|
ASSERT_REG_POSITION(tev_stage1, 0xc8);
|
||||||
ASSERT_REG_POSITION(tev_stage2, 0xd0);
|
ASSERT_REG_POSITION(tev_stage2, 0xd0);
|
||||||
ASSERT_REG_POSITION(tev_stage3, 0xd8);
|
ASSERT_REG_POSITION(tev_stage3, 0xd8);
|
||||||
|
ASSERT_REG_POSITION(tev_combiner_buffer_input, 0xe0);
|
||||||
ASSERT_REG_POSITION(tev_stage4, 0xf0);
|
ASSERT_REG_POSITION(tev_stage4, 0xf0);
|
||||||
ASSERT_REG_POSITION(tev_stage5, 0xf8);
|
ASSERT_REG_POSITION(tev_stage5, 0xf8);
|
||||||
|
ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd);
|
||||||
ASSERT_REG_POSITION(output_merger, 0x100);
|
ASSERT_REG_POSITION(output_merger, 0x100);
|
||||||
ASSERT_REG_POSITION(framebuffer, 0x110);
|
ASSERT_REG_POSITION(framebuffer, 0x110);
|
||||||
ASSERT_REG_POSITION(vertex_attributes, 0x200);
|
ASSERT_REG_POSITION(vertex_attributes, 0x200);
|
||||||
|
|
|
@ -90,7 +90,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
}
|
}
|
||||||
|
|
||||||
return {};
|
return {0, 0, 0, 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
static u32 GetDepth(int x, int y) {
|
static u32 GetDepth(int x, int y) {
|
||||||
|
@ -376,7 +376,13 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
||||||
// with some basic arithmetic. Alpha combiners can be configured separately but work
|
// with some basic arithmetic. Alpha combiners can be configured separately but work
|
||||||
// analogously.
|
// analogously.
|
||||||
Math::Vec4<u8> combiner_output;
|
Math::Vec4<u8> combiner_output;
|
||||||
for (const auto& tev_stage : tev_stages) {
|
Math::Vec4<u8> combiner_buffer = {
|
||||||
|
registers.tev_combiner_buffer_color.r, registers.tev_combiner_buffer_color.g,
|
||||||
|
registers.tev_combiner_buffer_color.b, registers.tev_combiner_buffer_color.a
|
||||||
|
};
|
||||||
|
|
||||||
|
for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) {
|
||||||
|
const auto& tev_stage = tev_stages[tev_stage_index];
|
||||||
using Source = Regs::TevStageConfig::Source;
|
using Source = Regs::TevStageConfig::Source;
|
||||||
using ColorModifier = Regs::TevStageConfig::ColorModifier;
|
using ColorModifier = Regs::TevStageConfig::ColorModifier;
|
||||||
using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
|
using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
|
||||||
|
@ -398,6 +404,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
||||||
case Source::Texture2:
|
case Source::Texture2:
|
||||||
return texture_color[2];
|
return texture_color[2];
|
||||||
|
|
||||||
|
case Source::PreviousBuffer:
|
||||||
|
return combiner_buffer;
|
||||||
|
|
||||||
case Source::Constant:
|
case Source::Constant:
|
||||||
return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a};
|
return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a};
|
||||||
|
|
||||||
|
@ -407,7 +416,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
||||||
default:
|
default:
|
||||||
LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source);
|
LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source);
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
return {};
|
return {0, 0, 0, 0};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -490,6 +499,16 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
||||||
return result.Cast<u8>();
|
return result.Cast<u8>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case Operation::AddSigned:
|
||||||
|
{
|
||||||
|
// TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct
|
||||||
|
auto result = input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128);
|
||||||
|
result.r() = MathUtil::Clamp<int>(result.r(), 0, 255);
|
||||||
|
result.g() = MathUtil::Clamp<int>(result.g(), 0, 255);
|
||||||
|
result.b() = MathUtil::Clamp<int>(result.b(), 0, 255);
|
||||||
|
return result.Cast<u8>();
|
||||||
|
}
|
||||||
|
|
||||||
case Operation::Lerp:
|
case Operation::Lerp:
|
||||||
return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();
|
return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();
|
||||||
|
|
||||||
|
@ -524,7 +543,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
||||||
default:
|
default:
|
||||||
LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
|
LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
return {};
|
return {0, 0, 0};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -578,7 +597,20 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
||||||
};
|
};
|
||||||
auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
|
auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
|
||||||
|
|
||||||
combiner_output = Math::MakeVec(color_output, alpha_output);
|
combiner_output[0] = std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier());
|
||||||
|
combiner_output[1] = std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier());
|
||||||
|
combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier());
|
||||||
|
combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier());
|
||||||
|
|
||||||
|
if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) {
|
||||||
|
combiner_buffer.r() = combiner_output.r();
|
||||||
|
combiner_buffer.g() = combiner_output.g();
|
||||||
|
combiner_buffer.b() = combiner_output.b();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) {
|
||||||
|
combiner_buffer.a() = combiner_output.a();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (registers.output_merger.alpha_test.enable) {
|
if (registers.output_merger.alpha_test.enable) {
|
||||||
|
@ -624,9 +656,10 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
||||||
|
|
||||||
// TODO: Does depth indeed only get written even if depth testing is enabled?
|
// TODO: Does depth indeed only get written even if depth testing is enabled?
|
||||||
if (registers.output_merger.depth_test_enable) {
|
if (registers.output_merger.depth_test_enable) {
|
||||||
u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +
|
unsigned num_bits = Pica::Regs::DepthBitsPerPixel(registers.framebuffer.depth_format);
|
||||||
v1.screenpos[2].ToFloat32() * w1 +
|
u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 +
|
||||||
v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
|
v1.screenpos[2].ToFloat32() * w1 +
|
||||||
|
v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum);
|
||||||
u32 ref_z = GetDepth(x >> 4, y >> 4);
|
u32 ref_z = GetDepth(x >> 4, y >> 4);
|
||||||
|
|
||||||
bool pass = false;
|
bool pass = false;
|
||||||
|
|
|
@ -235,6 +235,15 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case OpCode::Id::FLR:
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
if (!swizzle.DestComponentEnabled(i))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32()));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case OpCode::Id::MAX:
|
case OpCode::Id::MAX:
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
if (!swizzle.DestComponentEnabled(i))
|
if (!swizzle.DestComponentEnabled(i))
|
||||||
|
@ -366,12 +375,15 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
|
|
||||||
case OpCode::Type::MultiplyAdd:
|
case OpCode::Type::MultiplyAdd:
|
||||||
{
|
{
|
||||||
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) {
|
if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) ||
|
||||||
|
(instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) {
|
||||||
const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id];
|
const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id];
|
||||||
|
|
||||||
const float24* src1_ = LookupSourceRegister(instr.mad.src1);
|
bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI);
|
||||||
const float24* src2_ = LookupSourceRegister(instr.mad.src2);
|
|
||||||
const float24* src3_ = LookupSourceRegister(instr.mad.src3);
|
const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted));
|
||||||
|
const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted));
|
||||||
|
const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted));
|
||||||
|
|
||||||
const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
|
const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
|
||||||
const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
|
const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
|
||||||
|
|
Loading…
Reference in a new issue