From f9026e8a7a53073340f7188f433f81fe84a16976 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sun, 29 Jan 2017 18:10:26 -0800 Subject: [PATCH] SWRasterizer: Convert large no-capture lambdas to standalone functions --- src/video_core/swrasterizer/rasterizer.cpp | 625 ++++++++++----------- 1 file changed, 310 insertions(+), 315 deletions(-) diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index cb11338b7..7044a6136 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -28,9 +28,318 @@ #include "video_core/utils.h" namespace Pica { - namespace Rasterizer { +using TevStageConfig = TexturingRegs::TevStageConfig; + +static int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size) { + switch (mode) { + case TexturingRegs::TextureConfig::ClampToEdge: + val = std::max(val, 0); + val = std::min(val, (int)size - 1); + return val; + + case TexturingRegs::TextureConfig::ClampToBorder: + return val; + + case TexturingRegs::TextureConfig::Repeat: + return (int)((unsigned)val % size); + + case TexturingRegs::TextureConfig::MirroredRepeat: { + unsigned int coord = ((unsigned)val % (2 * size)); + if (coord >= size) + coord = 2 * size - 1 - coord; + return (int)coord; + } + + default: + LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); + UNIMPLEMENTED(); + return 0; + } +}; + +static Math::Vec3 GetColorModifier(TevStageConfig::ColorModifier factor, + const Math::Vec4& values) { + using ColorModifier = TevStageConfig::ColorModifier; + + switch (factor) { + case ColorModifier::SourceColor: + return values.rgb(); + + case ColorModifier::OneMinusSourceColor: + return (Math::Vec3(255, 255, 255) - values.rgb()).Cast(); + + case ColorModifier::SourceAlpha: + return values.aaa(); + + case ColorModifier::OneMinusSourceAlpha: + return (Math::Vec3(255, 255, 255) - values.aaa()).Cast(); + + case ColorModifier::SourceRed: + return values.rrr(); + + case ColorModifier::OneMinusSourceRed: + return (Math::Vec3(255, 255, 255) - values.rrr()).Cast(); + + case ColorModifier::SourceGreen: + return values.ggg(); + + case ColorModifier::OneMinusSourceGreen: + return (Math::Vec3(255, 255, 255) - values.ggg()).Cast(); + + case ColorModifier::SourceBlue: + return values.bbb(); + + case ColorModifier::OneMinusSourceBlue: + return (Math::Vec3(255, 255, 255) - values.bbb()).Cast(); + } +}; + +static u8 GetAlphaModifier(TevStageConfig::AlphaModifier factor, const Math::Vec4& values) { + using AlphaModifier = TevStageConfig::AlphaModifier; + + switch (factor) { + case AlphaModifier::SourceAlpha: + return values.a(); + + case AlphaModifier::OneMinusSourceAlpha: + return 255 - values.a(); + + case AlphaModifier::SourceRed: + return values.r(); + + case AlphaModifier::OneMinusSourceRed: + return 255 - values.r(); + + case AlphaModifier::SourceGreen: + return values.g(); + + case AlphaModifier::OneMinusSourceGreen: + return 255 - values.g(); + + case AlphaModifier::SourceBlue: + return values.b(); + + case AlphaModifier::OneMinusSourceBlue: + return 255 - values.b(); + } +}; + +static Math::Vec3 ColorCombine(TevStageConfig::Operation op, const Math::Vec3 input[3]) { + using Operation = TevStageConfig::Operation; + + switch (op) { + case Operation::Replace: + return input[0]; + + case Operation::Modulate: + return ((input[0] * input[1]) / 255).Cast(); + + case Operation::Add: { + auto result = input[0] + input[1]; + result.r() = std::min(255, result.r()); + result.g() = std::min(255, result.g()); + result.b() = std::min(255, result.b()); + return result.Cast(); + } + + case Operation::AddSigned: { + // TODO(bunnei): Verify that the color conversion from (float) 0.5f to + // (byte) 128 is correct + auto result = + input[0].Cast() + input[1].Cast() - Math::MakeVec(128, 128, 128); + result.r() = MathUtil::Clamp(result.r(), 0, 255); + result.g() = MathUtil::Clamp(result.g(), 0, 255); + result.b() = MathUtil::Clamp(result.b(), 0, 255); + return result.Cast(); + } + + case Operation::Lerp: + return ((input[0] * input[2] + + input[1] * (Math::MakeVec(255, 255, 255) - input[2]).Cast()) / + 255) + .Cast(); + + case Operation::Subtract: { + auto result = input[0].Cast() - input[1].Cast(); + result.r() = std::max(0, result.r()); + result.g() = std::max(0, result.g()); + result.b() = std::max(0, result.b()); + return result.Cast(); + } + + case Operation::MultiplyThenAdd: { + auto result = (input[0] * input[1] + 255 * input[2].Cast()) / 255; + result.r() = std::min(255, result.r()); + result.g() = std::min(255, result.g()); + result.b() = std::min(255, result.b()); + return result.Cast(); + } + + case Operation::AddThenMultiply: { + auto result = input[0] + input[1]; + result.r() = std::min(255, result.r()); + result.g() = std::min(255, result.g()); + result.b() = std::min(255, result.b()); + result = (result * input[2].Cast()) / 255; + return result.Cast(); + } + case Operation::Dot3_RGB: { + // Not fully accurate. Worst case scenario seems to yield a +/-3 error. Some HW results + // indicate that the per-component computation can't have a higher precision than 1/256, + // while dot3_rgb((0x80,g0,b0), (0x7F,g1,b1)) and dot3_rgb((0x80,g0,b0), (0x80,g1,b1)) give + // different results. + int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + + ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + + ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; + result = std::max(0, std::min(255, result)); + return {(u8)result, (u8)result, (u8)result}; + } + default: + LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); + UNIMPLEMENTED(); + return {0, 0, 0}; + } +}; + +static u8 AlphaCombine(TevStageConfig::Operation op, const std::array& input) { + switch (op) { + using Operation = TevStageConfig::Operation; + case Operation::Replace: + return input[0]; + + case Operation::Modulate: + return input[0] * input[1] / 255; + + case Operation::Add: + return std::min(255, input[0] + input[1]); + + case Operation::AddSigned: { + // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct + auto result = static_cast(input[0]) + static_cast(input[1]) - 128; + return static_cast(MathUtil::Clamp(result, 0, 255)); + } + + case Operation::Lerp: + return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; + + case Operation::Subtract: + return std::max(0, (int)input[0] - (int)input[1]); + + case Operation::MultiplyThenAdd: + return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255); + + case Operation::AddThenMultiply: + return (std::min(255, (input[0] + input[1])) * input[2]) / 255; + + default: + LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d", (int)op); + UNIMPLEMENTED(); + return 0; + } +}; + +static Math::Vec4 EvaluateBlendEquation(const Math::Vec4& src, + const Math::Vec4& srcfactor, + const Math::Vec4& dest, + const Math::Vec4& destfactor, + FramebufferRegs::BlendEquation equation) { + Math::Vec4 result; + + auto src_result = (src * srcfactor).Cast(); + auto dst_result = (dest * destfactor).Cast(); + + switch (equation) { + case FramebufferRegs::BlendEquation::Add: + result = (src_result + dst_result) / 255; + break; + + case FramebufferRegs::BlendEquation::Subtract: + result = (src_result - dst_result) / 255; + break; + + case FramebufferRegs::BlendEquation::ReverseSubtract: + result = (dst_result - src_result) / 255; + break; + + // TODO: How do these two actually work? OpenGL doesn't include the blend factors in the + // min/max computations, but is this what the 3DS actually does? + case FramebufferRegs::BlendEquation::Min: + result.r() = std::min(src.r(), dest.r()); + result.g() = std::min(src.g(), dest.g()); + result.b() = std::min(src.b(), dest.b()); + result.a() = std::min(src.a(), dest.a()); + break; + + case FramebufferRegs::BlendEquation::Max: + result.r() = std::max(src.r(), dest.r()); + result.g() = std::max(src.g(), dest.g()); + result.b() = std::max(src.b(), dest.b()); + result.a() = std::max(src.a(), dest.a()); + break; + + default: + LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation); + UNIMPLEMENTED(); + } + + return Math::Vec4(MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255), + MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255)); +}; + +static u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op) { + switch (op) { + case FramebufferRegs::LogicOp::Clear: + return 0; + + case FramebufferRegs::LogicOp::And: + return src & dest; + + case FramebufferRegs::LogicOp::AndReverse: + return src & ~dest; + + case FramebufferRegs::LogicOp::Copy: + return src; + + case FramebufferRegs::LogicOp::Set: + return 255; + + case FramebufferRegs::LogicOp::CopyInverted: + return ~src; + + case FramebufferRegs::LogicOp::NoOp: + return dest; + + case FramebufferRegs::LogicOp::Invert: + return ~dest; + + case FramebufferRegs::LogicOp::Nand: + return ~(src & dest); + + case FramebufferRegs::LogicOp::Or: + return src | dest; + + case FramebufferRegs::LogicOp::Nor: + return ~(src | dest); + + case FramebufferRegs::LogicOp::Xor: + return src ^ dest; + + case FramebufferRegs::LogicOp::Equiv: + return ~(src ^ dest); + + case FramebufferRegs::LogicOp::AndInverted: + return ~src & dest; + + case FramebufferRegs::LogicOp::OrReverse: + return src | ~dest; + + case FramebufferRegs::LogicOp::OrInverted: + return ~src | dest; + } +}; + // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values struct Fix12P4 { Fix12P4() {} @@ -304,34 +613,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve int t = (int)(v * float24::FromFloat32(static_cast(texture.config.height))) .ToFloat32(); - static auto GetWrappedTexCoord = [](TexturingRegs::TextureConfig::WrapMode mode, - int val, unsigned size) { - switch (mode) { - case TexturingRegs::TextureConfig::ClampToEdge: - val = std::max(val, 0); - val = std::min(val, (int)size - 1); - return val; - - case TexturingRegs::TextureConfig::ClampToBorder: - return val; - - case TexturingRegs::TextureConfig::Repeat: - return (int)((unsigned)val % size); - - case TexturingRegs::TextureConfig::MirroredRepeat: { - unsigned int coord = ((unsigned)val % (2 * size)); - if (coord >= size) - coord = 2 * size - 1 - coord; - return (int)coord; - } - - default: - LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); - UNIMPLEMENTED(); - return 0; - } - }; - if ((texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder && (s < 0 || static_cast(s) >= texture.config.width)) || (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder && @@ -380,9 +661,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve ++tev_stage_index) { const auto& tev_stage = tev_stages[tev_stage_index]; using Source = TexturingRegs::TevStageConfig::Source; - using ColorModifier = TexturingRegs::TevStageConfig::ColorModifier; - using AlphaModifier = TexturingRegs::TevStageConfig::AlphaModifier; - using Operation = TexturingRegs::TevStageConfig::Operation; auto GetSource = [&](Source source) -> Math::Vec4 { switch (source) { @@ -422,187 +700,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve } }; - static auto GetColorModifier = [](ColorModifier factor, - const Math::Vec4& values) -> Math::Vec3 { - switch (factor) { - case ColorModifier::SourceColor: - return values.rgb(); - - case ColorModifier::OneMinusSourceColor: - return (Math::Vec3(255, 255, 255) - values.rgb()).Cast(); - - case ColorModifier::SourceAlpha: - return values.aaa(); - - case ColorModifier::OneMinusSourceAlpha: - return (Math::Vec3(255, 255, 255) - values.aaa()).Cast(); - - case ColorModifier::SourceRed: - return values.rrr(); - - case ColorModifier::OneMinusSourceRed: - return (Math::Vec3(255, 255, 255) - values.rrr()).Cast(); - - case ColorModifier::SourceGreen: - return values.ggg(); - - case ColorModifier::OneMinusSourceGreen: - return (Math::Vec3(255, 255, 255) - values.ggg()).Cast(); - - case ColorModifier::SourceBlue: - return values.bbb(); - - case ColorModifier::OneMinusSourceBlue: - return (Math::Vec3(255, 255, 255) - values.bbb()).Cast(); - } - }; - - static auto GetAlphaModifier = [](AlphaModifier factor, - const Math::Vec4& values) -> u8 { - switch (factor) { - case AlphaModifier::SourceAlpha: - return values.a(); - - case AlphaModifier::OneMinusSourceAlpha: - return 255 - values.a(); - - case AlphaModifier::SourceRed: - return values.r(); - - case AlphaModifier::OneMinusSourceRed: - return 255 - values.r(); - - case AlphaModifier::SourceGreen: - return values.g(); - - case AlphaModifier::OneMinusSourceGreen: - return 255 - values.g(); - - case AlphaModifier::SourceBlue: - return values.b(); - - case AlphaModifier::OneMinusSourceBlue: - return 255 - values.b(); - } - }; - - static auto ColorCombine = [](Operation op, - const Math::Vec3 input[3]) -> Math::Vec3 { - switch (op) { - case Operation::Replace: - return input[0]; - - case Operation::Modulate: - return ((input[0] * input[1]) / 255).Cast(); - - case Operation::Add: { - auto result = input[0] + input[1]; - result.r() = std::min(255, result.r()); - result.g() = std::min(255, result.g()); - result.b() = std::min(255, result.b()); - return result.Cast(); - } - - case Operation::AddSigned: { - // TODO(bunnei): Verify that the color conversion from (float) 0.5f to - // (byte) 128 is correct - auto result = input[0].Cast() + input[1].Cast() - - Math::MakeVec(128, 128, 128); - result.r() = MathUtil::Clamp(result.r(), 0, 255); - result.g() = MathUtil::Clamp(result.g(), 0, 255); - result.b() = MathUtil::Clamp(result.b(), 0, 255); - return result.Cast(); - } - - case Operation::Lerp: - return ((input[0] * input[2] + - input[1] * - (Math::MakeVec(255, 255, 255) - input[2]).Cast()) / - 255) - .Cast(); - - case Operation::Subtract: { - auto result = input[0].Cast() - input[1].Cast(); - result.r() = std::max(0, result.r()); - result.g() = std::max(0, result.g()); - result.b() = std::max(0, result.b()); - return result.Cast(); - } - - case Operation::MultiplyThenAdd: { - auto result = (input[0] * input[1] + 255 * input[2].Cast()) / 255; - result.r() = std::min(255, result.r()); - result.g() = std::min(255, result.g()); - result.b() = std::min(255, result.b()); - return result.Cast(); - } - - case Operation::AddThenMultiply: { - auto result = input[0] + input[1]; - result.r() = std::min(255, result.r()); - result.g() = std::min(255, result.g()); - result.b() = std::min(255, result.b()); - result = (result * input[2].Cast()) / 255; - return result.Cast(); - } - case Operation::Dot3_RGB: { - // Not fully accurate. - // Worst case scenario seems to yield a +/-3 error - // Some HW results indicate that the per-component computation can't have a - // higher precision than 1/256, - // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( - // (0x80,g0,b0),(0x80,g1,b1) ) give different results - int result = - ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + - ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + - ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; - result = std::max(0, std::min(255, result)); - return {(u8)result, (u8)result, (u8)result}; - } - default: - LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); - UNIMPLEMENTED(); - return {0, 0, 0}; - } - }; - - static auto AlphaCombine = [](Operation op, const std::array& input) -> u8 { - switch (op) { - case Operation::Replace: - return input[0]; - - case Operation::Modulate: - return input[0] * input[1] / 255; - - case Operation::Add: - return std::min(255, input[0] + input[1]); - - case Operation::AddSigned: { - // TODO(bunnei): Verify that the color conversion from (float) 0.5f to - // (byte) 128 is correct - auto result = static_cast(input[0]) + static_cast(input[1]) - 128; - return static_cast(MathUtil::Clamp(result, 0, 255)); - } - - case Operation::Lerp: - return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; - - case Operation::Subtract: - return std::max(0, (int)input[0] - (int)input[1]); - - case Operation::MultiplyThenAdd: - return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255); - - case Operation::AddThenMultiply: - return (std::min(255, (input[0] + input[1])) * input[2]) / 255; - - default: - LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d", (int)op); - UNIMPLEMENTED(); - return 0; - } - }; - // color combiner // NOTE: Not sure if the alpha combiner might use the color output of the previous // stage as input. Hence, we currently don't directly write the result to @@ -917,56 +1014,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve return combiner_output[channel]; }; - static auto EvaluateBlendEquation = []( - const Math::Vec4& src, const Math::Vec4& srcfactor, - const Math::Vec4& dest, const Math::Vec4& destfactor, - FramebufferRegs::BlendEquation equation) { - - Math::Vec4 result; - - auto src_result = (src * srcfactor).Cast(); - auto dst_result = (dest * destfactor).Cast(); - - switch (equation) { - case FramebufferRegs::BlendEquation::Add: - result = (src_result + dst_result) / 255; - break; - - case FramebufferRegs::BlendEquation::Subtract: - result = (src_result - dst_result) / 255; - break; - - case FramebufferRegs::BlendEquation::ReverseSubtract: - result = (dst_result - src_result) / 255; - break; - - // TODO: How do these two actually work? - // OpenGL doesn't include the blend factors in the min/max computations, - // but is this what the 3DS actually does? - case FramebufferRegs::BlendEquation::Min: - result.r() = std::min(src.r(), dest.r()); - result.g() = std::min(src.g(), dest.g()); - result.b() = std::min(src.b(), dest.b()); - result.a() = std::min(src.a(), dest.a()); - break; - - case FramebufferRegs::BlendEquation::Max: - result.r() = std::max(src.r(), dest.r()); - result.g() = std::max(src.g(), dest.g()); - result.b() = std::max(src.b(), dest.b()); - result.a() = std::max(src.a(), dest.a()); - break; - - default: - LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation); - UNIMPLEMENTED(); - } - - return Math::Vec4( - MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255), - MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255)); - }; - auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb), LookupFactor(1, params.factor_source_rgb), LookupFactor(2, params.factor_source_rgb), @@ -983,58 +1030,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve dstfactor, params.blend_equation_a) .a(); } else { - static auto LogicOp = [](u8 src, u8 dest, FramebufferRegs::LogicOp op) -> u8 { - switch (op) { - case FramebufferRegs::LogicOp::Clear: - return 0; - - case FramebufferRegs::LogicOp::And: - return src & dest; - - case FramebufferRegs::LogicOp::AndReverse: - return src & ~dest; - - case FramebufferRegs::LogicOp::Copy: - return src; - - case FramebufferRegs::LogicOp::Set: - return 255; - - case FramebufferRegs::LogicOp::CopyInverted: - return ~src; - - case FramebufferRegs::LogicOp::NoOp: - return dest; - - case FramebufferRegs::LogicOp::Invert: - return ~dest; - - case FramebufferRegs::LogicOp::Nand: - return ~(src & dest); - - case FramebufferRegs::LogicOp::Or: - return src | dest; - - case FramebufferRegs::LogicOp::Nor: - return ~(src | dest); - - case FramebufferRegs::LogicOp::Xor: - return src ^ dest; - - case FramebufferRegs::LogicOp::Equiv: - return ~(src ^ dest); - - case FramebufferRegs::LogicOp::AndInverted: - return ~src & dest; - - case FramebufferRegs::LogicOp::OrReverse: - return src | ~dest; - - case FramebufferRegs::LogicOp::OrInverted: - return ~src | dest; - } - }; - blend_output = Math::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op),