From 162d641a301d87d5e25ca5d677b7f8f07f29e748 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 12 Aug 2014 20:04:28 +0200 Subject: [PATCH] Pica/Math: Improved the design of the Vec2/Vec3/Vec4 classes and simplified rasterizer code accordingly. - Swizzlers now return const objects so that things like "first_vec4.xyz() = some_vec3" now will fail to compile (ideally we should support some vector holding references to make this actually work). - The methods "InsertBeforeX/Y/Z" and "Append" have been replaced by more versions of MakeVec, which now also supports building new vectors from vectors. - Vector library now follows C++ type promotion rules (hence, the result of Vec2 with another Vec2 is now a Vec2). --- src/video_core/math.h | 196 +++++++++++++++++++-------------- src/video_core/rasterizer.cpp | 32 +++--- src/video_core/vertex_shader.h | 3 +- 3 files changed, 133 insertions(+), 98 deletions(-) diff --git a/src/video_core/math.h b/src/video_core/math.h index 7030f2cfb..ca1fb0df2 100644 --- a/src/video_core/math.h +++ b/src/video_core/math.h @@ -39,6 +39,13 @@ template class Vec2; template class Vec3; template class Vec4; +template +static inline Vec2 MakeVec(const T& x, const T& y); +template +static inline Vec3 MakeVec(const T& x, const T& y, const T& z); +template +static inline Vec4 MakeVec(const T& x, const T& y, const T& z, const T& w); + template class Vec2 { @@ -68,34 +75,34 @@ public: a[0] = x; a[1] = y; } - Vec2 operator +(const Vec2& other) const + Vec2 operator +(const Vec2& other) const { - return Vec2(x+other.x, y+other.y); + return MakeVec(x+other.x, y+other.y); } void operator += (const Vec2 &other) { x+=other.x; y+=other.y; } - Vec2 operator -(const Vec2& other) const + Vec2 operator -(const Vec2& other) const { - return Vec2(x-other.x, y-other.y); + return MakeVec(x-other.x, y-other.y); } void operator -= (const Vec2& other) { x-=other.x; y-=other.y; } - Vec2 operator -() const + Vec2 operator -() const { - return Vec2(-x,-y); + return MakeVec(-x,-y); } - Vec2 operator * (const Vec2& other) const + Vec2 operator * (const Vec2& other) const { - return Vec2(x*other.x, y*other.y); + return MakeVec(x*other.x, y*other.y); } template - Vec2 operator * (const V& f) const + Vec2 operator * (const V& f) const { - return Vec2(x*f,y*f); + return MakeVec(x*f,y*f); } template void operator *= (const V& f) @@ -103,9 +110,9 @@ public: x*=f; y*=f; } template - Vec2 operator / (const V& f) const + Vec2 operator / (const V& f) const { - return Vec2(x/f,y/f); + return MakeVec(x/f,y/f); } template void operator /= (const V& f) @@ -152,20 +159,9 @@ public: const T& t() const { return y; } // swizzlers - create a subvector of specific components - Vec2 yx() const { return Vec2(y, x); } - Vec2 vu() const { return Vec2(y, x); } - Vec2 ts() const { return Vec2(y, x); } - - // Inserters to add new elements to effectively create larger vectors containing this Vec2 - Vec3 InsertBeforeX(const T& value) { - return Vec3(value, x, y); - } - Vec3 InsertBeforeY(const T& value) { - return Vec3(x, value, y); - } - Vec3 Append(const T& value) { - return Vec3(x, y, value); - } + const Vec2 yx() const { return Vec2(y, x); } + const Vec2 vu() const { return Vec2(y, x); } + const Vec2 ts() const { return Vec2(y, x); } }; template @@ -193,7 +189,7 @@ public: template Vec3 Cast() const { - return Vec3((T2)x, (T2)y, (T2)z); + return MakeVec((T2)x, (T2)y, (T2)z); } // Only implemented for T=int and T=float @@ -202,7 +198,7 @@ public: static Vec3 AssignToAll(const T& f) { - return Vec3(f, f, f); + return MakeVec(f, f, f); } void Write(T a[3]) @@ -210,34 +206,34 @@ public: a[0] = x; a[1] = y; a[2] = z; } - Vec3 operator +(const Vec3 &other) const + Vec3 operator +(const Vec3 &other) const { - return Vec3(x+other.x, y+other.y, z+other.z); + return MakeVec(x+other.x, y+other.y, z+other.z); } void operator += (const Vec3 &other) { x+=other.x; y+=other.y; z+=other.z; } - Vec3 operator -(const Vec3 &other) const + Vec3 operator -(const Vec3 &other) const { - return Vec3(x-other.x, y-other.y, z-other.z); + return MakeVec(x-other.x, y-other.y, z-other.z); } void operator -= (const Vec3 &other) { x-=other.x; y-=other.y; z-=other.z; } - Vec3 operator -() const + Vec3 operator -() const { - return Vec3(-x,-y,-z); + return MakeVec(-x,-y,-z); } - Vec3 operator * (const Vec3 &other) const + Vec3 operator * (const Vec3 &other) const { - return Vec3(x*other.x, y*other.y, z*other.z); + return MakeVec(x*other.x, y*other.y, z*other.z); } template - Vec3 operator * (const V& f) const + Vec3 operator * (const V& f) const { - return Vec3(x*f,y*f,z*f); + return MakeVec(x*f,y*f,z*f); } template void operator *= (const V& f) @@ -245,9 +241,9 @@ public: x*=f; y*=f; z*=f; } template - Vec3 operator / (const V& f) const + Vec3 operator / (const V& f) const { - return Vec3(x/f,y/f,z/f); + return MakeVec(x/f,y/f,z/f); } template void operator /= (const V& f) @@ -310,7 +306,7 @@ public: // swizzlers - create a subvector of specific components // e.g. Vec2 uv() { return Vec2(x,y); } // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx) -#define _DEFINE_SWIZZLER2(a, b, name) Vec2 name() const { return Vec2(a, b); } +#define _DEFINE_SWIZZLER2(a, b, name) const Vec2 name() const { return Vec2(a, b); } #define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \ _DEFINE_SWIZZLER2(a, b, a##b); \ _DEFINE_SWIZZLER2(a, b, a2##b2); \ @@ -326,20 +322,6 @@ public: DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q); #undef DEFINE_SWIZZLER2 #undef _DEFINE_SWIZZLER2 - - // Inserters to add new elements to effectively create larger vectors containing this Vec2 - Vec4 InsertBeforeX(const T& value) { - return Vec4(value, x, y, z); - } - Vec4 InsertBeforeY(const T& value) { - return Vec4(x, value, y, z); - } - Vec4 InsertBeforeZ(const T& value) { - return Vec4(x, y, value, z); - } - Vec4 Append(const T& value) { - return Vec4(x, y, z, value); - } }; template @@ -383,34 +365,34 @@ public: a[0] = x; a[1] = y; a[2] = z; a[3] = w; } - Vec4 operator +(const Vec4& other) const + Vec4 operator +(const Vec4& other) const { - return Vec4(x+other.x, y+other.y, z+other.z, w+other.w); + return MakeVec(x+other.x, y+other.y, z+other.z, w+other.w); } void operator += (const Vec4& other) { x+=other.x; y+=other.y; z+=other.z; w+=other.w; } - Vec4 operator -(const Vec4 &other) const + Vec4 operator -(const Vec4 &other) const { - return Vec4(x-other.x, y-other.y, z-other.z, w-other.w); + return MakeVec(x-other.x, y-other.y, z-other.z, w-other.w); } void operator -= (const Vec4 &other) { x-=other.x; y-=other.y; z-=other.z; w-=other.w; } - Vec4 operator -() const + Vec4 operator -() const { - return Vec4(-x,-y,-z,-w); + return MakeVec(-x,-y,-z,-w); } - Vec4 operator * (const Vec4 &other) const + Vec4 operator * (const Vec4 &other) const { - return Vec4(x*other.x, y*other.y, z*other.z, w*other.w); + return MakeVec(x*other.x, y*other.y, z*other.z, w*other.w); } template - Vec4 operator * (const V& f) const + Vec4 operator * (const V& f) const { - return Vec4(x*f,y*f,z*f,w*f); + return MakeVec(x*f,y*f,z*f,w*f); } template void operator *= (const V& f) @@ -418,9 +400,9 @@ public: x*=f; y*=f; z*=f; w*=f; } template - Vec4 operator / (const V& f) const + Vec4 operator / (const V& f) const { - return Vec4(x/f,y/f,z/f,w/f); + return MakeVec(x/f,y/f,z/f,w/f); } template void operator /= (const V& f) @@ -469,7 +451,7 @@ public: // swizzlers - create a subvector of specific components // e.g. Vec2 uv() { return Vec2(x,y); } // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx) -#define _DEFINE_SWIZZLER2(a, b, name) Vec2 name() const { return Vec2(a, b); } +#define _DEFINE_SWIZZLER2(a, b, name) const Vec2 name() const { return Vec2(a, b); } #define DEFINE_SWIZZLER2(a, b, a2, b2) \ _DEFINE_SWIZZLER2(a, b, a##b); \ _DEFINE_SWIZZLER2(a, b, a2##b2); \ @@ -485,7 +467,7 @@ public: #undef DEFINE_SWIZZLER2 #undef _DEFINE_SWIZZLER2 -#define _DEFINE_SWIZZLER3(a, b, c, name) Vec3 name() const { return Vec3(a, b, c); } +#define _DEFINE_SWIZZLER3(a, b, c, name) const Vec3 name() const { return Vec3(a, b, c); } #define DEFINE_SWIZZLER3(a, b, c, a2, b2, c2) \ _DEFINE_SWIZZLER3(a, b, c, a##b##c); \ _DEFINE_SWIZZLER3(a, c, b, a##c##b); \ @@ -510,69 +492,121 @@ public: template -Vec4 operator * (const V& f, const Vec4& vec) +Vec4 operator * (const V& f, const Vec4& vec) { - return Vec4(f*vec.x,f*vec.y,f*vec.z,f*vec.w); + return MakeVec(f*vec.x,f*vec.y,f*vec.z,f*vec.w); } typedef Vec4 Vec4f; template -static inline T Dot(const Vec2& a, const Vec2& b) +static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec2& a, const Vec2& b) { return a.x*b.x + a.y*b.y; } template -static inline T Dot(const Vec3& a, const Vec3& b) +static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec3& a, const Vec3& b) { return a.x*b.x + a.y*b.y + a.z*b.z; } template -static inline T Dot(const Vec4& a, const Vec4& b) +static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec4& a, const Vec4& b) { return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; } template -static inline Vec3 Cross(const Vec3& a, const Vec3& b) +static inline Vec3 Cross(const Vec3& a, const Vec3& b) { - return Vec3(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); + return MakeVec(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); } // linear interpolation via float: 0.0=begin, 1.0=end template -static inline X Lerp(const X& begin, const X& end, const float t) +static inline decltype(X{}*float{}+X{}*float{}) Lerp(const X& begin, const X& end, const float t) { return begin*(1.f-t) + end*t; } // linear interpolation via int: 0=begin, base=end template -static inline X LerpInt(const X& begin, const X& end, const int t) +static inline decltype((X{}*int{}+X{}*int{}) / base) LerpInt(const X& begin, const X& end, const int t) { return (begin*(base-t) + end*t) / base; } // Utility vector factories template -static inline Vec2 MakeVec2(const T& x, const T& y) +static inline Vec2 MakeVec(const T& x, const T& y) { return Vec2{x, y}; } template -static inline Vec3 MakeVec3(const T& x, const T& y, const T& z) +static inline Vec3 MakeVec(const T& x, const T& y, const T& z) { return Vec3{x, y, z}; } template -static inline Vec4 MakeVec4(const T& x, const T& y, const T& z, const T& w) +static inline Vec4 MakeVec(const T& x, const T& y, const Vec2& zw) +{ + return MakeVec(x, y, zw[0], zw[1]); +} + +template +static inline Vec3 MakeVec(const Vec2& xy, const T& z) +{ + return MakeVec(xy[0], xy[1], z); +} + +template +static inline Vec3 MakeVec(const T& x, const Vec2& yz) +{ + return MakeVec(x, yz[0], yz[1]); +} + +template +static inline Vec4 MakeVec(const T& x, const T& y, const T& z, const T& w) { return Vec4{x, y, z, w}; } +template +static inline Vec4 MakeVec(const Vec2& xy, const T& z, const T& w) +{ + return MakeVec(xy[0], xy[1], z, w); +} + +template +static inline Vec4 MakeVec(const T& x, const Vec2& yz, const T& w) +{ + return MakeVec(x, yz[0], yz[1], w); +} + +// NOTE: This has priority over "Vec2> MakeVec(const Vec2& x, const Vec2& y)". +// Even if someone wanted to use an odd object like Vec2>, the compiler would error +// out soon enough due to misuse of the returned structure. +template +static inline Vec4 MakeVec(const Vec2& xy, const Vec2& zw) +{ + return MakeVec(xy[0], xy[1], zw[0], zw[1]); +} + +template +static inline Vec4 MakeVec(const Vec3& xyz, const T& w) +{ + return MakeVec(xyz[0], xyz[1], xyz[2], w); +} + +template +static inline Vec4 MakeVec(const T& x, const Vec2& yzw) +{ + return MakeVec(x, yzw[0], yzw[1], yzw[2]); +} + + } // namespace diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index a7c1bab3e..f418518a1 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -78,10 +78,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); - min_x = min_x & Fix12P4::IntMask(); - min_y = min_y & Fix12P4::IntMask(); - max_x = (max_x + Fix12P4::FracMask()) & Fix12P4::IntMask(); - max_y = (max_y + Fix12P4::FracMask()) & Fix12P4::IntMask(); + min_x &= Fix12P4::IntMask(); + min_y &= Fix12P4::IntMask(); + max_x = ((max_x + Fix12P4::FracMask()) & Fix12P4::IntMask()); + max_y = ((max_y + Fix12P4::FracMask()) & Fix12P4::IntMask()); // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not // drawn. Pixels on any other triangle border are drawn. This is implemented with three bias @@ -112,10 +112,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, auto orient2d = [](const Math::Vec2& vtx1, const Math::Vec2& vtx2, const Math::Vec2& vtx3) { - const auto vec1 = (vtx2.Cast() - vtx1.Cast()).Append(0); - const auto vec2 = (vtx3.Cast() - vtx1.Cast()).Append(0); + const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); + const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); // TODO: There is a very small chance this will overflow for sizeof(int) == 4 - return Cross(vec1, vec2).z; + return Math::Cross(vec1, vec2).z; }; int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); @@ -143,15 +143,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, // // The generalization to three vertices is straightforward in baricentric coordinates. auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { - auto attr_over_w = Math::MakeVec3(attr0 / v0.pos.w, - attr1 / v1.pos.w, - attr2 / v2.pos.w); - auto w_inverse = Math::MakeVec3(float24::FromFloat32(1.f) / v0.pos.w, - float24::FromFloat32(1.f) / v1.pos.w, - float24::FromFloat32(1.f) / v2.pos.w); - auto baricentric_coordinates = Math::MakeVec3(float24::FromFloat32(w0), - float24::FromFloat32(w1), - float24::FromFloat32(w2)); + auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w, + attr1 / v1.pos.w, + attr2 / v2.pos.w); + auto w_inverse = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w, + float24::FromFloat32(1.f) / v1.pos.w, + float24::FromFloat32(1.f) / v2.pos.w); + auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(w0), + float24::FromFloat32(w1), + float24::FromFloat32(w2)); float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates); diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index f0a8a5b60..847fdc450 100644 --- a/src/video_core/vertex_shader.h +++ b/src/video_core/vertex_shader.h @@ -27,7 +27,6 @@ struct OutputVertex { Math::Vec4 dummy; // quaternions (not implemented, yet) Math::Vec4 color; Math::Vec2 tc0; - float24 tc0_v; // Padding for optimal alignment float24 pad[14]; @@ -36,6 +35,7 @@ struct OutputVertex { // position after perspective divide Math::Vec3 screenpos; + float24 pad2; // Linear interpolation // factor: 0=this, 1=vtx @@ -59,6 +59,7 @@ struct OutputVertex { } }; static_assert(std::is_pod::value, "Structure is not POD"); +static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); union Instruction { enum class OpCode : u32 {