pica_types: float: truncate, flush to 0, and treat infinities for all values
Doing this in FromFloat32 means it is applied to every Float<M, E> that isn't constructed from raw. Note: because compilers do not yet fully support C++23, std::isnormal and std::abs are not constexpr, so Trunc cannot be evaluated in constant expressions. That required changing Zero, One, and operator- so that FromFloat32 is not used in constexpr contexts; those specific changes may be reverted in the future.
This commit is contained in:
parent
a19166ddec
commit
d2af98673a
2 changed files with 65 additions and 6 deletions
|
@ -7,6 +7,7 @@
|
|||
#include <bit>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <boost/serialization/access.hpp>
|
||||
#include "common/common_types.h"
|
||||
|
||||
|
@ -29,6 +30,41 @@ public:
|
|||
/// Builds a Float<M, E> from a host 32-bit float.
///
/// The input is stored as-is and then passed through Trunc(), so the returned
/// value has been flushed, infinity-checked and mantissa-truncated.
static constexpr Float<M, E> FromFloat32(float val) {
    Float<M, E> untruncated;
    untruncated.value = val;
    return Trunc(untruncated);
}
|
||||
|
||||
/// Returns the smallest positive normal value of this format.
///
/// Minimum normal value = 1.0 / (1 << ((1 << (E - 1)) - 2)),
/// i.e. 2^-14 for E == 5 and 2^-62 otherwise (E == 7).
static constexpr Float<M, E> MinNormal() {
    constexpr float min_normal = (E == 5) ? 0x1.p-14f : 0x1.p-62f;
    Float<M, E> result;
    result.value = min_normal;
    return result;
}
|
||||
|
||||
// these values are approximate, rounded up
|
||||
static constexpr Float<M, E> Max() {
|
||||
Float<M, E> ret;
|
||||
if constexpr (E == 5) {
|
||||
ret.value = 0x1.p16;
|
||||
} else {
|
||||
// E == 7
|
||||
ret.value = 0x1.p64;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// before C++23 std::isnormal and std::abs aren't considered constexpr so this function can't be
|
||||
// used as constexpr until the compilers support that.
|
||||
static constexpr Float<M, E> Trunc(const Float<M, E>& val) {
|
||||
Float<M, E> ret = val.Flushed().InfChecked();
|
||||
if (std::isnormal(val.ToFloat32())) {
|
||||
u32 hex = std::bit_cast<u32>(ret.ToFloat32()) & (0xffffffff ^ ((1 << (23 - M)) - 1));
|
||||
ret.value = std::bit_cast<float>(hex);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -57,11 +93,15 @@ public:
|
|||
}
|
||||
|
||||
/// Returns the constant 0.0 in this format.
///
/// The value is stored directly instead of going through FromFloat32():
/// FromFloat32() calls Trunc(), which relies on std::isnormal and std::abs and
/// therefore cannot be evaluated in a constant expression before C++23.
/// Writing the field directly keeps Zero() usable to initialise constexpr
/// objects. 0.0 needs no flushing or truncation, so the result is the same.
static constexpr Float<M, E> Zero() {
    Float<M, E> ret;
    ret.value = 0.f;
    return ret;
}
|
||||
|
||||
/// Returns the constant 1.0 in this format.
///
/// The value is stored directly instead of going through FromFloat32():
/// FromFloat32() calls Trunc(), which relies on std::isnormal and std::abs and
/// therefore cannot be evaluated in a constant expression before C++23.
/// Writing the field directly keeps One() usable to initialise constexpr
/// objects. 1.0 has an all-zero mantissa, so truncation would not change it.
static constexpr Float<M, E> One() {
    Float<M, E> ret;
    ret.value = 1.f;
    return ret;
}
|
||||
|
||||
// Not recommended for anything but logging
|
||||
|
@ -69,6 +109,24 @@ public:
|
|||
return value;
|
||||
}
|
||||
|
||||
/// Returns a copy of this value, replaced by 0 when its magnitude is below
/// MinNormal(). Values at or above MinNormal() (and NaN, for which the
/// comparison is false) are returned unchanged.
constexpr Float<M, E> Flushed() const {
    const bool below_normal_range = std::abs(value) < MinNormal().ToFloat32();
    Float<M, E> result;
    result.value = below_normal_range ? 0.f : value;
    return result;
}
|
||||
|
||||
/// Returns a copy of this value, replaced by an infinity of the same sign when
/// its magnitude exceeds Max(). Other values (and NaN, for which the
/// comparison is false) are returned unchanged.
constexpr Float<M, E> InfChecked() const {
    const bool out_of_range = std::abs(value) > Max().ToFloat32();
    Float<M, E> result;
    // Multiplying by +infinity keeps the sign of the original value.
    result.value = out_of_range ? value * std::numeric_limits<float>::infinity() : value;
    return result;
}
|
||||
|
||||
constexpr Float<M, E> operator*(const Float<M, E>& flt) const {
|
||||
float result = value * flt.ToFloat32();
|
||||
// PICA gives 0 instead of NaN when multiplying by inf
|
||||
|
@ -111,7 +169,9 @@ public:
|
|||
}
|
||||
|
||||
/// Unary minus: returns this value with its sign flipped.
///
/// The field is negated directly instead of going through FromFloat32():
/// FromFloat32() calls Trunc(), which relies on std::isnormal and std::abs and
/// therefore cannot be evaluated in a constant expression before C++23.
/// Negation only flips the sign bit, so a value that was already flushed and
/// truncated stays that way.
constexpr Float<M, E> operator-() const {
    Float<M, E> ret;
    ret.value = -value;
    return ret;
}
|
||||
|
||||
constexpr bool operator<(const Float<M, E>& flt) const {
|
||||
|
|
|
@ -125,9 +125,8 @@ void RasterizerSoftware::AddTriangle(const Pica::OutputVertex& v0, const Pica::O
|
|||
auto* input_list = &buffer_b;
|
||||
|
||||
// NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value.
|
||||
// TODO: Not sure if this is a valid approach. Also should probably instead use the smallest
|
||||
// epsilon possible within f24 accuracy.
|
||||
static constexpr f24 EPSILON = f24::FromFloat32(0.00001f);
|
||||
// TODO: Not sure if this is a valid approach.
|
||||
static constexpr f24 EPSILON = f24::MinNormal();
|
||||
static constexpr f24 f0 = f24::Zero();
|
||||
static constexpr f24 f1 = f24::One();
|
||||
static constexpr std::array<ClippingEdge, 7> clipping_edges = {{
|
||||
|
|
Loading…
Reference in a new issue