math: add lol::real to 64-bit integer conversions and clean up code.

6 years ago · a407c5d5c4
--- a/src/lol/math/real.h
+++ b/src/lol/math/real.h
@@ -41,7 +41,10 @@ template<typename T>
 class LOL_ATTR_NODISCARD Real
 {
 public:
    Real();
    typedef T bigit_t;
    typedef int64_t exponent_t;

    Real() = default;

    Real(float f);
    Real(double f);
@@ -61,8 +64,10 @@ public:
    LOL_ATTR_NODISCARD operator float() const;
    LOL_ATTR_NODISCARD operator double() const;
    LOL_ATTR_NODISCARD operator long double() const;
    LOL_ATTR_NODISCARD operator int() const;
    LOL_ATTR_NODISCARD operator unsigned int() const;
    LOL_ATTR_NODISCARD operator int32_t() const;
    LOL_ATTR_NODISCARD operator uint32_t() const;
    LOL_ATTR_NODISCARD operator int64_t() const;
    LOL_ATTR_NODISCARD operator uint64_t() const;

    Real<T> operator +() const;
    Real<T> operator -() const;
@@ -114,8 +119,8 @@ public:
    template<typename U> friend Real<U> log10(Real<U> const &x);

    /* Floating-point functions */
    template<typename U> friend Real<U> frexp(Real<U> const &x, int64_t *exp);
    template<typename U> friend Real<U> ldexp(Real<U> const &x, int64_t exp);
    template<typename U> friend Real<U> frexp(Real<U> const &x, typename Real<U>::exponent_t *exp);
    template<typename U> friend Real<U> ldexp(Real<U> const &x, typename Real<U>::exponent_t exp);
    template<typename U> friend Real<U> modf(Real<U> const &x, Real<U> *iptr);
    template<typename U> friend Real<U> nextafter(Real<U> const &x, Real<U> const &y);

@@ -221,12 +226,9 @@ public:
    static Real<T> const& R_MAX();

 private:
    typedef T bigit_t;
    typedef int64_t exponent_t;

    array<T> m_mantissa;
    exponent_t m_exponent;
    bool m_sign, m_nan, m_inf;
    exponent_t m_exponent = 0;
    bool m_sign = false, m_nan = false, m_inf = false;

 public:
    static int DEFAULT_BIGIT_COUNT;
@@ -239,7 +241,6 @@ public:
 /*
 * Mandatory forward declarations of template specialisations
 */
 template<> real::Real();
 template<> real::Real(float f);
 template<> real::Real(double f);
 template<> real::Real(long double f);
@@ -252,8 +253,10 @@ template<> real::Real(char const *str);
 template<> LOL_ATTR_NODISCARD real::operator float() const;
 template<> LOL_ATTR_NODISCARD real::operator double() const;
 template<> LOL_ATTR_NODISCARD real::operator long double() const;
 template<> LOL_ATTR_NODISCARD real::operator int() const;
 template<> LOL_ATTR_NODISCARD real::operator unsigned int() const;
 template<> LOL_ATTR_NODISCARD real::operator int32_t() const;
 template<> LOL_ATTR_NODISCARD real::operator uint32_t() const;
 template<> LOL_ATTR_NODISCARD real::operator int64_t() const;
 template<> LOL_ATTR_NODISCARD real::operator uint64_t() const;
 template<> real real::operator +() const;
 template<> real real::operator -() const;
 template<> real real::operator +(real const &x) const;
@@ -294,8 +297,8 @@ template<typename U> Real<U> erf(Real<U> const &x);
 template<typename U> Real<U> log(Real<U> const &x);
 template<typename U> Real<U> log2(Real<U> const &x);
 template<typename U> Real<U> log10(Real<U> const &x);
 template<typename U> Real<U> frexp(Real<U> const &x, int64_t *exp);
 template<typename U> Real<U> ldexp(Real<U> const &x, int64_t exp);
 template<typename U> Real<U> frexp(Real<U> const &x, typename Real<U>::exponent_t *exp);
 template<typename U> Real<U> ldexp(Real<U> const &x, typename Real<U>::exponent_t exp);
 template<typename U> Real<U> modf(Real<U> const &x, Real<U> *iptr);
 template<typename U> Real<U> nextafter(Real<U> const &x, Real<U> const &y);
 template<typename U> Real<U> inverse(Real<U> const &x);
@@ -336,8 +339,8 @@ template<> real erf(real const &x);
 template<> real log(real const &x);
 template<> real log2(real const &x);
 template<> real log10(real const &x);
 template<> real frexp(real const &x, int64_t *exp);
 template<> real ldexp(real const &x, int64_t exp);
 template<> real frexp(real const &x, real::exponent_t *exp);
 template<> real ldexp(real const &x, real::exponent_t exp);
 template<> real modf(real const &x, real *iptr);
 template<> real nextafter(real const &x, real const &y);
 template<> real inverse(real const &x);
--- a/src/math/real.cpp
+++ b/src/math/real.cpp
@@ -93,14 +93,6 @@ LOL_CONSTANT_GETTER(R_SQRT1_2,  R_SQRT2() / 2);
 * Now carry on with the rest of the Real class.
 */

 template<> real::Real()
  : m_exponent(0),
    m_sign(false),
    m_nan(false),
    m_inf(false)
 {
 }

 template<> real::Real(int32_t i) { new(this) real((double)i); }
 template<> real::Real(uint32_t i) { new(this) real((double)i); }
 template<> real::Real(float f) { new(this) real((double)f); }
@@ -136,10 +128,6 @@ template<> real::Real(uint64_t i)
 }

 template<> real::Real(double d)
  : m_exponent(0),
    m_sign(false),
    m_nan(false),
    m_inf(false)
 {
    union { double d; uint64_t x; } u = { d };

@@ -177,9 +165,24 @@ template<> real::Real(long double f)
    m_exponent += exponent;
 }

 template<> real::operator float() const { return (float)(double)(*this); }
 template<> real::operator int() const { return (int)(double)(*this); }
 template<> real::operator unsigned() const { return (unsigned)(double)(*this); }
 template<> real::operator float() const { return (float)(double)*this; }
 template<> real::operator int32_t() const { return (int32_t)(double)floor(*this); }
 template<> real::operator uint32_t() const { return (uint32_t)(double)floor(*this); }

 template<> real::operator uint64_t() const
 {
    uint32_t msb = (uint32_t)ldexp(*this, -32);
    uint64_t ret = ((uint64_t)msb << 32)
                 | (uint32_t)(*this - ldexp((real)msb, 32));
    return ret;
 }

 template<> real::operator int64_t() const
 {
    /* If number is positive, convert it to uint64_t first. If it is
     * negative, switch its sign first. */
    return is_negative() ? -(int64_t)-*this : (int64_t)(uint64_t)*this;
 }

 template<> real::operator double() const
 {
@@ -233,7 +236,7 @@ template<> real::operator long double() const
 template<> real::Real(char const *str)
 {
    real ret = 0;
    int exponent = 0;
    exponent_t exponent = 0;
    bool hex = false, comma = false, nonzero = false, negative = false, finished = false;

    for (char const *p = str; *p && !finished; p++)
@@ -414,11 +417,11 @@ template<> real real::operator -(real const &x) const
    if (*this < x)
        return -(x - *this);

    int64_t e1 = m_exponent;
    int64_t e2 = x.m_exponent;
    exponent_t e1 = m_exponent;
    exponent_t e2 = x.m_exponent;

    int64_t bigoff = (e1 - e2) / bigit_bits();
    int64_t off = e1 - e2 - bigoff * bigit_bits();
    exponent_t bigoff = (e1 - e2) / bigit_bits();
    exponent_t off = e1 - e2 - bigoff * bigit_bits();

    /* FIXME: ensure we have the same number of bigits */
    if (bigoff > bigit_count())
@@ -429,7 +432,7 @@ template<> real real::operator -(real const &x) const
    ret.m_exponent = m_exponent;

    /* int64_t instead of uint64_t to preserve sign through shifts */
    int64_t carry = 0;
    exponent_t carry = 0;
    for (int i = 0; i < bigoff; ++i)
    {
        carry -= x.m_mantissa[bigit_count() - 1 - i];
@@ -438,8 +441,8 @@ template<> real real::operator -(real const &x) const
        carry |= carry << bigit_bits();
    }
    if (bigoff < bigit_count())
        carry -= x.m_mantissa[bigit_count() - 1 - bigoff] & (((int64_t)1 << off) - 1);
    carry /= (int64_t)1 << off;
        carry -= x.m_mantissa[bigit_count() - 1 - bigoff] & (((exponent_t)1 << off) - 1);
    carry /= (exponent_t)1 << off;

    for (int i = bigit_count(); i--; )
    {
@@ -452,7 +455,7 @@ template<> real real::operator -(real const &x) const
        else if (i - bigoff == 0)
            carry -= (uint64_t)1 << (bigit_bits() - off);

        ret.m_mantissa[i] = (uint32_t)carry;
        ret.m_mantissa[i] = (bigit_t)carry;
        carry >>= bigit_bits();
        carry |= carry << bigit_bits();
    }
@@ -473,7 +476,8 @@ template<> real real::operator -(real const &x) const
                continue;
            }

            for (uint32_t tmp = ret.m_mantissa[i]; tmp < 0x80000000u; tmp <<= 1)
            /* “~tmp > tmp” checks that the MSB is not set */
            for (bigit_t tmp = ret.m_mantissa[i]; ~tmp > tmp; tmp <<= 1)
                off++;
            break;
        }
@@ -489,7 +493,7 @@ template<> real real::operator -(real const &x) const

            for (int i = 0; i < bigit_count(); ++i)
            {
                uint32_t tmp = 0;
                bigit_t tmp = 0;
                if (i + bigoff < bigit_count())
                    tmp |= ret.m_mantissa[i + bigoff] << off;
                if (off && i + bigoff + 1 < bigit_count())
@@ -550,7 +554,7 @@ template<> real real::operator *(real const &x) const
        carry += x.m_mantissa[bigit_count() - 1 - i];
        if (carry < prev)
            hicarry++;
        ret.m_mantissa[bigit_count() - 1 - i] = carry & 0xffffffffu;
        ret.m_mantissa[bigit_count() - 1 - i] = carry & ~(bigit_t)0;
        carry >>= bigit_bits();
        carry |= hicarry << bigit_bits();
        hicarry >>= bigit_bits();
@@ -562,8 +566,8 @@ template<> real real::operator *(real const &x) const
        carry--;
        for (int i = 0; i < bigit_count(); ++i)
        {
            uint32_t tmp = (uint32_t)ret.m_mantissa[i];
            ret.m_mantissa[i] = ((uint32_t)carry << (bigit_bits() - 1))
            bigit_t tmp = ret.m_mantissa[i];
            ret.m_mantissa[i] = ((bigit_t)carry << (bigit_bits() - 1))
                              | (tmp >> 1);
            carry = tmp & 1u;
        }
@@ -729,7 +733,7 @@ template<> real inverse(real const &x)
    if (x.is_zero())
        return copysign(real::R_INF(), x);

    /* Use the system's float inversion to approximate 1/x */
    /* Use the system’s float inversion to approximate 1/x */
    union { float f; uint32_t x; } u = { 1.0f };
    u.x |= x.m_mantissa[0] >> 9;
    u.f = 1.0f / u.f;
@@ -739,8 +743,8 @@ template<> real inverse(real const &x)
    ret.m_sign = x.m_sign;
    ret.m_exponent = -x.m_exponent + (u.x >> 23) - 0x7f;

    /* FIXME: 1+log2(BIGIT_COUNT) steps of Newton-Raphson seems to be enough for
     * convergence, but this hasn't been checked seriously. */
    /* FIXME: 1+log2(bigit_count) steps of Newton-Raphson seems to be enough for
     * convergence, but this hasn’t been checked seriously. */
    for (int i = 1; i <= x.bigit_count(); i *= 2)
        ret = ret * (real::R_2() - ret * x);

@@ -759,7 +763,7 @@ template<> real sqrt(real const &x)

    int tweak = x.m_exponent & 1;

    /* Use the system's float inversion to approximate 1/sqrt(x). First
    /* Use the system’s float inversion to approximate 1/sqrt(x). First
     * we construct a float in the [1..4[ range that has roughly the same
     * mantissa as our real. Its exponent is 0 or 1, depending on the
     * parity of x’s exponent. The final exponent is 0, -1 or -2. We use
@@ -776,7 +780,7 @@ template<> real sqrt(real const &x)
    ret.m_exponent = -(x.m_exponent - tweak) / 2 + (u.x >> 23) - 0x7f;

    /* FIXME: 1+log2(bigit_count()) steps of Newton-Raphson seems to be enough for
     * convergence, but this hasn't been checked seriously. */
     * convergence, but this hasn’t been checked seriously. */
    for (int i = 1; i <= x.bigit_count(); i *= 2)
    {
        ret = ret * (real::R_3() - ret * ret * x);
@@ -796,7 +800,7 @@ template<> real cbrt(real const &x)
    if (tweak < 0)
        tweak += 3;

    /* Use the system's float inversion to approximate cbrt(x). First
    /* Use the system’s float inversion to approximate cbrt(x). First
     * we construct a float in the [1..8[ range that has roughly the same
     * mantissa as our real. Its exponent is 0, 1 or 2, depending on the
     * value of x. The final exponent is 0 or 1 (special case). We use
@@ -804,7 +808,7 @@ template<> real cbrt(real const &x)
    union { float f; uint32_t x; } u = { 1.0f };
    u.x += tweak << 23;
    u.x |= x.m_mantissa[0] >> 9;
    u.f = powf(u.f, 0.33333333333333333f);
    u.f = powf(u.f, 1.f / 3);

    real ret;
    ret.m_mantissa.resize(x.m_mantissa.count());
@@ -813,7 +817,7 @@ template<> real cbrt(real const &x)
    ret.m_sign = x.m_sign;

    /* FIXME: 1+log2(bigit_count()) steps of Newton-Raphson seems to be enough
     * for convergence, but this hasn't been checked seriously. */
     * for convergence, but this hasn’t been checked seriously. */
    real third = inverse(real::R_3());
    for (int i = 1; i <= x.bigit_count(); i *= 2)
    {
@@ -832,7 +836,7 @@ template<> real pow(real const &x, real const &y)
        return real::R_0();

    /* Small integer exponent: use exponentiation by squaring */
    int int_y = (int)y;
    int64_t int_y = (int64_t)y;
    if (y == (real)int_y)
    {
        real ret = real::R_1();
@@ -904,7 +908,7 @@ static real fast_fact(int x, int step = 1)

 template<> real gamma(real const &x)
 {
    /* We use Spouge's formula. FIXME: precision is far from acceptable,
    /* We use Spouge’s formula. FIXME: precision is far from acceptable,
     * especially with large values. We need to compute this with higher
     * precision values in order to attain the desired accuracy. It might
     * also be useful to sort the ck values by decreasing absolute value
@@ -1028,7 +1032,7 @@ static real fast_exp_sub(real const &x, real const &y)
    /* This fast exp method is tuned to work on the [-1..1] range and
     * no effort whatsoever was made to improve convergence outside this
     * domain of validity. The argument y is used for cases where we
     * don't want the leading 1 in the Taylor series. */
     * don’t want the leading 1 in the Taylor series. */
    real ret = real::R_1() - y, xn = x;
    int i = 1;

@@ -1049,9 +1053,9 @@ template<> real exp(real const &x)
    /* Strategy for exp(x): the Taylor series does not converge very fast
     * with large positive or negative values.
     *
     * However, we know that the result is going to be in the form M*2^E,
     * where M is the mantissa and E the exponent. We first try to predict
     * a value for E, which is approximately log2(exp(x)) = x / log(2).
     * However, since the result is going to be in the form M*2^E, we first
     * try to predict a value for E, which is approximately:
     *  E ≈ log2(exp(x)) = x / log(2)
     *
     * Let E0 be an integer close to x / log(2). We need to find a value x0
     * such that exp(x) = 2^E0 * exp(x0). We get x0 = x - E0 log(2).
@@ -1062,7 +1066,7 @@ template<> real exp(real const &x)
     *  real x1 = exp(x0)
     *  return x1 * 2^E0
     */
    int e0 = x / real::R_LN2();
    real::exponent_t e0 = x / real::R_LN2();
    real x0 = x - (real)e0 * real::R_LN2();
    real x1 = fast_exp_sub(x0, real::R_0());
    x1.m_exponent += e0;
@@ -1072,7 +1076,7 @@ template<> real exp(real const &x)
 template<> real exp2(real const &x)
 {
    /* Strategy for exp2(x): see strategy in exp(). */
    int e0 = x;
    real::exponent_t e0 = x;
    real x0 = x - (real)e0;
    real x1 = fast_exp_sub(x0 * real::R_LN2(), real::R_0());
    x1.m_exponent += e0;
@@ -1083,8 +1087,8 @@ template<> real erf(real const &x)
 {
    /* Strategy for erf(x):
     *  - if x<0, erf(x) = -erf(-x)
     *  - if x<7, erf(x) = sum((-1)^n×x^(2n+1)/((2n+1)×n!))/sqrt(pi/4)
     *  - if x≥7, erf(x) = 1+exp(-x²)/(x×sqrt(pi))×sum((-1)^n×(2n-1)!!/(2x²)^n
     *  - if x<7, erf(x) = sum((-1)^n·x^(2n+1)/((2n+1)·n!))/sqrt(π/4)
     *  - if x≥7, erf(x) = 1+exp(-x²)/(x·sqrt(π))·sum((-1)^n·(2n-1)!!/(2x²)^n
     *
     * FIXME: do not compute factorials at each iteration, accumulate
     * them instead (see fast_exp_sub).
@@ -1158,7 +1162,7 @@ template<> real cosh(real const &x)
    return (exp(x) + exp(-x)) / 2;
 }

 template<> real frexp(real const &x, int64_t *exp)
 template<> real frexp(real const &x, real::exponent_t *exp)
 {
    if (!x)
    {
@@ -1174,7 +1178,7 @@ template<> real frexp(real const &x, int64_t *exp)
    return ret;
 }

 template<> real ldexp(real const &x, int64_t exp)
 template<> real ldexp(real const &x, real::exponent_t exp)
 {
    real ret = x;
    if (ret) /* Only do something if non-zero */
@@ -1229,7 +1233,7 @@ template<> real floor(real const &x)
        return real::R_0();

    real ret = x;
    int64_t exponent = x.m_exponent;
    real::exponent_t exponent = x.m_exponent;

    for (int i = 0; i < x.bigit_count(); ++i)
    {
@@ -1253,10 +1257,9 @@ template<> real ceil(real const &x)
    if (x < -real::R_0())
        return -floor(-x);
    real ret = floor(x);
    if (x == ret)
        return ret;
    else
        return ret + real::R_1();
    if (ret < x)
        ret += real::R_1();
    return ret;
 }

 template<> real round(real const &x)
@@ -1647,7 +1650,7 @@ template<> void real::sprintf(char *str, int ndigits) const
 static real load_min()
 {
    real ret = 1;
    return ldexp(ret, std::numeric_limits<int64_t>::min());
    return ldexp(ret, std::numeric_limits<real::exponent_t>::min());
 }

 static real load_max()
@@ -1669,7 +1672,7 @@ static real load_max()

 static real load_pi()
 {
    /* Approximate Pi using Machin's formula: 16*atan(1/5)-4*atan(1/239) */
    /* Approximate π using Machin’s formula: 16*atan(1/5)-4*atan(1/239) */
    real ret = 0, x0 = 5, x1 = 239;
    real const m0 = -x0 * x0, m1 = -x1 * x1, r16 = 16, r4 = 4;