From a407c5d5c4a609ff0b9f9b6a666fc62b4af11977 Mon Sep 17 00:00:00 2001
From: Sam Hocevar <sam@hocevar.net>
Date: Mon, 10 Dec 2018 12:48:12 +0100
Subject: [PATCH] math: add lol::real to 64-bit integer conversions and clean
 up code.

---
 src/lol/math/real.h |  37 +++++++-------
 src/math/real.cpp   | 115 +++++++++++++++++++++++---------------------
 2 files changed, 79 insertions(+), 73 deletions(-)
diff --git a/src/lol/math/real.h b/src/lol/math/real.h
index 4f22515e..38c9975c 100644
--- a/src/lol/math/real.h
+++ b/src/lol/math/real.h
@@ -41,7 +41,10 @@ template<typename T>
 class LOL_ATTR_NODISCARD Real
 {
 public:
-    Real();
+    typedef T bigit_t;
+    typedef int64_t exponent_t;
+
+    Real() = default;
 
     Real(float f);
     Real(double f);
@@ -61,8 +64,10 @@ public:
     LOL_ATTR_NODISCARD operator float() const;
     LOL_ATTR_NODISCARD operator double() const;
     LOL_ATTR_NODISCARD operator long double() const;
-    LOL_ATTR_NODISCARD operator int() const;
-    LOL_ATTR_NODISCARD operator unsigned int() const;
+    LOL_ATTR_NODISCARD operator int32_t() const;
+    LOL_ATTR_NODISCARD operator uint32_t() const;
+    LOL_ATTR_NODISCARD operator int64_t() const;
+    LOL_ATTR_NODISCARD operator uint64_t() const;
 
     Real<T> operator +() const;
     Real<T> operator -() const;
@@ -114,8 +119,8 @@ public:
     template<typename U> friend Real<U> log10(Real<U> const &x);
 
     /* Floating-point functions */
-    template<typename U> friend Real<U> frexp(Real<U> const &x, int64_t *exp);
-    template<typename U> friend Real<U> ldexp(Real<U> const &x, int64_t exp);
+    template<typename U> friend Real<U> frexp(Real<U> const &x, typename Real<U>::exponent_t *exp);
+    template<typename U> friend Real<U> ldexp(Real<U> const &x, typename Real<U>::exponent_t exp);
     template<typename U> friend Real<U> modf(Real<U> const &x, Real<U> *iptr);
     template<typename U> friend Real<U> nextafter(Real<U> const &x, Real<U> const &y);
 
@@ -221,12 +226,9 @@ public:
     static Real<T> const& R_MAX();
 
 private:
-    typedef T bigit_t;
-    typedef int64_t exponent_t;
-
     array<T> m_mantissa;
-    exponent_t m_exponent;
-    bool m_sign, m_nan, m_inf;
+    exponent_t m_exponent = 0;
+    bool m_sign = false, m_nan = false, m_inf = false;
 
 public:
     static int DEFAULT_BIGIT_COUNT;
@@ -239,7 +241,6 @@ public:
 /*
  * Mandatory forward declarations of template specialisations
  */
-template<> real::Real();
 template<> real::Real(float f);
 template<> real::Real(double f);
 template<> real::Real(long double f);
@@ -252,8 +253,10 @@ template<> real::Real(char const *str);
 template<> LOL_ATTR_NODISCARD real::operator float() const;
 template<> LOL_ATTR_NODISCARD real::operator double() const;
 template<> LOL_ATTR_NODISCARD real::operator long double() const;
-template<> LOL_ATTR_NODISCARD real::operator int() const;
-template<> LOL_ATTR_NODISCARD real::operator unsigned int() const;
+template<> LOL_ATTR_NODISCARD real::operator int32_t() const;
+template<> LOL_ATTR_NODISCARD real::operator uint32_t() const;
+template<> LOL_ATTR_NODISCARD real::operator int64_t() const;
+template<> LOL_ATTR_NODISCARD real::operator uint64_t() const;
 template<> real real::operator +() const;
 template<> real real::operator -() const;
 template<> real real::operator +(real const &x) const;
@@ -294,8 +297,8 @@ template<typename U> Real<U> erf(Real<U> const &x);
 template<typename U> Real<U> log(Real<U> const &x);
 template<typename U> Real<U> log2(Real<U> const &x);
 template<typename U> Real<U> log10(Real<U> const &x);
-template<typename U> Real<U> frexp(Real<U> const &x, int64_t *exp);
-template<typename U> Real<U> ldexp(Real<U> const &x, int64_t exp);
+template<typename U> Real<U> frexp(Real<U> const &x, typename Real<U>::exponent_t *exp);
+template<typename U> Real<U> ldexp(Real<U> const &x, typename Real<U>::exponent_t exp);
 template<typename U> Real<U> modf(Real<U> const &x, Real<U> *iptr);
 template<typename U> Real<U> nextafter(Real<U> const &x, Real<U> const &y);
 template<typename U> Real<U> inverse(Real<U> const &x);
@@ -336,8 +339,8 @@ template<> real erf(real const &x);
 template<> real log(real const &x);
 template<> real log2(real const &x);
 template<> real log10(real const &x);
-template<> real frexp(real const &x, int64_t *exp);
-template<> real ldexp(real const &x, int64_t exp);
+template<> real frexp(real const &x, real::exponent_t *exp);
+template<> real ldexp(real const &x, real::exponent_t exp);
 template<> real modf(real const &x, real *iptr);
 template<> real nextafter(real const &x, real const &y);
 template<> real inverse(real const &x);
diff --git a/src/math/real.cpp b/src/math/real.cpp
index f3cf31fc..f90ebd0e 100644
--- a/src/math/real.cpp
+++ b/src/math/real.cpp
@@ -93,14 +93,6 @@ LOL_CONSTANT_GETTER(R_SQRT1_2,  R_SQRT2() / 2);
  * Now carry on with the rest of the Real class.
  */
 
-template<> real::Real()
-  : m_exponent(0),
-    m_sign(false),
-    m_nan(false),
-    m_inf(false)
-{
-}
-
 template<> real::Real(int32_t i) { new(this) real((double)i); }
 template<> real::Real(uint32_t i) { new(this) real((double)i); }
 template<> real::Real(float f) { new(this) real((double)f); }
@@ -136,10 +128,6 @@ template<> real::Real(uint64_t i)
 }
 
 template<> real::Real(double d)
-  : m_exponent(0),
-    m_sign(false),
-    m_nan(false),
-    m_inf(false)
 {
     union { double d; uint64_t x; } u = { d };
 
@@ -177,9 +165,24 @@ template<> real::Real(long double f)
     m_exponent += exponent;
 }
 
-template<> real::operator float() const { return (float)(double)(*this); }
-template<> real::operator int() const { return (int)(double)(*this); }
-template<> real::operator unsigned() const { return (unsigned)(double)(*this); }
+template<> real::operator float() const { return (float)(double)*this; }
+template<> real::operator int32_t() const { return (int32_t)(double)floor(*this); }
+template<> real::operator uint32_t() const { return (uint32_t)(double)floor(*this); }
+
+template<> real::operator uint64_t() const
+{
+    uint32_t msb = (uint32_t)ldexp(*this, -32);
+    uint64_t ret = ((uint64_t)msb << 32)
+                 | (uint32_t)(*this - ldexp((real)msb, 32));
+    return ret;
+}
+
+template<> real::operator int64_t() const
+{
+    /* If number is positive, convert it to uint64_t first. If it is
+     * negative, switch its sign first. */
+    return is_negative() ? -(int64_t)-*this : (int64_t)(uint64_t)*this;
+}
 
 template<> real::operator double() const
 {
@@ -233,7 +236,7 @@ template<> real::operator long double() const
 template<> real::Real(char const *str)
 {
     real ret = 0;
-    int exponent = 0;
+    exponent_t exponent = 0;
     bool hex = false, comma = false, nonzero = false, negative = false, finished = false;
 
     for (char const *p = str; *p && !finished; p++)
@@ -414,11 +417,11 @@ template<> real real::operator -(real const &x) const
     if (*this < x)
         return -(x - *this);
 
-    int64_t e1 = m_exponent;
-    int64_t e2 = x.m_exponent;
+    exponent_t e1 = m_exponent;
+    exponent_t e2 = x.m_exponent;
 
-    int64_t bigoff = (e1 - e2) / bigit_bits();
-    int64_t off = e1 - e2 - bigoff * bigit_bits();
+    exponent_t bigoff = (e1 - e2) / bigit_bits();
+    exponent_t off = e1 - e2 - bigoff * bigit_bits();
 
     /* FIXME: ensure we have the same number of bigits */
     if (bigoff > bigit_count())
@@ -429,7 +432,7 @@ template<> real real::operator -(real const &x) const
     ret.m_exponent = m_exponent;
 
     /* int64_t instead of uint64_t to preserve sign through shifts */
-    int64_t carry = 0;
+    exponent_t carry = 0;
     for (int i = 0; i < bigoff; ++i)
     {
         carry -= x.m_mantissa[bigit_count() - 1 - i];
@@ -438,8 +441,8 @@ template<> real real::operator -(real const &x) const
         carry |= carry << bigit_bits();
     }
     if (bigoff < bigit_count())
-        carry -= x.m_mantissa[bigit_count() - 1 - bigoff] & (((int64_t)1 << off) - 1);
-    carry /= (int64_t)1 << off;
+        carry -= x.m_mantissa[bigit_count() - 1 - bigoff] & (((exponent_t)1 << off) - 1);
+    carry /= (exponent_t)1 << off;
 
     for (int i = bigit_count(); i--; )
     {
@@ -452,7 +455,7 @@ template<> real real::operator -(real const &x) const
         else if (i - bigoff == 0)
             carry -= (uint64_t)1 << (bigit_bits() - off);
 
-        ret.m_mantissa[i] = (uint32_t)carry;
+        ret.m_mantissa[i] = (bigit_t)carry;
         carry >>= bigit_bits();
         carry |= carry << bigit_bits();
     }
@@ -473,7 +476,8 @@ template<> real real::operator -(real const &x) const
                 continue;
             }
 
-            for (uint32_t tmp = ret.m_mantissa[i]; tmp < 0x80000000u; tmp <<= 1)
+            /* “~tmp > tmp” checks that the MSB is not set */
+            for (bigit_t tmp = ret.m_mantissa[i]; ~tmp > tmp; tmp <<= 1)
                 off++;
             break;
         }
@@ -489,7 +493,7 @@ template<> real real::operator -(real const &x) const
 
             for (int i = 0; i < bigit_count(); ++i)
             {
-                uint32_t tmp = 0;
+                bigit_t tmp = 0;
                 if (i + bigoff < bigit_count())
                     tmp |= ret.m_mantissa[i + bigoff] << off;
                 if (off && i + bigoff + 1 < bigit_count())
@@ -550,7 +554,7 @@ template<> real real::operator *(real const &x) const
         carry += x.m_mantissa[bigit_count() - 1 - i];
         if (carry < prev)
             hicarry++;
-        ret.m_mantissa[bigit_count() - 1 - i] = carry & 0xffffffffu;
+        ret.m_mantissa[bigit_count() - 1 - i] = carry & ~(bigit_t)0;
         carry >>= bigit_bits();
         carry |= hicarry << bigit_bits();
         hicarry >>= bigit_bits();
@@ -562,8 +566,8 @@ template<> real real::operator *(real const &x) const
         carry--;
         for (int i = 0; i < bigit_count(); ++i)
         {
-            uint32_t tmp = (uint32_t)ret.m_mantissa[i];
-            ret.m_mantissa[i] = ((uint32_t)carry << (bigit_bits() - 1))
+            bigit_t tmp = ret.m_mantissa[i];
+            ret.m_mantissa[i] = ((bigit_t)carry << (bigit_bits() - 1))
                               | (tmp >> 1);
             carry = tmp & 1u;
         }
@@ -729,7 +733,7 @@ template<> real inverse(real const &x)
     if (x.is_zero())
         return copysign(real::R_INF(), x);
 
-    /* Use the system's float inversion to approximate 1/x */
+    /* Use the system’s float inversion to approximate 1/x */
     union { float f; uint32_t x; } u = { 1.0f };
     u.x |= x.m_mantissa[0] >> 9;
     u.f = 1.0f / u.f;
@@ -739,8 +743,8 @@ template<> real inverse(real const &x)
     ret.m_sign = x.m_sign;
     ret.m_exponent = -x.m_exponent + (u.x >> 23) - 0x7f;
 
-    /* FIXME: 1+log2(BIGIT_COUNT) steps of Newton-Raphson seems to be enough for
-     * convergence, but this hasn't been checked seriously. */
+    /* FIXME: 1+log2(bigit_count) steps of Newton-Raphson seems to be enough for
+     * convergence, but this hasn’t been checked seriously. */
     for (int i = 1; i <= x.bigit_count(); i *= 2)
         ret = ret * (real::R_2() - ret * x);
 
@@ -759,7 +763,7 @@ template<> real sqrt(real const &x)
 
     int tweak = x.m_exponent & 1;
 
-    /* Use the system's float inversion to approximate 1/sqrt(x). First
+    /* Use the system’s float inversion to approximate 1/sqrt(x). First
      * we construct a float in the [1..4[ range that has roughly the same
      * mantissa as our real. Its exponent is 0 or 1, depending on the
      * parity of x’s exponent. The final exponent is 0, -1 or -2. We use
@@ -776,7 +780,7 @@ template<> real sqrt(real const &x)
     ret.m_exponent = -(x.m_exponent - tweak) / 2 + (u.x >> 23) - 0x7f;
 
     /* FIXME: 1+log2(bigit_count()) steps of Newton-Raphson seems to be enough for
-     * convergence, but this hasn't been checked seriously. */
+     * convergence, but this hasn’t been checked seriously. */
     for (int i = 1; i <= x.bigit_count(); i *= 2)
     {
         ret = ret * (real::R_3() - ret * ret * x);
@@ -796,7 +800,7 @@ template<> real cbrt(real const &x)
     if (tweak < 0)
         tweak += 3;
 
-    /* Use the system's float inversion to approximate cbrt(x). First
+    /* Use the system’s float inversion to approximate cbrt(x). First
      * we construct a float in the [1..8[ range that has roughly the same
      * mantissa as our real. Its exponent is 0, 1 or 2, depending on the
      * value of x. The final exponent is 0 or 1 (special case). We use
@@ -804,7 +808,7 @@ template<> real cbrt(real const &x)
     union { float f; uint32_t x; } u = { 1.0f };
     u.x += tweak << 23;
     u.x |= x.m_mantissa[0] >> 9;
-    u.f = powf(u.f, 0.33333333333333333f);
+    u.f = powf(u.f, 1.f / 3);
 
     real ret;
     ret.m_mantissa.resize(x.m_mantissa.count());
@@ -813,7 +817,7 @@ template<> real cbrt(real const &x)
     ret.m_sign = x.m_sign;
 
     /* FIXME: 1+log2(bigit_count()) steps of Newton-Raphson seems to be enough
-     * for convergence, but this hasn't been checked seriously. */
+     * for convergence, but this hasn’t been checked seriously. */
     real third = inverse(real::R_3());
     for (int i = 1; i <= x.bigit_count(); i *= 2)
     {
@@ -832,7 +836,7 @@ template<> real pow(real const &x, real const &y)
         return real::R_0();
 
     /* Small integer exponent: use exponentiation by squaring */
-    int int_y = (int)y;
+    int64_t int_y = (int64_t)y;
     if (y == (real)int_y)
     {
         real ret = real::R_1();
@@ -904,7 +908,7 @@ static real fast_fact(int x, int step = 1)
 
 template<> real gamma(real const &x)
 {
-    /* We use Spouge's formula. FIXME: precision is far from acceptable,
+    /* We use Spouge’s formula. FIXME: precision is far from acceptable,
      * especially with large values. We need to compute this with higher
      * precision values in order to attain the desired accuracy. It might
      * also be useful to sort the ck values by decreasing absolute value
@@ -1028,7 +1032,7 @@ static real fast_exp_sub(real const &x, real const &y)
     /* This fast exp method is tuned to work on the [-1..1] range and
      * no effort whatsoever was made to improve convergence outside this
      * domain of validity. The argument y is used for cases where we
-     * don't want the leading 1 in the Taylor series. */
+     * don’t want the leading 1 in the Taylor series. */
     real ret = real::R_1() - y, xn = x;
     int i = 1;
 
@@ -1049,9 +1053,9 @@ template<> real exp(real const &x)
     /* Strategy for exp(x): the Taylor series does not converge very fast
      * with large positive or negative values.
      *
-     * However, we know that the result is going to be in the form M*2^E,
-     * where M is the mantissa and E the exponent. We first try to predict
-     * a value for E, which is approximately log2(exp(x)) = x / log(2).
+     * However, since the result is going to be in the form M*2^E, we first
+     * try to predict a value for E, which is approximately:
+     *  E ≈ log2(exp(x)) = x / log(2)
      *
      * Let E0 be an integer close to x / log(2). We need to find a value x0
      * such that exp(x) = 2^E0 * exp(x0). We get x0 = x - E0 log(2).
@@ -1062,7 +1066,7 @@ template<> real exp(real const &x)
      *  real x1 = exp(x0)
      *  return x1 * 2^E0
      */
-    int e0 = x / real::R_LN2();
+    real::exponent_t e0 = x / real::R_LN2();
     real x0 = x - (real)e0 * real::R_LN2();
     real x1 = fast_exp_sub(x0, real::R_0());
     x1.m_exponent += e0;
@@ -1072,7 +1076,7 @@ template<> real exp(real const &x)
 template<> real exp2(real const &x)
 {
     /* Strategy for exp2(x): see strategy in exp(). */
-    int e0 = x;
+    real::exponent_t e0 = x;
     real x0 = x - (real)e0;
     real x1 = fast_exp_sub(x0 * real::R_LN2(), real::R_0());
     x1.m_exponent += e0;
@@ -1083,8 +1087,8 @@ template<> real erf(real const &x)
 {
     /* Strategy for erf(x):
      *  - if x<0, erf(x) = -erf(-x)
-     *  - if x<7, erf(x) = sum((-1)^n×x^(2n+1)/((2n+1)×n!))/sqrt(pi/4)
-     *  - if x≥7, erf(x) = 1+exp(-x²)/(x×sqrt(pi))×sum((-1)^n×(2n-1)!!/(2x²)^n
+     *  - if x<7, erf(x) = sum((-1)^n·x^(2n+1)/((2n+1)·n!))/sqrt(π/4)
+     *  - if x≥7, erf(x) = 1+exp(-x²)/(x·sqrt(π))·sum((-1)^n·(2n-1)!!/(2x²)^n
      *
      * FIXME: do not compute factorials at each iteration, accumulate
      * them instead (see fast_exp_sub).
@@ -1158,7 +1162,7 @@ template<> real cosh(real const &x)
     return (exp(x) + exp(-x)) / 2;
 }
 
-template<> real frexp(real const &x, int64_t *exp)
+template<> real frexp(real const &x, real::exponent_t *exp)
 {
     if (!x)
     {
@@ -1174,7 +1178,7 @@ template<> real frexp(real const &x, int64_t *exp)
     return ret;
 }
 
-template<> real ldexp(real const &x, int64_t exp)
+template<> real ldexp(real const &x, real::exponent_t exp)
 {
     real ret = x;
     if (ret) /* Only do something if non-zero */
@@ -1229,7 +1233,7 @@ template<> real floor(real const &x)
         return real::R_0();
 
     real ret = x;
-    int64_t exponent = x.m_exponent;
+    real::exponent_t exponent = x.m_exponent;
 
     for (int i = 0; i < x.bigit_count(); ++i)
     {
@@ -1253,10 +1257,9 @@ template<> real ceil(real const &x)
     if (x < -real::R_0())
         return -floor(-x);
     real ret = floor(x);
-    if (x == ret)
-        return ret;
-    else
-        return ret + real::R_1();
+    if (ret < x)
+        ret += real::R_1();
+    return ret;
 }
 
 template<> real round(real const &x)
@@ -1647,7 +1650,7 @@ template<> void real::sprintf(char *str, int ndigits) const
 static real load_min()
 {
     real ret = 1;
-    return ldexp(ret, std::numeric_limits<int64_t>::min());
+    return ldexp(ret, std::numeric_limits<real::exponent_t>::min());
 }
 
 static real load_max()
@@ -1669,7 +1672,7 @@ static real load_max()
 
 static real load_pi()
 {
-    /* Approximate Pi using Machin's formula: 16*atan(1/5)-4*atan(1/239) */
+    /* Approximate π using Machin’s formula: 16*atan(1/5)-4*atan(1/239) */
     real ret = 0, x0 = 5, x1 = 239;
     real const m0 = -x0 * x0, m1 = -x1 * x1, r16 = 16, r4 = 4;