From 6281145d9dd7f72263cf0a45000579f0f20604c7 Mon Sep 17 00:00:00 2001
From: Sam Hocevar <sam@hocevar.net>
Date: Tue, 31 Oct 2017 14:04:04 +0100
Subject: [PATCH] Implement real::erf() with reasonable precision.

---
 src/lol/math/real.h |  5 ++++
 src/math/real.cpp   | 63 ++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 65 insertions(+), 3 deletions(-)
diff --git a/src/lol/math/real.h b/src/lol/math/real.h
index b196c218..f8cb4e02 100644
--- a/src/lol/math/real.h
+++ b/src/lol/math/real.h
@@ -108,9 +108,12 @@ public:
     /* Exponential and logarithmic functions */
     template<typename U> friend Real<U> exp(Real<U> const &x);
     template<typename U> friend Real<U> exp2(Real<U> const &x);
+    template<typename U> friend Real<U> erf(Real<U> const &x);
     template<typename U> friend Real<U> log(Real<U> const &x);
     template<typename U> friend Real<U> log2(Real<U> const &x);
     template<typename U> friend Real<U> log10(Real<U> const &x);
+
+    /* Floating-point functions */
     template<typename U> friend Real<U> frexp(Real<U> const &x, int64_t *exp);
     template<typename U> friend Real<U> ldexp(Real<U> const &x, int64_t exp);
     template<typename U> friend Real<U> modf(Real<U> const &x, Real<U> *iptr);
@@ -287,6 +290,7 @@ template<typename U> Real<U> cosh(Real<U> const &x);
 template<typename U> Real<U> tanh(Real<U> const &x);
 template<typename U> Real<U> exp(Real<U> const &x);
 template<typename U> Real<U> exp2(Real<U> const &x);
+template<typename U> Real<U> erf(Real<U> const &x);
 template<typename U> Real<U> log(Real<U> const &x);
 template<typename U> Real<U> log2(Real<U> const &x);
 template<typename U> Real<U> log10(Real<U> const &x);
@@ -328,6 +332,7 @@ template<> real cosh(real const &x);
 template<> real tanh(real const &x);
 template<> real exp(real const &x);
 template<> real exp2(real const &x);
+template<> real erf(real const &x);
 template<> real log(real const &x);
 template<> real log2(real const &x);
 template<> real log10(real const &x);
diff --git a/src/math/real.cpp b/src/math/real.cpp
index c822c208..fe05916f 100644
--- a/src/math/real.cpp
+++ b/src/math/real.cpp
@@ -864,13 +864,19 @@ template<> real pow(real const &x, real const &y)
 /* A fast factorial implementation for small numbers. An optional
  * step argument allows to compute double factorials (i.e. with
  * only the odd or the even terms. */
-static real fast_fact(unsigned int x, unsigned int step = 1)
+static real fast_fact(int x, int step = 1)
 {
+    if (x < step)
+        return 1;
+
+    if (x == step)
+        return x;
+
     unsigned int start = (x + step - 1) % step + 1;
-    real ret = start ? real(start) : real(step);
+    real ret(start);
     uint64_t multiplier = 1;
 
-    for (unsigned int i = start, exponent = 0;;)
+    for (int i = start, exponent = 0;;)
     {
         if (i >= x)
             return ldexp(ret * multiplier, exponent);
@@ -1072,6 +1078,57 @@ template<> real exp2(real const &x)
     return x1;
 }
 
+template<> real erf(real const &x)
+{
+    /* Strategy for erf(x):
+     *  - if x<0, erf(x) = -erf(-x)
+     *  - if x<7, erf(x) = sum((-1)^n×x^(2n+1)/((2n+1)×n!))/sqrt(pi/4)
+     *  - if x≥7, erf(x) = 1+exp(-x²)/(x×sqrt(pi))×sum((-1)^n×(2n-1)!!/(2x²)^n
+     *
+     * FIXME: do not compute factorials at each iteration, accumulate
+     * them instead (see fast_exp_sub).
+     * FIXME: For a potentially faster implementation, see “Expanding the
+     * Error Function erf(z)” at:
+     *  http://www.mathematica-journal.com/2014/11/on-burmanns-theorem-and-its-application-to-problems-of-linear-and-nonlinear-heat-transfer-and-diffusion/#more-39602/
+     */
+    if (x.is_negative())
+        return -erf(-x);
+
+    real sum = real::R_0();
+    real x2 = x * x;
+
+    /* FIXME: this test is inefficient; the series converges slowly for x≥1 */
+    if (x < real(7))
+    {
+        real xn = x, xmul = x2;
+        for (int n = 0;; ++n, xn *= xmul)
+        {
+            real tmp = xn / (fast_fact(n) * (2 * n + 1));
+            real newsum = (n & 1) ? sum - tmp : sum + tmp;
+            if (newsum == sum)
+                break;
+            sum = newsum;
+        }
+        return sum * real::R_2_SQRTPI();
+    }
+    else
+    {
+        real xn = real::R_1(), xmul = inverse(x2 + x2);
+        /* FIXME: this does not converge well! We need to stop at 30
+         * iterations and sacrifice some accuracy. */
+        for (int n = 0; n < 30; ++n, xn *= xmul)
+        {
+            real tmp = xn * fast_fact(n * 2 - 1, 2);
+            real newsum = (n & 1) ? sum - tmp : sum + tmp;
+            if (newsum == sum)
+                break;
+            sum = newsum;
+        }
+
+        return real::R_1() - exp(-x2) / (x * sqrt(real::R_PI())) * sum;
+    }
+}
+
 template<> real sinh(real const &x)
 {
     /* We cannot always use (exp(x)-exp(-x))/2 because we'll lose