From f556b500ffba2bcd39d000a1ecfdc756d1891ef0 Mon Sep 17 00:00:00 2001 From: Sam Hocevar Date: Wed, 2 Mar 2022 15:29:41 +0100 Subject: [PATCH] Move the erfcx() implementation to a helper function --- include/lol/private/types/real.ipp | 50 +++++++++++++++++------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/include/lol/private/types/real.ipp b/include/lol/private/types/real.ipp index 5cb3a7e1..2a0ea53c 100644 --- a/include/lol/private/types/real.ipp +++ b/include/lol/private/types/real.ipp @@ -1161,6 +1161,28 @@ template real_t expm1(real_t const &x) return near_zero ? fast_exp_sub(x, real_t::R_1()) : exp(x) - real_t::R_1(); } +template real_t fast_erfcx(real_t const &x, real_t const &x2) +{ + auto sum = real_t::R_0(); + + // XXX: The series does not converge, there is an optimal value around + // ⌊x²+0.5⌋. If maximum accuracy cannot be reached, we must stop summing + // when divergence is detected. + auto prev = x2; + auto xn = real_t::R_1(), xmul = inverse(x2 + x2); + for (int n = 0;; ++n, xn *= xmul) + { + auto tmp = xn * fast_fact(n * 2 - 1, 2); + auto newsum = (n & 1) ? sum - tmp : sum + tmp; + if (newsum == sum || tmp > prev) + break; + sum = newsum; + prev = tmp; + } + + return sum / (x * sqrt(real_t::R_PI())); +} + template real_t erf(real_t const &x) { /* Strategy for erf(x): @@ -1206,43 +1228,29 @@ template real_t erfc(real_t const &x) if (x < real_t(7)) return real_t::R_1() - erf(x); - return exp(-x * x) * erfcx(x); + auto x2 = x * x; + return exp(-x2) * fast_erfcx(x, x2); } template real_t erfcx(real_t const &x) { - // Strategy for erfc(x): - // - if x<7, erfc(x) = exp(x²)·(1-erf(x)) - // - if x≥7, erfc(x) = sum((-1)^n·(2n-1)!!/(2x²)^n) / (x·sqrt(π)) + // Strategy for erfcx(x): + // - if x<7, erfcx(x) = exp(x²)·(1-erf(x)) + // - if x≥7, erfcx(x) = sum((-1)^n·(2n-1)!!/(2x²)^n) / (x·sqrt(π)) auto sum = real_t::R_0(); auto x2 = x * x; if (x < real_t(7)) return exp(x2) * (real_t::R_1() - erf(x)); - // XXX: The series does not converge, there is an optimal value around - // ⌊x²+0.5⌋. If maximum accuracy cannot be reached, we must stop summing - // when divergence is detected. - auto prev = x2; - auto xn = real_t::R_1(), xmul = inverse(x2 + x2); - for (int n = 0;; ++n, xn *= xmul) - { - auto tmp = xn * fast_fact(n * 2 - 1, 2); - auto newsum = (n & 1) ? sum - tmp : sum + tmp; - if (newsum == sum || tmp > prev) - break; - sum = newsum; - prev = tmp; - } - - return sum / (x * sqrt(real_t::R_PI())); + return fast_erfcx(x, x2); } template real_t sinh(real_t const &x) { // Use expm1() to ensure high accuracy around 0. No need to worry about // accuracy in other ranges because either exp(x) or exp(-x) will be - // very large and cancel the other term. + // large enough to cancel the other term. return (expm1(x) - expm1(-x)) / 2; }