From a407c5d5c4a609ff0b9f9b6a666fc62b4af11977 Mon Sep 17 00:00:00 2001 From: Sam Hocevar Date: Mon, 10 Dec 2018 12:48:12 +0100 Subject: [PATCH] math: add lol::real to 64-bit integer conversions and clean up code. --- src/lol/math/real.h | 37 +++++++------- src/math/real.cpp | 115 +++++++++++++++++++++++--------------------- 2 files changed, 79 insertions(+), 73 deletions(-) diff --git a/src/lol/math/real.h b/src/lol/math/real.h index 4f22515e..38c9975c 100644 --- a/src/lol/math/real.h +++ b/src/lol/math/real.h @@ -41,7 +41,10 @@ template class LOL_ATTR_NODISCARD Real { public: - Real(); + typedef T bigit_t; + typedef int64_t exponent_t; + + Real() = default; Real(float f); Real(double f); @@ -61,8 +64,10 @@ public: LOL_ATTR_NODISCARD operator float() const; LOL_ATTR_NODISCARD operator double() const; LOL_ATTR_NODISCARD operator long double() const; - LOL_ATTR_NODISCARD operator int() const; - LOL_ATTR_NODISCARD operator unsigned int() const; + LOL_ATTR_NODISCARD operator int32_t() const; + LOL_ATTR_NODISCARD operator uint32_t() const; + LOL_ATTR_NODISCARD operator int64_t() const; + LOL_ATTR_NODISCARD operator uint64_t() const; Real operator +() const; Real operator -() const; @@ -114,8 +119,8 @@ public: template friend Real log10(Real const &x); /* Floating-point functions */ - template friend Real frexp(Real const &x, int64_t *exp); - template friend Real ldexp(Real const &x, int64_t exp); + template friend Real frexp(Real const &x, typename Real::exponent_t *exp); + template friend Real ldexp(Real const &x, typename Real::exponent_t exp); template friend Real modf(Real const &x, Real *iptr); template friend Real nextafter(Real const &x, Real const &y); @@ -221,12 +226,9 @@ public: static Real const& R_MAX(); private: - typedef T bigit_t; - typedef int64_t exponent_t; - array m_mantissa; - exponent_t m_exponent; - bool m_sign, m_nan, m_inf; + exponent_t m_exponent = 0; + bool m_sign = false, m_nan = false, m_inf = false; public: static int DEFAULT_BIGIT_COUNT; @@ -239,7 +241,6 @@ public: /* * Mandatory forward declarations of template specialisations */ -template<> real::Real(); template<> real::Real(float f); template<> real::Real(double f); template<> real::Real(long double f); @@ -252,8 +253,10 @@ template<> real::Real(char const *str); template<> LOL_ATTR_NODISCARD real::operator float() const; template<> LOL_ATTR_NODISCARD real::operator double() const; template<> LOL_ATTR_NODISCARD real::operator long double() const; -template<> LOL_ATTR_NODISCARD real::operator int() const; -template<> LOL_ATTR_NODISCARD real::operator unsigned int() const; +template<> LOL_ATTR_NODISCARD real::operator int32_t() const; +template<> LOL_ATTR_NODISCARD real::operator uint32_t() const; +template<> LOL_ATTR_NODISCARD real::operator int64_t() const; +template<> LOL_ATTR_NODISCARD real::operator uint64_t() const; template<> real real::operator +() const; template<> real real::operator -() const; template<> real real::operator +(real const &x) const; @@ -294,8 +297,8 @@ template Real erf(Real const &x); template Real log(Real const &x); template Real log2(Real const &x); template Real log10(Real const &x); -template Real frexp(Real const &x, int64_t *exp); -template Real ldexp(Real const &x, int64_t exp); +template Real frexp(Real const &x, typename Real::exponent_t *exp); +template Real ldexp(Real const &x, typename Real::exponent_t exp); template Real modf(Real const &x, Real *iptr); template Real nextafter(Real const &x, Real const &y); template Real inverse(Real const &x); @@ -336,8 +339,8 @@ template<> real erf(real const &x); template<> real log(real const &x); template<> real log2(real const &x); template<> real log10(real const &x); -template<> real frexp(real const &x, int64_t *exp); -template<> real ldexp(real const &x, int64_t exp); +template<> real frexp(real const &x, real::exponent_t *exp); +template<> real ldexp(real const &x, real::exponent_t exp); template<> real modf(real const &x, real *iptr); template<> real nextafter(real const &x, real const &y); template<> real inverse(real const &x); diff --git a/src/math/real.cpp b/src/math/real.cpp index f3cf31fc..f90ebd0e 100644 --- a/src/math/real.cpp +++ b/src/math/real.cpp @@ -93,14 +93,6 @@ LOL_CONSTANT_GETTER(R_SQRT1_2, R_SQRT2() / 2); * Now carry on with the rest of the Real class. */ -template<> real::Real() - : m_exponent(0), - m_sign(false), - m_nan(false), - m_inf(false) -{ -} - template<> real::Real(int32_t i) { new(this) real((double)i); } template<> real::Real(uint32_t i) { new(this) real((double)i); } template<> real::Real(float f) { new(this) real((double)f); } @@ -136,10 +128,6 @@ template<> real::Real(uint64_t i) } template<> real::Real(double d) - : m_exponent(0), - m_sign(false), - m_nan(false), - m_inf(false) { union { double d; uint64_t x; } u = { d }; @@ -177,9 +165,24 @@ template<> real::Real(long double f) m_exponent += exponent; } -template<> real::operator float() const { return (float)(double)(*this); } -template<> real::operator int() const { return (int)(double)(*this); } -template<> real::operator unsigned() const { return (unsigned)(double)(*this); } +template<> real::operator float() const { return (float)(double)*this; } +template<> real::operator int32_t() const { return (int32_t)(double)floor(*this); } +template<> real::operator uint32_t() const { return (uint32_t)(double)floor(*this); } + +template<> real::operator uint64_t() const +{ + uint32_t msb = (uint32_t)ldexp(*this, -32); + uint64_t ret = ((uint64_t)msb << 32) + | (uint32_t)(*this - ldexp((real)msb, 32)); + return ret; +} + +template<> real::operator int64_t() const +{ + /* If number is positive, convert it to uint64_t first. If it is + * negative, switch its sign first. */ + return is_negative() ? -(int64_t)-*this : (int64_t)(uint64_t)*this; +} template<> real::operator double() const { @@ -233,7 +236,7 @@ template<> real::operator long double() const template<> real::Real(char const *str) { real ret = 0; - int exponent = 0; + exponent_t exponent = 0; bool hex = false, comma = false, nonzero = false, negative = false, finished = false; for (char const *p = str; *p && !finished; p++) @@ -414,11 +417,11 @@ template<> real real::operator -(real const &x) const if (*this < x) return -(x - *this); - int64_t e1 = m_exponent; - int64_t e2 = x.m_exponent; + exponent_t e1 = m_exponent; + exponent_t e2 = x.m_exponent; - int64_t bigoff = (e1 - e2) / bigit_bits(); - int64_t off = e1 - e2 - bigoff * bigit_bits(); + exponent_t bigoff = (e1 - e2) / bigit_bits(); + exponent_t off = e1 - e2 - bigoff * bigit_bits(); /* FIXME: ensure we have the same number of bigits */ if (bigoff > bigit_count()) @@ -429,7 +432,7 @@ template<> real real::operator -(real const &x) const ret.m_exponent = m_exponent; /* int64_t instead of uint64_t to preserve sign through shifts */ - int64_t carry = 0; + exponent_t carry = 0; for (int i = 0; i < bigoff; ++i) { carry -= x.m_mantissa[bigit_count() - 1 - i]; @@ -438,8 +441,8 @@ template<> real real::operator -(real const &x) const carry |= carry << bigit_bits(); } if (bigoff < bigit_count()) - carry -= x.m_mantissa[bigit_count() - 1 - bigoff] & (((int64_t)1 << off) - 1); - carry /= (int64_t)1 << off; + carry -= x.m_mantissa[bigit_count() - 1 - bigoff] & (((exponent_t)1 << off) - 1); + carry /= (exponent_t)1 << off; for (int i = bigit_count(); i--; ) { @@ -452,7 +455,7 @@ template<> real real::operator -(real const &x) const else if (i - bigoff == 0) carry -= (uint64_t)1 << (bigit_bits() - off); - ret.m_mantissa[i] = (uint32_t)carry; + ret.m_mantissa[i] = (bigit_t)carry; carry >>= bigit_bits(); carry |= carry << bigit_bits(); } @@ -473,7 +476,8 @@ template<> real real::operator -(real const &x) const continue; } - for (uint32_t tmp = ret.m_mantissa[i]; tmp < 0x80000000u; tmp <<= 1) + /* “~tmp > tmp” checks that the MSB is not set */ + for (bigit_t tmp = ret.m_mantissa[i]; ~tmp > tmp; tmp <<= 1) off++; break; } @@ -489,7 +493,7 @@ template<> real real::operator -(real const &x) const for (int i = 0; i < bigit_count(); ++i) { - uint32_t tmp = 0; + bigit_t tmp = 0; if (i + bigoff < bigit_count()) tmp |= ret.m_mantissa[i + bigoff] << off; if (off && i + bigoff + 1 < bigit_count()) @@ -550,7 +554,7 @@ template<> real real::operator *(real const &x) const carry += x.m_mantissa[bigit_count() - 1 - i]; if (carry < prev) hicarry++; - ret.m_mantissa[bigit_count() - 1 - i] = carry & 0xffffffffu; + ret.m_mantissa[bigit_count() - 1 - i] = carry & ~(bigit_t)0; carry >>= bigit_bits(); carry |= hicarry << bigit_bits(); hicarry >>= bigit_bits(); @@ -562,8 +566,8 @@ template<> real real::operator *(real const &x) const carry--; for (int i = 0; i < bigit_count(); ++i) { - uint32_t tmp = (uint32_t)ret.m_mantissa[i]; - ret.m_mantissa[i] = ((uint32_t)carry << (bigit_bits() - 1)) + bigit_t tmp = ret.m_mantissa[i]; + ret.m_mantissa[i] = ((bigit_t)carry << (bigit_bits() - 1)) | (tmp >> 1); carry = tmp & 1u; } @@ -729,7 +733,7 @@ template<> real inverse(real const &x) if (x.is_zero()) return copysign(real::R_INF(), x); - /* Use the system's float inversion to approximate 1/x */ + /* Use the system’s float inversion to approximate 1/x */ union { float f; uint32_t x; } u = { 1.0f }; u.x |= x.m_mantissa[0] >> 9; u.f = 1.0f / u.f; @@ -739,8 +743,8 @@ template<> real inverse(real const &x) ret.m_sign = x.m_sign; ret.m_exponent = -x.m_exponent + (u.x >> 23) - 0x7f; - /* FIXME: 1+log2(BIGIT_COUNT) steps of Newton-Raphson seems to be enough for - * convergence, but this hasn't been checked seriously. */ + /* FIXME: 1+log2(bigit_count) steps of Newton-Raphson seems to be enough for + * convergence, but this hasn’t been checked seriously. */ for (int i = 1; i <= x.bigit_count(); i *= 2) ret = ret * (real::R_2() - ret * x); @@ -759,7 +763,7 @@ template<> real sqrt(real const &x) int tweak = x.m_exponent & 1; - /* Use the system's float inversion to approximate 1/sqrt(x). First + /* Use the system’s float inversion to approximate 1/sqrt(x). First * we construct a float in the [1..4[ range that has roughly the same * mantissa as our real. Its exponent is 0 or 1, depending on the * parity of x’s exponent. The final exponent is 0, -1 or -2. We use @@ -776,7 +780,7 @@ template<> real sqrt(real const &x) ret.m_exponent = -(x.m_exponent - tweak) / 2 + (u.x >> 23) - 0x7f; /* FIXME: 1+log2(bigit_count()) steps of Newton-Raphson seems to be enough for - * convergence, but this hasn't been checked seriously. */ + * convergence, but this hasn’t been checked seriously. */ for (int i = 1; i <= x.bigit_count(); i *= 2) { ret = ret * (real::R_3() - ret * ret * x); @@ -796,7 +800,7 @@ template<> real cbrt(real const &x) if (tweak < 0) tweak += 3; - /* Use the system's float inversion to approximate cbrt(x). First + /* Use the system’s float inversion to approximate cbrt(x). First * we construct a float in the [1..8[ range that has roughly the same * mantissa as our real. Its exponent is 0, 1 or 2, depending on the * value of x. The final exponent is 0 or 1 (special case). We use @@ -804,7 +808,7 @@ template<> real cbrt(real const &x) union { float f; uint32_t x; } u = { 1.0f }; u.x += tweak << 23; u.x |= x.m_mantissa[0] >> 9; - u.f = powf(u.f, 0.33333333333333333f); + u.f = powf(u.f, 1.f / 3); real ret; ret.m_mantissa.resize(x.m_mantissa.count()); @@ -813,7 +817,7 @@ template<> real cbrt(real const &x) ret.m_sign = x.m_sign; /* FIXME: 1+log2(bigit_count()) steps of Newton-Raphson seems to be enough - * for convergence, but this hasn't been checked seriously. */ + * for convergence, but this hasn’t been checked seriously. */ real third = inverse(real::R_3()); for (int i = 1; i <= x.bigit_count(); i *= 2) { @@ -832,7 +836,7 @@ template<> real pow(real const &x, real const &y) return real::R_0(); /* Small integer exponent: use exponentiation by squaring */ - int int_y = (int)y; + int64_t int_y = (int64_t)y; if (y == (real)int_y) { real ret = real::R_1(); @@ -904,7 +908,7 @@ static real fast_fact(int x, int step = 1) template<> real gamma(real const &x) { - /* We use Spouge's formula. FIXME: precision is far from acceptable, + /* We use Spouge’s formula. FIXME: precision is far from acceptable, * especially with large values. We need to compute this with higher * precision values in order to attain the desired accuracy. It might * also be useful to sort the ck values by decreasing absolute value @@ -1028,7 +1032,7 @@ static real fast_exp_sub(real const &x, real const &y) /* This fast exp method is tuned to work on the [-1..1] range and * no effort whatsoever was made to improve convergence outside this * domain of validity. The argument y is used for cases where we - * don't want the leading 1 in the Taylor series. */ + * don’t want the leading 1 in the Taylor series. */ real ret = real::R_1() - y, xn = x; int i = 1; @@ -1049,9 +1053,9 @@ template<> real exp(real const &x) /* Strategy for exp(x): the Taylor series does not converge very fast * with large positive or negative values. * - * However, we know that the result is going to be in the form M*2^E, - * where M is the mantissa and E the exponent. We first try to predict - * a value for E, which is approximately log2(exp(x)) = x / log(2). + * However, since the result is going to be in the form M*2^E, we first + * try to predict a value for E, which is approximately: + * E ≈ log2(exp(x)) = x / log(2) * * Let E0 be an integer close to x / log(2). We need to find a value x0 * such that exp(x) = 2^E0 * exp(x0). We get x0 = x - E0 log(2). @@ -1062,7 +1066,7 @@ template<> real exp(real const &x) * real x1 = exp(x0) * return x1 * 2^E0 */ - int e0 = x / real::R_LN2(); + real::exponent_t e0 = x / real::R_LN2(); real x0 = x - (real)e0 * real::R_LN2(); real x1 = fast_exp_sub(x0, real::R_0()); x1.m_exponent += e0; @@ -1072,7 +1076,7 @@ template<> real exp(real const &x) template<> real exp2(real const &x) { /* Strategy for exp2(x): see strategy in exp(). */ - int e0 = x; + real::exponent_t e0 = x; real x0 = x - (real)e0; real x1 = fast_exp_sub(x0 * real::R_LN2(), real::R_0()); x1.m_exponent += e0; @@ -1083,8 +1087,8 @@ template<> real erf(real const &x) { /* Strategy for erf(x): * - if x<0, erf(x) = -erf(-x) - * - if x<7, erf(x) = sum((-1)^n×x^(2n+1)/((2n+1)×n!))/sqrt(pi/4) - * - if x≥7, erf(x) = 1+exp(-x²)/(x×sqrt(pi))×sum((-1)^n×(2n-1)!!/(2x²)^n + * - if x<7, erf(x) = sum((-1)^n·x^(2n+1)/((2n+1)·n!))/sqrt(π/4) + * - if x≥7, erf(x) = 1+exp(-x²)/(x·sqrt(π))·sum((-1)^n·(2n-1)!!/(2x²)^n * * FIXME: do not compute factorials at each iteration, accumulate * them instead (see fast_exp_sub). @@ -1158,7 +1162,7 @@ template<> real cosh(real const &x) return (exp(x) + exp(-x)) / 2; } -template<> real frexp(real const &x, int64_t *exp) +template<> real frexp(real const &x, real::exponent_t *exp) { if (!x) { @@ -1174,7 +1178,7 @@ template<> real frexp(real const &x, int64_t *exp) return ret; } -template<> real ldexp(real const &x, int64_t exp) +template<> real ldexp(real const &x, real::exponent_t exp) { real ret = x; if (ret) /* Only do something if non-zero */ @@ -1229,7 +1233,7 @@ template<> real floor(real const &x) return real::R_0(); real ret = x; - int64_t exponent = x.m_exponent; + real::exponent_t exponent = x.m_exponent; for (int i = 0; i < x.bigit_count(); ++i) { @@ -1253,10 +1257,9 @@ template<> real ceil(real const &x) if (x < -real::R_0()) return -floor(-x); real ret = floor(x); - if (x == ret) - return ret; - else - return ret + real::R_1(); + if (ret < x) + ret += real::R_1(); + return ret; } template<> real round(real const &x) @@ -1647,7 +1650,7 @@ template<> void real::sprintf(char *str, int ndigits) const static real load_min() { real ret = 1; - return ldexp(ret, std::numeric_limits::min()); + return ldexp(ret, std::numeric_limits::min()); } static real load_max() @@ -1669,7 +1672,7 @@ static real load_max() static real load_pi() { - /* Approximate Pi using Machin's formula: 16*atan(1/5)-4*atan(1/239) */ + /* Approximate π using Machin’s formula: 16*atan(1/5)-4*atan(1/239) */ real ret = 0, x0 = 5, x1 = 239; real const m0 = -x0 * x0, m1 = -x1 * x1, r16 = 16, r4 = 4;