You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

1290 line
32 KiB

  1. //
  2. // Lol Engine
  3. //
  4. // Copyright: (c) 2010-2011 Sam Hocevar <sam@hocevar.net>
  5. // This program is free software; you can redistribute it and/or
  6. // modify it under the terms of the Do What The Fuck You Want To
  7. // Public License, Version 2, as published by Sam Hocevar. See
  8. // http://sam.zoy.org/projects/COPYING.WTFPL for more details.
  9. //
  10. #if defined HAVE_CONFIG_H
  11. # include "config.h"
  12. #endif
  13. #include <cstring>
  14. #include <cstdio>
  15. #include "core.h"
  16. using namespace std;
  17. namespace lol
  18. {
  19. real::real()
  20. {
  21. m_mantissa = new uint32_t[BIGITS];
  22. m_signexp = 0;
  23. }
  24. real::real(real const &x)
  25. {
  26. m_mantissa = new uint32_t[BIGITS];
  27. memcpy(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t));
  28. m_signexp = x.m_signexp;
  29. }
  30. real const &real::operator =(real const &x)
  31. {
  32. if (&x != this)
  33. {
  34. memcpy(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t));
  35. m_signexp = x.m_signexp;
  36. }
  37. return *this;
  38. }
  39. real::~real()
  40. {
  41. delete[] m_mantissa;
  42. }
  43. real::real(float f) { new(this) real((double)f); }
  44. real::real(int i) { new(this) real((double)i); }
  45. real::real(unsigned int i) { new(this) real((double)i); }
  46. real::real(double d)
  47. {
  48. new(this) real();
  49. union { double d; uint64_t x; } u = { d };
  50. uint32_t sign = (u.x >> 63) << 31;
  51. uint32_t exponent = (u.x << 1) >> 53;
  52. switch (exponent)
  53. {
  54. case 0x00:
  55. m_signexp = sign;
  56. break;
  57. case 0x7ff:
  58. m_signexp = sign | 0x7fffffffu;
  59. break;
  60. default:
  61. m_signexp = sign | (exponent + (1 << 30) - (1 << 10));
  62. break;
  63. }
  64. m_mantissa[0] = u.x >> 20;
  65. m_mantissa[1] = u.x << 12;
  66. memset(m_mantissa + 2, 0, (BIGITS - 2) * sizeof(m_mantissa[0]));
  67. }
  68. real::operator float() const { return (float)(double)(*this); }
  69. real::operator int() const { return (int)(double)(*this); }
  70. real::operator unsigned int() const { return (unsigned int)(double)(*this); }
  71. real::operator double() const
  72. {
  73. union { double d; uint64_t x; } u;
  74. /* Get sign */
  75. u.x = m_signexp >> 31;
  76. u.x <<= 11;
  77. /* Compute new exponent */
  78. uint32_t exponent = (m_signexp << 1) >> 1;
  79. int e = (int)exponent - (1 << 30) + (1 << 10);
  80. if (e < 0)
  81. u.x <<= 52;
  82. else if (e >= 0x7ff)
  83. {
  84. u.x |= 0x7ff;
  85. u.x <<= 52;
  86. }
  87. else
  88. {
  89. u.x |= e;
  90. /* Store mantissa if necessary */
  91. u.x <<= 32;
  92. u.x |= m_mantissa[0];
  93. u.x <<= 20;
  94. u.x |= m_mantissa[1] >> 12;
  95. /* Rounding */
  96. u.x += (m_mantissa[1] >> 11) & 1;
  97. }
  98. return u.d;
  99. }
  100. real real::operator +() const
  101. {
  102. return *this;
  103. }
  104. real real::operator -() const
  105. {
  106. real ret = *this;
  107. ret.m_signexp ^= 0x80000000u;
  108. return ret;
  109. }
  110. real real::operator +(real const &x) const
  111. {
  112. if (x.m_signexp << 1 == 0)
  113. return *this;
  114. /* Ensure both arguments are positive. Otherwise, switch signs,
  115. * or replace + with -. */
  116. if (m_signexp >> 31)
  117. return -(-*this + -x);
  118. if (x.m_signexp >> 31)
  119. return *this - (-x);
  120. /* Ensure *this has the larger exponent (no need for the mantissa to
  121. * be larger, as in subtraction). Otherwise, switch. */
  122. if ((m_signexp << 1) < (x.m_signexp << 1))
  123. return x + *this;
  124. real ret;
  125. int e1 = m_signexp - (1 << 30) + 1;
  126. int e2 = x.m_signexp - (1 << 30) + 1;
  127. int bigoff = (e1 - e2) / BIGIT_BITS;
  128. int off = e1 - e2 - bigoff * BIGIT_BITS;
  129. if (bigoff > BIGITS)
  130. return *this;
  131. ret.m_signexp = m_signexp;
  132. uint64_t carry = 0;
  133. for (int i = BIGITS; i--; )
  134. {
  135. carry += m_mantissa[i];
  136. if (i - bigoff >= 0)
  137. carry += x.m_mantissa[i - bigoff] >> off;
  138. if (off && i - bigoff > 0)
  139. carry += (x.m_mantissa[i - bigoff - 1] << (BIGIT_BITS - off)) & 0xffffffffu;
  140. else if (i - bigoff == 0)
  141. carry += (uint64_t)1 << (BIGIT_BITS - off);
  142. ret.m_mantissa[i] = carry;
  143. carry >>= BIGIT_BITS;
  144. }
  145. /* Renormalise in case we overflowed the mantissa */
  146. if (carry)
  147. {
  148. carry--;
  149. for (int i = 0; i < BIGITS; i++)
  150. {
  151. uint32_t tmp = ret.m_mantissa[i];
  152. ret.m_mantissa[i] = (carry << (BIGIT_BITS - 1)) | (tmp >> 1);
  153. carry = tmp & 1u;
  154. }
  155. ret.m_signexp++;
  156. }
  157. return ret;
  158. }
  159. real real::operator -(real const &x) const
  160. {
  161. if (x.m_signexp << 1 == 0)
  162. return *this;
  163. /* Ensure both arguments are positive. Otherwise, switch signs,
  164. * or replace - with +. */
  165. if (m_signexp >> 31)
  166. return -(-*this + x);
  167. if (x.m_signexp >> 31)
  168. return (*this) + (-x);
  169. /* Ensure *this is larger than x */
  170. if (*this < x)
  171. return -(x - *this);
  172. real ret;
  173. int e1 = m_signexp - (1 << 30) + 1;
  174. int e2 = x.m_signexp - (1 << 30) + 1;
  175. int bigoff = (e1 - e2) / BIGIT_BITS;
  176. int off = e1 - e2 - bigoff * BIGIT_BITS;
  177. if (bigoff > BIGITS)
  178. return *this;
  179. ret.m_signexp = m_signexp;
  180. int64_t carry = 0;
  181. for (int i = 0; i < bigoff; i++)
  182. {
  183. carry -= x.m_mantissa[BIGITS - 1 - i];
  184. /* Emulates a signed shift */
  185. carry >>= BIGIT_BITS;
  186. carry |= carry << BIGIT_BITS;
  187. }
  188. if (bigoff < BIGITS)
  189. carry -= x.m_mantissa[BIGITS - 1 - bigoff] & (((int64_t)1 << off) - 1);
  190. carry /= (int64_t)1 << off;
  191. for (int i = BIGITS; i--; )
  192. {
  193. carry += m_mantissa[i];
  194. if (i - bigoff >= 0)
  195. carry -= x.m_mantissa[i - bigoff] >> off;
  196. if (off && i - bigoff > 0)
  197. carry -= (x.m_mantissa[i - bigoff - 1] << (BIGIT_BITS - off)) & 0xffffffffu;
  198. else if (i - bigoff == 0)
  199. carry -= (uint64_t)1 << (BIGIT_BITS - off);
  200. ret.m_mantissa[i] = carry;
  201. carry >>= BIGIT_BITS;
  202. carry |= carry << BIGIT_BITS;
  203. }
  204. carry += 1;
  205. /* Renormalise if we underflowed the mantissa */
  206. if (carry == 0)
  207. {
  208. /* How much do we need to shift the mantissa? FIXME: this could
  209. * be computed above */
  210. off = 0;
  211. for (int i = 0; i < BIGITS; i++)
  212. {
  213. if (!ret.m_mantissa[i])
  214. {
  215. off += BIGIT_BITS;
  216. continue;
  217. }
  218. for (uint32_t tmp = ret.m_mantissa[i]; tmp < 0x80000000u; tmp <<= 1)
  219. off++;
  220. break;
  221. }
  222. if (off == BIGITS * BIGIT_BITS)
  223. ret.m_signexp &= 0x80000000u;
  224. else
  225. {
  226. off++; /* Shift one more to get rid of the leading one */
  227. ret.m_signexp -= off;
  228. bigoff = off / BIGIT_BITS;
  229. off -= bigoff * BIGIT_BITS;
  230. for (int i = 0; i < BIGITS; i++)
  231. {
  232. uint32_t tmp = 0;
  233. if (i + bigoff < BIGITS)
  234. tmp |= ret.m_mantissa[i + bigoff] << off;
  235. if (off && i + bigoff + 1 < BIGITS)
  236. tmp |= ret.m_mantissa[i + bigoff + 1] >> (BIGIT_BITS - off);
  237. ret.m_mantissa[i] = tmp;
  238. }
  239. }
  240. }
  241. return ret;
  242. }
  243. real real::operator *(real const &x) const
  244. {
  245. real ret;
  246. if (m_signexp << 1 == 0 || x.m_signexp << 1 == 0)
  247. {
  248. ret = (m_signexp << 1 == 0) ? *this : x;
  249. ret.m_signexp ^= x.m_signexp & 0x80000000u;
  250. return ret;
  251. }
  252. ret.m_signexp = (m_signexp ^ x.m_signexp) & 0x80000000u;
  253. int e = (m_signexp & 0x7fffffffu) - (1 << 30) + 1
  254. + (x.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
  255. /* Accumulate low order product; no need to store it, we just
  256. * want the carry value */
  257. uint64_t carry = 0, hicarry = 0, prev;
  258. for (int i = 0; i < BIGITS; i++)
  259. {
  260. for (int j = 0; j < i + 1; j++)
  261. {
  262. prev = carry;
  263. carry += (uint64_t)m_mantissa[BIGITS - 1 - j]
  264. * (uint64_t)x.m_mantissa[BIGITS - 1 + j - i];
  265. if (carry < prev)
  266. hicarry++;
  267. }
  268. carry >>= BIGIT_BITS;
  269. carry |= hicarry << BIGIT_BITS;
  270. hicarry >>= BIGIT_BITS;
  271. }
  272. for (int i = 0; i < BIGITS; i++)
  273. {
  274. for (int j = i + 1; j < BIGITS; j++)
  275. {
  276. prev = carry;
  277. carry += (uint64_t)m_mantissa[BIGITS - 1 - j]
  278. * (uint64_t)x.m_mantissa[j - 1 - i];
  279. if (carry < prev)
  280. hicarry++;
  281. }
  282. prev = carry;
  283. carry += m_mantissa[BIGITS - 1 - i];
  284. carry += x.m_mantissa[BIGITS - 1 - i];
  285. if (carry < prev)
  286. hicarry++;
  287. ret.m_mantissa[BIGITS - 1 - i] = carry & 0xffffffffu;
  288. carry >>= BIGIT_BITS;
  289. carry |= hicarry << BIGIT_BITS;
  290. hicarry >>= BIGIT_BITS;
  291. }
  292. /* Renormalise in case we overflowed the mantissa */
  293. if (carry)
  294. {
  295. carry--;
  296. for (int i = 0; i < BIGITS; i++)
  297. {
  298. uint32_t tmp = ret.m_mantissa[i];
  299. ret.m_mantissa[i] = (carry << (BIGIT_BITS - 1)) | (tmp >> 1);
  300. carry = tmp & 1u;
  301. }
  302. e++;
  303. }
  304. ret.m_signexp |= e + (1 << 30) - 1;
  305. return ret;
  306. }
  307. real real::operator /(real const &x) const
  308. {
  309. return *this * re(x);
  310. }
  311. real const &real::operator +=(real const &x)
  312. {
  313. real tmp = *this;
  314. return *this = tmp + x;
  315. }
  316. real const &real::operator -=(real const &x)
  317. {
  318. real tmp = *this;
  319. return *this = tmp - x;
  320. }
  321. real const &real::operator *=(real const &x)
  322. {
  323. real tmp = *this;
  324. return *this = tmp * x;
  325. }
  326. real const &real::operator /=(real const &x)
  327. {
  328. real tmp = *this;
  329. return *this = tmp / x;
  330. }
  331. real real::operator <<(int x) const
  332. {
  333. real tmp = *this;
  334. return tmp <<= x;
  335. }
  336. real real::operator >>(int x) const
  337. {
  338. real tmp = *this;
  339. return tmp >>= x;
  340. }
  341. real const &real::operator <<=(int x)
  342. {
  343. if (m_signexp << 1)
  344. m_signexp += x;
  345. return *this;
  346. }
  347. real const &real::operator >>=(int x)
  348. {
  349. if (m_signexp << 1)
  350. m_signexp -= x;
  351. return *this;
  352. }
  353. bool real::operator ==(real const &x) const
  354. {
  355. if ((m_signexp << 1) == 0 && (x.m_signexp << 1) == 0)
  356. return true;
  357. if (m_signexp != x.m_signexp)
  358. return false;
  359. return memcmp(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t)) == 0;
  360. }
  361. bool real::operator !=(real const &x) const
  362. {
  363. return !(*this == x);
  364. }
  365. bool real::operator <(real const &x) const
  366. {
  367. /* Ensure both numbers are positive */
  368. if (m_signexp >> 31)
  369. return (x.m_signexp >> 31) ? -*this > -x : true;
  370. if (x.m_signexp >> 31)
  371. return false;
  372. /* Compare all relevant bits */
  373. if (m_signexp != x.m_signexp)
  374. return m_signexp < x.m_signexp;
  375. for (int i = 0; i < BIGITS; i++)
  376. if (m_mantissa[i] != x.m_mantissa[i])
  377. return m_mantissa[i] < x.m_mantissa[i];
  378. return false;
  379. }
  380. bool real::operator <=(real const &x) const
  381. {
  382. return !(*this > x);
  383. }
  384. bool real::operator >(real const &x) const
  385. {
  386. /* Ensure both numbers are positive */
  387. if (m_signexp >> 31)
  388. return (x.m_signexp >> 31) ? -*this < -x : false;
  389. if (x.m_signexp >> 31)
  390. return true;
  391. /* Compare all relevant bits */
  392. if (m_signexp != x.m_signexp)
  393. return m_signexp > x.m_signexp;
  394. for (int i = 0; i < BIGITS; i++)
  395. if (m_mantissa[i] != x.m_mantissa[i])
  396. return m_mantissa[i] > x.m_mantissa[i];
  397. return false;
  398. }
  399. bool real::operator >=(real const &x) const
  400. {
  401. return !(*this < x);
  402. }
  403. bool real::operator !() const
  404. {
  405. return !(bool)*this;
  406. }
  407. real::operator bool() const
  408. {
  409. /* A real is "true" if it is non-zero (exponent is non-zero) AND
  410. * not NaN (exponent is not full bits OR higher order mantissa is zero) */
  411. uint32_t exponent = m_signexp << 1;
  412. return exponent && (~exponent || m_mantissa[0] == 0);
  413. }
  414. real re(real const &x)
  415. {
  416. if (!(x.m_signexp << 1))
  417. {
  418. real ret = x;
  419. ret.m_signexp = x.m_signexp | 0x7fffffffu;
  420. ret.m_mantissa[0] = 0;
  421. return ret;
  422. }
  423. /* Use the system's float inversion to approximate 1/x */
  424. union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f };
  425. v.x |= x.m_mantissa[0] >> 9;
  426. v.f = 1.0 / v.f;
  427. real ret;
  428. ret.m_mantissa[0] = v.x << 9;
  429. uint32_t sign = x.m_signexp & 0x80000000u;
  430. ret.m_signexp = sign;
  431. int exponent = (x.m_signexp & 0x7fffffffu) + 1;
  432. exponent = -exponent + (v.x >> 23) - (u.x >> 23);
  433. ret.m_signexp |= (exponent - 1) & 0x7fffffffu;
  434. /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
  435. * convergence, but this hasn't been checked seriously. */
  436. for (int i = 1; i <= real::BIGITS; i *= 2)
  437. ret = ret * (real::R_2 - ret * x);
  438. return ret;
  439. }
  440. real sqrt(real const &x)
  441. {
  442. /* if zero, return x */
  443. if (!(x.m_signexp << 1))
  444. return x;
  445. /* if negative, return NaN */
  446. if (x.m_signexp >> 31)
  447. {
  448. real ret;
  449. ret.m_signexp = 0x7fffffffu;
  450. ret.m_mantissa[0] = 0xffffu;
  451. return ret;
  452. }
  453. /* Use the system's float inversion to approximate 1/sqrt(x). First
  454. * we construct a float in the [1..4[ range that has roughly the same
  455. * mantissa as our real. Its exponent is 0 or 1, depending on the
  456. * partity of x. The final exponent is 0, -1 or -2. We use the final
  457. * exponent and final mantissa to pre-fill the result. */
  458. union { float f; uint32_t x; } u = { 1.0f }, v = { 2.0f };
  459. v.x -= ((x.m_signexp & 1) << 23);
  460. v.x |= x.m_mantissa[0] >> 9;
  461. v.f = 1.0 / sqrtf(v.f);
  462. real ret;
  463. ret.m_mantissa[0] = v.x << 9;
  464. uint32_t sign = x.m_signexp & 0x80000000u;
  465. ret.m_signexp = sign;
  466. uint32_t exponent = (x.m_signexp & 0x7fffffffu);
  467. exponent = ((1 << 30) + (1 << 29) - 1) - (exponent + 1) / 2;
  468. exponent = exponent + (v.x >> 23) - (u.x >> 23);
  469. ret.m_signexp |= exponent & 0x7fffffffu;
  470. /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
  471. * convergence, but this hasn't been checked seriously. */
  472. for (int i = 1; i <= real::BIGITS; i *= 2)
  473. {
  474. ret = ret * (real::R_3 - ret * ret * x);
  475. ret.m_signexp--;
  476. }
  477. return ret * x;
  478. }
  479. real cbrt(real const &x)
  480. {
  481. /* if zero, return x */
  482. if (!(x.m_signexp << 1))
  483. return x;
  484. /* Use the system's float inversion to approximate cbrt(x). First
  485. * we construct a float in the [1..8[ range that has roughly the same
  486. * mantissa as our real. Its exponent is 0, 1 or 2, depending on the
  487. * value of x. The final exponent is 0 or 1 (special case). We use
  488. * the final exponent and final mantissa to pre-fill the result. */
  489. union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f };
  490. v.x += ((x.m_signexp % 3) << 23);
  491. v.x |= x.m_mantissa[0] >> 9;
  492. v.f = powf(v.f, 0.33333333333333333f);
  493. real ret;
  494. ret.m_mantissa[0] = v.x << 9;
  495. uint32_t sign = x.m_signexp & 0x80000000u;
  496. ret.m_signexp = sign;
  497. int exponent = (x.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
  498. exponent = exponent / 3 + (v.x >> 23) - (u.x >> 23);
  499. ret.m_signexp |= (exponent + (1 << 30) - 1) & 0x7fffffffu;
  500. /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
  501. * convergence, but this hasn't been checked seriously. */
  502. for (int i = 1; i <= real::BIGITS; i *= 2)
  503. {
  504. static real third = re(real::R_3);
  505. ret = third * (x / (ret * ret) + (ret << 1));
  506. }
  507. return ret;
  508. }
  509. real pow(real const &x, real const &y)
  510. {
  511. if (!y)
  512. return real::R_1;
  513. if (!x)
  514. return real::R_0;
  515. if (x > real::R_0)
  516. return exp(y * log(x));
  517. else /* x < 0 */
  518. {
  519. if (y == round(y))
  520. return -exp(y * log(-x));
  521. /* FIXME: negative nth root */
  522. return real::R_0;
  523. }
  524. }
  525. real gamma(real const &x)
  526. {
  527. /* We use Spouge's formula. FIXME: precision is far from acceptable,
  528. * especially with large values. We need to compute this with higher
  529. * precision values in order to attain the desired accuracy. It might
  530. * also be useful to sort the ck values by decreasing absolute value
  531. * and do the addition in this order. */
  532. int a = ceilf(logf(2) / logf(2 * M_PI) * real::BIGITS * real::BIGIT_BITS);
  533. real ret = sqrt(real::R_PI << 1);
  534. real fact_k_1 = real::R_1;
  535. for (int k = 1; k < a; k++)
  536. {
  537. real a_k = (real)(a - k);
  538. real ck = pow(a_k, (real)((float)k - 0.5)) * exp(a_k)
  539. / (fact_k_1 * (x + (real)(k - 1)));
  540. ret += ck;
  541. fact_k_1 *= (real)-k;
  542. }
  543. ret *= pow(x + (real)(a - 1), x - (real::R_1 >> 1));
  544. ret *= exp(-x - (real)(a - 1));
  545. return ret;
  546. }
  547. real fabs(real const &x)
  548. {
  549. real ret = x;
  550. ret.m_signexp &= 0x7fffffffu;
  551. return ret;
  552. }
  553. static real fast_log(real const &x)
  554. {
  555. /* This fast log method is tuned to work on the [1..2] range and
  556. * no effort whatsoever was made to improve convergence outside this
  557. * domain of validity. It can converge pretty fast, provided we use
  558. * the following variable substitutions:
  559. * y = sqrt(x)
  560. * z = (y - 1) / (y + 1)
  561. *
  562. * And the following identities:
  563. * ln(x) = 2 ln(y)
  564. * = 2 ln((1 + z) / (1 - z))
  565. * = 4 z (1 + z^2 / 3 + z^4 / 5 + z^6 / 7...)
  566. *
  567. * Any additional sqrt() call would halve the convergence time, but
  568. * would also impact the final precision. For now we stick with one
  569. * sqrt() call. */
  570. real y = sqrt(x);
  571. real z = (y - real::R_1) / (y + real::R_1), z2 = z * z, zn = z2;
  572. real sum = real::R_1;
  573. for (int i = 3; ; i += 2)
  574. {
  575. real newsum = sum + zn / (real)i;
  576. if (newsum == sum)
  577. break;
  578. sum = newsum;
  579. zn *= z2;
  580. }
  581. return z * (sum << 2);
  582. }
  583. real log(real const &x)
  584. {
  585. /* Strategy for log(x): if x = 2^E*M then log(x) = E log(2) + log(M),
  586. * with the property that M is in [1..2[, so fast_log() applies here. */
  587. real tmp = x;
  588. if (x.m_signexp >> 31 || x.m_signexp == 0)
  589. {
  590. tmp.m_signexp = 0xffffffffu;
  591. tmp.m_mantissa[0] = 0xffffffffu;
  592. return tmp;
  593. }
  594. tmp.m_signexp = (1 << 30) - 1;
  595. return (real)(int)(x.m_signexp - (1 << 30) + 1) * real::R_LN2
  596. + fast_log(tmp);
  597. }
  598. real log2(real const &x)
  599. {
  600. /* Strategy for log2(x): see log(x). */
  601. real tmp = x;
  602. if (x.m_signexp >> 31 || x.m_signexp == 0)
  603. {
  604. tmp.m_signexp = 0xffffffffu;
  605. tmp.m_mantissa[0] = 0xffffffffu;
  606. return tmp;
  607. }
  608. tmp.m_signexp = (1 << 30) - 1;
  609. return (real)(int)(x.m_signexp - (1 << 30) + 1)
  610. + fast_log(tmp) * real::R_LOG2E;
  611. }
  612. real log10(real const &x)
  613. {
  614. return log(x) * real::R_LOG10E;
  615. }
  616. static real fast_exp_sub(real const &x, real const &y)
  617. {
  618. /* This fast exp method is tuned to work on the [-1..1] range and
  619. * no effort whatsoever was made to improve convergence outside this
  620. * domain of validity. The argument y is used for cases where we
  621. * don't want the leading 1 in the Taylor series. */
  622. real ret = real::R_1 - y, fact = real::R_1, xn = x;
  623. for (int i = 1; ; i++)
  624. {
  625. real newret = ret + xn;
  626. if (newret == ret)
  627. break;
  628. ret = newret;
  629. real mul = (i + 1);
  630. fact *= mul;
  631. ret *= mul;
  632. xn *= x;
  633. }
  634. ret /= fact;
  635. return ret;
  636. }
  637. real exp(real const &x)
  638. {
  639. /* Strategy for exp(x): the Taylor series does not converge very fast
  640. * with large positive or negative values.
  641. *
  642. * However, we know that the result is going to be in the form M*2^E,
  643. * where M is the mantissa and E the exponent. We first try to predict
  644. * a value for E, which is approximately log2(exp(x)) = x / log(2).
  645. *
  646. * Let E0 be an integer close to x / log(2). We need to find a value x0
  647. * such that exp(x) = 2^E0 * exp(x0). We get x0 = x - E0 log(2).
  648. *
  649. * Thus the final algorithm:
  650. * int E0 = x / log(2)
  651. * real x0 = x - E0 log(2)
  652. * real x1 = exp(x0)
  653. * return x1 * 2^E0
  654. */
  655. int e0 = x / real::R_LN2;
  656. real x0 = x - (real)e0 * real::R_LN2;
  657. real x1 = fast_exp_sub(x0, real::R_0);
  658. x1.m_signexp += e0;
  659. return x1;
  660. }
  661. real exp2(real const &x)
  662. {
  663. /* Strategy for exp2(x): see strategy in exp(). */
  664. int e0 = x;
  665. real x0 = x - (real)e0;
  666. real x1 = fast_exp_sub(x0 * real::R_LN2, real::R_0);
  667. x1.m_signexp += e0;
  668. return x1;
  669. }
  670. real sinh(real const &x)
  671. {
  672. /* We cannot always use (exp(x)-exp(-x))/2 because we'll lose
  673. * accuracy near zero. We only use this identity for |x|>0.5. If
  674. * |x|<=0.5, we compute exp(x)-1 and exp(-x)-1 instead. */
  675. bool near_zero = (fabs(x) < real::R_1 >> 1);
  676. real x1 = near_zero ? fast_exp_sub(x, real::R_1) : exp(x);
  677. real x2 = near_zero ? fast_exp_sub(-x, real::R_1) : exp(-x);
  678. return (x1 - x2) >> 1;
  679. }
  680. real tanh(real const &x)
  681. {
  682. /* See sinh() for the strategy here */
  683. bool near_zero = (fabs(x) < real::R_1 >> 1);
  684. real x1 = near_zero ? fast_exp_sub(x, real::R_1) : exp(x);
  685. real x2 = near_zero ? fast_exp_sub(-x, real::R_1) : exp(-x);
  686. real x3 = near_zero ? x1 + x2 + real::R_2 : x1 + x2;
  687. return (x1 - x2) / x3;
  688. }
  689. real cosh(real const &x)
  690. {
  691. /* No need to worry about accuracy here; maybe the last bit is slightly
  692. * off, but that's about it. */
  693. return (exp(x) + exp(-x)) >> 1;
  694. }
  695. real frexp(real const &x, int *exp)
  696. {
  697. if (!x)
  698. {
  699. *exp = 0;
  700. return x;
  701. }
  702. real ret = x;
  703. int exponent = (ret.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
  704. *exp = exponent + 1;
  705. ret.m_signexp -= exponent + 1;
  706. return ret;
  707. }
  708. real ldexp(real const &x, int exp)
  709. {
  710. real ret = x;
  711. if (ret)
  712. ret.m_signexp += exp;
  713. return ret;
  714. }
  715. real modf(real const &x, real *iptr)
  716. {
  717. real absx = fabs(x);
  718. real tmp = floor(absx);
  719. *iptr = copysign(tmp, x);
  720. return copysign(absx - tmp, x);
  721. }
  722. real copysign(real const &x, real const &y)
  723. {
  724. real ret = x;
  725. ret.m_signexp &= 0x7fffffffu;
  726. ret.m_signexp |= y.m_signexp & 0x80000000u;
  727. return ret;
  728. }
  729. real floor(real const &x)
  730. {
  731. /* Strategy for floor(x):
  732. * - if negative, return -ceil(-x)
  733. * - if zero or negative zero, return x
  734. * - if less than one, return zero
  735. * - otherwise, if e is the exponent, clear all bits except the
  736. * first e. */
  737. if (x < -real::R_0)
  738. return -ceil(-x);
  739. if (!x)
  740. return x;
  741. if (x < real::R_1)
  742. return real::R_0;
  743. real ret = x;
  744. int exponent = x.m_signexp - (1 << 30) + 1;
  745. for (int i = 0; i < real::BIGITS; i++)
  746. {
  747. if (exponent <= 0)
  748. ret.m_mantissa[i] = 0;
  749. else if (exponent < real::BIGIT_BITS)
  750. ret.m_mantissa[i] &= ~((1 << (real::BIGIT_BITS - exponent)) - 1);
  751. exponent -= real::BIGIT_BITS;
  752. }
  753. return ret;
  754. }
  755. real ceil(real const &x)
  756. {
  757. /* Strategy for ceil(x):
  758. * - if negative, return -floor(-x)
  759. * - if x == floor(x), return x
  760. * - otherwise, return floor(x) + 1 */
  761. if (x < -real::R_0)
  762. return -floor(-x);
  763. real ret = floor(x);
  764. if (x == ret)
  765. return ret;
  766. else
  767. return ret + real::R_1;
  768. }
  769. real round(real const &x)
  770. {
  771. if (x < real::R_0)
  772. return -round(-x);
  773. return floor(x + (real::R_1 >> 1));
  774. }
  775. real fmod(real const &x, real const &y)
  776. {
  777. if (!y)
  778. return real::R_0; /* FIXME: return NaN */
  779. if (!x)
  780. return x;
  781. real tmp = round(x / y);
  782. return x - tmp * y;
  783. }
  784. real sin(real const &x)
  785. {
  786. bool switch_sign = x.m_signexp & 0x80000000u;
  787. real absx = fmod(fabs(x), real::R_PI << 1);
  788. if (absx > real::R_PI)
  789. {
  790. absx -= real::R_PI;
  791. switch_sign = !switch_sign;
  792. }
  793. if (absx > real::R_PI_2)
  794. absx = real::R_PI - absx;
  795. real ret = real::R_0, fact = real::R_1, xn = absx, mx2 = -absx * absx;
  796. for (int i = 1; ; i += 2)
  797. {
  798. real newret = ret + xn;
  799. if (newret == ret)
  800. break;
  801. ret = newret;
  802. real mul = (i + 1) * (i + 2);
  803. fact *= mul;
  804. ret *= mul;
  805. xn *= mx2;
  806. }
  807. ret /= fact;
  808. /* Propagate sign */
  809. if (switch_sign)
  810. ret.m_signexp ^= 0x80000000u;
  811. return ret;
  812. }
  813. real cos(real const &x)
  814. {
  815. return sin(real::R_PI_2 - x);
  816. }
  817. real tan(real const &x)
  818. {
  819. /* Constrain input to [-π,π] */
  820. real y = fmod(x, real::R_PI);
  821. /* Constrain input to [-π/2,π/2] */
  822. if (y < -real::R_PI_2)
  823. y += real::R_PI;
  824. else if (y > real::R_PI_2)
  825. y -= real::R_PI;
  826. /* In [-π/4,π/4] return sin/cos */
  827. if (fabs(y) <= real::R_PI_4)
  828. return sin(y) / cos(y);
  829. /* Otherwise, return cos/sin */
  830. if (y > real::R_0)
  831. y = real::R_PI_2 - y;
  832. else
  833. y = -real::R_PI_2 - y;
  834. return cos(y) / sin(y);
  835. }
  836. static real asinacos(real const &x, bool is_asin, bool is_negative)
  837. {
  838. /* Strategy for asin(): in [-0.5..0.5], use a Taylor series around
  839. * zero. In [0.5..1], use asin(x) = π/2 - 2*asin(sqrt((1-x)/2)), and
  840. * in [-1..-0.5] just revert the sign.
  841. * Strategy for acos(): use acos(x) = π/2 - asin(x) and try not to
  842. * lose the precision around x=1. */
  843. real absx = fabs(x);
  844. bool around_zero = (absx < (real::R_1 >> 1));
  845. if (!around_zero)
  846. absx = sqrt((real::R_1 - absx) >> 1);
  847. real ret = absx, xn = absx, x2 = absx * absx, fact1 = 2, fact2 = 1;
  848. for (int i = 1; ; i++)
  849. {
  850. xn *= x2;
  851. real mul = (real)(2 * i + 1);
  852. real newret = ret + ((fact1 * xn / (mul * fact2)) >> (i * 2));
  853. if (newret == ret)
  854. break;
  855. ret = newret;
  856. fact1 *= (real)((2 * i + 1) * (2 * i + 2));
  857. fact2 *= (real)((i + 1) * (i + 1));
  858. }
  859. if (is_negative)
  860. ret = -ret;
  861. if (around_zero)
  862. ret = is_asin ? ret : real::R_PI_2 - ret;
  863. else
  864. {
  865. real adjust = is_negative ? real::R_PI : real::R_0;
  866. if (is_asin)
  867. ret = real::R_PI_2 - adjust - (ret << 1);
  868. else
  869. ret = adjust + (ret << 1);
  870. }
  871. return ret;
  872. }
  873. real asin(real const &x)
  874. {
  875. return asinacos(x, true, x.m_signexp >> 31);
  876. }
  877. real acos(real const &x)
  878. {
  879. return asinacos(x, false, x.m_signexp >> 31);
  880. }
  881. real atan(real const &x)
  882. {
  883. /* Computing atan(x): we choose a different Taylor series depending on
  884. * the value of x to help with convergence.
  885. *
  886. * If |x| < 0.5 we evaluate atan(y) near 0:
  887. * atan(y) = y - y^3/3 + y^5/5 - y^7/7 + y^9/9 ...
  888. *
  889. * If 0.5 <= |x| < 1.5 we evaluate atan(1+y) near 0:
  890. * atan(1+y) = π/4 + y/(1*2^1) - y^2/(2*2^1) + y^3/(3*2^2)
  891. * - y^5/(5*2^3) + y^6/(6*2^3) - y^7/(7*2^4)
  892. * + y^9/(9*2^5) - y^10/(10*2^5) + y^11/(11*2^6) ...
  893. *
  894. * If 1.5 <= |x| < 2 we evaluate atan(sqrt(3)+2y) near 0:
  895. * atan(sqrt(3)+2y) = π/3 + 1/2 y - sqrt(3)/2 y^2/2
  896. * + y^3/3 - sqrt(3)/2 y^4/4 + 1/2 y^5/5
  897. * - 1/2 y^7/7 + sqrt(3)/2 y^8/8
  898. * - y^9/9 + sqrt(3)/2 y^10/10 - 1/2 y^11/11
  899. * + 1/2 y^13/13 - sqrt(3)/2 y^14/14
  900. * + y^15/15 - sqrt(3)/2 y^16/16 + 1/2 y^17/17 ...
  901. *
  902. * If |x| >= 2 we evaluate atan(y) near +∞:
  903. * atan(y) = π/2 - y^-1 + y^-3/3 - y^-5/5 + y^-7/7 - y^-9/9 ...
  904. */
  905. real absx = fabs(x);
  906. if (absx < (real::R_1 >> 1))
  907. {
  908. real ret = x, xn = x, mx2 = -x * x;
  909. for (int i = 3; ; i += 2)
  910. {
  911. xn *= mx2;
  912. real newret = ret + xn / (real)i;
  913. if (newret == ret)
  914. break;
  915. ret = newret;
  916. }
  917. return ret;
  918. }
  919. real ret = 0;
  920. if (absx < (real::R_3 >> 1))
  921. {
  922. real y = real::R_1 - absx;
  923. real yn = y, my2 = -y * y;
  924. for (int i = 0; ; i += 2)
  925. {
  926. real newret = ret + ((yn / (real)(2 * i + 1)) >> (i + 1));
  927. yn *= y;
  928. newret += (yn / (real)(2 * i + 2)) >> (i + 1);
  929. yn *= y;
  930. newret += (yn / (real)(2 * i + 3)) >> (i + 2);
  931. if (newret == ret)
  932. break;
  933. ret = newret;
  934. yn *= my2;
  935. }
  936. ret = real::R_PI_4 - ret;
  937. }
  938. else if (absx < real::R_2)
  939. {
  940. real y = (absx - real::R_SQRT3) >> 1;
  941. real yn = y, my2 = -y * y;
  942. for (int i = 1; ; i += 6)
  943. {
  944. real newret = ret + ((yn / (real)i) >> 1);
  945. yn *= y;
  946. newret -= (real::R_SQRT3 >> 1) * yn / (real)(i + 1);
  947. yn *= y;
  948. newret += yn / (real)(i + 2);
  949. yn *= y;
  950. newret -= (real::R_SQRT3 >> 1) * yn / (real)(i + 3);
  951. yn *= y;
  952. newret += (yn / (real)(i + 4)) >> 1;
  953. if (newret == ret)
  954. break;
  955. ret = newret;
  956. yn *= my2;
  957. }
  958. ret = real::R_PI_3 + ret;
  959. }
  960. else
  961. {
  962. real y = re(absx);
  963. real yn = y, my2 = -y * y;
  964. ret = y;
  965. for (int i = 3; ; i += 2)
  966. {
  967. yn *= my2;
  968. real newret = ret + yn / (real)i;
  969. if (newret == ret)
  970. break;
  971. ret = newret;
  972. }
  973. ret = real::R_PI_2 - ret;
  974. }
  975. /* Propagate sign */
  976. ret.m_signexp |= (x.m_signexp & 0x80000000u);
  977. return ret;
  978. }
  979. real atan2(real const &y, real const &x)
  980. {
  981. if (!y)
  982. {
  983. if ((x.m_signexp >> 31) == 0)
  984. return y;
  985. if (y.m_signexp >> 31)
  986. return -real::R_PI;
  987. return real::R_PI;
  988. }
  989. if (!x)
  990. {
  991. if (y.m_signexp >> 31)
  992. return -real::R_PI;
  993. return real::R_PI;
  994. }
  995. /* FIXME: handle the Inf and NaN cases */
  996. real z = y / x;
  997. real ret = atan(z);
  998. if (x < real::R_0)
  999. ret += (y > real::R_0) ? real::R_PI : -real::R_PI;
  1000. return ret;
  1001. }
  1002. void real::hexprint() const
  1003. {
  1004. printf("%08x", m_signexp);
  1005. for (int i = 0; i < BIGITS; i++)
  1006. printf(" %08x", m_mantissa[i]);
  1007. printf("\n");
  1008. }
  1009. void real::print(int ndigits) const
  1010. {
  1011. real x = *this;
  1012. if (x.m_signexp >> 31)
  1013. {
  1014. printf("-");
  1015. x = -x;
  1016. }
  1017. /* Normalise x so that mantissa is in [1..9.999] */
  1018. int exponent = 0;
  1019. if (x.m_signexp)
  1020. {
  1021. for (real div = R_1, newdiv; true; div = newdiv)
  1022. {
  1023. newdiv = div * R_10;
  1024. if (x < newdiv)
  1025. {
  1026. x /= div;
  1027. break;
  1028. }
  1029. exponent++;
  1030. }
  1031. for (real mul = 1, newx; true; mul *= R_10)
  1032. {
  1033. newx = x * mul;
  1034. if (newx >= R_1)
  1035. {
  1036. x = newx;
  1037. break;
  1038. }
  1039. exponent--;
  1040. }
  1041. }
  1042. /* Print digits */
  1043. for (int i = 0; i < ndigits; i++)
  1044. {
  1045. int digit = (int)x;
  1046. printf("%i", digit);
  1047. if (i == 0)
  1048. printf(".");
  1049. x -= real(digit);
  1050. x *= R_10;
  1051. }
  1052. /* Print exponent information */
  1053. if (exponent < 0)
  1054. printf("e-%i", -exponent);
  1055. else if (exponent > 0)
  1056. printf("e+%i", exponent);
  1057. printf("\n");
  1058. }
  1059. static real fast_pi()
  1060. {
  1061. /* Approximate Pi using Machin's formula: 16*atan(1/5)-4*atan(1/239) */
  1062. real ret = 0.0, x0 = 5.0, x1 = 239.0;
  1063. real const m0 = -x0 * x0, m1 = -x1 * x1, r16 = 16.0, r4 = 4.0;
  1064. for (int i = 1; ; i += 2)
  1065. {
  1066. real newret = ret + r16 / (x0 * (real)i) - r4 / (x1 * (real)i);
  1067. if (newret == ret)
  1068. break;
  1069. ret = newret;
  1070. x0 *= m0;
  1071. x1 *= m1;
  1072. }
  1073. return ret;
  1074. }
  1075. real const real::R_0 = (real)0.0;
  1076. real const real::R_1 = (real)1.0;
  1077. real const real::R_2 = (real)2.0;
  1078. real const real::R_3 = (real)3.0;
  1079. real const real::R_10 = (real)10.0;
  1080. real const real::R_LN2 = fast_log(R_2);
  1081. real const real::R_LN10 = log(R_10);
  1082. real const real::R_LOG2E = re(R_LN2);
  1083. real const real::R_LOG10E = re(R_LN10);
  1084. real const real::R_E = exp(R_1);
  1085. real const real::R_PI = fast_pi();
  1086. real const real::R_PI_2 = R_PI >> 1;
  1087. real const real::R_PI_3 = R_PI / R_3;
  1088. real const real::R_PI_4 = R_PI >> 2;
  1089. real const real::R_1_PI = re(R_PI);
  1090. real const real::R_2_PI = R_1_PI << 1;
  1091. real const real::R_2_SQRTPI = re(sqrt(R_PI)) << 1;
  1092. real const real::R_SQRT2 = sqrt(R_2);
  1093. real const real::R_SQRT3 = sqrt(R_3);
  1094. real const real::R_SQRT1_2 = R_SQRT2 >> 1;
  1095. } /* namespace lol */