1359 řádky
33 KiB

  1. //
  2. // Lol Engine
  3. //
  4. // Copyright: (c) 2010-2011 Sam Hocevar <sam@hocevar.net>
  5. // This program is free software; you can redistribute it and/or
  6. // modify it under the terms of the Do What The Fuck You Want To
  7. // Public License, Version 2, as published by Sam Hocevar. See
  8. // http://sam.zoy.org/projects/COPYING.WTFPL for more details.
  9. //
  10. #if defined HAVE_CONFIG_H
  11. # include "config.h"
  12. #endif
  13. #include <new>
  14. #include <cstring>
  15. #include <cstdio>
  16. #include <cstdlib>
  17. #include <cmath>
  18. #include "core.h"
  19. using namespace std;
  20. namespace lol
  21. {
  22. real::real()
  23. {
  24. m_mantissa = new uint32_t[BIGITS];
  25. m_signexp = 0;
  26. }
  27. real::real(real const &x)
  28. {
  29. m_mantissa = new uint32_t[BIGITS];
  30. memcpy(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t));
  31. m_signexp = x.m_signexp;
  32. }
  33. real const &real::operator =(real const &x)
  34. {
  35. if (&x != this)
  36. {
  37. memcpy(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t));
  38. m_signexp = x.m_signexp;
  39. }
  40. return *this;
  41. }
  42. real::~real()
  43. {
  44. delete[] m_mantissa;
  45. }
  46. real::real(float f) { new(this) real((double)f); }
  47. real::real(int i) { new(this) real((double)i); }
  48. real::real(unsigned int i) { new(this) real((double)i); }
  49. real::real(double d)
  50. {
  51. new(this) real();
  52. union { double d; uint64_t x; } u = { d };
  53. uint32_t sign = (u.x >> 63) << 31;
  54. uint32_t exponent = (u.x << 1) >> 53;
  55. switch (exponent)
  56. {
  57. case 0x00:
  58. m_signexp = sign;
  59. break;
  60. case 0x7ff:
  61. m_signexp = sign | 0x7fffffffu;
  62. break;
  63. default:
  64. m_signexp = sign | (exponent + (1 << 30) - (1 << 10));
  65. break;
  66. }
  67. m_mantissa[0] = u.x >> 20;
  68. m_mantissa[1] = u.x << 12;
  69. memset(m_mantissa + 2, 0, (BIGITS - 2) * sizeof(m_mantissa[0]));
  70. }
  71. real::operator float() const { return (float)(double)(*this); }
  72. real::operator int() const { return (int)(double)(*this); }
  73. real::operator unsigned int() const { return (unsigned int)(double)(*this); }
  74. real::operator double() const
  75. {
  76. union { double d; uint64_t x; } u;
  77. /* Get sign */
  78. u.x = m_signexp >> 31;
  79. u.x <<= 11;
  80. /* Compute new exponent */
  81. uint32_t exponent = (m_signexp << 1) >> 1;
  82. int e = (int)exponent - (1 << 30) + (1 << 10);
  83. if (e < 0)
  84. u.x <<= 52;
  85. else if (e >= 0x7ff)
  86. {
  87. u.x |= 0x7ff;
  88. u.x <<= 52;
  89. }
  90. else
  91. {
  92. u.x |= e;
  93. /* Store mantissa if necessary */
  94. u.x <<= 32;
  95. u.x |= m_mantissa[0];
  96. u.x <<= 20;
  97. u.x |= m_mantissa[1] >> 12;
  98. /* Rounding */
  99. u.x += (m_mantissa[1] >> 11) & 1;
  100. }
  101. return u.d;
  102. }
  103. /*
  104. * Create a real number from an ASCII representation
  105. */
  106. real::real(char const *str)
  107. {
  108. real ret = 0;
  109. int exponent = 0;
  110. bool comma = false, nonzero = false, negative = false, finished = false;
  111. for (char const *p = str; *p && !finished; p++)
  112. {
  113. switch (*p)
  114. {
  115. case '-':
  116. case '+':
  117. if (p != str)
  118. break;
  119. negative = (*p == '-');
  120. break;
  121. case '.':
  122. if (comma)
  123. finished = true;
  124. comma = true;
  125. break;
  126. case '0': case '1': case '2': case '3': case '4':
  127. case '5': case '6': case '7': case '8': case '9':
  128. if (nonzero)
  129. {
  130. real x = ret + ret;
  131. x = x + x + ret;
  132. ret = x + x;
  133. }
  134. if (*p != '0')
  135. {
  136. ret += (int)(*p - '0');
  137. nonzero = true;
  138. }
  139. if (comma)
  140. exponent--;
  141. break;
  142. case 'e':
  143. case 'E':
  144. exponent += atoi(p + 1);
  145. finished = true;
  146. break;
  147. default:
  148. finished = true;
  149. break;
  150. }
  151. }
  152. if (exponent)
  153. ret *= pow(R_10, (real)exponent);
  154. if (negative)
  155. ret = -ret;
  156. new(this) real(ret);
  157. }
  158. real real::operator +() const
  159. {
  160. return *this;
  161. }
  162. real real::operator -() const
  163. {
  164. real ret = *this;
  165. ret.m_signexp ^= 0x80000000u;
  166. return ret;
  167. }
  168. real real::operator +(real const &x) const
  169. {
  170. if (x.m_signexp << 1 == 0)
  171. return *this;
  172. /* Ensure both arguments are positive. Otherwise, switch signs,
  173. * or replace + with -. */
  174. if (m_signexp >> 31)
  175. return -(-*this + -x);
  176. if (x.m_signexp >> 31)
  177. return *this - (-x);
  178. /* Ensure *this has the larger exponent (no need for the mantissa to
  179. * be larger, as in subtraction). Otherwise, switch. */
  180. if ((m_signexp << 1) < (x.m_signexp << 1))
  181. return x + *this;
  182. real ret;
  183. int e1 = m_signexp - (1 << 30) + 1;
  184. int e2 = x.m_signexp - (1 << 30) + 1;
  185. int bigoff = (e1 - e2) / BIGIT_BITS;
  186. int off = e1 - e2 - bigoff * BIGIT_BITS;
  187. if (bigoff > BIGITS)
  188. return *this;
  189. ret.m_signexp = m_signexp;
  190. uint64_t carry = 0;
  191. for (int i = BIGITS; i--; )
  192. {
  193. carry += m_mantissa[i];
  194. if (i - bigoff >= 0)
  195. carry += x.m_mantissa[i - bigoff] >> off;
  196. if (off && i - bigoff > 0)
  197. carry += (x.m_mantissa[i - bigoff - 1] << (BIGIT_BITS - off)) & 0xffffffffu;
  198. else if (i - bigoff == 0)
  199. carry += (uint64_t)1 << (BIGIT_BITS - off);
  200. ret.m_mantissa[i] = carry;
  201. carry >>= BIGIT_BITS;
  202. }
  203. /* Renormalise in case we overflowed the mantissa */
  204. if (carry)
  205. {
  206. carry--;
  207. for (int i = 0; i < BIGITS; i++)
  208. {
  209. uint32_t tmp = ret.m_mantissa[i];
  210. ret.m_mantissa[i] = (carry << (BIGIT_BITS - 1)) | (tmp >> 1);
  211. carry = tmp & 1u;
  212. }
  213. ret.m_signexp++;
  214. }
  215. return ret;
  216. }
  217. real real::operator -(real const &x) const
  218. {
  219. if (x.m_signexp << 1 == 0)
  220. return *this;
  221. /* Ensure both arguments are positive. Otherwise, switch signs,
  222. * or replace - with +. */
  223. if (m_signexp >> 31)
  224. return -(-*this + x);
  225. if (x.m_signexp >> 31)
  226. return (*this) + (-x);
  227. /* Ensure *this is larger than x */
  228. if (*this < x)
  229. return -(x - *this);
  230. real ret;
  231. int e1 = m_signexp - (1 << 30) + 1;
  232. int e2 = x.m_signexp - (1 << 30) + 1;
  233. int bigoff = (e1 - e2) / BIGIT_BITS;
  234. int off = e1 - e2 - bigoff * BIGIT_BITS;
  235. if (bigoff > BIGITS)
  236. return *this;
  237. ret.m_signexp = m_signexp;
  238. int64_t carry = 0;
  239. for (int i = 0; i < bigoff; i++)
  240. {
  241. carry -= x.m_mantissa[BIGITS - 1 - i];
  242. /* Emulates a signed shift */
  243. carry >>= BIGIT_BITS;
  244. carry |= carry << BIGIT_BITS;
  245. }
  246. if (bigoff < BIGITS)
  247. carry -= x.m_mantissa[BIGITS - 1 - bigoff] & (((int64_t)1 << off) - 1);
  248. carry /= (int64_t)1 << off;
  249. for (int i = BIGITS; i--; )
  250. {
  251. carry += m_mantissa[i];
  252. if (i - bigoff >= 0)
  253. carry -= x.m_mantissa[i - bigoff] >> off;
  254. if (off && i - bigoff > 0)
  255. carry -= (x.m_mantissa[i - bigoff - 1] << (BIGIT_BITS - off)) & 0xffffffffu;
  256. else if (i - bigoff == 0)
  257. carry -= (uint64_t)1 << (BIGIT_BITS - off);
  258. ret.m_mantissa[i] = carry;
  259. carry >>= BIGIT_BITS;
  260. carry |= carry << BIGIT_BITS;
  261. }
  262. carry += 1;
  263. /* Renormalise if we underflowed the mantissa */
  264. if (carry == 0)
  265. {
  266. /* How much do we need to shift the mantissa? FIXME: this could
  267. * be computed above */
  268. off = 0;
  269. for (int i = 0; i < BIGITS; i++)
  270. {
  271. if (!ret.m_mantissa[i])
  272. {
  273. off += BIGIT_BITS;
  274. continue;
  275. }
  276. for (uint32_t tmp = ret.m_mantissa[i]; tmp < 0x80000000u; tmp <<= 1)
  277. off++;
  278. break;
  279. }
  280. if (off == BIGITS * BIGIT_BITS)
  281. ret.m_signexp &= 0x80000000u;
  282. else
  283. {
  284. off++; /* Shift one more to get rid of the leading one */
  285. ret.m_signexp -= off;
  286. bigoff = off / BIGIT_BITS;
  287. off -= bigoff * BIGIT_BITS;
  288. for (int i = 0; i < BIGITS; i++)
  289. {
  290. uint32_t tmp = 0;
  291. if (i + bigoff < BIGITS)
  292. tmp |= ret.m_mantissa[i + bigoff] << off;
  293. if (off && i + bigoff + 1 < BIGITS)
  294. tmp |= ret.m_mantissa[i + bigoff + 1] >> (BIGIT_BITS - off);
  295. ret.m_mantissa[i] = tmp;
  296. }
  297. }
  298. }
  299. return ret;
  300. }
  301. real real::operator *(real const &x) const
  302. {
  303. real ret;
  304. if (m_signexp << 1 == 0 || x.m_signexp << 1 == 0)
  305. {
  306. ret = (m_signexp << 1 == 0) ? *this : x;
  307. ret.m_signexp ^= x.m_signexp & 0x80000000u;
  308. return ret;
  309. }
  310. ret.m_signexp = (m_signexp ^ x.m_signexp) & 0x80000000u;
  311. int e = (m_signexp & 0x7fffffffu) - (1 << 30) + 1
  312. + (x.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
  313. /* Accumulate low order product; no need to store it, we just
  314. * want the carry value */
  315. uint64_t carry = 0, hicarry = 0, prev;
  316. for (int i = 0; i < BIGITS; i++)
  317. {
  318. for (int j = 0; j < i + 1; j++)
  319. {
  320. prev = carry;
  321. carry += (uint64_t)m_mantissa[BIGITS - 1 - j]
  322. * (uint64_t)x.m_mantissa[BIGITS - 1 + j - i];
  323. if (carry < prev)
  324. hicarry++;
  325. }
  326. carry >>= BIGIT_BITS;
  327. carry |= hicarry << BIGIT_BITS;
  328. hicarry >>= BIGIT_BITS;
  329. }
  330. for (int i = 0; i < BIGITS; i++)
  331. {
  332. for (int j = i + 1; j < BIGITS; j++)
  333. {
  334. prev = carry;
  335. carry += (uint64_t)m_mantissa[BIGITS - 1 - j]
  336. * (uint64_t)x.m_mantissa[j - 1 - i];
  337. if (carry < prev)
  338. hicarry++;
  339. }
  340. prev = carry;
  341. carry += m_mantissa[BIGITS - 1 - i];
  342. carry += x.m_mantissa[BIGITS - 1 - i];
  343. if (carry < prev)
  344. hicarry++;
  345. ret.m_mantissa[BIGITS - 1 - i] = carry & 0xffffffffu;
  346. carry >>= BIGIT_BITS;
  347. carry |= hicarry << BIGIT_BITS;
  348. hicarry >>= BIGIT_BITS;
  349. }
  350. /* Renormalise in case we overflowed the mantissa */
  351. if (carry)
  352. {
  353. carry--;
  354. for (int i = 0; i < BIGITS; i++)
  355. {
  356. uint32_t tmp = ret.m_mantissa[i];
  357. ret.m_mantissa[i] = (carry << (BIGIT_BITS - 1)) | (tmp >> 1);
  358. carry = tmp & 1u;
  359. }
  360. e++;
  361. }
  362. ret.m_signexp |= e + (1 << 30) - 1;
  363. return ret;
  364. }
  365. real real::operator /(real const &x) const
  366. {
  367. return *this * re(x);
  368. }
  369. real const &real::operator +=(real const &x)
  370. {
  371. real tmp = *this;
  372. return *this = tmp + x;
  373. }
  374. real const &real::operator -=(real const &x)
  375. {
  376. real tmp = *this;
  377. return *this = tmp - x;
  378. }
  379. real const &real::operator *=(real const &x)
  380. {
  381. real tmp = *this;
  382. return *this = tmp * x;
  383. }
  384. real const &real::operator /=(real const &x)
  385. {
  386. real tmp = *this;
  387. return *this = tmp / x;
  388. }
  389. real real::operator <<(int x) const
  390. {
  391. real tmp = *this;
  392. return tmp <<= x;
  393. }
  394. real real::operator >>(int x) const
  395. {
  396. real tmp = *this;
  397. return tmp >>= x;
  398. }
  399. real const &real::operator <<=(int x)
  400. {
  401. if (m_signexp << 1)
  402. m_signexp += x;
  403. return *this;
  404. }
  405. real const &real::operator >>=(int x)
  406. {
  407. if (m_signexp << 1)
  408. m_signexp -= x;
  409. return *this;
  410. }
  411. bool real::operator ==(real const &x) const
  412. {
  413. if ((m_signexp << 1) == 0 && (x.m_signexp << 1) == 0)
  414. return true;
  415. if (m_signexp != x.m_signexp)
  416. return false;
  417. return memcmp(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t)) == 0;
  418. }
  419. bool real::operator !=(real const &x) const
  420. {
  421. return !(*this == x);
  422. }
  423. bool real::operator <(real const &x) const
  424. {
  425. /* Ensure both numbers are positive */
  426. if (m_signexp >> 31)
  427. return (x.m_signexp >> 31) ? -*this > -x : true;
  428. if (x.m_signexp >> 31)
  429. return false;
  430. /* Compare all relevant bits */
  431. if (m_signexp != x.m_signexp)
  432. return m_signexp < x.m_signexp;
  433. for (int i = 0; i < BIGITS; i++)
  434. if (m_mantissa[i] != x.m_mantissa[i])
  435. return m_mantissa[i] < x.m_mantissa[i];
  436. return false;
  437. }
  438. bool real::operator <=(real const &x) const
  439. {
  440. return !(*this > x);
  441. }
  442. bool real::operator >(real const &x) const
  443. {
  444. /* Ensure both numbers are positive */
  445. if (m_signexp >> 31)
  446. return (x.m_signexp >> 31) ? -*this < -x : false;
  447. if (x.m_signexp >> 31)
  448. return true;
  449. /* Compare all relevant bits */
  450. if (m_signexp != x.m_signexp)
  451. return m_signexp > x.m_signexp;
  452. for (int i = 0; i < BIGITS; i++)
  453. if (m_mantissa[i] != x.m_mantissa[i])
  454. return m_mantissa[i] > x.m_mantissa[i];
  455. return false;
  456. }
  457. bool real::operator >=(real const &x) const
  458. {
  459. return !(*this < x);
  460. }
  461. bool real::operator !() const
  462. {
  463. return !(bool)*this;
  464. }
  465. real::operator bool() const
  466. {
  467. /* A real is "true" if it is non-zero (exponent is non-zero) AND
  468. * not NaN (exponent is not full bits OR higher order mantissa is zero) */
  469. uint32_t exponent = m_signexp << 1;
  470. return exponent && (~exponent || m_mantissa[0] == 0);
  471. }
  472. real re(real const &x)
  473. {
  474. if (!(x.m_signexp << 1))
  475. {
  476. real ret = x;
  477. ret.m_signexp = x.m_signexp | 0x7fffffffu;
  478. ret.m_mantissa[0] = 0;
  479. return ret;
  480. }
  481. /* Use the system's float inversion to approximate 1/x */
  482. union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f };
  483. v.x |= x.m_mantissa[0] >> 9;
  484. v.f = 1.0 / v.f;
  485. real ret;
  486. ret.m_mantissa[0] = v.x << 9;
  487. uint32_t sign = x.m_signexp & 0x80000000u;
  488. ret.m_signexp = sign;
  489. int exponent = (x.m_signexp & 0x7fffffffu) + 1;
  490. exponent = -exponent + (v.x >> 23) - (u.x >> 23);
  491. ret.m_signexp |= (exponent - 1) & 0x7fffffffu;
  492. /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
  493. * convergence, but this hasn't been checked seriously. */
  494. for (int i = 1; i <= real::BIGITS; i *= 2)
  495. ret = ret * (real::R_2 - ret * x);
  496. return ret;
  497. }
  498. real sqrt(real const &x)
  499. {
  500. /* if zero, return x */
  501. if (!(x.m_signexp << 1))
  502. return x;
  503. /* if negative, return NaN */
  504. if (x.m_signexp >> 31)
  505. {
  506. real ret;
  507. ret.m_signexp = 0x7fffffffu;
  508. ret.m_mantissa[0] = 0xffffu;
  509. return ret;
  510. }
  511. /* Use the system's float inversion to approximate 1/sqrt(x). First
  512. * we construct a float in the [1..4[ range that has roughly the same
  513. * mantissa as our real. Its exponent is 0 or 1, depending on the
  514. * partity of x. The final exponent is 0, -1 or -2. We use the final
  515. * exponent and final mantissa to pre-fill the result. */
  516. union { float f; uint32_t x; } u = { 1.0f }, v = { 2.0f };
  517. v.x -= ((x.m_signexp & 1) << 23);
  518. v.x |= x.m_mantissa[0] >> 9;
  519. v.f = 1.0 / sqrtf(v.f);
  520. real ret;
  521. ret.m_mantissa[0] = v.x << 9;
  522. uint32_t sign = x.m_signexp & 0x80000000u;
  523. ret.m_signexp = sign;
  524. uint32_t exponent = (x.m_signexp & 0x7fffffffu);
  525. exponent = ((1 << 30) + (1 << 29) - 1) - (exponent + 1) / 2;
  526. exponent = exponent + (v.x >> 23) - (u.x >> 23);
  527. ret.m_signexp |= exponent & 0x7fffffffu;
  528. /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
  529. * convergence, but this hasn't been checked seriously. */
  530. for (int i = 1; i <= real::BIGITS; i *= 2)
  531. {
  532. ret = ret * (real::R_3 - ret * ret * x);
  533. ret.m_signexp--;
  534. }
  535. return ret * x;
  536. }
  537. real cbrt(real const &x)
  538. {
  539. /* if zero, return x */
  540. if (!(x.m_signexp << 1))
  541. return x;
  542. /* Use the system's float inversion to approximate cbrt(x). First
  543. * we construct a float in the [1..8[ range that has roughly the same
  544. * mantissa as our real. Its exponent is 0, 1 or 2, depending on the
  545. * value of x. The final exponent is 0 or 1 (special case). We use
  546. * the final exponent and final mantissa to pre-fill the result. */
  547. union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f };
  548. v.x += ((x.m_signexp % 3) << 23);
  549. v.x |= x.m_mantissa[0] >> 9;
  550. v.f = powf(v.f, 0.33333333333333333f);
  551. real ret;
  552. ret.m_mantissa[0] = v.x << 9;
  553. uint32_t sign = x.m_signexp & 0x80000000u;
  554. ret.m_signexp = sign;
  555. int exponent = (x.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
  556. exponent = exponent / 3 + (v.x >> 23) - (u.x >> 23);
  557. ret.m_signexp |= (exponent + (1 << 30) - 1) & 0x7fffffffu;
  558. /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
  559. * convergence, but this hasn't been checked seriously. */
  560. for (int i = 1; i <= real::BIGITS; i *= 2)
  561. {
  562. static real third = re(real::R_3);
  563. ret = third * (x / (ret * ret) + (ret << 1));
  564. }
  565. return ret;
  566. }
  567. real pow(real const &x, real const &y)
  568. {
  569. if (!y)
  570. return real::R_1;
  571. if (!x)
  572. return real::R_0;
  573. if (x > real::R_0)
  574. return exp(y * log(x));
  575. else /* x < 0 */
  576. {
  577. /* Odd integer exponent */
  578. if (y == (round(y >> 1) << 1))
  579. return exp(y * log(-x));
  580. /* Even integer exponent */
  581. if (y == round(y))
  582. return -exp(y * log(-x));
  583. /* FIXME: negative nth root */
  584. return real::R_0;
  585. }
  586. }
  587. real gamma(real const &x)
  588. {
  589. /* We use Spouge's formula. FIXME: precision is far from acceptable,
  590. * especially with large values. We need to compute this with higher
  591. * precision values in order to attain the desired accuracy. It might
  592. * also be useful to sort the ck values by decreasing absolute value
  593. * and do the addition in this order. */
  594. int a = ceilf(logf(2) / logf(2 * M_PI) * real::BIGITS * real::BIGIT_BITS);
  595. real ret = sqrt(real::R_PI << 1);
  596. real fact_k_1 = real::R_1;
  597. for (int k = 1; k < a; k++)
  598. {
  599. real a_k = (real)(a - k);
  600. real ck = pow(a_k, (real)((float)k - 0.5)) * exp(a_k)
  601. / (fact_k_1 * (x + (real)(k - 1)));
  602. ret += ck;
  603. fact_k_1 *= (real)-k;
  604. }
  605. ret *= pow(x + (real)(a - 1), x - (real::R_1 >> 1));
  606. ret *= exp(-x - (real)(a - 1));
  607. return ret;
  608. }
  609. real fabs(real const &x)
  610. {
  611. real ret = x;
  612. ret.m_signexp &= 0x7fffffffu;
  613. return ret;
  614. }
  615. static real fast_log(real const &x)
  616. {
  617. /* This fast log method is tuned to work on the [1..2] range and
  618. * no effort whatsoever was made to improve convergence outside this
  619. * domain of validity. It can converge pretty fast, provided we use
  620. * the following variable substitutions:
  621. * y = sqrt(x)
  622. * z = (y - 1) / (y + 1)
  623. *
  624. * And the following identities:
  625. * ln(x) = 2 ln(y)
  626. * = 2 ln((1 + z) / (1 - z))
  627. * = 4 z (1 + z^2 / 3 + z^4 / 5 + z^6 / 7...)
  628. *
  629. * Any additional sqrt() call would halve the convergence time, but
  630. * would also impact the final precision. For now we stick with one
  631. * sqrt() call. */
  632. real y = sqrt(x);
  633. real z = (y - real::R_1) / (y + real::R_1), z2 = z * z, zn = z2;
  634. real sum = real::R_1;
  635. for (int i = 3; ; i += 2)
  636. {
  637. real newsum = sum + zn / (real)i;
  638. if (newsum == sum)
  639. break;
  640. sum = newsum;
  641. zn *= z2;
  642. }
  643. return z * (sum << 2);
  644. }
  645. real log(real const &x)
  646. {
  647. /* Strategy for log(x): if x = 2^E*M then log(x) = E log(2) + log(M),
  648. * with the property that M is in [1..2[, so fast_log() applies here. */
  649. real tmp = x;
  650. if (x.m_signexp >> 31 || x.m_signexp == 0)
  651. {
  652. tmp.m_signexp = 0xffffffffu;
  653. tmp.m_mantissa[0] = 0xffffffffu;
  654. return tmp;
  655. }
  656. tmp.m_signexp = (1 << 30) - 1;
  657. return (real)(int)(x.m_signexp - (1 << 30) + 1) * real::R_LN2
  658. + fast_log(tmp);
  659. }
  660. real log2(real const &x)
  661. {
  662. /* Strategy for log2(x): see log(x). */
  663. real tmp = x;
  664. if (x.m_signexp >> 31 || x.m_signexp == 0)
  665. {
  666. tmp.m_signexp = 0xffffffffu;
  667. tmp.m_mantissa[0] = 0xffffffffu;
  668. return tmp;
  669. }
  670. tmp.m_signexp = (1 << 30) - 1;
  671. return (real)(int)(x.m_signexp - (1 << 30) + 1)
  672. + fast_log(tmp) * real::R_LOG2E;
  673. }
  674. real log10(real const &x)
  675. {
  676. return log(x) * real::R_LOG10E;
  677. }
  678. static real fast_exp_sub(real const &x, real const &y)
  679. {
  680. /* This fast exp method is tuned to work on the [-1..1] range and
  681. * no effort whatsoever was made to improve convergence outside this
  682. * domain of validity. The argument y is used for cases where we
  683. * don't want the leading 1 in the Taylor series. */
  684. real ret = real::R_1 - y, fact = real::R_1, xn = x;
  685. for (int i = 1; ; i++)
  686. {
  687. real newret = ret + xn;
  688. if (newret == ret)
  689. break;
  690. ret = newret;
  691. real mul = (i + 1);
  692. fact *= mul;
  693. ret *= mul;
  694. xn *= x;
  695. }
  696. ret /= fact;
  697. return ret;
  698. }
  699. real exp(real const &x)
  700. {
  701. /* Strategy for exp(x): the Taylor series does not converge very fast
  702. * with large positive or negative values.
  703. *
  704. * However, we know that the result is going to be in the form M*2^E,
  705. * where M is the mantissa and E the exponent. We first try to predict
  706. * a value for E, which is approximately log2(exp(x)) = x / log(2).
  707. *
  708. * Let E0 be an integer close to x / log(2). We need to find a value x0
  709. * such that exp(x) = 2^E0 * exp(x0). We get x0 = x - E0 log(2).
  710. *
  711. * Thus the final algorithm:
  712. * int E0 = x / log(2)
  713. * real x0 = x - E0 log(2)
  714. * real x1 = exp(x0)
  715. * return x1 * 2^E0
  716. */
  717. int e0 = x / real::R_LN2;
  718. real x0 = x - (real)e0 * real::R_LN2;
  719. real x1 = fast_exp_sub(x0, real::R_0);
  720. x1.m_signexp += e0;
  721. return x1;
  722. }
  723. real exp2(real const &x)
  724. {
  725. /* Strategy for exp2(x): see strategy in exp(). */
  726. int e0 = x;
  727. real x0 = x - (real)e0;
  728. real x1 = fast_exp_sub(x0 * real::R_LN2, real::R_0);
  729. x1.m_signexp += e0;
  730. return x1;
  731. }
  732. real sinh(real const &x)
  733. {
  734. /* We cannot always use (exp(x)-exp(-x))/2 because we'll lose
  735. * accuracy near zero. We only use this identity for |x|>0.5. If
  736. * |x|<=0.5, we compute exp(x)-1 and exp(-x)-1 instead. */
  737. bool near_zero = (fabs(x) < real::R_1 >> 1);
  738. real x1 = near_zero ? fast_exp_sub(x, real::R_1) : exp(x);
  739. real x2 = near_zero ? fast_exp_sub(-x, real::R_1) : exp(-x);
  740. return (x1 - x2) >> 1;
  741. }
  742. real tanh(real const &x)
  743. {
  744. /* See sinh() for the strategy here */
  745. bool near_zero = (fabs(x) < real::R_1 >> 1);
  746. real x1 = near_zero ? fast_exp_sub(x, real::R_1) : exp(x);
  747. real x2 = near_zero ? fast_exp_sub(-x, real::R_1) : exp(-x);
  748. real x3 = near_zero ? x1 + x2 + real::R_2 : x1 + x2;
  749. return (x1 - x2) / x3;
  750. }
  751. real cosh(real const &x)
  752. {
  753. /* No need to worry about accuracy here; maybe the last bit is slightly
  754. * off, but that's about it. */
  755. return (exp(x) + exp(-x)) >> 1;
  756. }
  757. real frexp(real const &x, int *exp)
  758. {
  759. if (!x)
  760. {
  761. *exp = 0;
  762. return x;
  763. }
  764. real ret = x;
  765. int exponent = (ret.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
  766. *exp = exponent + 1;
  767. ret.m_signexp -= exponent + 1;
  768. return ret;
  769. }
  770. real ldexp(real const &x, int exp)
  771. {
  772. real ret = x;
  773. if (ret)
  774. ret.m_signexp += exp;
  775. return ret;
  776. }
  777. real modf(real const &x, real *iptr)
  778. {
  779. real absx = fabs(x);
  780. real tmp = floor(absx);
  781. *iptr = copysign(tmp, x);
  782. return copysign(absx - tmp, x);
  783. }
  784. real copysign(real const &x, real const &y)
  785. {
  786. real ret = x;
  787. ret.m_signexp &= 0x7fffffffu;
  788. ret.m_signexp |= y.m_signexp & 0x80000000u;
  789. return ret;
  790. }
  791. real floor(real const &x)
  792. {
  793. /* Strategy for floor(x):
  794. * - if negative, return -ceil(-x)
  795. * - if zero or negative zero, return x
  796. * - if less than one, return zero
  797. * - otherwise, if e is the exponent, clear all bits except the
  798. * first e. */
  799. if (x < -real::R_0)
  800. return -ceil(-x);
  801. if (!x)
  802. return x;
  803. if (x < real::R_1)
  804. return real::R_0;
  805. real ret = x;
  806. int exponent = x.m_signexp - (1 << 30) + 1;
  807. for (int i = 0; i < real::BIGITS; i++)
  808. {
  809. if (exponent <= 0)
  810. ret.m_mantissa[i] = 0;
  811. else if (exponent < real::BIGIT_BITS)
  812. ret.m_mantissa[i] &= ~((1 << (real::BIGIT_BITS - exponent)) - 1);
  813. exponent -= real::BIGIT_BITS;
  814. }
  815. return ret;
  816. }
  817. real ceil(real const &x)
  818. {
  819. /* Strategy for ceil(x):
  820. * - if negative, return -floor(-x)
  821. * - if x == floor(x), return x
  822. * - otherwise, return floor(x) + 1 */
  823. if (x < -real::R_0)
  824. return -floor(-x);
  825. real ret = floor(x);
  826. if (x == ret)
  827. return ret;
  828. else
  829. return ret + real::R_1;
  830. }
  831. real round(real const &x)
  832. {
  833. if (x < real::R_0)
  834. return -round(-x);
  835. return floor(x + (real::R_1 >> 1));
  836. }
  837. real fmod(real const &x, real const &y)
  838. {
  839. if (!y)
  840. return real::R_0; /* FIXME: return NaN */
  841. if (!x)
  842. return x;
  843. real tmp = round(x / y);
  844. return x - tmp * y;
  845. }
  846. real sin(real const &x)
  847. {
  848. bool switch_sign = x.m_signexp & 0x80000000u;
  849. real absx = fmod(fabs(x), real::R_PI << 1);
  850. if (absx > real::R_PI)
  851. {
  852. absx -= real::R_PI;
  853. switch_sign = !switch_sign;
  854. }
  855. if (absx > real::R_PI_2)
  856. absx = real::R_PI - absx;
  857. real ret = real::R_0, fact = real::R_1, xn = absx, mx2 = -absx * absx;
  858. for (int i = 1; ; i += 2)
  859. {
  860. real newret = ret + xn;
  861. if (newret == ret)
  862. break;
  863. ret = newret;
  864. real mul = (i + 1) * (i + 2);
  865. fact *= mul;
  866. ret *= mul;
  867. xn *= mx2;
  868. }
  869. ret /= fact;
  870. /* Propagate sign */
  871. if (switch_sign)
  872. ret.m_signexp ^= 0x80000000u;
  873. return ret;
  874. }
  875. real cos(real const &x)
  876. {
  877. return sin(real::R_PI_2 - x);
  878. }
  879. real tan(real const &x)
  880. {
  881. /* Constrain input to [-π,π] */
  882. real y = fmod(x, real::R_PI);
  883. /* Constrain input to [-π/2,π/2] */
  884. if (y < -real::R_PI_2)
  885. y += real::R_PI;
  886. else if (y > real::R_PI_2)
  887. y -= real::R_PI;
  888. /* In [-π/4,π/4] return sin/cos */
  889. if (fabs(y) <= real::R_PI_4)
  890. return sin(y) / cos(y);
  891. /* Otherwise, return cos/sin */
  892. if (y > real::R_0)
  893. y = real::R_PI_2 - y;
  894. else
  895. y = -real::R_PI_2 - y;
  896. return cos(y) / sin(y);
  897. }
  898. static real asinacos(real const &x, bool is_asin, bool is_negative)
  899. {
  900. /* Strategy for asin(): in [-0.5..0.5], use a Taylor series around
  901. * zero. In [0.5..1], use asin(x) = π/2 - 2*asin(sqrt((1-x)/2)), and
  902. * in [-1..-0.5] just revert the sign.
  903. * Strategy for acos(): use acos(x) = π/2 - asin(x) and try not to
  904. * lose the precision around x=1. */
  905. real absx = fabs(x);
  906. bool around_zero = (absx < (real::R_1 >> 1));
  907. if (!around_zero)
  908. absx = sqrt((real::R_1 - absx) >> 1);
  909. real ret = absx, xn = absx, x2 = absx * absx, fact1 = 2, fact2 = 1;
  910. for (int i = 1; ; i++)
  911. {
  912. xn *= x2;
  913. real mul = (real)(2 * i + 1);
  914. real newret = ret + ((fact1 * xn / (mul * fact2)) >> (i * 2));
  915. if (newret == ret)
  916. break;
  917. ret = newret;
  918. fact1 *= (real)((2 * i + 1) * (2 * i + 2));
  919. fact2 *= (real)((i + 1) * (i + 1));
  920. }
  921. if (is_negative)
  922. ret = -ret;
  923. if (around_zero)
  924. ret = is_asin ? ret : real::R_PI_2 - ret;
  925. else
  926. {
  927. real adjust = is_negative ? real::R_PI : real::R_0;
  928. if (is_asin)
  929. ret = real::R_PI_2 - adjust - (ret << 1);
  930. else
  931. ret = adjust + (ret << 1);
  932. }
  933. return ret;
  934. }
  935. real asin(real const &x)
  936. {
  937. return asinacos(x, true, x.m_signexp >> 31);
  938. }
  939. real acos(real const &x)
  940. {
  941. return asinacos(x, false, x.m_signexp >> 31);
  942. }
  943. real atan(real const &x)
  944. {
  945. /* Computing atan(x): we choose a different Taylor series depending on
  946. * the value of x to help with convergence.
  947. *
  948. * If |x| < 0.5 we evaluate atan(y) near 0:
  949. * atan(y) = y - y^3/3 + y^5/5 - y^7/7 + y^9/9 ...
  950. *
  951. * If 0.5 <= |x| < 1.5 we evaluate atan(1+y) near 0:
  952. * atan(1+y) = π/4 + y/(1*2^1) - y^2/(2*2^1) + y^3/(3*2^2)
  953. * - y^5/(5*2^3) + y^6/(6*2^3) - y^7/(7*2^4)
  954. * + y^9/(9*2^5) - y^10/(10*2^5) + y^11/(11*2^6) ...
  955. *
  956. * If 1.5 <= |x| < 2 we evaluate atan(sqrt(3)+2y) near 0:
  957. * atan(sqrt(3)+2y) = π/3 + 1/2 y - sqrt(3)/2 y^2/2
  958. * + y^3/3 - sqrt(3)/2 y^4/4 + 1/2 y^5/5
  959. * - 1/2 y^7/7 + sqrt(3)/2 y^8/8
  960. * - y^9/9 + sqrt(3)/2 y^10/10 - 1/2 y^11/11
  961. * + 1/2 y^13/13 - sqrt(3)/2 y^14/14
  962. * + y^15/15 - sqrt(3)/2 y^16/16 + 1/2 y^17/17 ...
  963. *
  964. * If |x| >= 2 we evaluate atan(y) near +∞:
  965. * atan(y) = π/2 - y^-1 + y^-3/3 - y^-5/5 + y^-7/7 - y^-9/9 ...
  966. */
  967. real absx = fabs(x);
  968. if (absx < (real::R_1 >> 1))
  969. {
  970. real ret = x, xn = x, mx2 = -x * x;
  971. for (int i = 3; ; i += 2)
  972. {
  973. xn *= mx2;
  974. real newret = ret + xn / (real)i;
  975. if (newret == ret)
  976. break;
  977. ret = newret;
  978. }
  979. return ret;
  980. }
  981. real ret = 0;
  982. if (absx < (real::R_3 >> 1))
  983. {
  984. real y = real::R_1 - absx;
  985. real yn = y, my2 = -y * y;
  986. for (int i = 0; ; i += 2)
  987. {
  988. real newret = ret + ((yn / (real)(2 * i + 1)) >> (i + 1));
  989. yn *= y;
  990. newret += (yn / (real)(2 * i + 2)) >> (i + 1);
  991. yn *= y;
  992. newret += (yn / (real)(2 * i + 3)) >> (i + 2);
  993. if (newret == ret)
  994. break;
  995. ret = newret;
  996. yn *= my2;
  997. }
  998. ret = real::R_PI_4 - ret;
  999. }
  1000. else if (absx < real::R_2)
  1001. {
  1002. real y = (absx - real::R_SQRT3) >> 1;
  1003. real yn = y, my2 = -y * y;
  1004. for (int i = 1; ; i += 6)
  1005. {
  1006. real newret = ret + ((yn / (real)i) >> 1);
  1007. yn *= y;
  1008. newret -= (real::R_SQRT3 >> 1) * yn / (real)(i + 1);
  1009. yn *= y;
  1010. newret += yn / (real)(i + 2);
  1011. yn *= y;
  1012. newret -= (real::R_SQRT3 >> 1) * yn / (real)(i + 3);
  1013. yn *= y;
  1014. newret += (yn / (real)(i + 4)) >> 1;
  1015. if (newret == ret)
  1016. break;
  1017. ret = newret;
  1018. yn *= my2;
  1019. }
  1020. ret = real::R_PI_3 + ret;
  1021. }
  1022. else
  1023. {
  1024. real y = re(absx);
  1025. real yn = y, my2 = -y * y;
  1026. ret = y;
  1027. for (int i = 3; ; i += 2)
  1028. {
  1029. yn *= my2;
  1030. real newret = ret + yn / (real)i;
  1031. if (newret == ret)
  1032. break;
  1033. ret = newret;
  1034. }
  1035. ret = real::R_PI_2 - ret;
  1036. }
  1037. /* Propagate sign */
  1038. ret.m_signexp |= (x.m_signexp & 0x80000000u);
  1039. return ret;
  1040. }
  1041. real atan2(real const &y, real const &x)
  1042. {
  1043. if (!y)
  1044. {
  1045. if ((x.m_signexp >> 31) == 0)
  1046. return y;
  1047. if (y.m_signexp >> 31)
  1048. return -real::R_PI;
  1049. return real::R_PI;
  1050. }
  1051. if (!x)
  1052. {
  1053. if (y.m_signexp >> 31)
  1054. return -real::R_PI;
  1055. return real::R_PI;
  1056. }
  1057. /* FIXME: handle the Inf and NaN cases */
  1058. real z = y / x;
  1059. real ret = atan(z);
  1060. if (x < real::R_0)
  1061. ret += (y > real::R_0) ? real::R_PI : -real::R_PI;
  1062. return ret;
  1063. }
  1064. void real::hexprint() const
  1065. {
  1066. printf("%08x", m_signexp);
  1067. for (int i = 0; i < BIGITS; i++)
  1068. printf(" %08x", m_mantissa[i]);
  1069. printf("\n");
  1070. }
  1071. void real::print(int ndigits) const
  1072. {
  1073. real x = *this;
  1074. if (x.m_signexp >> 31)
  1075. {
  1076. printf("-");
  1077. x = -x;
  1078. }
  1079. /* Normalise x so that mantissa is in [1..9.999] */
  1080. int exponent = 0;
  1081. if (x.m_signexp)
  1082. {
  1083. for (real div = R_1, newdiv; true; div = newdiv)
  1084. {
  1085. newdiv = div * R_10;
  1086. if (x < newdiv)
  1087. {
  1088. x /= div;
  1089. break;
  1090. }
  1091. exponent++;
  1092. }
  1093. for (real mul = 1, newx; true; mul *= R_10)
  1094. {
  1095. newx = x * mul;
  1096. if (newx >= R_1)
  1097. {
  1098. x = newx;
  1099. break;
  1100. }
  1101. exponent--;
  1102. }
  1103. }
  1104. /* Print digits */
  1105. for (int i = 0; i < ndigits; i++)
  1106. {
  1107. int digit = (int)x;
  1108. printf("%i", digit);
  1109. if (i == 0)
  1110. printf(".");
  1111. x -= real(digit);
  1112. x *= R_10;
  1113. }
  1114. /* Print exponent information */
  1115. if (exponent < 0)
  1116. printf("e-%i", -exponent);
  1117. else if (exponent > 0)
  1118. printf("e+%i", exponent);
  1119. printf("\n");
  1120. }
  1121. static real fast_pi()
  1122. {
  1123. /* Approximate Pi using Machin's formula: 16*atan(1/5)-4*atan(1/239) */
  1124. real ret = 0.0, x0 = 5.0, x1 = 239.0;
  1125. real const m0 = -x0 * x0, m1 = -x1 * x1, r16 = 16.0, r4 = 4.0;
  1126. for (int i = 1; ; i += 2)
  1127. {
  1128. real newret = ret + r16 / (x0 * (real)i) - r4 / (x1 * (real)i);
  1129. if (newret == ret)
  1130. break;
  1131. ret = newret;
  1132. x0 *= m0;
  1133. x1 *= m1;
  1134. }
  1135. return ret;
  1136. }
  1137. real const real::R_0 = (real)0.0;
  1138. real const real::R_1 = (real)1.0;
  1139. real const real::R_2 = (real)2.0;
  1140. real const real::R_3 = (real)3.0;
  1141. real const real::R_10 = (real)10.0;
  1142. real const real::R_LN2 = fast_log(R_2);
  1143. real const real::R_LN10 = log(R_10);
  1144. real const real::R_LOG2E = re(R_LN2);
  1145. real const real::R_LOG10E = re(R_LN10);
  1146. real const real::R_E = exp(R_1);
  1147. real const real::R_PI = fast_pi();
  1148. real const real::R_PI_2 = R_PI >> 1;
  1149. real const real::R_PI_3 = R_PI / R_3;
  1150. real const real::R_PI_4 = R_PI >> 2;
  1151. real const real::R_1_PI = re(R_PI);
  1152. real const real::R_2_PI = R_1_PI << 1;
  1153. real const real::R_2_SQRTPI = re(sqrt(R_PI)) << 1;
  1154. real const real::R_SQRT2 = sqrt(R_2);
  1155. real const real::R_SQRT3 = sqrt(R_3);
  1156. real const real::R_SQRT1_2 = R_SQRT2 >> 1;
  1157. } /* namespace lol */