1433 rader
36 KiB

  1. //
  2. // Lol Engine
  3. //
  4. // Copyright: (c) 2010-2012 Sam Hocevar <sam@hocevar.net>
  5. // This program is free software; you can redistribute it and/or
  6. // modify it under the terms of the Do What The Fuck You Want To
  7. // Public License, Version 2, as published by Sam Hocevar. See
  8. // http://sam.zoy.org/projects/COPYING.WTFPL for more details.
  9. //
  10. #if defined HAVE_CONFIG_H
  11. # include "config.h"
  12. #endif
  13. #if defined _XBOX
  14. # define _USE_MATH_DEFINES /* for M_PI */
  15. # include <xtl.h>
  16. # undef near /* Fuck Microsoft */
  17. # undef far /* Fuck Microsoft again */
  18. #elif defined WIN32
  19. # define _USE_MATH_DEFINES /* for M_PI */
  20. # define WIN32_LEAN_AND_MEAN
  21. # include <windows.h>
  22. # undef near /* Fuck Microsoft */
  23. # undef far /* Fuck Microsoft again */
  24. #endif
  25. #include <new>
  26. #include <cstring>
  27. #include <cstdio>
  28. #include <cstdlib>
  29. #include "core.h"
  30. using namespace std;
  31. namespace lol
  32. {
  33. /*
  34. * First handle explicit specialisation of our templates.
  35. *
  36. * Initialisation order is not important because everything is
  37. * done on demand, but here is the dependency list anyway:
  38. * - fast_log() requires R_1
  39. * - log() requires R_LN2
  40. * - re() require R_2
  41. * - exp() requires R_0, R_1, R_LN2
  42. * - sqrt() requires R_3
  43. */
  44. static real fast_log(real const &x);
  45. static real fast_pi();
  46. #define LOL_CONSTANT_GETTER(name, value) \
  47. template<> real const& real::name() \
  48. { \
  49. static real const ret = value; \
  50. return ret; \
  51. }
  52. LOL_CONSTANT_GETTER(R_0, (real)0.0);
  53. LOL_CONSTANT_GETTER(R_1, (real)1.0);
  54. LOL_CONSTANT_GETTER(R_2, (real)2.0);
  55. LOL_CONSTANT_GETTER(R_3, (real)3.0);
  56. LOL_CONSTANT_GETTER(R_10, (real)10.0);
  57. LOL_CONSTANT_GETTER(R_LN2, fast_log(R_2()));
  58. LOL_CONSTANT_GETTER(R_LN10, log(R_10()));
  59. LOL_CONSTANT_GETTER(R_LOG2E, re(R_LN2()));
  60. LOL_CONSTANT_GETTER(R_LOG10E, re(R_LN10()));
  61. LOL_CONSTANT_GETTER(R_E, exp(R_1()));
  62. LOL_CONSTANT_GETTER(R_PI, fast_pi());
  63. LOL_CONSTANT_GETTER(R_PI_2, R_PI() / 2);
  64. LOL_CONSTANT_GETTER(R_PI_3, R_PI() / R_3());
  65. LOL_CONSTANT_GETTER(R_PI_4, R_PI() / 4);
  66. LOL_CONSTANT_GETTER(R_1_PI, re(R_PI()));
  67. LOL_CONSTANT_GETTER(R_2_PI, R_1_PI() * 2);
  68. LOL_CONSTANT_GETTER(R_2_SQRTPI, re(sqrt(R_PI())) * 2);
  69. LOL_CONSTANT_GETTER(R_SQRT2, sqrt(R_2()));
  70. LOL_CONSTANT_GETTER(R_SQRT3, sqrt(R_3()));
  71. LOL_CONSTANT_GETTER(R_SQRT1_2, R_SQRT2() / 2);
  72. #undef LOL_CONSTANT_GETTER
  73. /*
  74. * Now carry on with the rest of the Real class.
  75. */
  76. template<> real::Real()
  77. {
  78. m_mantissa = new uint32_t[BIGITS];
  79. memset(m_mantissa, 0, BIGITS * sizeof(uint32_t));
  80. m_signexp = 0;
  81. }
  82. template<> real::Real(real const &x)
  83. {
  84. m_mantissa = new uint32_t[BIGITS];
  85. memcpy(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t));
  86. m_signexp = x.m_signexp;
  87. }
  88. template<> real const &real::operator =(real const &x)
  89. {
  90. if (&x != this)
  91. {
  92. memcpy(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t));
  93. m_signexp = x.m_signexp;
  94. }
  95. return *this;
  96. }
  97. template<> real::~Real()
  98. {
  99. delete[] m_mantissa;
  100. }
  101. template<> real::Real(float f) { new(this) real((double)f); }
  102. template<> real::Real(int i) { new(this) real((double)i); }
  103. template<> real::Real(unsigned int i) { new(this) real((double)i); }
  104. template<> real::Real(double d)
  105. {
  106. new(this) real();
  107. union { double d; uint64_t x; } u = { d };
  108. uint32_t sign = (u.x >> 63) << 31;
  109. uint32_t exponent = (u.x << 1) >> 53;
  110. switch (exponent)
  111. {
  112. case 0x00:
  113. m_signexp = sign;
  114. break;
  115. case 0x7ff:
  116. m_signexp = sign | 0x7fffffffu;
  117. break;
  118. default:
  119. m_signexp = sign | (exponent + (1 << 30) - (1 << 10));
  120. break;
  121. }
  122. m_mantissa[0] = (uint32_t)(u.x >> 20);
  123. m_mantissa[1] = (uint32_t)(u.x << 12);
  124. memset(m_mantissa + 2, 0, (BIGITS - 2) * sizeof(m_mantissa[0]));
  125. }
  126. template<> real::operator float() const { return (float)(double)(*this); }
  127. template<> real::operator int() const { return (int)(double)(*this); }
  128. template<> real::operator unsigned() const { return (unsigned)(double)(*this); }
  129. template<> real::operator double() const
  130. {
  131. union { double d; uint64_t x; } u;
  132. /* Get sign */
  133. u.x = m_signexp >> 31;
  134. u.x <<= 11;
  135. /* Compute new exponent */
  136. uint32_t exponent = (m_signexp << 1) >> 1;
  137. int e = (int)exponent - (1 << 30) + (1 << 10);
  138. if (e < 0)
  139. u.x <<= 52;
  140. else if (e >= 0x7ff)
  141. {
  142. u.x |= 0x7ff;
  143. u.x <<= 52;
  144. }
  145. else
  146. {
  147. u.x |= e;
  148. /* Store mantissa if necessary */
  149. u.x <<= 32;
  150. u.x |= m_mantissa[0];
  151. u.x <<= 20;
  152. u.x |= m_mantissa[1] >> 12;
  153. /* Rounding */
  154. u.x += (m_mantissa[1] >> 11) & 1;
  155. }
  156. return u.d;
  157. }
  158. /*
  159. * Create a real number from an ASCII representation
  160. */
  161. template<> real::Real(char const *str)
  162. {
  163. real ret = 0;
  164. int exponent = 0;
  165. bool comma = false, nonzero = false, negative = false, finished = false;
  166. for (char const *p = str; *p && !finished; p++)
  167. {
  168. switch (*p)
  169. {
  170. case '-':
  171. case '+':
  172. if (p != str)
  173. break;
  174. negative = (*p == '-');
  175. break;
  176. case '.':
  177. if (comma)
  178. finished = true;
  179. comma = true;
  180. break;
  181. case '0': case '1': case '2': case '3': case '4':
  182. case '5': case '6': case '7': case '8': case '9':
  183. if (nonzero)
  184. {
  185. real x = ret + ret;
  186. x = x + x + ret;
  187. ret = x + x;
  188. }
  189. if (*p != '0')
  190. {
  191. ret += (int)(*p - '0');
  192. nonzero = true;
  193. }
  194. if (comma)
  195. exponent--;
  196. break;
  197. case 'e':
  198. case 'E':
  199. exponent += atoi(p + 1);
  200. finished = true;
  201. break;
  202. default:
  203. finished = true;
  204. break;
  205. }
  206. }
  207. if (exponent)
  208. ret *= pow(R_10(), (real)exponent);
  209. if (negative)
  210. ret = -ret;
  211. new(this) real(ret);
  212. }
  213. template<> real real::operator +() const
  214. {
  215. return *this;
  216. }
  217. template<> real real::operator -() const
  218. {
  219. real ret = *this;
  220. ret.m_signexp ^= 0x80000000u;
  221. return ret;
  222. }
  223. template<> real real::operator +(real const &x) const
  224. {
  225. if (x.m_signexp << 1 == 0)
  226. return *this;
  227. /* Ensure both arguments are positive. Otherwise, switch signs,
  228. * or replace + with -. */
  229. if (m_signexp >> 31)
  230. return -(-*this + -x);
  231. if (x.m_signexp >> 31)
  232. return *this - (-x);
  233. /* Ensure *this has the larger exponent (no need for the mantissa to
  234. * be larger, as in subtraction). Otherwise, switch. */
  235. if ((m_signexp << 1) < (x.m_signexp << 1))
  236. return x + *this;
  237. real ret;
  238. int e1 = m_signexp - (1 << 30) + 1;
  239. int e2 = x.m_signexp - (1 << 30) + 1;
  240. int bigoff = (e1 - e2) / BIGIT_BITS;
  241. int off = e1 - e2 - bigoff * BIGIT_BITS;
  242. if (bigoff > BIGITS)
  243. return *this;
  244. ret.m_signexp = m_signexp;
  245. uint64_t carry = 0;
  246. for (int i = BIGITS; i--; )
  247. {
  248. carry += m_mantissa[i];
  249. if (i - bigoff >= 0)
  250. carry += x.m_mantissa[i - bigoff] >> off;
  251. if (off && i - bigoff > 0)
  252. carry += (x.m_mantissa[i - bigoff - 1] << (BIGIT_BITS - off)) & 0xffffffffu;
  253. else if (i - bigoff == 0)
  254. carry += (uint64_t)1 << (BIGIT_BITS - off);
  255. ret.m_mantissa[i] = (uint32_t)carry;
  256. carry >>= BIGIT_BITS;
  257. }
  258. /* Renormalise in case we overflowed the mantissa */
  259. if (carry)
  260. {
  261. carry--;
  262. for (int i = 0; i < BIGITS; i++)
  263. {
  264. uint32_t tmp = ret.m_mantissa[i];
  265. ret.m_mantissa[i] = ((uint32_t)carry << (BIGIT_BITS - 1))
  266. | (tmp >> 1);
  267. carry = tmp & 1u;
  268. }
  269. ret.m_signexp++;
  270. }
  271. return ret;
  272. }
  273. template<> real real::operator -(real const &x) const
  274. {
  275. if (x.m_signexp << 1 == 0)
  276. return *this;
  277. /* Ensure both arguments are positive. Otherwise, switch signs,
  278. * or replace - with +. */
  279. if (m_signexp >> 31)
  280. return -(-*this + x);
  281. if (x.m_signexp >> 31)
  282. return (*this) + (-x);
  283. /* Ensure *this is larger than x */
  284. if (*this < x)
  285. return -(x - *this);
  286. real ret;
  287. int e1 = m_signexp - (1 << 30) + 1;
  288. int e2 = x.m_signexp - (1 << 30) + 1;
  289. int bigoff = (e1 - e2) / BIGIT_BITS;
  290. int off = e1 - e2 - bigoff * BIGIT_BITS;
  291. if (bigoff > BIGITS)
  292. return *this;
  293. ret.m_signexp = m_signexp;
  294. /* int64_t instead of uint64_t to preserve sign through shifts */
  295. int64_t carry = 0;
  296. for (int i = 0; i < bigoff; i++)
  297. {
  298. carry -= x.m_mantissa[BIGITS - 1 - i];
  299. /* Emulates a signed shift */
  300. carry >>= BIGIT_BITS;
  301. carry |= carry << BIGIT_BITS;
  302. }
  303. if (bigoff < BIGITS)
  304. carry -= x.m_mantissa[BIGITS - 1 - bigoff] & (((int64_t)1 << off) - 1);
  305. carry /= (int64_t)1 << off;
  306. for (int i = BIGITS; i--; )
  307. {
  308. carry += m_mantissa[i];
  309. if (i - bigoff >= 0)
  310. carry -= x.m_mantissa[i - bigoff] >> off;
  311. if (off && i - bigoff > 0)
  312. carry -= (x.m_mantissa[i - bigoff - 1] << (BIGIT_BITS - off)) & 0xffffffffu;
  313. else if (i - bigoff == 0)
  314. carry -= (uint64_t)1 << (BIGIT_BITS - off);
  315. ret.m_mantissa[i] = (uint32_t)carry;
  316. carry >>= BIGIT_BITS;
  317. carry |= carry << BIGIT_BITS;
  318. }
  319. carry += 1;
  320. /* Renormalise if we underflowed the mantissa */
  321. if (carry == 0)
  322. {
  323. /* How much do we need to shift the mantissa? FIXME: this could
  324. * be computed above */
  325. off = 0;
  326. for (int i = 0; i < BIGITS; i++)
  327. {
  328. if (!ret.m_mantissa[i])
  329. {
  330. off += BIGIT_BITS;
  331. continue;
  332. }
  333. for (uint32_t tmp = ret.m_mantissa[i]; tmp < 0x80000000u; tmp <<= 1)
  334. off++;
  335. break;
  336. }
  337. if (off == BIGITS * BIGIT_BITS)
  338. ret.m_signexp &= 0x80000000u;
  339. else
  340. {
  341. off++; /* Shift one more to get rid of the leading one */
  342. ret.m_signexp -= off;
  343. bigoff = off / BIGIT_BITS;
  344. off -= bigoff * BIGIT_BITS;
  345. for (int i = 0; i < BIGITS; i++)
  346. {
  347. uint32_t tmp = 0;
  348. if (i + bigoff < BIGITS)
  349. tmp |= ret.m_mantissa[i + bigoff] << off;
  350. if (off && i + bigoff + 1 < BIGITS)
  351. tmp |= ret.m_mantissa[i + bigoff + 1] >> (BIGIT_BITS - off);
  352. ret.m_mantissa[i] = tmp;
  353. }
  354. }
  355. }
  356. return ret;
  357. }
  358. template<> real real::operator *(real const &x) const
  359. {
  360. real ret;
  361. if (m_signexp << 1 == 0 || x.m_signexp << 1 == 0)
  362. {
  363. ret = (m_signexp << 1 == 0) ? *this : x;
  364. ret.m_signexp ^= x.m_signexp & 0x80000000u;
  365. return ret;
  366. }
  367. ret.m_signexp = (m_signexp ^ x.m_signexp) & 0x80000000u;
  368. int e = (m_signexp & 0x7fffffffu) - (1 << 30) + 1
  369. + (x.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
  370. /* Accumulate low order product; no need to store it, we just
  371. * want the carry value */
  372. uint64_t carry = 0, hicarry = 0, prev;
  373. for (int i = 0; i < BIGITS; i++)
  374. {
  375. for (int j = 0; j < i + 1; j++)
  376. {
  377. prev = carry;
  378. carry += (uint64_t)m_mantissa[BIGITS - 1 - j]
  379. * (uint64_t)x.m_mantissa[BIGITS - 1 + j - i];
  380. if (carry < prev)
  381. hicarry++;
  382. }
  383. carry >>= BIGIT_BITS;
  384. carry |= hicarry << BIGIT_BITS;
  385. hicarry >>= BIGIT_BITS;
  386. }
  387. for (int i = 0; i < BIGITS; i++)
  388. {
  389. for (int j = i + 1; j < BIGITS; j++)
  390. {
  391. prev = carry;
  392. carry += (uint64_t)m_mantissa[BIGITS - 1 - j]
  393. * (uint64_t)x.m_mantissa[j - 1 - i];
  394. if (carry < prev)
  395. hicarry++;
  396. }
  397. prev = carry;
  398. carry += m_mantissa[BIGITS - 1 - i];
  399. carry += x.m_mantissa[BIGITS - 1 - i];
  400. if (carry < prev)
  401. hicarry++;
  402. ret.m_mantissa[BIGITS - 1 - i] = carry & 0xffffffffu;
  403. carry >>= BIGIT_BITS;
  404. carry |= hicarry << BIGIT_BITS;
  405. hicarry >>= BIGIT_BITS;
  406. }
  407. /* Renormalise in case we overflowed the mantissa */
  408. if (carry)
  409. {
  410. carry--;
  411. for (int i = 0; i < BIGITS; i++)
  412. {
  413. uint32_t tmp = (uint32_t)ret.m_mantissa[i];
  414. ret.m_mantissa[i] = ((uint32_t)carry << (BIGIT_BITS - 1))
  415. | (tmp >> 1);
  416. carry = tmp & 1u;
  417. }
  418. e++;
  419. }
  420. ret.m_signexp |= e + (1 << 30) - 1;
  421. return ret;
  422. }
  423. template<> real real::operator /(real const &x) const
  424. {
  425. return *this * re(x);
  426. }
  427. template<> real const &real::operator +=(real const &x)
  428. {
  429. real tmp = *this;
  430. return *this = tmp + x;
  431. }
  432. template<> real const &real::operator -=(real const &x)
  433. {
  434. real tmp = *this;
  435. return *this = tmp - x;
  436. }
  437. template<> real const &real::operator *=(real const &x)
  438. {
  439. real tmp = *this;
  440. return *this = tmp * x;
  441. }
  442. template<> real const &real::operator /=(real const &x)
  443. {
  444. real tmp = *this;
  445. return *this = tmp / x;
  446. }
  447. template<> bool real::operator ==(real const &x) const
  448. {
  449. if ((m_signexp << 1) == 0 && (x.m_signexp << 1) == 0)
  450. return true;
  451. if (m_signexp != x.m_signexp)
  452. return false;
  453. return memcmp(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t)) == 0;
  454. }
  455. template<> bool real::operator !=(real const &x) const
  456. {
  457. return !(*this == x);
  458. }
  459. template<> bool real::operator <(real const &x) const
  460. {
  461. /* Ensure both numbers are positive */
  462. if (m_signexp >> 31)
  463. return (x.m_signexp >> 31) ? -*this > -x : true;
  464. if (x.m_signexp >> 31)
  465. return false;
  466. /* Compare all relevant bits */
  467. if (m_signexp != x.m_signexp)
  468. return m_signexp < x.m_signexp;
  469. for (int i = 0; i < BIGITS; i++)
  470. if (m_mantissa[i] != x.m_mantissa[i])
  471. return m_mantissa[i] < x.m_mantissa[i];
  472. return false;
  473. }
  474. template<> bool real::operator <=(real const &x) const
  475. {
  476. return !(*this > x);
  477. }
  478. template<> bool real::operator >(real const &x) const
  479. {
  480. /* Ensure both numbers are positive */
  481. if (m_signexp >> 31)
  482. return (x.m_signexp >> 31) ? -*this < -x : false;
  483. if (x.m_signexp >> 31)
  484. return true;
  485. /* Compare all relevant bits */
  486. if (m_signexp != x.m_signexp)
  487. return m_signexp > x.m_signexp;
  488. for (int i = 0; i < BIGITS; i++)
  489. if (m_mantissa[i] != x.m_mantissa[i])
  490. return m_mantissa[i] > x.m_mantissa[i];
  491. return false;
  492. }
  493. template<> bool real::operator >=(real const &x) const
  494. {
  495. return !(*this < x);
  496. }
  497. template<> bool real::operator !() const
  498. {
  499. return !(bool)*this;
  500. }
  501. template<> real::operator bool() const
  502. {
  503. /* A real is "true" if it is non-zero (exponent is non-zero) AND
  504. * not NaN (exponent is not full bits OR higher order mantissa is zero) */
  505. uint32_t exponent = m_signexp << 1;
  506. return exponent && (~exponent || m_mantissa[0] == 0);
  507. }
  508. template<> real min(real const &a, real const &b)
  509. {
  510. return (a < b) ? a : b;
  511. }
  512. template<> real max(real const &a, real const &b)
  513. {
  514. return (a > b) ? a : b;
  515. }
  516. template<> real clamp(real const &x, real const &a, real const &b)
  517. {
  518. return (x < a) ? a : (x > b) ? b : x;
  519. }
  520. template<> real re(real const &x)
  521. {
  522. if (!(x.m_signexp << 1))
  523. {
  524. real ret = x;
  525. ret.m_signexp = x.m_signexp | 0x7fffffffu;
  526. ret.m_mantissa[0] = 0;
  527. return ret;
  528. }
  529. /* Use the system's float inversion to approximate 1/x */
  530. union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f };
  531. v.x |= x.m_mantissa[0] >> 9;
  532. v.f = 1.0f / v.f;
  533. real ret;
  534. ret.m_mantissa[0] = v.x << 9;
  535. uint32_t sign = x.m_signexp & 0x80000000u;
  536. ret.m_signexp = sign;
  537. int exponent = (x.m_signexp & 0x7fffffffu) + 1;
  538. exponent = -exponent + (v.x >> 23) - (u.x >> 23);
  539. ret.m_signexp |= (exponent - 1) & 0x7fffffffu;
  540. /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
  541. * convergence, but this hasn't been checked seriously. */
  542. for (int i = 1; i <= real::BIGITS; i *= 2)
  543. ret = ret * (real::R_2() - ret * x);
  544. return ret;
  545. }
  546. template<> real sqrt(real const &x)
  547. {
  548. /* if zero, return x */
  549. if (!(x.m_signexp << 1))
  550. return x;
  551. /* if negative, return NaN */
  552. if (x.m_signexp >> 31)
  553. {
  554. real ret;
  555. ret.m_signexp = 0x7fffffffu;
  556. ret.m_mantissa[0] = 0xffffu;
  557. return ret;
  558. }
  559. /* Use the system's float inversion to approximate 1/sqrt(x). First
  560. * we construct a float in the [1..4[ range that has roughly the same
  561. * mantissa as our real. Its exponent is 0 or 1, depending on the
  562. * partity of x. The final exponent is 0, -1 or -2. We use the final
  563. * exponent and final mantissa to pre-fill the result. */
  564. union { float f; uint32_t x; } u = { 1.0f }, v = { 2.0f };
  565. v.x -= ((x.m_signexp & 1) << 23);
  566. v.x |= x.m_mantissa[0] >> 9;
  567. v.f = 1.0f / sqrtf(v.f);
  568. real ret;
  569. ret.m_mantissa[0] = v.x << 9;
  570. uint32_t sign = x.m_signexp & 0x80000000u;
  571. ret.m_signexp = sign;
  572. uint32_t exponent = (x.m_signexp & 0x7fffffffu);
  573. exponent = ((1 << 30) + (1 << 29) - 1) - (exponent + 1) / 2;
  574. exponent = exponent + (v.x >> 23) - (u.x >> 23);
  575. ret.m_signexp |= exponent & 0x7fffffffu;
  576. /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
  577. * convergence, but this hasn't been checked seriously. */
  578. for (int i = 1; i <= real::BIGITS; i *= 2)
  579. {
  580. ret = ret * (real::R_3() - ret * ret * x);
  581. ret.m_signexp--;
  582. }
  583. return ret * x;
  584. }
  585. template<> real cbrt(real const &x)
  586. {
  587. /* if zero, return x */
  588. if (!(x.m_signexp << 1))
  589. return x;
  590. /* Use the system's float inversion to approximate cbrt(x). First
  591. * we construct a float in the [1..8[ range that has roughly the same
  592. * mantissa as our real. Its exponent is 0, 1 or 2, depending on the
  593. * value of x. The final exponent is 0 or 1 (special case). We use
  594. * the final exponent and final mantissa to pre-fill the result. */
  595. union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f };
  596. v.x += ((x.m_signexp % 3) << 23);
  597. v.x |= x.m_mantissa[0] >> 9;
  598. v.f = powf(v.f, 0.33333333333333333f);
  599. real ret;
  600. ret.m_mantissa[0] = v.x << 9;
  601. uint32_t sign = x.m_signexp & 0x80000000u;
  602. ret.m_signexp = sign;
  603. int exponent = (x.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
  604. exponent = exponent / 3 + (v.x >> 23) - (u.x >> 23);
  605. ret.m_signexp |= (exponent + (1 << 30) - 1) & 0x7fffffffu;
  606. /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
  607. * convergence, but this hasn't been checked seriously. */
  608. for (int i = 1; i <= real::BIGITS; i *= 2)
  609. {
  610. static real third = re(real::R_3());
  611. ret = third * (x / (ret * ret) + (ret / 2));
  612. }
  613. return ret;
  614. }
  615. template<> real pow(real const &x, real const &y)
  616. {
  617. if (!y)
  618. return real::R_1();
  619. if (!x)
  620. return real::R_0();
  621. if (x > real::R_0())
  622. return exp(y * log(x));
  623. else /* x < 0 */
  624. {
  625. /* Odd integer exponent */
  626. if (y == (round(y / 2) * 2))
  627. return exp(y * log(-x));
  628. /* Even integer exponent */
  629. if (y == round(y))
  630. return -exp(y * log(-x));
  631. /* FIXME: negative nth root */
  632. return real::R_0();
  633. }
  634. }
  635. static real fast_fact(int x)
  636. {
  637. real ret = real::R_1();
  638. int i = 1, multiplier = 1, exponent = 0;
  639. for (;;)
  640. {
  641. if (i++ >= x)
  642. /* Multiplication is a no-op if multiplier == 1 */
  643. return ldexp(ret * multiplier, exponent);
  644. int tmp = i;
  645. while ((tmp & 1) == 0)
  646. {
  647. tmp >>= 1;
  648. exponent++;
  649. }
  650. if (multiplier * tmp / tmp != multiplier)
  651. {
  652. ret *= multiplier;
  653. multiplier = 1;
  654. }
  655. multiplier *= tmp;
  656. }
  657. }
  658. template<> real gamma(real const &x)
  659. {
  660. /* We use Spouge's formula. FIXME: precision is far from acceptable,
  661. * especially with large values. We need to compute this with higher
  662. * precision values in order to attain the desired accuracy. It might
  663. * also be useful to sort the ck values by decreasing absolute value
  664. * and do the addition in this order. */
  665. int a = ceilf(logf(2) / logf(2 * M_PI) * real::BIGITS * real::BIGIT_BITS);
  666. real ret = sqrt(real::R_PI() * 2);
  667. real fact_k_1 = real::R_1();
  668. for (int k = 1; k < a; k++)
  669. {
  670. real a_k = (real)(a - k);
  671. real ck = pow(a_k, (real)((float)k - 0.5)) * exp(a_k)
  672. / (fact_k_1 * (x + (real)(k - 1)));
  673. ret += ck;
  674. fact_k_1 *= (real)-k;
  675. }
  676. ret *= pow(x + (real)(a - 1), x - (real::R_1() / 2));
  677. ret *= exp(-x - (real)(a - 1));
  678. return ret;
  679. }
  680. template<> real fabs(real const &x)
  681. {
  682. real ret = x;
  683. ret.m_signexp &= 0x7fffffffu;
  684. return ret;
  685. }
  686. static real fast_log(real const &x)
  687. {
  688. /* This fast log method is tuned to work on the [1..2] range and
  689. * no effort whatsoever was made to improve convergence outside this
  690. * domain of validity. It can converge pretty fast, provided we use
  691. * the following variable substitutions:
  692. * y = sqrt(x)
  693. * z = (y - 1) / (y + 1)
  694. *
  695. * And the following identities:
  696. * ln(x) = 2 ln(y)
  697. * = 2 ln((1 + z) / (1 - z))
  698. * = 4 z (1 + z^2 / 3 + z^4 / 5 + z^6 / 7...)
  699. *
  700. * Any additional sqrt() call would halve the convergence time, but
  701. * would also impact the final precision. For now we stick with one
  702. * sqrt() call. */
  703. real y = sqrt(x);
  704. real z = (y - real::R_1()) / (y + real::R_1()), z2 = z * z, zn = z2;
  705. real sum = real::R_1();
  706. for (int i = 3; ; i += 2)
  707. {
  708. real newsum = sum + zn / (real)i;
  709. if (newsum == sum)
  710. break;
  711. sum = newsum;
  712. zn *= z2;
  713. }
  714. return z * sum * 4;
  715. }
  716. template<> real log(real const &x)
  717. {
  718. /* Strategy for log(x): if x = 2^E*M then log(x) = E log(2) + log(M),
  719. * with the property that M is in [1..2[, so fast_log() applies here. */
  720. real tmp = x;
  721. if (x.m_signexp >> 31 || x.m_signexp == 0)
  722. {
  723. tmp.m_signexp = 0xffffffffu;
  724. tmp.m_mantissa[0] = 0xffffffffu;
  725. return tmp;
  726. }
  727. tmp.m_signexp = (1 << 30) - 1;
  728. return (real)(int)(x.m_signexp - (1 << 30) + 1) * real::R_LN2()
  729. + fast_log(tmp);
  730. }
  731. template<> real log2(real const &x)
  732. {
  733. /* Strategy for log2(x): see log(x). */
  734. real tmp = x;
  735. if (x.m_signexp >> 31 || x.m_signexp == 0)
  736. {
  737. tmp.m_signexp = 0xffffffffu;
  738. tmp.m_mantissa[0] = 0xffffffffu;
  739. return tmp;
  740. }
  741. tmp.m_signexp = (1 << 30) - 1;
  742. return (real)(int)(x.m_signexp - (1 << 30) + 1)
  743. + fast_log(tmp) * real::R_LOG2E();
  744. }
  745. template<> real log10(real const &x)
  746. {
  747. return log(x) * real::R_LOG10E();
  748. }
  749. static real fast_exp_sub(real const &x, real const &y)
  750. {
  751. /* This fast exp method is tuned to work on the [-1..1] range and
  752. * no effort whatsoever was made to improve convergence outside this
  753. * domain of validity. The argument y is used for cases where we
  754. * don't want the leading 1 in the Taylor series. */
  755. real ret = real::R_1() - y, xn = x;
  756. int i = 1;
  757. for (;;)
  758. {
  759. real newret = ret + xn;
  760. if (newret == ret)
  761. break;
  762. ret = newret * ++i;
  763. xn *= x;
  764. }
  765. return ret / fast_fact(i);
  766. }
  767. template<> real exp(real const &x)
  768. {
  769. /* Strategy for exp(x): the Taylor series does not converge very fast
  770. * with large positive or negative values.
  771. *
  772. * However, we know that the result is going to be in the form M*2^E,
  773. * where M is the mantissa and E the exponent. We first try to predict
  774. * a value for E, which is approximately log2(exp(x)) = x / log(2).
  775. *
  776. * Let E0 be an integer close to x / log(2). We need to find a value x0
  777. * such that exp(x) = 2^E0 * exp(x0). We get x0 = x - E0 log(2).
  778. *
  779. * Thus the final algorithm:
  780. * int E0 = x / log(2)
  781. * real x0 = x - E0 log(2)
  782. * real x1 = exp(x0)
  783. * return x1 * 2^E0
  784. */
  785. int e0 = x / real::R_LN2();
  786. real x0 = x - (real)e0 * real::R_LN2();
  787. real x1 = fast_exp_sub(x0, real::R_0());
  788. x1.m_signexp += e0;
  789. return x1;
  790. }
  791. template<> real exp2(real const &x)
  792. {
  793. /* Strategy for exp2(x): see strategy in exp(). */
  794. int e0 = x;
  795. real x0 = x - (real)e0;
  796. real x1 = fast_exp_sub(x0 * real::R_LN2(), real::R_0());
  797. x1.m_signexp += e0;
  798. return x1;
  799. }
  800. template<> real sinh(real const &x)
  801. {
  802. /* We cannot always use (exp(x)-exp(-x))/2 because we'll lose
  803. * accuracy near zero. We only use this identity for |x|>0.5. If
  804. * |x|<=0.5, we compute exp(x)-1 and exp(-x)-1 instead. */
  805. bool near_zero = (fabs(x) < real::R_1() / 2);
  806. real x1 = near_zero ? fast_exp_sub(x, real::R_1()) : exp(x);
  807. real x2 = near_zero ? fast_exp_sub(-x, real::R_1()) : exp(-x);
  808. return (x1 - x2) / 2;
  809. }
  810. template<> real tanh(real const &x)
  811. {
  812. /* See sinh() for the strategy here */
  813. bool near_zero = (fabs(x) < real::R_1() / 2);
  814. real x1 = near_zero ? fast_exp_sub(x, real::R_1()) : exp(x);
  815. real x2 = near_zero ? fast_exp_sub(-x, real::R_1()) : exp(-x);
  816. real x3 = near_zero ? x1 + x2 + real::R_2() : x1 + x2;
  817. return (x1 - x2) / x3;
  818. }
  819. template<> real cosh(real const &x)
  820. {
  821. /* No need to worry about accuracy here; maybe the last bit is slightly
  822. * off, but that's about it. */
  823. return (exp(x) + exp(-x)) / 2;
  824. }
  825. template<> real frexp(real const &x, int *exp)
  826. {
  827. if (!x)
  828. {
  829. *exp = 0;
  830. return x;
  831. }
  832. real ret = x;
  833. int exponent = (ret.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
  834. *exp = exponent + 1;
  835. ret.m_signexp -= exponent + 1;
  836. return ret;
  837. }
  838. template<> real ldexp(real const &x, int exp)
  839. {
  840. real ret = x;
  841. if (ret)
  842. ret.m_signexp += exp;
  843. return ret;
  844. }
  845. template<> real modf(real const &x, real *iptr)
  846. {
  847. real absx = fabs(x);
  848. real tmp = floor(absx);
  849. *iptr = copysign(tmp, x);
  850. return copysign(absx - tmp, x);
  851. }
  852. template<> real ulp(real const &x)
  853. {
  854. real ret = real::R_1();
  855. if (x)
  856. ret.m_signexp = x.m_signexp + 1 - real::BIGITS * real::BIGIT_BITS;
  857. else
  858. ret.m_signexp = 0;
  859. return ret;
  860. }
  861. template<> real nextafter(real const &x, real const &y)
  862. {
  863. if (x == y)
  864. return x;
  865. else if (x < y)
  866. return x + ulp(x);
  867. else
  868. return x - ulp(x);
  869. }
  870. template<> real copysign(real const &x, real const &y)
  871. {
  872. real ret = x;
  873. ret.m_signexp &= 0x7fffffffu;
  874. ret.m_signexp |= y.m_signexp & 0x80000000u;
  875. return ret;
  876. }
  877. template<> real floor(real const &x)
  878. {
  879. /* Strategy for floor(x):
  880. * - if negative, return -ceil(-x)
  881. * - if zero or negative zero, return x
  882. * - if less than one, return zero
  883. * - otherwise, if e is the exponent, clear all bits except the
  884. * first e. */
  885. if (x < -real::R_0())
  886. return -ceil(-x);
  887. if (!x)
  888. return x;
  889. if (x < real::R_1())
  890. return real::R_0();
  891. real ret = x;
  892. int exponent = x.m_signexp - (1 << 30) + 1;
  893. for (int i = 0; i < real::BIGITS; i++)
  894. {
  895. if (exponent <= 0)
  896. ret.m_mantissa[i] = 0;
  897. else if (exponent < real::BIGIT_BITS)
  898. ret.m_mantissa[i] &= ~((1 << (real::BIGIT_BITS - exponent)) - 1);
  899. exponent -= real::BIGIT_BITS;
  900. }
  901. return ret;
  902. }
  903. template<> real ceil(real const &x)
  904. {
  905. /* Strategy for ceil(x):
  906. * - if negative, return -floor(-x)
  907. * - if x == floor(x), return x
  908. * - otherwise, return floor(x) + 1 */
  909. if (x < -real::R_0())
  910. return -floor(-x);
  911. real ret = floor(x);
  912. if (x == ret)
  913. return ret;
  914. else
  915. return ret + real::R_1();
  916. }
  917. template<> real round(real const &x)
  918. {
  919. if (x < real::R_0())
  920. return -round(-x);
  921. return floor(x + (real::R_1() / 2));
  922. }
  923. template<> real fmod(real const &x, real const &y)
  924. {
  925. if (!y)
  926. return real::R_0(); /* FIXME: return NaN */
  927. if (!x)
  928. return x;
  929. real tmp = round(x / y);
  930. return x - tmp * y;
  931. }
  932. template<> real sin(real const &x)
  933. {
  934. int switch_sign = x.m_signexp & 0x80000000u;
  935. real absx = fmod(fabs(x), real::R_PI() * 2);
  936. if (absx > real::R_PI())
  937. {
  938. absx -= real::R_PI();
  939. switch_sign = !switch_sign;
  940. }
  941. if (absx > real::R_PI_2())
  942. absx = real::R_PI() - absx;
  943. real ret = real::R_0(), fact = real::R_1(), xn = absx, mx2 = -absx * absx;
  944. int i = 1;
  945. for (;;)
  946. {
  947. real newret = ret + xn;
  948. if (newret == ret)
  949. break;
  950. ret = newret * ((i + 1) * (i + 2));
  951. xn *= mx2;
  952. i += 2;
  953. }
  954. ret /= fast_fact(i);
  955. /* Propagate sign */
  956. if (switch_sign)
  957. ret.m_signexp ^= 0x80000000u;
  958. return ret;
  959. }
  960. template<> real cos(real const &x)
  961. {
  962. return sin(real::R_PI_2() - x);
  963. }
  964. template<> real tan(real const &x)
  965. {
  966. /* Constrain input to [-π,π] */
  967. real y = fmod(x, real::R_PI());
  968. /* Constrain input to [-π/2,π/2] */
  969. if (y < -real::R_PI_2())
  970. y += real::R_PI();
  971. else if (y > real::R_PI_2())
  972. y -= real::R_PI();
  973. /* In [-π/4,π/4] return sin/cos */
  974. if (fabs(y) <= real::R_PI_4())
  975. return sin(y) / cos(y);
  976. /* Otherwise, return cos/sin */
  977. if (y > real::R_0())
  978. y = real::R_PI_2() - y;
  979. else
  980. y = -real::R_PI_2() - y;
  981. return cos(y) / sin(y);
  982. }
  983. static inline real asinacos(real const &x, int is_asin, int is_negative)
  984. {
  985. /* Strategy for asin(): in [-0.5..0.5], use a Taylor series around
  986. * zero. In [0.5..1], use asin(x) = π/2 - 2*asin(sqrt((1-x)/2)), and
  987. * in [-1..-0.5] just revert the sign.
  988. * Strategy for acos(): use acos(x) = π/2 - asin(x) and try not to
  989. * lose the precision around x=1. */
  990. real absx = fabs(x);
  991. int around_zero = (absx < (real::R_1() / 2));
  992. if (!around_zero)
  993. absx = sqrt((real::R_1() - absx) / 2);
  994. real ret = absx, xn = absx, x2 = absx * absx, fact1 = 2, fact2 = 1;
  995. for (int i = 1; ; i++)
  996. {
  997. xn *= x2;
  998. real mul = (real)(2 * i + 1);
  999. real newret = ret + ldexp(fact1 * xn / (mul * fact2), -2 * i);
  1000. if (newret == ret)
  1001. break;
  1002. ret = newret;
  1003. fact1 *= (real)((2 * i + 1) * (2 * i + 2));
  1004. fact2 *= (real)((i + 1) * (i + 1));
  1005. }
  1006. if (is_negative)
  1007. ret = -ret;
  1008. if (around_zero)
  1009. ret = is_asin ? ret : real::R_PI_2() - ret;
  1010. else
  1011. {
  1012. real adjust = is_negative ? real::R_PI() : real::R_0();
  1013. if (is_asin)
  1014. ret = real::R_PI_2() - adjust - ret * 2;
  1015. else
  1016. ret = adjust + ret * 2;
  1017. }
  1018. return ret;
  1019. }
  1020. template<> real asin(real const &x)
  1021. {
  1022. return asinacos(x, 1, x.m_signexp >> 31);
  1023. }
  1024. template<> real acos(real const &x)
  1025. {
  1026. return asinacos(x, 0, x.m_signexp >> 31);
  1027. }
  1028. template<> real atan(real const &x)
  1029. {
  1030. /* Computing atan(x): we choose a different Taylor series depending on
  1031. * the value of x to help with convergence.
  1032. *
  1033. * If |x| < 0.5 we evaluate atan(y) near 0:
  1034. * atan(y) = y - y^3/3 + y^5/5 - y^7/7 + y^9/9 ...
  1035. *
  1036. * If 0.5 <= |x| < 1.5 we evaluate atan(1+y) near 0:
  1037. * atan(1+y) = π/4 + y/(1*2^1) - y^2/(2*2^1) + y^3/(3*2^2)
  1038. * - y^5/(5*2^3) + y^6/(6*2^3) - y^7/(7*2^4)
  1039. * + y^9/(9*2^5) - y^10/(10*2^5) + y^11/(11*2^6) ...
  1040. *
  1041. * If 1.5 <= |x| < 2 we evaluate atan(sqrt(3)+2y) near 0:
  1042. * atan(sqrt(3)+2y) = π/3 + 1/2 y - sqrt(3)/2 y^2/2
  1043. * + y^3/3 - sqrt(3)/2 y^4/4 + 1/2 y^5/5
  1044. * - 1/2 y^7/7 + sqrt(3)/2 y^8/8
  1045. * - y^9/9 + sqrt(3)/2 y^10/10 - 1/2 y^11/11
  1046. * + 1/2 y^13/13 - sqrt(3)/2 y^14/14
  1047. * + y^15/15 - sqrt(3)/2 y^16/16 + 1/2 y^17/17 ...
  1048. *
  1049. * If |x| >= 2 we evaluate atan(y) near +∞:
  1050. * atan(y) = π/2 - y^-1 + y^-3/3 - y^-5/5 + y^-7/7 - y^-9/9 ...
  1051. */
  1052. real absx = fabs(x);
  1053. if (absx < (real::R_1() / 2))
  1054. {
  1055. real ret = x, xn = x, mx2 = -x * x;
  1056. for (int i = 3; ; i += 2)
  1057. {
  1058. xn *= mx2;
  1059. real newret = ret + xn / (real)i;
  1060. if (newret == ret)
  1061. break;
  1062. ret = newret;
  1063. }
  1064. return ret;
  1065. }
  1066. real ret = 0;
  1067. if (absx < (real::R_3() / 2))
  1068. {
  1069. real y = real::R_1() - absx;
  1070. real yn = y, my2 = -y * y;
  1071. for (int i = 0; ; i += 2)
  1072. {
  1073. real newret = ret + ldexp(yn / (real)(2 * i + 1), -i - 1);
  1074. yn *= y;
  1075. newret += ldexp(yn / (real)(2 * i + 2), -i - 1);
  1076. yn *= y;
  1077. newret += ldexp(yn / (real)(2 * i + 3), -i - 2);
  1078. if (newret == ret)
  1079. break;
  1080. ret = newret;
  1081. yn *= my2;
  1082. }
  1083. ret = real::R_PI_4() - ret;
  1084. }
  1085. else if (absx < real::R_2())
  1086. {
  1087. real y = (absx - real::R_SQRT3()) / 2;
  1088. real yn = y, my2 = -y * y;
  1089. for (int i = 1; ; i += 6)
  1090. {
  1091. real newret = ret + ((yn / (real)i) / 2);
  1092. yn *= y;
  1093. newret -= (real::R_SQRT3() / 2) * yn / (real)(i + 1);
  1094. yn *= y;
  1095. newret += yn / (real)(i + 2);
  1096. yn *= y;
  1097. newret -= (real::R_SQRT3() / 2) * yn / (real)(i + 3);
  1098. yn *= y;
  1099. newret += (yn / (real)(i + 4)) / 2;
  1100. if (newret == ret)
  1101. break;
  1102. ret = newret;
  1103. yn *= my2;
  1104. }
  1105. ret = real::R_PI_3() + ret;
  1106. }
  1107. else
  1108. {
  1109. real y = re(absx);
  1110. real yn = y, my2 = -y * y;
  1111. ret = y;
  1112. for (int i = 3; ; i += 2)
  1113. {
  1114. yn *= my2;
  1115. real newret = ret + yn / (real)i;
  1116. if (newret == ret)
  1117. break;
  1118. ret = newret;
  1119. }
  1120. ret = real::R_PI_2() - ret;
  1121. }
  1122. /* Propagate sign */
  1123. ret.m_signexp |= (x.m_signexp & 0x80000000u);
  1124. return ret;
  1125. }
  1126. template<> real atan2(real const &y, real const &x)
  1127. {
  1128. if (!y)
  1129. {
  1130. if ((x.m_signexp >> 31) == 0)
  1131. return y;
  1132. if (y.m_signexp >> 31)
  1133. return -real::R_PI();
  1134. return real::R_PI();
  1135. }
  1136. if (!x)
  1137. {
  1138. if (y.m_signexp >> 31)
  1139. return -real::R_PI();
  1140. return real::R_PI();
  1141. }
  1142. /* FIXME: handle the Inf and NaN cases */
  1143. real z = y / x;
  1144. real ret = atan(z);
  1145. if (x < real::R_0())
  1146. ret += (y > real::R_0()) ? real::R_PI() : -real::R_PI();
  1147. return ret;
  1148. }
  1149. template<> void real::hexprint() const
  1150. {
  1151. std::printf("%08x", m_signexp);
  1152. for (int i = 0; i < BIGITS; i++)
  1153. std::printf(" %08x", m_mantissa[i]);
  1154. std::printf("\n");
  1155. }
  1156. template<> void real::sprintf(char *str, int ndigits) const;
  1157. template<> void real::print(int ndigits) const
  1158. {
  1159. char *buf = new char[ndigits + 32 + 10];
  1160. real::sprintf(buf, ndigits);
  1161. std::printf("%s\n", buf);
  1162. delete[] buf;
  1163. }
  1164. template<> void real::sprintf(char *str, int ndigits) const
  1165. {
  1166. real x = *this;
  1167. if (x.m_signexp >> 31)
  1168. {
  1169. *str++ = '-';
  1170. x = -x;
  1171. }
  1172. if (!x)
  1173. {
  1174. std::strcpy(str, "0.0\n");
  1175. return;
  1176. }
  1177. /* Normalise x so that mantissa is in [1..9.999] */
  1178. /* FIXME: better use int64_t when the cast is implemented */
  1179. int exponent = ceil(log10(x));
  1180. x /= pow(R_10(), (real)exponent);
  1181. if (x < R_1())
  1182. {
  1183. x *= R_10();
  1184. exponent--;
  1185. }
  1186. /* Print digits */
  1187. for (int i = 0; i < ndigits; i++)
  1188. {
  1189. int digit = (int)floor(x);
  1190. *str++ = '0' + digit;
  1191. if (i == 0)
  1192. *str++ = '.';
  1193. x -= real(digit);
  1194. x *= R_10();
  1195. }
  1196. /* Print exponent information */
  1197. if (exponent)
  1198. str += std::sprintf(str, "e%c%i", exponent > 0 ? '+' : '-', -exponent);
  1199. *str++ = '\0';
  1200. }
  1201. static real fast_pi()
  1202. {
  1203. /* Approximate Pi using Machin's formula: 16*atan(1/5)-4*atan(1/239) */
  1204. real ret = 0, x0 = 5, x1 = 239;
  1205. real const m0 = -x0 * x0, m1 = -x1 * x1, r16 = 16, r4 = 4;
  1206. for (int i = 1; ; i += 2)
  1207. {
  1208. real newret = ret + r16 / (x0 * (real)i) - r4 / (x1 * (real)i);
  1209. if (newret == ret)
  1210. break;
  1211. ret = newret;
  1212. x0 *= m0;
  1213. x1 *= m1;
  1214. }
  1215. return ret;
  1216. }
  1217. } /* namespace lol */