Du kannst nicht mehr als 25 Themen auswählen Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.
 
 
 

1413 Zeilen
35 KiB

  1. //
  2. // Lol Engine
  3. //
  4. // Copyright: (c) 2010-2012 Sam Hocevar <sam@hocevar.net>
  5. // This program is free software; you can redistribute it and/or
  6. // modify it under the terms of the Do What The Fuck You Want To
  7. // Public License, Version 2, as published by Sam Hocevar. See
  8. // http://sam.zoy.org/projects/COPYING.WTFPL for more details.
  9. //
  10. #if defined HAVE_CONFIG_H
  11. # include "config.h"
  12. #endif
  13. #if defined _XBOX
  14. # define _USE_MATH_DEFINES /* for M_PI */
  15. # include <xtl.h>
  16. # undef near /* Fuck Microsoft */
  17. # undef far /* Fuck Microsoft again */
  18. #elif defined WIN32
  19. # define _USE_MATH_DEFINES /* for M_PI */
  20. # define WIN32_LEAN_AND_MEAN
  21. # include <windows.h>
  22. # undef near /* Fuck Microsoft */
  23. # undef far /* Fuck Microsoft again */
  24. #endif
  25. #include <new>
  26. #include <cstring>
  27. #include <cstdio>
  28. #include <cstdlib>
  29. #include "core.h"
  30. using namespace std;
  31. namespace lol
  32. {
  33. template<> real::Real()
  34. {
  35. m_mantissa = new uint32_t[BIGITS];
  36. memset(m_mantissa, 0, BIGITS * sizeof(uint32_t));
  37. m_signexp = 0;
  38. }
  39. template<> real::Real(real const &x)
  40. {
  41. m_mantissa = new uint32_t[BIGITS];
  42. memcpy(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t));
  43. m_signexp = x.m_signexp;
  44. }
  45. template<> real const &real::operator =(real const &x)
  46. {
  47. if (&x != this)
  48. {
  49. memcpy(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t));
  50. m_signexp = x.m_signexp;
  51. }
  52. return *this;
  53. }
  54. template<> real::~Real()
  55. {
  56. delete[] m_mantissa;
  57. }
  58. template<> real::Real(float f) { new(this) real((double)f); }
  59. template<> real::Real(int i) { new(this) real((double)i); }
  60. template<> real::Real(unsigned int i) { new(this) real((double)i); }
  61. template<> real::Real(double d)
  62. {
  63. new(this) real();
  64. union { double d; uint64_t x; } u = { d };
  65. uint32_t sign = (u.x >> 63) << 31;
  66. uint32_t exponent = (u.x << 1) >> 53;
  67. switch (exponent)
  68. {
  69. case 0x00:
  70. m_signexp = sign;
  71. break;
  72. case 0x7ff:
  73. m_signexp = sign | 0x7fffffffu;
  74. break;
  75. default:
  76. m_signexp = sign | (exponent + (1 << 30) - (1 << 10));
  77. break;
  78. }
  79. m_mantissa[0] = (uint32_t)(u.x >> 20);
  80. m_mantissa[1] = (uint32_t)(u.x << 12);
  81. memset(m_mantissa + 2, 0, (BIGITS - 2) * sizeof(m_mantissa[0]));
  82. }
  83. template<> real::operator float() const { return (float)(double)(*this); }
  84. template<> real::operator int() const { return (int)(double)(*this); }
  85. template<> real::operator unsigned() const { return (unsigned)(double)(*this); }
  86. template<> real::operator double() const
  87. {
  88. union { double d; uint64_t x; } u;
  89. /* Get sign */
  90. u.x = m_signexp >> 31;
  91. u.x <<= 11;
  92. /* Compute new exponent */
  93. uint32_t exponent = (m_signexp << 1) >> 1;
  94. int e = (int)exponent - (1 << 30) + (1 << 10);
  95. if (e < 0)
  96. u.x <<= 52;
  97. else if (e >= 0x7ff)
  98. {
  99. u.x |= 0x7ff;
  100. u.x <<= 52;
  101. }
  102. else
  103. {
  104. u.x |= e;
  105. /* Store mantissa if necessary */
  106. u.x <<= 32;
  107. u.x |= m_mantissa[0];
  108. u.x <<= 20;
  109. u.x |= m_mantissa[1] >> 12;
  110. /* Rounding */
  111. u.x += (m_mantissa[1] >> 11) & 1;
  112. }
  113. return u.d;
  114. }
  115. /*
  116. * Create a real number from an ASCII representation
  117. */
  118. template<> real::Real(char const *str)
  119. {
  120. real ret = 0;
  121. int exponent = 0;
  122. bool comma = false, nonzero = false, negative = false, finished = false;
  123. for (char const *p = str; *p && !finished; p++)
  124. {
  125. switch (*p)
  126. {
  127. case '-':
  128. case '+':
  129. if (p != str)
  130. break;
  131. negative = (*p == '-');
  132. break;
  133. case '.':
  134. if (comma)
  135. finished = true;
  136. comma = true;
  137. break;
  138. case '0': case '1': case '2': case '3': case '4':
  139. case '5': case '6': case '7': case '8': case '9':
  140. if (nonzero)
  141. {
  142. real x = ret + ret;
  143. x = x + x + ret;
  144. ret = x + x;
  145. }
  146. if (*p != '0')
  147. {
  148. ret += (int)(*p - '0');
  149. nonzero = true;
  150. }
  151. if (comma)
  152. exponent--;
  153. break;
  154. case 'e':
  155. case 'E':
  156. exponent += atoi(p + 1);
  157. finished = true;
  158. break;
  159. default:
  160. finished = true;
  161. break;
  162. }
  163. }
  164. if (exponent)
  165. ret *= pow(R_10, (real)exponent);
  166. if (negative)
  167. ret = -ret;
  168. new(this) real(ret);
  169. }
  170. template<> real real::operator +() const
  171. {
  172. return *this;
  173. }
  174. template<> real real::operator -() const
  175. {
  176. real ret = *this;
  177. ret.m_signexp ^= 0x80000000u;
  178. return ret;
  179. }
  180. template<> real real::operator +(real const &x) const
  181. {
  182. if (x.m_signexp << 1 == 0)
  183. return *this;
  184. /* Ensure both arguments are positive. Otherwise, switch signs,
  185. * or replace + with -. */
  186. if (m_signexp >> 31)
  187. return -(-*this + -x);
  188. if (x.m_signexp >> 31)
  189. return *this - (-x);
  190. /* Ensure *this has the larger exponent (no need for the mantissa to
  191. * be larger, as in subtraction). Otherwise, switch. */
  192. if ((m_signexp << 1) < (x.m_signexp << 1))
  193. return x + *this;
  194. real ret;
  195. int e1 = m_signexp - (1 << 30) + 1;
  196. int e2 = x.m_signexp - (1 << 30) + 1;
  197. int bigoff = (e1 - e2) / BIGIT_BITS;
  198. int off = e1 - e2 - bigoff * BIGIT_BITS;
  199. if (bigoff > BIGITS)
  200. return *this;
  201. ret.m_signexp = m_signexp;
  202. uint64_t carry = 0;
  203. for (int i = BIGITS; i--; )
  204. {
  205. carry += m_mantissa[i];
  206. if (i - bigoff >= 0)
  207. carry += x.m_mantissa[i - bigoff] >> off;
  208. if (off && i - bigoff > 0)
  209. carry += (x.m_mantissa[i - bigoff - 1] << (BIGIT_BITS - off)) & 0xffffffffu;
  210. else if (i - bigoff == 0)
  211. carry += (uint64_t)1 << (BIGIT_BITS - off);
  212. ret.m_mantissa[i] = (uint32_t)carry;
  213. carry >>= BIGIT_BITS;
  214. }
  215. /* Renormalise in case we overflowed the mantissa */
  216. if (carry)
  217. {
  218. carry--;
  219. for (int i = 0; i < BIGITS; i++)
  220. {
  221. uint32_t tmp = ret.m_mantissa[i];
  222. ret.m_mantissa[i] = ((uint32_t)carry << (BIGIT_BITS - 1))
  223. | (tmp >> 1);
  224. carry = tmp & 1u;
  225. }
  226. ret.m_signexp++;
  227. }
  228. return ret;
  229. }
  230. template<> real real::operator -(real const &x) const
  231. {
  232. if (x.m_signexp << 1 == 0)
  233. return *this;
  234. /* Ensure both arguments are positive. Otherwise, switch signs,
  235. * or replace - with +. */
  236. if (m_signexp >> 31)
  237. return -(-*this + x);
  238. if (x.m_signexp >> 31)
  239. return (*this) + (-x);
  240. /* Ensure *this is larger than x */
  241. if (*this < x)
  242. return -(x - *this);
  243. real ret;
  244. int e1 = m_signexp - (1 << 30) + 1;
  245. int e2 = x.m_signexp - (1 << 30) + 1;
  246. int bigoff = (e1 - e2) / BIGIT_BITS;
  247. int off = e1 - e2 - bigoff * BIGIT_BITS;
  248. if (bigoff > BIGITS)
  249. return *this;
  250. ret.m_signexp = m_signexp;
  251. /* int64_t instead of uint64_t to preserve sign through shifts */
  252. int64_t carry = 0;
  253. for (int i = 0; i < bigoff; i++)
  254. {
  255. carry -= x.m_mantissa[BIGITS - 1 - i];
  256. /* Emulates a signed shift */
  257. carry >>= BIGIT_BITS;
  258. carry |= carry << BIGIT_BITS;
  259. }
  260. if (bigoff < BIGITS)
  261. carry -= x.m_mantissa[BIGITS - 1 - bigoff] & (((int64_t)1 << off) - 1);
  262. carry /= (int64_t)1 << off;
  263. for (int i = BIGITS; i--; )
  264. {
  265. carry += m_mantissa[i];
  266. if (i - bigoff >= 0)
  267. carry -= x.m_mantissa[i - bigoff] >> off;
  268. if (off && i - bigoff > 0)
  269. carry -= (x.m_mantissa[i - bigoff - 1] << (BIGIT_BITS - off)) & 0xffffffffu;
  270. else if (i - bigoff == 0)
  271. carry -= (uint64_t)1 << (BIGIT_BITS - off);
  272. ret.m_mantissa[i] = (uint32_t)carry;
  273. carry >>= BIGIT_BITS;
  274. carry |= carry << BIGIT_BITS;
  275. }
  276. carry += 1;
  277. /* Renormalise if we underflowed the mantissa */
  278. if (carry == 0)
  279. {
  280. /* How much do we need to shift the mantissa? FIXME: this could
  281. * be computed above */
  282. off = 0;
  283. for (int i = 0; i < BIGITS; i++)
  284. {
  285. if (!ret.m_mantissa[i])
  286. {
  287. off += BIGIT_BITS;
  288. continue;
  289. }
  290. for (uint32_t tmp = ret.m_mantissa[i]; tmp < 0x80000000u; tmp <<= 1)
  291. off++;
  292. break;
  293. }
  294. if (off == BIGITS * BIGIT_BITS)
  295. ret.m_signexp &= 0x80000000u;
  296. else
  297. {
  298. off++; /* Shift one more to get rid of the leading one */
  299. ret.m_signexp -= off;
  300. bigoff = off / BIGIT_BITS;
  301. off -= bigoff * BIGIT_BITS;
  302. for (int i = 0; i < BIGITS; i++)
  303. {
  304. uint32_t tmp = 0;
  305. if (i + bigoff < BIGITS)
  306. tmp |= ret.m_mantissa[i + bigoff] << off;
  307. if (off && i + bigoff + 1 < BIGITS)
  308. tmp |= ret.m_mantissa[i + bigoff + 1] >> (BIGIT_BITS - off);
  309. ret.m_mantissa[i] = tmp;
  310. }
  311. }
  312. }
  313. return ret;
  314. }
  315. template<> real real::operator *(real const &x) const
  316. {
  317. real ret;
  318. if (m_signexp << 1 == 0 || x.m_signexp << 1 == 0)
  319. {
  320. ret = (m_signexp << 1 == 0) ? *this : x;
  321. ret.m_signexp ^= x.m_signexp & 0x80000000u;
  322. return ret;
  323. }
  324. ret.m_signexp = (m_signexp ^ x.m_signexp) & 0x80000000u;
  325. int e = (m_signexp & 0x7fffffffu) - (1 << 30) + 1
  326. + (x.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
  327. /* Accumulate low order product; no need to store it, we just
  328. * want the carry value */
  329. uint64_t carry = 0, hicarry = 0, prev;
  330. for (int i = 0; i < BIGITS; i++)
  331. {
  332. for (int j = 0; j < i + 1; j++)
  333. {
  334. prev = carry;
  335. carry += (uint64_t)m_mantissa[BIGITS - 1 - j]
  336. * (uint64_t)x.m_mantissa[BIGITS - 1 + j - i];
  337. if (carry < prev)
  338. hicarry++;
  339. }
  340. carry >>= BIGIT_BITS;
  341. carry |= hicarry << BIGIT_BITS;
  342. hicarry >>= BIGIT_BITS;
  343. }
  344. for (int i = 0; i < BIGITS; i++)
  345. {
  346. for (int j = i + 1; j < BIGITS; j++)
  347. {
  348. prev = carry;
  349. carry += (uint64_t)m_mantissa[BIGITS - 1 - j]
  350. * (uint64_t)x.m_mantissa[j - 1 - i];
  351. if (carry < prev)
  352. hicarry++;
  353. }
  354. prev = carry;
  355. carry += m_mantissa[BIGITS - 1 - i];
  356. carry += x.m_mantissa[BIGITS - 1 - i];
  357. if (carry < prev)
  358. hicarry++;
  359. ret.m_mantissa[BIGITS - 1 - i] = carry & 0xffffffffu;
  360. carry >>= BIGIT_BITS;
  361. carry |= hicarry << BIGIT_BITS;
  362. hicarry >>= BIGIT_BITS;
  363. }
  364. /* Renormalise in case we overflowed the mantissa */
  365. if (carry)
  366. {
  367. carry--;
  368. for (int i = 0; i < BIGITS; i++)
  369. {
  370. uint32_t tmp = (uint32_t)ret.m_mantissa[i];
  371. ret.m_mantissa[i] = ((uint32_t)carry << (BIGIT_BITS - 1))
  372. | (tmp >> 1);
  373. carry = tmp & 1u;
  374. }
  375. e++;
  376. }
  377. ret.m_signexp |= e + (1 << 30) - 1;
  378. return ret;
  379. }
  380. template<> real real::operator /(real const &x) const
  381. {
  382. return *this * re(x);
  383. }
  384. template<> real const &real::operator +=(real const &x)
  385. {
  386. real tmp = *this;
  387. return *this = tmp + x;
  388. }
  389. template<> real const &real::operator -=(real const &x)
  390. {
  391. real tmp = *this;
  392. return *this = tmp - x;
  393. }
  394. template<> real const &real::operator *=(real const &x)
  395. {
  396. real tmp = *this;
  397. return *this = tmp * x;
  398. }
  399. template<> real const &real::operator /=(real const &x)
  400. {
  401. real tmp = *this;
  402. return *this = tmp / x;
  403. }
  404. template<> bool real::operator ==(real const &x) const
  405. {
  406. if ((m_signexp << 1) == 0 && (x.m_signexp << 1) == 0)
  407. return true;
  408. if (m_signexp != x.m_signexp)
  409. return false;
  410. return memcmp(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t)) == 0;
  411. }
  412. template<> bool real::operator !=(real const &x) const
  413. {
  414. return !(*this == x);
  415. }
  416. template<> bool real::operator <(real const &x) const
  417. {
  418. /* Ensure both numbers are positive */
  419. if (m_signexp >> 31)
  420. return (x.m_signexp >> 31) ? -*this > -x : true;
  421. if (x.m_signexp >> 31)
  422. return false;
  423. /* Compare all relevant bits */
  424. if (m_signexp != x.m_signexp)
  425. return m_signexp < x.m_signexp;
  426. for (int i = 0; i < BIGITS; i++)
  427. if (m_mantissa[i] != x.m_mantissa[i])
  428. return m_mantissa[i] < x.m_mantissa[i];
  429. return false;
  430. }
  431. template<> bool real::operator <=(real const &x) const
  432. {
  433. return !(*this > x);
  434. }
  435. template<> bool real::operator >(real const &x) const
  436. {
  437. /* Ensure both numbers are positive */
  438. if (m_signexp >> 31)
  439. return (x.m_signexp >> 31) ? -*this < -x : false;
  440. if (x.m_signexp >> 31)
  441. return true;
  442. /* Compare all relevant bits */
  443. if (m_signexp != x.m_signexp)
  444. return m_signexp > x.m_signexp;
  445. for (int i = 0; i < BIGITS; i++)
  446. if (m_mantissa[i] != x.m_mantissa[i])
  447. return m_mantissa[i] > x.m_mantissa[i];
  448. return false;
  449. }
  450. template<> bool real::operator >=(real const &x) const
  451. {
  452. return !(*this < x);
  453. }
  454. template<> bool real::operator !() const
  455. {
  456. return !(bool)*this;
  457. }
  458. template<> real::operator bool() const
  459. {
  460. /* A real is "true" if it is non-zero (exponent is non-zero) AND
  461. * not NaN (exponent is not full bits OR higher order mantissa is zero) */
  462. uint32_t exponent = m_signexp << 1;
  463. return exponent && (~exponent || m_mantissa[0] == 0);
  464. }
  465. template<> real min(real const &a, real const &b)
  466. {
  467. return (a < b) ? a : b;
  468. }
  469. template<> real max(real const &a, real const &b)
  470. {
  471. return (a > b) ? a : b;
  472. }
  473. template<> real clamp(real const &x, real const &a, real const &b)
  474. {
  475. return (x < a) ? a : (x > b) ? b : x;
  476. }
  477. template<> real re(real const &x)
  478. {
  479. if (!(x.m_signexp << 1))
  480. {
  481. real ret = x;
  482. ret.m_signexp = x.m_signexp | 0x7fffffffu;
  483. ret.m_mantissa[0] = 0;
  484. return ret;
  485. }
  486. /* Use the system's float inversion to approximate 1/x */
  487. union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f };
  488. v.x |= x.m_mantissa[0] >> 9;
  489. v.f = 1.0f / v.f;
  490. real ret;
  491. ret.m_mantissa[0] = v.x << 9;
  492. uint32_t sign = x.m_signexp & 0x80000000u;
  493. ret.m_signexp = sign;
  494. int exponent = (x.m_signexp & 0x7fffffffu) + 1;
  495. exponent = -exponent + (v.x >> 23) - (u.x >> 23);
  496. ret.m_signexp |= (exponent - 1) & 0x7fffffffu;
  497. /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
  498. * convergence, but this hasn't been checked seriously. */
  499. for (int i = 1; i <= real::BIGITS; i *= 2)
  500. ret = ret * (real::R_2 - ret * x);
  501. return ret;
  502. }
  503. template<> real sqrt(real const &x)
  504. {
  505. /* if zero, return x */
  506. if (!(x.m_signexp << 1))
  507. return x;
  508. /* if negative, return NaN */
  509. if (x.m_signexp >> 31)
  510. {
  511. real ret;
  512. ret.m_signexp = 0x7fffffffu;
  513. ret.m_mantissa[0] = 0xffffu;
  514. return ret;
  515. }
  516. /* Use the system's float inversion to approximate 1/sqrt(x). First
  517. * we construct a float in the [1..4[ range that has roughly the same
  518. * mantissa as our real. Its exponent is 0 or 1, depending on the
  519. * partity of x. The final exponent is 0, -1 or -2. We use the final
  520. * exponent and final mantissa to pre-fill the result. */
  521. union { float f; uint32_t x; } u = { 1.0f }, v = { 2.0f };
  522. v.x -= ((x.m_signexp & 1) << 23);
  523. v.x |= x.m_mantissa[0] >> 9;
  524. v.f = 1.0f / sqrtf(v.f);
  525. real ret;
  526. ret.m_mantissa[0] = v.x << 9;
  527. uint32_t sign = x.m_signexp & 0x80000000u;
  528. ret.m_signexp = sign;
  529. uint32_t exponent = (x.m_signexp & 0x7fffffffu);
  530. exponent = ((1 << 30) + (1 << 29) - 1) - (exponent + 1) / 2;
  531. exponent = exponent + (v.x >> 23) - (u.x >> 23);
  532. ret.m_signexp |= exponent & 0x7fffffffu;
  533. /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
  534. * convergence, but this hasn't been checked seriously. */
  535. for (int i = 1; i <= real::BIGITS; i *= 2)
  536. {
  537. ret = ret * (real::R_3 - ret * ret * x);
  538. ret.m_signexp--;
  539. }
  540. return ret * x;
  541. }
  542. template<> real cbrt(real const &x)
  543. {
  544. /* if zero, return x */
  545. if (!(x.m_signexp << 1))
  546. return x;
  547. /* Use the system's float inversion to approximate cbrt(x). First
  548. * we construct a float in the [1..8[ range that has roughly the same
  549. * mantissa as our real. Its exponent is 0, 1 or 2, depending on the
  550. * value of x. The final exponent is 0 or 1 (special case). We use
  551. * the final exponent and final mantissa to pre-fill the result. */
  552. union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f };
  553. v.x += ((x.m_signexp % 3) << 23);
  554. v.x |= x.m_mantissa[0] >> 9;
  555. v.f = powf(v.f, 0.33333333333333333f);
  556. real ret;
  557. ret.m_mantissa[0] = v.x << 9;
  558. uint32_t sign = x.m_signexp & 0x80000000u;
  559. ret.m_signexp = sign;
  560. int exponent = (x.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
  561. exponent = exponent / 3 + (v.x >> 23) - (u.x >> 23);
  562. ret.m_signexp |= (exponent + (1 << 30) - 1) & 0x7fffffffu;
  563. /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
  564. * convergence, but this hasn't been checked seriously. */
  565. for (int i = 1; i <= real::BIGITS; i *= 2)
  566. {
  567. static real third = re(real::R_3);
  568. ret = third * (x / (ret * ret) + (ret / 2));
  569. }
  570. return ret;
  571. }
  572. template<> real pow(real const &x, real const &y)
  573. {
  574. if (!y)
  575. return real::R_1;
  576. if (!x)
  577. return real::R_0;
  578. if (x > real::R_0)
  579. return exp(y * log(x));
  580. else /* x < 0 */
  581. {
  582. /* Odd integer exponent */
  583. if (y == (round(y / 2) * 2))
  584. return exp(y * log(-x));
  585. /* Even integer exponent */
  586. if (y == round(y))
  587. return -exp(y * log(-x));
  588. /* FIXME: negative nth root */
  589. return real::R_0;
  590. }
  591. }
  592. static real fast_fact(int x)
  593. {
  594. real ret = real::R_1;
  595. int i = 1, multiplier = 1, exponent = 0;
  596. for (;;)
  597. {
  598. if (i++ >= x)
  599. /* Multiplication is a no-op if multiplier == 1 */
  600. return ldexp(ret * multiplier, exponent);
  601. int tmp = i;
  602. while ((tmp & 1) == 0)
  603. {
  604. tmp >>= 1;
  605. exponent++;
  606. }
  607. if (multiplier * tmp / tmp != multiplier)
  608. {
  609. ret *= multiplier;
  610. multiplier = 1;
  611. }
  612. multiplier *= tmp;
  613. }
  614. }
  615. template<> real gamma(real const &x)
  616. {
  617. /* We use Spouge's formula. FIXME: precision is far from acceptable,
  618. * especially with large values. We need to compute this with higher
  619. * precision values in order to attain the desired accuracy. It might
  620. * also be useful to sort the ck values by decreasing absolute value
  621. * and do the addition in this order. */
  622. int a = ceilf(logf(2) / logf(2 * M_PI) * real::BIGITS * real::BIGIT_BITS);
  623. real ret = sqrt(real::R_PI * 2);
  624. real fact_k_1 = real::R_1;
  625. for (int k = 1; k < a; k++)
  626. {
  627. real a_k = (real)(a - k);
  628. real ck = pow(a_k, (real)((float)k - 0.5)) * exp(a_k)
  629. / (fact_k_1 * (x + (real)(k - 1)));
  630. ret += ck;
  631. fact_k_1 *= (real)-k;
  632. }
  633. ret *= pow(x + (real)(a - 1), x - (real::R_1 / 2));
  634. ret *= exp(-x - (real)(a - 1));
  635. return ret;
  636. }
  637. template<> real fabs(real const &x)
  638. {
  639. real ret = x;
  640. ret.m_signexp &= 0x7fffffffu;
  641. return ret;
  642. }
  643. static real fast_log(real const &x)
  644. {
  645. /* This fast log method is tuned to work on the [1..2] range and
  646. * no effort whatsoever was made to improve convergence outside this
  647. * domain of validity. It can converge pretty fast, provided we use
  648. * the following variable substitutions:
  649. * y = sqrt(x)
  650. * z = (y - 1) / (y + 1)
  651. *
  652. * And the following identities:
  653. * ln(x) = 2 ln(y)
  654. * = 2 ln((1 + z) / (1 - z))
  655. * = 4 z (1 + z^2 / 3 + z^4 / 5 + z^6 / 7...)
  656. *
  657. * Any additional sqrt() call would halve the convergence time, but
  658. * would also impact the final precision. For now we stick with one
  659. * sqrt() call. */
  660. real y = sqrt(x);
  661. real z = (y - real::R_1) / (y + real::R_1), z2 = z * z, zn = z2;
  662. real sum = real::R_1;
  663. for (int i = 3; ; i += 2)
  664. {
  665. real newsum = sum + zn / (real)i;
  666. if (newsum == sum)
  667. break;
  668. sum = newsum;
  669. zn *= z2;
  670. }
  671. return z * sum * 4;
  672. }
  673. template<> real log(real const &x)
  674. {
  675. /* Strategy for log(x): if x = 2^E*M then log(x) = E log(2) + log(M),
  676. * with the property that M is in [1..2[, so fast_log() applies here. */
  677. real tmp = x;
  678. if (x.m_signexp >> 31 || x.m_signexp == 0)
  679. {
  680. tmp.m_signexp = 0xffffffffu;
  681. tmp.m_mantissa[0] = 0xffffffffu;
  682. return tmp;
  683. }
  684. tmp.m_signexp = (1 << 30) - 1;
  685. return (real)(int)(x.m_signexp - (1 << 30) + 1) * real::R_LN2
  686. + fast_log(tmp);
  687. }
  688. template<> real log2(real const &x)
  689. {
  690. /* Strategy for log2(x): see log(x). */
  691. real tmp = x;
  692. if (x.m_signexp >> 31 || x.m_signexp == 0)
  693. {
  694. tmp.m_signexp = 0xffffffffu;
  695. tmp.m_mantissa[0] = 0xffffffffu;
  696. return tmp;
  697. }
  698. tmp.m_signexp = (1 << 30) - 1;
  699. return (real)(int)(x.m_signexp - (1 << 30) + 1)
  700. + fast_log(tmp) * real::R_LOG2E;
  701. }
  702. template<> real log10(real const &x)
  703. {
  704. return log(x) * real::R_LOG10E;
  705. }
  706. static real fast_exp_sub(real const &x, real const &y)
  707. {
  708. /* This fast exp method is tuned to work on the [-1..1] range and
  709. * no effort whatsoever was made to improve convergence outside this
  710. * domain of validity. The argument y is used for cases where we
  711. * don't want the leading 1 in the Taylor series. */
  712. real ret = real::R_1 - y, xn = x;
  713. int i = 1;
  714. for (;;)
  715. {
  716. real newret = ret + xn;
  717. if (newret == ret)
  718. break;
  719. ret = newret * ++i;
  720. xn *= x;
  721. }
  722. return ret / fast_fact(i);
  723. }
  724. template<> real exp(real const &x)
  725. {
  726. /* Strategy for exp(x): the Taylor series does not converge very fast
  727. * with large positive or negative values.
  728. *
  729. * However, we know that the result is going to be in the form M*2^E,
  730. * where M is the mantissa and E the exponent. We first try to predict
  731. * a value for E, which is approximately log2(exp(x)) = x / log(2).
  732. *
  733. * Let E0 be an integer close to x / log(2). We need to find a value x0
  734. * such that exp(x) = 2^E0 * exp(x0). We get x0 = x - E0 log(2).
  735. *
  736. * Thus the final algorithm:
  737. * int E0 = x / log(2)
  738. * real x0 = x - E0 log(2)
  739. * real x1 = exp(x0)
  740. * return x1 * 2^E0
  741. */
  742. int e0 = x / real::R_LN2;
  743. real x0 = x - (real)e0 * real::R_LN2;
  744. real x1 = fast_exp_sub(x0, real::R_0);
  745. x1.m_signexp += e0;
  746. return x1;
  747. }
  748. template<> real exp2(real const &x)
  749. {
  750. /* Strategy for exp2(x): see strategy in exp(). */
  751. int e0 = x;
  752. real x0 = x - (real)e0;
  753. real x1 = fast_exp_sub(x0 * real::R_LN2, real::R_0);
  754. x1.m_signexp += e0;
  755. return x1;
  756. }
  757. template<> real sinh(real const &x)
  758. {
  759. /* We cannot always use (exp(x)-exp(-x))/2 because we'll lose
  760. * accuracy near zero. We only use this identity for |x|>0.5. If
  761. * |x|<=0.5, we compute exp(x)-1 and exp(-x)-1 instead. */
  762. bool near_zero = (fabs(x) < real::R_1 / 2);
  763. real x1 = near_zero ? fast_exp_sub(x, real::R_1) : exp(x);
  764. real x2 = near_zero ? fast_exp_sub(-x, real::R_1) : exp(-x);
  765. return (x1 - x2) / 2;
  766. }
  767. template<> real tanh(real const &x)
  768. {
  769. /* See sinh() for the strategy here */
  770. bool near_zero = (fabs(x) < real::R_1 / 2);
  771. real x1 = near_zero ? fast_exp_sub(x, real::R_1) : exp(x);
  772. real x2 = near_zero ? fast_exp_sub(-x, real::R_1) : exp(-x);
  773. real x3 = near_zero ? x1 + x2 + real::R_2 : x1 + x2;
  774. return (x1 - x2) / x3;
  775. }
  776. template<> real cosh(real const &x)
  777. {
  778. /* No need to worry about accuracy here; maybe the last bit is slightly
  779. * off, but that's about it. */
  780. return (exp(x) + exp(-x)) / 2;
  781. }
  782. template<> real frexp(real const &x, int *exp)
  783. {
  784. if (!x)
  785. {
  786. *exp = 0;
  787. return x;
  788. }
  789. real ret = x;
  790. int exponent = (ret.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
  791. *exp = exponent + 1;
  792. ret.m_signexp -= exponent + 1;
  793. return ret;
  794. }
  795. template<> real ldexp(real const &x, int exp)
  796. {
  797. real ret = x;
  798. if (ret)
  799. ret.m_signexp += exp;
  800. return ret;
  801. }
  802. template<> real modf(real const &x, real *iptr)
  803. {
  804. real absx = fabs(x);
  805. real tmp = floor(absx);
  806. *iptr = copysign(tmp, x);
  807. return copysign(absx - tmp, x);
  808. }
  809. template<> real ulp(real const &x)
  810. {
  811. real ret = real::R_1;
  812. if (x)
  813. ret.m_signexp = x.m_signexp + 1 - real::BIGITS * real::BIGIT_BITS;
  814. else
  815. ret.m_signexp = 0;
  816. return ret;
  817. }
  818. template<> real nextafter(real const &x, real const &y)
  819. {
  820. if (x == y)
  821. return x;
  822. else if (x < y)
  823. return x + ulp(x);
  824. else
  825. return x - ulp(x);
  826. }
  827. template<> real copysign(real const &x, real const &y)
  828. {
  829. real ret = x;
  830. ret.m_signexp &= 0x7fffffffu;
  831. ret.m_signexp |= y.m_signexp & 0x80000000u;
  832. return ret;
  833. }
  834. template<> real floor(real const &x)
  835. {
  836. /* Strategy for floor(x):
  837. * - if negative, return -ceil(-x)
  838. * - if zero or negative zero, return x
  839. * - if less than one, return zero
  840. * - otherwise, if e is the exponent, clear all bits except the
  841. * first e. */
  842. if (x < -real::R_0)
  843. return -ceil(-x);
  844. if (!x)
  845. return x;
  846. if (x < real::R_1)
  847. return real::R_0;
  848. real ret = x;
  849. int exponent = x.m_signexp - (1 << 30) + 1;
  850. for (int i = 0; i < real::BIGITS; i++)
  851. {
  852. if (exponent <= 0)
  853. ret.m_mantissa[i] = 0;
  854. else if (exponent < real::BIGIT_BITS)
  855. ret.m_mantissa[i] &= ~((1 << (real::BIGIT_BITS - exponent)) - 1);
  856. exponent -= real::BIGIT_BITS;
  857. }
  858. return ret;
  859. }
  860. template<> real ceil(real const &x)
  861. {
  862. /* Strategy for ceil(x):
  863. * - if negative, return -floor(-x)
  864. * - if x == floor(x), return x
  865. * - otherwise, return floor(x) + 1 */
  866. if (x < -real::R_0)
  867. return -floor(-x);
  868. real ret = floor(x);
  869. if (x == ret)
  870. return ret;
  871. else
  872. return ret + real::R_1;
  873. }
  874. template<> real round(real const &x)
  875. {
  876. if (x < real::R_0)
  877. return -round(-x);
  878. return floor(x + (real::R_1 / 2));
  879. }
  880. template<> real fmod(real const &x, real const &y)
  881. {
  882. if (!y)
  883. return real::R_0; /* FIXME: return NaN */
  884. if (!x)
  885. return x;
  886. real tmp = round(x / y);
  887. return x - tmp * y;
  888. }
  889. template<> real sin(real const &x)
  890. {
  891. int switch_sign = x.m_signexp & 0x80000000u;
  892. real absx = fmod(fabs(x), real::R_PI * 2);
  893. if (absx > real::R_PI)
  894. {
  895. absx -= real::R_PI;
  896. switch_sign = !switch_sign;
  897. }
  898. if (absx > real::R_PI_2)
  899. absx = real::R_PI - absx;
  900. real ret = real::R_0, fact = real::R_1, xn = absx, mx2 = -absx * absx;
  901. int i = 1;
  902. for (;;)
  903. {
  904. real newret = ret + xn;
  905. if (newret == ret)
  906. break;
  907. ret = newret * ((i + 1) * (i + 2));
  908. xn *= mx2;
  909. i += 2;
  910. }
  911. ret /= fast_fact(i);
  912. /* Propagate sign */
  913. if (switch_sign)
  914. ret.m_signexp ^= 0x80000000u;
  915. return ret;
  916. }
  917. template<> real cos(real const &x)
  918. {
  919. return sin(real::R_PI_2 - x);
  920. }
  921. template<> real tan(real const &x)
  922. {
  923. /* Constrain input to [-π,π] */
  924. real y = fmod(x, real::R_PI);
  925. /* Constrain input to [-π/2,π/2] */
  926. if (y < -real::R_PI_2)
  927. y += real::R_PI;
  928. else if (y > real::R_PI_2)
  929. y -= real::R_PI;
  930. /* In [-π/4,π/4] return sin/cos */
  931. if (fabs(y) <= real::R_PI_4)
  932. return sin(y) / cos(y);
  933. /* Otherwise, return cos/sin */
  934. if (y > real::R_0)
  935. y = real::R_PI_2 - y;
  936. else
  937. y = -real::R_PI_2 - y;
  938. return cos(y) / sin(y);
  939. }
  940. static inline real asinacos(real const &x, int is_asin, int is_negative)
  941. {
  942. /* Strategy for asin(): in [-0.5..0.5], use a Taylor series around
  943. * zero. In [0.5..1], use asin(x) = π/2 - 2*asin(sqrt((1-x)/2)), and
  944. * in [-1..-0.5] just revert the sign.
  945. * Strategy for acos(): use acos(x) = π/2 - asin(x) and try not to
  946. * lose the precision around x=1. */
  947. real absx = fabs(x);
  948. int around_zero = (absx < (real::R_1 / 2));
  949. if (!around_zero)
  950. absx = sqrt((real::R_1 - absx) / 2);
  951. real ret = absx, xn = absx, x2 = absx * absx, fact1 = 2, fact2 = 1;
  952. for (int i = 1; ; i++)
  953. {
  954. xn *= x2;
  955. real mul = (real)(2 * i + 1);
  956. real newret = ret + ldexp(fact1 * xn / (mul * fact2), -2 * i);
  957. if (newret == ret)
  958. break;
  959. ret = newret;
  960. fact1 *= (real)((2 * i + 1) * (2 * i + 2));
  961. fact2 *= (real)((i + 1) * (i + 1));
  962. }
  963. if (is_negative)
  964. ret = -ret;
  965. if (around_zero)
  966. ret = is_asin ? ret : real::R_PI_2 - ret;
  967. else
  968. {
  969. real adjust = is_negative ? real::R_PI : real::R_0;
  970. if (is_asin)
  971. ret = real::R_PI_2 - adjust - ret * 2;
  972. else
  973. ret = adjust + ret * 2;
  974. }
  975. return ret;
  976. }
  977. template<> real asin(real const &x)
  978. {
  979. return asinacos(x, 1, x.m_signexp >> 31);
  980. }
  981. template<> real acos(real const &x)
  982. {
  983. return asinacos(x, 0, x.m_signexp >> 31);
  984. }
  985. template<> real atan(real const &x)
  986. {
  987. /* Computing atan(x): we choose a different Taylor series depending on
  988. * the value of x to help with convergence.
  989. *
  990. * If |x| < 0.5 we evaluate atan(y) near 0:
  991. * atan(y) = y - y^3/3 + y^5/5 - y^7/7 + y^9/9 ...
  992. *
  993. * If 0.5 <= |x| < 1.5 we evaluate atan(1+y) near 0:
  994. * atan(1+y) = π/4 + y/(1*2^1) - y^2/(2*2^1) + y^3/(3*2^2)
  995. * - y^5/(5*2^3) + y^6/(6*2^3) - y^7/(7*2^4)
  996. * + y^9/(9*2^5) - y^10/(10*2^5) + y^11/(11*2^6) ...
  997. *
  998. * If 1.5 <= |x| < 2 we evaluate atan(sqrt(3)+2y) near 0:
  999. * atan(sqrt(3)+2y) = π/3 + 1/2 y - sqrt(3)/2 y^2/2
  1000. * + y^3/3 - sqrt(3)/2 y^4/4 + 1/2 y^5/5
  1001. * - 1/2 y^7/7 + sqrt(3)/2 y^8/8
  1002. * - y^9/9 + sqrt(3)/2 y^10/10 - 1/2 y^11/11
  1003. * + 1/2 y^13/13 - sqrt(3)/2 y^14/14
  1004. * + y^15/15 - sqrt(3)/2 y^16/16 + 1/2 y^17/17 ...
  1005. *
  1006. * If |x| >= 2 we evaluate atan(y) near +∞:
  1007. * atan(y) = π/2 - y^-1 + y^-3/3 - y^-5/5 + y^-7/7 - y^-9/9 ...
  1008. */
  1009. real absx = fabs(x);
  1010. if (absx < (real::R_1 / 2))
  1011. {
  1012. real ret = x, xn = x, mx2 = -x * x;
  1013. for (int i = 3; ; i += 2)
  1014. {
  1015. xn *= mx2;
  1016. real newret = ret + xn / (real)i;
  1017. if (newret == ret)
  1018. break;
  1019. ret = newret;
  1020. }
  1021. return ret;
  1022. }
  1023. real ret = 0;
  1024. if (absx < (real::R_3 / 2))
  1025. {
  1026. real y = real::R_1 - absx;
  1027. real yn = y, my2 = -y * y;
  1028. for (int i = 0; ; i += 2)
  1029. {
  1030. real newret = ret + ldexp(yn / (real)(2 * i + 1), -i - 1);
  1031. yn *= y;
  1032. newret += ldexp(yn / (real)(2 * i + 2), -i - 1);
  1033. yn *= y;
  1034. newret += ldexp(yn / (real)(2 * i + 3), -i - 2);
  1035. if (newret == ret)
  1036. break;
  1037. ret = newret;
  1038. yn *= my2;
  1039. }
  1040. ret = real::R_PI_4 - ret;
  1041. }
  1042. else if (absx < real::R_2)
  1043. {
  1044. real y = (absx - real::R_SQRT3) / 2;
  1045. real yn = y, my2 = -y * y;
  1046. for (int i = 1; ; i += 6)
  1047. {
  1048. real newret = ret + ((yn / (real)i) / 2);
  1049. yn *= y;
  1050. newret -= (real::R_SQRT3 / 2) * yn / (real)(i + 1);
  1051. yn *= y;
  1052. newret += yn / (real)(i + 2);
  1053. yn *= y;
  1054. newret -= (real::R_SQRT3 / 2) * yn / (real)(i + 3);
  1055. yn *= y;
  1056. newret += (yn / (real)(i + 4)) / 2;
  1057. if (newret == ret)
  1058. break;
  1059. ret = newret;
  1060. yn *= my2;
  1061. }
  1062. ret = real::R_PI_3 + ret;
  1063. }
  1064. else
  1065. {
  1066. real y = re(absx);
  1067. real yn = y, my2 = -y * y;
  1068. ret = y;
  1069. for (int i = 3; ; i += 2)
  1070. {
  1071. yn *= my2;
  1072. real newret = ret + yn / (real)i;
  1073. if (newret == ret)
  1074. break;
  1075. ret = newret;
  1076. }
  1077. ret = real::R_PI_2 - ret;
  1078. }
  1079. /* Propagate sign */
  1080. ret.m_signexp |= (x.m_signexp & 0x80000000u);
  1081. return ret;
  1082. }
  1083. template<> real atan2(real const &y, real const &x)
  1084. {
  1085. if (!y)
  1086. {
  1087. if ((x.m_signexp >> 31) == 0)
  1088. return y;
  1089. if (y.m_signexp >> 31)
  1090. return -real::R_PI;
  1091. return real::R_PI;
  1092. }
  1093. if (!x)
  1094. {
  1095. if (y.m_signexp >> 31)
  1096. return -real::R_PI;
  1097. return real::R_PI;
  1098. }
  1099. /* FIXME: handle the Inf and NaN cases */
  1100. real z = y / x;
  1101. real ret = atan(z);
  1102. if (x < real::R_0)
  1103. ret += (y > real::R_0) ? real::R_PI : -real::R_PI;
  1104. return ret;
  1105. }
  1106. template<> void real::hexprint() const
  1107. {
  1108. std::printf("%08x", m_signexp);
  1109. for (int i = 0; i < BIGITS; i++)
  1110. std::printf(" %08x", m_mantissa[i]);
  1111. std::printf("\n");
  1112. }
  1113. template<> void real::sprintf(char *str, int ndigits) const;
  1114. template<> void real::print(int ndigits) const
  1115. {
  1116. char *buf = new char[ndigits + 32 + 10];
  1117. real::sprintf(buf, ndigits);
  1118. std::printf("%s\n", buf);
  1119. delete[] buf;
  1120. }
  1121. template<> void real::sprintf(char *str, int ndigits) const
  1122. {
  1123. real x = *this;
  1124. if (x.m_signexp >> 31)
  1125. {
  1126. *str++ = '-';
  1127. x = -x;
  1128. }
  1129. if (!x)
  1130. {
  1131. std::strcpy(str, "0.0\n");
  1132. return;
  1133. }
  1134. /* Normalise x so that mantissa is in [1..9.999] */
  1135. /* FIXME: better use int64_t when the cast is implemented */
  1136. int exponent = ceil(log10(x));
  1137. x /= pow(R_10, (real)exponent);
  1138. if (x < R_1)
  1139. {
  1140. x *= R_10;
  1141. exponent--;
  1142. }
  1143. /* Print digits */
  1144. for (int i = 0; i < ndigits; i++)
  1145. {
  1146. int digit = (int)floor(x);
  1147. *str++ = '0' + digit;
  1148. if (i == 0)
  1149. *str++ = '.';
  1150. x -= real(digit);
  1151. x *= R_10;
  1152. }
  1153. /* Print exponent information */
  1154. if (exponent)
  1155. str += std::sprintf(str, "e%c%i", exponent > 0 ? '+' : '-', -exponent);
  1156. *str++ = '\0';
  1157. }
  1158. static real fast_pi()
  1159. {
  1160. /* Approximate Pi using Machin's formula: 16*atan(1/5)-4*atan(1/239) */
  1161. real ret = 0, x0 = 5, x1 = 239;
  1162. real const m0 = -x0 * x0, m1 = -x1 * x1, r16 = 16, r4 = 4;
  1163. for (int i = 1; ; i += 2)
  1164. {
  1165. real newret = ret + r16 / (x0 * (real)i) - r4 / (x1 * (real)i);
  1166. if (newret == ret)
  1167. break;
  1168. ret = newret;
  1169. x0 *= m0;
  1170. x1 *= m1;
  1171. }
  1172. return ret;
  1173. }
  1174. template<> real const real::R_0 = (real)0.0;
  1175. template<> real const real::R_1 = (real)1.0;
  1176. template<> real const real::R_2 = (real)2.0;
  1177. template<> real const real::R_3 = (real)3.0;
  1178. template<> real const real::R_10 = (real)10.0;
  1179. /*
  1180. * Initialisation order is important here:
  1181. * - fast_log() requires R_1
  1182. * - log() requires R_LN2
  1183. * - re() require R_2
  1184. * - exp() requires R_0, R_1, R_LN2
  1185. * - sqrt() requires R_3
  1186. */
  1187. template<> real const real::R_LN2 = fast_log(R_2);
  1188. template<> real const real::R_LN10 = log(R_10);
  1189. template<> real const real::R_LOG2E = re(R_LN2);
  1190. template<> real const real::R_LOG10E = re(R_LN10);
  1191. template<> real const real::R_E = exp(R_1);
  1192. template<> real const real::R_PI = fast_pi();
  1193. template<> real const real::R_PI_2 = R_PI / 2;
  1194. template<> real const real::R_PI_3 = R_PI / R_3;
  1195. template<> real const real::R_PI_4 = R_PI / 4;
  1196. template<> real const real::R_1_PI = re(R_PI);
  1197. template<> real const real::R_2_PI = R_1_PI * 2;
  1198. template<> real const real::R_2_SQRTPI = re(sqrt(R_PI)) * 2;
  1199. template<> real const real::R_SQRT2 = sqrt(R_2);
  1200. template<> real const real::R_SQRT3 = sqrt(R_3);
  1201. template<> real const real::R_SQRT1_2 = R_SQRT2 / 2;
  1202. } /* namespace lol */