1583 regels
41 KiB

  1. //
  2. // Lol Engine
  3. //
  4. // Copyright © 2010—2017 Sam Hocevar <sam@hocevar.net>
  5. //
  6. // Lol Engine is free software. It comes without any warranty, to
  7. // the extent permitted by applicable law. You can redistribute it
  8. // and/or modify it under the terms of the Do What the Fuck You Want
  9. // to Public License, Version 2, as published by the WTFPL Task Force.
  10. // See http://www.wtfpl.net/ for more details.
  11. //
  12. #include <lol/engine-internal.h>
  13. #include <new>
  14. #include <cstring>
  15. #include <cstdio>
  16. #include <cstdlib>
  17. namespace lol
  18. {
  19. /*
  20. * First handle explicit specialisation of our templates.
  21. *
  22. * Initialisation order is not important because everything is
  23. * done on demand, but here is the dependency list anyway:
  24. * - fast_log() requires R_1
  25. * - log() requires R_LN2
  26. * - inverse() require R_2
  27. * - exp() requires R_0, R_1, R_LN2
  28. * - sqrt() requires R_3
  29. */
  30. static real fast_log(real const &x);
  31. static real load_min();
  32. static real load_max();
  33. static real load_pi();
  34. #define LOL_CONSTANT_GETTER(name, value) \
  35. template<> real const& real::name() \
  36. { \
  37. static real const ret = value; \
  38. return ret; \
  39. }
  40. LOL_CONSTANT_GETTER(R_0, (real)0.0);
  41. LOL_CONSTANT_GETTER(R_1, (real)1.0);
  42. LOL_CONSTANT_GETTER(R_2, (real)2.0);
  43. LOL_CONSTANT_GETTER(R_3, (real)3.0);
  44. LOL_CONSTANT_GETTER(R_10, (real)10.0);
  45. LOL_CONSTANT_GETTER(R_MIN, load_min());
  46. LOL_CONSTANT_GETTER(R_MAX, load_max());
  47. LOL_CONSTANT_GETTER(R_LN2, fast_log(R_2()));
  48. LOL_CONSTANT_GETTER(R_LN10, log(R_10()));
  49. LOL_CONSTANT_GETTER(R_LOG2E, inverse(R_LN2()));
  50. LOL_CONSTANT_GETTER(R_LOG10E, inverse(R_LN10()));
  51. LOL_CONSTANT_GETTER(R_E, exp(R_1()));
  52. LOL_CONSTANT_GETTER(R_PI, load_pi());
  53. LOL_CONSTANT_GETTER(R_PI_2, R_PI() / 2);
  54. LOL_CONSTANT_GETTER(R_PI_3, R_PI() / R_3());
  55. LOL_CONSTANT_GETTER(R_PI_4, R_PI() / 4);
  56. LOL_CONSTANT_GETTER(R_TAU, R_PI() + R_PI());
  57. LOL_CONSTANT_GETTER(R_1_PI, inverse(R_PI()));
  58. LOL_CONSTANT_GETTER(R_2_PI, R_1_PI() * 2);
  59. LOL_CONSTANT_GETTER(R_2_SQRTPI, inverse(sqrt(R_PI())) * 2);
  60. LOL_CONSTANT_GETTER(R_SQRT2, sqrt(R_2()));
  61. LOL_CONSTANT_GETTER(R_SQRT3, sqrt(R_3()));
  62. LOL_CONSTANT_GETTER(R_SQRT1_2, R_SQRT2() / 2);
  63. #undef LOL_CONSTANT_GETTER
  64. /*
  65. * Now carry on with the rest of the Real class.
  66. */
  67. #define DEFAULT_SIZE 16
  68. template<> real::Real()
  69. : m_exponent(0),
  70. m_sign(false),
  71. m_nan(false),
  72. m_inf(false)
  73. {
  74. }
  75. /* FIXME: 64-bit integer loading is incorrect, we lose precision. */
  76. template<> real::Real(int32_t i) { new(this) real((double)i); }
  77. template<> real::Real(uint32_t i) { new(this) real((double)i); }
  78. template<> real::Real(int64_t i) { new(this) real((double)i); }
  79. template<> real::Real(uint64_t i) { new(this) real((double)i); }
  80. template<> real::Real(float f) { new(this) real((double)f); }
  81. template<> real::Real(double d)
  82. : m_exponent(0),
  83. m_sign(false),
  84. m_nan(false),
  85. m_inf(false)
  86. {
  87. union { double d; uint64_t x; } u = { d };
  88. m_sign = bool(u.x >> 63);
  89. exponent_t exponent = (u.x << 1) >> 53;
  90. switch (exponent)
  91. {
  92. case 0x00: /* +0 / -0 */
  93. break;
  94. case 0x7ff: /* Inf/NaN (FIXME: handle NaN!) */
  95. m_inf = true;
  96. break;
  97. default:
  98. /* Only works with 32-bit bigits for now */
  99. static_assert(sizeof(bigit_t) == 4);
  100. m_exponent = exponent - ((1 << 10) - 1);
  101. m_mantissa.resize(DEFAULT_SIZE);
  102. m_mantissa[0] = (bigit_t)(u.x >> 20);
  103. m_mantissa[1] = (bigit_t)(u.x << 12);
  104. break;
  105. }
  106. }
  107. template<> real::Real(long double f)
  108. {
  109. /* We don’t know the long double layout, so we split it into
  110. * two doubles instead. */
  111. double hi = double(f);
  112. double lo = double(f - (long double)hi);;
  113. new(this) real(hi);
  114. *this += lo;
  115. }
  116. template<> real::operator float() const { return (float)(double)(*this); }
  117. template<> real::operator int() const { return (int)(double)(*this); }
  118. template<> real::operator unsigned() const { return (unsigned)(double)(*this); }
  119. template<> real::operator double() const
  120. {
  121. union { double d; uint64_t x; } u;
  122. /* Get sign */
  123. u.x = (m_sign ? 1 : 0) << 11;
  124. /* Compute new exponent (FIXME: handle Inf/NaN) */
  125. int64_t e = m_exponent + ((1 << 10) - 1);
  126. if (m_mantissa.count() == 0)
  127. u.x <<= 52;
  128. else if (e < 0) /* if exponent underflows, set to zero */
  129. u.x <<= 52;
  130. else if (e >= 0x7ff)
  131. {
  132. u.x |= 0x7ff;
  133. u.x <<= 52;
  134. }
  135. else
  136. {
  137. u.x |= e;
  138. /* Store mantissa if necessary */
  139. u.x <<= 32;
  140. if (m_mantissa.count() > 0)
  141. u.x |= m_mantissa[0];
  142. u.x <<= 20;
  143. if (m_mantissa.count() > 1)
  144. {
  145. u.x |= m_mantissa[1] >> 12;
  146. /* Rounding */
  147. u.x += (m_mantissa[1] >> 11) & 1;
  148. }
  149. }
  150. return u.d;
  151. }
  152. template<> real::operator long double() const
  153. {
  154. double hi = double(*this);
  155. double lo = double(*this - hi);
  156. return (long double)(hi) + (long double)(lo);
  157. }
  158. /*
  159. * Create a real number from an ASCII representation
  160. */
  161. template<> real::Real(char const *str)
  162. {
  163. real ret = 0;
  164. int exponent = 0;
  165. bool hex = false, comma = false, nonzero = false, negative = false, finished = false;
  166. for (char const *p = str; *p && !finished; p++)
  167. {
  168. switch (*p)
  169. {
  170. case '-':
  171. case '+':
  172. if (p != str)
  173. break;
  174. negative = (*p == '-');
  175. break;
  176. case '.':
  177. if (comma)
  178. finished = true;
  179. comma = true;
  180. break;
  181. case 'x':
  182. case 'X':
  183. /* This character is only valid for 0x... and 0X... numbers */
  184. if (p != str + 1 || str[0] != '0')
  185. finished = true;
  186. hex = true;
  187. break;
  188. case 'p':
  189. case 'P':
  190. if (hex)
  191. exponent += atoi(p + 1);
  192. finished = true;
  193. break;
  194. case 'e':
  195. case 'E':
  196. if (!hex)
  197. {
  198. exponent += atoi(p + 1);
  199. finished = true;
  200. break;
  201. }
  202. LOL_ATTR_FALLTHROUGH /* FIXME: why doesn’t this seem to work? */
  203. case 'a': case 'b': case 'c': case 'd': case 'f':
  204. case 'A': case 'B': case 'C': case 'D': case 'F':
  205. case '0': case '1': case '2': case '3': case '4':
  206. case '5': case '6': case '7': case '8': case '9':
  207. if (nonzero)
  208. {
  209. /* Multiply ret by 10 or 16 depending the base. */
  210. if (!hex)
  211. {
  212. real x = ret + ret;
  213. ret = x + x + ret;
  214. }
  215. ret.m_exponent += hex ? 4 : 1;
  216. }
  217. if (*p != '0')
  218. {
  219. ret += (*p >= 'a' && *p <= 'f') ? (int)(*p - 'a' + 10)
  220. : (*p >= 'A' && *p <= 'F') ? (int)(*p - 'A' + 10)
  221. : (int)(*p - '0');
  222. nonzero = true;
  223. }
  224. if (comma)
  225. exponent -= hex ? 4 : 1;
  226. break;
  227. default:
  228. finished = true;
  229. break;
  230. }
  231. }
  232. if (hex)
  233. ret.m_exponent += exponent;
  234. else if (exponent)
  235. ret *= pow(R_10(), (real)exponent);
  236. if (negative)
  237. ret = -ret;
  238. *this = ret;
  239. }
  240. template<> real real::operator +() const
  241. {
  242. return *this;
  243. }
  244. template<> real real::operator -() const
  245. {
  246. real ret = *this;
  247. ret.m_sign ^= true;
  248. return ret;
  249. }
  250. template<> real real::operator +(real const &x) const
  251. {
  252. if (x.m_mantissa.count() == 0)
  253. return *this;
  254. if (m_mantissa.count() == 0)
  255. return x;
  256. /* Ensure both arguments are positive. Otherwise, switch signs,
  257. * or replace + with -. */
  258. if (m_sign)
  259. return -(-*this + -x);
  260. if (x.m_sign)
  261. return *this - (-x);
  262. /* Ensure *this has the larger exponent (no need for the mantissa to
  263. * be larger, as in subtraction). Otherwise, switch. */
  264. if (m_exponent < x.m_exponent)
  265. return x + *this;
  266. int64_t e1 = m_exponent;
  267. int64_t e2 = x.m_exponent;
  268. int64_t bigoff = (e1 - e2) / bigit_bits();
  269. int64_t off = e1 - e2 - bigoff * bigit_bits();
  270. /* FIXME: ensure we have the same number of bigits */
  271. if (bigoff > bigit_count())
  272. return *this;
  273. real ret;
  274. ret.m_mantissa.resize(m_mantissa.count());
  275. ret.m_exponent = m_exponent;
  276. uint64_t carry = 0;
  277. for (int i = bigit_count(); i--; )
  278. {
  279. carry += m_mantissa[i];
  280. if (i - bigoff >= 0)
  281. carry += x.m_mantissa[i - bigoff] >> off;
  282. if (off && i - bigoff > 0)
  283. carry += (x.m_mantissa[i - bigoff - 1] << (bigit_bits() - off)) & 0xffffffffu;
  284. else if (i - bigoff == 0)
  285. carry += (uint64_t)1 << (bigit_bits() - off);
  286. ret.m_mantissa[i] = (uint32_t)carry;
  287. carry >>= bigit_bits();
  288. }
  289. /* Renormalise in case we overflowed the mantissa */
  290. if (carry)
  291. {
  292. carry--;
  293. for (int i = 0; i < bigit_count(); ++i)
  294. {
  295. uint32_t tmp = ret.m_mantissa[i];
  296. ret.m_mantissa[i] = ((uint32_t)carry << (bigit_bits() - 1))
  297. | (tmp >> 1);
  298. carry = tmp & 1u;
  299. }
  300. ret.m_exponent++;
  301. }
  302. return ret;
  303. }
  304. template<> real real::operator -(real const &x) const
  305. {
  306. if (x.m_mantissa.count() == 0)
  307. return *this;
  308. if (m_mantissa.count() == 0)
  309. return -x;
  310. /* Ensure both arguments are positive. Otherwise, switch signs,
  311. * or replace - with +. */
  312. if (m_sign)
  313. return -(-*this + x);
  314. if (x.m_sign)
  315. return (*this) + (-x);
  316. /* Ensure *this is larger than x */
  317. if (*this < x)
  318. return -(x - *this);
  319. int64_t e1 = m_exponent;
  320. int64_t e2 = x.m_exponent;
  321. int64_t bigoff = (e1 - e2) / bigit_bits();
  322. int64_t off = e1 - e2 - bigoff * bigit_bits();
  323. /* FIXME: ensure we have the same number of bigits */
  324. if (bigoff > bigit_count())
  325. return *this;
  326. real ret;
  327. ret.m_mantissa.resize(m_mantissa.count());
  328. ret.m_exponent = m_exponent;
  329. /* int64_t instead of uint64_t to preserve sign through shifts */
  330. int64_t carry = 0;
  331. for (int i = 0; i < bigoff; ++i)
  332. {
  333. carry -= x.m_mantissa[bigit_count() - 1 - i];
  334. /* Emulates a signed shift */
  335. carry >>= bigit_bits();
  336. carry |= carry << bigit_bits();
  337. }
  338. if (bigoff < bigit_count())
  339. carry -= x.m_mantissa[bigit_count() - 1 - bigoff] & (((int64_t)1 << off) - 1);
  340. carry /= (int64_t)1 << off;
  341. for (int i = bigit_count(); i--; )
  342. {
  343. carry += m_mantissa[i];
  344. if (i - bigoff >= 0)
  345. carry -= x.m_mantissa[i - bigoff] >> off;
  346. if (off && i - bigoff > 0)
  347. carry -= (x.m_mantissa[i - bigoff - 1] << (bigit_bits() - off)) & 0xffffffffu;
  348. else if (i - bigoff == 0)
  349. carry -= (uint64_t)1 << (bigit_bits() - off);
  350. ret.m_mantissa[i] = (uint32_t)carry;
  351. carry >>= bigit_bits();
  352. carry |= carry << bigit_bits();
  353. }
  354. carry += 1;
  355. /* Renormalise if we underflowed the mantissa */
  356. if (carry == 0)
  357. {
  358. /* How much do we need to shift the mantissa? FIXME: this could
  359. * be computed above */
  360. off = 0;
  361. for (int i = 0; i < bigit_count(); ++i)
  362. {
  363. if (!ret.m_mantissa[i])
  364. {
  365. off += bigit_bits();
  366. continue;
  367. }
  368. for (uint32_t tmp = ret.m_mantissa[i]; tmp < 0x80000000u; tmp <<= 1)
  369. off++;
  370. break;
  371. }
  372. if (off == total_bits())
  373. ret.m_mantissa.resize(0);
  374. else
  375. {
  376. off++; /* Shift once more to get rid of the leading 1 */
  377. ret.m_exponent -= off;
  378. bigoff = off / bigit_bits();
  379. off -= bigoff * bigit_bits();
  380. for (int i = 0; i < bigit_count(); ++i)
  381. {
  382. uint32_t tmp = 0;
  383. if (i + bigoff < bigit_count())
  384. tmp |= ret.m_mantissa[i + bigoff] << off;
  385. if (off && i + bigoff + 1 < bigit_count())
  386. tmp |= ret.m_mantissa[i + bigoff + 1] >> (bigit_bits() - off);
  387. ret.m_mantissa[i] = tmp;
  388. }
  389. }
  390. }
  391. return ret;
  392. }
  393. template<> real real::operator *(real const &x) const
  394. {
  395. real ret;
  396. /* The sign is easy to compute */
  397. ret.m_sign = m_sign ^ x.m_sign;
  398. /* If any operand is zero, return zero. */
  399. if (m_mantissa.count() == 0 || x.m_mantissa.count() == 0)
  400. return ret;
  401. ret.m_mantissa.resize(m_mantissa.count());
  402. ret.m_exponent = m_exponent + x.m_exponent;
  403. /* Accumulate low order product; no need to store it, we just
  404. * want the carry value */
  405. uint64_t carry = 0, hicarry = 0, prev;
  406. for (int i = 0; i < bigit_count(); ++i)
  407. {
  408. for (int j = 0; j < i + 1; j++)
  409. {
  410. prev = carry;
  411. carry += (uint64_t)m_mantissa[bigit_count() - 1 - j]
  412. * (uint64_t)x.m_mantissa[bigit_count() - 1 + j - i];
  413. if (carry < prev)
  414. hicarry++;
  415. }
  416. carry >>= bigit_bits();
  417. carry |= hicarry << bigit_bits();
  418. hicarry >>= bigit_bits();
  419. }
  420. /* Multiply the other components */
  421. for (int i = 0; i < bigit_count(); ++i)
  422. {
  423. for (int j = i + 1; j < bigit_count(); j++)
  424. {
  425. prev = carry;
  426. carry += (uint64_t)m_mantissa[bigit_count() - 1 - j]
  427. * (uint64_t)x.m_mantissa[j - 1 - i];
  428. if (carry < prev)
  429. hicarry++;
  430. }
  431. prev = carry;
  432. carry += m_mantissa[bigit_count() - 1 - i];
  433. carry += x.m_mantissa[bigit_count() - 1 - i];
  434. if (carry < prev)
  435. hicarry++;
  436. ret.m_mantissa[bigit_count() - 1 - i] = carry & 0xffffffffu;
  437. carry >>= bigit_bits();
  438. carry |= hicarry << bigit_bits();
  439. hicarry >>= bigit_bits();
  440. }
  441. /* Renormalise in case we overflowed the mantissa */
  442. if (carry)
  443. {
  444. carry--;
  445. for (int i = 0; i < bigit_count(); ++i)
  446. {
  447. uint32_t tmp = (uint32_t)ret.m_mantissa[i];
  448. ret.m_mantissa[i] = ((uint32_t)carry << (bigit_bits() - 1))
  449. | (tmp >> 1);
  450. carry = tmp & 1u;
  451. }
  452. ++ret.m_exponent;
  453. }
  454. return ret;
  455. }
  456. template<> real real::operator /(real const &x) const
  457. {
  458. return *this * inverse(x);
  459. }
  460. template<> real const &real::operator +=(real const &x)
  461. {
  462. real tmp = *this;
  463. return *this = tmp + x;
  464. }
  465. template<> real const &real::operator -=(real const &x)
  466. {
  467. real tmp = *this;
  468. return *this = tmp - x;
  469. }
  470. template<> real const &real::operator *=(real const &x)
  471. {
  472. real tmp = *this;
  473. return *this = tmp * x;
  474. }
  475. template<> real const &real::operator /=(real const &x)
  476. {
  477. real tmp = *this;
  478. return *this = tmp / x;
  479. }
  480. template<> bool real::operator ==(real const &x) const
  481. {
  482. /* If both zero, they are equal */
  483. if (m_mantissa.count() == 0 && x.m_mantissa.count() == 0)
  484. return true;
  485. /* FIXME: handle NaN/Inf */
  486. return m_exponent == x.m_exponent && m_mantissa == x.m_mantissa;
  487. }
  488. template<> bool real::operator !=(real const &x) const
  489. {
  490. return !(*this == x);
  491. }
  492. template<> bool real::operator <(real const &x) const
  493. {
  494. /* Ensure we are positive */
  495. if (m_sign)
  496. return -*this > -x;
  497. /* If x is zero or negative, we can’t be < x */
  498. if (x.m_sign || x.m_mantissa.count() == 0)
  499. return false;
  500. /* If we are zero, we must be < x */
  501. if (m_mantissa.count() == 0)
  502. return true;
  503. /* Compare exponents */
  504. if (m_exponent != x.m_exponent)
  505. return m_exponent < x.m_exponent;
  506. /* Compare all relevant bits */
  507. for (int i = 0; i < bigit_count(); ++i)
  508. if (m_mantissa[i] != x.m_mantissa[i])
  509. return m_mantissa[i] < x.m_mantissa[i];
  510. return false;
  511. }
  512. template<> bool real::operator <=(real const &x) const
  513. {
  514. return !(*this > x);
  515. }
  516. template<> bool real::operator >(real const &x) const
  517. {
  518. /* Ensure we are positive */
  519. if (m_sign)
  520. return -*this < -x;
  521. /* If x is zero, we’re > x iff we’re non-zero */
  522. if (x.m_mantissa.count() == 0)
  523. return m_mantissa.count() != 0;
  524. /* If x is strictly negative, we’re > x */
  525. if (x.m_sign)
  526. return true;
  527. /* If we are zero, we can’t be > x */
  528. if (m_mantissa.count() == 0)
  529. return false;
  530. /* Compare exponents */
  531. if (m_exponent != x.m_exponent)
  532. return m_exponent > x.m_exponent;
  533. /* Compare all relevant bits */
  534. for (int i = 0; i < bigit_count(); ++i)
  535. if (m_mantissa[i] != x.m_mantissa[i])
  536. return m_mantissa[i] > x.m_mantissa[i];
  537. return false;
  538. }
  539. template<> bool real::operator >=(real const &x) const
  540. {
  541. return !(*this < x);
  542. }
  543. template<> bool real::operator !() const
  544. {
  545. return !(bool)*this;
  546. }
  547. template<> real::operator bool() const
  548. {
  549. /* A real is "true" if it is non-zero (mantissa is non-zero) AND
  550. * not NaN */
  551. return m_mantissa.count() && !m_nan;
  552. }
  553. template<> real min(real const &a, real const &b)
  554. {
  555. return (a < b) ? a : b;
  556. }
  557. template<> real max(real const &a, real const &b)
  558. {
  559. return (a > b) ? a : b;
  560. }
  561. template<> real clamp(real const &x, real const &a, real const &b)
  562. {
  563. return (x < a) ? a : (x > b) ? b : x;
  564. }
  565. template<> real inverse(real const &x)
  566. {
  567. real ret;
  568. /* If zero, return infinite */
  569. if (!x.m_mantissa.count())
  570. {
  571. ret.m_sign = x.m_sign;
  572. ret.m_inf = true;
  573. return ret;
  574. }
  575. /* Use the system's float inversion to approximate 1/x */
  576. union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f };
  577. v.x |= x.m_mantissa[0] >> 9;
  578. v.f = 1.0f / v.f;
  579. ret.m_mantissa.resize(x.m_mantissa.count());
  580. ret.m_mantissa[0] = v.x << 9;
  581. ret.m_sign = x.m_sign;
  582. ret.m_exponent = -x.m_exponent + (v.x >> 23) - (u.x >> 23);
  583. /* FIXME: 1+log2(BIGIT_COUNT) steps of Newton-Raphson seems to be enough for
  584. * convergence, but this hasn't been checked seriously. */
  585. for (int i = 1; i <= x.bigit_count(); i *= 2)
  586. ret = ret * (real::R_2() - ret * x);
  587. return ret;
  588. }
  589. template<> real sqrt(real const &x)
  590. {
  591. /* if zero, return x (FIXME: negative zero?) */
  592. if (!x.m_mantissa.count())
  593. return x;
  594. /* if negative, return NaN */
  595. if (x.m_sign)
  596. {
  597. real ret;
  598. ret.m_nan = true;
  599. return ret;
  600. }
  601. /* Use the system's float inversion to approximate 1/sqrt(x). First
  602. * we construct a float in the [1..4[ range that has roughly the same
  603. * mantissa as our real. Its exponent is 0 or 1, depending on the
  604. * parity of x’s exponent. The final exponent is 0, -1 or -2. We use
  605. * the final exponent and final mantissa to pre-fill the result. */
  606. union { float f; uint32_t x; } u = { 1.0f }, v = { 2.0f };
  607. v.x -= ((x.m_exponent & 1) ^ 1) << 23;
  608. v.x |= x.m_mantissa[0] >> 9;
  609. v.f = 1.0f / sqrtf(v.f);
  610. real ret;
  611. ret.m_mantissa.resize(x.m_mantissa.count());
  612. ret.m_mantissa[0] = v.x << 9;
  613. /* FIXME: check this, it’s been a long time… */
  614. ret.m_exponent = -x.m_exponent / 2 + (v.x >> 23) - (u.x >> 23);
  615. /* FIXME: 1+log2(bigit_count()) steps of Newton-Raphson seems to be enough for
  616. * convergence, but this hasn't been checked seriously. */
  617. for (int i = 1; i <= x.bigit_count(); i *= 2)
  618. {
  619. ret = ret * (real::R_3() - ret * ret * x);
  620. --ret.m_exponent;
  621. }
  622. return ret * x;
  623. }
  624. template<> real cbrt(real const &x)
  625. {
  626. /* if zero, return x */
  627. if (!x.m_mantissa.count())
  628. return x;
  629. /* Use the system's float inversion to approximate cbrt(x). First
  630. * we construct a float in the [1..8[ range that has roughly the same
  631. * mantissa as our real. Its exponent is 0, 1 or 2, depending on the
  632. * value of x. The final exponent is 0 or 1 (special case). We use
  633. * the final exponent and final mantissa to pre-fill the result. */
  634. union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f };
  635. v.x += (x.m_exponent % 3) << 23;
  636. v.x |= x.m_mantissa[0] >> 9;
  637. v.f = powf(v.f, 0.33333333333333333f);
  638. real ret;
  639. ret.m_mantissa.resize(x.m_mantissa.count());
  640. ret.m_mantissa[0] = v.x << 9;
  641. ret.m_exponent = x.m_exponent / 3 + (v.x >> 23) - (u.x >> 23);
  642. ret.m_sign = x.m_sign;
  643. /* FIXME: 1+log2(bigit_count()) steps of Newton-Raphson seems to be enough
  644. * for convergence, but this hasn't been checked seriously. */
  645. real third = inverse(real::R_3());
  646. for (int i = 1; i <= x.bigit_count(); i *= 2)
  647. {
  648. ret = third * (x / (ret * ret) + (ret / 2));
  649. }
  650. return ret;
  651. }
  652. template<> real pow(real const &x, real const &y)
  653. {
  654. /* Shortcuts for degenerate cases */
  655. if (!y)
  656. return real::R_1();
  657. if (!x)
  658. return real::R_0();
  659. /* Small integer exponent: use exponentiation by squaring */
  660. int int_y = (int)y;
  661. if (y == (real)int_y)
  662. {
  663. real ret = real::R_1();
  664. real x_n = int_y > 0 ? x : inverse(x);
  665. while (int_y) /* Can be > 0 or < 0 */
  666. {
  667. if (int_y & 1)
  668. ret *= x_n;
  669. x_n *= x_n;
  670. int_y /= 2;
  671. }
  672. return ret;
  673. }
  674. /* If x is positive, nothing special to do. */
  675. if (x > real::R_0())
  676. return exp(y * log(x));
  677. /* XXX: manpage for pow() says “If x is a finite value less than 0,
  678. * and y is a finite noninteger, a domain error occurs, and a NaN is
  679. * returned”. We check whether y is closer to an even number or to
  680. * an odd number and return something reasonable. */
  681. real round_y = round(y);
  682. bool is_odd = round_y / 2 == round(round_y / 2);
  683. return is_odd ? exp(y * log(-x)) : -exp(y * log(-x));
  684. }
  685. static real fast_fact(unsigned int x)
  686. {
  687. real ret = real::R_1();
  688. unsigned int i = 1, multiplier = 1, exponent = 0;
  689. for (;;)
  690. {
  691. if (i++ >= x)
  692. /* Multiplication is a no-op if multiplier == 1 */
  693. return ldexp(ret * multiplier, exponent);
  694. unsigned int tmp = i;
  695. while ((tmp & 1) == 0)
  696. {
  697. tmp >>= 1;
  698. exponent++;
  699. }
  700. if (multiplier * tmp / tmp != multiplier)
  701. {
  702. ret *= multiplier;
  703. multiplier = 1;
  704. }
  705. multiplier *= tmp;
  706. }
  707. }
  708. template<> real gamma(real const &x)
  709. {
  710. /* We use Spouge's formula. FIXME: precision is far from acceptable,
  711. * especially with large values. We need to compute this with higher
  712. * precision values in order to attain the desired accuracy. It might
  713. * also be useful to sort the ck values by decreasing absolute value
  714. * and do the addition in this order. */
  715. int a = (int)ceilf(logf(2) / logf(2 * F_PI) * x.total_bits());
  716. real ret = sqrt(real::R_PI() * 2);
  717. real fact_k_1 = real::R_1();
  718. for (int k = 1; k < a; k++)
  719. {
  720. real a_k = (real)(a - k);
  721. real ck = pow(a_k, (real)((float)k - 0.5)) * exp(a_k)
  722. / (fact_k_1 * (x + (real)(k - 1)));
  723. ret += ck;
  724. fact_k_1 *= (real)-k;
  725. }
  726. ret *= pow(x + (real)(a - 1), x - (real::R_1() / 2));
  727. ret *= exp(-x - (real)(a - 1));
  728. return ret;
  729. }
  730. template<> real fabs(real const &x)
  731. {
  732. real ret = x;
  733. ret.m_sign = false;
  734. return ret;
  735. }
  736. template<> real abs(real const &x)
  737. {
  738. return fabs(x);
  739. }
  740. template<> real fract(real const &x)
  741. {
  742. return x - floor(x);
  743. }
  744. template<> real degrees(real const &x)
  745. {
  746. /* FIXME: need to recompute this for different mantissa sizes */
  747. static real mul = real(180) * real::R_1_PI();
  748. return x * mul;
  749. }
  750. template<> real radians(real const &x)
  751. {
  752. /* FIXME: need to recompute this for different mantissa sizes */
  753. static real mul = real::R_PI() / real(180);
  754. return x * mul;
  755. }
  756. static real fast_log(real const &x)
  757. {
  758. /* This fast log method is tuned to work on the [1..2] range and
  759. * no effort whatsoever was made to improve convergence outside this
  760. * domain of validity. It can converge pretty fast, provided we use
  761. * the following variable substitutions:
  762. * y = sqrt(x)
  763. * z = (y - 1) / (y + 1)
  764. *
  765. * And the following identities:
  766. * ln(x) = 2 ln(y)
  767. * = 2 ln((1 + z) / (1 - z))
  768. * = 4 z (1 + z^2 / 3 + z^4 / 5 + z^6 / 7...)
  769. *
  770. * Any additional sqrt() call would halve the convergence time, but
  771. * would also impact the final precision. For now we stick with one
  772. * sqrt() call. */
  773. real y = sqrt(x);
  774. real z = (y - real::R_1()) / (y + real::R_1()), z2 = z * z, zn = z2;
  775. real sum = real::R_1();
  776. for (int i = 3; ; i += 2)
  777. {
  778. real newsum = sum + zn / (real)i;
  779. if (newsum == sum)
  780. break;
  781. sum = newsum;
  782. zn *= z2;
  783. }
  784. return z * sum * 4;
  785. }
  786. template<> real log(real const &x)
  787. {
  788. /* Strategy for log(x): if x = 2^E*M then log(x) = E log(2) + log(M),
  789. * with the property that M is in [1..2[, so fast_log() applies here. */
  790. real tmp;
  791. if (x.m_sign || x.m_mantissa.count() == 0)
  792. {
  793. tmp.m_nan = true;
  794. return tmp;
  795. }
  796. tmp = x;
  797. tmp.m_exponent = 0;
  798. return real(x.m_exponent) * real::R_LN2() + fast_log(tmp);
  799. }
  800. template<> real log2(real const &x)
  801. {
  802. /* Strategy for log2(x): see log(x). */
  803. real tmp;
  804. if (x.m_sign || x.m_mantissa.count() == 0)
  805. {
  806. tmp.m_nan = true;
  807. return tmp;
  808. }
  809. tmp = x;
  810. tmp.m_exponent = 0;
  811. return real(x.m_exponent) + fast_log(tmp) * real::R_LOG2E();
  812. }
  813. template<> real log10(real const &x)
  814. {
  815. return log(x) * real::R_LOG10E();
  816. }
  817. static real fast_exp_sub(real const &x, real const &y)
  818. {
  819. /* This fast exp method is tuned to work on the [-1..1] range and
  820. * no effort whatsoever was made to improve convergence outside this
  821. * domain of validity. The argument y is used for cases where we
  822. * don't want the leading 1 in the Taylor series. */
  823. real ret = real::R_1() - y, xn = x;
  824. int i = 1;
  825. for (;;)
  826. {
  827. real newret = ret + xn;
  828. if (newret == ret)
  829. break;
  830. ret = newret * ++i;
  831. xn *= x;
  832. }
  833. return ret / fast_fact(i);
  834. }
  835. template<> real exp(real const &x)
  836. {
  837. /* Strategy for exp(x): the Taylor series does not converge very fast
  838. * with large positive or negative values.
  839. *
  840. * However, we know that the result is going to be in the form M*2^E,
  841. * where M is the mantissa and E the exponent. We first try to predict
  842. * a value for E, which is approximately log2(exp(x)) = x / log(2).
  843. *
  844. * Let E0 be an integer close to x / log(2). We need to find a value x0
  845. * such that exp(x) = 2^E0 * exp(x0). We get x0 = x - E0 log(2).
  846. *
  847. * Thus the final algorithm:
  848. * int E0 = x / log(2)
  849. * real x0 = x - E0 log(2)
  850. * real x1 = exp(x0)
  851. * return x1 * 2^E0
  852. */
  853. int e0 = x / real::R_LN2();
  854. real x0 = x - (real)e0 * real::R_LN2();
  855. real x1 = fast_exp_sub(x0, real::R_0());
  856. x1.m_exponent += e0;
  857. return x1;
  858. }
  859. template<> real exp2(real const &x)
  860. {
  861. /* Strategy for exp2(x): see strategy in exp(). */
  862. int e0 = x;
  863. real x0 = x - (real)e0;
  864. real x1 = fast_exp_sub(x0 * real::R_LN2(), real::R_0());
  865. x1.m_exponent += e0;
  866. return x1;
  867. }
  868. template<> real sinh(real const &x)
  869. {
  870. /* We cannot always use (exp(x)-exp(-x))/2 because we'll lose
  871. * accuracy near zero. We only use this identity for |x|>0.5. If
  872. * |x|<=0.5, we compute exp(x)-1 and exp(-x)-1 instead. */
  873. bool near_zero = (fabs(x) < real::R_1() / 2);
  874. real x1 = near_zero ? fast_exp_sub(x, real::R_1()) : exp(x);
  875. real x2 = near_zero ? fast_exp_sub(-x, real::R_1()) : exp(-x);
  876. return (x1 - x2) / 2;
  877. }
  878. template<> real tanh(real const &x)
  879. {
  880. /* See sinh() for the strategy here */
  881. bool near_zero = (fabs(x) < real::R_1() / 2);
  882. real x1 = near_zero ? fast_exp_sub(x, real::R_1()) : exp(x);
  883. real x2 = near_zero ? fast_exp_sub(-x, real::R_1()) : exp(-x);
  884. real x3 = near_zero ? x1 + x2 + real::R_2() : x1 + x2;
  885. return (x1 - x2) / x3;
  886. }
  887. template<> real cosh(real const &x)
  888. {
  889. /* No need to worry about accuracy here; maybe the last bit is slightly
  890. * off, but that's about it. */
  891. return (exp(x) + exp(-x)) / 2;
  892. }
  893. template<> real frexp(real const &x, int64_t *exp)
  894. {
  895. if (!x)
  896. {
  897. *exp = 0;
  898. return x;
  899. }
  900. /* FIXME: check that this works */
  901. *exp = x.m_exponent;
  902. real ret = x;
  903. ret.m_exponent = 0;
  904. return ret;
  905. }
  906. template<> real ldexp(real const &x, int64_t exp)
  907. {
  908. real ret = x;
  909. if (ret) /* Only do something if non-zero */
  910. ret.m_exponent += exp;
  911. return ret;
  912. }
  913. template<> real modf(real const &x, real *iptr)
  914. {
  915. real absx = fabs(x);
  916. real tmp = floor(absx);
  917. *iptr = copysign(tmp, x);
  918. return copysign(absx - tmp, x);
  919. }
  920. template<> real nextafter(real const &x, real const &y)
  921. {
  922. /* Linux manpage: “If x equals y, the functions return y.” */
  923. if (x == y)
  924. return y;
  925. /* Ensure x is positive. */
  926. if (x.m_sign)
  927. return -nextafter(-x, -y);
  928. /* FIXME: broken for now */
  929. real ulp = ldexp(x, -x.total_bits());
  930. return x < y ? x + ulp : x - ulp;
  931. }
  932. template<> real copysign(real const &x, real const &y)
  933. {
  934. real ret = x;
  935. ret.m_sign = y.m_sign;
  936. return ret;
  937. }
  938. template<> real floor(real const &x)
  939. {
  940. /* Strategy for floor(x):
  941. * - if negative, return -ceil(-x)
  942. * - if zero or negative zero, return x
  943. * - if less than one, return zero
  944. * - otherwise, if e is the exponent, clear all bits except the
  945. * first e. */
  946. if (x < -real::R_0())
  947. return -ceil(-x);
  948. if (!x)
  949. return x;
  950. if (x < real::R_1())
  951. return real::R_0();
  952. real ret = x;
  953. int64_t exponent = x.m_exponent;
  954. for (int i = 0; i < x.bigit_count(); ++i)
  955. {
  956. if (exponent <= 0)
  957. ret.m_mantissa[i] = 0;
  958. else if (exponent < real::bigit_bits())
  959. ret.m_mantissa[i] &= ~((1 << (real::bigit_bits() - exponent)) - 1);
  960. exponent -= real::bigit_bits();
  961. }
  962. return ret;
  963. }
  964. template<> real ceil(real const &x)
  965. {
  966. /* Strategy for ceil(x):
  967. * - if negative, return -floor(-x)
  968. * - if x == floor(x), return x
  969. * - otherwise, return floor(x) + 1 */
  970. if (x < -real::R_0())
  971. return -floor(-x);
  972. real ret = floor(x);
  973. if (x == ret)
  974. return ret;
  975. else
  976. return ret + real::R_1();
  977. }
  978. template<> real round(real const &x)
  979. {
  980. if (x < real::R_0())
  981. return -round(-x);
  982. return floor(x + (real::R_1() / 2));
  983. }
  984. template<> real fmod(real const &x, real const &y)
  985. {
  986. if (!y)
  987. return real::R_0(); /* FIXME: return NaN */
  988. if (!x)
  989. return x;
  990. real tmp = round(x / y);
  991. return x - tmp * y;
  992. }
  993. template<> real sin(real const &x)
  994. {
  995. bool switch_sign = x.m_sign;
  996. real absx = fmod(fabs(x), real::R_PI() * 2);
  997. if (absx > real::R_PI())
  998. {
  999. absx -= real::R_PI();
  1000. switch_sign = !switch_sign;
  1001. }
  1002. if (absx > real::R_PI_2())
  1003. absx = real::R_PI() - absx;
  1004. real ret = real::R_0(), fact = real::R_1(), xn = absx, mx2 = -absx * absx;
  1005. int i = 1;
  1006. for (;;)
  1007. {
  1008. real newret = ret + xn;
  1009. if (newret == ret)
  1010. break;
  1011. ret = newret * ((i + 1) * (i + 2));
  1012. xn *= mx2;
  1013. i += 2;
  1014. }
  1015. ret /= fast_fact(i);
  1016. /* Propagate sign */
  1017. ret.m_sign ^= switch_sign;
  1018. return ret;
  1019. }
  1020. template<> real cos(real const &x)
  1021. {
  1022. return sin(real::R_PI_2() - x);
  1023. }
  1024. template<> real tan(real const &x)
  1025. {
  1026. /* Constrain input to [-π,π] */
  1027. real y = fmod(x, real::R_PI());
  1028. /* Constrain input to [-π/2,π/2] */
  1029. if (y < -real::R_PI_2())
  1030. y += real::R_PI();
  1031. else if (y > real::R_PI_2())
  1032. y -= real::R_PI();
  1033. /* In [-π/4,π/4] return sin/cos */
  1034. if (fabs(y) <= real::R_PI_4())
  1035. return sin(y) / cos(y);
  1036. /* Otherwise, return cos/sin */
  1037. if (y > real::R_0())
  1038. y = real::R_PI_2() - y;
  1039. else
  1040. y = -real::R_PI_2() - y;
  1041. return cos(y) / sin(y);
  1042. }
  1043. static inline real asinacos(real const &x, int is_asin, bool is_negative)
  1044. {
  1045. /* Strategy for asin(): in [-0.5..0.5], use a Taylor series around
  1046. * zero. In [0.5..1], use asin(x) = π/2 - 2*asin(sqrt((1-x)/2)), and
  1047. * in [-1..-0.5] just revert the sign.
  1048. * Strategy for acos(): use acos(x) = π/2 - asin(x) and try not to
  1049. * lose the precision around x=1. */
  1050. real absx = fabs(x);
  1051. int around_zero = (absx < (real::R_1() / 2));
  1052. if (!around_zero)
  1053. absx = sqrt((real::R_1() - absx) / 2);
  1054. real ret = absx, xn = absx, x2 = absx * absx, fact1 = 2, fact2 = 1;
  1055. for (int i = 1; ; ++i)
  1056. {
  1057. xn *= x2;
  1058. real mul = (real)(2 * i + 1);
  1059. real newret = ret + ldexp(fact1 * xn / (mul * fact2), -2 * i);
  1060. if (newret == ret)
  1061. break;
  1062. ret = newret;
  1063. fact1 *= (real)((2 * i + 1) * (2 * i + 2));
  1064. fact2 *= (real)((i + 1) * (i + 1));
  1065. }
  1066. if (is_negative)
  1067. ret = -ret;
  1068. if (around_zero)
  1069. ret = is_asin ? ret : real::R_PI_2() - ret;
  1070. else
  1071. {
  1072. real adjust = is_negative ? real::R_PI() : real::R_0();
  1073. if (is_asin)
  1074. ret = real::R_PI_2() - adjust - ret * 2;
  1075. else
  1076. ret = adjust + ret * 2;
  1077. }
  1078. return ret;
  1079. }
  1080. template<> real asin(real const &x)
  1081. {
  1082. /* Pass m_sign because it is private… FIXME: implement is_negative() */
  1083. return asinacos(x, 1, x.m_sign);
  1084. }
  1085. template<> real acos(real const &x)
  1086. {
  1087. return asinacos(x, 0, x.m_sign);
  1088. }
  1089. template<> real atan(real const &x)
  1090. {
  1091. /* Computing atan(x): we choose a different Taylor series depending on
  1092. * the value of x to help with convergence.
  1093. *
  1094. * If |x| < 0.5 we evaluate atan(y) near 0:
  1095. * atan(y) = y - y^3/3 + y^5/5 - y^7/7 + y^9/9 ...
  1096. *
  1097. * If 0.5 <= |x| < 1.5 we evaluate atan(1+y) near 0:
  1098. * atan(1+y) = π/4 + y/(1*2^1) - y^2/(2*2^1) + y^3/(3*2^2)
  1099. * - y^5/(5*2^3) + y^6/(6*2^3) - y^7/(7*2^4)
  1100. * + y^9/(9*2^5) - y^10/(10*2^5) + y^11/(11*2^6) ...
  1101. *
  1102. * If 1.5 <= |x| < 2 we evaluate atan(sqrt(3)+2y) near 0:
  1103. * atan(sqrt(3)+2y) = π/3 + 1/2 y - sqrt(3)/2 y^2/2
  1104. * + y^3/3 - sqrt(3)/2 y^4/4 + 1/2 y^5/5
  1105. * - 1/2 y^7/7 + sqrt(3)/2 y^8/8
  1106. * - y^9/9 + sqrt(3)/2 y^10/10 - 1/2 y^11/11
  1107. * + 1/2 y^13/13 - sqrt(3)/2 y^14/14
  1108. * + y^15/15 - sqrt(3)/2 y^16/16 + 1/2 y^17/17 ...
  1109. *
  1110. * If |x| >= 2 we evaluate atan(y) near +∞:
  1111. * atan(y) = π/2 - y^-1 + y^-3/3 - y^-5/5 + y^-7/7 - y^-9/9 ...
  1112. */
  1113. real absx = fabs(x);
  1114. if (absx < (real::R_1() / 2))
  1115. {
  1116. real ret = x, xn = x, mx2 = -x * x;
  1117. for (int i = 3; ; i += 2)
  1118. {
  1119. xn *= mx2;
  1120. real newret = ret + xn / (real)i;
  1121. if (newret == ret)
  1122. break;
  1123. ret = newret;
  1124. }
  1125. return ret;
  1126. }
  1127. real ret = 0;
  1128. if (absx < (real::R_3() / 2))
  1129. {
  1130. real y = real::R_1() - absx;
  1131. real yn = y, my2 = -y * y;
  1132. for (int i = 0; ; i += 2)
  1133. {
  1134. real newret = ret + ldexp(yn / (real)(2 * i + 1), -i - 1);
  1135. yn *= y;
  1136. newret += ldexp(yn / (real)(2 * i + 2), -i - 1);
  1137. yn *= y;
  1138. newret += ldexp(yn / (real)(2 * i + 3), -i - 2);
  1139. if (newret == ret)
  1140. break;
  1141. ret = newret;
  1142. yn *= my2;
  1143. }
  1144. ret = real::R_PI_4() - ret;
  1145. }
  1146. else if (absx < real::R_2())
  1147. {
  1148. real y = (absx - real::R_SQRT3()) / 2;
  1149. real yn = y, my2 = -y * y;
  1150. for (int i = 1; ; i += 6)
  1151. {
  1152. real newret = ret + ((yn / (real)i) / 2);
  1153. yn *= y;
  1154. newret -= (real::R_SQRT3() / 2) * yn / (real)(i + 1);
  1155. yn *= y;
  1156. newret += yn / (real)(i + 2);
  1157. yn *= y;
  1158. newret -= (real::R_SQRT3() / 2) * yn / (real)(i + 3);
  1159. yn *= y;
  1160. newret += (yn / (real)(i + 4)) / 2;
  1161. if (newret == ret)
  1162. break;
  1163. ret = newret;
  1164. yn *= my2;
  1165. }
  1166. ret = real::R_PI_3() + ret;
  1167. }
  1168. else
  1169. {
  1170. real y = inverse(absx);
  1171. real yn = y, my2 = -y * y;
  1172. ret = y;
  1173. for (int i = 3; ; i += 2)
  1174. {
  1175. yn *= my2;
  1176. real newret = ret + yn / (real)i;
  1177. if (newret == ret)
  1178. break;
  1179. ret = newret;
  1180. }
  1181. ret = real::R_PI_2() - ret;
  1182. }
  1183. /* Propagate sign */
  1184. ret.m_sign = x.m_sign;
  1185. return ret;
  1186. }
  1187. template<> real atan2(real const &y, real const &x)
  1188. {
  1189. if (!y)
  1190. {
  1191. if (!x.m_sign)
  1192. return y;
  1193. return y.m_sign ? -real::R_PI() : real::R_PI();
  1194. }
  1195. if (!x)
  1196. {
  1197. return y.m_sign ? -real::R_PI() : real::R_PI();
  1198. }
  1199. /* FIXME: handle the Inf and NaN cases */
  1200. real z = y / x;
  1201. real ret = atan(z);
  1202. if (x < real::R_0())
  1203. ret += (y > real::R_0()) ? real::R_PI() : -real::R_PI();
  1204. return ret;
  1205. }
  1206. /* Franke’s function, used as a test for interpolation methods */
  1207. template<> real franke(real const &x, real const &y)
  1208. {
  1209. /* Compute 9x and 9y */
  1210. real nx = x + x; nx += nx; nx += nx + x;
  1211. real ny = y + y; ny += ny; ny += ny + y;
  1212. /* Temporary variables for the formula */
  1213. real a = nx - real::R_2();
  1214. real b = ny - real::R_2();
  1215. real c = nx + real::R_1();
  1216. real d = ny + real::R_1();
  1217. real e = nx - real(7);
  1218. real f = ny - real::R_3();
  1219. real g = nx - real(4);
  1220. real h = ny - real(7);
  1221. return exp(-(a * a + b * b) * real(0.25)) * real(0.75)
  1222. + exp(-(c * c / real(49) + d * d / real::R_10())) * real(0.75)
  1223. + exp(-(e * e + f * f) * real(0.25)) * real(0.5)
  1224. - exp(-(g * g + h * h)) / real(5);
  1225. }
  1226. /* The Peaks example function from Matlab */
  1227. template<> real peaks(real const &x, real const &y)
  1228. {
  1229. real x2 = x * x;
  1230. real y2 = y * y;
  1231. /* 3 * (1-x)^2 * exp(-x^2 - (y+1)^2) */
  1232. real ret = real::R_3()
  1233. * (x2 - x - x + real::R_1())
  1234. * exp(- x2 - y2 - y - y - real::R_1());
  1235. /* -10 * (x/5 - x^3 - y^5) * exp(-x^2 - y^2) */
  1236. ret -= (x + x - real::R_10() * (x2 * x + y2 * y2 * y)) * exp(-x2 - y2);
  1237. /* -1/3 * exp(-(x+1)^2 - y^2) */
  1238. ret -= exp(-x2 - x - x - real::R_1() - y2) / real::R_3();
  1239. return ret;
  1240. }
  1241. template<> void real::sxprintf(char *str) const;
  1242. template<> void real::sprintf(char *str, int ndigits) const;
  1243. template<> void real::xprint() const
  1244. {
  1245. char buf[150]; /* 128 bigit digits + room for 0x1, the exponent, etc. */
  1246. real::sxprintf(buf);
  1247. std::printf("%s", buf);
  1248. }
  1249. template<> void real::print(int ndigits) const
  1250. {
  1251. char *buf = new char[ndigits + 32 + 10];
  1252. real::sprintf(buf, ndigits);
  1253. std::printf("%s", buf);
  1254. delete[] buf;
  1255. }
  1256. template<> void real::sxprintf(char *str) const
  1257. {
  1258. if (m_sign)
  1259. *str++ = '-';
  1260. str += std::sprintf(str, "0x1.");
  1261. for (int i = 0; i < bigit_count(); ++i)
  1262. str += std::sprintf(str, "%08x", m_mantissa[i]);
  1263. while (str[-1] == '0')
  1264. *--str = '\0';
  1265. str += std::sprintf(str, "p%lld", (long long int)m_exponent);
  1266. }
  1267. template<> void real::sprintf(char *str, int ndigits) const
  1268. {
  1269. real x = *this;
  1270. if (x.m_sign)
  1271. {
  1272. *str++ = '-';
  1273. x = -x;
  1274. }
  1275. if (!x)
  1276. {
  1277. std::strcpy(str, "0");
  1278. return;
  1279. }
  1280. /* Normalise x so that mantissa is in [1..9.999] */
  1281. /* FIXME: better use int64_t when the cast is implemented */
  1282. /* FIXME: does not work with R_MAX and probably R_MIN */
  1283. int exponent = ceil(log10(x));
  1284. x *= pow(R_10(), -(real)exponent);
  1285. if (ndigits < 1)
  1286. ndigits = 1;
  1287. /* Add a bias to simulate some naive rounding */
  1288. x += real(4.99f) * pow(R_10(), -(real)(ndigits + 1));
  1289. if (x < R_1())
  1290. {
  1291. x *= R_10();
  1292. exponent--;
  1293. }
  1294. /* Print digits */
  1295. for (int i = 0; i < ndigits; ++i)
  1296. {
  1297. int digit = (int)floor(x);
  1298. *str++ = '0' + digit;
  1299. if (i == 0)
  1300. *str++ = '.';
  1301. x -= real(digit);
  1302. x *= R_10();
  1303. }
  1304. /* Remove excess trailing zeroes */
  1305. while (str[-1] == '0' && str[-2] != '.')
  1306. --str;
  1307. /* Print exponent information */
  1308. if (exponent)
  1309. str += std::sprintf(str, "e%c%i",
  1310. exponent >= 0 ? '+' : '-', lol::abs(exponent));
  1311. *str++ = '\0';
  1312. }
  1313. static real load_min()
  1314. {
  1315. real ret = 1;
  1316. return ldexp(ret, std::numeric_limits<int64_t>::min());
  1317. }
  1318. static real load_max()
  1319. {
  1320. /* FIXME: the last bits of the mantissa are not properly handled in this
  1321. * code! So we fallback to a slow but exact method. */
  1322. #if 0
  1323. real ret = 1;
  1324. ret = ldexp(ret, real::TOTAL_BITS - 1) - ret;
  1325. return ldexp(ret, real::EXPONENT_BIAS + 2 - real::TOTAL_BITS);
  1326. #endif
  1327. /* Generates 0x1.ffff..ffffp18446744073709551615 */
  1328. char str[160];
  1329. std::sprintf(str, "0x1.%llx%llx%llx%llx%llx%llx%llx%llxp%lld",
  1330. -1ll, -1ll, -1ll, -1ll, -1ll, -1ll, -1ll, -1ll,
  1331. (long long int)std::numeric_limits<int64_t>::max());
  1332. return real(str);
  1333. }
  1334. static real load_pi()
  1335. {
  1336. /* Approximate Pi using Machin's formula: 16*atan(1/5)-4*atan(1/239) */
  1337. real ret = 0, x0 = 5, x1 = 239;
  1338. real const m0 = -x0 * x0, m1 = -x1 * x1, r16 = 16, r4 = 4;
  1339. for (int i = 1; ; i += 2)
  1340. {
  1341. real newret = ret + r16 / (x0 * (real)i) - r4 / (x1 * (real)i);
  1342. if (newret == ret)
  1343. break;
  1344. ret = newret;
  1345. x0 *= m0;
  1346. x1 *= m1;
  1347. }
  1348. return ret;
  1349. }
  1350. } /* namespace lol */