You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

преди 7 години
преди 7 години
преди 7 години
преди 7 години
преди 7 години
преди 7 години
преди 7 години
преди 13 години
преди 7 години
преди 7 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 7 години
преди 7 години
преди 7 години
преди 7 години
преди 7 години
преди 13 години
преди 13 години
преди 7 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 13 години
преди 7 години
преди 7 години
преди 7 години
преди 7 години
преди 7 години
преди 7 години
преди 7 години
преди 7 години
преди 7 години
преди 7 години
преди 7 години
преди 7 години
преди 5 години
преди 7 години
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698
  1. //
  2. // Lol Engine
  3. //
  4. // Copyright © 2010—2019 Sam Hocevar <sam@hocevar.net>
  5. //
  6. // Lol Engine is free software. It comes without any warranty, to
  7. // the extent permitted by applicable law. You can redistribute it
  8. // and/or modify it under the terms of the Do What the Fuck You Want
  9. // to Public License, Version 2, as published by the WTFPL Task Force.
  10. // See http://www.wtfpl.net/ for more details.
  11. //
  12. #include <lol/engine-internal.h>
  13. #include <new>
  14. #include <string>
  15. #include <sstream>
  16. #include <iomanip>
  17. #include <cstring>
  18. #include <cstdlib>
  19. namespace lol
  20. {
  21. /*
  22. * First handle explicit specialisation of our templates.
  23. */
  24. template<> int real::DEFAULT_BIGIT_COUNT = 16;
  25. /*
  26. * Initialisation order is not important because everything is
  27. * done on demand, but here is the dependency list anyway:
  28. * - fast_log() requires R_1
  29. * - log() requires R_LN2
  30. * - inverse() require R_2
  31. * - exp() requires R_0, R_1, R_LN2
  32. * - sqrt() requires R_3
  33. */
  34. static real fast_log(real const &x);
  35. static real load_min();
  36. static real load_max();
  37. static real load_pi();
  38. /* These getters do not need caching, their return values are small */
  39. template<> real const real::R_0() { return real(); }
  40. template<> real const real::R_INF() { real ret; ret.m_inf = true; return ret; }
  41. template<> real const real::R_NAN() { real ret; ret.m_nan = true; return ret; }
  42. #define LOL_CONSTANT_GETTER(name, value) \
  43. template<> real const& real::name() \
  44. { \
  45. static real ret; \
  46. static int prev_bigit_count = -1; \
  47. /* If the default bigit count has changed, we must recompute
  48. * the value with the desired precision. */ \
  49. if (prev_bigit_count != DEFAULT_BIGIT_COUNT) \
  50. { \
  51. ret = (value); \
  52. prev_bigit_count = DEFAULT_BIGIT_COUNT; \
  53. } \
  54. return ret; \
  55. }
  56. LOL_CONSTANT_GETTER(R_1, real(1.0));
  57. LOL_CONSTANT_GETTER(R_2, real(2.0));
  58. LOL_CONSTANT_GETTER(R_3, real(3.0));
  59. LOL_CONSTANT_GETTER(R_10, real(10.0));
  60. LOL_CONSTANT_GETTER(R_MIN, load_min());
  61. LOL_CONSTANT_GETTER(R_MAX, load_max());
  62. LOL_CONSTANT_GETTER(R_LN2, fast_log(R_2()));
  63. LOL_CONSTANT_GETTER(R_LN10, log(R_10()));
  64. LOL_CONSTANT_GETTER(R_LOG2E, inverse(R_LN2()));
  65. LOL_CONSTANT_GETTER(R_LOG10E, inverse(R_LN10()));
  66. LOL_CONSTANT_GETTER(R_E, exp(R_1()));
  67. LOL_CONSTANT_GETTER(R_PI, load_pi());
  68. LOL_CONSTANT_GETTER(R_PI_2, R_PI() / 2);
  69. LOL_CONSTANT_GETTER(R_PI_3, R_PI() / R_3());
  70. LOL_CONSTANT_GETTER(R_PI_4, R_PI() / 4);
  71. LOL_CONSTANT_GETTER(R_TAU, R_PI() + R_PI());
  72. LOL_CONSTANT_GETTER(R_1_PI, inverse(R_PI()));
  73. LOL_CONSTANT_GETTER(R_2_PI, R_1_PI() * 2);
  74. LOL_CONSTANT_GETTER(R_2_SQRTPI, inverse(sqrt(R_PI())) * 2);
  75. LOL_CONSTANT_GETTER(R_SQRT2, sqrt(R_2()));
  76. LOL_CONSTANT_GETTER(R_SQRT3, sqrt(R_3()));
  77. LOL_CONSTANT_GETTER(R_SQRT1_2, R_SQRT2() / 2);
  78. #undef LOL_CONSTANT_GETTER
  79. /*
  80. * Now carry on with the rest of the Real class.
  81. */
  82. template<> real::Real(int32_t i) { new(this) real((double)i); }
  83. template<> real::Real(uint32_t i) { new(this) real((double)i); }
  84. template<> real::Real(float f) { new(this) real((double)f); }
  85. template<> real::Real(int64_t i)
  86. {
  87. new(this) real((uint64_t)lol::abs(i));
  88. m_sign = i < 0;
  89. }
  90. template<> real::Real(uint64_t i)
  91. {
  92. new(this) real();
  93. if (i)
  94. {
  95. /* Only works with 32-bit bigits for now */
  96. static_assert(sizeof(bigit_t) == 4, "bigit_t must be 32-bit");
  97. int delta = 1;
  98. while ((i >> 63) == 0)
  99. {
  100. i <<= 1;
  101. ++delta;
  102. }
  103. i <<= 1; /* Remove implicit one */
  104. m_exponent = 64 - delta;
  105. m_mantissa.resize(DEFAULT_BIGIT_COUNT);
  106. m_mantissa[0] = (bigit_t)(i >> 32);
  107. if (bigit_count() > 1)
  108. m_mantissa[1] = (bigit_t)i;
  109. }
  110. }
  111. template<> real::Real(double d)
  112. {
  113. union { double d; uint64_t x; } u = { d };
  114. m_sign = bool(u.x >> 63);
  115. exponent_t exponent = (u.x << 1) >> 53;
  116. switch (exponent)
  117. {
  118. case 0x00: /* +0 / -0 */
  119. break;
  120. case 0x7ff: /* Inf/NaN (FIXME: handle NaN!) */
  121. m_inf = true;
  122. break;
  123. default:
  124. /* Only works with 32-bit bigits for now */
  125. static_assert(sizeof(bigit_t) == 4, "bigit_t must be 32-bit");
  126. m_exponent = exponent - ((1 << 10) - 1);
  127. m_mantissa.resize(DEFAULT_BIGIT_COUNT);
  128. m_mantissa[0] = (bigit_t)(u.x >> 20);
  129. if (bigit_count() > 1)
  130. m_mantissa[1] = (bigit_t)(u.x << 12);
  131. break;
  132. }
  133. }
  134. template<> real::Real(long double f)
  135. {
  136. /* We don’t know the long double layout, so we get rid of the
  137. * exponent, then load it into a real in two steps. */
  138. int exponent;
  139. f = frexpl(f, &exponent);
  140. new(this) real(double(f));
  141. *this += double(f - (long double)*this);
  142. m_exponent += exponent;
  143. }
  144. template<> real::operator float() const { return (float)(double)*this; }
  145. template<> real::operator int32_t() const { return (int32_t)(double)floor(*this); }
  146. template<> real::operator uint32_t() const { return (uint32_t)(double)floor(*this); }
  147. template<> real::operator uint64_t() const
  148. {
  149. uint32_t msb = (uint32_t)ldexp(*this, -32);
  150. uint64_t ret = ((uint64_t)msb << 32)
  151. | (uint32_t)(*this - ldexp((real)msb, 32));
  152. return ret;
  153. }
  154. template<> real::operator int64_t() const
  155. {
  156. /* If number is positive, convert it to uint64_t first. If it is
  157. * negative, switch its sign first. */
  158. return is_negative() ? -(int64_t)-*this : (int64_t)(uint64_t)*this;
  159. }
  160. template<> real::operator double() const
  161. {
  162. union { double d; uint64_t x; } u;
  163. /* Get sign */
  164. u.x = (is_negative() ? 1 : 0) << 11;
  165. /* Compute new exponent (FIXME: handle Inf/NaN) */
  166. int64_t e = m_exponent + ((1 << 10) - 1);
  167. if (is_zero())
  168. u.x <<= 52;
  169. else if (e < 0) /* if exponent underflows, set to zero */
  170. u.x <<= 52;
  171. else if (e >= 0x7ff)
  172. {
  173. u.x |= 0x7ff;
  174. u.x <<= 52;
  175. }
  176. else
  177. {
  178. u.x |= e;
  179. /* Store mantissa if necessary */
  180. u.x <<= 32;
  181. if (bigit_count() > 0)
  182. u.x |= m_mantissa[0];
  183. u.x <<= 20;
  184. if (bigit_count() > 1)
  185. {
  186. u.x |= m_mantissa[1] >> 12;
  187. /* Rounding */
  188. u.x += (m_mantissa[1] >> 11) & 1;
  189. }
  190. }
  191. return u.d;
  192. }
  193. template<> real::operator long double() const
  194. {
  195. double hi = double(*this);
  196. double lo = double(*this - hi);
  197. return (long double)(hi) + (long double)(lo);
  198. }
  199. /*
  200. * Create a real number from an ASCII representation
  201. */
  202. template<> real::Real(char const *str)
  203. {
  204. real ret = 0;
  205. exponent_t exponent = 0;
  206. bool hex = false, comma = false, nonzero = false, negative = false, finished = false;
  207. for (char const *p = str; *p && !finished; p++)
  208. {
  209. switch (*p)
  210. {
  211. case '-':
  212. case '+':
  213. if (p != str)
  214. break;
  215. negative = (*p == '-');
  216. break;
  217. case '.':
  218. if (comma)
  219. finished = true;
  220. comma = true;
  221. break;
  222. case 'x':
  223. case 'X':
  224. /* This character is only valid for 0x... and 0X... numbers */
  225. if (p != str + 1 || str[0] != '0')
  226. finished = true;
  227. hex = true;
  228. break;
  229. case 'p':
  230. case 'P':
  231. if (hex)
  232. exponent += atoi(p + 1);
  233. finished = true;
  234. break;
  235. case 'e':
  236. case 'E':
  237. if (!hex)
  238. {
  239. exponent += atoi(p + 1);
  240. finished = true;
  241. break;
  242. }
  243. LOL_ATTR_FALLTHROUGH
  244. case 'a': case 'b': case 'c': case 'd': case 'f':
  245. case 'A': case 'B': case 'C': case 'D': case 'F':
  246. case '0': case '1': case '2': case '3': case '4':
  247. case '5': case '6': case '7': case '8': case '9':
  248. if (nonzero)
  249. {
  250. /* Multiply ret by 10 or 16 depending the base. */
  251. if (!hex)
  252. {
  253. real x = ret + ret;
  254. ret = x + x + ret;
  255. }
  256. ret.m_exponent += hex ? 4 : 1;
  257. }
  258. if (*p != '0')
  259. {
  260. ret += (*p >= 'a' && *p <= 'f') ? (int)(*p - 'a' + 10)
  261. : (*p >= 'A' && *p <= 'F') ? (int)(*p - 'A' + 10)
  262. : (int)(*p - '0');
  263. nonzero = true;
  264. }
  265. if (comma)
  266. exponent -= hex ? 4 : 1;
  267. break;
  268. default:
  269. finished = true;
  270. break;
  271. }
  272. }
  273. if (hex)
  274. ret.m_exponent += exponent;
  275. else if (exponent)
  276. ret *= pow(R_10(), (real)exponent);
  277. if (negative)
  278. ret = -ret;
  279. *this = ret;
  280. }
  281. template<> real real::operator +() const
  282. {
  283. return *this;
  284. }
  285. template<> real real::operator -() const
  286. {
  287. real ret = *this;
  288. ret.m_sign ^= true;
  289. return ret;
  290. }
  291. template<> real real::operator +(real const &x) const
  292. {
  293. if (x.is_zero())
  294. return *this;
  295. if (is_zero())
  296. return x;
  297. /* Ensure both arguments are positive. Otherwise, switch signs,
  298. * or replace + with -. */
  299. if (is_negative())
  300. return -(-*this + -x);
  301. if (x.is_negative())
  302. return *this - (-x);
  303. /* Ensure *this has the larger exponent (no need for the mantissa to
  304. * be larger, as in subtraction). Otherwise, switch. */
  305. if (m_exponent < x.m_exponent)
  306. return x + *this;
  307. int64_t e1 = m_exponent;
  308. int64_t e2 = x.m_exponent;
  309. int64_t bigoff = (e1 - e2) / bigit_bits();
  310. int64_t off = e1 - e2 - bigoff * bigit_bits();
  311. /* FIXME: ensure we have the same number of bigits */
  312. if (bigoff > bigit_count())
  313. return *this;
  314. real ret;
  315. ret.m_mantissa.resize(bigit_count());
  316. ret.m_exponent = m_exponent;
  317. uint64_t carry = 0;
  318. for (int i = bigit_count(); i--; )
  319. {
  320. carry += m_mantissa[i];
  321. if (i - bigoff >= 0)
  322. carry += x.m_mantissa[i - bigoff] >> off;
  323. if (off && i - bigoff > 0)
  324. carry += (x.m_mantissa[i - bigoff - 1] << (bigit_bits() - off)) & 0xffffffffu;
  325. else if (i - bigoff == 0)
  326. carry += (uint64_t)1 << (bigit_bits() - off);
  327. ret.m_mantissa[i] = (uint32_t)carry;
  328. carry >>= bigit_bits();
  329. }
  330. /* Renormalise in case we overflowed the mantissa */
  331. if (carry)
  332. {
  333. carry--;
  334. for (int i = 0; i < bigit_count(); ++i)
  335. {
  336. uint32_t tmp = ret.m_mantissa[i];
  337. ret.m_mantissa[i] = ((uint32_t)carry << (bigit_bits() - 1))
  338. | (tmp >> 1);
  339. carry = tmp & 1u;
  340. }
  341. ret.m_exponent++;
  342. }
  343. return ret;
  344. }
  345. template<> real real::operator -(real const &x) const
  346. {
  347. if (x.is_zero())
  348. return *this;
  349. if (is_zero())
  350. return -x;
  351. /* Ensure both arguments are positive. Otherwise, switch signs,
  352. * or replace - with +. */
  353. if (is_negative())
  354. return -(-*this + x);
  355. if (x.is_negative())
  356. return (*this) + (-x);
  357. /* Ensure *this is larger than x */
  358. if (*this < x)
  359. return -(x - *this);
  360. exponent_t e1 = m_exponent;
  361. exponent_t e2 = x.m_exponent;
  362. exponent_t bigoff = (e1 - e2) / bigit_bits();
  363. exponent_t off = e1 - e2 - bigoff * bigit_bits();
  364. /* FIXME: ensure we have the same number of bigits */
  365. if (bigoff > bigit_count())
  366. return *this;
  367. real ret;
  368. ret.m_mantissa.resize(bigit_count());
  369. ret.m_exponent = m_exponent;
  370. /* int64_t instead of uint64_t to preserve sign through shifts */
  371. exponent_t carry = 0;
  372. for (int i = 0; i < bigoff; ++i)
  373. {
  374. carry -= x.m_mantissa[bigit_count() - 1 - i];
  375. /* Emulates a signed shift */
  376. carry >>= bigit_bits();
  377. carry |= carry << bigit_bits();
  378. }
  379. if (bigoff < bigit_count())
  380. carry -= x.m_mantissa[bigit_count() - 1 - bigoff] & (((exponent_t)1 << off) - 1);
  381. carry /= (exponent_t)1 << off;
  382. for (int i = bigit_count(); i--; )
  383. {
  384. carry += m_mantissa[i];
  385. if (i - bigoff >= 0)
  386. carry -= x.m_mantissa[i - bigoff] >> off;
  387. if (off && i - bigoff > 0)
  388. carry -= (x.m_mantissa[i - bigoff - 1] << (bigit_bits() - off)) & 0xffffffffu;
  389. else if (i - bigoff == 0)
  390. carry -= (uint64_t)1 << (bigit_bits() - off);
  391. ret.m_mantissa[i] = (bigit_t)carry;
  392. carry >>= bigit_bits();
  393. carry |= carry << bigit_bits();
  394. }
  395. carry += 1;
  396. /* Renormalise if we underflowed the mantissa */
  397. if (carry == 0)
  398. {
  399. /* How much do we need to shift the mantissa? FIXME: this could
  400. * be computed above */
  401. off = 0;
  402. for (int i = 0; i < bigit_count(); ++i)
  403. {
  404. if (!ret.m_mantissa[i])
  405. {
  406. off += bigit_bits();
  407. continue;
  408. }
  409. /* “~tmp > tmp” checks that the MSB is not set */
  410. for (bigit_t tmp = ret.m_mantissa[i]; ~tmp > tmp; tmp <<= 1)
  411. off++;
  412. break;
  413. }
  414. if (off == total_bits())
  415. ret.m_mantissa.resize(0);
  416. else
  417. {
  418. off++; /* Shift once more to get rid of the leading 1 */
  419. ret.m_exponent -= off;
  420. bigoff = off / bigit_bits();
  421. off -= bigoff * bigit_bits();
  422. for (int i = 0; i < bigit_count(); ++i)
  423. {
  424. bigit_t tmp = 0;
  425. if (i + bigoff < bigit_count())
  426. tmp |= ret.m_mantissa[i + bigoff] << off;
  427. if (off && i + bigoff + 1 < bigit_count())
  428. tmp |= ret.m_mantissa[i + bigoff + 1] >> (bigit_bits() - off);
  429. ret.m_mantissa[i] = tmp;
  430. }
  431. }
  432. }
  433. return ret;
  434. }
  435. template<> real real::operator *(real const &x) const
  436. {
  437. real ret;
  438. /* The sign is easy to compute */
  439. ret.m_sign = is_negative() ^ x.is_negative();
  440. /* If any operand is zero, return zero. FIXME: 0 * Inf? */
  441. if (is_zero() || x.is_zero())
  442. return ret;
  443. ret.m_mantissa.resize(bigit_count());
  444. ret.m_exponent = m_exponent + x.m_exponent;
  445. /* Accumulate low order product; no need to store it, we just
  446. * want the carry value */
  447. uint64_t carry = 0, hicarry = 0, prev;
  448. for (int i = 0; i < bigit_count(); ++i)
  449. {
  450. for (int j = 0; j < i + 1; j++)
  451. {
  452. prev = carry;
  453. carry += (uint64_t)m_mantissa[bigit_count() - 1 - j]
  454. * (uint64_t)x.m_mantissa[bigit_count() - 1 + j - i];
  455. if (carry < prev)
  456. hicarry++;
  457. }
  458. carry >>= bigit_bits();
  459. carry |= hicarry << bigit_bits();
  460. hicarry >>= bigit_bits();
  461. }
  462. /* Multiply the other components */
  463. for (int i = 0; i < bigit_count(); ++i)
  464. {
  465. for (int j = i + 1; j < bigit_count(); j++)
  466. {
  467. prev = carry;
  468. carry += (uint64_t)m_mantissa[bigit_count() - 1 - j]
  469. * (uint64_t)x.m_mantissa[j - 1 - i];
  470. if (carry < prev)
  471. hicarry++;
  472. }
  473. prev = carry;
  474. carry += m_mantissa[bigit_count() - 1 - i];
  475. carry += x.m_mantissa[bigit_count() - 1 - i];
  476. if (carry < prev)
  477. hicarry++;
  478. ret.m_mantissa[bigit_count() - 1 - i] = carry & ~(bigit_t)0;
  479. carry >>= bigit_bits();
  480. carry |= hicarry << bigit_bits();
  481. hicarry >>= bigit_bits();
  482. }
  483. /* Renormalise in case we overflowed the mantissa */
  484. if (carry)
  485. {
  486. carry--;
  487. for (int i = 0; i < bigit_count(); ++i)
  488. {
  489. bigit_t tmp = ret.m_mantissa[i];
  490. ret.m_mantissa[i] = ((bigit_t)carry << (bigit_bits() - 1))
  491. | (tmp >> 1);
  492. carry = tmp & 1u;
  493. }
  494. ++ret.m_exponent;
  495. }
  496. return ret;
  497. }
  498. template<> real real::operator /(real const &x) const
  499. {
  500. return *this * inverse(x);
  501. }
  502. template<> real const &real::operator +=(real const &x)
  503. {
  504. real tmp = *this;
  505. return *this = tmp + x;
  506. }
  507. template<> real const &real::operator -=(real const &x)
  508. {
  509. real tmp = *this;
  510. return *this = tmp - x;
  511. }
  512. template<> real const &real::operator *=(real const &x)
  513. {
  514. real tmp = *this;
  515. return *this = tmp * x;
  516. }
  517. template<> real const &real::operator /=(real const &x)
  518. {
  519. real tmp = *this;
  520. return *this = tmp / x;
  521. }
  522. template<> bool real::operator ==(real const &x) const
  523. {
  524. /* If NaN is involved, return false */
  525. if (is_nan() || x.is_nan())
  526. return false;
  527. /* If both zero, they are equal; if either is zero, they are different */
  528. if (is_zero() || x.is_zero())
  529. return is_zero() && x.is_zero();
  530. /* FIXME: handle NaN/Inf */
  531. return m_exponent == x.m_exponent && m_mantissa == x.m_mantissa;
  532. }
  533. template<> bool real::operator !=(real const &x) const
  534. {
  535. return !(is_nan() || x.is_nan() || *this == x);
  536. }
  537. template<> bool real::operator <(real const &x) const
  538. {
  539. /* If NaN is involved, return false */
  540. if (is_nan() || x.is_nan())
  541. return false;
  542. /* Ensure we are positive */
  543. if (is_negative())
  544. return -*this > -x;
  545. /* If x is zero or negative, we can’t be < x */
  546. if (x.is_negative() || x.is_zero())
  547. return false;
  548. /* If we are zero, we must be < x */
  549. if (is_zero())
  550. return true;
  551. /* Compare exponents */
  552. if (m_exponent != x.m_exponent)
  553. return m_exponent < x.m_exponent;
  554. /* Compare all relevant bits */
  555. for (int i = 0; i < bigit_count(); ++i)
  556. if (m_mantissa[i] != x.m_mantissa[i])
  557. return m_mantissa[i] < x.m_mantissa[i];
  558. return false;
  559. }
  560. template<> bool real::operator <=(real const &x) const
  561. {
  562. return !(is_nan() || x.is_nan() || *this > x);
  563. }
  564. template<> bool real::operator >(real const &x) const
  565. {
  566. /* If NaN is involved, return false */
  567. if (is_nan() || x.is_nan())
  568. return false;
  569. /* Ensure we are positive */
  570. if (is_negative())
  571. return -*this < -x;
  572. /* If x is zero, we’re > x iff we’re non-zero since we’re positive */
  573. if (x.is_zero())
  574. return !is_zero();
  575. /* If x is strictly negative, we’re > x */
  576. if (x.is_negative())
  577. return true;
  578. /* If we are zero, we can’t be > x */
  579. if (is_zero())
  580. return false;
  581. /* Compare exponents */
  582. if (m_exponent != x.m_exponent)
  583. return m_exponent > x.m_exponent;
  584. /* Compare all relevant bits */
  585. for (int i = 0; i < bigit_count(); ++i)
  586. if (m_mantissa[i] != x.m_mantissa[i])
  587. return m_mantissa[i] > x.m_mantissa[i];
  588. return false;
  589. }
  590. template<> bool real::operator >=(real const &x) const
  591. {
  592. return !(is_nan() || x.is_nan() || *this < x);
  593. }
  594. template<> bool real::operator !() const
  595. {
  596. return !(bool)*this;
  597. }
  598. template<> real::operator bool() const
  599. {
  600. /* A real is "true" if it is non-zero AND not NaN */
  601. return !is_zero() && !is_nan();
  602. }
  603. template<> real min(real const &a, real const &b)
  604. {
  605. return (a < b) ? a : b;
  606. }
  607. template<> real max(real const &a, real const &b)
  608. {
  609. return (a > b) ? a : b;
  610. }
  611. template<> real clamp(real const &x, real const &a, real const &b)
  612. {
  613. return (x < a) ? a : (x > b) ? b : x;
  614. }
  615. template<> real inverse(real const &x)
  616. {
  617. real ret;
  618. /* If zero, return infinite */
  619. if (x.is_zero())
  620. return copysign(real::R_INF(), x);
  621. /* Use the system’s float inversion to approximate 1/x */
  622. union { float f; uint32_t x; } u = { 1.0f };
  623. u.x |= x.m_mantissa[0] >> 9;
  624. u.f = 1.0f / u.f;
  625. ret.m_mantissa.resize(x.bigit_count());
  626. ret.m_mantissa[0] = u.x << 9;
  627. ret.m_sign = x.m_sign;
  628. ret.m_exponent = -x.m_exponent + (u.x >> 23) - 0x7f;
  629. /* FIXME: 1+log2(bigit_count) steps of Newton-Raphson seems to be enough for
  630. * convergence, but this hasn’t been checked seriously. */
  631. for (int i = 1; i <= x.bigit_count(); i *= 2)
  632. ret = ret * (real::R_2() - ret * x);
  633. return ret;
  634. }
  635. template<> real sqrt(real const &x)
  636. {
  637. /* if zero, return x (FIXME: negative zero?) */
  638. if (x.is_zero())
  639. return x;
  640. /* if negative, return NaN */
  641. if (x.is_negative())
  642. return real::R_NAN();
  643. int tweak = x.m_exponent & 1;
  644. /* Use the system’s float inversion to approximate 1/sqrt(x). First
  645. * we construct a float in the [1..4[ range that has roughly the same
  646. * mantissa as our real. Its exponent is 0 or 1, depending on the
  647. * parity of x’s exponent. The final exponent is 0, -1 or -2. We use
  648. * the final exponent and final mantissa to pre-fill the result. */
  649. union { float f; uint32_t x; } u = { 1.0f };
  650. u.x += tweak << 23;
  651. u.x |= x.m_mantissa[0] >> 9;
  652. u.f = 1.0f / sqrtf(u.f);
  653. real ret;
  654. ret.m_mantissa.resize(x.bigit_count());
  655. ret.m_mantissa[0] = u.x << 9;
  656. ret.m_exponent = -(x.m_exponent - tweak) / 2 + (u.x >> 23) - 0x7f;
  657. /* FIXME: 1+log2(bigit_count()) steps of Newton-Raphson seems to be enough for
  658. * convergence, but this hasn’t been checked seriously. */
  659. for (int i = 1; i <= x.bigit_count(); i *= 2)
  660. {
  661. ret = ret * (real::R_3() - ret * ret * x);
  662. --ret.m_exponent;
  663. }
  664. return ret * x;
  665. }
  666. template<> real cbrt(real const &x)
  667. {
  668. /* if zero, return x */
  669. if (x.is_zero())
  670. return x;
  671. int tweak = x.m_exponent % 3;
  672. if (tweak < 0)
  673. tweak += 3;
  674. /* Use the system’s float inversion to approximate cbrt(x). First
  675. * we construct a float in the [1..8[ range that has roughly the same
  676. * mantissa as our real. Its exponent is 0, 1 or 2, depending on the
  677. * value of x. The final exponent is 0 or 1 (special case). We use
  678. * the final exponent and final mantissa to pre-fill the result. */
  679. union { float f; uint32_t x; } u = { 1.0f };
  680. u.x += tweak << 23;
  681. u.x |= x.m_mantissa[0] >> 9;
  682. u.f = powf(u.f, 1.f / 3);
  683. real ret;
  684. ret.m_mantissa.resize(x.bigit_count());
  685. ret.m_mantissa[0] = u.x << 9;
  686. ret.m_exponent = (x.m_exponent - tweak) / 3 + (u.x >> 23) - 0x7f;
  687. ret.m_sign = x.m_sign;
  688. /* FIXME: 1+log2(bigit_count()) steps of Newton-Raphson seems to be enough
  689. * for convergence, but this hasn’t been checked seriously. */
  690. real third = inverse(real::R_3());
  691. for (int i = 1; i <= x.bigit_count(); i *= 2)
  692. {
  693. ret = third * (x / (ret * ret) + (ret * 2));
  694. }
  695. return ret;
  696. }
  697. template<> real pow(real const &x, real const &y)
  698. {
  699. /* Shortcuts for degenerate cases */
  700. if (!y)
  701. return real::R_1();
  702. if (!x)
  703. return real::R_0();
  704. /* Small integer exponent: use exponentiation by squaring */
  705. int64_t int_y = (int64_t)y;
  706. if (y == (real)int_y)
  707. {
  708. real ret = real::R_1();
  709. real x_n = int_y > 0 ? x : inverse(x);
  710. while (int_y) /* Can be > 0 or < 0 */
  711. {
  712. if (int_y & 1)
  713. ret *= x_n;
  714. x_n *= x_n;
  715. int_y /= 2;
  716. }
  717. return ret;
  718. }
  719. /* If x is positive, nothing special to do. */
  720. if (x > real::R_0())
  721. return exp(y * log(x));
  722. /* XXX: manpage for pow() says “If x is a finite value less than 0,
  723. * and y is a finite noninteger, a domain error occurs, and a NaN is
  724. * returned”. We check whether y is closer to an even number or to
  725. * an odd number and return something reasonable. */
  726. real round_y = round(y);
  727. bool is_odd = round_y / 2 == round(round_y / 2);
  728. return is_odd ? exp(y * log(-x)) : -exp(y * log(-x));
  729. }
  730. /* A fast factorial implementation for small numbers. An optional
  731. * step argument allows to compute double factorials (i.e. with
  732. * only the odd or the even terms. */
  733. static real fast_fact(int x, int step = 1)
  734. {
  735. if (x < step)
  736. return 1;
  737. if (x == step)
  738. return x;
  739. unsigned int start = (x + step - 1) % step + 1;
  740. real ret(start);
  741. uint64_t multiplier = 1;
  742. for (int i = start, exponent = 0;;)
  743. {
  744. if (i >= x)
  745. return ldexp(ret * multiplier, exponent);
  746. i += step;
  747. /* Accumulate the power of two part in the exponent */
  748. unsigned int tmp = i;
  749. while ((tmp & 1) == 0)
  750. {
  751. tmp >>= 1;
  752. exponent++;
  753. }
  754. /* Accumulate the other factors in the multiplier */
  755. if (multiplier * tmp / tmp != multiplier)
  756. {
  757. ret *= multiplier;
  758. multiplier = 1;
  759. }
  760. multiplier *= tmp;
  761. }
  762. }
  763. template<> real gamma(real const &x)
  764. {
  765. /* We use Spouge’s formula. FIXME: precision is far from acceptable,
  766. * especially with large values. We need to compute this with higher
  767. * precision values in order to attain the desired accuracy. It might
  768. * also be useful to sort the ck values by decreasing absolute value
  769. * and do the addition in this order. */
  770. int a = (int)ceilf(logf(2) / logf(2 * F_PI) * x.total_bits());
  771. real ret = sqrt(real::R_PI() * 2);
  772. real fact_k_1 = real::R_1();
  773. for (int k = 1; k < a; k++)
  774. {
  775. real a_k = (real)(a - k);
  776. real ck = pow(a_k, (real)((float)k - 0.5)) * exp(a_k)
  777. / (fact_k_1 * (x + (real)(k - 1)));
  778. ret += ck;
  779. fact_k_1 *= (real)-k;
  780. }
  781. ret *= pow(x + (real)(a - 1), x - (real::R_1() / 2));
  782. ret *= exp(-x - (real)(a - 1));
  783. return ret;
  784. }
  785. template<> real fabs(real const &x)
  786. {
  787. real ret = x;
  788. ret.m_sign = false;
  789. return ret;
  790. }
  791. template<> real abs(real const &x)
  792. {
  793. return fabs(x);
  794. }
  795. template<> real fract(real const &x)
  796. {
  797. return x - floor(x);
  798. }
  799. template<> real degrees(real const &x)
  800. {
  801. /* FIXME: need to recompute this for different mantissa sizes */
  802. static real mul = real(180) * real::R_1_PI();
  803. return x * mul;
  804. }
  805. template<> real radians(real const &x)
  806. {
  807. /* FIXME: need to recompute this for different mantissa sizes */
  808. static real mul = real::R_PI() / real(180);
  809. return x * mul;
  810. }
  811. static real fast_log(real const &x)
  812. {
  813. /* This fast log method is tuned to work on the [1..2] range and
  814. * no effort whatsoever was made to improve convergence outside this
  815. * domain of validity. It can converge pretty fast, provided we use
  816. * the following variable substitutions:
  817. * y = sqrt(x)
  818. * z = (y - 1) / (y + 1)
  819. *
  820. * And the following identities:
  821. * ln(x) = 2 ln(y)
  822. * = 2 ln((1 + z) / (1 - z))
  823. * = 4 z (1 + z^2 / 3 + z^4 / 5 + z^6 / 7...)
  824. *
  825. * Any additional sqrt() call would halve the convergence time, but
  826. * would also impact the final precision. For now we stick with one
  827. * sqrt() call. */
  828. real y = sqrt(x);
  829. real z = (y - real::R_1()) / (y + real::R_1()), z2 = z * z, zn = z2;
  830. real sum = real::R_1();
  831. for (int i = 3; ; i += 2)
  832. {
  833. real newsum = sum + zn / (real)i;
  834. if (newsum == sum)
  835. break;
  836. sum = newsum;
  837. zn *= z2;
  838. }
  839. return z * sum * 4;
  840. }
  841. template<> real log(real const &x)
  842. {
  843. /* Strategy for log(x): if x = 2^E*M then log(x) = E log(2) + log(M),
  844. * with the property that M is in [1..2[, so fast_log() applies here. */
  845. if (x.is_negative() || x.is_zero())
  846. return real::R_NAN();
  847. real tmp(x);
  848. tmp.m_exponent = 0;
  849. return real(x.m_exponent) * real::R_LN2() + fast_log(tmp);
  850. }
  851. template<> real log2(real const &x)
  852. {
  853. /* Strategy for log2(x): see log(x). */
  854. if (x.is_negative() || x.is_zero())
  855. return real::R_NAN();
  856. real tmp(x);
  857. tmp.m_exponent = 0;
  858. return real(x.m_exponent) + fast_log(tmp) * real::R_LOG2E();
  859. }
  860. template<> real log10(real const &x)
  861. {
  862. return log(x) * real::R_LOG10E();
  863. }
  864. static real fast_exp_sub(real const &x, real const &y)
  865. {
  866. /* This fast exp method is tuned to work on the [-1..1] range and
  867. * no effort whatsoever was made to improve convergence outside this
  868. * domain of validity. The argument y is used for cases where we
  869. * don’t want the leading 1 in the Taylor series. */
  870. real ret = real::R_1() - y, xn = x;
  871. int i = 1;
  872. for (;;)
  873. {
  874. real newret = ret + xn;
  875. if (newret == ret)
  876. break;
  877. ret = newret * ++i;
  878. xn *= x;
  879. }
  880. return ret / fast_fact(i);
  881. }
  882. template<> real exp(real const &x)
  883. {
  884. /* Strategy for exp(x): the Taylor series does not converge very fast
  885. * with large positive or negative values.
  886. *
  887. * However, since the result is going to be in the form M*2^E, we first
  888. * try to predict a value for E, which is approximately:
  889. * E ≈ log2(exp(x)) = x / log(2)
  890. *
  891. * Let E0 be an integer close to x / log(2). We need to find a value x0
  892. * such that exp(x) = 2^E0 * exp(x0). We get x0 = x - E0 log(2).
  893. *
  894. * Thus the final algorithm:
  895. * int E0 = x / log(2)
  896. * real x0 = x - E0 log(2)
  897. * real x1 = exp(x0)
  898. * return x1 * 2^E0
  899. */
  900. real::exponent_t e0 = x / real::R_LN2();
  901. real x0 = x - (real)e0 * real::R_LN2();
  902. real x1 = fast_exp_sub(x0, real::R_0());
  903. x1.m_exponent += e0;
  904. return x1;
  905. }
  906. template<> real exp2(real const &x)
  907. {
  908. /* Strategy for exp2(x): see strategy in exp(). */
  909. real::exponent_t e0 = x;
  910. real x0 = x - (real)e0;
  911. real x1 = fast_exp_sub(x0 * real::R_LN2(), real::R_0());
  912. x1.m_exponent += e0;
  913. return x1;
  914. }
  915. template<> real erf(real const &x)
  916. {
  917. /* Strategy for erf(x):
  918. * - if x<0, erf(x) = -erf(-x)
  919. * - if x<7, erf(x) = sum((-1)^n·x^(2n+1)/((2n+1)·n!))/sqrt(π/4)
  920. * - if x≥7, erf(x) = 1+exp(-x²)/(x·sqrt(π))·sum((-1)^n·(2n-1)!!/(2x²)^n
  921. *
  922. * FIXME: do not compute factorials at each iteration, accumulate
  923. * them instead (see fast_exp_sub).
  924. * FIXME: For a potentially faster implementation, see “Expanding the
  925. * Error Function erf(z)” at:
  926. * http://www.mathematica-journal.com/2014/11/on-burmanns-theorem-and-its-application-to-problems-of-linear-and-nonlinear-heat-transfer-and-diffusion/#more-39602/
  927. */
  928. if (x.is_negative())
  929. return -erf(-x);
  930. real sum = real::R_0();
  931. real x2 = x * x;
  932. /* FIXME: this test is inefficient; the series converges slowly for x≥1 */
  933. if (x < real(7))
  934. {
  935. real xn = x, xmul = x2;
  936. for (int n = 0;; ++n, xn *= xmul)
  937. {
  938. real tmp = xn / (fast_fact(n) * (2 * n + 1));
  939. real newsum = (n & 1) ? sum - tmp : sum + tmp;
  940. if (newsum == sum)
  941. break;
  942. sum = newsum;
  943. }
  944. return sum * real::R_2_SQRTPI();
  945. }
  946. else
  947. {
  948. real xn = real::R_1(), xmul = inverse(x2 + x2);
  949. /* FIXME: this does not converge well! We need to stop at 30
  950. * iterations and sacrifice some accuracy. */
  951. for (int n = 0; n < 30; ++n, xn *= xmul)
  952. {
  953. real tmp = xn * fast_fact(n * 2 - 1, 2);
  954. real newsum = (n & 1) ? sum - tmp : sum + tmp;
  955. if (newsum == sum)
  956. break;
  957. sum = newsum;
  958. }
  959. return real::R_1() - exp(-x2) / (x * sqrt(real::R_PI())) * sum;
  960. }
  961. }
  962. template<> real sinh(real const &x)
  963. {
  964. /* We cannot always use (exp(x)-exp(-x))/2 because we'll lose
  965. * accuracy near zero. We only use this identity for |x|>0.5. If
  966. * |x|<=0.5, we compute exp(x)-1 and exp(-x)-1 instead. */
  967. bool near_zero = (fabs(x) < real::R_1() / 2);
  968. real x1 = near_zero ? fast_exp_sub(x, real::R_1()) : exp(x);
  969. real x2 = near_zero ? fast_exp_sub(-x, real::R_1()) : exp(-x);
  970. return (x1 - x2) / 2;
  971. }
  972. template<> real tanh(real const &x)
  973. {
  974. /* See sinh() for the strategy here */
  975. bool near_zero = (fabs(x) < real::R_1() / 2);
  976. real x1 = near_zero ? fast_exp_sub(x, real::R_1()) : exp(x);
  977. real x2 = near_zero ? fast_exp_sub(-x, real::R_1()) : exp(-x);
  978. real x3 = near_zero ? x1 + x2 + real::R_2() : x1 + x2;
  979. return (x1 - x2) / x3;
  980. }
  981. template<> real cosh(real const &x)
  982. {
  983. /* No need to worry about accuracy here; maybe the last bit is slightly
  984. * off, but that's about it. */
  985. return (exp(x) + exp(-x)) / 2;
  986. }
  987. template<> real frexp(real const &x, real::exponent_t *exp)
  988. {
  989. if (!x)
  990. {
  991. *exp = 0;
  992. return x;
  993. }
  994. /* FIXME: check that this works */
  995. *exp = x.m_exponent;
  996. real ret = x;
  997. ret.m_exponent = 0;
  998. return ret;
  999. }
  1000. template<> real ldexp(real const &x, real::exponent_t exp)
  1001. {
  1002. real ret = x;
  1003. if (ret) /* Only do something if non-zero */
  1004. ret.m_exponent += exp;
  1005. return ret;
  1006. }
  1007. template<> real modf(real const &x, real *iptr)
  1008. {
  1009. real absx = fabs(x);
  1010. real tmp = floor(absx);
  1011. *iptr = copysign(tmp, x);
  1012. return copysign(absx - tmp, x);
  1013. }
  1014. template<> real nextafter(real const &x, real const &y)
  1015. {
  1016. /* Linux manpage: “If x equals y, the functions return y.” */
  1017. if (x == y)
  1018. return y;
  1019. /* Ensure x is positive. */
  1020. if (x.is_negative())
  1021. return -nextafter(-x, -y);
  1022. /* FIXME: broken for now */
  1023. real ulp = ldexp(x, -x.total_bits());
  1024. return x < y ? x + ulp : x - ulp;
  1025. }
  1026. template<> real copysign(real const &x, real const &y)
  1027. {
  1028. real ret = x;
  1029. ret.m_sign = y.m_sign;
  1030. return ret;
  1031. }
  1032. template<> real floor(real const &x)
  1033. {
  1034. /* Strategy for floor(x):
  1035. * - if negative, return -ceil(-x)
  1036. * - if zero or negative zero, return x
  1037. * - if less than one, return zero
  1038. * - otherwise, if e is the exponent, clear all bits except the
  1039. * first e. */
  1040. if (x < -real::R_0())
  1041. return -ceil(-x);
  1042. if (!x)
  1043. return x;
  1044. if (x < real::R_1())
  1045. return real::R_0();
  1046. real ret = x;
  1047. real::exponent_t exponent = x.m_exponent;
  1048. for (int i = 0; i < x.bigit_count(); ++i)
  1049. {
  1050. if (exponent <= 0)
  1051. ret.m_mantissa[i] = 0;
  1052. else if (exponent < real::bigit_bits())
  1053. ret.m_mantissa[i] &= ~((1 << (real::bigit_bits() - exponent)) - 1);
  1054. exponent -= real::bigit_bits();
  1055. }
  1056. return ret;
  1057. }
  1058. template<> real ceil(real const &x)
  1059. {
  1060. /* Strategy for ceil(x):
  1061. * - if negative, return -floor(-x)
  1062. * - if x == floor(x), return x
  1063. * - otherwise, return floor(x) + 1 */
  1064. if (x < -real::R_0())
  1065. return -floor(-x);
  1066. real ret = floor(x);
  1067. if (ret < x)
  1068. ret += real::R_1();
  1069. return ret;
  1070. }
  1071. template<> real round(real const &x)
  1072. {
  1073. if (x < real::R_0())
  1074. return -round(-x);
  1075. return floor(x + (real::R_1() / 2));
  1076. }
  1077. template<> real fmod(real const &x, real const &y)
  1078. {
  1079. if (!y)
  1080. return real::R_0(); /* FIXME: return NaN */
  1081. if (!x)
  1082. return x;
  1083. real tmp = round(x / y);
  1084. return x - tmp * y;
  1085. }
  1086. template<> real sin(real const &x)
  1087. {
  1088. bool switch_sign = x.is_negative();
  1089. real absx = fmod(fabs(x), real::R_PI() * 2);
  1090. if (absx > real::R_PI())
  1091. {
  1092. absx -= real::R_PI();
  1093. switch_sign = !switch_sign;
  1094. }
  1095. if (absx > real::R_PI_2())
  1096. absx = real::R_PI() - absx;
  1097. real ret = real::R_0(), fact = real::R_1(), xn = absx, mx2 = -absx * absx;
  1098. int i = 1;
  1099. for (;;)
  1100. {
  1101. real newret = ret + xn;
  1102. if (newret == ret)
  1103. break;
  1104. ret = newret * ((i + 1) * (i + 2));
  1105. xn *= mx2;
  1106. i += 2;
  1107. }
  1108. ret /= fast_fact(i);
  1109. /* Propagate sign */
  1110. ret.m_sign ^= switch_sign;
  1111. return ret;
  1112. }
  1113. template<> real cos(real const &x)
  1114. {
  1115. return sin(real::R_PI_2() - x);
  1116. }
  1117. template<> real tan(real const &x)
  1118. {
  1119. /* Constrain input to [-π,π] */
  1120. real y = fmod(x, real::R_PI());
  1121. /* Constrain input to [-π/2,π/2] */
  1122. if (y < -real::R_PI_2())
  1123. y += real::R_PI();
  1124. else if (y > real::R_PI_2())
  1125. y -= real::R_PI();
  1126. /* In [-π/4,π/4] return sin/cos */
  1127. if (fabs(y) <= real::R_PI_4())
  1128. return sin(y) / cos(y);
  1129. /* Otherwise, return cos/sin */
  1130. if (y > real::R_0())
  1131. y = real::R_PI_2() - y;
  1132. else
  1133. y = -real::R_PI_2() - y;
  1134. return cos(y) / sin(y);
  1135. }
  1136. static inline real asinacos(real const &x, int is_asin)
  1137. {
  1138. /* Strategy for asin(): in [-0.5..0.5], use a Taylor series around
  1139. * zero. In [0.5..1], use asin(x) = π/2 - 2*asin(sqrt((1-x)/2)), and
  1140. * in [-1..-0.5] just revert the sign.
  1141. * Strategy for acos(): use acos(x) = π/2 - asin(x) and try not to
  1142. * lose the precision around x=1. */
  1143. real absx = fabs(x);
  1144. int around_zero = (absx < (real::R_1() / 2));
  1145. if (!around_zero)
  1146. absx = sqrt((real::R_1() - absx) / 2);
  1147. real ret = absx, xn = absx, x2 = absx * absx, fact1 = 2, fact2 = 1;
  1148. for (int i = 1; ; ++i)
  1149. {
  1150. xn *= x2;
  1151. real mul = (real)(2 * i + 1);
  1152. real newret = ret + ldexp(fact1 * xn / (mul * fact2), -2 * i);
  1153. if (newret == ret)
  1154. break;
  1155. ret = newret;
  1156. fact1 *= (real)((2 * i + 1) * (2 * i + 2));
  1157. fact2 *= (real)((i + 1) * (i + 1));
  1158. }
  1159. if (x.is_negative())
  1160. ret = -ret;
  1161. if (around_zero)
  1162. ret = is_asin ? ret : real::R_PI_2() - ret;
  1163. else
  1164. {
  1165. real adjust = x.is_negative() ? real::R_PI() : real::R_0();
  1166. if (is_asin)
  1167. ret = real::R_PI_2() - adjust - ret * 2;
  1168. else
  1169. ret = adjust + ret * 2;
  1170. }
  1171. return ret;
  1172. }
  1173. template<> real asin(real const &x)
  1174. {
  1175. return asinacos(x, 1);
  1176. }
  1177. template<> real acos(real const &x)
  1178. {
  1179. return asinacos(x, 0);
  1180. }
  1181. template<> real atan(real const &x)
  1182. {
  1183. /* Computing atan(x): we choose a different Taylor series depending on
  1184. * the value of x to help with convergence.
  1185. *
  1186. * If |x| < 0.5 we evaluate atan(y) near 0:
  1187. * atan(y) = y - y^3/3 + y^5/5 - y^7/7 + y^9/9 ...
  1188. *
  1189. * If 0.5 <= |x| < 1.5 we evaluate atan(1+y) near 0:
  1190. * atan(1+y) = π/4 + y/(1*2^1) - y^2/(2*2^1) + y^3/(3*2^2)
  1191. * - y^5/(5*2^3) + y^6/(6*2^3) - y^7/(7*2^4)
  1192. * + y^9/(9*2^5) - y^10/(10*2^5) + y^11/(11*2^6) ...
  1193. *
  1194. * If 1.5 <= |x| < 2 we evaluate atan(sqrt(3)+2y) near 0:
  1195. * atan(sqrt(3)+2y) = π/3 + 1/2 y - sqrt(3)/2 y^2/2
  1196. * + y^3/3 - sqrt(3)/2 y^4/4 + 1/2 y^5/5
  1197. * - 1/2 y^7/7 + sqrt(3)/2 y^8/8
  1198. * - y^9/9 + sqrt(3)/2 y^10/10 - 1/2 y^11/11
  1199. * + 1/2 y^13/13 - sqrt(3)/2 y^14/14
  1200. * + y^15/15 - sqrt(3)/2 y^16/16 + 1/2 y^17/17 ...
  1201. *
  1202. * If |x| >= 2 we evaluate atan(y) near +∞:
  1203. * atan(y) = π/2 - y^-1 + y^-3/3 - y^-5/5 + y^-7/7 - y^-9/9 ...
  1204. */
  1205. real absx = fabs(x);
  1206. if (absx < (real::R_1() / 2))
  1207. {
  1208. real ret = x, xn = x, mx2 = -x * x;
  1209. for (int i = 3; ; i += 2)
  1210. {
  1211. xn *= mx2;
  1212. real newret = ret + xn / (real)i;
  1213. if (newret == ret)
  1214. break;
  1215. ret = newret;
  1216. }
  1217. return ret;
  1218. }
  1219. real ret = 0;
  1220. if (absx < (real::R_3() / 2))
  1221. {
  1222. real y = real::R_1() - absx;
  1223. real yn = y, my2 = -y * y;
  1224. for (int i = 0; ; i += 2)
  1225. {
  1226. real newret = ret + ldexp(yn / (real)(2 * i + 1), -i - 1);
  1227. yn *= y;
  1228. newret += ldexp(yn / (real)(2 * i + 2), -i - 1);
  1229. yn *= y;
  1230. newret += ldexp(yn / (real)(2 * i + 3), -i - 2);
  1231. if (newret == ret)
  1232. break;
  1233. ret = newret;
  1234. yn *= my2;
  1235. }
  1236. ret = real::R_PI_4() - ret;
  1237. }
  1238. else if (absx < real::R_2())
  1239. {
  1240. real y = (absx - real::R_SQRT3()) / 2;
  1241. real yn = y, my2 = -y * y;
  1242. for (int i = 1; ; i += 6)
  1243. {
  1244. real newret = ret + ((yn / (real)i) / 2);
  1245. yn *= y;
  1246. newret -= (real::R_SQRT3() / 2) * yn / (real)(i + 1);
  1247. yn *= y;
  1248. newret += yn / (real)(i + 2);
  1249. yn *= y;
  1250. newret -= (real::R_SQRT3() / 2) * yn / (real)(i + 3);
  1251. yn *= y;
  1252. newret += (yn / (real)(i + 4)) / 2;
  1253. if (newret == ret)
  1254. break;
  1255. ret = newret;
  1256. yn *= my2;
  1257. }
  1258. ret = real::R_PI_3() + ret;
  1259. }
  1260. else
  1261. {
  1262. real y = inverse(absx);
  1263. real yn = y, my2 = -y * y;
  1264. ret = y;
  1265. for (int i = 3; ; i += 2)
  1266. {
  1267. yn *= my2;
  1268. real newret = ret + yn / (real)i;
  1269. if (newret == ret)
  1270. break;
  1271. ret = newret;
  1272. }
  1273. ret = real::R_PI_2() - ret;
  1274. }
  1275. /* Propagate sign */
  1276. ret.m_sign = x.m_sign;
  1277. return ret;
  1278. }
  1279. template<> real atan2(real const &y, real const &x)
  1280. {
  1281. if (!y)
  1282. {
  1283. if (!x.is_negative())
  1284. return y;
  1285. return y.is_negative() ? -real::R_PI() : real::R_PI();
  1286. }
  1287. if (!x)
  1288. {
  1289. return y.is_negative() ? -real::R_PI() : real::R_PI();
  1290. }
  1291. /* FIXME: handle the Inf and NaN cases */
  1292. real z = y / x;
  1293. real ret = atan(z);
  1294. if (x < real::R_0())
  1295. ret += (y > real::R_0()) ? real::R_PI() : -real::R_PI();
  1296. return ret;
  1297. }
  1298. /* Franke’s function, used as a test for interpolation methods */
  1299. template<> real franke(real const &x, real const &y)
  1300. {
  1301. /* Compute 9x and 9y */
  1302. real nx = x + x; nx += nx; nx += nx + x;
  1303. real ny = y + y; ny += ny; ny += ny + y;
  1304. /* Temporary variables for the formula */
  1305. real a = nx - real::R_2();
  1306. real b = ny - real::R_2();
  1307. real c = nx + real::R_1();
  1308. real d = ny + real::R_1();
  1309. real e = nx - real(7);
  1310. real f = ny - real::R_3();
  1311. real g = nx - real(4);
  1312. real h = ny - real(7);
  1313. return exp(-(a * a + b * b) * real(0.25)) * real(0.75)
  1314. + exp(-(c * c / real(49) + d * d / real::R_10())) * real(0.75)
  1315. + exp(-(e * e + f * f) * real(0.25)) * real(0.5)
  1316. - exp(-(g * g + h * h)) / real(5);
  1317. }
  1318. /* The Peaks example function from Matlab */
  1319. template<> real peaks(real const &x, real const &y)
  1320. {
  1321. real x2 = x * x;
  1322. real y2 = y * y;
  1323. /* 3 * (1-x)^2 * exp(-x^2 - (y+1)^2) */
  1324. real ret = real::R_3()
  1325. * (x2 - x - x + real::R_1())
  1326. * exp(- x2 - y2 - y - y - real::R_1());
  1327. /* -10 * (x/5 - x^3 - y^5) * exp(-x^2 - y^2) */
  1328. ret -= (x + x - real::R_10() * (x2 * x + y2 * y2 * y)) * exp(-x2 - y2);
  1329. /* -1/3 * exp(-(x+1)^2 - y^2) */
  1330. ret -= exp(-x2 - x - x - real::R_1() - y2) / real::R_3();
  1331. return ret;
  1332. }
  1333. template<>
  1334. std::ostream& operator <<(std::ostream &s, real const &x)
  1335. {
  1336. bool hex = (s.flags() & std::ios_base::basefield) == std::ios_base::hex;
  1337. s << (hex ? x.xstr() : x.str((int)s.precision()));
  1338. return s;
  1339. }
  1340. template<> std::string real::str(int ndigits) const
  1341. {
  1342. std::stringstream ss;
  1343. real x = *this;
  1344. if (x.is_negative())
  1345. {
  1346. ss << '-';
  1347. x = -x;
  1348. }
  1349. if (!x)
  1350. {
  1351. ss << '0';
  1352. return ss.str();
  1353. }
  1354. // Normalise x so that mantissa is in [1..9.999]
  1355. // FIXME: better use int64_t when the cast is implemented
  1356. // FIXME: does not work with R_MAX and probably R_MIN
  1357. int exponent = ceil(log10(x));
  1358. x *= pow(R_10(), -(real)exponent);
  1359. if (ndigits < 1)
  1360. ndigits = 1;
  1361. // Add a bias to simulate some naive rounding
  1362. x += real(4.99f) * pow(R_10(), -(real)(ndigits + 1));
  1363. if (x < R_1())
  1364. {
  1365. x *= R_10();
  1366. exponent--;
  1367. }
  1368. // Print digits
  1369. for (int i = 0; i < ndigits; ++i)
  1370. {
  1371. int digit = (int)floor(x);
  1372. ss << (char)('0' + digit);
  1373. if (i == 0)
  1374. ss << '.';
  1375. x -= real(digit);
  1376. x *= R_10();
  1377. }
  1378. // Remove trailing zeroes
  1379. std::string ret = ss.str();
  1380. ss.str("");
  1381. size_t end = ret.find_last_not_of('0');
  1382. if (end != std::string::npos)
  1383. ss << ret.substr(0, end + 1);
  1384. // Print exponent information
  1385. if (exponent)
  1386. ss << 'e' << (exponent >= 0 ? '+' : '-') << lol::abs(exponent);
  1387. return ss.str();
  1388. }
  1389. template<> std::string real::xstr() const
  1390. {
  1391. std::stringstream ss;
  1392. if (is_negative())
  1393. ss << '-';
  1394. ss << "0x1." << std::hex << std::setfill('0');
  1395. for (int i = 0; i < bigit_count(); ++i)
  1396. ss << std::setw(8) << m_mantissa[i];
  1397. ss << std::dec;
  1398. // Remove trailing zeroes
  1399. std::string ret = ss.str();
  1400. ss.str("");
  1401. size_t end = ret.find_last_not_of('0');
  1402. if (end != std::string::npos)
  1403. ss << ret.substr(0, end + 1);
  1404. ss << 'p' << m_exponent;
  1405. return ss.str();
  1406. }
  1407. static real load_min()
  1408. {
  1409. real ret = 1;
  1410. return ldexp(ret, std::numeric_limits<real::exponent_t>::min());
  1411. }
  1412. static real load_max()
  1413. {
  1414. /* FIXME: the last bits of the mantissa are not properly handled in this
  1415. * code! So we fallback to a slow but exact method. */
  1416. #if 0
  1417. real ret = 1;
  1418. ret = ldexp(ret, real::TOTAL_BITS - 1) - ret;
  1419. return ldexp(ret, real::EXPONENT_BIAS + 2 - real::TOTAL_BITS);
  1420. #endif
  1421. /* Generates 0x1.ffff..ffffp18446744073709551615 */
  1422. char str[160];
  1423. std::sprintf(str, "0x1.%llx%llx%llx%llx%llx%llx%llx%llxp%lld",
  1424. -1ll, -1ll, -1ll, -1ll, -1ll, -1ll, -1ll, -1ll,
  1425. (long long int)std::numeric_limits<int64_t>::max());
  1426. return real(str);
  1427. }
  1428. static real load_pi()
  1429. {
  1430. /* Approximate π using Machin’s formula: 16*atan(1/5)-4*atan(1/239) */
  1431. real ret = 0, x0 = 5, x1 = 239;
  1432. real const m0 = -x0 * x0, m1 = -x1 * x1, r16 = 16, r4 = 4;
  1433. for (int i = 1; ; i += 2)
  1434. {
  1435. real newret = ret + r16 / (x0 * (real)i) - r4 / (x1 * (real)i);
  1436. if (newret == ret)
  1437. break;
  1438. ret = newret;
  1439. x0 *= m0;
  1440. x1 *= m1;
  1441. }
  1442. return ret;
  1443. }
  1444. } /* namespace lol */