You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lol-bench.cpp 12 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. //
  2. // Lol Engine - Benchmark program
  3. //
  4. // Copyright: (c) 2005-2011 Sam Hocevar <sam@hocevar.net>
  5. // This program is free software; you can redistribute it and/or
  6. // modify it under the terms of the Do What The Fuck You Want To
  7. // Public License, Version 2, as published by Sam Hocevar. See
  8. // http://sam.zoy.org/projects/COPYING.WTFPL for more details.
  9. //
  10. #if defined HAVE_CONFIG_H
  11. # include "config.h"
  12. #endif
  13. #ifdef WIN32
  14. # define _USE_MATH_DEFINES /* for M_PI */
  15. # define WIN32_LEAN_AND_MEAN
  16. # include <windows.h>
  17. #endif
  18. #include <cstdio>
  19. #if defined HAVE_FASTMATH_H
  20. # include <fastmath.h>
  21. #endif
  22. #include "core.h"
  23. #include "loldebug.h"
  24. using namespace std;
  25. using namespace lol;
  /*
   * Benchmark dimensions. Each *_TABLE_SIZE is the number of elements
   * processed per pass and each *_RUNS is the number of passes whose
   * timings are accumulated before being scaled to a per-element figure.
   */
  static const size_t TRIG_TABLE_SIZE = 128 * 1024;
  static const size_t TRIG_RUNS = 50;

  static const size_t MATRIX_TABLE_SIZE = 64 * 1024;
  static const size_t MATRIX_RUNS = 100;

  static const size_t HALF_TABLE_SIZE = 1024 * 1024;
  static const size_t HALF_RUNS = 50;

  /*
   * Benchmark routines. The mode argument selects how each routine fills
   * its input tables.
   */
  static void bench_trig(int mode);
  static void bench_matrix(int mode);
  static void bench_half(int mode);
  35. int main(int argc, char **argv)
  36. {
  37. Log::Info("--------------------------\n");
  38. Log::Info(" Trigonometry [-1e5, 1e5]\n");
  39. Log::Info("--------------------------\n");
  40. bench_trig(1);
  41. Log::Info("------------------------\n");
  42. Log::Info(" Trigonometry [-pi, pi]\n");
  43. Log::Info("------------------------\n");
  44. bench_trig(2);
  45. Log::Info("----------------------------\n");
  46. Log::Info(" Trigonometry [-1e-2, 1e-2]\n");
  47. Log::Info("----------------------------\n");
  48. bench_trig(3);
  49. Log::Info("----------------------------\n");
  50. Log::Info(" Float matrices [-2.0, 2.0]\n");
  51. Log::Info("----------------------------\n");
  52. bench_matrix(1);
  53. Log::Info("-------------------------------------\n");
  54. Log::Info(" Half precision floats (random bits)\n");
  55. Log::Info("-------------------------------------\n");
  56. bench_half(1);
  57. Log::Info("-----------------------------------\n");
  58. Log::Info(" Half precision floats [-2.0, 2.0]\n");
  59. Log::Info("-----------------------------------\n");
  60. bench_half(2);
  61. #if defined _WIN32
  62. getchar();
  63. #endif
  64. return EXIT_SUCCESS;
  65. }
  66. static void bench_trig(int mode)
  67. {
  68. float result[12] = { 0.0f };
  69. Timer timer;
  70. /* Set up tables */
  71. float *pf = new float[TRIG_TABLE_SIZE];
  72. float *pf2 = new float[TRIG_TABLE_SIZE];
  73. float *pf3 = new float[TRIG_TABLE_SIZE];
  74. for (size_t run = 0; run < TRIG_RUNS; run++)
  75. {
  76. switch (mode)
  77. {
  78. case 1:
  79. for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
  80. pf[i] = RandF(-1e5f, 1e5f);
  81. break;
  82. case 2:
  83. for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
  84. pf[i] = RandF(-M_PI, M_PI);
  85. break;
  86. case 3:
  87. for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
  88. pf[i] = RandF(-1e-2f, 1e-2f);
  89. break;
  90. }
  91. /* Sin */
  92. timer.GetMs();
  93. for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
  94. #if defined __GNUC__
  95. pf2[i] = __builtin_sinf(pf[i]);
  96. #else
  97. pf2[i] = sinf(pf[i]);
  98. #endif
  99. result[0] += timer.GetMs();
  100. /* Fast sin */
  101. timer.GetMs();
  102. for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
  103. #if defined HAVE_FASTMATH_H
  104. pf2[i] = f_sinf(pf[i]);
  105. #else
  106. pf2[i] = sinf(pf[i]);
  107. #endif
  108. result[1] += timer.GetMs();
  109. /* Lol sin */
  110. timer.GetMs();
  111. for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
  112. pf2[i] = lol_sin(pf[i]);
  113. result[2] += timer.GetMs();
  114. /* Cos */
  115. timer.GetMs();
  116. for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
  117. #if defined __GNUC__
  118. pf2[i] = __builtin_cosf(pf[i]);
  119. #else
  120. pf2[i] = cosf(pf[i]);
  121. #endif
  122. result[3] += timer.GetMs();
  123. /* Fast cos */
  124. timer.GetMs();
  125. for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
  126. #if defined HAVE_FASTMATH_H
  127. pf2[i] = f_cosf(pf[i]);
  128. #else
  129. pf2[i] = cosf(pf[i]);
  130. #endif
  131. result[4] += timer.GetMs();
  132. /* Lol cos */
  133. timer.GetMs();
  134. for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
  135. pf2[i] = lol_cos(pf[i]);
  136. result[5] += timer.GetMs();
  137. /* Sin & cos */
  138. timer.GetMs();
  139. for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
  140. {
  141. #if defined __GNUC__
  142. pf2[i] = __builtin_sinf(pf[i]);
  143. pf3[i] = __builtin_cosf(pf[i]);
  144. #else
  145. pf2[i] = sinf(pf[i]);
  146. pf3[i] = cosf(pf[i]);
  147. #endif
  148. }
  149. result[6] += timer.GetMs();
  150. /* Fast sin & cos */
  151. timer.GetMs();
  152. for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
  153. {
  154. #if defined HAVE_FASTMATH_H
  155. pf2[i] = f_sinf(pf[i]);
  156. pf3[i] = f_cosf(pf[i]);
  157. #else
  158. pf2[i] = sinf(pf[i]);
  159. pf3[i] = cosf(pf[i]);
  160. #endif
  161. }
  162. result[7] += timer.GetMs();
  163. /* Lol sincos */
  164. timer.GetMs();
  165. for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
  166. lol_sincos(pf[i], &pf2[i], &pf3[i]);
  167. result[8] += timer.GetMs();
  168. /* Tan */
  169. timer.GetMs();
  170. for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
  171. #if defined __GNUC__
  172. pf2[i] = __builtin_tanf(pf[i]);
  173. #else
  174. pf2[i] = tanf(pf[i]);
  175. #endif
  176. result[9] += timer.GetMs();
  177. /* Fast tan */
  178. timer.GetMs();
  179. for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
  180. #if defined HAVE_FASTMATH_H
  181. pf2[i] = f_tanf(pf[i]);
  182. #else
  183. pf2[i] = tanf(pf[i]);
  184. #endif
  185. result[10] += timer.GetMs();
  186. /* Lol tan */
  187. timer.GetMs();
  188. for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
  189. pf2[i] = lol_tan(pf[i]);
  190. result[11] += timer.GetMs();
  191. }
  192. delete[] pf;
  193. delete[] pf2;
  194. delete[] pf3;
  195. for (size_t i = 0; i < sizeof(result) / sizeof(*result); i++)
  196. result[i] *= 1000000.0f / (TRIG_TABLE_SIZE * TRIG_RUNS);
  197. Log::Info(" ns/elem\n");
  198. Log::Info("float = sinf(float) %7.3f\n", result[0]);
  199. Log::Info("float = f_sinf(float) %7.3f\n", result[1]);
  200. Log::Info("float = lol_sin(float) %7.3f\n", result[2]);
  201. Log::Info("float = cosf(float) %7.3f\n", result[3]);
  202. Log::Info("float = f_cosf(float) %7.3f\n", result[4]);
  203. Log::Info("float = lol_cos(float) %7.3f\n", result[5]);
  204. Log::Info("float = sinf,cosf(float) %7.3f\n", result[6]);
  205. Log::Info("float = f_sinf,f_cosf(float) %7.3f\n", result[7]);
  206. Log::Info("float = lol_sincos(float) %7.3f\n", result[8]);
  207. Log::Info("float = tanf(float) %7.3f\n", result[9]);
  208. Log::Info("float = f_tanf(float) %7.3f\n", result[10]);
  209. Log::Info("float = lol_tanf(float) %7.3f\n", result[11]);
  210. }
  211. static void bench_matrix(int mode)
  212. {
  213. float result[5] = { 0.0f };
  214. Timer timer;
  215. /* Set up tables */
  216. mat4 *pm = new mat4[MATRIX_TABLE_SIZE + 1];
  217. float *pf = new float[MATRIX_TABLE_SIZE];
  218. for (size_t run = 0; run < MATRIX_RUNS; run++)
  219. {
  220. switch (mode)
  221. {
  222. case 1:
  223. for (size_t i = 0; i < MATRIX_TABLE_SIZE; i++)
  224. for (int j = 0; j < 4; j++)
  225. for (int k = 0; k < 4; k++)
  226. pm[i][j][k] = RandF(-2.0f, 2.0f);
  227. break;
  228. }
  229. /* Copy matrices */
  230. timer.GetMs();
  231. for (size_t i = 0; i < MATRIX_TABLE_SIZE; i++)
  232. pm[i] = pm[i + 1];
  233. result[0] += timer.GetMs();
  234. /* Determinant */
  235. timer.GetMs();
  236. for (size_t i = 0; i < MATRIX_TABLE_SIZE; i++)
  237. pf[i] = pm[i].det();
  238. result[1] += timer.GetMs();
  239. /* Multiply matrices */
  240. timer.GetMs();
  241. for (size_t i = 0; i < MATRIX_TABLE_SIZE; i++)
  242. pm[i] *= pm[i + 1];
  243. result[2] += timer.GetMs();
  244. /* Add matrices */
  245. timer.GetMs();
  246. for (size_t i = 0; i < MATRIX_TABLE_SIZE; i++)
  247. pm[i] += pm[i + 1];
  248. result[3] += timer.GetMs();
  249. /* Invert matrix */
  250. timer.GetMs();
  251. for (size_t i = 0; i < MATRIX_TABLE_SIZE; i++)
  252. pm[i] = pm[i].invert();
  253. result[4] += timer.GetMs();
  254. }
  255. delete[] pm;
  256. delete[] pf;
  257. for (size_t i = 0; i < sizeof(result) / sizeof(*result); i++)
  258. result[i] *= 1000000.0f / (MATRIX_TABLE_SIZE * MATRIX_RUNS);
  259. Log::Info(" ns/elem\n");
  260. Log::Info("mat4 = mat4 %7.3f\n", result[0]);
  261. Log::Info("float = mat4.det() %7.3f\n", result[1]);
  262. Log::Info("mat4 *= mat4 %7.3f\n", result[2]);
  263. Log::Info("mat4 += mat4 %7.3f\n", result[3]);
  264. Log::Info("mat4 = mat4.invert() %7.3f\n", result[4]);
  265. }
  266. static void bench_half(int mode)
  267. {
  268. float result[10] = { 0.0f };
  269. Timer timer;
  270. /* Set up tables */
  271. float *pf = new float[HALF_TABLE_SIZE + 1];
  272. half *ph = new half[HALF_TABLE_SIZE + 1];
  273. for (size_t run = 0; run < HALF_RUNS; run++)
  274. {
  275. switch (mode)
  276. {
  277. case 1:
  278. for (size_t i = 0; i < HALF_TABLE_SIZE + 1; i++)
  279. ph[i] = half::makebits(rand());
  280. break;
  281. case 2:
  282. for (size_t i = 0; i < HALF_TABLE_SIZE + 1; i++)
  283. ph[i] = RandF(-2.0f, 2.0f);
  284. break;
  285. }
  286. /* Copy float */
  287. timer.GetMs();
  288. for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
  289. pf[i] = pf[i + 1];
  290. result[0] += timer.GetMs();
  291. /* Convert half to float (array) */
  292. timer.GetMs();
  293. half::convert(pf, ph, HALF_TABLE_SIZE);
  294. result[1] += timer.GetMs();
  295. /* Convert half to float (fast) */
  296. timer.GetMs();
  297. for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
  298. pf[i] = (float)ph[i];
  299. result[2] += timer.GetMs();
  300. /* Add a half to every float */
  301. timer.GetMs();
  302. for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
  303. pf[i] += ph[i];
  304. result[3] += timer.GetMs();
  305. /* Copy half */
  306. timer.GetMs();
  307. for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
  308. ph[i] = ph[i + 1];
  309. result[4] += timer.GetMs();
  310. /* Change sign of every half */
  311. timer.GetMs();
  312. for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
  313. ph[i] = -ph[i];
  314. result[5] += timer.GetMs();
  315. /* Convert float to half (array) */
  316. timer.GetMs();
  317. half::convert(ph, pf, HALF_TABLE_SIZE);
  318. result[6] += timer.GetMs();
  319. /* Convert float to half (fast) */
  320. timer.GetMs();
  321. for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
  322. ph[i] = (half)pf[i];
  323. result[7] += timer.GetMs();
  324. /* Convert float to half (accurate) */
  325. timer.GetMs();
  326. for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
  327. ph[i] = half::makeaccurate(pf[i]);
  328. result[8] += timer.GetMs();
  329. /* Add a float to every half */
  330. timer.GetMs();
  331. for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
  332. ph[i] += pf[i];
  333. result[9] += timer.GetMs();
  334. }
  335. delete[] pf;
  336. delete[] ph;
  337. for (size_t i = 0; i < sizeof(result) / sizeof(*result); i++)
  338. result[i] *= 1000000.0f / (HALF_TABLE_SIZE * HALF_RUNS);
  339. Log::Info(" ns/elem\n");
  340. Log::Info("float = float %7.3f\n", result[0]);
  341. Log::Info("float = half (array) %7.3f\n", result[1]);
  342. Log::Info("float = half (fast) %7.3f\n", result[2]);
  343. Log::Info("float += half %7.3f\n", result[3]);
  344. Log::Info("half = half %7.3f\n", result[4]);
  345. Log::Info("half = -half %7.3f\n", result[5]);
  346. Log::Info("half = float (array) %7.3f\n", result[6]);
  347. Log::Info("half = float (fast) %7.3f\n", result[7]);
  348. Log::Info("half = float (accurate) %7.3f\n", result[8]);
  349. Log::Info("half += float %7.3f\n", result[9]);
  350. }