You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

import.c 18 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622
  1. /*
  2. * libcucul Canvas for ultrafast compositing of Unicode letters
  3. * Copyright (c) 2002-2006 Sam Hocevar <sam@zoy.org>
  4. * All Rights Reserved
  5. *
  6. * $Id$
  7. *
  8. * This library is free software; you can redistribute it and/or
  9. * modify it under the terms of the Do What The Fuck You Want To
  10. * Public License, Version 2, as published by Sam Hocevar. See
  11. * http://sam.zoy.org/wtfpl/COPYING for more details.
  12. */
  13. /*
  14. * This file contains various import functions.
  15. */
  16. #include "config.h"
  17. #include "common.h"
  18. #if !defined __KERNEL__
  19. # if defined HAVE_ERRNO_H
  20. # include <errno.h>
  21. # endif
  22. # include <stdio.h>
  23. # include <stdlib.h>
  24. # include <string.h>
  25. #endif
  26. #include "cucul.h"
  27. #include "cucul_internals.h"
  /* Copy four bytes from s into a 32-bit integer (the source does not need
   * to be aligned) and return the result of hton32() on that value. */
  static inline uint32_t sscanu32(void const *s)
  {
      uint32_t raw;

      memcpy(&raw, s, sizeof(raw));

      return hton32(raw);
  }
  /* Copy two bytes from s into a 16-bit integer (the source does not need
   * to be aligned) and return the result of hton16() on that value. */
  static inline uint16_t sscanu16(void const *s)
  {
      uint16_t raw;

      memcpy(&raw, s, sizeof(raw));

      return hton16(raw);
  }
  /* ANSI Graphic Rendition Combination Mode: the rendition state tracked
   * while an ANSI stream is being imported. */
  struct ansi_grcm
  {
      /* Colours as requested by the ANSI stream */
      uint8_t fg;
      uint8_t bg;

      /* Effective colours, i.e. the ones actually given to libcucul */
      uint8_t efg;
      uint8_t ebg;

      /* Rendition flags */
      uint8_t bold;
      uint8_t negative;
      uint8_t concealed;
  };
  47. static long int import_caca(cucul_canvas_t *, void const *, unsigned int);
  48. static long int import_text(cucul_canvas_t *, void const *, unsigned int);
  49. static long int import_ansi(cucul_canvas_t *, void const *, unsigned int, int);
  50. static void ansi_parse_grcm(cucul_canvas_t *, struct ansi_grcm *,
  51. unsigned int, unsigned int const *);
  52. /** \brief Import a memory buffer into a canvas
  53. *
  54. * Import a memory buffer into the given libcucul canvas's current
  55. * frame. The current frame is resized accordingly and its contents are
  56. * replaced with the imported data.
  57. *
  58. * Valid values for \c format are:
  59. * - \c "": attempt to autodetect the file format.
  60. * - \c "text": import ASCII text files.
  61. * - \c "ansi": import ANSI files.
  62. * - \c "utf8": import UTF-8 files with ANSI colour codes.
  63. * - \c "caca": import native libcaca files.
  64. *
  65. * If an error occurs, -1 is returned and \b errno is set accordingly:
  66. * - \c ENOMEM Not enough memory to allocate canvas.
  67. * - \c EINVAL Invalid format requested.
  68. *
  69. * \param A libcucul canvas in which to import the file.
  70. * \param buffer A \e libcucul buffer containing the data to be loaded
  71. * into a canvas.
  72. * \param format A string describing the input format.
  73. * \return The number of bytes read, or -1 if an error occurred.
  74. */
  75. long int cucul_import_memory(cucul_canvas_t *cv, unsigned char const *buf,
  76. unsigned long int len, char const *format)
  77. {
  78. if(!strcasecmp("caca", format))
  79. return import_caca(cv, buf, len);
  80. if(!strcasecmp("utf8", format))
  81. return import_ansi(cv, buf, len, 1);
  82. if(!strcasecmp("text", format))
  83. return import_text(cv, buf, len);
  84. if(!strcasecmp("ansi", format))
  85. return import_ansi(cv, buf, len, 0);
  86. /* Autodetection */
  87. if(!strcasecmp("", format))
  88. {
  89. unsigned int i;
  90. /* If 4 first bytes are 0xcaca + 'CV' */
  91. if(len >= 4 && buf[0] == 0xca &&
  92. buf[1] == 0xca && buf[2] == 'C' && buf[3] == 'V')
  93. return import_caca(cv, buf, len);
  94. /* If we find ESC[ argv, we guess it's an ANSI file */
  95. for(i = 0; i + 1 < len; i++)
  96. if((buf[i] == 0x1b) && (buf[i + 1] == '['))
  97. return import_ansi(cv, buf, len, 0);
  98. /* Otherwise, import it as text */
  99. return import_text(cv, buf, len);
  100. }
  101. #if defined HAVE_ERRNO_H
  102. errno = EINVAL;
  103. #endif
  104. return -1;
  105. }
  106. /** \brief Import a file into a canvas
  107. *
  108. * Import a file into the given libcucul canvas's current frame. The
  109. * current frame is resized accordingly and its contents are replaced
  110. * with the imported data.
  111. *
  112. * Valid values for \c format are:
  113. * - \c "": attempt to autodetect the file format.
  114. * - \c "text": import ASCII text files.
  115. * - \c "ansi": import ANSI files.
  116. * - \c "utf8": import UTF-8 files with ANSI colour codes.
  117. * - \c "caca": import native libcaca files.
  118. *
  119. * If an error occurs, -1 is returned and \b errno is set accordingly:
  120. * - \c ENOSYS File access is not implemented on this system.
  121. * - \c ENOMEM Not enough memory to allocate canvas.
  122. * - \c EINVAL Invalid format requested.
  123. * cucul_import_file() may also fail and set \b errno for any of the
  124. * errors specified for the routine fopen().
  125. *
  126. * \param A libcucul canvas in which to import the file.
  127. * \param filename The name of the file to load.
  128. * \param format A string describing the input format.
  129. * \return The number of bytes read, or -1 if an error occurred.
  130. */
  131. long int cucul_import_file(cucul_canvas_t *cv, char const *filename,
  132. char const *format)
  133. {
  134. #if defined __KERNEL__
  135. # if defined HAVE_ERRNO_H
  136. errno = ENOSYS;
  137. # endif
  138. return -1;
  139. #else
  140. FILE *fp;
  141. void *data;
  142. long int size;
  143. int ret;
  144. fp = fopen(filename, "rb");
  145. if(!fp)
  146. return -1; /* fopen already set errno */
  147. fseek(fp, 0, SEEK_END);
  148. size = ftell(fp);
  149. data = malloc(size);
  150. if(!data)
  151. {
  152. fclose(fp);
  153. # if defined HAVE_ERRNO_H
  154. errno = ENOMEM;
  155. # endif
  156. return -1;
  157. }
  158. fseek(fp, 0, SEEK_SET);
  159. fread(data, size, 1, fp);
  160. fclose(fp);
  161. ret = cucul_import_memory(cv, data, size, format);
  162. free(data);
  163. return ret;
  164. #endif
  165. }
  /** \brief Get available import formats
   *
   *  Return a list of available import formats. The list is a NULL-terminated
   *  array of strings, interleaving a string containing the internal value for
   *  the import format, to be used with cucul_import_memory() or
   *  cucul_import_file(), and a string containing the natural language
   *  description for that import format.
   *
   *  The returned array is statically allocated and must not be freed.
   *
   *  This function never fails.
   *
   *  \return An array of strings.
   */
  char const * const * cucul_get_import_list(void)
  {
      /* Every format name accepted by cucul_import_memory() has an entry. */
      static char const * const list[] =
      {
          "", "autodetect",
          "text", "plain text",
          "caca", "native libcaca format",
          "ansi", "ANSI coloured text",
          "utf8", "UTF-8 text with ANSI colour codes",
          NULL, NULL
      };

      return list;
  }
  189. /*
  190. * XXX: the following functions are local.
  191. */
  192. static long int import_caca(cucul_canvas_t *cv,
  193. void const *data, unsigned int size)
  194. {
  195. uint8_t const *buf = (uint8_t const *)data;
  196. unsigned int control_size, data_size, full_size, frames, f, n;
  197. uint16_t version, flags;
  198. cucul_set_canvas_size(cv, 0, 0);
  199. if(size < 20)
  200. goto invalid_caca;
  201. if(buf[0] != 0xca || buf[1] != 0xca || buf[2] != 'C' || buf[3] != 'V')
  202. goto invalid_caca;
  203. control_size = sscanu32(buf + 4);
  204. data_size = sscanu32(buf + 8);
  205. version = sscanu16(buf + 12);
  206. frames = sscanu32(buf + 14);
  207. flags = sscanu16(buf + 18);
  208. if(size != 4 + control_size + data_size)
  209. goto invalid_caca;
  210. if(control_size < 16 + frames * 24)
  211. goto invalid_caca;
  212. for(full_size = 0, f = 0; f < frames; f++)
  213. {
  214. unsigned int width, height, duration;
  215. uint32_t attr;
  216. int x, y;
  217. width = sscanu32(buf + 4 + 16 + f * 24);
  218. height = sscanu32(buf + 4 + 16 + f * 24 + 4);
  219. duration = sscanu32(buf + 4 + 16 + f * 24 + 8);
  220. attr = sscanu32(buf + 4 + 16 + f * 24 + 12);
  221. x = (int32_t)sscanu32(buf + 4 + 16 + f * 24 + 16);
  222. y = (int32_t)sscanu32(buf + 4 + 16 + f * 24 + 20);
  223. full_size += width * height * 8;
  224. }
  225. if(full_size != data_size)
  226. goto invalid_caca;
  227. /* FIXME: read all frames, not only the first one */
  228. cucul_set_canvas_size(cv, sscanu32(buf + 4 + 16),
  229. sscanu32(buf + 4 + 16 + 4));
  230. /* FIXME: check for return value */
  231. for(n = sscanu32(buf + 4 + 16) * sscanu32(buf + 4 + 16 + 4); n--; )
  232. {
  233. cv->chars[n] = sscanu32(buf + 4 + control_size + 8 * n);
  234. cv->attrs[n] = sscanu32(buf + 4 + control_size + 8 * n + 4);
  235. }
  236. cv->curattr = sscanu32(buf + 4 + 16 + 12);
  237. return size;
  238. invalid_caca:
  239. #if defined HAVE_ERRNO_H
  240. errno = EINVAL;
  241. #endif
  242. return -1;
  243. }
  244. static long int import_text(cucul_canvas_t *cv,
  245. void const *data, unsigned int size)
  246. {
  247. char const *text = (char const *)data;
  248. unsigned int width = 0, height = 0, x = 0, y = 0, i;
  249. cucul_set_canvas_size(cv, width, height);
  250. cucul_set_color_ansi(cv, CUCUL_DEFAULT, CUCUL_TRANSPARENT);
  251. for(i = 0; i < size; i++)
  252. {
  253. unsigned char ch = *text++;
  254. if(ch == '\r')
  255. continue;
  256. if(ch == '\n')
  257. {
  258. x = 0;
  259. y++;
  260. continue;
  261. }
  262. if(x >= width || y >= height)
  263. {
  264. if(x >= width)
  265. width = x + 1;
  266. if(y >= height)
  267. height = y + 1;
  268. cucul_set_canvas_size(cv, width, height);
  269. }
  270. cucul_putchar(cv, x, y, ch);
  271. x++;
  272. }
  273. if(y > height)
  274. cucul_set_canvas_size(cv, width, height = y);
  275. return size;
  276. }
  277. static long int import_ansi(cucul_canvas_t *cv,
  278. void const *data, unsigned int size, int utf8)
  279. {
  280. struct ansi_grcm grcm;
  281. unsigned char const *buffer = (unsigned char const*)data;
  282. unsigned int i, j, skip, dummy = 0;
  283. unsigned int width = 0, height = 0, wch = 1;
  284. unsigned long int ch;
  285. int x = 0, y = 0, save_x = 0, save_y = 0;
  286. cucul_set_canvas_size(cv, width, height);
  287. ansi_parse_grcm(cv, &grcm, 1, &dummy);
  288. for(i = 0; i < size; i += skip)
  289. {
  290. skip = 1;
  291. /* Wrap long lines */
  292. if((unsigned int)x >= 80)
  293. {
  294. x = 0;
  295. y++;
  296. }
  297. if(buffer[i] == '\x1a' && size - i >= 8
  298. && !memcmp(buffer + i + 1, "SAUCE00", 7))
  299. break; /* End before SAUCE data */
  300. if(buffer[i] == '\r')
  301. continue; /* DOS sucks */
  302. if(buffer[i] == '\n')
  303. {
  304. x = 0;
  305. y++;
  306. continue;
  307. }
  308. /* Interpret escape commands, as per Standard ECMA-48 "Control
  309. * Functions for Coded Character Sets", 5.4. Control sequences. */
  310. if(buffer[i] == '\x1b' && buffer[i + 1] == '[')
  311. {
  312. unsigned int argc = 0, argv[101];
  313. unsigned int param, inter, final;
  314. /* Compute offsets to parameter bytes, intermediate bytes and
  315. * to the final byte. Only the final byte is mandatory, there
  316. * can be zero of the others.
  317. * 0 param=2 inter final final+1
  318. * +-----+------------------+---------------------+-----------------+
  319. * | CSI | parameter bytes | intermediate bytes | final byte |
  320. * | | 0x30 - 0x3f | 0x20 - 0x2f | 0x40 - 0x7e |
  321. * | ^[[ | 0123456789:;<=>? | SPC !"#$%&'()*+,-./ | azAZ@[\]^_`{|}~ |
  322. * +-----+------------------+---------------------+-----------------+
  323. */
  324. param = 2;
  325. for(inter = param; i + inter < size; inter++)
  326. if(buffer[i + inter] < 0x30 || buffer[i + inter] > 0x3f)
  327. break;
  328. for(final = inter; i + final < size; final++)
  329. if(buffer[i + final] < 0x20 || buffer[i + final] > 0x2f)
  330. break;
  331. if(buffer[i + final] < 0x40 || buffer[i + final] > 0x7e)
  332. break; /* Invalid Final Byte */
  333. skip += final;
  334. /* Sanity checks */
  335. if(param < inter && buffer[i + param] >= 0x3c)
  336. {
  337. fprintf(stderr, "private sequence \"^[[%.*s\"\n",
  338. final - param + 1, buffer + i + param);
  339. continue; /* Private sequence, skip it entirely */
  340. }
  341. if(final - param > 100)
  342. continue; /* Suspiciously long sequence, skip it */
  343. /* Parse parameter bytes as per ECMA-48 5.4.2: Parameter string
  344. * format */
  345. if(param < inter)
  346. {
  347. argv[0] = 0;
  348. for(j = param; j < inter; j++)
  349. {
  350. if(buffer[i + j] == ';')
  351. argv[++argc] = 0;
  352. else if(buffer[i + j] >= '0' && buffer[i + j] <= '9')
  353. argv[argc] = 10 * argv[argc] + (buffer[i + j] - '0');
  354. }
  355. argc++;
  356. }
  357. /* Interpret final byte. The code representations are given in
  358. * ECMA-48 5.4: Control sequences, and the code definitions are
  359. * given in ECMA-48 8.3: Definition of control functions. */
  360. switch(buffer[i + final])
  361. {
  362. case 'f': /* CUP - Cursor Position */
  363. case 'H': /* HVP - Character And Line Position */
  364. x = (argc > 1 && argv[1] > 0) ? argv[1] - 1 : 0;
  365. y = (argc > 0 && argv[0] > 0) ? argv[0] - 1 : 0;
  366. break;
  367. case 'A': /* CUU - Cursor Up */
  368. y -= argc ? argv[0] : 1;
  369. if(y < 0)
  370. y = 0;
  371. break;
  372. case 'B': /* CUD - Cursor Down */
  373. y += argc ? argv[0] : 1;
  374. break;
  375. case 'C': /* CUF - Cursor Right */
  376. x += argc ? argv[0] : 1;
  377. break;
  378. case 'D': /* CUB - Cursor Left */
  379. x -= argc ? argv[0] : 1;
  380. if(x < 0)
  381. x = 0;
  382. break;
  383. case 's': /* Private (save cursor position) */
  384. save_x = x;
  385. save_y = y;
  386. break;
  387. case 'u': /* Private (reload cursor position) */
  388. x = save_x;
  389. y = save_y;
  390. break;
  391. case 'J': /* ED - Erase In Page */
  392. if(argv[0] == 2)
  393. x = y = 0;
  394. break;
  395. case 'K': /* EL - Erase In Line */
  396. if(width < 80)
  397. cucul_set_color_ansi(cv, CUCUL_DEFAULT, CUCUL_TRANSPARENT);
  398. cucul_set_canvas_size(cv, width = 80, height);
  399. for(j = x; j < 80; j++)
  400. cucul_putchar(cv, j, y, ' ');
  401. x = 80;
  402. break;
  403. case 'm': /* SGR - Select Graphic Rendition */
  404. ansi_parse_grcm(cv, &grcm, argc, argv);
  405. break;
  406. default:
  407. fprintf(stderr, "unknown command %c\n", buffer[i + final]);
  408. break;
  409. }
  410. continue;
  411. }
  412. /* Get the character we’re going to paste */
  413. if(utf8)
  414. {
  415. unsigned int bytes;
  416. if(i + 6 < size)
  417. ch = cucul_utf8_to_utf32((char const *)(buffer + i), &bytes);
  418. else
  419. {
  420. /* Add a trailing zero to what we're going to read */
  421. char tmp[7];
  422. memcpy(tmp, buffer + i, size - i);
  423. tmp[size - i] = '\0';
  424. ch = cucul_utf8_to_utf32(tmp, &bytes);
  425. }
  426. if(!bytes)
  427. {
  428. /* If the Unicode is invalid, assume it was latin1. */
  429. ch = buffer[i];
  430. bytes = 1;
  431. }
  432. wch = cucul_utf32_is_fullwidth(ch) ? 2 : 1;
  433. skip += bytes - 1;
  434. }
  435. else
  436. {
  437. ch = cucul_cp437_to_utf32(buffer[i]);
  438. }
  439. /* Make sure the canvas is big enough. */
  440. if((unsigned int)x + wch > width)
  441. {
  442. cucul_set_color_ansi(cv, CUCUL_DEFAULT, CUCUL_TRANSPARENT);
  443. cucul_set_canvas_size(cv, width = x + wch, height);
  444. }
  445. if((unsigned int)y >= height)
  446. {
  447. cucul_set_color_ansi(cv, CUCUL_DEFAULT, CUCUL_TRANSPARENT);
  448. cucul_set_canvas_size(cv, width, height = y + 1);
  449. }
  450. /* Now paste our character */
  451. cucul_set_color_ansi(cv, grcm.efg, grcm.ebg);
  452. cucul_putchar(cv, x, y, ch);
  453. x += wch;
  454. }
  455. if((unsigned int)y > height)
  456. {
  457. cucul_set_color_ansi(cv, CUCUL_DEFAULT, CUCUL_TRANSPARENT);
  458. cucul_set_canvas_size(cv, width, height = y);
  459. }
  460. return size;
  461. }
  462. /* XXX : ANSI loader helper */
  463. static void ansi_parse_grcm(cucul_canvas_t *cv, struct ansi_grcm *g,
  464. unsigned int argc, unsigned int const *argv)
  465. {
  466. static uint8_t const ansi2cucul[] =
  467. {
  468. CUCUL_BLACK, CUCUL_RED, CUCUL_GREEN, CUCUL_BROWN,
  469. CUCUL_BLUE, CUCUL_MAGENTA, CUCUL_CYAN, CUCUL_LIGHTGRAY
  470. };
  471. unsigned int j;
  472. for(j = 0; j < argc; j++)
  473. {
  474. /* Defined in ECMA-48 8.3.117: SGR - SELECT GRAPHIC RENDITION */
  475. if(argv[j] >= 30 && argv[j] <= 37)
  476. g->fg = ansi2cucul[argv[j] - 30];
  477. else if(argv[j] >= 40 && argv[j] <= 47)
  478. g->bg = ansi2cucul[argv[j] - 40];
  479. else if(argv[j] >= 90 && argv[j] <= 97)
  480. g->fg = ansi2cucul[argv[j] - 90] + 8;
  481. else if(argv[j] >= 100 && argv[j] <= 107)
  482. g->bg = ansi2cucul[argv[j] - 100] + 8;
  483. else switch(argv[j])
  484. {
  485. case 0: /* default rendition */
  486. g->fg = CUCUL_DEFAULT;
  487. g->bg = CUCUL_TRANSPARENT;
  488. g->bold = g->negative = g->concealed = 0;
  489. break;
  490. case 1: /* bold or increased intensity */
  491. g->bold = 1;
  492. break;
  493. case 4: /* singly underlined */
  494. break;
  495. case 5: /* slowly blinking (less then 150 per minute) */
  496. break;
  497. case 7: /* negative image */
  498. g->negative = 1;
  499. break;
  500. case 8: /* concealed characters */
  501. g->concealed = 1;
  502. break;
  503. case 22: /* normal colour or normal intensity (neither bold nor faint) */
  504. g->bold = 0;
  505. break;
  506. case 28: /* revealed characters */
  507. g->concealed = 0;
  508. break;
  509. case 39: /* default display colour (implementation-defined) */
  510. g->fg = CUCUL_DEFAULT;
  511. break;
  512. case 49: /* default background colour (implementation-defined) */
  513. g->bg = CUCUL_TRANSPARENT;
  514. break;
  515. default:
  516. fprintf(stderr, "unknown sgr %i\n", argv[j]);
  517. break;
  518. }
  519. }
  520. if(g->concealed)
  521. {
  522. g->efg = g->ebg = CUCUL_TRANSPARENT;
  523. }
  524. else
  525. {
  526. g->efg = g->negative ? g->bg : g->fg;
  527. g->ebg = g->negative ? g->fg : g->bg;
  528. if(g->bold)
  529. {
  530. if(g->efg < 8)
  531. g->efg += 8;
  532. else if(g->efg == CUCUL_DEFAULT)
  533. g->efg = CUCUL_WHITE;
  534. }
  535. }
  536. }