/*
 *  libcucul      Canvas for ultrafast compositing of Unicode letters
 *  Copyright (c) 2002-2006 Sam Hocevar
 *                All Rights Reserved
 *
 *  $Id$
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the Do What The Fuck You Want To
 *  Public License, Version 2, as published by Sam Hocevar. See
 *  http://sam.zoy.org/wtfpl/COPYING for more details.
 */

/*
 *  This file contains various import functions.
 */

#include "config.h"
#include "common.h"

#if !defined __KERNEL__
#   if defined HAVE_ERRNO_H
#       include <errno.h>
#   endif
#   include <stdio.h>
#   include <stdlib.h>
#   include <string.h>
#endif

#include "cucul.h"
#include "cucul_internals.h"

/* Read a 32-bit value from a possibly unaligned address. The bytes are
 * copied into a local and passed through hton32(), which is declared
 * elsewhere in the project. */
static inline uint32_t sscanu32(void const *s)
{
    uint32_t x;
    memcpy(&x, s, 4);
    return hton32(x);
}

/* Read a 16-bit value from a possibly unaligned address. The bytes are
 * copied into a local and passed through hton16(), which is declared
 * elsewhere in the project. */
static inline uint16_t sscanu16(void const *s)
{
    uint16_t x;
    memcpy(&x, s, 2);
    return hton16(x);
}

/* ANSI Graphic Rendition Combination Mode */
struct ansi_grcm
{
    uint8_t fg, bg;   /* ANSI-context fg/bg */
    uint8_t efg, ebg; /* Effective (libcucul) fg/bg */
    uint8_t bold, negative, concealed;
};

static long int import_caca(cucul_canvas_t *, void const *, unsigned int);
static long int import_text(cucul_canvas_t *, void const *, unsigned int);
static long int import_ansi(cucul_canvas_t *, void const *, unsigned int, int);

static void ansi_parse_grcm(cucul_canvas_t *, struct ansi_grcm *,
                            unsigned int, unsigned int const *);

/** \brief Import a memory buffer into a canvas
 *
 *  Import a memory buffer into the given libcucul canvas's current
 *  frame. The current frame is resized accordingly and its contents are
 *  replaced with the imported data.
 *
 *  Valid values for \c format are:
 *  - \c "": attempt to autodetect the file format.
 *  - \c "caca": import native libcaca files.
 *  - \c "text": import ASCII text files.
 *  - \c "ansi": import ANSI files.
 *  - \c "utf8": import UTF-8 files with ANSI colour codes.
 *
 *  Format names are compared case-insensitively.
 *
 *  The number of bytes read is returned. If the file format is valid, but
 *  not enough data was available, 0 is returned.
 *
 *  If an error occurs, -1 is returned and \b errno is set accordingly:
 *  - \c ENOMEM Not enough memory to allocate canvas.
 *  - \c EINVAL Invalid format requested (including a NULL \c format), or
 *    the data is not valid for the native libcaca format.
 *
 *  NOTE(review): the internal importers take an \c unsigned \c int size,
 *  so \c len is narrowed when \c unsigned \c long is wider than that.
 *
 *  \param cv A libcucul canvas in which to import the file.
 *  \param data A memory area containing the data to be loaded into the canvas.
 *  \param len The size in bytes of the memory area.
 *  \param format A string describing the input format.
 *  \return The number of bytes read, or 0 if there was not enough data,
 *  or -1 if an error occurred.
 */
long int cucul_import_memory(cucul_canvas_t *cv, void const *data,
                             unsigned long int len, char const *format)
{
    /* strcasecmp() must not be handed a NULL pointer */
    if(!format)
    {
#if defined HAVE_ERRNO_H
        errno = EINVAL;
#endif
        return -1;
    }

    if(!strcasecmp("caca", format))
        return import_caca(cv, data, len);
    if(!strcasecmp("utf8", format))
        return import_ansi(cv, data, len, 1);
    if(!strcasecmp("text", format))
        return import_text(cv, data, len);
    if(!strcasecmp("ansi", format))
        return import_ansi(cv, data, len, 0);

    /* Autodetection */
    if(!strcasecmp("", format))
    {
        unsigned char const *str = data;
        unsigned long int i; /* same type as len, so it cannot wrap */

        /* If 4 first bytes are 0xcaca + 'CV' */
        if(len >= 4 && str[0] == 0xca && str[1] == 0xca
            && str[2] == 'C' && str[3] == 'V')
            return import_caca(cv, data, len);

        /* If we find ESC[ argv, we guess it's an ANSI file */
        for(i = 0; i + 1 < len; i++)
            if((str[i] == 0x1b) && (str[i + 1] == '['))
                return import_ansi(cv, data, len, 0);

        /* Otherwise, import it as text */
        return import_text(cv, data, len);
    }

#if defined HAVE_ERRNO_H
    errno = EINVAL;
#endif
    return -1;
}

/** \brief Import a file into a canvas
 *
 *  Import a file into the given libcucul canvas's current frame. The
 *  current frame is resized accordingly and its contents are replaced
 *  with the imported data.
 *
 *  Valid values for \c format are:
 *  - \c "": attempt to autodetect the file format.
 *  - \c "caca": import native libcaca files.
 *  - \c "text": import ASCII text files.
 *  - \c "ansi": import ANSI files.
 *  - \c "utf8": import UTF-8 files with ANSI colour codes.
 *
 *  The number of bytes read is returned. If the file format is valid, but
 *  not enough data was available, 0 is returned.
 *
 *  If an error occurs, -1 is returned and \b errno is set accordingly:
 *  - \c ENOSYS File access is not implemented on this system.
 *  - \c ENOMEM Not enough memory to allocate canvas.
 *  - \c EINVAL Invalid format requested.
 *  - \c EIO The file could not be read completely.
 *  cucul_import_file() may also fail and set \b errno for any of the
 *  errors specified for the routines fopen(), fseek() and ftell().
 *
 *  \param cv A libcucul canvas in which to import the file.
 *  \param filename The name of the file to load.
 *  \param format A string describing the input format.
 *  \return The number of bytes read, or 0 if there was not enough data,
 *  or -1 if an error occurred.
 */
long int cucul_import_file(cucul_canvas_t *cv, char const *filename,
                           char const *format)
{
#if defined __KERNEL__
#   if defined HAVE_ERRNO_H
    errno = ENOSYS;
#   endif
    return -1;
#else
    FILE *fp;
    void *data;
    long int size, ret;

    fp = fopen(filename, "rb");
    if(!fp)
        return -1; /* fopen already set errno */

    /* Measure the file, then rewind. ftell() reports failure as -1,
     * which must never reach malloc(). */
    size = -1;
    if(fseek(fp, 0, SEEK_END) == 0)
        size = ftell(fp);

    if(size < 0 || fseek(fp, 0, SEEK_SET) != 0)
    {
        /* Keep the errno of the failing call across fclose() */
#   if defined HAVE_ERRNO_H
        int saved_errno = errno;
#   endif
        fclose(fp);
#   if defined HAVE_ERRNO_H
        errno = saved_errno;
#   endif
        return -1;
    }

    /* malloc(0) is allowed to return NULL; always request at least one
     * byte so that an empty file is not reported as ENOMEM. */
    data = malloc(size ? (size_t)size : 1);
    if(!data)
    {
        fclose(fp);
#   if defined HAVE_ERRNO_H
        errno = ENOMEM;
#   endif
        return -1;
    }

    /* A short read would leave part of the buffer uninitialised */
    if(size > 0 && fread(data, (size_t)size, 1, fp) != 1)
    {
        free(data);
        fclose(fp);
#   if defined HAVE_ERRNO_H
        errno = EIO;
#   endif
        return -1;
    }

    fclose(fp);

    ret = cucul_import_memory(cv, data, size, format);
    free(data);

    return ret;
#endif
}

/** \brief Get available import formats
 *
 *  Return a list of available import formats. The list is a NULL-terminated
 *  array of strings, interleaving a string containing the internal value for
 *  the import format, to be used with cucul_import_memory() or
 *  cucul_import_file(), and a string containing the natural language
 *  description for that import format.
 *
 *  This function never fails.
 *
 *  \return An array of strings.
 */
char const * const * cucul_get_import_list(void)
{
    static char const * const list[] =
    {
        "", "autodetect",
        "caca", "native libcaca format",
        "text", "plain text",
        "ansi", "ANSI coloured text",
        "utf8", "UTF-8 files with ANSI colour codes",
        NULL, NULL
    };

    return list;
}

/*
 * XXX: the following functions are local.
 */

/* Import a buffer in the native libcaca format into the canvas.
 *
 * Layout as read by this function: a 4-byte magic (0xca 0xca 'C' 'V'),
 * then a control area starting at offset 4 that holds a 16-byte header
 * (control size, data size, version, frame count, flags) followed by one
 * 32-byte header per frame (width, height, duration, attribute, x, y,
 * handle x, handle y), then the data area holding 8 bytes per cell
 * (character, attribute).
 *
 * Only the first frame is loaded into the canvas.
 *
 * Returns the number of bytes consumed, 0 if the buffer is too short,
 * or -1 on error (errno is set to EINVAL for malformed data). */
static long int import_caca(cucul_canvas_t *cv,
                            void const *data, unsigned int size)
{
    uint8_t const *buf = (uint8_t const *)data;
    unsigned int control_size, data_size, expected_size, frames, f, n;
    unsigned int first_width, first_height;

    if(size < 20)
        return 0;

    if(buf[0] != 0xca || buf[1] != 0xca || buf[2] != 'C' || buf[3] != 'V')
        goto invalid_caca;

    control_size = sscanu32(buf + 4);
    data_size = sscanu32(buf + 8);
    /* The version (offset 12) and flags (offset 18) are not used yet */
    frames = sscanu32(buf + 14);

    /* Same test as "size < 4 + control_size + data_size", written so
     * that values coming from the file cannot overflow the sum. */
    if(control_size > size - 4 || data_size > size - 4 - control_size)
        return 0;

    /* Same test as "control_size < 16 + frames * 32", overflow-free */
    if(control_size < 16 || (control_size - 16) / 32 < frames)
        goto invalid_caca;

    /* The first frame header is read unconditionally below */
    if(frames == 0)
        goto invalid_caca;

    for(expected_size = 0, f = 0; f < frames; f++)
    {
        unsigned int width, height, cells_left;

        /* Frame headers are 32 bytes apart */
        width = sscanu32(buf + 4 + 16 + f * 32);
        height = sscanu32(buf + 4 + 16 + f * 32 + 4);

        /* Reject frames that do not fit in what is left of the data
         * area; this also keeps width * height * 8 from overflowing. */
        cells_left = (data_size - expected_size) / 8;
        if(height && width > cells_left / height)
            goto invalid_caca;

        expected_size += width * height * 8;
    }

    if(expected_size != data_size)
        goto invalid_caca;

    /* FIXME: read all frames, not only the first one */
    first_width = sscanu32(buf + 4 + 16);
    first_height = sscanu32(buf + 4 + 16 + 4);

    cucul_set_canvas_size(cv, 0, 0);

    /* The cell arrays are written directly below, so a failed resize
     * must stop the import. errno is presumably set by
     * cucul_set_canvas_size() itself -- TODO confirm. */
    if(cucul_set_canvas_size(cv, first_width, first_height) < 0)
        return -1;

    for(n = first_width * first_height; n--; )
    {
        cv->chars[n] = sscanu32(buf + 4 + control_size + 8 * n);
        cv->attrs[n] = sscanu32(buf + 4 + control_size + 8 * n + 4);
    }

    cv->curattr = sscanu32(buf + 4 + 16 + 12);
    cv->frames[0].x = (int32_t)sscanu32(buf + 4 + 16 + 16);
    cv->frames[0].y = (int32_t)sscanu32(buf + 4 + 16 + 20);
    cv->frames[0].handlex = (int32_t)sscanu32(buf + 4 + 16 + 24);
    cv->frames[0].handley = (int32_t)sscanu32(buf + 4 + 16 + 28);

    return 4 + control_size + data_size;

invalid_caca:
#if defined HAVE_ERRNO_H
    errno = EINVAL;
#endif
    return -1;
}

/* Import plain text into the canvas. Carriage returns are dropped, line
 * feeds start a new row, and the canvas grows as characters are written.
 * Always consumes the whole buffer and returns size. */
static long int import_text(cucul_canvas_t *cv,
                            void const *data, unsigned int size)
{
    char const *text = (char const *)data;
    unsigned int width = 0, height = 0, x = 0, y = 0, i;

    cucul_set_canvas_size(cv, width, height);

    for(i = 0; i < size; i++)
    {
        unsigned char ch = *text++;

        if(ch == '\r')
            continue;

        if(ch == '\n')
        {
            x = 0;
            y++;
            continue;
        }

        /* Grow the canvas so that (x, y) is inside it */
        if(x >= width || y >= height)
        {
            if(x >= width)
                width = x + 1;

            if(y >= height)
                height = y + 1;

            cucul_set_canvas_size(cv, width, height);
        }

        cucul_put_char(cv, x, y, ch);
        x++;
    }

    /* Account for rows that were started but received no character */
    if(y > height)
        cucul_set_canvas_size(cv, width, height = y);

    return size;
}

/* Import text containing ANSI escape sequences into the canvas.
 *
 * When utf8 is non-zero the text is decoded as UTF-8 (invalid sequences
 * are taken as single bytes), otherwise each byte is converted with
 * cucul_cp437_to_utf32(). Lines wrap at column 80. Parsing stops at a
 * SAUCE record or at a control sequence whose final byte is missing or
 * invalid. Always returns size. */
static long int import_ansi(cucul_canvas_t *cv,
                            void const *data, unsigned int size, int utf8)
{
    struct ansi_grcm grcm;
    unsigned char const *buffer = (unsigned char const*)data;
    unsigned int i, j, skip, dummy = 0;
    unsigned int width = 0, height = 0, wch = 1;
    unsigned long int ch;
    int x = 0, y = 0, save_x = 0, save_y = 0;

    cucul_set_canvas_size(cv, width, height);

    /* SGR 0 initialises every field of grcm */
    ansi_parse_grcm(cv, &grcm, 1, &dummy);

    for(i = 0; i < size; i += skip)
    {
        skip = 1;

        /* Wrap long lines */
        if((unsigned int)x >= 80)
        {
            x = 0;
            y++;
        }

        if(buffer[i] == '\x1a' && size - i >= 8
            && !memcmp(buffer + i + 1, "SAUCE00", 7))
            break; /* End before SAUCE data */

        if(buffer[i] == '\r')
            continue; /* DOS sucks */

        if(buffer[i] == '\n')
        {
            x = 0;
            y++;
            continue;
        }

        /* Interpret escape commands, as per Standard ECMA-48 "Control
         * Functions for Coded Character Sets", 5.4. Control sequences.
         * A lone ESC at the very end of the buffer is not a CSI. */
        if(buffer[i] == '\x1b' && size - i >= 2 && buffer[i + 1] == '[')
        {
            unsigned int argc = 0, argv[101];
            unsigned int param, inter, final;

            /* Compute offsets to parameter bytes, intermediate bytes and
             * to the final byte. Only the final byte is mandatory, there
             * can be zero of the others.
             * 0  param=2             inter                 final           final+1
             * +-----+------------------+---------------------+-----------------+
             * | CSI | parameter bytes  | intermediate bytes  |   final byte    |
             * |     |   0x30 - 0x3f    |    0x20 - 0x2f      |   0x40 - 0x7e   |
             * | ^[[ | 0123456789:;<=>? | SPC !"#$%&'()*+,-./ | azAZ@[\]^_`{|}~ |
             * +-----+------------------+---------------------+-----------------+
             */
            param = 2;

            for(inter = param; i + inter < size; inter++)
                if(buffer[i + inter] < 0x30 || buffer[i + inter] > 0x3f)
                    break;

            for(final = inter; i + final < size; final++)
                if(buffer[i + final] < 0x20 || buffer[i + final] > 0x2f)
                    break;

            /* The sequence may be cut short by the end of the buffer,
             * in which case there is no final byte to look at. */
            if(i + final >= size
                || buffer[i + final] < 0x40 || buffer[i + final] > 0x7e)
                break; /* Invalid Final Byte */

            skip += final;

            /* Sanity checks */
            if(param < inter && buffer[i + param] >= 0x3c)
            {
                fprintf(stderr, "private sequence \"^[[%.*s\"\n",
                        (int)(final - param + 1),
                        (char const *)(buffer + i + param));
                continue; /* Private sequence, skip it entirely */
            }

            /* This bound also guarantees that argv[] cannot overflow:
             * at most 100 parameter bytes means at most 101 values. */
            if(final - param > 100)
                continue; /* Suspiciously long sequence, skip it */

            /* Parse parameter bytes as per ECMA-48 5.4.2: Parameter string
             * format */
            if(param < inter)
            {
                argv[0] = 0;
                for(j = param; j < inter; j++)
                {
                    if(buffer[i + j] == ';')
                        argv[++argc] = 0;
                    else if(buffer[i + j] >= '0' && buffer[i + j] <= '9')
                        argv[argc] = 10 * argv[argc] + (buffer[i + j] - '0');
                }
                argc++;
            }

            /* Interpret final byte. The code representations are given in
             * ECMA-48 5.4: Control sequences, and the code definitions are
             * given in ECMA-48 8.3: Definition of control functions. */
            switch(buffer[i + final])
            {
            case 'H': /* CUP - Cursor Position */
            case 'f': /* HVP - Character And Line Position */
                x = (argc > 1 && argv[1] > 0) ? argv[1] - 1 : 0;
                y = (argc > 0 && argv[0] > 0) ? argv[0] - 1 : 0;
                break;
            case 'A': /* CUU - Cursor Up */
                y -= argc ? argv[0] : 1;
                if(y < 0)
                    y = 0;
                break;
            case 'B': /* CUD - Cursor Down */
                y += argc ? argv[0] : 1;
                break;
            case 'C': /* CUF - Cursor Right */
                x += argc ? argv[0] : 1;
                break;
            case 'D': /* CUB - Cursor Left */
                x -= argc ? argv[0] : 1;
                if(x < 0)
                    x = 0;
                break;
            case 's': /* Private (save cursor position) */
                save_x = x;
                save_y = y;
                break;
            case 'u': /* Private (reload cursor position) */
                x = save_x;
                y = save_y;
                break;
            case 'J': /* ED - Erase In Page */
                /* argv[] holds nothing when no parameter was given */
                if(argc && argv[0] == 2)
                    x = y = 0;
                break;
            case 'K': /* EL - Erase In Line */
                /* Only ever widen the canvas: it can already be wider
                 * than 80 columns after a fullwidth character. */
                if(width < 80)
                {
                    cucul_set_color_ansi(cv, CUCUL_DEFAULT,
                                         CUCUL_TRANSPARENT);
                    cucul_set_canvas_size(cv, width = 80, height);
                }
                for(j = x; j < 80; j++)
                    cucul_put_char(cv, j, y, ' ');
                x = 80;
                break;
            case 'm': /* SGR - Select Graphic Rendition */
                /* An omitted parameter means 0 (default rendition) */
                if(argc)
                    ansi_parse_grcm(cv, &grcm, argc, argv);
                else
                    ansi_parse_grcm(cv, &grcm, 1, &dummy);
                break;
            default:
                fprintf(stderr, "unknown command %c\n", buffer[i + final]);
                break;
            }

            continue;
        }

        /* Get the character we're going to paste */
        if(utf8)
        {
            unsigned int bytes;

            if(i + 6 < size)
                ch = cucul_utf8_to_utf32((char const *)(buffer + i), &bytes);
            else
            {
                /* Add a trailing zero to what we're going to read */
                char tmp[7];
                memcpy(tmp, buffer + i, size - i);
                tmp[size - i] = '\0';
                ch = cucul_utf8_to_utf32(tmp, &bytes);
            }

            if(!bytes)
            {
                /* If the Unicode is invalid, assume it was latin1. */
                ch = buffer[i];
                bytes = 1;
            }
            wch = cucul_utf32_is_fullwidth(ch) ? 2 : 1;
            skip += bytes - 1;
        }
        else
        {
            ch = cucul_cp437_to_utf32(buffer[i]);
        }

        /* Make sure the canvas is big enough. */
        if((unsigned int)x + wch > width)
        {
            cucul_set_color_ansi(cv, CUCUL_DEFAULT, CUCUL_TRANSPARENT);
            cucul_set_canvas_size(cv, width = x + wch, height);
        }

        if((unsigned int)y >= height)
        {
            cucul_set_color_ansi(cv, CUCUL_DEFAULT, CUCUL_TRANSPARENT);
            cucul_set_canvas_size(cv, width, height = y + 1);
        }

        /* Now paste our character */
        cucul_set_color_ansi(cv, grcm.efg, grcm.ebg);
        cucul_put_char(cv, x, y, ch);
        x += wch;
    }

    /* Account for rows that were started but received no character */
    if((unsigned int)y > height)
    {
        cucul_set_color_ansi(cv, CUCUL_DEFAULT, CUCUL_TRANSPARENT);
        cucul_set_canvas_size(cv, width, height = y);
    }

    return size;
}

/* XXX : ANSI loader helper */

/* Apply the argc SGR parameters in argv to the rendition state g, then
 * recompute the effective colours g->efg and g->ebg from the bold,
 * negative and concealed flags. cv is not used. */
static void ansi_parse_grcm(cucul_canvas_t *cv, struct ansi_grcm *g,
                            unsigned int argc, unsigned int const *argv)
{
    static uint8_t const ansi2cucul[] =
    {
        CUCUL_BLACK, CUCUL_RED, CUCUL_GREEN, CUCUL_BROWN,
        CUCUL_BLUE, CUCUL_MAGENTA, CUCUL_CYAN, CUCUL_LIGHTGRAY
    };

    unsigned int j;

    for(j = 0; j < argc; j++)
    {
        /* Defined in ECMA-48 8.3.117: SGR - SELECT GRAPHIC RENDITION */
        if(argv[j] >= 30 && argv[j] <= 37)
            g->fg = ansi2cucul[argv[j] - 30];
        else if(argv[j] >= 40 && argv[j] <= 47)
            g->bg = ansi2cucul[argv[j] - 40];
        else if(argv[j] >= 90 && argv[j] <= 97)
            g->fg = ansi2cucul[argv[j] - 90] + 8;
        else if(argv[j] >= 100 && argv[j] <= 107)
            g->bg = ansi2cucul[argv[j] - 100] + 8;
        else switch(argv[j])
        {
        case 0: /* default rendition */
            g->fg = CUCUL_DEFAULT;
            g->bg = CUCUL_TRANSPARENT;
            g->bold = g->negative = g->concealed = 0;
            break;
        case 1: /* bold or increased intensity */
            g->bold = 1;
            break;
        case 4: /* singly underlined */
            break;
        case 5: /* slowly blinking (less then 150 per minute) */
            break;
        case 7: /* negative image */
            g->negative = 1;
            break;
        case 8: /* concealed characters */
            g->concealed = 1;
            break;
        case 22: /* normal colour or normal intensity (neither bold nor
                  * faint) */
            g->bold = 0;
            break;
        case 27: /* positive image */
            g->negative = 0;
            break;
        case 28: /* revealed characters */
            g->concealed = 0;
            break;
        case 39: /* default display colour (implementation-defined) */
            g->fg = CUCUL_DEFAULT;
            break;
        case 49: /* default background colour (implementation-defined) */
            g->bg = CUCUL_TRANSPARENT;
            break;
        default:
            fprintf(stderr, "unknown sgr %i\n", argv[j]);
            break;
        }
    }

    if(g->concealed)
    {
        g->efg = g->ebg = CUCUL_TRANSPARENT;
    }
    else
    {
        /* Negative image swaps foreground and background */
        g->efg = g->negative ? g->bg : g->fg;
        g->ebg = g->negative ? g->fg : g->bg;

        /* Bold selects the bright variant of the foreground colour */
        if(g->bold)
        {
            if(g->efg < 8)
                g->efg += 8;
            else if(g->efg == CUCUL_DEFAULT)
                g->efg = CUCUL_WHITE;
        }
    }
}