Преглед на файлове

Add multiple charsets support to img2twit, and autodetect charset when

decoding.

git-svn-id: file:///srv/caca.zoy.org/var/lib/svn/libpipi/trunk@3526 92316355-f0b4-4df1-b90c-862c8a59935f
master
sam преди 15 години
родител
ревизия
858b677fcf
променени са 1 файла, в които са добавени 82 реда и са изтрити 43 реда
  1. +82
    -43
      examples/img2twit.cpp

+ 82
- 43
examples/img2twit.cpp Целия файл

@@ -26,40 +26,40 @@
#include "../genethumb/mygetopt.h"

/*
* User-definable settings.
* Format-dependent settings. Change this and you risk making all other
* generated strings unusable.
*/

/* The Unicode characters at disposal - XXX: must be _ordered_ */
static const uint32_t unichars[] =
{
/* Printable ASCII (except space) */
//0x0021, 0x007f,

/* Stupid symbols and Dingbats shit */
//0x25a0, 0x2600, /* Geometric Shapes */
//0x2600, 0x269e, 0x26a0, 0x26bd, 0x26c0, 0x26c4, /* Misc. Symbols */
//0x2701, 0x2705, 0x2706, 0x270a, 0x270c, 0x2728, 0x2729, 0x274c,
// 0x274d, 0x274e, 0x274f, 0x2753, 0x2756, 0x2757, 0x2758, 0x275f,
// 0x2761, 0x2795, 0x2798, 0x27b0, 0x27b1, 0x27bf, /* Dingbats */

/* Chinese-looking stuff */
//0x2e80, 0x2e9a, 0x2e9b, 0x2ef4, /* CJK Radicals Supplement */
//0x2f00, 0x2fd6, /* Kangxi Radicals */
//0x3400, 0x4db6, /* CJK Unified Ideographs Extension A */
0x4e00, 0x9fa6, /* CJK Unified Ideographs */

/* Korean - most people don't know the difference anyway */
//0xac00, 0xd7a4, /* Hangul Syllables */

/* More Chinese */
//0xf900, 0xfa2e, 0xfa30, 0xfa6b, 0xfa70, 0xfada, /* CJK Compat. Idgphs. */

/* TODO: there's also the U+20000 and U+2f800 planes, but they're
* not supported by the Twitter Javascript filter (yet?). */

/* End of list marker - XXX: don't remove! */
0x0000, 0x0000
};
/*
 * Character-set tables.
 *
 * Each RANGE_* macro expands to a flat, comma-separated list of code-point
 * pairs so that it can be dropped directly into an array initializer.
 * NOTE(review): main()'s autodetection tests a code point with
 * `>= first && < second`, which suggests each pair is a half-open
 * interval [first, second) -- confirm against the lookup code.
 */

/* Printable ASCII (except space) */
#define RANGE_ASCII 0x0021, 0x007f

/* CJK Unified Ideographs */
#define RANGE_CJK 0x4e00, 0x9fa6
/* Other candidate ranges, currently disabled: */
//0x2e80, 0x2e9a, 0x2e9b, 0x2ef4, /* CJK Radicals Supplement */
//0x2f00, 0x2fd6, /* Kangxi Radicals */
//0x3400, 0x4db6, /* CJK Unified Ideographs Extension A */
//0xac00, 0xd7a4, /* Hangul Syllables -- Korean, not Chinese */
//0xf900, 0xfa2e, 0xfa30, 0xfa6b, 0xfa70, 0xfada, /* CJK Compat. Idgphs. */
/* TODO: there's also the U+20000 and U+2f800 planes, but they're
* not supported by the Twitter Javascript filter (yet?). */

/* Geometric shapes, miscellaneous symbols and Dingbats (several pairs) */
#define RANGE_SYMBOLS 0x25a0, 0x2600, /* Geometric Shapes */ \
0x2600, 0x269e, 0x26a0, 0x26bd, 0x26c0, 0x26c4, /* Misc. Symbols */ \
0x2701, 0x2705, 0x2706, 0x270a, 0x270c, 0x2728, 0x2729, 0x274c, \
0x274d, 0x274e, 0x274f, 0x2753, 0x2756, 0x2757, 0x2758, 0x275f, \
0x2761, 0x2795, 0x2798, 0x27b0, 0x27b1, 0x27bf /* Dingbats */

/* End of list marker: a (0, 0) pair that terminates every table below */
#define RANGE_END 0x0, 0x0

/* Pre-defined character ranges XXX: must be _ordered_ */
/* Every table is a list of pairs followed by the RANGE_END terminator. */
static const uint32_t unichars_ascii[] = { RANGE_ASCII, RANGE_END };
static const uint32_t unichars_cjk[] = { RANGE_CJK, RANGE_END };
static const uint32_t unichars_symbols[] = { RANGE_SYMBOLS, RANGE_END };

/* The Unicode characters at disposal */
/* Points at one of the tables above. It has static storage and no
 * initializer, so it starts out null; main() sets it from the --charset
 * option, or falls back to autodetection / CJK while it is still null. */
static const uint32_t *unichars;

/* The maximum image size we want to support */
#define MAX_W 4000
@@ -794,11 +794,12 @@ static void analyse(pipi_image_t *src)

int main(int argc, char *argv[])
{
uint32_t unicode_data[4096]; /* FIXME: allocate this dynamically */
int opstats[2 * NB_OPS];
char const *srcname = NULL, *dstname = NULL;
pipi_image_t *src, *tmp, *dst;
double error = 1.0;
int width, height, ret = 0;
int width, height;

/* Parse command-line options */
for(;;)
@@ -808,12 +809,13 @@ int main(int argc, char *argv[])
{
{ "output", 1, NULL, 'o' },
{ "length", 1, NULL, 'l' },
{ "charset", 1, NULL, 'c' },
{ "quality", 1, NULL, 'q' },
{ "debug", 0, NULL, 'd' },
{ "help", 0, NULL, 'h' },
{ NULL, 0, NULL, 0 },
};
int c = mygetopt(argc, argv, "o:l:q:dh", long_options, &option_index);
int c = mygetopt(argc, argv, "o:l:c:q:dh", long_options, &option_index);

if(c == -1)
break;
@@ -831,6 +833,20 @@ int main(int argc, char *argv[])
MAX_MSG_LEN = 16;
}
break;
case 'c':
    /* --charset: select the character table by name. An unknown name
     * is a fatal usage error; the message lists the accepted names. */
    if(!strcmp(myoptarg, "ascii"))
        unichars = unichars_ascii;
    else if(!strcmp(myoptarg, "cjk"))
        unichars = unichars_cjk;
    else if(!strcmp(myoptarg, "symbols"))
        unichars = unichars_symbols;
    else
    {
        /* Terminate the first line so that the hint below does not run
         * straight into the closing period of the error message. */
        fprintf(stderr, "Error: invalid char block \"%s\".\n", myoptarg);
        fprintf(stderr, "Valid sets are: ascii, cjk, symbols\n");
        return EXIT_FAILURE;
    }
    break;
case 'q':
ITERATIONS_PER_POINT = 10 * atof(myoptarg);
if(ITERATIONS_PER_POINT < 0)
@@ -849,6 +865,7 @@ int main(int argc, char *argv[])
printf("Mandatory arguments to long options are mandatory for short options too.\n");
printf(" -o, --output <filename> output resulting image to filename\n");
printf(" -l, --length <size> message length in characters (default 140)\n");
printf(" -c, --charset <block> character set to use (ascii, [cjk], symbols)\n");
printf(" -q, --quality <rate> set image quality (0 - 10) (default 5)\n");
printf(" -d, --debug print debug information\n");
printf(" -h, --help display this help and exit\n");
@@ -879,6 +896,32 @@ int main(int argc, char *argv[])
if(myoptind == argc - 1)
srcname = argv[myoptind];

/* Decoding mode: read UTF-8 text from stdin */
/* The code points are buffered in unicode_data rather than converted to
 * indices right away, because the charset may not be known yet: the first
 * buffered code point is what drives autodetection below.
 * NOTE(review): unicode_data is a fixed 4096-entry array; confirm that
 * MAX_MSG_LEN can never exceed that. */
if(dstname)
for(int i = 0; i < MAX_MSG_LEN; i++)
unicode_data[i] = fread_utf8(stdin);

/* Autodetect charset if decoding, otherwise switch to CJK. */
/* Only runs when no --charset option selected a table. Detection looks at
 * the first code point only. The ASCII and CJK bounds match RANGE_ASCII
 * and RANGE_CJK; the symbols test spans from the first RANGE_SYMBOLS bound
 * to the last one, so it also accepts code points that fall in the gaps
 * between that table's sub-ranges. */
if(!unichars)
{
if(dstname)
{
if(unicode_data[0] >= 0x0021 && unicode_data[0] < 0x007f)
unichars = unichars_ascii;
else if(unicode_data[0] >= 0x4e00 && unicode_data[0] < 0x9fa6)
unichars = unichars_cjk;
else if(unicode_data[0] >= 0x25a0 && unicode_data[0] < 0x27bf)
unichars = unichars_symbols;
else
{
fprintf(stderr, "Error: unable to detect charset\n");
return EXIT_FAILURE;
}
}
else
/* Encoding mode with no explicit choice: CJK is the default. */
unichars = unichars_cjk;
}

pipi_set_gamma(1.0);

/* Precompute bit allocation */
@@ -903,14 +946,10 @@ int main(int argc, char *argv[])

if(dstname)
{
/* Decoding mode: read UTF-8 text from stdin, find each
* character's index in our character list, and push it to our
* wonderful custom bitstream. */
uint32_t data[MAX_MSG_LEN];
for(int i = 0; i < MAX_MSG_LEN; i++)
data[i] = uni2index(fread_utf8(stdin));
/* Decoding mode: find each character's index in our character
* list, and push it to our wonderful custom bitstream. */
for(int i = MAX_MSG_LEN; i--; )
b.push(data[i], NUM_CHARACTERS);
b.push(uni2index(unicode_data[i]), NUM_CHARACTERS);

/* Read width and height from bitstream */
src = NULL;
@@ -932,7 +971,7 @@ int main(int argc, char *argv[])
height = pipi_get_image_height(src);
}

/* Compute best w/h ratio */
/* Compute "best" w/h ratio */
dw = 1; dh = TOTAL_CELLS;
for(unsigned int i = 1; i <= TOTAL_CELLS; i++)
{
@@ -1160,6 +1199,6 @@ int main(int argc, char *argv[])
pipi_free(dst);
}

return ret;
return EXIT_SUCCESS;
}


Зареждане…
Отказ
Запис