/*
 *  img2twit      Image to short text message encoder/decoder
 *  Copyright (c) 2009 Sam Hocevar <sam@hocevar.net>
 *                All Rights Reserved
 *
 *  This program is free software. It comes without any warranty, to
 *  the extent permitted by applicable law. You can redistribute it
 *  and/or modify it under the terms of the Do What The Fuck You Want
 *  To Public License, Version 2, as published by Sam Hocevar. See
 *  http://sam.zoy.org/wtfpl/COPYING for more details.
 */

/* TODO:
 * - remove the complicated stuff from get_point/set_point, it's only
 *   the final packing that really matters.
 */

#include "config.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

#include <CGAL/Exact_predicates_inexact_constructions_kernel.h>
#include <CGAL/Delaunay_triangulation_2.h>
#include <CGAL/natural_neighbor_coordinates_2.h>

#include <pipi.h>

#include "../genethumb/mygetopt.h"

/*
 * Format-dependent settings. Change this and you risk making all other
 * generated strings unusable.
 */

/* Printable ASCII (except space) */
#define RANGE_ASCII 0x0021, 0x007f

/* CJK Unified Ideographs */
#define RANGE_CJK 0x4e00, 0x9fa6
//0x2e80, 0x2e9a, 0x2e9b, 0x2ef4, /* CJK Radicals Supplement */
//0x2f00, 0x2fd6, /* Kangxi Radicals */
//0x3400, 0x4db6, /* CJK Unified Ideographs Extension A */
//0xac00, 0xd7a4, /* Hangul Syllables -- Korean, not Chinese */
//0xf900, 0xfa2e, 0xfa30, 0xfa6b, 0xfa70, 0xfada, /* CJK Compat. Idgphs. */
/* TODO: there's also the U+20000 and U+2f800 planes, but they're
 * not supported by the Twitter Javascript filter (yet?). */

/* Stupid symbols and Dingbats shit */
#define RANGE_SYMBOLS 0x25a0, 0x2600, /* Geometric Shapes */ \
  0x2600, 0x269e, 0x26a0, 0x26bd, 0x26c0, 0x26c4, /* Misc. Symbols */ \
  0x2701, 0x2705, 0x2706, 0x270a, 0x270c, 0x2728, 0x2729, 0x274c, \
    0x274d, 0x274e, 0x274f, 0x2753, 0x2756, 0x2757, 0x2758, 0x275f, \
    0x2761, 0x2795, 0x2798, 0x27b0, 0x27b1, 0x27bf /* Dingbats */

/* End of list marker */
#define RANGE_END 0x0, 0x0

/* Pre-defined character ranges, stored as [first, last) pairs and closed by
 * RANGE_END. XXX: the pairs must be _ordered_ */
static const uint32_t unichars_ascii[] = { RANGE_ASCII, RANGE_END };
static const uint32_t unichars_cjk[] = { RANGE_CJK, RANGE_END };
static const uint32_t unichars_symbols[] = { RANGE_SYMBOLS, RANGE_END };

/* The Unicode characters at disposal */
static const uint32_t *unichars;

/* The maximum image size we want to support, and the version range */
#define RANGE_W 2000
#define RANGE_H 2000
#define RANGE_V 10

/* Start with a random image (1), or with a good estimate (0)? */
#define RANDOM_START 0

/*
 * These values can be overwritten at runtime
 */

/* Debug mode */
static bool DEBUG_MODE = false;

/* The maximum message length */
static int MAX_MSG_LEN = 140;

/* Iterations per point -- larger means slower but nicer */
static int ITERATIONS_PER_POINT = 50;

/* Points per cell -- 1 allows more cells to fit, but 2 gives better results */
static int POINTS_PER_CELL = 2;

/* The range value for point parameters: X Y, red/green/blue, "strength"
 * Tested values (on Mona Lisa) are:
 *  16 16 5 5 5 2 -> 0.06511725914
 *  16 16 6 7 6 1 -> 0.05731491348 *
 *  16 16 7 6 6 1 -> 0.06450513783
 *  14 14 7 7 6 1 -> 0.0637207893
 *  19 19 6 6 5 1 -> 0.06801999094 */
static unsigned int RANGE_X = 16;
static unsigned int RANGE_Y = 16;
static unsigned int RANGE_R = 6;
static unsigned int RANGE_G = 6;
static unsigned int RANGE_B = 6;
static unsigned int RANGE_S = 1;

/*
 * These values are computed at runtime
 */

static float TOTAL_BITS;
static float HEADER_BITS;
static float DATA_BITS;
static float CELL_BITS;

static int NUM_CHARACTERS;
static int MAX_ITERATIONS;
static unsigned int TOTAL_CELLS;

#define RANGE_XY2 (RANGE_Y*RANGE_X*(RANGE_Y*RANGE_X+1)/2)
#define RANGE_SBGR (RANGE_R*RANGE_G*RANGE_B*RANGE_S)
#define RANGE_SBGRXY (RANGE_Y*RANGE_X*RANGE_R*RANGE_G*RANGE_B*RANGE_S)

struct K : CGAL::Exact_predicates_inexact_constructions_kernel {};
typedef CGAL::Delaunay_triangulation_2<K> Delaunay_triangulation;
typedef std::vector<std::pair<K::Point_2, K::FT> > Point_coordinate_vector;

/* Global aspect ratio */
static unsigned int dw, dh;

/* Algorithm version */
static unsigned int version;

/* Global point encoding */
typedef struct point
{
    uint8_t x, y, r, g, b, s;
}
point_t;
static point_t points[4096]; /* FIXME: allocate this dynamically */
static int npoints = 0;

/* Global triangulation */
static Delaunay_triangulation dt;

/*
 * Bit allocation handling
 */

/*
 * Recompute the bit budget and the cell grid for an image of the given size.
 *
 * Reads MAX_MSG_LEN, NUM_CHARACTERS, version, ITERATIONS_PER_POINT and the
 * RANGE_* settings; writes TOTAL_BITS, HEADER_BITS, DATA_BITS, CELL_BITS,
 * POINTS_PER_CELL (for versions 0 and 1), TOTAL_CELLS, MAX_ITERATIONS and
 * the grid dimensions dw x dh.
 *
 * If the message cannot hold even a single cell, TOTAL_CELLS, dw and dh are
 * all set to 0 so that callers can detect the situation.
 */
void compute_ranges(int width, int height)
{
    TOTAL_BITS = MAX_MSG_LEN * logf(NUM_CHARACTERS) / logf(2);
    HEADER_BITS = logf(RANGE_W * RANGE_H * RANGE_V) / logf(2);
    DATA_BITS = TOTAL_BITS - HEADER_BITS;
    if(version == 0)
    {
        POINTS_PER_CELL = 1;
        CELL_BITS = logf(RANGE_SBGRXY) / logf(2);
    }
    else if(version == 1)
    {
        POINTS_PER_CELL = 2;
        CELL_BITS = (2 * logf(RANGE_SBGR) + logf(RANGE_XY2)) / logf(2);
    }

    /* A negative or sub-unit quotient must not be cast to int and stored
     * in the unsigned TOTAL_CELLS: it would either wrap around to a huge
     * value or yield 0 and hang the grid-growing loops below. */
    TOTAL_CELLS = 0;
    if(CELL_BITS > 0.0f)
    {
        float cells = DATA_BITS / CELL_BITS;
        if(cells >= 1.0f)
            TOTAL_CELLS = (int)cells;
    }
    MAX_ITERATIONS = ITERATIONS_PER_POINT * POINTS_PER_CELL * TOTAL_CELLS;

    if(TOTAL_CELLS == 0)
    {
        /* Not enough room for any cell: report an empty grid */
        dw = dh = 0;
        return;
    }

    /* Compute "best" w/h ratio: among all i x (TOTAL_CELLS / i) grids, keep
     * the one whose aspect ratio is closest (in log scale) to the image's */
    float r = (float)width / (float)height;

    dw = 1; dh = TOTAL_CELLS;
    for(unsigned int i = 1; i <= TOTAL_CELLS; i++)
    {
        int j = TOTAL_CELLS / i;

        float ir = (float)i / (float)j;
        float dwr = (float)dw / (float)dh;

        if(fabs(logf(r / ir)) < fabs(logf(r / dwr)))
        {
            dw = i;
            dh = TOTAL_CELLS / dw;
        }
    }

    /* Use up any remaining cells by growing the grid, rows first */
    while((dh + 1) * dw <= TOTAL_CELLS) dh++;
    while(dh * (dw + 1) <= TOTAL_CELLS) dw++;
}

/*
 * Unicode stuff handling
 */

/* Count the characters covered by the active unichars table. The table is
 * a list of [first, last) pairs ended by a pair of equal values. */
static int count_unichars(void)
{
    int total = 0;
    const uint32_t *range = unichars;

    while(range[0] != range[1])
    {
        total += range[1] - range[0];
        range += 2;
    }

    return total;
}

/* Map a compact index to the matching Unicode character by walking the
 * [first, last) pairs of the active table. Returns 0 if the index is past
 * the end of the table. */
static uint32_t index2uni(uint32_t i)
{
    for(const uint32_t *range = unichars; range[0] != range[1]; range += 2)
    {
        uint32_t span = range[1] - range[0];

        if(i < span)
            return range[0] + i;

        /* Not in this range: skip it and look in the next one */
        i -= span;
    }

    return 0; /* Should not happen! */
}

/* Map a Unicode character to its position in the compact list formed by
 * concatenating every [first, last) pair of the active table. If the
 * character lies beyond every range, the total table size is returned. */
static uint32_t uni2index(uint32_t x)
{
    uint32_t skipped = 0;

    for(const uint32_t *range = unichars; range[0] != range[1]; range += 2)
    {
        if(x < range[1])
            return skipped + x - range[0];

        /* Character is above this range: account for its full length */
        skipped += range[1] - range[0];
    }

    return skipped; /* Should not happen! */
}

/* Number of bytes that follow a given lead byte in a UTF-8 sequence,
 * indexed by the lead byte value. */
static uint8_t const utf8_trailing[256] =
{
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};

/* Sum of the lead and continuation marker bits for each sequence length,
 * indexed by the number of trailing bytes. fread_utf8() adds up the raw
 * shifted bytes and subtracts this value to obtain the code point. */
static uint32_t const utf8_offsets[6] =
{
    0x00000000UL, 0x00003080UL, 0x000E2080UL,
    0x03C82080UL, 0xFA082080UL, 0x82082080UL
};

/*
 * Read one UTF-8 encoded character from f.
 *
 * Returns the decoded code point, 0 if a NUL byte is read, or 0xffffffff
 * if the end of the stream (or a read error) is hit, including in the
 * middle of a multi-byte sequence. Continuation bytes are not validated.
 */
static uint32_t fread_utf8(FILE *f)
{
    int ch, i = 0, todo = -1;
    uint32_t ret = 0;

    for(;;)
    {
        ch = fgetc(f);
        /* EOF is negative: it must never be used as a table index */
        if(ch == EOF)
            return 0xffffffff;
        if(!ch)
            return 0;
        /* The first byte tells how many more bytes are expected */
        if(todo == -1)
            todo = utf8_trailing[ch];
        ret += ((uint32_t)ch) << (6 * (todo - i));
        if(todo == i++)
            return ret - utf8_offsets[todo];
    }
}

/*
 * Write the code point x to f as UTF-8.
 *
 * Values below 0x80 are emitted as a single byte; everything else is
 * encoded on 2, 3 or 4 bytes depending on its magnitude.
 */
static void fwrite_utf8(FILE *f, uint32_t x)
{
    /* Lead byte marker bits, indexed by total sequence length */
    static const uint8_t lead_mark[7] =
    {
        0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
    };

    char out[8];
    size_t len;

    if(x < 0x80)
    {
        fprintf(f, "%c", x);
        return;
    }

    if(x < 0x800)
        len = 2;
    else if(x < 0x10000)
        len = 3;
    else
        len = 4;

    out[len] = '\0';

    /* Fill continuation bytes from last to first, 6 payload bits each */
    for(size_t k = len - 1; k > 0; k--)
    {
        out[k] = (x | 0x80) & 0xbf;
        x >>= 6;
    }

    /* Whatever is left goes into the lead byte */
    out[0] = x | lead_mark[len];

    fprintf(f, "%s", out);
}

/*
 * Our nifty non-power-of-two bitstack handling
 */

/*
 * Arbitrary-precision unsigned integer used as a mixed-radix stack.
 *
 * push(val, range) multiplies the number by range and adds val % range;
 * pop(range) divides by range and returns the remainder. Values therefore
 * come back in the reverse order of the pushes, provided the same ranges
 * are used.
 *
 * The number is stored as max_size + 1 little-endian 32-bit digits; msb is
 * the index of the highest digit in use and every digit above it is zero.
 */
class bitstack
{
public:
    /* Create an empty stack able to hold max + 1 32-bit digits */
    bitstack(int max) { alloc(max); init(0); }

    ~bitstack() { delete[] digits; delete[] str; }

    /* Return the number as a hexadecimal string. The buffer is owned by
     * the object and overwritten by the next call. */
    char const *tostring()
    {
        int pos = sprintf(str, "0x%x", digits[msb]);

        for(int i = msb - 1; i >= 0; i--)
            pos += sprintf(str + pos, "%08x", digits[i]);

        return str;
    }

    /* Push val (reduced modulo range) on the stack; a zero range is a
     * no-op. */
    void push(uint32_t val, uint32_t range)
    {
        if(!range)
            return;

        mul(range);
        add(val % range);
    }

    /* Pop a value in [0, range) from the stack; a zero range yields 0 and
     * leaves the stack untouched. */
    uint32_t pop(uint32_t range)
    {
        if(!range)
            return 0;

        return div(range);
    }

    /* Tell whether the stored number is zero */
    bool isempty()
    {
        for(int i = msb; i >= 0; i--)
            if(digits[i])
                return false;

        return true;
    }

private:
    bitstack(int max, uint32_t x) { alloc(max); init(x); }

    bitstack(bitstack &b)
    {
        alloc(b.max_size);
        msb = b.msb;
        memcpy(digits, b.digits, (max_size + 1) * sizeof(uint32_t));
    }

    bitstack(bitstack const &b)
    {
        alloc(b.max_size);
        msb = b.msb;
        memcpy(digits, b.digits, (max_size + 1) * sizeof(uint32_t));
    }

    void alloc(int max)
    {
        max_size = max;
        digits = new uint32_t[max_size + 1];
        /* 8 hex characters per digit, plus the "0x" prefix written by
         * tostring(), plus the terminating NUL */
        str = new char[(max_size + 1) * 8 + 2 + 1];
    }

    void init(uint32_t i)
    {
        msb = 0;
        memset(digits, 0, (max_size + 1) * sizeof(uint32_t));
        digits[0] = i;
    }

    /* Could be done much faster, but we don't care! */
    void add(uint32_t x) { add(bitstack(max_size, x)); }
    void sub(uint32_t x) { sub(bitstack(max_size, x)); }

    void add(bitstack const &_b)
    {
        /* Copy the operand in case we get added to ourselves */
        bitstack b(_b);
        uint64_t x = 0;

        if(msb < b.msb)
            msb = b.msb;

        for(int i = 0; i <= msb; i++)
        {
            uint64_t tmp = (uint64_t)digits[i] + (uint64_t)b.digits[i] + x;
            digits[i] = tmp;
            if((uint64_t)digits[i] == tmp)
                x = 0;
            else
            {
                x = 1;
                /* Grow by one digit to store the carry, unless the storage
                 * is exhausted, in which case the carry is dropped rather
                 * than written past the end of the array. */
                if(i == msb && msb < max_size)
                    msb++;
            }
        }
    }

    void sub(bitstack const &_b)
    {
        /* Copy the operand in case we get subtracted from ourselves */
        bitstack b(_b);
        uint64_t x = 0;

        /* We cannot subtract a larger number! */
        if(msb < b.msb)
        {
            init(0);
            return;
        }

        for(int i = 0; i <= msb; i++)
        {
            uint64_t tmp = (uint64_t)digits[i] - (uint64_t)b.digits[i] - x;
            digits[i] = tmp;
            if((uint64_t)digits[i] == tmp)
                x = 0;
            else
            {
                x = 1;
                if(i == msb)
                {
                    /* Error: carry into MSB! */
                    init(0);
                    return;
                }
            }
        }

        while(msb > 0 && digits[msb] == 0) msb--;
    }

    /* Multiply by x using binary shift-and-add */
    void mul(uint32_t x)
    {
        bitstack b(*this);
        init(0);

        while(x)
        {
            if(x & 1)
                add(b);
            x /= 2;
            b.add(b);
        }
    }

    /* Divide in place by x and return the remainder. This is a schoolbook
     * long division from the top digit down; the running remainder is kept
     * in a local so that no digit above msb is ever accessed. */
    uint32_t div(uint32_t x)
    {
        uint64_t rem = 0;

        for(int i = msb; i >= 0; i--)
        {
            uint64_t tmp = (rem << 32) + digits[i];
            digits[i] = (uint32_t)(tmp / x);
            rem = tmp % x;
        }

        while(msb > 0 && digits[msb] == 0) msb--;

        return (uint32_t)rem;
    }

    int msb, max_size;
    uint32_t *digits;
    char *str;
};

/*
 * Point handling
 */

/* Deterministic pseudo-random generator: a linear congruential sequence
 * with a fixed seed of 1. Each call yields 15 bits of state, reduced
 * modulo mod. */
static unsigned int det_rand(unsigned int mod)
{
    static unsigned long state = 1;

    state = state * 1103515245 + 12345;

    /* Keep bits 16..30 of the state, like the classic 15-bit rand() */
    unsigned int bits = (unsigned int)((state >> 16) & 0x7fff);

    return bits % mod;
}

/* Quantise val (nominally in [0, 1]) to an integer in [0, range - 1].
 * Out-of-range inputs are clamped to the nearest bound. */
static inline int range2int(float val, int range)
{
    int bucket = (int)(val * ((float)range - 0.0001));

    if(bucket < 0)
        return 0;
    if(bucket > range - 1)
        return range - 1;
    return bucket;
}

/* Map bucket val out of range buckets to the centre of that bucket,
 * expressed as a fraction: (val + 1/2) / range. */
static inline float int2midrange(int val, int range)
{
    float numerator = (float)(1 + 2 * val);
    float denominator = (float)(2 * range);

    return numerator / denominator;
}

/* Map bucket val out of range buckets onto [0, 1] so that the first and
 * last buckets hit the bounds exactly. A range of 1 or less yields 0. */
static inline float int2fullrange(int val, int range)
{
    if(range > 1)
        return (float)val / (float)(range - 1);

    return 0.0;
}

/* Convert a point index to the coordinates of its cell in the dw-wide
 * grid; POINTS_PER_CELL consecutive points share one cell. */
static inline void index2cell(int index, int *dx, int *dy)
{
    int cell = index / POINTS_PER_CELL;

    *dx = cell % dw;
    *dy = cell / dw;
}

static inline void set_point(int index, float x, float y, float r,
                             float g, float b, float s)
{
    int dx, dy;

    index2cell(index, &dx, &dy);

    float fx = (x - dx * RANGE_X) / RANGE_X;
    float fy = (y - dy * RANGE_Y) / RANGE_Y;

    points[index].x = range2int(fx, RANGE_X);
    points[index].y = range2int(fy, RANGE_Y);

    points[index].r = range2int(r, RANGE_R);
    points[index].g = range2int(g, RANGE_G);
    points[index].b = range2int(b, RANGE_B);

    points[index].s = range2int(s, RANGE_S);
}

static inline void get_point(int index, float *x, float *y, float *r,
                             float *g, float *b, float *s)
{
    int dx, dy;

    index2cell(index, &dx, &dy);

    float fx = int2midrange(points[index].x, RANGE_X);
    float fy = int2midrange(points[index].y, RANGE_Y);

    *y = (fy + dy) * RANGE_Y /*+ 0.5 * (index & 1)*/;
    *x = (fx + dx) * RANGE_X /*+ 0.5 * (index & 1)*/;

    *r = int2fullrange(points[index].r, RANGE_R);
    *g = int2fullrange(points[index].g, RANGE_G);
    *b = int2fullrange(points[index].b, RANGE_B);

    *s = int2fullrange(points[index].s, RANGE_S);
}

static void add_point(float x, float y, float r, float g, float b, float s)
{
    set_point(npoints, x, y, r, g, b, s);
    npoints++;
}

/* Pack the in-cell coordinates of two points into a single index over
 * unordered pairs. *swap is set to true when the first point ends up being
 * the larger of the two, i.e. when the pair had to be reordered. */
static uint32_t pack_coords(int x1, int y1, int x2, int y2, bool *swap)
{
    int first = y1 * RANGE_X + x1;
    int second = y2 * RANGE_X + x2;

    /* XXX: this should not happen -- nudge the first point sideways so
     * that the two positions differ */
    if(first == second)
        first += (x1 > 0 ? -1 : 1);

    *swap = first > second;

    int low = *swap ? second : first;
    int high = *swap ? first : second;

    /* Triangular numbering: row "high", column "low" */
    return high * (high + 1) / 2 + low;
}

/* Inverse of the triangular numbering used by pack_coords(): recover the
 * two linear positions from pack, then split each into x and y using a
 * row width of RANGE_X. */
static void unpack_coords(uint32_t pack, int *x1, int *y1, int *x2, int *y2)
{
    /* Largest "high" such that high * (high + 1) / 2 <= pack */
    int high = ((int)sqrt(1.0 + 8 * pack) - 1) / 2;
    int low = pack - high * (high + 1) / 2;

    *x1 = low % RANGE_X;
    *y1 = low / RANGE_X;

    *x2 = high % RANGE_X;
    *y2 = high / RANGE_X;
}

#if RANDOM_START == 1
/* Append a point whose every field is drawn from det_rand(), in the order
 * x, y, r, g, b, s. Only built when RANDOM_START is enabled. */
static void add_random_point()
{
    point_t *pt = &points[npoints];

    pt->x = det_rand(RANGE_X);
    pt->y = det_rand(RANGE_Y);
    pt->r = det_rand(RANGE_R);
    pt->g = det_rand(RANGE_G);
    pt->b = det_rand(RANGE_B);
    pt->s = det_rand(RANGE_S);

    npoints++;
}
#endif

#define NB_OPS 20

/* Draw a random operation number for apply_op(), redrawing whenever the
 * candidate is rejected:
 *  - op 0 is always rejected;
 *  - op 1 is rejected when RANGE_S is 1, otherwise half of the time;
 *  - ops up to 5 are rejected half of the time. */
static uint8_t rand_op(void)
{
    for(;;)
    {
        uint8_t op = det_rand(NB_OPS);

        /* Randomly ignore statistically less efficient ops */
        if(op == 0)
            continue;
        if(op == 1 && (RANGE_S == 1 || det_rand(2)))
            continue;
        if(op <= 5 && det_rand(2))
            continue;

        return op;
    }
}

/*
 * Apply mutation number op to the point *val, in place.
 *
 * Every move stays within the valid range of the field it touches: when a
 * step in the requested direction would leave the range, the step is taken
 * in the opposite direction instead. Unknown op values leave the point
 * unchanged.
 */
static void apply_op(uint8_t op, point_t *val)
{
    switch(op)
    {
    case 0: /* Flip strength value */
    case 1:
        /* Statistics show that this helps often, but does not reduce
         * the error significantly. */
        val->s ^= 1; break;
    case 2: /* Move up; if impossible, down */
        val->y = val->y > 0 ? val->y - 1 : val->y + 1; break;
    case 3: /* Move down; if impossible, up */
        val->y = val->y + 1U < RANGE_Y ? val->y + 1 : val->y - 1; break;
    case 4: /* Move left; if impossible, right */
        val->x = val->x > 0 ? val->x - 1 : val->x + 1; break;
    case 5: /* Move right; if impossible, left */
        val->x = val->x + 1U < RANGE_X ? val->x + 1 : val->x - 1; break;
    case 6: /* Corner 1 */
        val->y = val->y > 0 ? val->y - 1 : val->y + 1;
        val->x = val->x > 0 ? val->x - 1 : val->x + 1; break;
    case 7: /* Corner 2 */
        val->y = val->y > 0 ? val->y - 1 : val->y + 1;
        val->x = val->x + 1U < RANGE_X ? val->x + 1 : val->x - 1; break;
    case 8: /* Corner 3 */
        val->y = val->y + 1U < RANGE_Y ? val->y + 1 : val->y - 1;
        val->x = val->x + 1U < RANGE_X ? val->x + 1 : val->x - 1; break;
    case 9: /* Corner 4 */
        val->y = val->y + 1U < RANGE_Y ? val->y + 1 : val->y - 1;
        val->x = val->x > 0 ? val->x - 1 : val->x + 1; break;
    case 16: /* Double up */
        val->y = val->y > 1 ? val->y - 2 : val->y + 2; break;
    case 17: /* Double down */
        val->y = val->y + 2U < RANGE_Y ? val->y + 2 : val->y - 2; break;
    case 18: /* Double left */
        val->x = val->x > 1 ? val->x - 2 : val->x + 2; break;
    case 19: /* Double right */
        val->x = val->x + 2U < RANGE_X ? val->x + 2 : val->x - 2; break;
    case 10: /* R-- (or R++) */
        val->r = val->r > 0 ? val->r - 1 : val->r + 1; break;
    case 11: /* R++ (or R--) */
        val->r = val->r + 1U < RANGE_R ? val->r + 1 : val->r - 1; break;
    case 12: /* G-- (or G++) */
        val->g = val->g > 0 ? val->g - 1 : val->g + 1; break;
    case 13: /* G++ (or G--) */
        val->g = val->g + 1U < RANGE_G ? val->g + 1 : val->g - 1; break;
    case 14: /* B-- (or B++) */
        /* Decrement blue from its own value, not from green's: using the
         * green component here would store an unrelated value and could
         * wrap around to 255 when green is 0. */
        val->b = val->b > 0 ? val->b - 1 : val->b + 1; break;
    case 15: /* B++ (or B--) */
        val->b = val->b + 1U < RANGE_B ? val->b + 1 : val->b - 1; break;
#if 0
    case 15: /* Brightness-- */
        apply_op(9, val); apply_op(11, val); apply_op(13, val); break;
    case 16: /* Brightness++ */
        apply_op(10, val); apply_op(12, val); apply_op(14, val); break;
    case 17: /* RG-- */
        apply_op(9, val); apply_op(11, val); break;
    case 18: /* RG++ */
        apply_op(10, val); apply_op(12, val); break;
    case 19: /* GB-- */
        apply_op(11, val); apply_op(13, val); break;
    case 20: /* GB++ */
        apply_op(12, val); apply_op(14, val); break;
    case 21: /* RB-- */
        apply_op(9, val); apply_op(13, val); break;
    case 22: /* RB++ */
        apply_op(10, val); apply_op(14, val); break;
#endif
    default:
        break;
    }
}

/*
 * Render the current set of points into the rectangle of dst whose top-left
 * corner is (rx, ry) and whose size is rw x rh pixels.
 *
 * The global triangulation dt is rebuilt from scratch on every call. Each
 * pixel colour is a weighted mix of the colours of its natural neighbours
 * in that triangulation.
 */
static void render(pipi_image_t *dst, int rx, int ry, int rw, int rh)
{
    /* Maps a point position, at half-unit resolution, back to its index in
     * points[]. NOTE(review): this is a stack array whose size depends on
     * runtime values -- confirm it stays reasonably small. */
    int lookup[dw * RANGE_X * 2 * dh * RANGE_Y * 2];
    pipi_pixels_t *p = pipi_get_pixels(dst, PIPI_PIXELS_RGBA_F32);
    float *data = (float *)p->pixels;
    int x, y;

    memset(lookup, 0, sizeof(lookup));
    dt.clear();
    for(int i = 0; i < npoints; i++)
    {
        float fx, fy, fr, fg, fb, fs;
        get_point(i, &fx, &fy, &fr, &fg, &fb, &fs);
        /* Points are shifted by one full grid size so that the grid sits in
         * the middle of the 3x3 area delimited by the fake points below */
        dt.insert(K::Point_2(fx + dw * RANGE_X, fy + dh * RANGE_Y));
        /* Keep link to point */
        lookup[(int)(fx * 2) + dw * RANGE_X * 2 * (int)(fy * 2)] = i;
    }

    /* Add fake points to close the triangulation */
    dt.insert(K::Point_2(0, 0));
    dt.insert(K::Point_2(3 * dw * RANGE_X, 0));
    dt.insert(K::Point_2(0, 3 * dh * RANGE_Y));
    dt.insert(K::Point_2(3 * dw * RANGE_X, 3 * dh * RANGE_Y));

    for(y = ry; y < ry + rh; y++)
    {
        for(x = rx; x < rx + rw; x++)
        {
            /* Pixel position converted to grid units */
            float myx = (float)x * dw * RANGE_X / p->w;
            float myy = (float)y * dh * RANGE_Y / p->h;

            /* Query point, shifted the same way as the inserted points */
            K::Point_2 m(myx + dw * RANGE_X, myy + dh * RANGE_Y);
            Point_coordinate_vector coords;
            /* The returned triple is not used afterwards; the neighbours
             * and their weights are collected in coords */
            CGAL::Triple<
              std::back_insert_iterator<Point_coordinate_vector>,
              K::FT, bool> result =
              CGAL::natural_neighbor_coordinates_2(dt, m,
                                                   std::back_inserter(coords));

            /* norm starts at a tiny non-zero value so that the divisions
             * below are safe when every neighbour gets skipped */
            float r = 0.0f, g = 0.0f, b = 0.0f, norm = 0.000000000000001f;

            Point_coordinate_vector::iterator it;
            for(it = coords.begin(); it != coords.end(); ++it)
            {
                float fx, fy, fr, fg, fb, fs;

                /* Undo the shift to get back to grid coordinates */
                fx = (*it).first.x() - dw * RANGE_X;
                fy = (*it).first.y() - dh * RANGE_Y;

                /* Neighbours outside the grid are the fake corner points:
                 * they carry no colour, skip them */
                if(fx < 0 || fy < 0
                    || fx > dw * RANGE_X - 1 || fy > dh * RANGE_Y - 1)
                    continue;

                int index = lookup[(int)(fx * 2)
                                    + dw * RANGE_X * 2 * (int)(fy * 2)];

                get_point(index, &fx, &fy, &fr, &fg, &fb, &fs);

                /* Natural neighbour weight, boosted by the point strength */
                //float k = pow((*it).second * (1.0 + fs), 1.2);
                float k = (*it).second * (1.00f + fs);
                //float k = (*it).second * (0.60f + fs);
                //float k = pow((*it).second, (1.0f + fs));

                // Try to attenuate peak artifacts
                // (scales the weight by the inverse of the normalised
                // distance between the pixel and the point)
                k *= pow(((myx - fx) * (myx - fx) + (myy - fy) * (myy - fy)
                          + 0.01) / (RANGE_X * RANGE_X + RANGE_Y * RANGE_Y),
                          -0.5);

                // Cute circles
                //k = 1.0 / (0.015 * (RANGE_X * RANGE_X + RANGE_Y * RANGE_Y)
                //       + (myx - fx) * (myx - fx) + (myy - fy) * (myy - fy));

                r += k * fr;
                g += k * fg;
                b += k * fb;
                norm += k;
            }

            /* Store the normalised colour; the fourth channel is set to 0 */
            data[4 * (x + y * p->w) + 0] = r / norm;
            data[4 * (x + y * p->w) + 1] = g / norm;
            data[4 * (x + y * p->w) + 2] = b / norm;
            data[4 * (x + y * p->w) + 3] = 0.0;
        }
    }

    pipi_release_pixels(dst, p);
}

/*
 * Build the initial set of points from the source image.
 *
 * The image is read as a dw x dh grid of cells, each RANGE_X x RANGE_Y
 * pixels. For every cell the darkest pixel, the brightest pixel and the
 * mean colour are computed, and one or two points are added from them
 * (or random points when RANDOM_START is 1).
 *
 * NOTE(review): pixels are addressed up to dw * RANGE_X by dh * RANGE_Y, so
 * src is presumably already resized to at least that size -- confirm
 * against the caller.
 * NOTE(review): unlike render(), this function does not call
 * pipi_release_pixels() -- confirm whether that is needed here.
 */
static void analyse(pipi_image_t *src)
{
    pipi_pixels_t *p = pipi_get_pixels(src, PIPI_PIXELS_RGBA_F32);
    float *data = (float *)p->pixels;

    for(unsigned int dy = 0; dy < dh; dy++)
        for(unsigned int dx = 0; dx < dw; dx++)
        {
            /* min/max start outside of the range they are compared
             * against so that the first pixel always updates both */
            float min = 1.1f, max = -0.1f, mr = 0.0f, mg = 0.0f, mb = 0.0f;
            float total = 0.0;
            int xmin = 0, xmax = 0, ymin = 0, ymax = 0;
            int npixels = 0;

            /* Scan every pixel of the cell */
            for(unsigned int iy = RANGE_Y * dy; iy < RANGE_Y * (dy + 1); iy++)
                for(unsigned int ix = RANGE_X * dx; ix < RANGE_X * (dx + 1); ix++)
                {
                    /* Luminance is the plain average of the 3 channels */
                    float lum = 0.0f;

                    lum += data[4 * (ix + iy * p->w) + 0];
                    lum += data[4 * (ix + iy * p->w) + 1];
                    lum += data[4 * (ix + iy * p->w) + 2];
                    lum /= 3;

                    /* Accumulate the per-channel sums for the mean colour */
                    mr += data[4 * (ix + iy * p->w) + 0];
                    mg += data[4 * (ix + iy * p->w) + 1];
                    mb += data[4 * (ix + iy * p->w) + 2];

                    /* Track the darkest pixel... */
                    if(lum < min)
                    {
                        min = lum;
                        xmin = ix;
                        ymin = iy;
                    }

                    /* ... and the brightest one */
                    if(lum > max)
                    {
                        max = lum;
                        xmax = ix;
                        ymax = iy;
                    }

                    total += lum;
                    npixels++;
                }

            /* Turn the sums into means */
            total /= npixels;
            mr /= npixels;
            mg /= npixels;
            mb /= npixels;

            float wmin, wmax;

            /* Choose the strength of each point from where the mean
             * luminance falls between the extremes: in the lowest quarter
             * the dark point is strong, in the highest quarter the bright
             * point is strong, in between neither is */
            if(total < min + (max - min) / 4)
                wmin = 1.0, wmax = 0.0;
            else if(total < min + (max - min) / 4 * 3)
                wmin = 0.0, wmax = 0.0;
            else
                wmin = 0.0, wmax = 1.0;

#if RANDOM_START == 1
            for(int i = 0; i < POINTS_PER_CELL; i++)
                add_random_point();
#else
            /* 0.80 and 0.20 were chosen empirically, it gives a 10% better
             * initial distance. Definitely worth it. */
            /* With two points per cell both extremes are added; with one,
             * only the extreme on the same side as the mean luminance */
            if(POINTS_PER_CELL == 2 || total < min + (max - min) / 2)
                add_point(xmin, ymin,
                          data[4 * (xmin + ymin * p->w) + 0] * 0.80 + mr * 0.20,
                          data[4 * (xmin + ymin * p->w) + 1] * 0.80 + mg * 0.20,
                          data[4 * (xmin + ymin * p->w) + 2] * 0.80 + mb * 0.20,
                          wmin);

            if(POINTS_PER_CELL == 2 || total >= min + (max - min) / 2)
                add_point(xmax, ymax,
                          data[4 * (xmax + ymax * p->w) + 0] * 0.80 + mr * 0.20,
                          data[4 * (xmax + ymax * p->w) + 1] * 0.80 + mg * 0.20,
                          data[4 * (xmax + ymax * p->w) + 2] * 0.80 + mb * 0.20,
                          wmax);
#endif
        }
}

#define MOREINFO "Try `%s --help' for more information.\n"

int main(int argc, char *argv[])
{
    uint32_t unicode_data[2048];
    int opstats[2 * NB_OPS];
    char const *srcname = NULL, *dstname = NULL;
    pipi_image_t *src, *tmp, *dst;
    double error = 1.0;
    int width, height;

    /* Parse command-line options */
    for(;;)
    {
        int option_index = 0;
        static struct myoption long_options[] =
        {
            { "output",      1, NULL, 'o' },
            { "length",      1, NULL, 'l' },
            { "charset",     1, NULL, 'c' },
            { "quality",     1, NULL, 'q' },
            { "debug",       0, NULL, 'd' },
            { "help",        0, NULL, 'h' },
            { NULL,          0, NULL, 0   },
        };
        int c = mygetopt(argc, argv, "o:l:c:q:dh", long_options, &option_index);

        if(c == -1)
            break;

        switch(c)
        {
        case 'o':
            dstname = myoptarg;
            break;
        case 'l':
            MAX_MSG_LEN = atoi(myoptarg);
            if(MAX_MSG_LEN < 16)
            {
                fprintf(stderr, "Warning: rounding minimum message length to 16\n");
                MAX_MSG_LEN = 16;
            }
            break;
        case 'c':
            if(!strcmp(myoptarg, "ascii"))
                unichars = unichars_ascii;
            else if(!strcmp(myoptarg, "cjk"))
                unichars = unichars_cjk;
            else if(!strcmp(myoptarg, "symbols"))
                unichars = unichars_symbols;
            else
            {
                fprintf(stderr, "Error: invalid char block \"%s\".", myoptarg);
                fprintf(stderr, "Valid sets are: ascii, cjk, symbols\n");
                return EXIT_FAILURE;
            }
            break;
        case 'q':
            ITERATIONS_PER_POINT = 10 * atof(myoptarg);
            if(ITERATIONS_PER_POINT < 0)
                ITERATIONS_PER_POINT = 0;
            else if(ITERATIONS_PER_POINT > 200)
                ITERATIONS_PER_POINT = 200;
            break;
        case 'd':
            DEBUG_MODE = true;
            break;
        case 'h':
            printf("Usage: img2twit [OPTIONS] SOURCE\n");
            printf("       img2twit [OPTIONS] -o DESTINATION\n");
            printf("Encode SOURCE image to stdout or decode stdin to DESTINATION.\n");
            printf("\n");
            printf("Mandatory arguments to long options are mandatory for short options too.\n");
            printf("  -o, --output <filename>   output resulting image to filename\n");
            printf("  -l, --length <size>       message length in characters (default 140)\n");
            printf("  -c, --charset <block>     character set to use (ascii, [cjk], symbols)\n");
            printf("  -q, --quality <rate>      set image quality (0 - 20) (default 5)\n");
            printf("  -d, --debug               print debug information\n");
            printf("  -h, --help                display this help and exit\n");
            printf("\n");
            printf("Written by Sam Hocevar. Report bugs to <sam@hocevar.net>.\n");
            return EXIT_SUCCESS;
        default:
            fprintf(stderr, "%s: invalid option -- %c\n", argv[0], c);
            printf(MOREINFO, argv[0]);
            return EXIT_FAILURE;
        }
    }

    if(myoptind == argc && !dstname)
    {
        fprintf(stderr, "%s: too few arguments\n", argv[0]);
        printf(MOREINFO, argv[0]);
        return EXIT_FAILURE;
    }

    if((myoptind == argc - 1 && dstname) || myoptind < argc - 1)
    {
        fprintf(stderr, "%s: too many arguments\n", argv[0]);
        printf(MOREINFO, argv[0]);
        return EXIT_FAILURE;
    }

    if(myoptind == argc - 1)
        srcname = argv[myoptind];

    /* Decoding mode: read UTF-8 text from stdin */
    if(dstname)
        for(MAX_MSG_LEN = 0; ;)
        {
            uint32_t ch = fread_utf8(stdin);
            if(ch == 0xffffffff || ch == '\n')
                break;
            if(ch <= ' ')
                continue;
            unicode_data[MAX_MSG_LEN++] = ch;

            if(MAX_MSG_LEN >= 2048)
            {
                fprintf(stderr, "Error: message too long.\n");
                return EXIT_FAILURE;
            }
        }

    if(MAX_MSG_LEN == 0)
    {
        fprintf(stderr, "Error: empty message.\n");
        return EXIT_FAILURE;
    }

    bitstack b(MAX_MSG_LEN); /* We cannot declare this before, because
                              * MAX_MSG_LEN wouldn't be defined. */

    /* Autodetect charset if decoding, otherwise switch to CJK. */
    if(dstname)
    {
        char const *charset;

        if(unicode_data[0] >= 0x0021 && unicode_data[0] < 0x007f)
        {
            unichars = unichars_ascii;
            charset = "ascii";
        }
        else if(unicode_data[0] >= 0x4e00 && unicode_data[0] < 0x9fa6)
        {
            unichars = unichars_cjk;
            charset = "cjk";
        }
        else if(unicode_data[0] >= 0x25a0 && unicode_data[0] < 0x27bf)
        {
            unichars = unichars_symbols;
            charset = "symbols";
        }
        else
        {
            fprintf(stderr, "Error: unable to detect charset\n");
            return EXIT_FAILURE;
        }

        if(DEBUG_MODE)
            fprintf(stderr, "Detected charset \"%s\"\n", charset);
    }
    else if(!unichars)
        unichars = unichars_cjk;

    pipi_set_gamma(1.0);

    /* Precompute bit allocation */
    NUM_CHARACTERS = count_unichars();

    if(dstname)
    {
        /* Decoding mode: find each character's index in our character
         * list, and push it to our wonderful custom bitstream. */
        for(int i = MAX_MSG_LEN; i--; )
            b.push(uni2index(unicode_data[i]), NUM_CHARACTERS);

        /* The first thing we pop from the stream is the version information */
        version = b.pop(RANGE_V);

        if(version > 1)
        {
            fprintf(stderr, "Error: unsupported algorithm version %i\n",
                    version);
            return EXIT_FAILURE;
        }

        /* Read width and height from bitstream */
        width = b.pop(RANGE_W) + 1;
        height = b.pop(RANGE_H) + 1;
        src = NULL;
    }
    else
    {
        /* Argument given: open image for encoding */
        src = pipi_load(srcname);

        if(!src)
        {
            fprintf(stderr, "Error loading %s\n", srcname);
            return EXIT_FAILURE;
        }

        version = 1;
        width = pipi_get_image_width(src);
        height = pipi_get_image_height(src);
    }

    if(width <= 0 || height <= 0 || width > RANGE_W || height > RANGE_H)
    {
        fprintf(stderr, "Error: image size %ix%i is out of bounds\n",
                width, height);
        return EXIT_FAILURE;
    }

    compute_ranges(width, height);

    /* Greedily enlarge the per-point ranges so that each point carries a
     * little more information, but only for as long as the cell grid
     * (dw x dh) stays exactly the same. The search is kept deliberately
     * simple because the outcome must depend only on the source image
     * coordinates.
     *
     * TRY(op, revert) executes `op` and recomputes the ranges; if either
     * dw or dh moved as a result, it executes `revert` and recomputes the
     * ranges once more so that the previous state is restored. */
#define TRY(op, revert) \
    do { \
        const unsigned int prev_dw = dw; \
        const unsigned int prev_dh = dh; \
        op; \
        compute_ranges(width, height); \
        if(!(dw == prev_dw && dh == prev_dh)) \
        { \
            revert; \
            compute_ranges(width, height); \
        } \
    } while(0)

    for(int i = 0; i < 2; i++)
    {
        TRY(RANGE_G++, RANGE_G--);
        TRY(RANGE_R++, RANGE_R--);
        TRY(RANGE_B++, RANGE_B--);
    }

    for(int i = 0; i < 10; i++)
    {
        if((float)width / dw >= (float)height / dh)
        {
            TRY(RANGE_X++, RANGE_X--);
            TRY(RANGE_Y++, RANGE_Y--);
        }
        else
        {
            TRY(RANGE_Y++, RANGE_Y--);
            TRY(RANGE_X++, RANGE_X--);
        }
    }

    /* Print debug information */
    if(DEBUG_MODE)
    {
        fprintf(stderr, "Message size: %i\n", MAX_MSG_LEN);
        fprintf(stderr, "Available characters: %i\n", NUM_CHARACTERS);
        fprintf(stderr, "Available bits: %f\n", TOTAL_BITS);
        fprintf(stderr, "Width/Height ranges: %ix%i\n", RANGE_W, RANGE_H);
        fprintf(stderr, "Algorithm version: %i\n", RANGE_V);
        fprintf(stderr, "Image resolution: %ix%i\n", width, height);
        fprintf(stderr, "Header bits: %f\n", HEADER_BITS);
        fprintf(stderr, "Bits available for data: %f\n", DATA_BITS);
        fprintf(stderr, "X/Y/Red/Green/Blue/Extra ranges: %i %i %i %i %i %i\n",
                RANGE_X, RANGE_Y, RANGE_R, RANGE_G, RANGE_B, RANGE_S);
        fprintf(stderr, "Cell bits: %f\n", CELL_BITS);
        fprintf(stderr, "Available cells: %i\n", TOTAL_CELLS);
        fprintf(stderr, "Wasted bits: %f\n",
                DATA_BITS - CELL_BITS * TOTAL_CELLS);
        fprintf(stderr, "Chosen image ratio: %i:%i (wasting %i point cells)\n",
                dw, dh, TOTAL_CELLS - dw * dh);
        fprintf(stderr, "Total wasted bits: %f\n",
                DATA_BITS - CELL_BITS * dw * dh);
    }

    if(srcname)
    {
        /* Blur the source, then resize it to the cell grid resolution */
        tmp = pipi_gaussian_blur(src, 0.25 * dw * RANGE_X / width);
        pipi_free(src);
        src = pipi_resize(tmp, dw * RANGE_X, dh * RANGE_Y);
        pipi_free(tmp);

        /* Analyse image */
        analyse(src);

        /* Render what we just computed */
        tmp = pipi_new(dw * RANGE_X, dh * RANGE_Y);
        render(tmp, 0, 0, dw * RANGE_X, dh * RANGE_Y);
        error = pipi_measure_rmsd(src, tmp);

        if(DEBUG_MODE)
            fprintf(stderr, "Initial distance: %2.10g\n", error);

        memset(opstats, 0, sizeof(opstats));
        for(int iter = 0, stuck = 0, failures = 0, success = 0;
            iter < MAX_ITERATIONS /* && stuck < 5 && */;
            iter++)
        {
            if(failures > 500)
            {
                stuck++;
                failures = 0;
            }

            if(!DEBUG_MODE && !(iter % 16))
                fprintf(stderr, "\rEncoding... %i%%",
                        iter * 100 / MAX_ITERATIONS);

            pipi_image_t *scrap = pipi_copy(tmp);

            /* Choose a point at random */
            int pt = det_rand(npoints);
            point_t oldpt = points[pt];

            /* Compute the affected image zone */
            float fx, fy, fr, fg, fb, fs;
            get_point(pt, &fx, &fy, &fr, &fg, &fb, &fs);
            int zonex = (int)fx / RANGE_X - 2;
            int zoney = (int)fy / RANGE_Y - 2;
            int zonew = 4;
            int zoneh = 4;
            if(zonex < 0) { zonew += zonex; zonex = 0; }
            if(zoney < 0) { zoneh += zoney; zoney = 0;; }
            if(zonex + zonew > (int)dw) { zonew = dw - zonex; }
            if(zoney + zoneh > (int)dh) { zoneh = dh - zoney; }

            /* Choose random operations and measure their effect */
            uint8_t op1 = rand_op();
            //uint8_t op2 = rand_op();

            apply_op(op1, &points[pt]);

            /* Check that two points don't fall at the same place */
            if(POINTS_PER_CELL == 2)
            {
                while(points[pt].x == points[pt ^ 1].x
                       && points[pt].y == points[pt ^ 1].y)
                {
                    points[pt] = oldpt;
                    op1 = rand_op();
                    apply_op(op1, &points[pt]);
                }
            }

            render(scrap, zonex * RANGE_X, zoney * RANGE_Y,
                   zonew * RANGE_X, zoneh * RANGE_Y);

            double newerr = pipi_measure_rmsd(src, scrap);

            opstats[op1 * 2]++;
            //opstats[op2 * 2]++;

            if(newerr < error)
            {
                pipi_free(tmp);

#if 0
                /* Save image! */
                if((success % 10) == 0)
                {
                    char buf[128];
                    sprintf(buf, "twit%08i.bmp", success);
                    tmp = pipi_new(width, height);
                    render(tmp, 0, 0, width, height);
                    pipi_save(tmp, buf);
                    pipi_free(tmp);
                }
#endif

                tmp = scrap;

                if(DEBUG_MODE)
                    fprintf(stderr, "%08i -0.%010i %2.010g after op%i(%i)\n",
                            iter, (int)((error - newerr) * 10000000000L),
                            error, op1, pt);

                error = newerr;
                opstats[op1 * 2 + 1]++;
                //opstats[op2 * 2 + 1]++;
                failures = 0;
                success++;
            }
            else
            {
                pipi_free(scrap);
                points[pt] = oldpt;
                failures++;
            }
        }

        if(DEBUG_MODE)
        {
            for(int j = 0; j < 2; j++)
            {
                fprintf(stderr,   "operation: ");
                for(int i = NB_OPS / 2 * j; i < NB_OPS / 2 * (j + 1); i++)
                    fprintf(stderr, "%4i ", i);
                fprintf(stderr, "\nattempts:  ");
                for(int i = NB_OPS / 2 * j; i < NB_OPS / 2 * (j + 1); i++)
                    fprintf(stderr, "%4i ", opstats[i * 2]);
                fprintf(stderr, "\nsuccesses: ");
                for(int i = NB_OPS / 2 * j; i < NB_OPS / 2 * (j + 1); i++)
                    fprintf(stderr, "%4i ", opstats[i * 2 + 1]);
                fprintf(stderr, "\n");
            }

            fprintf(stderr, "Distance: %2.10g\n", error);
        }
        else
            fprintf(stderr, "\r                    \r");

#if 0
        dst = pipi_resize(tmp, width, height);
        pipi_free(tmp);

        /* Save image and bail out */
        pipi_save(dst, "lol.bmp");
        pipi_free(dst);
#endif

        /* Push our points to the bitstream */
        for(int i = 0; i < npoints; i += POINTS_PER_CELL)
        {
            if(POINTS_PER_CELL == 2)
            {
                int x1, y1, x2, y2;
                x1 = points[i].x;
                y1 = points[i].y;
                x2 = points[i + 1].x;
                y2 = points[i + 1].y;

                bool swap;
                uint32_t pack = pack_coords(x1, y1, x2, y2, &swap);

                b.push(points[i + (swap ? 1 : 0)].s, RANGE_S);
                b.push(points[i + (swap ? 1 : 0)].b, RANGE_B);
                b.push(points[i + (swap ? 1 : 0)].g, RANGE_G);
                b.push(points[i + (swap ? 1 : 0)].r, RANGE_R);
                b.push(points[i + (swap ? 0 : 1)].s, RANGE_S);
                b.push(points[i + (swap ? 0 : 1)].b, RANGE_B);
                b.push(points[i + (swap ? 0 : 1)].g, RANGE_G);
                b.push(points[i + (swap ? 0 : 1)].r, RANGE_R);
                b.push(pack, RANGE_XY2);
            }
            else
            {
                b.push(points[i].s, RANGE_S);
                b.push(points[i].b, RANGE_B);
                b.push(points[i].g, RANGE_G);
                b.push(points[i].r, RANGE_R);
                b.push(points[i].x, RANGE_X);
                b.push(points[i].y, RANGE_Y);
            }
        }
        b.push(height - 1, RANGE_H);
        b.push(width - 1, RANGE_W);
        b.push(version, RANGE_V);

        /* Pop Unicode characters from the bitstream and print them */
        for(int i = 0; i < MAX_MSG_LEN; i++)
            fwrite_utf8(stdout, index2uni(b.pop(NUM_CHARACTERS)));
        fprintf(stdout, "\n");
    }
    else
    {
        /* Pop points from the bitstream */
        for(int i = dw * dh; i--; )
        {
            if(POINTS_PER_CELL == 2)
            {
                uint32_t pack = b.pop(RANGE_XY2);
                int x1, y1, x2, y2;
                unpack_coords(pack, &x1, &y1, &x2, &y2);

                points[i * 2 + 1].y = y2;
                points[i * 2 + 1].x = x2;
                points[i * 2 + 1].r = b.pop(RANGE_R);
                points[i * 2 + 1].g = b.pop(RANGE_G);
                points[i * 2 + 1].b = b.pop(RANGE_B);
                points[i * 2 + 1].s = b.pop(RANGE_S);
                points[i * 2].y = y1;
                points[i * 2].x = x1;
                points[i * 2].r = b.pop(RANGE_R);
                points[i * 2].g = b.pop(RANGE_G);
                points[i * 2].b = b.pop(RANGE_B);
                points[i * 2].s = b.pop(RANGE_S);
            }
            else
            {
                points[i].y = b.pop(RANGE_Y);
                points[i].x = b.pop(RANGE_X);
                points[i].r = b.pop(RANGE_R);
                points[i].g = b.pop(RANGE_G);
                points[i].b = b.pop(RANGE_B);
                points[i].s = b.pop(RANGE_S);
            }
        }
        npoints = dw * dh * POINTS_PER_CELL;

        /* Render these points to a new image */
        dst = pipi_new(width, height);
        render(dst, 0, 0, width, height);

        /* Save image and bail out */
        pipi_save(dst, dstname);
        pipi_free(dst);
    }

    return EXIT_SUCCESS;
}