/*
 *  libpipi       Proper image processing implementation library
 *  Copyright (c) 2004-2008 Sam Hocevar <sam@zoy.org>
 *                All Rights Reserved
 *
 *  $Id$
 *
 *  This library is free software. It comes without any warranty, to
 *  the extent permitted by applicable law. You can redistribute it
 *  and/or modify it under the terms of the Do What The Fuck You Want
 *  To Public License, Version 2, as published by Sam Hocevar. See
 *  http://sam.zoy.org/wtfpl/COPYING for more details.
 */

/*
 * blur.c: blur functions
 */

#include "config.h"
#include "common.h"

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#include "pipi.h"
#include "pipi_internals.h"

/* Any standard deviation below this value will be rounded up, in order
 * to avoid ridiculously low values. exp(-1/(2*0.2*0.2)) is < 10^-5 so
 * there is little chance that any value below 0.2 will be useful. */
#define BLUR_EPSILON 0.2

/* Blur an image with the same Gaussian radius along both axes and no
 * offset of the kernel centre. This is a convenience front-end to
 * pipi_gaussian_blur_ext(), whose result is returned unchanged. */
pipi_image_t *pipi_gaussian_blur(pipi_image_t *src, float radius)
{
    /* The kernel stays centred on each pixel in both directions. */
    float const centred = 0.0f;
    pipi_image_t *blurred;

    blurred = pipi_gaussian_blur_ext(src, radius, radius, centred, centred);

    return blurred;
}

/* Build a 1D Gaussian kernel of standard deviation sigma whose centre is
 * displaced by shift samples. The kernel covers [-radius, radius] with
 * radius = (int)(3 * sigma + 1.99999). On success the radius and the sum
 * of all weights are stored through the output pointers and a malloc()ed
 * array of 2 * radius + 1 weights is returned; the caller must free() it.
 * Returns NULL if the allocation fails. */
static double *blur_kernel(float sigma, float shift, int *radius, double *sum)
{
    double *kernel;
    double K, total = 0.;
    int i, kr;

    kr = (int)(3. * sigma + 1.99999);
    K = -1. / (2. * sigma * sigma);

    kernel = malloc((2 * (size_t)kr + 1) * sizeof *kernel);
    if(!kernel)
        return NULL;

    for(i = -kr; i <= kr; i++)
    {
        kernel[i + kr] = exp(K * ((double)i - shift) * ((double)i - shift));
        total += kernel[i + kr];
    }

    *radius = kr;
    *sum = total;

    return kernel;
}

/* Apply a separable Gaussian blur to an image.
 *
 *  src     image to blur; it is read as RGBA float pixels
 *  rx, ry  horizontal and vertical standard deviations; values below
 *          BLUR_EPSILON are raised to BLUR_EPSILON
 *  dx, dy  horizontal and vertical displacement of the kernel centre,
 *          in pixels
 *
 * The image is filtered in two passes: a horizontal pass from the source
 * into a temporary double-precision buffer, then a vertical pass from that
 * buffer into the destination. Coordinates falling outside the image are
 * clamped to the nearest edge pixel. All four channels, alpha included,
 * are filtered independently and normalised by the sum of kernel weights.
 *
 * Returns a new image created with pipi_new(), or NULL if the working
 * memory could not be allocated. */
pipi_image_t *pipi_gaussian_blur_ext(pipi_image_t *src, float rx, float ry,
                                     float dx, float dy)
{
    pipi_image_t *dst;
    pipi_pixels_t *srcp, *dstp;
    float *srcdata, *dstdata;
    double *kernelx, *kernely, *buffer = NULL;
    double tx = 0., ty = 0.;
    size_t npixels;
    int x, y, i, c, w, h, krx = 0, kry = 0;

    if(rx < BLUR_EPSILON) rx = BLUR_EPSILON;
    if(ry < BLUR_EPSILON) ry = BLUR_EPSILON;

    w = src->w;
    h = src->h;
    /* Count pixels in size_t so that large images do not overflow int. */
    npixels = (w > 0 && h > 0) ? (size_t)w * (size_t)h : 0;

    srcp = pipi_getpixels(src, PIPI_PIXELS_RGBA_F);
    srcdata = (float *)srcp->pixels;

    /* Acquire all working memory before creating the destination image,
     * so that a failure can be reported without leaving a half-built
     * image behind. An empty image needs no intermediate buffer. */
    kernelx = blur_kernel(rx, dx, &krx, &tx);
    kernely = blur_kernel(ry, dy, &kry, &ty);
    if(npixels > 0 && npixels <= (size_t)-1 / (4 * sizeof *buffer))
        buffer = malloc(npixels * 4 * sizeof *buffer);

    if(!kernelx || !kernely || (npixels > 0 && !buffer))
    {
        free(buffer);
        free(kernely);
        free(kernelx);
        return NULL;
    }

    dst = pipi_new(w, h);
    dstp = pipi_getpixels(dst, PIPI_PIXELS_RGBA_F);
    dstdata = (float *)dstp->pixels;

    /* Horizontal pass: source pixels -> intermediate buffer */
    for(y = 0; y < h; y++)
    {
        const float *row = srcdata + 4 * ((size_t)y * w);

        for(x = 0; x < w; x++)
        {
            double acc[4] = { 0., 0., 0., 0. };
            size_t off = 4 * ((size_t)y * w + x);

            for(i = -krx; i <= krx; i++)
            {
                double f = kernelx[i + krx];
                int x2 = x + i;

                /* Replicate the edge pixel outside the image */
                if(x2 < 0) x2 = 0;
                else if(x2 >= w) x2 = w - 1;

                for(c = 0; c < 4; c++)
                    acc[c] += f * row[4 * (size_t)x2 + c];
            }

            for(c = 0; c < 4; c++)
                buffer[off + c] = acc[c] / tx;
        }
    }

    /* Vertical pass: intermediate buffer -> destination pixels */
    for(y = 0; y < h; y++)
    {
        for(x = 0; x < w; x++)
        {
            double acc[4] = { 0., 0., 0., 0. };
            size_t off = 4 * ((size_t)y * w + x);

            for(i = -kry; i <= kry; i++)
            {
                double f = kernely[i + kry];
                int y2 = y + i;
                size_t in;

                /* Replicate the edge pixel outside the image */
                if(y2 < 0) y2 = 0;
                else if(y2 >= h) y2 = h - 1;

                in = 4 * ((size_t)y2 * w + x);
                for(c = 0; c < 4; c++)
                    acc[c] += f * buffer[in + c];
            }

            for(c = 0; c < 4; c++)
                dstdata[off + c] = acc[c] / ty;
        }
    }

    free(buffer);
    free(kernely);
    free(kernelx);

    return dst;
}