/*
 *  libpipi       Proper image processing implementation library
 *  Copyright (c) 2004-2008 Sam Hocevar <sam@zoy.org>
 *                All Rights Reserved
 *
 *  $Id$
 *
 *  This library is free software. It comes without any warranty, to
 *  the extent permitted by applicable law. You can redistribute it
 *  and/or modify it under the terms of the Do What The Fuck You Want
 *  To Public License, Version 2, as published by Sam Hocevar. See
 *  http://sam.zoy.org/wtfpl/COPYING for more details.
 */

/*
 * convolution.c: generic convolution functions
 */

#include "config.h"
#include "common.h"

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#include "pipi.h"
#include "pipi_internals.h"

/* Direct 2D convolution with a full kernel of m columns by n rows,
 * stored row-major in mat[]. Defined later in this file. */
static pipi_image_t *pipi_convolution_standard(pipi_image_t *src,
                                               int m, int n, double mat[]);
/* Two-pass convolution: a horizontal pass with the m-tap hvec[] followed
 * by a vertical pass with the n-tap vvec[]. Defined later in this file. */
static pipi_image_t *pipi_convolution_separable(pipi_image_t *src,
                                                int m, double hvec[],
                                                int n, double vvec[]);

/*
 * pipi_convolution: convolve an image with an m x n kernel.
 *
 *   src  source image
 *   m    kernel width (number of columns)
 *   n    kernel height (number of rows)
 *   mat  kernel coefficients, row-major: mat[j * m + i] is column i of row j
 *
 * An all-zero kernel yields a fresh pipi_new() image of the source size.
 * If every 2x2 minor that goes through the largest-magnitude cell vanishes
 * (absolute tolerance 1e-8), the kernel is treated as rank 1 and split into
 * one horizontal and one vertical vector that are handed to
 * pipi_convolution_separable(). Any other kernel, or a failure to allocate
 * the two vectors, is handled by pipi_convolution_standard().
 *
 * Returns whatever pipi_new() or the selected helper returns.
 */
pipi_image_t *pipi_convolution(pipi_image_t *src, int m, int n, double mat[])
{
    pipi_image_t *ret;
    double *hvec, *vvec;
    double tmp, pivot, scale;
    int i, j, besti = -1, bestj = -1;

    /* Find the cell with the largest magnitude (compare squared values) */
    tmp = 0.0;
    for(i = 0; i < m * n; i++)
        if(mat[i] * mat[i] > tmp)
        {
            tmp = mat[i] * mat[i];
            besti = i % m;
            bestj = i / m;
        }

    /* If the kernel is empty, return an empty picture */
    if(tmp == 0.0)
        return pipi_new(src->w, src->h);

    /* Check whether the matrix rank is 1: every cell outside the pivot's
     * row and column must satisfy mat[j][i] * pivot == mat[bestj][i] *
     * mat[j][besti]. */
    for(j = 0; j < n; j++)
    {
        if(j == bestj)
            continue;

        for(i = 0; i < m; i++)
        {
            double p, q;

            if(i == besti)
                continue;

            p = mat[j * m + i] * mat[bestj * m + besti];
            q = mat[bestj * m + i] * mat[j * m + besti];

            if(fabs(p - q) > 0.0001 * 0.0001)
                return pipi_convolution_standard(src, m, n, mat);
        }
    }

    /* Matrix rank is 1! Separate the filter */
    hvec = malloc(m * sizeof(*hvec));
    vvec = malloc(n * sizeof(*vvec));
    if(!hvec || !vvec)
    {
        /* Out of memory: the generic path computes the same filter
         * without needing the temporary vectors. */
        free(hvec);
        free(vvec);
        return pipi_convolution_standard(src, m, n, mat);
    }

    /* For a rank-1 kernel, mat[j][i] == mat[bestj][i] * mat[j][besti] / pivot.
     * Share |pivot| evenly between both vectors and give the pivot's sign
     * to the vertical one, so that hvec[i] * vvec[j] reproduces mat[j][i]
     * even when the pivot is negative. */
    pivot = mat[bestj * m + besti];
    scale = sqrt(fabs(pivot));
    for(i = 0; i < m; i++)
        hvec[i] = mat[bestj * m + i] / scale;
    if(pivot < 0.0)
        scale = -scale;
    for(j = 0; j < n; j++)
        vvec[j] = mat[j * m + besti] / scale;

    ret = pipi_convolution_separable(src, m, hvec, n, vvec);

    /* The vectors are only read during the call above */
    free(hvec);
    free(vvec);

    return ret;
}

/*
 * pipi_convolution_standard: direct 2D convolution with a full kernel.
 *
 *   src  source image
 *   m    kernel width (number of columns)
 *   n    kernel height (number of rows)
 *   mat  kernel coefficients, row-major: mat[j * m + i] is column i of row j
 *
 * The image is processed as single-component float data when its
 * last_modified format is PIPI_PIXELS_Y_F, and as 4-float RGBA data
 * otherwise. The kernel is anchored at (m / 2, n / 2) and applied without
 * flipping; source coordinates that fall outside the image are clamped to
 * the nearest edge. In RGBA mode only the first three components of each
 * destination pixel are written by this function.
 *
 * Returns the image obtained from pipi_new().
 */
static pipi_image_t *pipi_convolution_standard(pipi_image_t *src,
                                               int m, int n, double mat[])
{
    pipi_image_t *result;
    pipi_pixels_t *inpix, *outpix;
    float *in, *out;
    int width, height, is_gray, stride, nchan;
    int row, col, kr, kc, c;

    width = src->w;
    height = src->h;

    is_gray = (src->last_modified == PIPI_PIXELS_Y_F);
    stride = is_gray ? 1 : 4; /* floats stored per pixel */
    nchan = is_gray ? 1 : 3;  /* components that get filtered */

    inpix = pipi_getpixels(src, is_gray ? PIPI_PIXELS_Y_F
                                        : PIPI_PIXELS_RGBA_F);
    in = (float *)inpix->pixels;

    result = pipi_new(width, height);
    outpix = pipi_getpixels(result, is_gray ? PIPI_PIXELS_Y_F
                                            : PIPI_PIXELS_RGBA_F);
    out = (float *)outpix->pixels;

    for(row = 0; row < height; row++)
    {
        for(col = 0; col < width; col++)
        {
            double acc[3] = { 0., 0., 0. };
            int base = stride * (row * width + col);

            for(kr = 0; kr < n; kr++)
            {
                /* Source row for this kernel row, clamped to the image */
                int sy = row + kr - n / 2;

                sy = sy < 0 ? 0 : (sy >= height ? height - 1 : sy);

                for(kc = 0; kc < m; kc++)
                {
                    double coef = mat[kr * m + kc];
                    const float *px;
                    /* Source column for this kernel column, clamped */
                    int sx = col + kc - m / 2;

                    sx = sx < 0 ? 0 : (sx >= width ? width - 1 : sx);

                    px = in + (sy * width + sx) * stride;
                    for(c = 0; c < nchan; c++)
                        acc[c] += coef * px[c];
                }
            }

            for(c = 0; c < nchan; c++)
                out[base + c] = acc[c];
        }
    }

    return result;
}

/*
 * pipi_convolution_separable: convolve an image with a separable kernel.
 *
 *   src   source image
 *   m     number of taps in the horizontal vector
 *   hvec  horizontal coefficients, applied along each row
 *   n     number of taps in the vertical vector
 *   vvec  vertical coefficients, applied along each column
 *
 * The image is processed as single-component float data when its
 * last_modified format is PIPI_PIXELS_Y_F, and as 4-float RGBA data
 * otherwise. A first pass filters every row with hvec[] into a temporary
 * double-precision buffer; a second pass filters every column of that
 * buffer with vvec[] into the destination. The vectors are anchored at
 * m / 2 and n / 2 and coordinates outside the image are clamped to the
 * nearest edge. In RGBA mode only the first three components of each
 * pixel are filtered and written.
 *
 * Returns the image obtained from pipi_new(), or NULL if the temporary
 * buffer cannot be allocated. hvec and vvec are only read and remain
 * owned by the caller.
 */
static pipi_image_t *pipi_convolution_separable(pipi_image_t *src,
                                                int m, double hvec[],
                                                int n, double vvec[])
{
    pipi_image_t *dst;
    pipi_pixels_t *srcp, *dstp;
    float *srcdata, *dstdata;
    double *buffer = NULL;
    int x, y, i, j, c, w, h, gray, stride, nchan;

    w = src->w;
    h = src->h;

    gray = (src->last_modified == PIPI_PIXELS_Y_F);
    stride = gray ? 1 : 4; /* values stored per pixel */
    nchan = gray ? 1 : 3;  /* components that get filtered */

    srcp = gray ? pipi_getpixels(src, PIPI_PIXELS_Y_F)
                : pipi_getpixels(src, PIPI_PIXELS_RGBA_F);
    srcdata = (float *)srcp->pixels;

    /* Allocate the intermediate buffer before creating the destination so
     * that a failure can bail out without leaving an image behind. The
     * size is computed in size_t to avoid overflowing int on large images;
     * an empty image needs no buffer since neither pass will run. */
    if(w > 0 && h > 0)
    {
        buffer = malloc((size_t)w * (size_t)h * (size_t)stride
                         * sizeof(*buffer));
        if(!buffer)
            return NULL;
    }

    dst = pipi_new(w, h);
    dstp = gray ? pipi_getpixels(dst, PIPI_PIXELS_Y_F)
                : pipi_getpixels(dst, PIPI_PIXELS_RGBA_F);
    dstdata = (float *)dstp->pixels;

    /* Horizontal pass: source rows -> buffer */
    for(y = 0; y < h; y++)
    {
        for(x = 0; x < w; x++)
        {
            double acc[3] = { 0., 0., 0. };
            size_t off = ((size_t)y * w + x) * stride;

            for(i = 0; i < m; i++)
            {
                const float *p;
                int x2 = x + i - m / 2;

                if(x2 < 0) x2 = 0;
                else if(x2 >= w) x2 = w - 1;

                p = srcdata + ((size_t)y * w + x2) * stride;
                for(c = 0; c < nchan; c++)
                    acc[c] += hvec[i] * p[c];
            }

            for(c = 0; c < nchan; c++)
                buffer[off + c] = acc[c];
        }
    }

    /* Vertical pass: buffer columns -> destination */
    for(y = 0; y < h; y++)
    {
        for(x = 0; x < w; x++)
        {
            double acc[3] = { 0., 0., 0. };
            size_t off = ((size_t)y * w + x) * stride;

            for(j = 0; j < n; j++)
            {
                const double *p;
                int y2 = y + j - n / 2;

                if(y2 < 0) y2 = 0;
                else if(y2 >= h) y2 = h - 1;

                p = buffer + ((size_t)y2 * w + x) * stride;
                for(c = 0; c < nchan; c++)
                    acc[c] += vvec[j] * p[c];
            }

            for(c = 0; c < nchan; c++)
                dstdata[off + c] = acc[c];
        }
    }

    free(buffer);

    return dst;
}