diff --git a/pipi/filter/convolution.c b/pipi/filter/convolution.c
index e00b721..143f304 100644
--- a/pipi/filter/convolution.c
+++ b/pipi/filter/convolution.c
@@ -27,7 +27,71 @@
 #include "pipi.h"
 #include "pipi_internals.h"
 
+static pipi_image_t *pipi_convolution_standard(pipi_image_t *src,
+                                               int m, int n, double mat[]);
+static pipi_image_t *pipi_convolution_separable(pipi_image_t *src,
+                                                int m, double hvec[],
+                                                int n, double vvec[]);
+
 pipi_image_t *pipi_convolution(pipi_image_t *src, int m, int n, double mat[])
+{
+    double tmp;
+    double *hvec, *vvec;
+    int i, j, besti = -1, bestj = -1;
+
+    /* Find the cell with the largest value */
+    tmp = 0.0;
+    for(i = 0; i < m * n; i++)
+        if(mat[i] * mat[i] > tmp)
+        {
+            tmp = mat[i] * mat[i];
+            besti = i % m;
+            bestj = i / m;
+        }
+
+    /* If the kernel is empty, return an empty picture */
+    if(tmp == 0.0)
+        return pipi_new(src->w, src->h);
+
+    /* Check whether the matrix rank is 1 */
+    for(j = 0; j < n; j++)
+    {
+        if(j == bestj)
+            continue;
+
+        for(i = 0; i < m; i++)
+        {
+            double p, q;
+
+            if(i == besti)
+                continue;
+
+            p = mat[j * m + i] * mat[bestj * m + besti];
+            q = mat[bestj * m + i] * mat[j * m + besti];
+
+            if(fabs(p - q) > 0.0001 * 0.0001)
+                return pipi_convolution_standard(src, m, n, mat);
+        }
+    }
+
+printf("separabble!\n");
+
+    /* Matrix rank is 1! Separate the filter */
+    /* FIXME: memleak */
+    hvec = malloc(m * sizeof(double));
+    vvec = malloc(n * sizeof(double));
+
+    tmp = sqrt(fabs(mat[bestj * m + besti]));
+    for(i = 0; i < m; i++)
+        hvec[i] = mat[bestj * m + i] / tmp;
+    for(j = 0; j < n; j++)
+        vvec[j] = mat[j * m + besti] / tmp;
+
+    return pipi_convolution_separable(src, m, hvec, n, vvec);
+}
+
+static pipi_image_t *pipi_convolution_standard(pipi_image_t *src,
+                                               int m, int n, double mat[])
 {
     pipi_image_t *dst;
     pipi_pixels_t *srcp, *dstp;
@@ -65,11 +129,11 @@ pipi_image_t *pipi_convolution(pipi_image_t *src, int m, int n, double mat[])
 
                     for(i = 0; i < m; i++)
                     {
-                        x2 = x + i;
+                        x2 = x + i - m / 2;
                         if(x2 < 0) x2 = 0;
                         else if(x2 >= w) x2 = w - 1;
 
-                        Y += mat[j * m + i] * srcdata[y * w + x2];
+                        Y += mat[j * m + i] * srcdata[y2 * w + x2];
                     }
                 }
 
@@ -90,13 +154,13 @@ pipi_image_t *pipi_convolution(pipi_image_t *src, int m, int n, double mat[])
                     {
                         double f = mat[j * m + i];
 
-                        x2 = x + i;
+                        x2 = x + i - m / 2;
                         if(x2 < 0) x2 = 0;
                         else if(x2 >= w) x2 = w - 1;
 
-                        R += f * srcdata[(y * w + x2) * 4];
-                        G += f * srcdata[(y * w + x2) * 4 + 1];
-                        B += f * srcdata[(y * w + x2) * 4 + 2];
+                        R += f * srcdata[(y2 * w + x2) * 4];
+                        G += f * srcdata[(y2 * w + x2) * 4 + 1];
+                        B += f * srcdata[(y2 * w + x2) * 4 + 2];
                     }
                 }
 
@@ -110,3 +174,124 @@ pipi_image_t *pipi_convolution(pipi_image_t *src, int m, int n, double mat[])
     return dst;
 }
 
+static pipi_image_t *pipi_convolution_separable(pipi_image_t *src,
+                                                int m, double hvec[],
+                                                int n, double vvec[])
+{
+    pipi_image_t *dst;
+    pipi_pixels_t *srcp, *dstp;
+    float *srcdata, *dstdata;
+    double *buffer;
+    int x, y, i, j, w, h, gray;
+
+    w = src->w;
+    h = src->h;
+
+    gray = (src->last_modified == PIPI_PIXELS_Y_F);
+
+    srcp = gray ? pipi_getpixels(src, PIPI_PIXELS_Y_F)
+                : pipi_getpixels(src, PIPI_PIXELS_RGBA_F);
+    srcdata = (float *)srcp->pixels;
+
+    dst = pipi_new(w, h);
+    dstp = gray ? pipi_getpixels(dst, PIPI_PIXELS_Y_F)
+                : pipi_getpixels(dst, PIPI_PIXELS_RGBA_F);
+    dstdata = (float *)dstp->pixels;
+
+    buffer = malloc(w * h * (gray ? 1 : 4) * sizeof(double));
+
+    for(y = 0; y < h; y++)
+    {
+        for(x = 0; x < w; x++)
+        {
+            if(gray)
+            {
+                double Y = 0.;
+                int x2;
+
+                for(i = 0; i < m; i++)
+                {
+                    x2 = x + i - m / 2;
+                    if(x2 < 0) x2 = 0;
+                    else if(x2 >= w) x2 = w - 1;
+
+                    Y += hvec[i] * srcdata[y * w + x2];
+                }
+
+                buffer[y * w + x] = Y;
+            }
+            else
+            {
+                double R = 0., G = 0., B = 0.;
+                int x2, off = 4 * (y * w + x);
+
+                for(i = 0; i < m; i++)
+                {
+                    double f = hvec[i];
+
+                    x2 = x + i - m / 2;
+                    if(x2 < 0) x2 = 0;
+                    else if(x2 >= w) x2 = w - 1;
+
+                    R += f * srcdata[(y * w + x2) * 4];
+                    G += f * srcdata[(y * w + x2) * 4 + 1];
+                    B += f * srcdata[(y * w + x2) * 4 + 2];
+                }
+
+                buffer[off] = R;
+                buffer[off + 1] = G;
+                buffer[off + 2] = B;
+            }
+        }
+    }
+
+    for(y = 0; y < h; y++)
+    {
+        for(x = 0; x < w; x++)
+        {
+            if(gray)
+            {
+                double Y = 0.;
+                int y2;
+
+                for(j = 0; j < n; j++)
+                {
+                    y2 = y + j - n / 2;
+                    if(y2 < 0) y2 = 0;
+                    else if(y2 >= h) y2 = h - 1;
+
+                    Y += vvec[j] * buffer[y2 * w + x];
+                }
+
+                dstdata[y * w + x] = Y;
+            }
+            else
+            {
+                double R = 0., G = 0., B = 0.;
+                int y2, off = 4 * (y * w + x);
+
+                for(j = 0; j < n; j++)
+                {
+                    double f = vvec[j];
+
+                    y2 = y + j - n / 2;
+                    if(y2 < 0) y2 = 0;
+                    else if(y2 >= h) y2 = h - 1;
+
+                    R += f * buffer[(y2 * w + x) * 4];
+                    G += f * buffer[(y2 * w + x) * 4 + 1];
+                    B += f * buffer[(y2 * w + x) * 4 + 2];
+                }
+
+                dstdata[off] = R;
+                dstdata[off + 1] = G;
+                dstdata[off + 2] = B;
+            }
+        }
+    }
+
+    free(buffer);
+
+    return dst;
+}
+