diff --git a/pipi/dither/dbs.c b/pipi/dither/dbs.c
index a046514..2efead2 100644
--- a/pipi/dither/dbs.c
+++ b/pipi/dither/dbs.c
@@ -19,11 +19,16 @@
 #include "config.h"
 #include "common.h"
 
+#include <string.h>
+#include <stdlib.h>
+
 #include <math.h>
 
 #include "pipi.h"
 #include "pipi_internals.h"
 
+#define CELL 16
+
 #define N 7
 #define NN ((N * 2 + 1))
 
@@ -36,17 +41,19 @@ pipi_image_t *pipi_dbs(pipi_image_t *src)
     double t = 0.;
     pipi_image_t *dst, *tmp1, *tmp2;
     pipi_pixels_t *srcp, *dstp, *tmp1p, *tmp2p;
+    int *changelist;
     float *srcdata, *dstdata, *tmp1data, *tmp2data;
-    int i, j, w, h;
+    int i, j, x, y, w, h, cw, ch;
 
+    /* Build our human visual system kernel. */
     for(j = 0; j < NN; j++)
         for(i = 0; i < NN; i++)
         {
             double a = (double)(i - N);
             double b = (double)(j - N);
             kernel[j * NN + i] =
-                    1.0 * exp(-(a * a + b * b) / (2. * 1.5 * 1.5))
-                  + 0.1 * exp(-(a * a + b * b) / (2. * 0.6 * 0.6));
+                    exp(-(a * a + b * b) / (2. * 1.6 * 1.6))
+                  + exp(-(a * a + b * b) / (2. * 0.6 * 0.6));
             t += kernel[j * NN + i];
         }
 
@@ -57,6 +64,11 @@ pipi_image_t *pipi_dbs(pipi_image_t *src)
     w = src->w;
     h = src->h;
 
+    cw = (w + CELL - 1) / CELL;
+    ch = (h + CELL - 1) / CELL;
+    changelist = malloc(cw * ch * sizeof(int));
+    memset(changelist, 0, cw * ch * sizeof(int));
+
     srcp = pipi_getpixels(src, PIPI_PIXELS_Y_F);
     srcdata = (float *)srcp->pixels;
 
@@ -71,129 +83,149 @@ pipi_image_t *pipi_dbs(pipi_image_t *src)
     dstp = pipi_getpixels(dst, PIPI_PIXELS_Y_F);
     dstdata = (float *)dstp->pixels;
 
+    for(y = 0; y < h; y++)
+        for(x = 0; x < w; x++)
+            dstdata[y * w + x] = srcdata[y * w + x] > 0.5 ? 1.0 : 0.0;
+
     tmp2 = pipi_convolution(dst, NN, NN, kernel);
     tmp2p = pipi_getpixels(tmp2, PIPI_PIXELS_Y_F);
     tmp2data = (float *)tmp2p->pixels;
 
     for(;;)
     {
-        int changes = 0;
-        int x, y, n;
+        int cx, cy, n;
+        int allchanges = 0;
 
-        for(y = 0; y < h; y++)
-        for(x = 0; x < w; x++)
+        for(cy = 0; cy < ch; cy++)
+        for(cx = 0; cx < cw; cx++)
         {
-            double d, d2, e, best = 0.;
-            int opx = -1, opy = -1;
+            int changes = 0;
 
-            d = dstdata[y * w + x];
+            if(changelist[cy * cw + cx] >= 2)
+                continue;
 
-            /* Compute the effect of a toggle */
-            e = 0.;
-            for(j = -N; j < N + 1; j++)
+            for(y = cy * CELL; y < (cy + 1) * CELL; y++)
+            for(x = cx * CELL; x < (cx + 1) * CELL; x++)
             {
-                if(y + j < 0 || y + j >= h)
-                    continue;
-
-                for(i = -N; i < N + 1; i++)
-                {
-                    double m, p, q1, q2;
-
-                    if(x + i < 0 || x + i >= w)
-                        continue;
+                double d, d2, e, best = 0.;
+                int opx = -1, opy = -1;
 
-                    m = kernel[(j + N) * NN + i + N];
-                    p = tmp1data[(y + j) * w + x + i];
-                    q1 = tmp2data[(y + j) * w + x + i];
-                    q2 = q1 - m * d + m * (1. - d);
-                    e += (q1 - p) * (q1 - p) - (q2 - p) * (q2 - p);
-                }
-            }
-            if(e > best)
-            {
-                best = e;
-                opx = opy = 0;
-            }
+                d = dstdata[y * w + x];
 
-            /* Compute the effect of swaps */
-            for(n = 0; n < 8; n++)
-            {
-                static int const step[] =
-                    { 0, 1, 0, -1, -1, 0, 1, 0, -1, -1, -1, 1, 1, -1, 1, 1 };
-                int idx = step[n * 2], idy = step[n * 2 + 1];
-                if(y + idy < 0 || y + idy >= h
-                     || x + idx < 0 || x + idx >= w)
-                    continue;
-                d2 = dstdata[(y + idy) * w + x + idx];
-                if(d2 == d)
-                    continue;
+                /* Compute the effect of a toggle */
                 e = 0.;
                 for(j = -N; j < N + 1; j++)
                 {
                     if(y + j < 0 || y + j >= h)
                         continue;
-                    if(j - idy + N < 0 || j - idy + N >= NN)
-                        continue;
+
                     for(i = -N; i < N + 1; i++)
                     {
-                        double ma, mb, p, q1, q2;
+                        double m, p, q1, q2;
+
                         if(x + i < 0 || x + i >= w)
                             continue;
-                        if(i - idx + N < 0 || i - idx + N >= NN)
-                            continue;
-                        ma = kernel[(j + N) * NN + i + N];
-                        mb = kernel[(j - idy + N) * NN + i - idx + N];
+
+                        m = kernel[(j + N) * NN + i + N];
                         p = tmp1data[(y + j) * w + x + i];
                         q1 = tmp2data[(y + j) * w + x + i];
-                        q2 = q1 - ma * d + ma * d2 - mb * d2 + mb * d;
+                        q2 = q1 - m * d + m * (1. - d);
                         e += (q1 - p) * (q1 - p) - (q2 - p) * (q2 - p);
                     }
                 }
                 if(e > best)
                 {
                     best = e;
-                    opx = idx;
-                    opy = idy;
+                    opx = opy = 0;
                 }
-            }
 
-            /* Apply the change if interesting */
-            if(best <= 0.)
-                continue;
-            if(opx || opy)
-            {
-                d2 = dstdata[(y + opy) * w + x + opx];
-                dstdata[(y + opy) * w + x + opx] = d;
-            }
-            else
-                d2 = 1. - d;
-            dstdata[y * w + x] = d2;
-            for(j = -N; j < N + 1; j++)
-            for(i = -N; i < N + 1; i++)
-            {
-                double m = kernel[(j + N) * NN + i + N];
-                if(y + j >= 0 && y + j < h
-                    && x + i >= 0 && x + i < w)
+                /* Compute the effect of swaps */
+                for(n = 0; n < 8; n++)
                 {
-                    t = tmp2data[(y + j) * w + x + i];
-                    tmp2data[(y + j) * w + x + i] = t + m * (d2 - d);
+                    static int const step[] =
+                      { 0, 1, 0, -1, -1, 0, 1, 0, -1, -1, -1, 1, 1, -1, 1, 1 };
+                    int idx = step[n * 2], idy = step[n * 2 + 1];
+                    if(y + idy < 0 || y + idy >= h
+                         || x + idx < 0 || x + idx >= w)
+                        continue;
+                    d2 = dstdata[(y + idy) * w + x + idx];
+                    if(d2 == d)
+                        continue;
+                    e = 0.;
+                    for(j = -N; j < N + 1; j++)
+                    {
+                        if(y + j < 0 || y + j >= h)
+                            continue;
+                        if(j - idy + N < 0 || j - idy + N >= NN)
+                            continue;
+                        for(i = -N; i < N + 1; i++)
+                        {
+                            double ma, mb, p, q1, q2;
+                            if(x + i < 0 || x + i >= w)
+                                continue;
+                            if(i - idx + N < 0 || i - idx + N >= NN)
+                                continue;
+                            ma = kernel[(j + N) * NN + i + N];
+                            mb = kernel[(j - idy + N) * NN + i - idx + N];
+                            p = tmp1data[(y + j) * w + x + i];
+                            q1 = tmp2data[(y + j) * w + x + i];
+                            q2 = q1 - ma * d + ma * d2 - mb * d2 + mb * d;
+                            e += (q1 - p) * (q1 - p) - (q2 - p) * (q2 - p);
+                        }
+                    }
+                    if(e > best)
+                    {
+                        best = e;
+                        opx = idx;
+                        opy = idy;
+                    }
                 }
-                if((opx || opy) && y + opy + j >= 0 && y + opy + j < h
-                                && x + opx + i >= 0 && x + opx + i < w)
+
+                /* Apply the change if interesting */
+                if(best <= 0.)
+                    continue;
+                if(opx || opy)
+                {
+                    d2 = dstdata[(y + opy) * w + x + opx];
+                    dstdata[(y + opy) * w + x + opx] = d;
+                }
+                else
+                    d2 = 1. - d;
+                dstdata[y * w + x] = d2;
+                for(j = -N; j < N + 1; j++)
+                for(i = -N; i < N + 1; i++)
                 {
-                    t = tmp2data[(y + opy + j) * w + x + opx + i];
-                    tmp2data[(y + opy + j) * w + x + opx + i]
-                            = t + m * (d - d2);
+                    double m = kernel[(j + N) * NN + i + N];
+                    if(y + j >= 0 && y + j < h
+                        && x + i >= 0 && x + i < w)
+                    {
+                        t = tmp2data[(y + j) * w + x + i];
+                        tmp2data[(y + j) * w + x + i] = t + m * (d2 - d);
+                    }
+                    if((opx || opy) && y + opy + j >= 0 && y + opy + j < h
+                                    && x + opx + i >= 0 && x + opx + i < w)
+                    {
+                        t = tmp2data[(y + opy + j) * w + x + opx + i];
+                        tmp2data[(y + opy + j) * w + x + opx + i]
+                                = t + m * (d - d2);
+                    }
                 }
+
+                changes++;
             }
 
-            changes++;
+            if(changes == 0)
+                changelist[cy * cw + cx]++;
+
+            allchanges += changes;
         }
 
-        if(changes == 0)
+        if(allchanges == 0)
             break;
     }
 
+    free(changelist);
+
     pipi_free(tmp1);
     pipi_free(tmp2);