diff --git a/caca/codec/export.c b/caca/codec/export.c
index f525c31..704785c 100644
--- a/caca/codec/export.c
+++ b/caca/codec/export.c
@@ -503,9 +503,9 @@ static void *export_html3(caca_canvas_t const *cv, size_t *bytes)
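* A line: 10 chars for "<tr></tr>\n"
* A glyph: up to 48 chars for "<td bgcolor=\"#xxxxxx\"><tt><font color=\"#xxxxxx\">"
* up to 36 chars for "<b><i><u><blink></blink></u></i></b>"
- * up to 9 chars for "&#xxxxxx;" (far less for pure ASCII)
+ * up to 10 chars for "&#xxxxxxx;" (far less for pure ASCII)
* 17 chars for "</font></tt></td>" */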
- *bytes = 1000 + cv->height * (10 + maxcols * (48 + 36 + 9 + 17));
+ *bytes = 1000 + cv->height * (10 + maxcols * (48 + 36 + 10 + 17));
cur = data = malloc(*bytes);
cur += sprintf(cur, "\n");
@@ -583,7 +583,11 @@ static void *export_html3(caca_canvas_t const *cv, size_t *bytes)
if(linechar[x + i] == CACA_MAGIC_FULLWIDTH)
;
- else if(linechar[x + i] <= 0x00000020)
+ else if((linechar[x + i] <= 0x00000020)
+ ||
+ ((linechar[x + i] >= 0x0000007f)
+ &&
+ (linechar[x + i] <= 0x0000009f)))
{
/* Control characters and space converted to
* U+00A0 NO-BREAK SPACE, a.k.a. "&nbsp;" in HTML,
@@ -613,8 +617,18 @@ static void *export_html3(caca_canvas_t const *cv, size_t *bytes)
cur += sprintf(cur, "'");
else if(linechar[x + i] < 0x00000080)
cur += sprintf(cur, "%c", (uint8_t)linechar[x + i]);
- else
+ else if((linechar[x + i] <= 0x0010fffd)
+ &&
+ ((linechar[x + i] & 0x0000fffe) != 0x0000fffe)
+ &&
+ ((linechar[x + i] < 0x0000d800)
+ ||
+ (linechar[x + i] > 0x0000dfff)))
cur += sprintf(cur, "%i;", (unsigned int)linechar[x + i]);
+ else
+ /* surrogates, noncharacters and out-of-range values
+ * become U+FFFD REPLACEMENT CHARACTER */
+ cur += sprintf(cur, "%i;", (unsigned int)0x0000fffd);
if (((i + 1) == len) || (lineattr[x + i + 1] != lineattr[x + i]))
{