From 55704cfce8ed8f119c2b264f891a5c740564b988 Mon Sep 17 00:00:00 2001 From: Enno Rehling Date: Sun, 9 Aug 2020 20:56:43 +0200 Subject: [PATCH] Parser fallback to latin1 when detecting invalid utf-8 --- src/util/parser.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/util/parser.c b/src/util/parser.c index 7e2ac665e..36a4b1565 100644 --- a/src/util/parser.c +++ b/src/util/parser.c @@ -173,8 +173,15 @@ char *parse_token(const char **str, char *lbuf, size_t buflen) else { int ret = unicode_utf8_decode(&wc, ctoken, &len); if (ret != 0) { - log_warning("illegal character sequence in UTF8 string: %s\n", ctoken); - break; + log_info("falling back to ISO-8859-1: %s\n", cstart); + if (cursor - buflen < lbuf - 2) { + size_t inlen = 1; + len = 2; + unicode_latin1_to_utf8(cursor, &len, ctoken, &inlen); + cursor += len; + ctoken += inlen; + continue; + } } } if (escape) {