Fix truncated names that split a UTF-8 sequence.

2016-11-20 14:50:38 +01:00 · 2016-11-20 14:50:38 +01:00 · 55a0388eb2
parent a08563e846
commit 55a0388eb2
2 changed files with 26 additions and 5 deletions
--- a/src/util/unicode.c
+++ b/src/util/unicode.c
@ -40,12 +40,19 @@ int unicode_utf8_trim(utf8_t *buf)
        size_t size = 1;
        wint_t wc = *ip;
        if (wc & 0x80) {
-            ucs4_t ucs;
-            int ret = unicode_utf8_to_ucs4(&ucs, ip, &size);
-            if (ret != 0) {
-                return ret;
+            ucs4_t ucs = 0;
+            if (ip[1]) {
+                int ret = unicode_utf8_to_ucs4(&ucs, ip, &size);
+                if (ret != 0) {
+                    return ret;
+                }
+                wc = (wint_t)ucs;
+            }
+            else {
+                wc = *op = '?';
+                size = 1;
+                ++result;
            }
-            wc = (wint_t)ucs;
        }
        if (op == buf && iswspace(wc)) {
            ++result;
--- a/src/util/unicode.test.c
+++ b/src/util/unicode.test.c
@ -28,6 +28,10 @@ static void test_unicode_trim(CuTest * tc)
    strcpy(buffer, " \t Hello Word");
    CuAssertIntEquals(tc, 3, unicode_utf8_trim(buffer));
    CuAssertStrEquals(tc, "Hello Word", buffer);
+
+    buffer[9] = 0xc3;
+    CuAssertIntEquals(tc, 1, unicode_utf8_trim(buffer));
+    CuAssertStrEquals(tc, "Hello Wor?", buffer);
 }

 static void test_unicode_tolower(CuTest * tc)
@ -87,11 +91,21 @@ static void test_unicode_utf8_to_other(CuTest *tc)
    CuAssertIntEquals(tc, 'l', ch);
 }

+static void test_unicode_utf8_to_ucs(CuTest *tc) { /* exercises unicode_utf8_to_ucs4 on a one-byte input */
+    ucs4_t ucs; /* not initialized here; asserted only after the call below */
+    size_t sz; /* not initialized here; asserted only after the call below */
+    /* Decoding "a" is expected to return 0, yield 'a' in ucs and report a size of 1. */
+    CuAssertIntEquals(tc, 0, unicode_utf8_to_ucs4(&ucs, "a", &sz));
+    CuAssertIntEquals(tc, 'a', ucs);
+    CuAssertIntEquals(tc, 1, sz);
+}
+
CuSuite *get_unicode_suite(void)
{
    CuSuite *suite = CuSuiteNew(); /* new suite that collects the tests registered below */
    SUITE_ADD_TEST(suite, test_unicode_tolower);
    SUITE_ADD_TEST(suite, test_unicode_trim);
    SUITE_ADD_TEST(suite, test_unicode_utf8_to_other);
+    SUITE_ADD_TEST(suite, test_unicode_utf8_to_ucs); /* registers the unicode_utf8_to_ucs4 test */
    return suite; /* not freed here; presumably the caller takes ownership (confirm against the test runner) */
 }