diff --git a/src/util/unicode.c b/src/util/unicode.c
index f9a6d48ec..161983ba7 100644
--- a/src/util/unicode.c
+++ b/src/util/unicode.c
@@ -40,12 +40,19 @@ int unicode_utf8_trim(utf8_t *buf)
         size_t size = 1;
         wint_t wc = *ip;
         if (wc & 0x80) {
-            ucs4_t ucs;
-            int ret = unicode_utf8_to_ucs4(&ucs, ip, &size);
-            if (ret != 0) {
-                return ret;
+            ucs4_t ucs = 0;
+            if (ip[1]) {
+                int ret = unicode_utf8_to_ucs4(&ucs, ip, &size);
+                if (ret != 0) {
+                    return ret;
+                }
+                wc = (wint_t)ucs;
+            }
+            else {
+                wc = *op = '?';
+                size = 1;
+                ++result;
             }
-            wc = (wint_t)ucs;
         }
         if (op == buf && iswspace(wc)) {
             ++result;
diff --git a/src/util/unicode.test.c b/src/util/unicode.test.c
index 679c19d6b..e16ec7f96 100644
--- a/src/util/unicode.test.c
+++ b/src/util/unicode.test.c
@@ -28,6 +28,10 @@ static void test_unicode_trim(CuTest * tc)
     strcpy(buffer, " \t Hello Word");
     CuAssertIntEquals(tc, 3, unicode_utf8_trim(buffer));
     CuAssertStrEquals(tc, "Hello Word", buffer);
+
+    buffer[9] = 0xc3;
+    CuAssertIntEquals(tc, 1, unicode_utf8_trim(buffer));
+    CuAssertStrEquals(tc, "Hello Wor?", buffer);
 }
 
 static void test_unicode_tolower(CuTest * tc)
@@ -87,11 +91,21 @@ static void test_unicode_utf8_to_other(CuTest *tc)
     CuAssertIntEquals(tc, 'l', ch);
 }
 
+static void test_unicode_utf8_to_ucs(CuTest *tc) {
+    ucs4_t ucs;
+    size_t sz;
+
+    CuAssertIntEquals(tc, 0, unicode_utf8_to_ucs4(&ucs, "a", &sz));
+    CuAssertIntEquals(tc, 'a', ucs);
+    CuAssertIntEquals(tc, 1, sz);
+}
+
 CuSuite *get_unicode_suite(void)
 {
     CuSuite *suite = CuSuiteNew();
     SUITE_ADD_TEST(suite, test_unicode_tolower);
     SUITE_ADD_TEST(suite, test_unicode_trim);
     SUITE_ADD_TEST(suite, test_unicode_utf8_to_other);
+    SUITE_ADD_TEST(suite, test_unicode_utf8_to_ucs);
     return suite;
 }