Fix truncated names that split a UTF-8 sequence.

This commit is contained in:
Enno Rehling 2016-11-20 14:50:38 +01:00
parent a08563e846
commit 55a0388eb2
2 changed files with 26 additions and 5 deletions

View File

@ -40,12 +40,19 @@ int unicode_utf8_trim(utf8_t *buf)
size_t size = 1;
wint_t wc = *ip;
if (wc & 0x80) {
ucs4_t ucs;
int ret = unicode_utf8_to_ucs4(&ucs, ip, &size);
if (ret != 0) {
return ret;
ucs4_t ucs = 0;
if (ip[1]) {
int ret = unicode_utf8_to_ucs4(&ucs, ip, &size);
if (ret != 0) {
return ret;
}
wc = (wint_t)ucs;
}
else {
wc = *op = '?';
size = 1;
++result;
}
wc = (wint_t)ucs;
}
if (op == buf && iswspace(wc)) {
++result;

View File

@ -28,6 +28,10 @@ static void test_unicode_trim(CuTest * tc)
strcpy(buffer, " \t Hello Word");
CuAssertIntEquals(tc, 3, unicode_utf8_trim(buffer));
CuAssertStrEquals(tc, "Hello Word", buffer);
buffer[9] = 0xc3;
CuAssertIntEquals(tc, 1, unicode_utf8_trim(buffer));
CuAssertStrEquals(tc, "Hello Wor?", buffer);
}
static void test_unicode_tolower(CuTest * tc)
@ -87,11 +91,21 @@ static void test_unicode_utf8_to_other(CuTest *tc)
CuAssertIntEquals(tc, 'l', ch);
}
/* Decoding the one-byte string "a" must return 0, produce the code
 * point 'a', and report a sequence size of 1. */
static void test_unicode_utf8_to_ucs(CuTest *tc) {
    size_t consumed;
    ucs4_t codepoint;
    int rc = unicode_utf8_to_ucs4(&codepoint, "a", &consumed);

    CuAssertIntEquals(tc, 0, rc);
    CuAssertIntEquals(tc, 'a', codepoint);
    CuAssertIntEquals(tc, 1, consumed);
}
/*
 * Creates a new CuTest suite and registers the unicode tests on it.
 * Tests are added in the order listed below; the new suite is returned.
 */
CuSuite *get_unicode_suite(void)
{
    CuSuite *unicode_tests = CuSuiteNew();

    /* Each test is registered with its own SUITE_ADD_TEST call. */
    SUITE_ADD_TEST(unicode_tests, test_unicode_tolower);
    SUITE_ADD_TEST(unicode_tests, test_unicode_trim);
    SUITE_ADD_TEST(unicode_tests, test_unicode_utf8_to_other);
    SUITE_ADD_TEST(unicode_tests, test_unicode_utf8_to_ucs);

    return unicode_tests;
}