version 1.20, 2015/11/13 08:09:28 |
version 1.21, 2015/11/14 10:56:31 |
|
|
/* |
/* |
* Append character to UTF-8, closing if finished. |
* Append character to UTF-8, closing if finished. |
* |
* |
* Returns 1 if more UTF-8 data to come, 0 if finished. |
* Returns 1 if more UTF-8 data to come, 0 if finished and valid, -1 if |
|
* finished and invalid. |
*/ |
*/ |
int |
int |
utf8_append(struct utf8_data *ud, u_char ch) |
utf8_append(struct utf8_data *ud, u_char ch) |
{ |
{ |
/* XXX this should do validity checks too! */ |
|
|
|
if (ud->have >= ud->size) |
if (ud->have >= ud->size) |
fatalx("UTF-8 character overflow"); |
fatalx("UTF-8 character overflow"); |
if (ud->size > sizeof ud->data) |
if (ud->size > sizeof ud->data) |
fatalx("UTF-8 character size too large"); |
fatalx("UTF-8 character size too large"); |
|
|
|
if (ud->have != 0 && (ch & 0xc0) != 0x80) |
|
ud->width = 0xff; |
|
|
ud->data[ud->have++] = ch; |
ud->data[ud->have++] = ch; |
if (ud->have != ud->size) |
if (ud->have != ud->size) |
return (1); |
return (1); |
|
|
|
if (ud->width == 0xff) |
|
return (-1); |
ud->width = utf8_width(utf8_combine(ud)); |
ud->width = utf8_width(utf8_combine(ud)); |
return (0); |
return (0); |
} |
} |
|
|
while (src < end) { |
while (src < end) { |
if (utf8_open(&ud, *src)) { |
if (utf8_open(&ud, *src)) { |
more = 1; |
more = 1; |
while (++src < end && more) |
while (++src < end && more == 1) |
more = utf8_append(&ud, *src); |
more = utf8_append(&ud, *src); |
if (!more) { |
if (more == 0) { |
/* UTF-8 character finished. */ |
/* UTF-8 character finished. */ |
for (i = 0; i < ud.size; i++) |
for (i = 0; i < ud.size; i++) |
*dst++ = ud.data[i]; |
*dst++ = ud.data[i]; |
continue; |
continue; |
} else if (ud.have > 0) { |
} else if (ud.have > 0) { |
/* Not a complete UTF-8 character. */ |
/* Not a complete, valid UTF-8 character. */ |
src -= ud.have; |
src -= ud.have; |
} |
} |
} |
} |
|
|
dst = xreallocarray(dst, n + 1, sizeof *dst); |
dst = xreallocarray(dst, n + 1, sizeof *dst); |
if (utf8_open(&ud, *src)) { |
if (utf8_open(&ud, *src)) { |
more = 1; |
more = 1; |
while (*++src != '\0' && more) |
while (*++src != '\0' && more == 1) |
more = utf8_append(&ud, *src); |
more = utf8_append(&ud, *src); |
if (!more) { |
if (more != 1) { |
dst = xreallocarray(dst, n + ud.width, |
dst = xreallocarray(dst, n + ud.width, |
sizeof *dst); |
sizeof *dst); |
for (i = 0; i < ud.width; i++) |
for (i = 0; i < ud.width; i++) |
|
|
src -= ud.have; |
src -= ud.have; |
} |
} |
if (*src > 0x1f && *src < 0x7f) |
if (*src > 0x1f && *src < 0x7f) |
dst[n] = *src; |
dst[n++] = *src; |
src++; |
src++; |
|
|
n++; |
|
} |
} |
|
|
dst = xreallocarray(dst, n + 1, sizeof *dst); |
dst = xreallocarray(dst, n + 1, sizeof *dst); |
|
|
dst = xreallocarray(dst, n + 1, sizeof *dst); |
dst = xreallocarray(dst, n + 1, sizeof *dst); |
if (utf8_open(&dst[n], *src)) { |
if (utf8_open(&dst[n], *src)) { |
more = 1; |
more = 1; |
while (*++src != '\0' && more) |
while (*++src != '\0' && more == 1) |
more = utf8_append(&dst[n], *src); |
more = utf8_append(&dst[n], *src); |
if (!more) { |
if (more != 1) { |
n++; |
n++; |
continue; |
continue; |
} |
} |
src -= dst[n].have; |
src -= dst[n].have; |
} |
} |
utf8_set(&dst[n], *src); |
if (*src > 0x1f && *src < 0x7f) { |
|
utf8_set(&dst[n], *src); |
|
n++; |
|
} |
src++; |
src++; |
|
|
n++; |
|
} |
} |
|
|
dst = xreallocarray(dst, n + 1, sizeof *dst); |
dst = xreallocarray(dst, n + 1, sizeof *dst); |
|
|
while (*s != '\0') { |
while (*s != '\0') { |
if (utf8_open(&tmp, *s)) { |
if (utf8_open(&tmp, *s)) { |
more = 1; |
more = 1; |
while (*++s != '\0' && more) |
while (*++s != '\0' && more == 1) |
more = utf8_append(&tmp, *s); |
more = utf8_append(&tmp, *s); |
if (!more) { |
if (more != 1) { |
width += tmp.width; |
width += tmp.width; |
continue; |
continue; |
} |
} |
s -= tmp.have; |
s -= tmp.have; |
} |
} |
width++; |
if (*s > 0x1f && *s < 0x7f) |
|
width++; |
s++; |
s++; |
} |
} |
return (width); |
return (width); |