=================================================================== RCS file: /cvsrepo/anoncvs/cvs/src/usr.bin/tmux/utf8.c,v retrieving revision 1.52 retrieving revision 1.53 diff -c -r1.52 -r1.53 *** src/usr.bin/tmux/utf8.c 2020/06/02 20:10:23 1.52 --- src/usr.bin/tmux/utf8.c 2020/06/06 12:38:32 1.53 *************** *** 1,4 **** ! /* $OpenBSD: utf8.c,v 1.52 2020/06/02 20:10:23 nicm Exp $ */ /* * Copyright (c) 2008 Nicholas Marriott --- 1,4 ---- ! /* $OpenBSD: utf8.c,v 1.53 2020/06/06 12:38:32 nicm Exp $ */ /* * Copyright (c) 2008 Nicholas Marriott *************** *** 52,84 **** static u_int utf8_list_size; static u_int utf8_list_used; ! union utf8_map { ! utf8_char uc; ! struct { ! u_char flags; ! u_char data[3]; ! }; ! } __packed; ! #define UTF8_GET_SIZE(flags) ((flags) & 0x1f) ! #define UTF8_GET_WIDTH(flags) (((flags) >> 5) - 1) - #define UTF8_SET_SIZE(size) (size) - #define UTF8_SET_WIDTH(width) ((width + 1) << 5) - - static const union utf8_map utf8_space0 = { - .flags = UTF8_SET_WIDTH(0)|UTF8_SET_SIZE(0), - .data = "" - }; - static const union utf8_map utf8_space1 = { - .flags = UTF8_SET_WIDTH(1)|UTF8_SET_SIZE(1), - .data = " " - }; - static const union utf8_map utf8_space2 = { - .flags = UTF8_SET_WIDTH(2)|UTF8_SET_SIZE(2), - .data = " " - }; - /* Get a UTF-8 item by offset. */ static struct utf8_item * utf8_get_item(const char *data, size_t size) --- 52,63 ---- static u_int utf8_list_size; static u_int utf8_list_used; ! #define UTF8_GET_SIZE(uc) (((uc) >> 24) & 0x1f) ! #define UTF8_GET_WIDTH(flags) (((uc) >> 29) - 1) ! #define UTF8_SET_SIZE(size) (((utf8_char)(size)) << 24) ! #define UTF8_SET_WIDTH(width) ((((utf8_char)(width)) + 1) << 29) /* Get a UTF-8 item by offset. */ static struct utf8_item * utf8_get_item(const char *data, size_t size) *************** *** 139,172 **** enum utf8_state utf8_from_data(const struct utf8_data *ud, utf8_char *uc) { ! union utf8_map m = { .uc = 0 }; ! u_int offset; if (ud->width > 2) fatalx("invalid UTF-8 width"); if (ud->size > UTF8_SIZE) goto fail; ! m.flags = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width); ! if (ud->size <= 3) ! memcpy(m.data, ud->data, ud->size); ! else { ! if (utf8_put_item(ud->data, ud->size, &offset) != 0) ! goto fail; ! m.data[0] = (offset & 0xff); ! m.data[1] = (offset >> 8) & 0xff; ! m.data[2] = (offset >> 16); ! } ! *uc = htonl(m.uc); return (UTF8_DONE); fail: if (ud->width == 0) ! *uc = htonl(utf8_space0.uc); else if (ud->width == 1) ! *uc = htonl(utf8_space1.uc); else ! *uc = htonl(utf8_space2.uc); return (UTF8_ERROR); } --- 118,148 ---- enum utf8_state utf8_from_data(const struct utf8_data *ud, utf8_char *uc) { ! u_int offset; if (ud->width > 2) fatalx("invalid UTF-8 width"); if (ud->size > UTF8_SIZE) goto fail; ! if (ud->size <= 3) { ! offset = (((utf8_char)ud->data[2] << 16)| ! ((utf8_char)ud->data[1] << 8)| ! ((utf8_char)ud->data[0])); ! } else if (utf8_put_item(ud->data, ud->size, &offset) != 0) ! goto fail; ! *uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|offset; ! log_debug("%s: (%d %d %.*s) -> %08x", __func__, ud->width, ud->size, ! (int)ud->size, ud->data, *uc); return (UTF8_DONE); fail: if (ud->width == 0) ! *uc = UTF8_SET_SIZE(0)|UTF8_SET_WIDTH(0); else if (ud->width == 1) ! *uc = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|0x20; else ! *uc = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|0x2020; return (UTF8_ERROR); } *************** *** 174,210 **** void utf8_to_data(utf8_char uc, struct utf8_data *ud) { - union utf8_map m = { .uc = ntohl(uc) }; struct utf8_item *ui; u_int offset; memset(ud, 0, sizeof *ud); ! ud->size = ud->have = UTF8_GET_SIZE(m.flags); ! ud->width = UTF8_GET_WIDTH(m.flags); if (ud->size <= 3) { ! memcpy(ud->data, m.data, ud->size); ! return; } ! offset = ((u_int)m.data[2] << 16)|((u_int)m.data[1] << 8)|m.data[0]; ! if (offset >= utf8_list_used) ! memset(ud->data, ' ', ud->size); ! else { ! ui = &utf8_list[offset]; ! memcpy(ud->data, ui->data, ud->size); ! } } /* Get UTF-8 character from a single ASCII character. */ u_int utf8_build_one(u_char ch) { ! union utf8_map m; ! ! m.flags = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1); ! m.data[0] = ch; ! return (htonl(m.uc)); } /* Set a single character. */ --- 150,185 ---- void utf8_to_data(utf8_char uc, struct utf8_data *ud) { struct utf8_item *ui; u_int offset; memset(ud, 0, sizeof *ud); ! ud->size = ud->have = UTF8_GET_SIZE(uc); ! ud->width = UTF8_GET_WIDTH(uc); if (ud->size <= 3) { ! ud->data[2] = (uc >> 16); ! ud->data[1] = ((uc >> 8) & 0xff); ! ud->data[0] = (uc & 0xff); ! } else { ! offset = (uc & 0xffffff); ! if (offset >= utf8_list_used) ! memset(ud->data, ' ', ud->size); ! else { ! ui = &utf8_list[offset]; ! memcpy(ud->data, ui->data, ud->size); ! } } ! log_debug("%s: %08x -> (%d %d %.*s)", __func__, uc, ud->width, ud->size, ! (int)ud->size, ud->data); } /* Get UTF-8 character from a single ASCII character. */ u_int utf8_build_one(u_char ch) { ! return (UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|ch); } /* Set a single character. */