=================================================================== RCS file: /cvsrepo/anoncvs/cvs/src/usr.bin/tmux/utf8.c,v retrieving revision 1.52 retrieving revision 1.53 diff -u -r1.52 -r1.53 --- src/usr.bin/tmux/utf8.c 2020/06/02 20:10:23 1.52 +++ src/usr.bin/tmux/utf8.c 2020/06/06 12:38:32 1.53 @@ -1,4 +1,4 @@ -/* $OpenBSD: utf8.c,v 1.52 2020/06/02 20:10:23 nicm Exp $ */ +/* $OpenBSD: utf8.c,v 1.53 2020/06/06 12:38:32 nicm Exp $ */ /* * Copyright (c) 2008 Nicholas Marriott @@ -52,33 +52,12 @@ static u_int utf8_list_size; static u_int utf8_list_used; -union utf8_map { - utf8_char uc; - struct { - u_char flags; - u_char data[3]; - }; -} __packed; +#define UTF8_GET_SIZE(uc) (((uc) >> 24) & 0x1f) +#define UTF8_GET_WIDTH(flags) (((uc) >> 29) - 1) -#define UTF8_GET_SIZE(flags) ((flags) & 0x1f) -#define UTF8_GET_WIDTH(flags) (((flags) >> 5) - 1) +#define UTF8_SET_SIZE(size) (((utf8_char)(size)) << 24) +#define UTF8_SET_WIDTH(width) ((((utf8_char)(width)) + 1) << 29) -#define UTF8_SET_SIZE(size) (size) -#define UTF8_SET_WIDTH(width) ((width + 1) << 5) - -static const union utf8_map utf8_space0 = { - .flags = UTF8_SET_WIDTH(0)|UTF8_SET_SIZE(0), - .data = "" -}; -static const union utf8_map utf8_space1 = { - .flags = UTF8_SET_WIDTH(1)|UTF8_SET_SIZE(1), - .data = " " -}; -static const union utf8_map utf8_space2 = { - .flags = UTF8_SET_WIDTH(2)|UTF8_SET_SIZE(2), - .data = " " -}; - /* Get a UTF-8 item by offset. */ static struct utf8_item * utf8_get_item(const char *data, size_t size) @@ -139,34 +118,31 @@ enum utf8_state utf8_from_data(const struct utf8_data *ud, utf8_char *uc) { - union utf8_map m = { .uc = 0 }; - u_int offset; + u_int offset; if (ud->width > 2) fatalx("invalid UTF-8 width"); if (ud->size > UTF8_SIZE) goto fail; - m.flags = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width); - if (ud->size <= 3) - memcpy(m.data, ud->data, ud->size); - else { - if (utf8_put_item(ud->data, ud->size, &offset) != 0) - goto fail; - m.data[0] = (offset & 0xff); - m.data[1] = (offset >> 8) & 0xff; - m.data[2] = (offset >> 16); - } - *uc = htonl(m.uc); + if (ud->size <= 3) { + offset = (((utf8_char)ud->data[2] << 16)| + ((utf8_char)ud->data[1] << 8)| + ((utf8_char)ud->data[0])); + } else if (utf8_put_item(ud->data, ud->size, &offset) != 0) + goto fail; + *uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|offset; + log_debug("%s: (%d %d %.*s) -> %08x", __func__, ud->width, ud->size, + (int)ud->size, ud->data, *uc); return (UTF8_DONE); fail: if (ud->width == 0) - *uc = htonl(utf8_space0.uc); + *uc = UTF8_SET_SIZE(0)|UTF8_SET_WIDTH(0); else if (ud->width == 1) - *uc = htonl(utf8_space1.uc); + *uc = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|0x20; else - *uc = htonl(utf8_space2.uc); + *uc = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|0x2020; return (UTF8_ERROR); } @@ -174,37 +150,36 @@ void utf8_to_data(utf8_char uc, struct utf8_data *ud) { - union utf8_map m = { .uc = ntohl(uc) }; struct utf8_item *ui; u_int offset; memset(ud, 0, sizeof *ud); - ud->size = ud->have = UTF8_GET_SIZE(m.flags); - ud->width = UTF8_GET_WIDTH(m.flags); + ud->size = ud->have = UTF8_GET_SIZE(uc); + ud->width = UTF8_GET_WIDTH(uc); if (ud->size <= 3) { - memcpy(ud->data, m.data, ud->size); - return; + ud->data[2] = (uc >> 16); + ud->data[1] = ((uc >> 8) & 0xff); + ud->data[0] = (uc & 0xff); + } else { + offset = (uc & 0xffffff); + if (offset >= utf8_list_used) + memset(ud->data, ' ', ud->size); + else { + ui = &utf8_list[offset]; + memcpy(ud->data, ui->data, ud->size); + } } - offset = ((u_int)m.data[2] << 16)|((u_int)m.data[1] << 8)|m.data[0]; - if (offset >= utf8_list_used) - memset(ud->data, ' ', ud->size); - else { - ui = &utf8_list[offset]; - memcpy(ud->data, ui->data, ud->size); - } + log_debug("%s: %08x -> (%d %d %.*s)", __func__, uc, ud->width, ud->size, + (int)ud->size, ud->data); } /* Get UTF-8 character from a single ASCII character. */ u_int utf8_build_one(u_char ch) { - union utf8_map m; - - m.flags = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1); - m.data[0] = ch; - return (htonl(m.uc)); + return (UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|ch); } /* Set a single character. */