===================================================================
RCS file: /cvsrepo/anoncvs/cvs/src/usr.bin/tmux/utf8.c,v
retrieving revision 1.44
retrieving revision 1.45
diff -c -r1.44 -r1.45
*** src/usr.bin/tmux/utf8.c	2019/11/25 15:04:15	1.44
--- src/usr.bin/tmux/utf8.c	2020/05/25 09:32:10	1.45
***************
*** 1,4 ****
! /* $OpenBSD: utf8.c,v 1.44 2019/11/25 15:04:15 nicm Exp $ */
  
  /*
   * Copyright (c) 2008 Nicholas Marriott
--- 1,4 ----
! /* $OpenBSD: utf8.c,v 1.45 2020/05/25 09:32:10 nicm Exp $ */
  
  /*
   * Copyright (c) 2008 Nicholas Marriott
***************
*** 28,33 ****
--- 28,207 ----
  #include "tmux.h"
  
  static int	utf8_width(wchar_t);
+ 
+ /*
+  * A UTF-8 sequence that is too long to be packed directly into a
+  * utf8_big_map value. Items live in the utf8_big_list array (addressed by
+  * index) and are linked into utf8_big_tree (searched by content).
+  */
+ struct utf8_big_item {
+ 	u_int			index;	/* position in utf8_big_list */
+ 	RB_ENTRY(utf8_big_item)	entry;
+ 
+ 	char			data[UTF8_SIZE];
+ 	u_char			size;	/* bytes used in data */
+ };
+ RB_HEAD(utf8_big_tree, utf8_big_item);
+ 
+ /* Order items by length first, then by the bytes of the sequence. */
+ static int
+ utf8_big_cmp(struct utf8_big_item *bi1, struct utf8_big_item *bi2)
+ {
+ 	if (bi1->size < bi2->size)
+ 		return (-1);
+ 	if (bi1->size > bi2->size)
+ 		return (1);
+ 	/* Sizes are equal here, so either one bounds the comparison. */
+ 	return (memcmp(bi1->data, bi2->data, bi1->size));
+ }
+ RB_GENERATE_STATIC(utf8_big_tree, utf8_big_item, entry, utf8_big_cmp);
+ static struct utf8_big_tree utf8_big_tree = RB_INITIALIZER(utf8_big_tree);
+ 
+ static struct utf8_big_item	*utf8_big_list;
+ static u_int			 utf8_big_list_size;	/* allocated items */
+ static u_int			 utf8_big_list_used;	/* items in use */
+ 
+ /*
+  * Packed 32-bit form of a character: one flags byte (sequence size in the low
+  * five bits, UTF8_BIG_WIDTH2 set for width 2) followed by three data bytes.
+  * For sizes up to three the data bytes are the sequence itself; for longer
+  * sequences they hold a 24-bit index into utf8_big_list, least significant
+  * byte first.
+  */
+ union utf8_big_map {
+ 	uint32_t	value;
+ 	struct {
+ 		u_char	flags;
+ #define UTF8_BIG_SIZE 0x1f
+ #define UTF8_BIG_WIDTH2 0x20
+ 
+ 		u_char	data[3];
+ 	};
+ } __packed;
+ 
+ /* Replacement values: one space of width 1, two spaces of width 2. */
+ static const union utf8_big_map utf8_big_space1 = {
+ 	.flags = 1,
+ 	.data = { ' ' }
+ };
+ static const union utf8_big_map utf8_big_space2 = {
+ 	.flags = UTF8_BIG_WIDTH2|2,
+ 	/* Size is 2, so both data bytes must be spaces. */
+ 	.data = { ' ', ' ' }
+ };
+ 
+ /* Find the big item holding the given bytes, or NULL if there is none. */
+ static struct utf8_big_item *
+ utf8_get_big_item(const char *data, size_t size)
+ {
+ 	struct utf8_big_item	bi;
+ 
+ 	/* A sequence longer than an item's buffer cannot match any item. */
+ 	if (size > sizeof bi.data)
+ 		return (NULL);
+ 
+ 	/* Only data and size are read by utf8_big_cmp. */
+ 	memcpy(bi.data, data, size);
+ 	bi.size = size;
+ 
+ 	return (RB_FIND(utf8_big_tree, &utf8_big_tree, &bi));
+ }
+ 
+ /* Add a big item.
*/
+ /*
+  * Store a sequence in the big item list, reusing the existing item if the
+  * same bytes are already present. On success the item's index is written to
+  * *index and 0 is returned. Returns -1 if the sequence does not fit in an
+  * item or the list already holds 0xffffff items (the index must fit in the
+  * three data bytes of a utf8_big_map).
+  */
+ static int
+ utf8_put_big_item(const char *data, size_t size, u_int *index)
+ {
+ 	struct utf8_big_item	*bi;
+ 	u_int			 i;
+ 
+ 	/* Refuse anything that would overflow an item's data buffer. */
+ 	if (size > sizeof bi->data)
+ 		return (-1);
+ 
+ 	bi = utf8_get_big_item(data, size);
+ 	if (bi != NULL) {
+ 		*index = bi->index;
+ 		log_debug("%s: have %.*s at %u", __func__, (int)size, data,
+ 		    *index);
+ 		return (0);
+ 	}
+ 
+ 	if (utf8_big_list_used == utf8_big_list_size) {
+ 		if (utf8_big_list_size == 0xffffff)
+ 			return (-1);
+ 		if (utf8_big_list_size == 0)
+ 			utf8_big_list_size = 256;
+ 		else if (utf8_big_list_size > 0x7fffff)
+ 			utf8_big_list_size = 0xffffff;
+ 		else
+ 			utf8_big_list_size *= 2;
+ 		utf8_big_list = xreallocarray(utf8_big_list, utf8_big_list_size,
+ 		    sizeof *utf8_big_list);
+ 
+ 		/*
+ 		 * The tree nodes are the array elements themselves, so
+ 		 * growing the array may move them and leave the tree root
+ 		 * and links pointing at the old storage. Rebuild the tree
+ 		 * from the array; RB_INSERT resets each node's links.
+ 		 */
+ 		RB_INIT(&utf8_big_tree);
+ 		for (i = 0; i < utf8_big_list_used; i++) {
+ 			RB_INSERT(utf8_big_tree, &utf8_big_tree,
+ 			    &utf8_big_list[i]);
+ 		}
+ 	}
+ 	*index = utf8_big_list_used++;
+ 
+ 	bi = &utf8_big_list[*index];
+ 	bi->index = *index;
+ 	memcpy(bi->data, data, size);
+ 	bi->size = size;
+ 	RB_INSERT(utf8_big_tree, &utf8_big_tree, bi);
+ 
+ 	log_debug("%s: added %.*s at %u", __func__, (int)size, data, *index);
+ 	return (0);
+ }
+ 
+ /*
+  * Get UTF-8 as index into buffer.
+  *
+  * Packs ud into a 32-bit utf8_big_map value. A width other than 1 or 2 gives
+  * a single-width space. Sequences of up to three bytes are stored inline;
+  * longer ones are stored in the big item list and referenced by index. If
+  * the sequence is longer than UTF8_BIG_SIZE or cannot be stored, a space of
+  * the same width is returned instead.
+  */
+ uint32_t
+ utf8_map_big(const struct utf8_data *ud)
+ {
+ 	union utf8_big_map	 m = { .value = 0 };
+ 	u_int			 o;
+ 	const char		*data = ud->data;
+ 	size_t			 size = ud->size;
+ 
+ 	if (ud->width != 1 && ud->width != 2)
+ 		return (utf8_big_space1.value);
+ 
+ 	if (size > UTF8_BIG_SIZE)
+ 		goto fail;
+ 	if (size == 1)
+ 		return (utf8_set_big(data[0], 1));
+ 
+ 	m.flags = size;
+ 	if (ud->width == 2)
+ 		m.flags |= UTF8_BIG_WIDTH2;
+ 
+ 	if (size <= 3) {
+ 		memcpy(&m.data, data, size);
+ 		return (m.value);
+ 	}
+ 
+ 	if (utf8_put_big_item(data, size, &o) != 0)
+ 		goto fail;
+ 	/* Index is stored least significant byte first. */
+ 	m.data[0] = (o & 0xff);
+ 	m.data[1] = (o >> 8) & 0xff;
+ 	m.data[2] = (o >> 16);
+ 	return (m.value);
+ 
+ fail:
+ 	if (ud->width == 1)
+ 		return (utf8_big_space1.value);
+ 	return (utf8_big_space2.value);
+ }
+ 
+ /* Get UTF-8 from index into buffer.
*/
+ /*
+  * Unpack a 32-bit utf8_big_map value into ud. The size and width come from
+  * the flags byte. Sequences of up to three bytes are copied from the value
+  * itself; longer ones are looked up in the big item list by index. If the
+  * index is not in use or the stored item has a different size, ud is filled
+  * with spaces instead.
+  */
+ void
+ utf8_get_big(uint32_t v, struct utf8_data *ud)
+ {
+ 	union utf8_big_map	 m = { .value = v };
+ 	struct utf8_big_item	*bi;
+ 	u_int			 o;
+ 
+ 	memset(ud, 0, sizeof *ud);
+ 	ud->size = ud->have = (m.flags & UTF8_BIG_SIZE);
+ 	if (m.flags & UTF8_BIG_WIDTH2)
+ 		ud->width = 2;
+ 	else
+ 		ud->width = 1;
+ 
+ 	/*
+ 	 * The size field can encode up to UTF8_BIG_SIZE bytes. Never write
+ 	 * more than ud->data holds (its size is defined outside this diff);
+ 	 * fall back to spaces matching the width instead.
+ 	 */
+ 	if (ud->size > sizeof ud->data) {
+ 		ud->size = ud->have = ud->width;
+ 		memset(ud->data, ' ', ud->size);
+ 		return;
+ 	}
+ 
+ 	if (ud->size <= 3) {
+ 		memcpy(ud->data, m.data, ud->size);
+ 		return;
+ 	}
+ 
+ 	/* Index is stored least significant byte first. */
+ 	o = ((uint32_t)m.data[2] << 16)|((uint32_t)m.data[1] << 8)|m.data[0];
+ 	if (o >= utf8_big_list_used || utf8_big_list[o].size != ud->size)
+ 		memset(ud->data, ' ', ud->size);
+ 	else {
+ 		bi = &utf8_big_list[o];
+ 		memcpy(ud->data, bi->data, ud->size);
+ 	}
+ }
+ 
+ /*
+  * Get big value for UTF-8 single character. The result has size 1 and holds
+  * c in the first data byte; a width of 2 sets UTF8_BIG_WIDTH2 and any other
+  * width leaves it clear.
+  */
+ uint32_t
+ utf8_set_big(char c, u_int width)
+ {
+ 	union utf8_big_map	m = { .flags = 1, .data[0] = c };
+ 
+ 	if (width == 2)
+ 		m.flags |= UTF8_BIG_WIDTH2;
+ 	return (m.value);
+ }
  
  /* Set a single character. */
  void