===================================================================
RCS file: /cvsrepo/anoncvs/cvs/src/usr.bin/tmux/utf8.c,v
retrieving revision 1.53
retrieving revision 1.54
diff -c -r1.53 -r1.54
*** src/usr.bin/tmux/utf8.c	2020/06/06 12:38:32	1.53
--- src/usr.bin/tmux/utf8.c	2020/06/09 08:34:33	1.54
***************
*** 1,4 ****
! /* $OpenBSD: utf8.c,v 1.53 2020/06/06 12:38:32 nicm Exp $ */
  
  /*
   * Copyright (c) 2008 Nicholas Marriott <nicholas.marriott@gmail.com>
--- 1,4 ----
! /* $OpenBSD: utf8.c,v 1.54 2020/06/09 08:34:33 nicm Exp $ */
  
  /*
   * Copyright (c) 2008 Nicholas Marriott <nicholas.marriott@gmail.com>
***************
*** 28,43 ****
  #include "tmux.h"
  
  struct utf8_item {
! 	u_int			offset;
! 	RB_ENTRY(utf8_item)	entry;
  
  	char			data[UTF8_SIZE];
  	u_char			size;
  };
- RB_HEAD(utf8_tree, utf8_item);
  
  static int
! utf8_cmp(struct utf8_item *ui1, struct utf8_item *ui2)
  {
  	if (ui1->size < ui2->size)
  		return (-1);
--- 28,43 ----
  #include "tmux.h"
  
  struct utf8_item {
! 	RB_ENTRY(utf8_item)	index_entry;
! 	u_int			index;
  
+ 	RB_ENTRY(utf8_item)	data_entry;
  	char			data[UTF8_SIZE];
  	u_char			size;
  };
  
  static int
! utf8_data_cmp(struct utf8_item *ui1, struct utf8_item *ui2)
  {
  	if (ui1->size < ui2->size)
  		return (-1);
***************
*** 45,116 ****
  		return (1);
  	return (memcmp(ui1->data, ui2->data, ui1->size));
  }
! RB_GENERATE_STATIC(utf8_tree, utf8_item, entry, utf8_cmp);
! static struct utf8_tree utf8_tree = RB_INITIALIZER(utf8_tree);
  
! static struct utf8_item *utf8_list;
! static u_int		 utf8_list_size;
! static u_int		 utf8_list_used;
  
  #define UTF8_GET_SIZE(uc) (((uc) >> 24) & 0x1f)
  #define UTF8_GET_WIDTH(flags) (((uc) >> 29) - 1)
  
  #define UTF8_SET_SIZE(size) (((utf8_char)(size)) << 24)
  #define UTF8_SET_WIDTH(width) ((((utf8_char)(width)) + 1) << 29)
  
! /* Get a UTF-8 item by offset. */
  static struct utf8_item *
! utf8_get_item(const char *data, size_t size)
  {
  	struct utf8_item	ui;
  
  	memcpy(ui.data, data, size);
  	ui.size = size;
  
! 	return (RB_FIND(utf8_tree, &utf8_tree, &ui));
  }
  
! /* Expand UTF-8 list. */
! static int
! utf8_expand_list(void)
  {
! 	if (utf8_list_size == 0xffffff)
! 		return (-1);
! 	if (utf8_list_size == 0)
! 		utf8_list_size = 256;
! 	else if (utf8_list_size > 0x7fffff)
! 		utf8_list_size = 0xffffff;
! 	else
! 		utf8_list_size *= 2;
! 	utf8_list = xreallocarray(utf8_list, utf8_list_size, sizeof *utf8_list);
! 	return (0);
  }
  
  /* Add a UTF-8 item. */
  static int
! utf8_put_item(const char *data, size_t size, u_int *offset)
  {
  	struct utf8_item	*ui;
  
! 	ui = utf8_get_item(data, size);
  	if (ui != NULL) {
! 		*offset = ui->offset;
! 		log_debug("%s: have %.*s at %u", __func__, (int)size, data,
! 		    *offset);
  		return (0);
  	}
  
! 	if (utf8_list_used == utf8_list_size && utf8_expand_list() != 0)
  		return (-1);
- 	*offset = utf8_list_used++;
  
! 	ui = &utf8_list[*offset];
! 	ui->offset = *offset;
  	memcpy(ui->data, data, size);
  	ui->size = size;
! 	RB_INSERT(utf8_tree, &utf8_tree, ui);
  
! 	log_debug("%s: added %.*s at %u", __func__, (int)size, data, *offset);
  	return (0);
  }
  
--- 45,125 ----
  		return (1);
  	return (memcmp(ui1->data, ui2->data, ui1->size));
  }
! RB_HEAD(utf8_data_tree, utf8_item);
! RB_GENERATE_STATIC(utf8_data_tree, utf8_item, data_entry, utf8_data_cmp);
! static struct utf8_data_tree utf8_data_tree = RB_INITIALIZER(utf8_data_tree);
  
! static int
! utf8_index_cmp(struct utf8_item *ui1, struct utf8_item *ui2)
! {
! 	if (ui1->index < ui2->index)
! 		return (-1);
! 	if (ui1->index > ui2->index)
! 		return (1);
! 	return (0);
! }
! RB_HEAD(utf8_index_tree, utf8_item);
! RB_GENERATE_STATIC(utf8_index_tree, utf8_item, index_entry, utf8_index_cmp);
! static struct utf8_index_tree utf8_index_tree = RB_INITIALIZER(utf8_index_tree);
  
+ static u_int utf8_next_index;
+ 
  #define UTF8_GET_SIZE(uc) (((uc) >> 24) & 0x1f)
  #define UTF8_GET_WIDTH(flags) (((uc) >> 29) - 1)
  
  #define UTF8_SET_SIZE(size) (((utf8_char)(size)) << 24)
  #define UTF8_SET_WIDTH(width) ((((utf8_char)(width)) + 1) << 29)
  
! /* Get a UTF-8 item from data. */
  static struct utf8_item *
! utf8_item_by_data(const char *data, size_t size)
  {
  	struct utf8_item	ui;
  
  	memcpy(ui.data, data, size);
  	ui.size = size;
  
! 	return (RB_FIND(utf8_data_tree, &utf8_data_tree, &ui));
  }
  
! /* Get a UTF-8 item from its index. */
! static struct utf8_item *
! utf8_item_by_index(u_int index)
  {
! 	struct utf8_item	ui;
! 
! 	ui.index = index;
! 
! 	return (RB_FIND(utf8_index_tree, &utf8_index_tree, &ui));
  }
  
  /* Add a UTF-8 item. */
  static int
! utf8_put_item(const char *data, size_t size, u_int *index)
  {
  	struct utf8_item	*ui;
  
! 	ui = utf8_item_by_data(data, size);
  	if (ui != NULL) {
! 		*index = ui->index;
! 		log_debug("%s: found %.*s = %u", __func__, (int)size, data,
! 		    *index);
  		return (0);
  	}
  
! 	if (utf8_next_index == 0xffffff + 1)
  		return (-1);
  
! 	ui = xcalloc(1, sizeof *ui);
! 	ui->index = utf8_next_index++;
! 	RB_INSERT(utf8_index_tree, &utf8_index_tree, ui);
! 
  	memcpy(ui->data, data, size);
  	ui->size = size;
! 	RB_INSERT(utf8_data_tree, &utf8_data_tree, ui);
  
! 	*index = ui->index;
! 	log_debug("%s: added %.*s = %u", __func__, (int)size, data, *index);
  	return (0);
  }
  
***************
*** 118,124 ****
  enum utf8_state
  utf8_from_data(const struct utf8_data *ud, utf8_char *uc)
  {
! 	u_int	offset;
  
  	if (ud->width > 2)
  		fatalx("invalid UTF-8 width");
--- 127,133 ----
  enum utf8_state
  utf8_from_data(const struct utf8_data *ud, utf8_char *uc)
  {
! 	u_int	index;
  
  	if (ud->width > 2)
  		fatalx("invalid UTF-8 width");
***************
*** 126,137 ****
  	if (ud->size > UTF8_SIZE)
  		goto fail;
  	if (ud->size <= 3) {
! 		offset = (((utf8_char)ud->data[2] << 16)|
  		          ((utf8_char)ud->data[1] << 8)|
  		          ((utf8_char)ud->data[0]));
! 	} else if (utf8_put_item(ud->data, ud->size, &offset) != 0)
  		goto fail;
! 	*uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|offset;
  	log_debug("%s: (%d %d %.*s) -> %08x", __func__, ud->width, ud->size,
  	    (int)ud->size, ud->data, *uc);
  	return (UTF8_DONE);
--- 135,146 ----
  	if (ud->size > UTF8_SIZE)
  		goto fail;
  	if (ud->size <= 3) {
! 		index = (((utf8_char)ud->data[2] << 16)|
  		          ((utf8_char)ud->data[1] << 8)|
  		          ((utf8_char)ud->data[0]));
! 	} else if (utf8_put_item(ud->data, ud->size, &index) != 0)
  		goto fail;
! 	*uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|index;
  	log_debug("%s: (%d %d %.*s) -> %08x", __func__, ud->width, ud->size,
  	    (int)ud->size, ud->data, *uc);
  	return (UTF8_DONE);
***************
*** 151,157 ****
  utf8_to_data(utf8_char uc, struct utf8_data *ud)
  {
  	struct utf8_item	*ui;
! 	u_int			 offset;
  
  	memset(ud, 0, sizeof *ud);
  	ud->size = ud->have = UTF8_GET_SIZE(uc);
--- 160,166 ----
  utf8_to_data(utf8_char uc, struct utf8_data *ud)
  {
  	struct utf8_item	*ui;
! 	u_int			 index;
  
  	memset(ud, 0, sizeof *ud);
  	ud->size = ud->have = UTF8_GET_SIZE(uc);
***************
*** 162,174 ****
  		ud->data[1] = ((uc >> 8) & 0xff);
  		ud->data[0] = (uc & 0xff);
  	} else {
! 		offset = (uc & 0xffffff);
! 		if (offset >= utf8_list_used)
  			memset(ud->data, ' ', ud->size);
! 		else {
! 			ui = &utf8_list[offset];
  			memcpy(ud->data, ui->data, ud->size);
- 		}
  	}
  
  	log_debug("%s: %08x -> (%d %d %.*s)", __func__, uc, ud->width, ud->size,
--- 171,181 ----
  		ud->data[1] = ((uc >> 8) & 0xff);
  		ud->data[0] = (uc & 0xff);
  	} else {
! 		index = (uc & 0xffffff);
! 		if ((ui = utf8_item_by_index(index)) == NULL)
  			memset(ud->data, ' ', ud->size);
! 		else
  			memcpy(ud->data, ui->data, ud->size);
  	}
  
  	log_debug("%s: %08x -> (%d %d %.*s)", __func__, uc, ud->width, ud->size,