===================================================================
RCS file: /cvsrepo/anoncvs/cvs/src/usr.bin/tmux/utf8.c,v
retrieving revision 1.52
retrieving revision 1.53
diff -c -r1.52 -r1.53
*** src/usr.bin/tmux/utf8.c	2020/06/02 20:10:23	1.52
--- src/usr.bin/tmux/utf8.c	2020/06/06 12:38:32	1.53
***************
*** 1,4 ****
! /* $OpenBSD: utf8.c,v 1.52 2020/06/02 20:10:23 nicm Exp $ */
  
  /*
   * Copyright (c) 2008 Nicholas Marriott <nicholas.marriott@gmail.com>
--- 1,4 ----
! /* $OpenBSD: utf8.c,v 1.53 2020/06/06 12:38:32 nicm Exp $ */
  
  /*
   * Copyright (c) 2008 Nicholas Marriott <nicholas.marriott@gmail.com>
***************
*** 52,84 ****
  static u_int		 utf8_list_size;
  static u_int		 utf8_list_used;
  
! union utf8_map {
! 	utf8_char	uc;
! 	struct {
! 		u_char	flags;
! 		u_char	data[3];
! 	};
! } __packed;
  
! #define UTF8_GET_SIZE(flags) ((flags) & 0x1f)
! #define UTF8_GET_WIDTH(flags) (((flags) >> 5) - 1)
  
- #define UTF8_SET_SIZE(size) (size)
- #define UTF8_SET_WIDTH(width) ((width + 1) << 5)
- 
- static const union utf8_map utf8_space0 = {
- 	.flags = UTF8_SET_WIDTH(0)|UTF8_SET_SIZE(0),
- 	.data = ""
- };
- static const union utf8_map utf8_space1 = {
- 	.flags = UTF8_SET_WIDTH(1)|UTF8_SET_SIZE(1),
- 	.data = " "
- };
- static const union utf8_map utf8_space2 = {
- 	.flags = UTF8_SET_WIDTH(2)|UTF8_SET_SIZE(2),
- 	.data = "  "
- };
- 
  /* Get a UTF-8 item by offset. */
  static struct utf8_item *
  utf8_get_item(const char *data, size_t size)
--- 52,63 ----
  static u_int		 utf8_list_size;
  static u_int		 utf8_list_used;
  
! #define UTF8_GET_SIZE(uc) (((uc) >> 24) & 0x1f)	/* 5-bit size field */
! #define UTF8_GET_WIDTH(uc) (((uc) >> 29) - 1)	/* stored as width + 1 */
  
! #define UTF8_SET_SIZE(size) (((utf8_char)(size)) << 24)
! #define UTF8_SET_WIDTH(width) ((((utf8_char)(width)) + 1) << 29)
  
  /* Get a UTF-8 item by offset. */
  static struct utf8_item *
  utf8_get_item(const char *data, size_t size)
***************
*** 139,172 ****
  enum utf8_state
  utf8_from_data(const struct utf8_data *ud, utf8_char *uc)
  {
! 	union utf8_map	 m = { .uc = 0 };
! 	u_int		 offset;
  
  	if (ud->width > 2)
  		fatalx("invalid UTF-8 width");
  
  	if (ud->size > UTF8_SIZE)
  		goto fail;
! 	m.flags = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width);
! 	if (ud->size <= 3)
! 		memcpy(m.data, ud->data, ud->size);
! 	else {
! 		if (utf8_put_item(ud->data, ud->size, &offset) != 0)
! 			goto fail;
! 		m.data[0] = (offset & 0xff);
! 		m.data[1] = (offset >> 8) & 0xff;
! 		m.data[2] = (offset >> 16);
! 	}
! 	*uc = htonl(m.uc);
  	return (UTF8_DONE);
  
  fail:
  	if (ud->width == 0)
! 		*uc = htonl(utf8_space0.uc);
  	else if (ud->width == 1)
! 		*uc = htonl(utf8_space1.uc);
  	else
! 		*uc = htonl(utf8_space2.uc);
  	return (UTF8_ERROR);
  }
  
--- 118,148 ----
  enum utf8_state
  utf8_from_data(const struct utf8_data *ud, utf8_char *uc)
  {
! 	u_int	offset;	/* inline bytes, or the value utf8_put_item hands back */
  
  	if (ud->width > 2)
  		fatalx("invalid UTF-8 width");
  
  	if (ud->size > UTF8_SIZE)
  		goto fail;
! 	if (ud->size <= 3) {	/* up to three bytes are packed inline */
! 		offset = (((utf8_char)ud->data[2] << 16)|
! 		          ((utf8_char)ud->data[1] << 8)|
! 		          ((utf8_char)ud->data[0]));	/* data[0] is the low byte */
! 	} else if (utf8_put_item(ud->data, ud->size, &offset) != 0)
! 		goto fail;	/* utf8_put_item reported failure */
! 	*uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|offset;
! 	log_debug("%s: (%d %d %.*s) -> %08x", __func__, ud->width, ud->size,
! 	    (int)ud->size, ud->data, *uc);
  	return (UTF8_DONE);
  
  fail:
  	if (ud->width == 0)
! 		*uc = UTF8_SET_SIZE(0)|UTF8_SET_WIDTH(0);	/* empty, width 0 */
  	else if (ud->width == 1)
! 		*uc = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|0x20;	/* one space */
  	else
! 		*uc = UTF8_SET_SIZE(2)|UTF8_SET_WIDTH(2)|0x2020;	/* two spaces, width 2 */
  	return (UTF8_ERROR);
  }
  
***************
*** 174,210 ****
  void
  utf8_to_data(utf8_char uc, struct utf8_data *ud)
  {
- 	union utf8_map		 m = { .uc = ntohl(uc) };
  	struct utf8_item	*ui;
  	u_int			 offset;
  
  	memset(ud, 0, sizeof *ud);
! 	ud->size = ud->have = UTF8_GET_SIZE(m.flags);
! 	ud->width = UTF8_GET_WIDTH(m.flags);
  
  	if (ud->size <= 3) {
! 		memcpy(ud->data, m.data, ud->size);
! 		return;
  	}
  
! 	offset = ((u_int)m.data[2] << 16)|((u_int)m.data[1] << 8)|m.data[0];
! 	if (offset >= utf8_list_used)
! 		memset(ud->data, ' ', ud->size);
! 	else {
! 		ui = &utf8_list[offset];
! 		memcpy(ud->data, ui->data, ud->size);
! 	}
  }
  
  /* Get UTF-8 character from a single ASCII character. */
  u_int
  utf8_build_one(u_char ch)
  {
! 	union utf8_map	m;
! 
! 	m.flags = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1);
! 	m.data[0] = ch;
! 	return (htonl(m.uc));
  }
  
  /* Set a single character. */
--- 150,185 ----
  void
  utf8_to_data(utf8_char uc, struct utf8_data *ud)
  {
  	struct utf8_item	*ui;
  	u_int			 offset;
  
  	memset(ud, 0, sizeof *ud);
! 	ud->size = ud->have = UTF8_GET_SIZE(uc);	/* size and have are set alike */
! 	ud->width = UTF8_GET_WIDTH(uc);
  
  	if (ud->size <= 3) {
! 		ud->data[2] = (uc >> 16);	/* NOTE(review): unmasked; presumably truncated by the store into data[] - confirm element type */
! 		ud->data[1] = ((uc >> 8) & 0xff);
! 		ud->data[0] = (uc & 0xff);	/* all three bytes are written regardless of size */
! 	} else {
! 		offset = (uc & 0xffffff);	/* low 24 bits index utf8_list */
! 		if (offset >= utf8_list_used)
! 			memset(ud->data, ' ', ud->size);	/* index out of range: fill with spaces */
! 		else {
! 			ui = &utf8_list[offset];
! 			memcpy(ud->data, ui->data, ud->size);
! 		}
  	}
  
! 	log_debug("%s: %08x -> (%d %d %.*s)", __func__, uc, ud->width, ud->size,
! 	    (int)ud->size, ud->data);
  }
  
  /* Get UTF-8 character from a single ASCII character. */
  u_int
  utf8_build_one(u_char ch)
  {
! 	return (UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|ch);	/* size 1, width 1, ch in the low byte */
  }
  
  /* Set a single character. */