Annotation of src/usr.bin/ssh/utf8.c, Revision 1.1
1.1 ! schwarze 1: /* $OpenBSD$ */
! 2: /*
! 3: * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
! 4: *
! 5: * Permission to use, copy, modify, and distribute this software for any
! 6: * purpose with or without fee is hereby granted, provided that the above
! 7: * copyright notice and this permission notice appear in all copies.
! 8: *
! 9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
! 10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
! 11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
! 12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
! 13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
! 14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
! 15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
! 16: */
! 17:
! 18: /*
! 19: * Utility functions for multibyte-character handling,
! 20: * in particular to sanitize untrusted strings for terminal output.
! 21: */
! 22:
! 23: #include <sys/types.h>
! 24: #include <langinfo.h>
! 25: #include <limits.h>
! 26: #include <stdarg.h>
! 27: #include <stdio.h>
! 28: #include <stdlib.h>
! 29: #include <string.h>
! 30: #include <vis.h>
! 31: #include <wchar.h>
! 32:
! 33: #include "utf8.h"
! 34:
! 35: static int dangerous_locale(void);
! 36: static int vasnmprintf(char **, size_t, int *, const char *, va_list);
! 37:
! 38:
/*
 * Tell whether the character encoding of the current locale is one
 * we cannot safely recover in.  Only US-ASCII and UTF-8 are treated
 * as safe: with those, encoding errors and non-printable characters
 * can be escaped byte by byte.  Any other encoding is considered
 * dangerous because state-dependent encodings make recovery
 * impossible and replacing non-printable characters in arbitrary
 * encodings would be fragile.
 *
 * Returns 0 for US-ASCII and UTF-8, 1 for everything else.
 */
static int
dangerous_locale(void)
{
	const char	*codeset = nl_langinfo(CODESET);

	if (strcmp(codeset, "US-ASCII") == 0)
		return 0;
	if (strcmp(codeset, "UTF-8") == 0)
		return 0;
	return 1;
}
! 55:
/*
 * Make sure that "need" more bytes plus a terminating '\0' fit into
 * the buffer *dst of size *sz at the write position *dp.  If they
 * do not, extend the buffer by 128 bytes, but never beyond maxsz.
 * The callers only request space after checking that the write
 * stays below maxsz, so the capped size is always sufficient.
 * On reallocation, *dst, *sz, and *dp are updated.
 * Returns 0 on success; returns -1 and leaves the buffer untouched
 * if realloc(3) fails.
 */
static int
grow_dst(char **dst, size_t *sz, size_t maxsz, char **dp, size_t need)
{
	char	*tp;
	size_t	 off;	/* Offset of the write position in dst. */
	size_t	 tsz;	/* Candidate size for the extended buffer. */

	/* Take the offset first; the old pointers die in realloc. */
	off = (size_t)(*dp - *dst);
	if (off + need < *sz)
		return 0;
	tsz = *sz + 128;
	if (tsz > maxsz)
		tsz = maxsz;
	if ((tp = realloc(*dst, tsz)) == NULL)
		return -1;
	*dst = tp;
	*dp = tp + off;
	*sz = tsz;
	return 0;
}

/*
 * The following two functions limit the number of bytes written,
 * including the terminating '\0', to sz.  Unless wp is NULL,
 * they limit the number of display columns occupied to *wp.
 * Whichever is reached first terminates the output string.
 * To stay close to the standard interfaces, they return the number of
 * non-NUL bytes that would have been written if both were unlimited.
 * If wp is NULL, newline, carriage return, and tab are allowed;
 * otherwise, the actual number of columns occupied by what was
 * written is returned in *wp.
 *
 * vasnmprintf() formats fmt/ap with vasprintf(3) and then copies the
 * result into a newly allocated string, one multibyte character at
 * a time.  Printable characters are copied verbatim; invalid bytes
 * and non-printable characters are replaced by vis(3) octal escapes.
 * In a locale where that is unsafe (see dangerous_locale()), parsing
 * stops at the first such character and -1 is returned.
 *
 * The result is stored in *str and must be freed by the caller.
 * Note that *str can be non-NULL even when -1 is returned: it then
 * holds the output produced before the problem was detected.
 * *str is NULL only if formatting or the initial allocation failed.
 */
static int
vasnmprintf(char **str, size_t maxsz, int *wp, const char *fmt, va_list ap)
{
	char	*src;	/* Source string returned from vasprintf. */
	char	*sp;	/* Pointer into src. */
	char	*dst;	/* Destination string to be returned. */
	char	*dp;	/* Pointer into dst. */
	char	*tp;	/* Temporary pointer for dst. */
	size_t	 sz;	/* Number of bytes allocated for dst. */
	wchar_t	 wc;	/* Wide character at sp. */
	int	 len;	/* Number of bytes in the character at sp. */
	int	 ret;	/* Number of bytes needed to format src. */
	int	 width;	/* Display width of the character wc. */
	int	 total_width, max_width, print;

	src = dst = NULL;
	ret = vasprintf(&src, fmt, ap);
	if (ret < 0 || src == NULL) {
		/* The value of src is not reliable after a failure. */
		src = NULL;
		goto fail;
	}
	if (ret == 0) {
		/* An empty result is valid and needs no sanitizing. */
		*str = src;
		if (wp != NULL)
			*wp = 0;
		return 0;
	}

	/* Reserve room for the terminating '\0', too. */
	sz = strlen(src) + 1;
	if ((dst = malloc(sz)) == NULL)
		goto fail;

	if (maxsz > INT_MAX)
		maxsz = INT_MAX;

	sp = src;
	dp = dst;
	ret = 0;
	print = 1;
	total_width = 0;
	max_width = wp == NULL ? INT_MAX : *wp;
	while (*sp != '\0') {
		if ((len = mbtowc(&wc, sp, MB_CUR_MAX)) == -1) {
			/* Invalid byte: reset the state, escape one byte. */
			(void)mbtowc(NULL, NULL, MB_CUR_MAX);
			if (dangerous_locale()) {
				ret = -1;
				break;
			}
			len = 1;
			width = -1;
		} else if (wp == NULL &&
		    (wc == L'\n' || wc == L'\r' || wc == L'\t')) {
			/*
			 * Don't use width uninitialized; the actual
			 * value doesn't matter because total_width
			 * is only returned for wp != NULL.
			 */
			width = 0;
		} else if ((width = wcwidth(wc)) == -1 &&
		    dangerous_locale()) {
			ret = -1;
			break;
		}

		/* Valid, printable character. */

		if (width >= 0) {
			if (print && (dp - dst >= (int)maxsz - len ||
			    total_width > max_width - width))
				print = 0;
			if (print) {
				/*
				 * Earlier escapes may have used up the
				 * initial allocation, so check the space
				 * before copying.
				 */
				if (grow_dst(&dst, &sz, maxsz,
				    &dp, (size_t)len) == -1) {
					ret = -1;
					break;
				}
				total_width += width;
				memcpy(dp, sp, len);
				dp += len;
			}
			sp += len;
			if (ret >= 0)
				ret += len;
			continue;
		}

		/* Escaping required. */

		while (len > 0) {
			if (print && (dp - dst >= (int)maxsz - 4 ||
			    total_width > max_width - 4))
				print = 0;
			if (print) {
				/* vis(3) writes up to 4 bytes and a '\0'. */
				if (grow_dst(&dst, &sz, maxsz,
				    &dp, 4) == -1) {
					ret = -1;
					break;
				}
				tp = vis(dp, *sp, VIS_OCTAL | VIS_ALL, 0);
				width = tp - dp;
				total_width += width;
				dp = tp;
			} else
				width = 4;
			len--;
			sp++;
			if (ret >= 0)
				ret += width;
		}
		/* The inner loop only stops early on allocation failure. */
		if (len > 0)
			break;
	}
	free(src);
	*dp = '\0';
	*str = dst;
	if (wp != NULL)
		*wp = total_width;

	/*
	 * If the string was truncated by the width limit but
	 * would have fit into the size limit, the only sane way
	 * to report the problem is using the return value, such
	 * that the usual idiom "if (ret < 0 || ret >= sz) error"
	 * works as expected.
	 */

	if (ret < (int)maxsz && !print)
		ret = -1;
	return ret;

fail:
	free(src);
	free(dst);
	*str = NULL;
	if (wp != NULL)
		*wp = 0;
	return -1;
}
! 201:
/*
 * Format into the caller-supplied buffer str of sz bytes, replacing
 * invalid and non-printable characters as done by vasnmprintf().
 * If wp is not NULL, *wp limits the number of display columns on
 * input and is set by vasnmprintf() on output.
 * Returns whatever vasnmprintf() returns, so the usual idiom
 * "if (ret < 0 || ret >= sz) error" applies.
 * Unless sz is 0, str is always left '\0'-terminated, even on failure.
 */
int
snmprintf(char *str, size_t sz, int *wp, const char *fmt, ...)
{
	va_list	 ap;
	char	*cp = NULL;
	int	 ret;

	va_start(ap, fmt);
	ret = vasnmprintf(&cp, sz, wp, fmt, ap);
	va_end(ap);
	if (cp != NULL) {
		(void)strlcpy(str, cp, sz);
		free(cp);
	} else if (sz > 0) {
		/* No string was produced; don't hand NULL to strlcpy. */
		*str = '\0';
	}
	return ret;
}
! 216:
! 217: /*
! 218: * To stay close to the standard interfaces, the following functions
! 219: * return the number of non-NUL bytes written.
! 220: */
! 221:
/*
 * Format fmt/ap with vasnmprintf(), without a width limit and with
 * newline, carriage return, and tab allowed, and write the result
 * to stream.  Returns the value returned by vasnmprintf(), or -1
 * if formatting or writing fails.  Nothing is written when
 * formatting fails.
 */
int
vfmprintf(FILE *stream, const char *fmt, va_list ap)
{
	char	*str = NULL;
	int	 ret;

	if ((ret = vasnmprintf(&str, INT_MAX, NULL, fmt, ap)) < 0) {
		/* A partial string may be returned along with -1. */
		free(str);
		return -1;
	}
	if (fputs(str, stream) == EOF)
		ret = -1;
	free(str);
	return ret;
}
! 235:
/*
 * Variadic front end to vfmprintf(): format the arguments and
 * write the result to stream.  Returns what vfmprintf() returns.
 */
int
fmprintf(FILE *stream, const char *fmt, ...)
{
	va_list	 args;
	int	 written;

	va_start(args, fmt);
	written = vfmprintf(stream, fmt, args);
	va_end(args);

	return written;
}
! 247:
/*
 * Variadic front end to vfmprintf() that writes to standard output.
 * Returns what vfmprintf() returns.
 */
int
mprintf(const char *fmt, ...)
{
	va_list	 args;
	int	 written;

	va_start(args, fmt);
	written = vfmprintf(stdout, fmt, args);
	va_end(args);

	return written;
}
! 258: }