Annotation of src/usr.bin/ssh/utf8.c, Revision 1.9
1.9 ! markus 1: /* $OpenBSD: utf8.c,v 1.8 2018/08/21 13:56:27 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17:
18: /*
19: * Utility functions for multibyte-character handling,
20: * in particular to sanitize untrusted strings for terminal output.
21: */
22:
23: #include <sys/types.h>
24: #include <langinfo.h>
25: #include <limits.h>
26: #include <stdarg.h>
27: #include <stdio.h>
28: #include <stdlib.h>
29: #include <string.h>
30: #include <vis.h>
31: #include <wchar.h>
32:
33: #include "utf8.h"
34:
/* Prototypes for the file-local helpers defined below. */
static int	 dangerous_locale(void);
static int	 grow_dst(char **dst, size_t *sz, size_t maxsz, char **dp,
		    size_t need);
static int	 vasnmprintf(char **str, size_t maxsz, int *wp,
		    const char *fmt, va_list ap);
38:
39:
40: /*
41: * For US-ASCII and UTF-8 encodings, we can safely recover from
42: * encoding errors and from non-printable characters. For any
43: * other encodings, err on the side of caution and abort parsing:
44: * For state-dependent encodings, recovery is impossible.
45: * For arbitrary encodings, replacement of non-printable
46: * characters would be non-trivial and too fragile.
1.8 schwarze 47: * The comments indicate what nl_langinfo(CODESET)
48: * returns for US-ASCII on various operating systems.
1.1 schwarze 49: */
50:
/*
 * Report whether the character encoding of the current locale is one
 * that cannot be sanitized safely.
 * Returns 0 if nl_langinfo(CODESET) names UTF-8 or one of the known
 * spellings of US-ASCII, and 1 for every other encoding.
 */
static int
dangerous_locale(void)
{
	/* Codeset names that are considered safe to recover from. */
	static const char *const safe_codesets[] = {
		"UTF-8",
		"US-ASCII",		/* OpenBSD */
		"ANSI_X3.4-1968",	/* Linux */
		"ISO8859-1",		/* AIX */
		"646",			/* Solaris, NetBSD */
		"",			/* Solaris 6 */
	};
	const char	*codeset;
	size_t		 i, n;

	codeset = nl_langinfo(CODESET);
	n = sizeof(safe_codesets) / sizeof(safe_codesets[0]);
	for (i = 0; i < n; i++) {
		if (strcmp(codeset, safe_codesets[i]) == 0)
			return 0;
	}
	return 1;
}
63:
1.3 schwarze 64: static int
65: grow_dst(char **dst, size_t *sz, size_t maxsz, char **dp, size_t need)
66: {
67: char *tp;
68: size_t tsz;
69:
70: if (*dp + need < *dst + *sz)
71: return 0;
72: tsz = *sz + 128;
73: if (tsz > maxsz)
74: tsz = maxsz;
1.7 deraadt 75: if ((tp = recallocarray(*dst, *sz, tsz, 1)) == NULL)
1.3 schwarze 76: return -1;
77: *dp = tp + (*dp - *dst);
78: *dst = tp;
79: *sz = tsz;
80: return 0;
81: }
82:
1.1 schwarze 83: /*
84: * The following two functions limit the number of bytes written,
85: * including the terminating '\0', to sz. Unless wp is NULL,
86: * they limit the number of display columns occupied to *wp.
87: * Whichever is reached first terminates the output string.
88: * To stay close to the standard interfaces, they return the number of
89: * non-NUL bytes that would have been written if both were unlimited.
90: * If wp is NULL, newline, carriage return, and tab are allowed;
91: * otherwise, the actual number of columns occupied by what was
92: * written is returned in *wp.
93: */
94:
/*
 * Format fmt and ap with vasprintf(3), then copy the result into a
 * newly allocated buffer, passing valid printable characters through
 * and replacing everything else by vis(3) octal escapes.
 *
 * On return, *str points to the allocated output, which the caller
 * has to free(3).  That is also true when -1 is returned because of
 * an error encountered while copying; *str then holds what was
 * written so far.  *str is only set to NULL if vasprintf(3) or the
 * initial malloc(3) failed.
 */
static int
vasnmprintf(char **str, size_t maxsz, int *wp, const char *fmt, va_list ap)
{
	char	*src = NULL;	/* Formatted input from vasprintf(3). */
	char	*in;		/* Read position in src. */
	char	*dst;		/* Sanitized output buffer. */
	char	*out;		/* Write position in dst. */
	char	*end;		/* End of the escape written by vis(3). */
	size_t	 dstsz;		/* Bytes currently allocated for dst. */
	wchar_t	 wc;		/* Wide character decoded at in. */
	int	 nbytes;	/* Bytes making up the character at in. */
	int	 ret;		/* Bytes needed for unlimited output. */
	int	 cols;		/* Columns needed by the current item. */
	int	 used_cols;	/* Columns occupied by the output so far. */
	int	 col_limit;	/* Maximum number of columns allowed. */
	int	 emitting;	/* Cleared once a limit has been hit. */

	ret = vasprintf(&src, fmt, ap);
	if (ret <= 0)
		goto fail;

	dstsz = strlen(src) + 1;
	dst = malloc(dstsz);
	if (dst == NULL) {
		free(src);
		ret = -1;
		goto fail;
	}

	if (maxsz > INT_MAX)
		maxsz = INT_MAX;

	col_limit = INT_MAX;
	if (wp != NULL)
		col_limit = *wp;
	used_cols = 0;
	emitting = 1;
	ret = 0;
	in = src;
	out = dst;

	while (*in != '\0') {
		/* Classify the character at the read position. */
		nbytes = mbtowc(&wc, in, MB_CUR_MAX);
		if (nbytes == -1) {
			/* Encoding error: reset and escape one byte. */
			(void)mbtowc(NULL, NULL, MB_CUR_MAX);
			if (dangerous_locale()) {
				ret = -1;
				break;
			}
			nbytes = 1;
			cols = -1;
		} else if (wp == NULL &&
		    (wc == L'\n' || wc == L'\r' || wc == L'\t')) {
			/*
			 * Allowed verbatim when no width is tracked.
			 * The value only serves to keep cols initialized;
			 * used_cols is not reported when wp is NULL.
			 */
			cols = 0;
		} else {
			cols = wcwidth(wc);
			if (cols == -1 && dangerous_locale()) {
				ret = -1;
				break;
			}
		}

		if (cols < 0) {
			/* Escaping required, one source byte at a time. */
			for (; nbytes > 0; nbytes--, in++) {
				if (emitting &&
				    (out - dst >= (int)maxsz - 4 ||
				    used_cols > col_limit - 4))
					emitting = 0;
				if (!emitting) {
					/* Account for a full escape. */
					cols = 4;
				} else {
					if (grow_dst(&dst, &dstsz, maxsz,
					    &out, 4) == -1) {
						ret = -1;
						break;
					}
					end = vis(out, *in,
					    VIS_OCTAL | VIS_ALL, 0);
					cols = end - out;
					used_cols += cols;
					out = end;
				}
				if (ret >= 0)
					ret += cols;
			}
			/* Bytes left over mean that growing failed. */
			if (nbytes > 0)
				break;
			continue;
		}

		/* Valid, printable character: copy it unchanged. */
		if (emitting && (out - dst >= (int)maxsz - nbytes ||
		    used_cols > col_limit - cols))
			emitting = 0;
		if (emitting) {
			if (grow_dst(&dst, &dstsz, maxsz,
			    &out, nbytes) == -1) {
				ret = -1;
				break;
			}
			used_cols += cols;
			memcpy(out, in, nbytes);
			out += nbytes;
		}
		in += nbytes;
		if (ret >= 0)
			ret += nbytes;
	}

	free(src);
	*out = '\0';
	*str = dst;
	if (wp != NULL)
		*wp = used_cols;

	/*
	 * Output that was cut short by the width limit even though it
	 * would have fit into the size limit can only be reported via
	 * the return value, such that the usual idiom
	 * "if (ret < 0 || ret >= sz) error" keeps working.
	 */
	if (!emitting && ret < (int)maxsz)
		ret = -1;
	return ret;

fail:
	/* An empty result is handed back as is; anything else failed. */
	if (wp != NULL)
		*wp = 0;
	*str = ret == 0 ? src : NULL;
	return ret == 0 ? 0 : -1;
}
230:
/*
 * Format into the caller-provided buffer str of sz bytes, sanitizing
 * the output for the terminal.  Unless wp is NULL, *wp limits the
 * number of display columns on input and receives the number of
 * columns actually occupied on output.
 * Returns the value returned by vasnmprintf(): the number of non-NUL
 * bytes needed for the complete output, or -1 on error or when the
 * output was truncated by the width limit.
 * Nothing is written to str if sz is 0.
 */
int
snmprintf(char *str, size_t sz, int *wp, const char *fmt, ...)
{
	va_list	 ap;
	char	*cp = NULL;
	int	 ret;

	va_start(ap, fmt);
	ret = vasnmprintf(&cp, sz, wp, fmt, ap);
	va_end(ap);
	if (cp != NULL) {
		/* strlcpy(3) truncates to sz and NUL-terminates. */
		(void)strlcpy(str, cp, sz);
		free(cp);
	} else if (sz > 0) {
		/* No output at all; do not touch a zero-sized buffer. */
		*str = '\0';
	}
	return ret;
}
248:
249: /*
250: * To stay close to the standard interfaces, the following functions
251: * return the number of non-NUL bytes written.
252: */
253:
/*
 * Format fmt and ap, sanitize the result, and write it to stream.
 * Newline, carriage return, and tab pass through because no width
 * pointer is given to vasnmprintf().
 * Returns the number of non-NUL bytes written, or -1 if formatting
 * failed or fputs(3) reported an error.
 */
int
vfmprintf(FILE *stream, const char *fmt, va_list ap)
{
	char	*buf = NULL;
	int	 nbytes;

	nbytes = vasnmprintf(&buf, INT_MAX, NULL, fmt, ap);

	/* Only attempt output if formatting succeeded. */
	if (nbytes < 0 || fputs(buf, stream) == EOF)
		nbytes = -1;

	/* The buffer may be allocated even if formatting failed. */
	free(buf);
	return nbytes;
}
269:
/*
 * Variadic front end to vfmprintf(): format, sanitize, and write
 * to stream.  Returns whatever vfmprintf() returns.
 */
int
fmprintf(FILE *stream, const char *fmt, ...)
{
	va_list	 args;
	int	 nbytes;

	va_start(args, fmt);
	nbytes = vfmprintf(stream, fmt, args);
	va_end(args);

	return nbytes;
}
281:
/*
 * Variadic front end to vfmprintf() writing to standard output.
 * Returns whatever vfmprintf() returns.
 */
int
mprintf(const char *fmt, ...)
{
	va_list	 args;
	int	 nbytes;

	va_start(args, fmt);
	nbytes = vfmprintf(stdout, fmt, args);
	va_end(args);

	return nbytes;
}
292: }