Annotation of src/usr.bin/ssh/utf8.c, Revision 1.6
1.6 ! schwarze 1: /* $OpenBSD: utf8.c,v 1.5 2017/02/19 00:10:57 djm Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17:
18: /*
19: * Utility functions for multibyte-character handling,
20: * in particular to sanitize untrusted strings for terminal output.
21: */
22:
23: #include <sys/types.h>
24: #include <langinfo.h>
25: #include <limits.h>
26: #include <stdarg.h>
27: #include <stdio.h>
28: #include <stdlib.h>
29: #include <string.h>
30: #include <vis.h>
31: #include <wchar.h>
32:
33: #include "utf8.h"
34:
35: static int dangerous_locale(void);
1.3 schwarze 36: static int grow_dst(char **, size_t *, size_t, char **, size_t);
1.1 schwarze 37: static int vasnmprintf(char **, size_t, int *, const char *, va_list);
38:
39:
/*
 * Recovery from encoding errors and from non-printable characters
 * is only safe for US-ASCII and UTF-8.  For every other encoding,
 * err on the side of caution and make the caller abort parsing:
 * state-dependent encodings cannot be resynchronized at all, and
 * for arbitrary encodings, replacing non-printable characters
 * would be non-trivial and too fragile.
 *
 * Returns 0 if the codeset reported by nl_langinfo(CODESET) is one
 * of the names accepted below, 1 otherwise.
 */
static int
dangerous_locale(void)
{
	/* Codeset names that are treated as safe to recover in. */
	static const char *const safe_codesets[] = {
		"US-ASCII",
		"UTF-8",
		"ANSI_X3.4-1968",
		"646",
		""
	};
	const char	*codeset;
	size_t		 i;

	codeset = nl_langinfo(CODESET);
	for (i = 0; i < sizeof(safe_codesets) / sizeof(safe_codesets[0]); i++) {
		if (strcmp(codeset, safe_codesets[i]) == 0)
			return 0;
	}
	return 1;
}
58:
/*
 * Make sure the buffer *dst of size *sz has room for more than
 * "need" additional bytes at the write position *dp.
 *
 * If there is not enough room, the buffer is reallocated to 128
 * bytes more than before, capped at maxsz, and *dst, *sz, and *dp
 * are updated to describe the new buffer; *dp keeps its offset.
 * The buffer is grown by one step only: it is up to the caller to
 * have checked beforehand that "need" fits below maxsz.
 *
 * Returns 0 on success, or -1 if realloc() fails, in which case
 * *dst, *sz, and *dp are left untouched and remain valid.
 */
static int
grow_dst(char **dst, size_t *sz, size_t maxsz, char **dp, size_t need)
{
	char	*tp;
	size_t	 tsz;
	size_t	 used;

	/*
	 * Compare offsets rather than pointers: computing *dp + need
	 * may yield a pointer beyond one past the end of the buffer,
	 * which is undefined behaviour.
	 */
	used = (size_t)(*dp - *dst);
	if (used <= *sz && need < *sz - used)
		return 0;

	tsz = *sz + 128;
	if (tsz > maxsz)
		tsz = maxsz;
	if ((tp = realloc(*dst, tsz)) == NULL)
		return -1;
	*dp = tp + used;
	*dst = tp;
	*sz = tsz;
	return 0;
}
77:
1.1 schwarze 78: /*
79: * The following two functions limit the number of bytes written,
80: * including the terminating '\0', to sz. Unless wp is NULL,
81: * they limit the number of display columns occupied to *wp.
82: * Whichever is reached first terminates the output string.
83: * To stay close to the standard interfaces, they return the number of
84: * non-NUL bytes that would have been written if both were unlimited.
85: * If wp is NULL, newline, carriage return, and tab are allowed;
86: * otherwise, the actual number of columns occupied by what was
87: * written is returned in *wp.
88: */
89:
90: static int
91: vasnmprintf(char **str, size_t maxsz, int *wp, const char *fmt, va_list ap)
92: {
93: char *src; /* Source string returned from vasprintf. */
94: char *sp; /* Pointer into src. */
95: char *dst; /* Destination string to be returned. */
96: char *dp; /* Pointer into dst. */
97: char *tp; /* Temporary pointer for dst. */
98: size_t sz; /* Number of bytes allocated for dst. */
99: wchar_t wc; /* Wide character at sp. */
100: int len; /* Number of bytes in the character at sp. */
101: int ret; /* Number of bytes needed to format src. */
102: int width; /* Display width of the character wc. */
103: int total_width, max_width, print;
104:
1.2 schwarze 105: src = NULL;
106: if ((ret = vasprintf(&src, fmt, ap)) <= 0)
1.1 schwarze 107: goto fail;
108:
1.3 schwarze 109: sz = strlen(src) + 1;
1.2 schwarze 110: if ((dst = malloc(sz)) == NULL) {
111: free(src);
1.4 jsg 112: ret = -1;
1.1 schwarze 113: goto fail;
1.2 schwarze 114: }
1.1 schwarze 115:
116: if (maxsz > INT_MAX)
117: maxsz = INT_MAX;
118:
119: sp = src;
120: dp = dst;
121: ret = 0;
122: print = 1;
123: total_width = 0;
124: max_width = wp == NULL ? INT_MAX : *wp;
125: while (*sp != '\0') {
126: if ((len = mbtowc(&wc, sp, MB_CUR_MAX)) == -1) {
127: (void)mbtowc(NULL, NULL, MB_CUR_MAX);
128: if (dangerous_locale()) {
129: ret = -1;
130: break;
131: }
132: len = 1;
133: width = -1;
134: } else if (wp == NULL &&
135: (wc == L'\n' || wc == L'\r' || wc == L'\t')) {
136: /*
137: * Don't use width uninitialized; the actual
138: * value doesn't matter because total_width
139: * is only returned for wp != NULL.
140: */
141: width = 0;
142: } else if ((width = wcwidth(wc)) == -1 &&
143: dangerous_locale()) {
144: ret = -1;
145: break;
146: }
147:
148: /* Valid, printable character. */
149:
150: if (width >= 0) {
151: if (print && (dp - dst >= (int)maxsz - len ||
152: total_width > max_width - width))
153: print = 0;
154: if (print) {
1.3 schwarze 155: if (grow_dst(&dst, &sz, maxsz,
156: &dp, len) == -1) {
157: ret = -1;
158: break;
159: }
1.1 schwarze 160: total_width += width;
161: memcpy(dp, sp, len);
162: dp += len;
163: }
164: sp += len;
165: if (ret >= 0)
166: ret += len;
167: continue;
168: }
169:
170: /* Escaping required. */
171:
172: while (len > 0) {
173: if (print && (dp - dst >= (int)maxsz - 4 ||
174: total_width > max_width - 4))
175: print = 0;
176: if (print) {
1.3 schwarze 177: if (grow_dst(&dst, &sz, maxsz,
178: &dp, 4) == -1) {
179: ret = -1;
180: break;
1.1 schwarze 181: }
182: tp = vis(dp, *sp, VIS_OCTAL | VIS_ALL, 0);
183: width = tp - dp;
184: total_width += width;
185: dp = tp;
186: } else
187: width = 4;
188: len--;
189: sp++;
190: if (ret >= 0)
191: ret += width;
192: }
193: if (len > 0)
194: break;
195: }
196: free(src);
197: *dp = '\0';
198: *str = dst;
199: if (wp != NULL)
200: *wp = total_width;
201:
202: /*
203: * If the string was truncated by the width limit but
204: * would have fit into the size limit, the only sane way
205: * to report the problem is using the return value, such
206: * that the usual idiom "if (ret < 0 || ret >= sz) error"
207: * works as expected.
208: */
209:
210: if (ret < (int)maxsz && !print)
211: ret = -1;
212: return ret;
213:
214: fail:
215: if (wp != NULL)
216: *wp = 0;
1.2 schwarze 217: if (ret == 0) {
218: *str = src;
219: return 0;
220: } else {
221: *str = NULL;
222: return -1;
223: }
1.1 schwarze 224: }
225:
/*
 * Sanitizing counterpart of snprintf(3): format into the caller's
 * buffer str of sz bytes, escaping whatever vasnmprintf() considers
 * unsafe.  Unless wp is NULL, *wp limits the number of display
 * columns on input and receives the number of columns written on
 * output.  Returns whatever vasnmprintf() returns, i.e. -1 on
 * failure or truncation by the width limit.
 */
int
snmprintf(char *str, size_t sz, int *wp, const char *fmt, ...)
{
	va_list	 ap;
	char	*cp = NULL;
	int	 ret;

	va_start(ap, fmt);
	ret = vasnmprintf(&cp, sz, wp, fmt, ap);
	va_end(ap);
	if (cp != NULL) {
		/* strlcpy() copes with sz == 0 on its own. */
		(void)strlcpy(str, cp, sz);
		free(cp);
	} else if (sz > 0) {
		/* Never write to a buffer of size zero. */
		*str = '\0';
	}
	return ret;
}
243:
244: /*
245: * To stay close to the standard interfaces, the following functions
246: * return the number of non-NUL bytes written.
247: */
248:
/*
 * Sanitizing counterpart of vfprintf(3): format fmt/ap with
 * vasnmprintf(), without a column limit, and write the result to
 * stream.  Returns the byte count reported by vasnmprintf(), or -1
 * if formatting or writing fails.
 */
int
vfmprintf(FILE *stream, const char *fmt, va_list ap)
{
	char	*str = NULL;
	int	 ret;

	if ((ret = vasnmprintf(&str, INT_MAX, NULL, fmt, ap)) < 0) {
		/*
		 * vasnmprintf() may hand back a partial string even
		 * when it fails; release it instead of leaking it.
		 */
		free(str);
		return -1;
	}
	if (fputs(str, stream) == EOF)
		ret = -1;
	free(str);
	return ret;
}
262:
/*
 * Sanitizing counterpart of fprintf(3): collect the variable
 * arguments and let vfmprintf() do the work.  Returns the result
 * of vfmprintf().
 */
int
fmprintf(FILE *stream, const char *fmt, ...)
{
	va_list	 args;
	int	 rv;

	va_start(args, fmt);
	rv = vfmprintf(stream, fmt, args);
	va_end(args);

	return rv;
}
274:
/*
 * Sanitizing counterpart of printf(3): same as fmprintf(), but
 * always writing to stdout.  Returns the result of vfmprintf().
 */
int
mprintf(const char *fmt, ...)
{
	va_list	 args;
	int	 rv;

	va_start(args, fmt);
	rv = vfmprintf(stdout, fmt, args);
	va_end(args);

	return rv;
}
285: }