Annotation of src/usr.bin/ssh/utf8.c, Revision 1.4
1.4 ! jsg 1: /* $OpenBSD: utf8.c,v 1.3 2016/05/30 12:57:21 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17:
18: /*
19: * Utility functions for multibyte-character handling,
20: * in particular to sanitize untrusted strings for terminal output.
21: */
22:
23: #include <sys/types.h>
24: #include <langinfo.h>
25: #include <limits.h>
26: #include <stdarg.h>
27: #include <stdio.h>
28: #include <stdlib.h>
29: #include <string.h>
30: #include <vis.h>
31: #include <wchar.h>
32:
33: #include "utf8.h"
34:
35: static int dangerous_locale(void);
1.3 schwarze 36: static int grow_dst(char **, size_t *, size_t, char **, size_t);
1.1 schwarze 37: static int vasnmprintf(char **, size_t, int *, const char *, va_list);
38:
39:
40: /*
41: * For US-ASCII and UTF-8 encodings, we can safely recover from
42: * encoding errors and from non-printable characters. For any
43: * other encodings, err on the side of caution and abort parsing:
44: * For state-dependent encodings, recovery is impossible.
45: * For arbitrary encodings, replacement of non-printable
46: * characters would be non-trivial and too fragile.
47: */
48:
/*
 * Report whether the current locale's character encoding is one we
 * cannot sanitize safely.  Returns 0 when nl_langinfo(CODESET) names
 * "US-ASCII" or "UTF-8", and 1 for every other codeset.
 */
static int
dangerous_locale(void)
{
	const char	*codeset = nl_langinfo(CODESET);

	if (strcmp(codeset, "US-ASCII") == 0)
		return 0;
	if (strcmp(codeset, "UTF-8") == 0)
		return 0;
	return 1;
}
56:
/*
 * Make sure that need bytes can be stored at *dp with at least one
 * further byte of the buffer left over behind them.
 *
 * dst    in/out: start of the heap buffer; updated if realloc() moves it.
 * sz     in/out: number of bytes currently allocated for *dst.
 * maxsz  upper bound that the allocation is never grown beyond.
 * dp     in/out: write position inside *dst; rebased if the buffer moves.
 * need   number of bytes about to be written at *dp.
 *
 * The buffer grows in steps of 128 bytes, capped at maxsz.
 * Returns 0 if the space is available (possibly after growing), or -1
 * if realloc() fails or the capped size still cannot hold the request.
 * On failure, *dst, *sz and *dp are left untouched.
 */
static int
grow_dst(char **dst, size_t *sz, size_t maxsz, char **dp, size_t need)
{
	char	*tp;
	size_t	 tsz;
	size_t	 used;

	/* Work with offsets so no out-of-bounds pointer is ever formed. */
	used = (size_t)(*dp - *dst);
	if (used + need < *sz)
		return 0;
	tsz = *sz + 128;
	if (tsz > maxsz)
		tsz = maxsz;
	/*
	 * Never claim success when the cap (or a wrapped-around size)
	 * prevents the buffer from becoming large enough.
	 */
	if (used + need >= tsz)
		return -1;
	if ((tp = realloc(*dst, tsz)) == NULL)
		return -1;
	*dp = tp + used;
	*dst = tp;
	*sz = tsz;
	return 0;
}
75:
1.1 schwarze 76: /*
77: * The following two functions limit the number of bytes written,
78: * including the terminating '\0', to sz. Unless wp is NULL,
79: * they limit the number of display columns occupied to *wp.
80: * Whichever is reached first terminates the output string.
81: * To stay close to the standard interfaces, they return the number of
82: * non-NUL bytes that would have been written if both were unlimited.
83: * If wp is NULL, newline, carriage return, and tab are allowed;
84: * otherwise, the actual number of columns occupied by what was
85: * written is returned in *wp.
86: */
87:
88: static int
89: vasnmprintf(char **str, size_t maxsz, int *wp, const char *fmt, va_list ap)
90: {
91: char *src; /* Source string returned from vasprintf. */
92: char *sp; /* Pointer into src. */
93: char *dst; /* Destination string to be returned. */
94: char *dp; /* Pointer into dst. */
95: char *tp; /* Temporary pointer for dst. */
96: size_t sz; /* Number of bytes allocated for dst. */
97: wchar_t wc; /* Wide character at sp. */
98: int len; /* Number of bytes in the character at sp. */
99: int ret; /* Number of bytes needed to format src. */
100: int width; /* Display width of the character wc. */
101: int total_width, max_width, print;
102:
1.2 schwarze 103: src = NULL;
104: if ((ret = vasprintf(&src, fmt, ap)) <= 0)
1.1 schwarze 105: goto fail;
106:
1.3 schwarze 107: sz = strlen(src) + 1;
1.2 schwarze 108: if ((dst = malloc(sz)) == NULL) {
109: free(src);
1.4 ! jsg 110: ret = -1;
1.1 schwarze 111: goto fail;
1.2 schwarze 112: }
1.1 schwarze 113:
114: if (maxsz > INT_MAX)
115: maxsz = INT_MAX;
116:
117: sp = src;
118: dp = dst;
119: ret = 0;
120: print = 1;
121: total_width = 0;
122: max_width = wp == NULL ? INT_MAX : *wp;
123: while (*sp != '\0') {
124: if ((len = mbtowc(&wc, sp, MB_CUR_MAX)) == -1) {
125: (void)mbtowc(NULL, NULL, MB_CUR_MAX);
126: if (dangerous_locale()) {
127: ret = -1;
128: break;
129: }
130: len = 1;
131: width = -1;
132: } else if (wp == NULL &&
133: (wc == L'\n' || wc == L'\r' || wc == L'\t')) {
134: /*
135: * Don't use width uninitialized; the actual
136: * value doesn't matter because total_width
137: * is only returned for wp != NULL.
138: */
139: width = 0;
140: } else if ((width = wcwidth(wc)) == -1 &&
141: dangerous_locale()) {
142: ret = -1;
143: break;
144: }
145:
146: /* Valid, printable character. */
147:
148: if (width >= 0) {
149: if (print && (dp - dst >= (int)maxsz - len ||
150: total_width > max_width - width))
151: print = 0;
152: if (print) {
1.3 schwarze 153: if (grow_dst(&dst, &sz, maxsz,
154: &dp, len) == -1) {
155: ret = -1;
156: break;
157: }
1.1 schwarze 158: total_width += width;
159: memcpy(dp, sp, len);
160: dp += len;
161: }
162: sp += len;
163: if (ret >= 0)
164: ret += len;
165: continue;
166: }
167:
168: /* Escaping required. */
169:
170: while (len > 0) {
171: if (print && (dp - dst >= (int)maxsz - 4 ||
172: total_width > max_width - 4))
173: print = 0;
174: if (print) {
1.3 schwarze 175: if (grow_dst(&dst, &sz, maxsz,
176: &dp, 4) == -1) {
177: ret = -1;
178: break;
1.1 schwarze 179: }
180: tp = vis(dp, *sp, VIS_OCTAL | VIS_ALL, 0);
181: width = tp - dp;
182: total_width += width;
183: dp = tp;
184: } else
185: width = 4;
186: len--;
187: sp++;
188: if (ret >= 0)
189: ret += width;
190: }
191: if (len > 0)
192: break;
193: }
194: free(src);
195: *dp = '\0';
196: *str = dst;
197: if (wp != NULL)
198: *wp = total_width;
199:
200: /*
201: * If the string was truncated by the width limit but
202: * would have fit into the size limit, the only sane way
203: * to report the problem is using the return value, such
204: * that the usual idiom "if (ret < 0 || ret >= sz) error"
205: * works as expected.
206: */
207:
208: if (ret < (int)maxsz && !print)
209: ret = -1;
210: return ret;
211:
212: fail:
213: if (wp != NULL)
214: *wp = 0;
1.2 schwarze 215: if (ret == 0) {
216: *str = src;
217: return 0;
218: } else {
219: *str = NULL;
220: return -1;
221: }
1.1 schwarze 222: }
223:
/*
 * Format into the caller-supplied buffer str of sz bytes, sanitizing
 * the result with vasnmprintf().
 *
 * str  destination buffer; always NUL-terminated when sz > 0.
 * sz   size of str in bytes, including the terminating NUL.
 * wp   passed through to vasnmprintf(); may be NULL.
 *
 * Returns whatever vasnmprintf() returns.
 */
int
snmprintf(char *str, size_t sz, int *wp, const char *fmt, ...)
{
	va_list	 ap;
	char	*cp = NULL;
	int	 ret;

	va_start(ap, fmt);
	ret = vasnmprintf(&cp, sz, wp, fmt, ap);
	va_end(ap);
	if (cp != NULL) {
		/* strlcpy() truncates to sz and copes with sz == 0. */
		(void)strlcpy(str, cp, sz);
		free(cp);
	} else if (sz > 0) {
		/* Never store into a zero-length buffer. */
		*str = '\0';
	}
	return ret;
}
241:
242: /*
243: * To stay close to the standard interfaces, the following functions
244: * return the number of non-NUL bytes written.
245: */
246:
/*
 * Format fmt/ap, sanitize the result with vasnmprintf() (no column
 * limit, size limit INT_MAX), and write it to stream with fputs().
 *
 * Returns the value reported by vasnmprintf(), or -1 if formatting
 * fails or fputs() returns EOF.
 */
int
vfmprintf(FILE *stream, const char *fmt, va_list ap)
{
	char	*str = NULL;
	int	 ret;

	if ((ret = vasnmprintf(&str, INT_MAX, NULL, fmt, ap)) < 0) {
		/*
		 * vasnmprintf() may report an error and still hand
		 * back a partially filled buffer; do not leak it.
		 */
		free(str);
		return -1;
	}
	if (fputs(str, stream) == EOF)
		ret = -1;
	free(str);
	return ret;
}
260:
/*
 * Variadic front end to vfmprintf(): format the arguments, sanitize
 * them, and write the result to stream.  Returns what vfmprintf()
 * returns.
 */
int
fmprintf(FILE *stream, const char *fmt, ...)
{
	va_list	 args;
	int	 nwritten;

	va_start(args, fmt);
	nwritten = vfmprintf(stream, fmt, args);
	va_end(args);

	return nwritten;
}
272:
/*
 * Variadic front end to vfmprintf() that writes to stdout.
 * Returns what vfmprintf() returns.
 */
int
mprintf(const char *fmt, ...)
{
	va_list	 args;
	int	 nwritten;

	va_start(args, fmt);
	nwritten = vfmprintf(stdout, fmt, args);
	va_end(args);

	return nwritten;
}
283: }