Annotation of src/usr.bin/ssh/utf8.c, Revision 1.5
1.5 ! djm 1: /* $OpenBSD: utf8.c,v 1.4 2017/02/02 10:54:25 jsg Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17:
18: /*
19: * Utility functions for multibyte-character handling,
20: * in particular to sanitize untrusted strings for terminal output.
21: */
22:
23: #include <sys/types.h>
24: #include <langinfo.h>
25: #include <limits.h>
26: #include <stdarg.h>
27: #include <stdio.h>
28: #include <stdlib.h>
29: #include <string.h>
30: #include <vis.h>
31: #include <wchar.h>
32:
33: #include "utf8.h"
34:
35: static int dangerous_locale(void);
1.3 schwarze 36: static int grow_dst(char **, size_t *, size_t, char **, size_t);
1.1 schwarze 37: static int vasnmprintf(char **, size_t, int *, const char *, va_list);
38:
39:
40: /*
41: * For US-ASCII and UTF-8 encodings, we can safely recover from
42: * encoding errors and from non-printable characters. For any
43: * other encodings, err on the side of caution and abort parsing:
44: * For state-dependent encodings, recovery is impossible.
45: * For arbitrary encodings, replacement of non-printable
46: * characters would be non-trivial and too fragile.
47: */
48:
/*
 * Report whether the current locale's character encoding is one we
 * cannot safely sanitize.  Returns 0 when nl_langinfo(CODESET) names
 * one of the accepted encodings listed below, and 1 for anything else.
 */
static int
dangerous_locale(void)
{
	static const char *const safe_codesets[] = {
		"US-ASCII",
		"UTF-8",
		"ANSI_X3.4-1968"
	};
	const char	*codeset;
	size_t		 i;

	codeset = nl_langinfo(CODESET);
	for (i = 0; i < sizeof(safe_codesets) / sizeof(safe_codesets[0]); i++) {
		if (strcmp(codeset, safe_codesets[i]) == 0)
			return 0;
	}
	return 1;
}
57:
/*
 * Make sure the buffer *dst (currently *sz bytes) has room for `need'
 * more bytes at the write position *dp, plus one spare byte.
 *
 * If fewer than need + 1 bytes remain, the buffer is reallocated to
 * *sz + 128 bytes, capped at maxsz, and *dst, *dp and *sz are updated
 * to describe the new allocation; the write offset is preserved.
 * The caller is expected to have verified beforehand that the write
 * fits within maxsz.
 *
 * Returns 0 on success and -1 if realloc(3) fails, in which case
 * *dst, *dp and *sz are left untouched and *dst is still valid.
 */
static int
grow_dst(char **dst, size_t *sz, size_t maxsz, char **dp, size_t need)
{
	char	*tp;
	size_t	 used, tsz;

	/*
	 * Compare integer offsets rather than pointers: forming
	 * *dp + need may point more than one past the end of the
	 * allocation, which is undefined behaviour.
	 */
	used = (size_t)(*dp - *dst);
	if (used < *sz && need < *sz - used)
		return 0;
	tsz = *sz + 128;
	if (tsz > maxsz)
		tsz = maxsz;
	if ((tp = realloc(*dst, tsz)) == NULL)
		return -1;
	/* The offset was taken before realloc() could invalidate *dst. */
	*dp = tp + used;
	*dst = tp;
	*sz = tsz;
	return 0;
}
76:
1.1 schwarze 77: /*
78: * The following two functions limit the number of bytes written,
79: * including the terminating '\0', to sz. Unless wp is NULL,
80: * they limit the number of display columns occupied to *wp.
81: * Whichever is reached first terminates the output string.
82: * To stay close to the standard interfaces, they return the number of
83: * non-NUL bytes that would have been written if both were unlimited.
84: * If wp is NULL, newline, carriage return, and tab are allowed;
85: * otherwise, the actual number of columns occupied by what was
86: * written is returned in *wp.
87: */
88:
89: static int
90: vasnmprintf(char **str, size_t maxsz, int *wp, const char *fmt, va_list ap)
91: {
92: char *src; /* Source string returned from vasprintf. */
93: char *sp; /* Pointer into src. */
94: char *dst; /* Destination string to be returned. */
95: char *dp; /* Pointer into dst. */
96: char *tp; /* Temporary pointer for dst. */
97: size_t sz; /* Number of bytes allocated for dst. */
98: wchar_t wc; /* Wide character at sp. */
99: int len; /* Number of bytes in the character at sp. */
100: int ret; /* Number of bytes needed to format src. */
101: int width; /* Display width of the character wc. */
102: int total_width, max_width, print;
103:
1.2 schwarze 104: src = NULL;
105: if ((ret = vasprintf(&src, fmt, ap)) <= 0)
1.1 schwarze 106: goto fail;
107:
1.3 schwarze 108: sz = strlen(src) + 1;
1.2 schwarze 109: if ((dst = malloc(sz)) == NULL) {
110: free(src);
1.4 jsg 111: ret = -1;
1.1 schwarze 112: goto fail;
1.2 schwarze 113: }
1.1 schwarze 114:
115: if (maxsz > INT_MAX)
116: maxsz = INT_MAX;
117:
118: sp = src;
119: dp = dst;
120: ret = 0;
121: print = 1;
122: total_width = 0;
123: max_width = wp == NULL ? INT_MAX : *wp;
124: while (*sp != '\0') {
125: if ((len = mbtowc(&wc, sp, MB_CUR_MAX)) == -1) {
126: (void)mbtowc(NULL, NULL, MB_CUR_MAX);
127: if (dangerous_locale()) {
128: ret = -1;
129: break;
130: }
131: len = 1;
132: width = -1;
133: } else if (wp == NULL &&
134: (wc == L'\n' || wc == L'\r' || wc == L'\t')) {
135: /*
136: * Don't use width uninitialized; the actual
137: * value doesn't matter because total_width
138: * is only returned for wp != NULL.
139: */
140: width = 0;
141: } else if ((width = wcwidth(wc)) == -1 &&
142: dangerous_locale()) {
143: ret = -1;
144: break;
145: }
146:
147: /* Valid, printable character. */
148:
149: if (width >= 0) {
150: if (print && (dp - dst >= (int)maxsz - len ||
151: total_width > max_width - width))
152: print = 0;
153: if (print) {
1.3 schwarze 154: if (grow_dst(&dst, &sz, maxsz,
155: &dp, len) == -1) {
156: ret = -1;
157: break;
158: }
1.1 schwarze 159: total_width += width;
160: memcpy(dp, sp, len);
161: dp += len;
162: }
163: sp += len;
164: if (ret >= 0)
165: ret += len;
166: continue;
167: }
168:
169: /* Escaping required. */
170:
171: while (len > 0) {
172: if (print && (dp - dst >= (int)maxsz - 4 ||
173: total_width > max_width - 4))
174: print = 0;
175: if (print) {
1.3 schwarze 176: if (grow_dst(&dst, &sz, maxsz,
177: &dp, 4) == -1) {
178: ret = -1;
179: break;
1.1 schwarze 180: }
181: tp = vis(dp, *sp, VIS_OCTAL | VIS_ALL, 0);
182: width = tp - dp;
183: total_width += width;
184: dp = tp;
185: } else
186: width = 4;
187: len--;
188: sp++;
189: if (ret >= 0)
190: ret += width;
191: }
192: if (len > 0)
193: break;
194: }
195: free(src);
196: *dp = '\0';
197: *str = dst;
198: if (wp != NULL)
199: *wp = total_width;
200:
201: /*
202: * If the string was truncated by the width limit but
203: * would have fit into the size limit, the only sane way
204: * to report the problem is using the return value, such
205: * that the usual idiom "if (ret < 0 || ret >= sz) error"
206: * works as expected.
207: */
208:
209: if (ret < (int)maxsz && !print)
210: ret = -1;
211: return ret;
212:
213: fail:
214: if (wp != NULL)
215: *wp = 0;
1.2 schwarze 216: if (ret == 0) {
217: *str = src;
218: return 0;
219: } else {
220: *str = NULL;
221: return -1;
222: }
1.1 schwarze 223: }
224:
/*
 * Format into the caller-supplied buffer str of sz bytes, sanitizing
 * the output via vasnmprintf().  wp has the same meaning as for
 * vasnmprintf().  If no result string is produced, str is set to the
 * empty string, provided sz permits writing the terminating NUL.
 * Returns the value returned by vasnmprintf().
 */
int
snmprintf(char *str, size_t sz, int *wp, const char *fmt, ...)
{
	va_list	 ap;
	char	*cp = NULL;
	int	 ret;

	va_start(ap, fmt);
	ret = vasnmprintf(&cp, sz, wp, fmt, ap);
	va_end(ap);
	if (cp != NULL) {
		(void)strlcpy(str, cp, sz);
		free(cp);
	} else if (sz > 0) {
		/* Never write past a zero-sized buffer. */
		*str = '\0';
	}
	return ret;
}
242:
243: /*
244: * To stay close to the standard interfaces, the following functions
245: * return the number of non-NUL bytes written.
246: */
247:
/*
 * Format fmt/ap through vasnmprintf() with no column limit and a byte
 * limit of INT_MAX, then write the sanitized string to stream with
 * fputs(3).  Returns the value from vasnmprintf(), or -1 if formatting
 * or writing fails.
 */
int
vfmprintf(FILE *stream, const char *fmt, va_list ap)
{
	char	*str = NULL;
	int	 ret;

	if ((ret = vasnmprintf(&str, INT_MAX, NULL, fmt, ap)) < 0) {
		/*
		 * vasnmprintf() can fail after allocating the result
		 * string; release it instead of leaking it.
		 */
		free(str);
		return -1;
	}
	if (fputs(str, stream) == EOF)
		ret = -1;
	free(str);
	return ret;
}
261:
/*
 * Variadic front end to vfmprintf(): collect the arguments following
 * fmt and pass them on together with stream.  Returns whatever
 * vfmprintf() returns.
 */
int
fmprintf(FILE *stream, const char *fmt, ...)
{
	va_list	 args;
	int	 nbytes;

	va_start(args, fmt);
	nbytes = vfmprintf(stream, fmt, args);
	va_end(args);

	return nbytes;
}
273:
/*
 * Variadic front end to vfmprintf() that always writes to stdout.
 * Returns whatever vfmprintf() returns.
 */
int
mprintf(const char *fmt, ...)
{
	va_list	 args;
	int	 nbytes;

	va_start(args, fmt);
	nbytes = vfmprintf(stdout, fmt, args);
	va_end(args);

	return nbytes;
}
284: }