Annotation of src/usr.bin/ssh/utf8.c, Revision 1.2
1.2 ! schwarze 1: /* $OpenBSD: utf8.c,v 1.1 2016/05/25 23:48:45 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17:
18: /*
19: * Utility functions for multibyte-character handling,
20: * in particular to sanitize untrusted strings for terminal output.
21: */
22:
23: #include <sys/types.h>
24: #include <langinfo.h>
25: #include <limits.h>
26: #include <stdarg.h>
27: #include <stdio.h>
28: #include <stdlib.h>
29: #include <string.h>
30: #include <vis.h>
31: #include <wchar.h>
32:
33: #include "utf8.h"
34:
35: static int dangerous_locale(void);
36: static int vasnmprintf(char **, size_t, int *, const char *, va_list);
37:
38:
39: /*
40: * For US-ASCII and UTF-8 encodings, we can safely recover from
41: * encoding errors and from non-printable characters. For any
42: * other encodings, err on the side of caution and abort parsing:
43: * For state-dependent encodings, recovery is impossible.
44: * For arbitrary encodings, replacement of non-printable
45: * characters would be non-trivial and too fragile.
46: */
47:
/*
 * Report whether the character encoding of the current locale is one
 * we cannot sanitize safely.  Returns 0 when nl_langinfo(CODESET)
 * names US-ASCII or UTF-8, and 1 for every other codeset.
 */
static int
dangerous_locale(void)
{
	const char	*codeset;

	codeset = nl_langinfo(CODESET);
	if (strcmp(codeset, "US-ASCII") == 0)
		return 0;
	if (strcmp(codeset, "UTF-8") == 0)
		return 0;
	return 1;
}
55:
56: /*
57: * The following two functions limit the number of bytes written,
58: * including the terminating '\0', to sz. Unless wp is NULL,
59: * they limit the number of display columns occupied to *wp.
60: * Whichever is reached first terminates the output string.
61: * To stay close to the standard interfaces, they return the number of
62: * non-NUL bytes that would have been written if both were unlimited.
63: * If wp is NULL, newline, carriage return, and tab are allowed;
64: * otherwise, the actual number of columns occupied by what was
65: * written is returned in *wp.
66: */
67:
68: static int
69: vasnmprintf(char **str, size_t maxsz, int *wp, const char *fmt, va_list ap)
70: {
71: char *src; /* Source string returned from vasprintf. */
72: char *sp; /* Pointer into src. */
73: char *dst; /* Destination string to be returned. */
74: char *dp; /* Pointer into dst. */
75: char *tp; /* Temporary pointer for dst. */
76: size_t sz; /* Number of bytes allocated for dst. */
77: size_t tsz; /* Temporary size while extending dst. */
78: wchar_t wc; /* Wide character at sp. */
79: int len; /* Number of bytes in the character at sp. */
80: int ret; /* Number of bytes needed to format src. */
81: int width; /* Display width of the character wc. */
82: int total_width, max_width, print;
83:
1.2 ! schwarze 84: src = NULL;
! 85: if ((ret = vasprintf(&src, fmt, ap)) <= 0)
1.1 schwarze 86: goto fail;
87:
88: sz = strlen(src);
1.2 ! schwarze 89: if ((dst = malloc(sz)) == NULL) {
! 90: free(src);
1.1 schwarze 91: goto fail;
1.2 ! schwarze 92: }
1.1 schwarze 93:
94: if (maxsz > INT_MAX)
95: maxsz = INT_MAX;
96:
97: sp = src;
98: dp = dst;
99: ret = 0;
100: print = 1;
101: total_width = 0;
102: max_width = wp == NULL ? INT_MAX : *wp;
103: while (*sp != '\0') {
104: if ((len = mbtowc(&wc, sp, MB_CUR_MAX)) == -1) {
105: (void)mbtowc(NULL, NULL, MB_CUR_MAX);
106: if (dangerous_locale()) {
107: ret = -1;
108: break;
109: }
110: len = 1;
111: width = -1;
112: } else if (wp == NULL &&
113: (wc == L'\n' || wc == L'\r' || wc == L'\t')) {
114: /*
115: * Don't use width uninitialized; the actual
116: * value doesn't matter because total_width
117: * is only returned for wp != NULL.
118: */
119: width = 0;
120: } else if ((width = wcwidth(wc)) == -1 &&
121: dangerous_locale()) {
122: ret = -1;
123: break;
124: }
125:
126: /* Valid, printable character. */
127:
128: if (width >= 0) {
129: if (print && (dp - dst >= (int)maxsz - len ||
130: total_width > max_width - width))
131: print = 0;
132: if (print) {
133: total_width += width;
134: memcpy(dp, sp, len);
135: dp += len;
136: }
137: sp += len;
138: if (ret >= 0)
139: ret += len;
140: continue;
141: }
142:
143: /* Escaping required. */
144:
145: while (len > 0) {
146: if (print && (dp - dst >= (int)maxsz - 4 ||
147: total_width > max_width - 4))
148: print = 0;
149: if (print) {
150: if (dp + 4 >= dst + sz) {
151: tsz = sz + 128;
152: if (tsz > maxsz)
153: tsz = maxsz;
154: tp = realloc(dst, tsz);
155: if (tp == NULL) {
156: ret = -1;
157: break;
158: }
159: dp = tp + (dp - dst);
160: dst = tp;
161: sz = tsz;
162: }
163: tp = vis(dp, *sp, VIS_OCTAL | VIS_ALL, 0);
164: width = tp - dp;
165: total_width += width;
166: dp = tp;
167: } else
168: width = 4;
169: len--;
170: sp++;
171: if (ret >= 0)
172: ret += width;
173: }
174: if (len > 0)
175: break;
176: }
177: free(src);
178: *dp = '\0';
179: *str = dst;
180: if (wp != NULL)
181: *wp = total_width;
182:
183: /*
184: * If the string was truncated by the width limit but
185: * would have fit into the size limit, the only sane way
186: * to report the problem is using the return value, such
187: * that the usual idiom "if (ret < 0 || ret >= sz) error"
188: * works as expected.
189: */
190:
191: if (ret < (int)maxsz && !print)
192: ret = -1;
193: return ret;
194:
195: fail:
196: if (wp != NULL)
197: *wp = 0;
1.2 ! schwarze 198: if (ret == 0) {
! 199: *str = src;
! 200: return 0;
! 201: } else {
! 202: *str = NULL;
! 203: return -1;
! 204: }
1.1 schwarze 205: }
206:
/*
 * Format into the caller-provided buffer str of sz bytes, sanitizing
 * the output for the terminal by way of vasnmprintf().
 * wp has the same meaning as for vasnmprintf(): if not NULL, *wp
 * limits the display columns on input and receives the number of
 * columns used on output.
 * Returns whatever vasnmprintf() returns.  When no string is handed
 * back, str is set to the empty string, provided that sz is not 0;
 * with sz == 0, str is never written to.
 */
int
snmprintf(char *str, size_t sz, int *wp, const char *fmt, ...)
{
	va_list	 ap;
	char	*cp = NULL;
	int	 ret;

	va_start(ap, fmt);
	ret = vasnmprintf(&cp, sz, wp, fmt, ap);
	va_end(ap);
	if (cp != NULL) {
		(void)strlcpy(str, cp, sz);
		free(cp);
	} else if (sz > 0)
		*str = '\0';
	return ret;
}
224:
225: /*
226: * To stay close to the standard interfaces, the following functions
227: * return the number of non-NUL bytes written.
228: */
229:
/*
 * Format fmt and ap with vasnmprintf(), without any size or width
 * limit of practical relevance, and write the result to stream.
 * Returns the value reported by vasnmprintf(), or -1 if formatting
 * failed or fputs(3) reported EOF.
 */
int
vfmprintf(FILE *stream, const char *fmt, va_list ap)
{
	char	*str = NULL;
	int	 ret;

	if ((ret = vasnmprintf(&str, INT_MAX, NULL, fmt, ap)) < 0) {
		/*
		 * vasnmprintf() can hand back a partially filled
		 * buffer even on failure; do not leak it.
		 */
		free(str);
		return -1;
	}
	if (fputs(str, stream) == EOF)
		ret = -1;
	free(str);
	return ret;
}
243:
/*
 * Variadic front end to vfmprintf(): collect the arguments following
 * fmt and write the sanitized output to stream.
 * Returns the result of vfmprintf() unchanged.
 */
int
fmprintf(FILE *stream, const char *fmt, ...)
{
	va_list	 args;
	int	 nbytes;

	va_start(args, fmt);
	nbytes = vfmprintf(stream, fmt, args);
	va_end(args);

	return nbytes;
}
255:
/*
 * Variadic front end to vfmprintf() that always writes to stdout.
 * Returns the result of vfmprintf() unchanged.
 */
int
mprintf(const char *fmt, ...)
{
	va_list	 args;
	int	 nbytes;

	va_start(args, fmt);
	nbytes = vfmprintf(stdout, fmt, args);
	va_end(args);

	return nbytes;
}
266: }