[BACK]Return to utf8.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / ssh

Annotation of src/usr.bin/ssh/utf8.c, Revision 1.4

1.4     ! jsg         1: /* $OpenBSD: utf8.c,v 1.3 2016/05/30 12:57:21 schwarze Exp $ */
1.1       schwarze    2: /*
                      3:  * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
                      4:  *
                      5:  * Permission to use, copy, modify, and distribute this software for any
                      6:  * purpose with or without fee is hereby granted, provided that the above
                      7:  * copyright notice and this permission notice appear in all copies.
                      8:  *
                      9:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     10:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     11:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     12:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     13:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     14:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     15:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     16:  */
                     17:
                     18: /*
                     19:  * Utility functions for multibyte-character handling,
                     20:  * in particular to sanitize untrusted strings for terminal output.
                     21:  */
                     22:
                     23: #include <sys/types.h>
                     24: #include <langinfo.h>
                     25: #include <limits.h>
                     26: #include <stdarg.h>
                     27: #include <stdio.h>
                     28: #include <stdlib.h>
                     29: #include <string.h>
                     30: #include <vis.h>
                     31: #include <wchar.h>
                     32:
                     33: #include "utf8.h"
                     34:
                     35: static int      dangerous_locale(void);        /* locale codeset check */
                         /* Enlarges the output buffer of vasnmprintf() in 128-byte steps. */
1.3       schwarze   36: static int      grow_dst(char **, size_t *, size_t, char **, size_t);
                         /* Formatting and sanitizing back end of snmprintf() and vfmprintf(). */
1.1       schwarze   37: static int      vasnmprintf(char **, size_t, int *, const char *, va_list);
                     38:
                     39:
                     40: /*
                     41:  * For US-ASCII and UTF-8 encodings, we can safely recover from
                     42:  * encoding errors and from non-printable characters.  For any
                     43:  * other encodings, err on the side of caution and abort parsing:
                     44:  * For state-dependent encodings, recovery is impossible.
                     45:  * For arbitrary encodings, replacement of non-printable
                     46:  * characters would be non-trivial and too fragile.
                     47:  */
                     48:
                     49: static int
                     50: dangerous_locale(void) {
                     51:        char    *loc;
                     52:
                     53:        loc = nl_langinfo(CODESET);
                     54:        return strcmp(loc, "US-ASCII") && strcmp(loc, "UTF-8");
                     55: }
                     56:
/*
 * Make sure that need more bytes plus a terminating NUL fit into the
 * buffer *dst of *sz bytes, starting at the write position *dp.
 * If they do not, enlarge the buffer by 128 bytes, but to no more
 * than maxsz bytes, and update *dst, *sz, and *dp to match.
 * The buffer is enlarged by a single step only; the new size is not
 * checked against need again.
 *
 * Returns 0 on success, or -1 if realloc(3) fails.  On failure,
 * *dst, *sz, and *dp are left untouched and *dst is still valid.
 */
static int
grow_dst(char **dst, size_t *sz, size_t maxsz, char **dp, size_t need)
{
	char	*tp;
	size_t	 used, tsz;

	/*
	 * Compare byte counts rather than pointers: *dp + need may lie
	 * more than one byte past the end of the buffer, and forming
	 * such a pointer is undefined behaviour.
	 */
	used = (size_t)(*dp - *dst);
	if (used < *sz && need < *sz - used)
		return 0;

	tsz = *sz + 128;
	if (tsz > maxsz)
		tsz = maxsz;
	if ((tp = realloc(*dst, tsz)) == NULL)
		return -1;

	/* The buffer may have moved; keep the write offset. */
	*dp = tp + used;
	*dst = tp;
	*sz = tsz;
	return 0;
}
                     75:
1.1       schwarze   76: /*
                     77:  * The following two functions limit the number of bytes written,
                     78:  * including the terminating '\0', to sz.  Unless wp is NULL,
                     79:  * they limit the number of display columns occupied to *wp.
                     80:  * Whichever is reached first terminates the output string.
                     81:  * To stay close to the standard interfaces, they return the number of
                     82:  * non-NUL bytes that would have been written if both were unlimited.
                     83:  * If wp is NULL, newline, carriage return, and tab are allowed;
                     84:  * otherwise, the actual number of columns occupied by what was
                     85:  * written is returned in *wp.
                     86:  */
                     87:
                         /*
                          * vasnmprintf() formats fmt and ap with vasprintf(3) and stores an
                          * allocated, sanitized copy of the result in *str.  Bytes that
                          * mbtowc(3) rejects and characters for which wcwidth(3) returns -1
                          * are replaced by vis(3) VIS_OCTAL | VIS_ALL escapes, unless
                          * dangerous_locale() is true, in which case processing stops and
                          * -1 is returned.
                          *
                          * *str is set on every return path: to NULL if vasprintf(3) or
                          * malloc(3) fails, and to an allocated string otherwise, even
                          * when the return value is -1.
                          */
                     88: static int
                     89: vasnmprintf(char **str, size_t maxsz, int *wp, const char *fmt, va_list ap)
                     90: {
                     91:        char    *src;   /* Source string returned from vasprintf. */
                     92:        char    *sp;    /* Pointer into src. */
                     93:        char    *dst;   /* Destination string to be returned. */
                     94:        char    *dp;    /* Pointer into dst. */
                     95:        char    *tp;    /* Temporary pointer for dst. */
                     96:        size_t   sz;    /* Number of bytes allocated for dst. */
                     97:        wchar_t  wc;    /* Wide character at sp. */
                     98:        int      len;   /* Number of bytes in the character at sp. */
                     99:        int      ret;   /* Number of bytes needed to format src. */
                    100:        int      width; /* Display width of the character wc. */
                    101:        int      total_width, max_width, print;
                    102:
                                /*
                                 * A result <= 0 means vasprintf() failed or produced an
                                 * empty string; the fail label tells the two apart by ret.
                                 */
1.2       schwarze  103:        src = NULL;
                    104:        if ((ret = vasprintf(&src, fmt, ap)) <= 0)
1.1       schwarze  105:                goto fail;
                    106:
                                /*
                                 * Start with room for an unescaped copy of src;
                                 * grow_dst() enlarges dst when escapes need more.
                                 */
1.3       schwarze  107:        sz = strlen(src) + 1;
1.2       schwarze  108:        if ((dst = malloc(sz)) == NULL) {
                    109:                free(src);
1.4     ! jsg       110:                ret = -1;
1.1       schwarze  111:                goto fail;
1.2       schwarze  112:        }
1.1       schwarze  113:
                                /* The limit is cast to int in the comparisons below. */
                    114:        if (maxsz > INT_MAX)
                    115:                maxsz = INT_MAX;
                    116:
                                /*
                                 * print is cleared as soon as a limit is reached; after
                                 * that, nothing more is copied but ret keeps counting.
                                 */
                    117:        sp = src;
                    118:        dp = dst;
                    119:        ret = 0;
                    120:        print = 1;
                    121:        total_width = 0;
                    122:        max_width = wp == NULL ? INT_MAX : *wp;
                    123:        while (*sp != '\0') {
                    124:                if ((len = mbtowc(&wc, sp, MB_CUR_MAX)) == -1) {
                                                /*
                                                 * Invalid byte sequence: reset the conversion
                                                 * state and escape a single byte (width -1).
                                                 */
                    125:                        (void)mbtowc(NULL, NULL, MB_CUR_MAX);
                    126:                        if (dangerous_locale()) {
                    127:                                ret = -1;
                    128:                                break;
                    129:                        }
                    130:                        len = 1;
                    131:                        width = -1;
                    132:                } else if (wp == NULL &&
                    133:                    (wc == L'\n' || wc == L'\r' || wc == L'\t')) {
                    134:                        /*
                    135:                         * Don't use width uninitialized; the actual
                    136:                         * value doesn't matter because total_width
                    137:                         * is only returned for wp != NULL.
                    138:                         */
                    139:                        width = 0;
                    140:                } else if ((width = wcwidth(wc)) == -1 &&
                    141:                    dangerous_locale()) {
                    142:                        ret = -1;
                    143:                        break;
                    144:                }
                    145:
                    146:                /* Valid, printable character. */
                    147:
                    148:                if (width >= 0) {
                    149:                        if (print && (dp - dst >= (int)maxsz - len ||
                    150:                            total_width > max_width - width))
                    151:                                print = 0;
                    152:                        if (print) {
1.3       schwarze  153:                                if (grow_dst(&dst, &sz, maxsz,
                    154:                                    &dp, len) == -1) {
                    155:                                        ret = -1;
                    156:                                        break;
                    157:                                }
1.1       schwarze  158:                                total_width += width;
                    159:                                memcpy(dp, sp, len);
                    160:                                dp += len;
                    161:                        }
                    162:                        sp += len;
                    163:                        if (ret >= 0)
                    164:                                ret += len;
                    165:                        continue;
                    166:                }
                    167:
                    168:                /* Escaping required. */
                    169:
                                        /*
                                         * Encode every byte of the character with vis(3),
                                         * reserving 4 output bytes and 4 columns for each.
                                         */
                    170:                while (len > 0) {
                    171:                        if (print && (dp - dst >= (int)maxsz - 4 ||
                    172:                            total_width > max_width - 4))
                    173:                                print = 0;
                    174:                        if (print) {
1.3       schwarze  175:                                if (grow_dst(&dst, &sz, maxsz,
                    176:                                    &dp, 4) == -1) {
                    177:                                        ret = -1;
                    178:                                        break;
1.1       schwarze  179:                                }
                    180:                                tp = vis(dp, *sp, VIS_OCTAL | VIS_ALL, 0);
                    181:                                width = tp - dp;
                    182:                                total_width += width;
                    183:                                dp = tp;
                    184:                        } else
                                                        /* Not copied: count the reserved 4 bytes. */
                    185:                                width = 4;
                    186:                        len--;
                    187:                        sp++;
                    188:                        if (ret >= 0)
                    189:                                ret += width;
                    190:                }
                                        /* Only a grow_dst() failure leaves bytes unprocessed. */
                    191:                if (len > 0)
                    192:                        break;
                    193:        }
                    194:        free(src);
                    195:        *dp = '\0';
                    196:        *str = dst;
                    197:        if (wp != NULL)
                    198:                *wp = total_width;
                    199:
                    200:        /*
                    201:         * If the string was truncated by the width limit but
                    202:         * would have fit into the size limit, the only sane way
                    203:         * to report the problem is using the return value, such
                    204:         * that the usual idiom "if (ret < 0 || ret >= sz) error"
                    205:         * works as expected.
                    206:         */
                    207:
                    208:        if (ret < (int)maxsz && !print)
                    209:                ret = -1;
                    210:        return ret;
                    211:
                         /*
                          * Reached before dst exists: ret is 0 for empty output, in which
                          * case src itself is handed back, or negative on error.
                          */
                    212: fail:
                    213:        if (wp != NULL)
                    214:                *wp = 0;
1.2       schwarze  215:        if (ret == 0) {
                    216:                *str = src;
                    217:                return 0;
                    218:        } else {
                    219:                *str = NULL;
                    220:                return -1;
                    221:        }
1.1       schwarze  222: }
                    223:
                    224: int
                    225: snmprintf(char *str, size_t sz, int *wp, const char *fmt, ...)
                    226: {
                    227:        va_list  ap;
                    228:        char    *cp;
                    229:        int      ret;
                    230:
                    231:        va_start(ap, fmt);
                    232:        ret = vasnmprintf(&cp, sz, wp, fmt, ap);
                    233:        va_end(ap);
1.2       schwarze  234:        if (cp != NULL) {
                    235:                (void)strlcpy(str, cp, sz);
                    236:                free(cp);
                    237:        } else
                    238:                *str = '\0';
1.1       schwarze  239:        return ret;
                    240: }
                    241:
                    242: /*
                    243:  * To stay close to the standard interfaces, the following functions
                    244:  * return the number of non-NUL bytes written.
                    245:  */
                    246:
                    247: int
                    248: vfmprintf(FILE *stream, const char *fmt, va_list ap)
                    249: {
                    250:        char    *str;
                    251:        int      ret;
                    252:
                    253:        if ((ret = vasnmprintf(&str, INT_MAX, NULL, fmt, ap)) < 0)
                    254:                return -1;
                    255:        if (fputs(str, stream) == EOF)
                    256:                ret = -1;
                    257:        free(str);
                    258:        return ret;
                    259: }
                    260:
                    261: int
                    262: fmprintf(FILE *stream, const char *fmt, ...)
                    263: {
                    264:        va_list  ap;
                    265:        int      ret;
                    266:
                    267:        va_start(ap, fmt);
                    268:        ret = vfmprintf(stream, fmt, ap);
                    269:        va_end(ap);
                    270:        return ret;
                    271: }
                    272:
                    273: int
                    274: mprintf(const char *fmt, ...)
                    275: {
                    276:        va_list  ap;
                    277:        int      ret;
                    278:
                    279:        va_start(ap, fmt);
                    280:        ret = vfmprintf(stdout, fmt, ap);
                    281:        va_end(ap);
                    282:        return ret;
                    283: }