[BACK]Return to utf8.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / ssh

Annotation of src/usr.bin/ssh/utf8.c, Revision 1.5

1.5     ! djm         1: /* $OpenBSD: utf8.c,v 1.4 2017/02/02 10:54:25 jsg Exp $ */
1.1       schwarze    2: /*
                      3:  * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
                      4:  *
                      5:  * Permission to use, copy, modify, and distribute this software for any
                      6:  * purpose with or without fee is hereby granted, provided that the above
                      7:  * copyright notice and this permission notice appear in all copies.
                      8:  *
                      9:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     10:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     11:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     12:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     13:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     14:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     15:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     16:  */
                     17:
                     18: /*
                     19:  * Utility functions for multibyte-character handling,
                     20:  * in particular to sanitize untrusted strings for terminal output.
                     21:  */
                     22:
                     23: #include <sys/types.h>
                     24: #include <langinfo.h>
                     25: #include <limits.h>
                     26: #include <stdarg.h>
                     27: #include <stdio.h>
                     28: #include <stdlib.h>
                     29: #include <string.h>
                     30: #include <vis.h>
                     31: #include <wchar.h>
                     32:
                     33: #include "utf8.h"
                     34:
                     35: static int      dangerous_locale(void);
1.3       schwarze   36: static int      grow_dst(char **, size_t *, size_t, char **, size_t);
1.1       schwarze   37: static int      vasnmprintf(char **, size_t, int *, const char *, va_list);
                     38:
                     39:
                     40: /*
                     41:  * For US-ASCII and UTF-8 encodings, we can safely recover from
                     42:  * encoding errors and from non-printable characters.  For any
                     43:  * other encodings, err to the side of caution and abort parsing:
                     44:  * For state-dependent encodings, recovery is impossible.
                     45:  * For arbitrary encodings, replacement of non-printable
                     46:  * characters would be non-trivial and too fragile.
                     47:  */
                     48:
                     49: static int
                     50: dangerous_locale(void) {
                     51:        char    *loc;
                     52:
                     53:        loc = nl_langinfo(CODESET);
1.5     ! djm        54:        return strcmp(loc, "US-ASCII") != 0 && strcmp(loc, "UTF-8") != 0 &&
        !            55:            strcmp(loc, "ANSI_X3.4-1968") != 0;
1.1       schwarze   56: }
                     57:
/*
 * Make sure that "need" more bytes can be written at *dp while still
 * leaving at least one byte of the buffer unused after them (the
 * callers use it for the terminating '\0').
 *
 *   dst    in/out: start of the heap buffer; updated if realloc(3)
 *          moves it.
 *   sz     in/out: number of bytes allocated at *dst.
 *   maxsz  upper bound applied to the new allocation size.
 *   dp     in/out: write position inside *dst; re-based onto the new
 *          buffer whenever the buffer is reallocated.
 *   need   number of bytes the caller is about to write at *dp.
 *
 * If the bytes already fit, nothing is changed.  Otherwise the buffer
 * is resized to *sz + 128 bytes, or to maxsz if that is smaller.  The
 * function does not verify that the resized buffer is large enough;
 * callers are expected to have checked the write against maxsz.
 *
 * Returns 0 on success and -1 if realloc(3) fails, in which case
 * *dst, *sz, and *dp are left unchanged and remain valid.
 */
static int
grow_dst(char **dst, size_t *sz, size_t maxsz, char **dp, size_t need)
{
	enum { DST_GROW_STEP = 128 };
	char	*tp;
	size_t	 used;	/* Offset of the write position in the buffer. */
	size_t	 tsz;

	/*
	 * Work with offsets rather than pointers: *dp + need may lie
	 * more than one byte past the end of the allocation, and merely
	 * forming such a pointer is undefined behavior.
	 */
	used = (size_t)(*dp - *dst);
	if (used < *sz && need < *sz - used)
		return 0;

	tsz = *sz + DST_GROW_STEP;
	if (tsz > maxsz)
		tsz = maxsz;
	if ((tp = realloc(*dst, tsz)) == NULL)
		return -1;

	/*
	 * The offset was computed before the call because the old
	 * value of *dst must not be used once realloc(3) succeeded.
	 */
	*dp = tp + used;
	*dst = tp;
	*sz = tsz;
	return 0;
}
                     76:
1.1       schwarze   77: /*
                     78:  * The following two functions limit the number of bytes written,
                     79:  * including the terminating '\0', to sz.  Unless wp is NULL,
                     80:  * they limit the number of display columns occupied to *wp.
                     81:  * Whichever is reached first terminates the output string.
                     82:  * To stay close to the standard interfaces, they return the number of
                     83:  * non-NUL bytes that would have been written if both were unlimited.
                     84:  * If wp is NULL, newline, carriage return, and tab are allowed;
                     85:  * otherwise, the actual number of columns occupied by what was
                     86:  * written is returned in *wp.
                     87:  */
                     88:
                     89: static int
                     90: vasnmprintf(char **str, size_t maxsz, int *wp, const char *fmt, va_list ap)
                     91: {
                     92:        char    *src;   /* Source string returned from vasprintf. */
                     93:        char    *sp;    /* Pointer into src. */
                     94:        char    *dst;   /* Destination string to be returned. */
                     95:        char    *dp;    /* Pointer into dst. */
                     96:        char    *tp;    /* Temporary pointer for dst. */
                     97:        size_t   sz;    /* Number of bytes allocated for dst. */
                     98:        wchar_t  wc;    /* Wide character at sp. */
                     99:        int      len;   /* Number of bytes in the character at sp. */
                    100:        int      ret;   /* Number of bytes needed to format src. */
                    101:        int      width; /* Display width of the character wc. */
                    102:        int      total_width, max_width, print;
                    103:
1.2       schwarze  104:        src = NULL;
                    105:        if ((ret = vasprintf(&src, fmt, ap)) <= 0)
1.1       schwarze  106:                goto fail;
                    107:
1.3       schwarze  108:        sz = strlen(src) + 1;
1.2       schwarze  109:        if ((dst = malloc(sz)) == NULL) {
                    110:                free(src);
1.4       jsg       111:                ret = -1;
1.1       schwarze  112:                goto fail;
1.2       schwarze  113:        }
1.1       schwarze  114:
                    115:        if (maxsz > INT_MAX)
                    116:                maxsz = INT_MAX;
                    117:
                    118:        sp = src;
                    119:        dp = dst;
                    120:        ret = 0;
                    121:        print = 1;
                    122:        total_width = 0;
                    123:        max_width = wp == NULL ? INT_MAX : *wp;
                    124:        while (*sp != '\0') {
                    125:                if ((len = mbtowc(&wc, sp, MB_CUR_MAX)) == -1) {
                    126:                        (void)mbtowc(NULL, NULL, MB_CUR_MAX);
                    127:                        if (dangerous_locale()) {
                    128:                                ret = -1;
                    129:                                break;
                    130:                        }
                    131:                        len = 1;
                    132:                        width = -1;
                    133:                } else if (wp == NULL &&
                    134:                    (wc == L'\n' || wc == L'\r' || wc == L'\t')) {
                    135:                        /*
                    136:                         * Don't use width uninitialized; the actual
                    137:                         * value doesn't matter because total_width
                    138:                         * is only returned for wp != NULL.
                    139:                         */
                    140:                        width = 0;
                    141:                } else if ((width = wcwidth(wc)) == -1 &&
                    142:                    dangerous_locale()) {
                    143:                        ret = -1;
                    144:                        break;
                    145:                }
                    146:
                    147:                /* Valid, printable character. */
                    148:
                    149:                if (width >= 0) {
                    150:                        if (print && (dp - dst >= (int)maxsz - len ||
                    151:                            total_width > max_width - width))
                    152:                                print = 0;
                    153:                        if (print) {
1.3       schwarze  154:                                if (grow_dst(&dst, &sz, maxsz,
                    155:                                    &dp, len) == -1) {
                    156:                                        ret = -1;
                    157:                                        break;
                    158:                                }
1.1       schwarze  159:                                total_width += width;
                    160:                                memcpy(dp, sp, len);
                    161:                                dp += len;
                    162:                        }
                    163:                        sp += len;
                    164:                        if (ret >= 0)
                    165:                                ret += len;
                    166:                        continue;
                    167:                }
                    168:
                    169:                /* Escaping required. */
                    170:
                    171:                while (len > 0) {
                    172:                        if (print && (dp - dst >= (int)maxsz - 4 ||
                    173:                            total_width > max_width - 4))
                    174:                                print = 0;
                    175:                        if (print) {
1.3       schwarze  176:                                if (grow_dst(&dst, &sz, maxsz,
                    177:                                    &dp, 4) == -1) {
                    178:                                        ret = -1;
                    179:                                        break;
1.1       schwarze  180:                                }
                    181:                                tp = vis(dp, *sp, VIS_OCTAL | VIS_ALL, 0);
                    182:                                width = tp - dp;
                    183:                                total_width += width;
                    184:                                dp = tp;
                    185:                        } else
                    186:                                width = 4;
                    187:                        len--;
                    188:                        sp++;
                    189:                        if (ret >= 0)
                    190:                                ret += width;
                    191:                }
                    192:                if (len > 0)
                    193:                        break;
                    194:        }
                    195:        free(src);
                    196:        *dp = '\0';
                    197:        *str = dst;
                    198:        if (wp != NULL)
                    199:                *wp = total_width;
                    200:
                    201:        /*
                    202:         * If the string was truncated by the width limit but
                    203:         * would have fit into the size limit, the only sane way
                    204:         * to report the problem is using the return value, such
                    205:         * that the usual idiom "if (ret < 0 || ret >= sz) error"
                    206:         * works as expected.
                    207:         */
                    208:
                    209:        if (ret < (int)maxsz && !print)
                    210:                ret = -1;
                    211:        return ret;
                    212:
                    213: fail:
                    214:        if (wp != NULL)
                    215:                *wp = 0;
1.2       schwarze  216:        if (ret == 0) {
                    217:                *str = src;
                    218:                return 0;
                    219:        } else {
                    220:                *str = NULL;
                    221:                return -1;
                    222:        }
1.1       schwarze  223: }
                    224:
                    225: int
                    226: snmprintf(char *str, size_t sz, int *wp, const char *fmt, ...)
                    227: {
                    228:        va_list  ap;
                    229:        char    *cp;
                    230:        int      ret;
                    231:
                    232:        va_start(ap, fmt);
                    233:        ret = vasnmprintf(&cp, sz, wp, fmt, ap);
                    234:        va_end(ap);
1.2       schwarze  235:        if (cp != NULL) {
                    236:                (void)strlcpy(str, cp, sz);
                    237:                free(cp);
                    238:        } else
                    239:                *str = '\0';
1.1       schwarze  240:        return ret;
                    241: }
                    242:
                    243: /*
                    244:  * To stay close to the standard interfaces, the following functions
                    245:  * return the number of non-NUL bytes written.
                    246:  */
                    247:
                    248: int
                    249: vfmprintf(FILE *stream, const char *fmt, va_list ap)
                    250: {
                    251:        char    *str;
                    252:        int      ret;
                    253:
                    254:        if ((ret = vasnmprintf(&str, INT_MAX, NULL, fmt, ap)) < 0)
                    255:                return -1;
                    256:        if (fputs(str, stream) == EOF)
                    257:                ret = -1;
                    258:        free(str);
                    259:        return ret;
                    260: }
                    261:
                    262: int
                    263: fmprintf(FILE *stream, const char *fmt, ...)
                    264: {
                    265:        va_list  ap;
                    266:        int      ret;
                    267:
                    268:        va_start(ap, fmt);
                    269:        ret = vfmprintf(stream, fmt, ap);
                    270:        va_end(ap);
                    271:        return ret;
                    272: }
                    273:
                    274: int
                    275: mprintf(const char *fmt, ...)
                    276: {
                    277:        va_list  ap;
                    278:        int      ret;
                    279:
                    280:        va_start(ap, fmt);
                    281:        ret = vfmprintf(stdout, fmt, ap);
                    282:        va_end(ap);
                    283:        return ret;
                    284: }