[BACK]Return to utf8.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / ssh

Annotation of src/usr.bin/ssh/utf8.c, Revision 1.6

1.6     ! schwarze    1: /* $OpenBSD: utf8.c,v 1.5 2017/02/19 00:10:57 djm Exp $ */
1.1       schwarze    2: /*
                      3:  * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
                      4:  *
                      5:  * Permission to use, copy, modify, and distribute this software for any
                      6:  * purpose with or without fee is hereby granted, provided that the above
                      7:  * copyright notice and this permission notice appear in all copies.
                      8:  *
                      9:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     10:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     11:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     12:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     13:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     14:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     15:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     16:  */
                     17:
                     18: /*
                     19:  * Utility functions for multibyte-character handling,
                     20:  * in particular to sanitize untrusted strings for terminal output.
                     21:  */
                     22:
                     23: #include <sys/types.h>
                     24: #include <langinfo.h>
                     25: #include <limits.h>
                     26: #include <stdarg.h>
                     27: #include <stdio.h>
                     28: #include <stdlib.h>
                     29: #include <string.h>
                     30: #include <vis.h>
                     31: #include <wchar.h>
                     32:
                     33: #include "utf8.h"
                     34:
                     35: static int      dangerous_locale(void);
1.3       schwarze   36: static int      grow_dst(char **, size_t *, size_t, char **, size_t);
1.1       schwarze   37: static int      vasnmprintf(char **, size_t, int *, const char *, va_list);
                     38:
                     39:
                     40: /*
                     41:  * For US-ASCII and UTF-8 encodings, we can safely recover from
                     42:  * encoding errors and from non-printable characters.  For any
                     43:  * other encodings, err on the side of caution and abort parsing:
                     44:  * For state-dependent encodings, recovery is impossible.
                     45:  * For arbitrary encodings, replacement of non-printable
                     46:  * characters would be non-trivial and too fragile.
                     47:  */
                     48:
                     49: static int
                     50: dangerous_locale(void) {
                     51:        char    *loc;
                     52:
                     53:        loc = nl_langinfo(CODESET);
1.5       djm        54:        return strcmp(loc, "US-ASCII") != 0 && strcmp(loc, "UTF-8") != 0 &&
1.6     ! schwarze   55:            strcmp(loc, "ANSI_X3.4-1968") != 0 && strcmp(loc, "646") != 0 &&
        !            56:            strcmp(loc, "") != 0;
1.1       schwarze   57: }
                     58:
/*
 * Ensure that need bytes can be written at *dp with at least one
 * further byte of the heap buffer *dst left over behind them.
 *
 * dst:   address of the buffer pointer; updated if the buffer moves.
 * sz:    address of the current allocation size; updated on growth.
 * maxsz: the buffer is never grown beyond this many bytes.
 * dp:    address of the write position inside *dst; it is rebased
 *        onto the new buffer if realloc(3) moves the data.
 * need:  number of bytes about to be written at *dp.
 *
 * The buffer grows in steps of 128 bytes (more if need requires it),
 * capped at maxsz.  Returns 0 if the room is available, or -1 if the
 * request cannot be satisfied within maxsz or realloc(3) fails; in
 * the failure case *dst, *sz and *dp are left untouched.
 */
static int
grow_dst(char **dst, size_t *sz, size_t maxsz, char **dp, size_t need)
{
	char	*tp;
	size_t	 off;	/* Offset of the write position within *dst. */
	size_t	 want;	/* Smallest buffer size that satisfies the request. */
	size_t	 tsz;

	/*
	 * Work with offsets rather than pointers: *dp + need may lie
	 * beyond the allocation, and the old value of *dst must not be
	 * used once realloc(3) has succeeded.
	 */
	off = (size_t)(*dp - *dst);
	if (off < *sz && need < *sz - off)
		return 0;

	/* Refuse requests that cannot fit even into a maxsz-sized buffer. */
	if (need >= maxsz || off >= maxsz - need)
		return -1;
	want = off + need + 1;

	/* Usual step is 128 bytes; both adjustments stay within maxsz. */
	if (maxsz < 128 || *sz > maxsz - 128)
		tsz = maxsz;
	else
		tsz = *sz + 128;
	if (tsz < want)
		tsz = want;

	if ((tp = realloc(*dst, tsz)) == NULL)
		return -1;
	*dp = tp + off;
	*dst = tp;
	*sz = tsz;
	return 0;
}
                     77:
1.1       schwarze   78: /*
                     79:  * The following two functions limit the number of bytes written,
                     80:  * including the terminating '\0', to sz.  Unless wp is NULL,
                     81:  * they limit the number of display columns occupied to *wp.
                     82:  * Whichever is reached first terminates the output string.
                     83:  * To stay close to the standard interfaces, they return the number of
                     84:  * non-NUL bytes that would have been written if both were unlimited.
                     85:  * If wp is NULL, newline, carriage return, and tab are allowed;
                     86:  * otherwise, the actual number of columns occupied by what was
                     87:  * written is returned in *wp.
                     88:  */
                     89:
                         /*
                          * vasnmprintf() formats fmt and ap with vasprintf(3), then copies
                          * the result character by character into a separately allocated
                          * buffer.  Characters that decode and have a non-negative
                          * wcwidth(3) are copied verbatim, as are newline, carriage return
                          * and tab when wp is NULL.  Every other byte is replaced by its
                          * vis(3) VIS_OCTAL | VIS_ALL encoding, unless dangerous_locale()
                          * is true, in which case processing stops and -1 is returned.
                          *
                          * *str receives the output buffer; the callers in this file
                          * free(3) it.  Once dst has been allocated, *str is non-NULL even
                          * for a -1 return and holds whatever was produced so far.  It is
                          * NULL only if vasprintf(3) or malloc(3) failed.
                          * maxsz values above INT_MAX are treated as INT_MAX.
                          */
                     90: static int
                     91: vasnmprintf(char **str, size_t maxsz, int *wp, const char *fmt, va_list ap)
                     92: {
                     93:        char    *src;   /* Source string returned from vasprintf. */
                     94:        char    *sp;    /* Pointer into src. */
                     95:        char    *dst;   /* Destination string to be returned. */
                     96:        char    *dp;    /* Pointer into dst. */
                     97:        char    *tp;    /* Temporary pointer for dst. */
                     98:        size_t   sz;    /* Number of bytes allocated for dst. */
                     99:        wchar_t  wc;    /* Wide character at sp. */
                    100:        int      len;   /* Number of bytes in the character at sp. */
                    101:        int      ret;   /* Number of bytes needed to format src. */
                    102:        int      width; /* Display width of the character wc. */
                    103:        int      total_width, max_width, print;
                    104:
                                /* Failure (< 0) and empty output (0) are both finished at fail. */
1.2       schwarze  105:        src = NULL;
                    106:        if ((ret = vasprintf(&src, fmt, ap)) <= 0)
1.1       schwarze  107:                goto fail;
                    108:
                                /* Room for an unescaped copy; grow_dst() enlarges it on demand. */
1.3       schwarze  109:        sz = strlen(src) + 1;
1.2       schwarze  110:        if ((dst = malloc(sz)) == NULL) {
                    111:                free(src);
1.4       jsg       112:                ret = -1;
1.1       schwarze  113:                goto fail;
1.2       schwarze  114:        }
1.1       schwarze  115:
                                /* Keep maxsz representable by the (int)maxsz casts used below. */
                    116:        if (maxsz > INT_MAX)
                    117:                maxsz = INT_MAX;
                    118:
                    119:        sp = src;
                    120:        dp = dst;
                    121:        ret = 0;
                    122:        print = 1;
                    123:        total_width = 0;
                    124:        max_width = wp == NULL ? INT_MAX : *wp;
                    125:        while (*sp != '\0') {
                    126:                if ((len = mbtowc(&wc, sp, MB_CUR_MAX)) == -1) {
                                                /* Reset the conversion state after the failed decode. */
                    127:                        (void)mbtowc(NULL, NULL, MB_CUR_MAX);
                    128:                        if (dangerous_locale()) {
                    129:                                ret = -1;
                    130:                                break;
                    131:                        }
                                                /* Send the offending byte alone through the escaping loop. */
                    132:                        len = 1;
                    133:                        width = -1;
                    134:                } else if (wp == NULL &&
                    135:                    (wc == L'\n' || wc == L'\r' || wc == L'\t')) {
                    136:                        /*
                    137:                         * Don't use width uninitialized; the actual
                    138:                         * value doesn't matter because total_width
                    139:                         * is only returned for wp != NULL.
                    140:                         */
                    141:                        width = 0;
                    142:                } else if ((width = wcwidth(wc)) == -1 &&
                    143:                    dangerous_locale()) {
                    144:                        ret = -1;
                    145:                        break;
                    146:                }
                    147:
                    148:                /* Valid, printable character. */
                    149:
                    150:                if (width >= 0) {
                                                /* print is never set back to 1 once a limit was hit. */
                    151:                        if (print && (dp - dst >= (int)maxsz - len ||
                    152:                            total_width > max_width - width))
                    153:                                print = 0;
                    154:                        if (print) {
1.3       schwarze  155:                                if (grow_dst(&dst, &sz, maxsz,
                    156:                                    &dp, len) == -1) {
                    157:                                        ret = -1;
                    158:                                        break;
                    159:                                }
1.1       schwarze  160:                                total_width += width;
                    161:                                memcpy(dp, sp, len);
                    162:                                dp += len;
                    163:                        }
                    164:                        sp += len;
                    165:                        if (ret >= 0)
                    166:                                ret += len;
                    167:                        continue;
                    168:                }
                    169:
                    170:                /* Escaping required. */
                    171:
                    172:                while (len > 0) {
                    173:                        if (print && (dp - dst >= (int)maxsz - 4 ||
                    174:                            total_width > max_width - 4))
                    175:                                print = 0;
                    176:                        if (print) {
1.3       schwarze  177:                                if (grow_dst(&dst, &sz, maxsz,
                    178:                                    &dp, 4) == -1) {
                    179:                                        ret = -1;
                    180:                                        break;
1.1       schwarze  181:                                }
                    182:                                tp = vis(dp, *sp, VIS_OCTAL | VIS_ALL, 0);
                    183:                                width = tp - dp;
                    184:                                total_width += width;
                    185:                                dp = tp;
                    186:                        } else
                                                        /* Not written: count the four bytes reserved per escape. */
                    187:                                width = 4;
                    188:                        len--;
                    189:                        sp++;
                    190:                        if (ret >= 0)
                    191:                                ret += width;
                    192:                }
                                        /* The escaping loop was left early because grow_dst() failed. */
                    193:                if (len > 0)
                    194:                        break;
                    195:        }
                    196:        free(src);
                    197:        *dp = '\0';
                    198:        *str = dst;
                    199:        if (wp != NULL)
                    200:                *wp = total_width;
                    201:
                    202:        /*
                    203:         * If the string was truncated by the width limit but
                    204:         * would have fit into the size limit, the only sane way
                    205:         * to report the problem is using the return value, such
                    206:         * that the usual idiom "if (ret < 0 || ret >= sz) error"
                    207:         * works as expected.
                    208:         */
                    209:
                    210:        if (ret < (int)maxsz && !print)
                    211:                ret = -1;
                    212:        return ret;
                    213:
                         /*
                          * Reached only before dst exists: vasprintf(3) failed or produced
                          * an empty string (ret == 0, src is handed to the caller), or
                          * malloc(3) failed (src has already been freed).
                          */
                    214: fail:
                    215:        if (wp != NULL)
                    216:                *wp = 0;
1.2       schwarze  217:        if (ret == 0) {
                    218:                *str = src;
                    219:                return 0;
                    220:        } else {
                    221:                *str = NULL;
                    222:                return -1;
                    223:        }
1.1       schwarze  224: }
                    225:
                    226: int
                    227: snmprintf(char *str, size_t sz, int *wp, const char *fmt, ...)
                    228: {
                    229:        va_list  ap;
                    230:        char    *cp;
                    231:        int      ret;
                    232:
                    233:        va_start(ap, fmt);
                    234:        ret = vasnmprintf(&cp, sz, wp, fmt, ap);
                    235:        va_end(ap);
1.2       schwarze  236:        if (cp != NULL) {
                    237:                (void)strlcpy(str, cp, sz);
                    238:                free(cp);
                    239:        } else
                    240:                *str = '\0';
1.1       schwarze  241:        return ret;
                    242: }
                    243:
                    244: /*
                    245:  * To stay close to the standard interfaces, the following functions
                    246:  * return the number of non-NUL bytes written.
                    247:  */
                    248:
                    249: int
                    250: vfmprintf(FILE *stream, const char *fmt, va_list ap)
                    251: {
                    252:        char    *str;
                    253:        int      ret;
                    254:
                    255:        if ((ret = vasnmprintf(&str, INT_MAX, NULL, fmt, ap)) < 0)
                    256:                return -1;
                    257:        if (fputs(str, stream) == EOF)
                    258:                ret = -1;
                    259:        free(str);
                    260:        return ret;
                    261: }
                    262:
                    263: int
                    264: fmprintf(FILE *stream, const char *fmt, ...)
                    265: {
                    266:        va_list  ap;
                    267:        int      ret;
                    268:
                    269:        va_start(ap, fmt);
                    270:        ret = vfmprintf(stream, fmt, ap);
                    271:        va_end(ap);
                    272:        return ret;
                    273: }
                    274:
                    275: int
                    276: mprintf(const char *fmt, ...)
                    277: {
                    278:        va_list  ap;
                    279:        int      ret;
                    280:
                    281:        va_start(ap, fmt);
                    282:        ret = vfmprintf(stdout, fmt, ap);
                    283:        va_end(ap);
                    284:        return ret;
                    285: }