[BACK]Return to utf8.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / ssh

Annotation of src/usr.bin/ssh/utf8.c, Revision 1.2

1.2     ! schwarze    1: /* $OpenBSD: utf8.c,v 1.1 2016/05/25 23:48:45 schwarze Exp $ */
1.1       schwarze    2: /*
                      3:  * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
                      4:  *
                      5:  * Permission to use, copy, modify, and distribute this software for any
                      6:  * purpose with or without fee is hereby granted, provided that the above
                      7:  * copyright notice and this permission notice appear in all copies.
                      8:  *
                      9:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     10:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     11:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     12:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     13:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     14:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     15:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     16:  */
                     17:
                     18: /*
                     19:  * Utility functions for multibyte-character handling,
                     20:  * in particular to sanitize untrusted strings for terminal output.
                     21:  */
                     22:
                     23: #include <sys/types.h>
                     24: #include <langinfo.h>
                     25: #include <limits.h>
                     26: #include <stdarg.h>
                     27: #include <stdio.h>
                     28: #include <stdlib.h>
                     29: #include <string.h>
                     30: #include <vis.h>
                     31: #include <wchar.h>
                     32:
                     33: #include "utf8.h"
                     34:
                     35: static int      dangerous_locale(void);
                     36: static int      vasnmprintf(char **, size_t, int *, const char *, va_list);
                     37:
                     38:
                     39: /*
                     40:  * For US-ASCII and UTF-8 encodings, we can safely recover from
                     41:  * encoding errors and from non-printable characters.  For any
                     42:  * other encodings, err on the side of caution and abort parsing:
                     43:  * For state-dependent encodings, recovery is impossible.
                     44:  * For arbitrary encodings, replacement of non-printable
                     45:  * characters would be non-trivial and too fragile.
                     46:  */
                     47:
                     48: static int
                     49: dangerous_locale(void) {
                     50:        char    *loc;
                     51:
                     52:        loc = nl_langinfo(CODESET);
                     53:        return strcmp(loc, "US-ASCII") && strcmp(loc, "UTF-8");
                     54: }
                     55:
                     56: /*
                     57:  * The following two functions limit the number of bytes written,
                     58:  * including the terminating '\0', to sz.  Unless wp is NULL,
                     59:  * they limit the number of display columns occupied to *wp.
                     60:  * Whichever is reached first terminates the output string.
                     61:  * To stay close to the standard interfaces, they return the number of
                     62:  * non-NUL bytes that would have been written if both were unlimited.
                     63:  * If wp is NULL, newline, carriage return, and tab are allowed;
                     64:  * otherwise, the actual number of columns occupied by what was
                     65:  * written is returned in *wp.
                     66:  */
                     67:
                     68: static int
                     69: vasnmprintf(char **str, size_t maxsz, int *wp, const char *fmt, va_list ap)
                     70: {
                     71:        char    *src;   /* Source string returned from vasprintf. */
                     72:        char    *sp;    /* Pointer into src. */
                     73:        char    *dst;   /* Destination string to be returned. */
                     74:        char    *dp;    /* Pointer into dst. */
                     75:        char    *tp;    /* Temporary pointer for dst. */
                     76:        size_t   sz;    /* Number of bytes allocated for dst. */
                     77:        size_t   tsz;   /* Temporary size while extending dst. */
                     78:        wchar_t  wc;    /* Wide character at sp. */
                     79:        int      len;   /* Number of bytes in the character at sp. */
                     80:        int      ret;   /* Number of bytes needed to format src. */
                     81:        int      width; /* Display width of the character wc. */
                     82:        int      total_width, max_width, print;
                     83:
1.2     ! schwarze   84:        src = NULL;
        !            85:        if ((ret = vasprintf(&src, fmt, ap)) <= 0)
1.1       schwarze   86:                goto fail;
                     87:
                     88:        sz = strlen(src);
1.2     ! schwarze   89:        if ((dst = malloc(sz)) == NULL) {
        !            90:                free(src);
1.1       schwarze   91:                goto fail;
1.2     ! schwarze   92:        }
1.1       schwarze   93:
                     94:        if (maxsz > INT_MAX)
                     95:                maxsz = INT_MAX;
                     96:
                     97:        sp = src;
                     98:        dp = dst;
                     99:        ret = 0;
                    100:        print = 1;
                    101:        total_width = 0;
                    102:        max_width = wp == NULL ? INT_MAX : *wp;
                    103:        while (*sp != '\0') {
                    104:                if ((len = mbtowc(&wc, sp, MB_CUR_MAX)) == -1) {
                    105:                        (void)mbtowc(NULL, NULL, MB_CUR_MAX);
                    106:                        if (dangerous_locale()) {
                    107:                                ret = -1;
                    108:                                break;
                    109:                        }
                    110:                        len = 1;
                    111:                        width = -1;
                    112:                } else if (wp == NULL &&
                    113:                    (wc == L'\n' || wc == L'\r' || wc == L'\t')) {
                    114:                        /*
                    115:                         * Don't use width uninitialized; the actual
                    116:                         * value doesn't matter because total_width
                    117:                         * is only returned for wp != NULL.
                    118:                         */
                    119:                        width = 0;
                    120:                } else if ((width = wcwidth(wc)) == -1 &&
                    121:                    dangerous_locale()) {
                    122:                        ret = -1;
                    123:                        break;
                    124:                }
                    125:
                    126:                /* Valid, printable character. */
                    127:
                    128:                if (width >= 0) {
                    129:                        if (print && (dp - dst >= (int)maxsz - len ||
                    130:                            total_width > max_width - width))
                    131:                                print = 0;
                    132:                        if (print) {
                    133:                                total_width += width;
                    134:                                memcpy(dp, sp, len);
                    135:                                dp += len;
                    136:                        }
                    137:                        sp += len;
                    138:                        if (ret >= 0)
                    139:                                ret += len;
                    140:                        continue;
                    141:                }
                    142:
                    143:                /* Escaping required. */
                    144:
                    145:                while (len > 0) {
                    146:                        if (print && (dp - dst >= (int)maxsz - 4 ||
                    147:                            total_width > max_width - 4))
                    148:                                print = 0;
                    149:                        if (print) {
                    150:                                if (dp + 4 >= dst + sz) {
                    151:                                        tsz = sz + 128;
                    152:                                        if (tsz > maxsz)
                    153:                                                tsz = maxsz;
                    154:                                        tp = realloc(dst, tsz);
                    155:                                        if (tp == NULL) {
                    156:                                                ret = -1;
                    157:                                                break;
                    158:                                        }
                    159:                                        dp = tp + (dp - dst);
                    160:                                        dst = tp;
                    161:                                        sz = tsz;
                    162:                                }
                    163:                                tp = vis(dp, *sp, VIS_OCTAL | VIS_ALL, 0);
                    164:                                width = tp - dp;
                    165:                                total_width += width;
                    166:                                dp = tp;
                    167:                        } else
                    168:                                width = 4;
                    169:                        len--;
                    170:                        sp++;
                    171:                        if (ret >= 0)
                    172:                                ret += width;
                    173:                }
                    174:                if (len > 0)
                    175:                        break;
                    176:        }
                    177:        free(src);
                    178:        *dp = '\0';
                    179:        *str = dst;
                    180:        if (wp != NULL)
                    181:                *wp = total_width;
                    182:
                    183:        /*
                    184:         * If the string was truncated by the width limit but
                    185:         * would have fit into the size limit, the only sane way
                    186:         * to report the problem is using the return value, such
                    187:         * that the usual idiom "if (ret < 0 || ret >= sz) error"
                    188:         * works as expected.
                    189:         */
                    190:
                    191:        if (ret < (int)maxsz && !print)
                    192:                ret = -1;
                    193:        return ret;
                    194:
                    195: fail:
                    196:        if (wp != NULL)
                    197:                *wp = 0;
1.2     ! schwarze  198:        if (ret == 0) {
        !           199:                *str = src;
        !           200:                return 0;
        !           201:        } else {
        !           202:                *str = NULL;
        !           203:                return -1;
        !           204:        }
1.1       schwarze  205: }
                    206:
                    207: int
                    208: snmprintf(char *str, size_t sz, int *wp, const char *fmt, ...)
                    209: {
                    210:        va_list  ap;
                    211:        char    *cp;
                    212:        int      ret;
                    213:
                    214:        va_start(ap, fmt);
                    215:        ret = vasnmprintf(&cp, sz, wp, fmt, ap);
                    216:        va_end(ap);
1.2     ! schwarze  217:        if (cp != NULL) {
        !           218:                (void)strlcpy(str, cp, sz);
        !           219:                free(cp);
        !           220:        } else
        !           221:                *str = '\0';
1.1       schwarze  222:        return ret;
                    223: }
                    224:
                    225: /*
                    226:  * To stay close to the standard interfaces, the following functions
                    227:  * return the number of non-NUL bytes written.
                    228:  */
                    229:
                    230: int
                    231: vfmprintf(FILE *stream, const char *fmt, va_list ap)
                    232: {
                    233:        char    *str;
                    234:        int      ret;
                    235:
                    236:        if ((ret = vasnmprintf(&str, INT_MAX, NULL, fmt, ap)) < 0)
                    237:                return -1;
                    238:        if (fputs(str, stream) == EOF)
                    239:                ret = -1;
                    240:        free(str);
                    241:        return ret;
                    242: }
                    243:
                    244: int
                    245: fmprintf(FILE *stream, const char *fmt, ...)
                    246: {
                    247:        va_list  ap;
                    248:        int      ret;
                    249:
                    250:        va_start(ap, fmt);
                    251:        ret = vfmprintf(stream, fmt, ap);
                    252:        va_end(ap);
                    253:        return ret;
                    254: }
                    255:
                    256: int
                    257: mprintf(const char *fmt, ...)
                    258: {
                    259:        va_list  ap;
                    260:        int      ret;
                    261:
                    262:        va_start(ap, fmt);
                    263:        ret = vfmprintf(stdout, fmt, ap);
                    264:        va_end(ap);
                    265:        return ret;
                    266: }