[BACK]Return to utf8.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / ssh

Annotation of src/usr.bin/ssh/utf8.c, Revision 1.9

1.9     ! markus      1: /* $OpenBSD: utf8.c,v 1.8 2018/08/21 13:56:27 schwarze Exp $ */
1.1       schwarze    2: /*
                      3:  * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
                      4:  *
                      5:  * Permission to use, copy, modify, and distribute this software for any
                      6:  * purpose with or without fee is hereby granted, provided that the above
                      7:  * copyright notice and this permission notice appear in all copies.
                      8:  *
                      9:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     10:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     11:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     12:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     13:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     14:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     15:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     16:  */
                     17:
                     18: /*
                     19:  * Utility functions for multibyte-character handling,
                     20:  * in particular to sanitize untrusted strings for terminal output.
                     21:  */
                     22:
                     23: #include <sys/types.h>
                     24: #include <langinfo.h>
                     25: #include <limits.h>
                     26: #include <stdarg.h>
                     27: #include <stdio.h>
                     28: #include <stdlib.h>
                     29: #include <string.h>
                     30: #include <vis.h>
                     31: #include <wchar.h>
                     32:
                     33: #include "utf8.h"
                     34:
                     35: static int      dangerous_locale(void);
1.3       schwarze   36: static int      grow_dst(char **, size_t *, size_t, char **, size_t);
1.1       schwarze   37: static int      vasnmprintf(char **, size_t, int *, const char *, va_list);
                     38:
                     39:
                     40: /*
                     41:  * For US-ASCII and UTF-8 encodings, we can safely recover from
                     42:  * encoding errors and from non-printable characters.  For any
                     43:  * other encodings, err to the side of caution and abort parsing:
                     44:  * For state-dependent encodings, recovery is impossible.
                     45:  * For arbitrary encodings, replacement of non-printable
                     46:  * characters would be non-trivial and too fragile.
1.8       schwarze   47:  * The comments indicate what nl_langinfo(CODESET)
                     48:  * returns for US-ASCII on various operating systems.
1.1       schwarze   49:  */
                     50:
                     51: static int
                     52: dangerous_locale(void) {
                     53:        char    *loc;
                     54:
                     55:        loc = nl_langinfo(CODESET);
1.8       schwarze   56:        return strcmp(loc, "UTF-8") != 0 &&
                     57:            strcmp(loc, "US-ASCII") != 0 &&             /* OpenBSD */
                     58:            strcmp(loc, "ANSI_X3.4-1968") != 0 &&       /* Linux */
                     59:            strcmp(loc, "ISO8859-1") != 0 &&            /* AIX */
                     60:            strcmp(loc, "646") != 0 &&                  /* Solaris, NetBSD */
                     61:            strcmp(loc, "") != 0;                       /* Solaris 6 */
1.1       schwarze   62: }
                     63:
/*
 * Make sure that need more bytes can be stored at *dp inside the
 * buffer *dst of *sz bytes while still leaving at least one byte
 * free behind them.
 *
 * If there is not enough room, the buffer is enlarged by 128 bytes,
 * but never beyond maxsz bytes in total.  On reallocation, *dst, *dp
 * and *sz are updated; *dp keeps its offset into the buffer.
 *
 * Returns 0 on success.  Returns -1, leaving *dst, *dp and *sz
 * untouched, if the maxsz cap does not leave enough room or if
 * the reallocation fails.
 */
static int
grow_dst(char **dst, size_t *sz, size_t maxsz, char **dp, size_t need)
{
	char	*tp;
	size_t	 used;	/* Offset of *dp from the start of the buffer. */
	size_t	 tsz;	/* Buffer size after growing. */

	/*
	 * Compare offsets rather than pointers: *dp + need may lie
	 * beyond the end of the allocation, and forming such a
	 * pointer is undefined.
	 */
	used = (size_t)(*dp - *dst);
	if (used + need < *sz)
		return 0;

	tsz = *sz + 128;
	if (tsz > maxsz)
		tsz = maxsz;

	/* Do not report success if even the capped size is too small. */
	if (used + need >= tsz)
		return -1;

	if ((tp = recallocarray(*dst, *sz, tsz, 1)) == NULL)
		return -1;

	/* The offset was taken before the old buffer was released. */
	*dp = tp + used;
	*dst = tp;
	*sz = tsz;
	return 0;
}
                     82:
1.1       schwarze   83: /*
                     84:  * The following two functions limit the number of bytes written,
                     85:  * including the terminating '\0', to sz.  Unless wp is NULL,
                     86:  * they limit the number of display columns occupied to *wp.
                     87:  * Whichever is reached first terminates the output string.
                     88:  * To stay close to the standard interfaces, they return the number of
                     89:  * non-NUL bytes that would have been written if both were unlimited.
                     90:  * If wp is NULL, newline, carriage return, and tab are allowed;
                     91:  * otherwise, the actual number of columns occupied by what was
                     92:  * written is returned in *wp.
                     93:  */
                     94:
                     95: static int
                     96: vasnmprintf(char **str, size_t maxsz, int *wp, const char *fmt, va_list ap)
                     97: {
                     98:        char    *src;   /* Source string returned from vasprintf. */
                     99:        char    *sp;    /* Pointer into src. */
                    100:        char    *dst;   /* Destination string to be returned. */
                    101:        char    *dp;    /* Pointer into dst. */
                    102:        char    *tp;    /* Temporary pointer for dst. */
                    103:        size_t   sz;    /* Number of bytes allocated for dst. */
                    104:        wchar_t  wc;    /* Wide character at sp. */
                    105:        int      len;   /* Number of bytes in the character at sp. */
                    106:        int      ret;   /* Number of bytes needed to format src. */
                    107:        int      width; /* Display width of the character wc. */
                    108:        int      total_width, max_width, print;
                    109:
1.2       schwarze  110:        src = NULL;
                    111:        if ((ret = vasprintf(&src, fmt, ap)) <= 0)
1.1       schwarze  112:                goto fail;
                    113:
1.3       schwarze  114:        sz = strlen(src) + 1;
1.2       schwarze  115:        if ((dst = malloc(sz)) == NULL) {
                    116:                free(src);
1.4       jsg       117:                ret = -1;
1.1       schwarze  118:                goto fail;
1.2       schwarze  119:        }
1.1       schwarze  120:
                    121:        if (maxsz > INT_MAX)
                    122:                maxsz = INT_MAX;
                    123:
                    124:        sp = src;
                    125:        dp = dst;
                    126:        ret = 0;
                    127:        print = 1;
                    128:        total_width = 0;
                    129:        max_width = wp == NULL ? INT_MAX : *wp;
                    130:        while (*sp != '\0') {
                    131:                if ((len = mbtowc(&wc, sp, MB_CUR_MAX)) == -1) {
                    132:                        (void)mbtowc(NULL, NULL, MB_CUR_MAX);
                    133:                        if (dangerous_locale()) {
                    134:                                ret = -1;
                    135:                                break;
                    136:                        }
                    137:                        len = 1;
                    138:                        width = -1;
                    139:                } else if (wp == NULL &&
                    140:                    (wc == L'\n' || wc == L'\r' || wc == L'\t')) {
                    141:                        /*
                    142:                         * Don't use width uninitialized; the actual
                    143:                         * value doesn't matter because total_width
                    144:                         * is only returned for wp != NULL.
                    145:                         */
                    146:                        width = 0;
                    147:                } else if ((width = wcwidth(wc)) == -1 &&
                    148:                    dangerous_locale()) {
                    149:                        ret = -1;
                    150:                        break;
                    151:                }
                    152:
                    153:                /* Valid, printable character. */
                    154:
                    155:                if (width >= 0) {
                    156:                        if (print && (dp - dst >= (int)maxsz - len ||
                    157:                            total_width > max_width - width))
                    158:                                print = 0;
                    159:                        if (print) {
1.3       schwarze  160:                                if (grow_dst(&dst, &sz, maxsz,
                    161:                                    &dp, len) == -1) {
                    162:                                        ret = -1;
                    163:                                        break;
                    164:                                }
1.1       schwarze  165:                                total_width += width;
                    166:                                memcpy(dp, sp, len);
                    167:                                dp += len;
                    168:                        }
                    169:                        sp += len;
                    170:                        if (ret >= 0)
                    171:                                ret += len;
                    172:                        continue;
                    173:                }
                    174:
                    175:                /* Escaping required. */
                    176:
                    177:                while (len > 0) {
                    178:                        if (print && (dp - dst >= (int)maxsz - 4 ||
                    179:                            total_width > max_width - 4))
                    180:                                print = 0;
                    181:                        if (print) {
1.3       schwarze  182:                                if (grow_dst(&dst, &sz, maxsz,
                    183:                                    &dp, 4) == -1) {
                    184:                                        ret = -1;
                    185:                                        break;
1.1       schwarze  186:                                }
                    187:                                tp = vis(dp, *sp, VIS_OCTAL | VIS_ALL, 0);
                    188:                                width = tp - dp;
                    189:                                total_width += width;
                    190:                                dp = tp;
                    191:                        } else
                    192:                                width = 4;
                    193:                        len--;
                    194:                        sp++;
                    195:                        if (ret >= 0)
                    196:                                ret += width;
                    197:                }
                    198:                if (len > 0)
                    199:                        break;
                    200:        }
                    201:        free(src);
                    202:        *dp = '\0';
                    203:        *str = dst;
                    204:        if (wp != NULL)
                    205:                *wp = total_width;
                    206:
                    207:        /*
                    208:         * If the string was truncated by the width limit but
                    209:         * would have fit into the size limit, the only sane way
                    210:         * to report the problem is using the return value, such
                    211:         * that the usual idiom "if (ret < 0 || ret >= sz) error"
                    212:         * works as expected.
                    213:         */
                    214:
                    215:        if (ret < (int)maxsz && !print)
                    216:                ret = -1;
                    217:        return ret;
                    218:
                    219: fail:
                    220:        if (wp != NULL)
                    221:                *wp = 0;
1.2       schwarze  222:        if (ret == 0) {
                    223:                *str = src;
                    224:                return 0;
                    225:        } else {
                    226:                *str = NULL;
                    227:                return -1;
                    228:        }
1.1       schwarze  229: }
                    230:
                    231: int
                    232: snmprintf(char *str, size_t sz, int *wp, const char *fmt, ...)
                    233: {
                    234:        va_list  ap;
1.9     ! markus    235:        char    *cp = NULL;
1.1       schwarze  236:        int      ret;
                    237:
                    238:        va_start(ap, fmt);
                    239:        ret = vasnmprintf(&cp, sz, wp, fmt, ap);
                    240:        va_end(ap);
1.2       schwarze  241:        if (cp != NULL) {
                    242:                (void)strlcpy(str, cp, sz);
                    243:                free(cp);
                    244:        } else
                    245:                *str = '\0';
1.1       schwarze  246:        return ret;
                    247: }
                    248:
                    249: /*
                    250:  * To stay close to the standard interfaces, the following functions
                    251:  * return the number of non-NUL bytes written.
                    252:  */
                    253:
                    254: int
                    255: vfmprintf(FILE *stream, const char *fmt, va_list ap)
                    256: {
1.9     ! markus    257:        char    *str = NULL;
1.1       schwarze  258:        int      ret;
                    259:
1.9     ! markus    260:        if ((ret = vasnmprintf(&str, INT_MAX, NULL, fmt, ap)) < 0) {
        !           261:                free(str);
1.1       schwarze  262:                return -1;
1.9     ! markus    263:        }
1.1       schwarze  264:        if (fputs(str, stream) == EOF)
                    265:                ret = -1;
                    266:        free(str);
                    267:        return ret;
                    268: }
                    269:
                    270: int
                    271: fmprintf(FILE *stream, const char *fmt, ...)
                    272: {
                    273:        va_list  ap;
                    274:        int      ret;
                    275:
                    276:        va_start(ap, fmt);
                    277:        ret = vfmprintf(stream, fmt, ap);
                    278:        va_end(ap);
                    279:        return ret;
                    280: }
                    281:
                    282: int
                    283: mprintf(const char *fmt, ...)
                    284: {
                    285:        va_list  ap;
                    286:        int      ret;
                    287:
                    288:        va_start(ap, fmt);
                    289:        ret = vfmprintf(stdout, fmt, ap);
                    290:        va_end(ap);
                    291:        return ret;
                    292: }