[BACK]Return to utf8.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / ssh

Annotation of src/usr.bin/ssh/utf8.c, Revision 1.1

1.1     ! schwarze    1: /* $OpenBSD$ */
        !             2: /*
        !             3:  * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
        !             4:  *
        !             5:  * Permission to use, copy, modify, and distribute this software for any
        !             6:  * purpose with or without fee is hereby granted, provided that the above
        !             7:  * copyright notice and this permission notice appear in all copies.
        !             8:  *
        !             9:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
        !            10:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
        !            11:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
        !            12:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
        !            13:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
        !            14:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
        !            15:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
        !            16:  */
        !            17:
        !            18: /*
        !            19:  * Utility functions for multibyte-character handling,
        !            20:  * in particular to sanitize untrusted strings for terminal output.
        !            21:  */
        !            22:
        !            23: #include <sys/types.h>
        !            24: #include <langinfo.h>
        !            25: #include <limits.h>
        !            26: #include <stdarg.h>
        !            27: #include <stdio.h>
        !            28: #include <stdlib.h>
        !            29: #include <string.h>
        !            30: #include <vis.h>
        !            31: #include <wchar.h>
        !            32:
        !            33: #include "utf8.h"
        !            34:
        !            35: static int      dangerous_locale(void);
        !            36: static int      vasnmprintf(char **, size_t, int *, const char *, va_list);
        !            37:
        !            38:
        !            39: /*
        !            40:  * For US-ASCII and UTF-8 encodings, we can safely recover from
        !            41:  * encoding errors and from non-printable characters.  For any
        !            42:  * other encodings, err to the side of caution and abort parsing:
        !            43:  * For state-dependent encodings, recovery is impossible.
        !            44:  * For arbitrary encodings, replacement of non-printable
        !            45:  * characters would be non-trivial and too fragile.
        !            46:  */
        !            47:
        !            48: static int
        !            49: dangerous_locale(void) {
        !            50:        char    *loc;
        !            51:
        !            52:        loc = nl_langinfo(CODESET);
        !            53:        return strcmp(loc, "US-ASCII") && strcmp(loc, "UTF-8");
        !            54: }
        !            55:
        !            56: /*
        !            57:  * The following two functions limit the number of bytes written,
        !            58:  * including the terminating '\0', to sz.  Unless wp is NULL,
        !            59:  * they limit the number of display columns occupied to *wp.
        !            60:  * Whichever is reached first terminates the output string.
        !            61:  * To stay close to the standard interfaces, they return the number of
        !            62:  * non-NUL bytes that would have been written if both were unlimited.
        !            63:  * If wp is NULL, newline, carriage return, and tab are allowed;
        !            64:  * otherwise, the actual number of columns occupied by what was
        !            65:  * written is returned in *wp.
        !            66:  */
        !            67:
        !            68: static int
        !            69: vasnmprintf(char **str, size_t maxsz, int *wp, const char *fmt, va_list ap)
        !            70: {
        !            71:        char    *src;   /* Source string returned from vasprintf. */
        !            72:        char    *sp;    /* Pointer into src. */
        !            73:        char    *dst;   /* Destination string to be returned. */
        !            74:        char    *dp;    /* Pointer into dst. */
        !            75:        char    *tp;    /* Temporary pointer for dst. */
        !            76:        size_t   sz;    /* Number of bytes allocated for dst. */
        !            77:        size_t   tsz;   /* Temporary size while extending dst. */
        !            78:        wchar_t  wc;    /* Wide character at sp. */
        !            79:        int      len;   /* Number of bytes in the character at sp. */
        !            80:        int      ret;   /* Number of bytes needed to format src. */
        !            81:        int      width; /* Display width of the character wc. */
        !            82:        int      total_width, max_width, print;
        !            83:
        !            84:        src = dst = NULL;
        !            85:        if (vasprintf(&src, fmt, ap) <= 0)
        !            86:                goto fail;
        !            87:
        !            88:        sz = strlen(src);
        !            89:        if ((dst = malloc(sz)) == NULL)
        !            90:                goto fail;
        !            91:
        !            92:        if (maxsz > INT_MAX)
        !            93:                maxsz = INT_MAX;
        !            94:
        !            95:        sp = src;
        !            96:        dp = dst;
        !            97:        ret = 0;
        !            98:        print = 1;
        !            99:        total_width = 0;
        !           100:        max_width = wp == NULL ? INT_MAX : *wp;
        !           101:        while (*sp != '\0') {
        !           102:                if ((len = mbtowc(&wc, sp, MB_CUR_MAX)) == -1) {
        !           103:                        (void)mbtowc(NULL, NULL, MB_CUR_MAX);
        !           104:                        if (dangerous_locale()) {
        !           105:                                ret = -1;
        !           106:                                break;
        !           107:                        }
        !           108:                        len = 1;
        !           109:                        width = -1;
        !           110:                } else if (wp == NULL &&
        !           111:                    (wc == L'\n' || wc == L'\r' || wc == L'\t')) {
        !           112:                        /*
        !           113:                         * Don't use width uninitialized; the actual
        !           114:                         * value doesn't matter because total_width
        !           115:                         * is only returned for wp != NULL.
        !           116:                         */
        !           117:                        width = 0;
        !           118:                } else if ((width = wcwidth(wc)) == -1 &&
        !           119:                    dangerous_locale()) {
        !           120:                        ret = -1;
        !           121:                        break;
        !           122:                }
        !           123:
        !           124:                /* Valid, printable character. */
        !           125:
        !           126:                if (width >= 0) {
        !           127:                        if (print && (dp - dst >= (int)maxsz - len ||
        !           128:                            total_width > max_width - width))
        !           129:                                print = 0;
        !           130:                        if (print) {
        !           131:                                total_width += width;
        !           132:                                memcpy(dp, sp, len);
        !           133:                                dp += len;
        !           134:                        }
        !           135:                        sp += len;
        !           136:                        if (ret >= 0)
        !           137:                                ret += len;
        !           138:                        continue;
        !           139:                }
        !           140:
        !           141:                /* Escaping required. */
        !           142:
        !           143:                while (len > 0) {
        !           144:                        if (print && (dp - dst >= (int)maxsz - 4 ||
        !           145:                            total_width > max_width - 4))
        !           146:                                print = 0;
        !           147:                        if (print) {
        !           148:                                if (dp + 4 >= dst + sz) {
        !           149:                                        tsz = sz + 128;
        !           150:                                        if (tsz > maxsz)
        !           151:                                                tsz = maxsz;
        !           152:                                        tp = realloc(dst, tsz);
        !           153:                                        if (tp == NULL) {
        !           154:                                                ret = -1;
        !           155:                                                break;
        !           156:                                        }
        !           157:                                        dp = tp + (dp - dst);
        !           158:                                        dst = tp;
        !           159:                                        sz = tsz;
        !           160:                                }
        !           161:                                tp = vis(dp, *sp, VIS_OCTAL | VIS_ALL, 0);
        !           162:                                width = tp - dp;
        !           163:                                total_width += width;
        !           164:                                dp = tp;
        !           165:                        } else
        !           166:                                width = 4;
        !           167:                        len--;
        !           168:                        sp++;
        !           169:                        if (ret >= 0)
        !           170:                                ret += width;
        !           171:                }
        !           172:                if (len > 0)
        !           173:                        break;
        !           174:        }
        !           175:        free(src);
        !           176:        *dp = '\0';
        !           177:        *str = dst;
        !           178:        if (wp != NULL)
        !           179:                *wp = total_width;
        !           180:
        !           181:        /*
        !           182:         * If the string was truncated by the width limit but
        !           183:         * would have fit into the size limit, the only sane way
        !           184:         * to report the problem is using the return value, such
        !           185:         * that the usual idiom "if (ret < 0 || ret >= sz) error"
        !           186:         * works as expected.
        !           187:         */
        !           188:
        !           189:        if (ret < (int)maxsz && !print)
        !           190:                ret = -1;
        !           191:        return ret;
        !           192:
        !           193: fail:
        !           194:        free(src);
        !           195:        free(dst);
        !           196:        *str = NULL;
        !           197:        if (wp != NULL)
        !           198:                *wp = 0;
        !           199:        return -1;
        !           200: }
        !           201:
        !           202: int
        !           203: snmprintf(char *str, size_t sz, int *wp, const char *fmt, ...)
        !           204: {
        !           205:        va_list  ap;
        !           206:        char    *cp;
        !           207:        int      ret;
        !           208:
        !           209:        va_start(ap, fmt);
        !           210:        ret = vasnmprintf(&cp, sz, wp, fmt, ap);
        !           211:        va_end(ap);
        !           212:        (void)strlcpy(str, cp, sz);
        !           213:        free(cp);
        !           214:        return ret;
        !           215: }
        !           216:
        !           217: /*
        !           218:  * To stay close to the standard interfaces, the following functions
        !           219:  * return the number of non-NUL bytes written.
        !           220:  */
        !           221:
        !           222: int
        !           223: vfmprintf(FILE *stream, const char *fmt, va_list ap)
        !           224: {
        !           225:        char    *str;
        !           226:        int      ret;
        !           227:
        !           228:        if ((ret = vasnmprintf(&str, INT_MAX, NULL, fmt, ap)) < 0)
        !           229:                return -1;
        !           230:        if (fputs(str, stream) == EOF)
        !           231:                ret = -1;
        !           232:        free(str);
        !           233:        return ret;
        !           234: }
        !           235:
        !           236: int
        !           237: fmprintf(FILE *stream, const char *fmt, ...)
        !           238: {
        !           239:        va_list  ap;
        !           240:        int      ret;
        !           241:
        !           242:        va_start(ap, fmt);
        !           243:        ret = vfmprintf(stream, fmt, ap);
        !           244:        va_end(ap);
        !           245:        return ret;
        !           246: }
        !           247:
        !           248: int
        !           249: mprintf(const char *fmt, ...)
        !           250: {
        !           251:        va_list  ap;
        !           252:        int      ret;
        !           253:
        !           254:        va_start(ap, fmt);
        !           255:        ret = vfmprintf(stdout, fmt, ap);
        !           256:        va_end(ap);
        !           257:        return ret;
        !           258: }