[BACK]Return to str.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / tr

Annotation of src/usr.bin/tr/str.c, Revision 1.1

1.1     ! deraadt     1: /*     $NetBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $      */
        !             2:
        !             3: /*-
        !             4:  * Copyright (c) 1991, 1993
        !             5:  *     The Regents of the University of California.  All rights reserved.
        !             6:  *
        !             7:  * Redistribution and use in source and binary forms, with or without
        !             8:  * modification, are permitted provided that the following conditions
        !             9:  * are met:
        !            10:  * 1. Redistributions of source code must retain the above copyright
        !            11:  *    notice, this list of conditions and the following disclaimer.
        !            12:  * 2. Redistributions in binary form must reproduce the above copyright
        !            13:  *    notice, this list of conditions and the following disclaimer in the
        !            14:  *    documentation and/or other materials provided with the distribution.
        !            15:  * 3. All advertising materials mentioning features or use of this software
        !            16:  *    must display the following acknowledgement:
        !            17:  *     This product includes software developed by the University of
        !            18:  *     California, Berkeley and its contributors.
        !            19:  * 4. Neither the name of the University nor the names of its contributors
        !            20:  *    may be used to endorse or promote products derived from this software
        !            21:  *    without specific prior written permission.
        !            22:  *
        !            23:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
        !            24:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            25:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            26:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
        !            27:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
        !            28:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
        !            29:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
        !            30:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
        !            31:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
        !            32:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
        !            33:  * SUCH DAMAGE.
        !            34:  */
        !            35:
        !            36: #ifndef lint
        !            37: #if 0
        !            38: static char sccsid[] = "@(#)str.c      8.2 (Berkeley) 4/28/95";
        !            39: #endif
        !            40: static char rcsid[] = "$NetBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $";
        !            41: #endif /* not lint */
        !            42:
        !            43: #include <sys/cdefs.h>
        !            44: #include <sys/types.h>
        !            45:
        !            46: #include <errno.h>
        !            47: #include <stddef.h>
        !            48: #include <stdio.h>
        !            49: #include <stdlib.h>
        !            50: #include <string.h>
        !            51: #include <ctype.h>
        !            52:
        !            53: #include "extern.h"
        !            54:
        !            55: static int     backslash __P((STR *));
        !            56: static int     bracket __P((STR *));
        !            57: static int     c_class __P((const void *, const void *));
        !            58: static void    genclass __P((STR *));
        !            59: static void    genequiv __P((STR *));
        !            60: static int     genrange __P((STR *));
        !            61: static void    genseq __P((STR *));
        !            62:
        !            63: int
        !            64: next(s)
        !            65:        register STR *s;
        !            66: {
        !            67:        register int ch;
        !            68:
        !            69:        switch (s->state) {
        !            70:        case EOS:
        !            71:                return (0);
        !            72:        case INFINITE:
        !            73:                return (1);
        !            74:        case NORMAL:
        !            75:                switch (ch = *s->str) {
        !            76:                case '\0':
        !            77:                        s->state = EOS;
        !            78:                        return (0);
        !            79:                case '\\':
        !            80:                        s->lastch = backslash(s);
        !            81:                        break;
        !            82:                case '[':
        !            83:                        if (bracket(s))
        !            84:                                return (next(s));
        !            85:                        /* FALLTHROUGH */
        !            86:                default:
        !            87:                        ++s->str;
        !            88:                        s->lastch = ch;
        !            89:                        break;
        !            90:                }
        !            91:
        !            92:                /* We can start a range at any time. */
        !            93:                if (s->str[0] == '-' && genrange(s))
        !            94:                        return (next(s));
        !            95:                return (1);
        !            96:        case RANGE:
        !            97:                if (s->cnt-- == 0) {
        !            98:                        s->state = NORMAL;
        !            99:                        return (next(s));
        !           100:                }
        !           101:                ++s->lastch;
        !           102:                return (1);
        !           103:        case SEQUENCE:
        !           104:                if (s->cnt-- == 0) {
        !           105:                        s->state = NORMAL;
        !           106:                        return (next(s));
        !           107:                }
        !           108:                return (1);
        !           109:        case SET:
        !           110:                if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
        !           111:                        s->state = NORMAL;
        !           112:                        return (next(s));
        !           113:                }
        !           114:                return (1);
        !           115:        }
        !           116:        /* NOTREACHED */
        !           117: }
        !           118:
        !           119: static int
        !           120: bracket(s)
        !           121:        register STR *s;
        !           122: {
        !           123:        register char *p;
        !           124:
        !           125:        switch (s->str[1]) {
        !           126:        case ':':                               /* "[:class:]" */
        !           127:                if ((p = strstr(s->str + 2, ":]")) == NULL)
        !           128:                        return (0);
        !           129:                *p = '\0';
        !           130:                s->str += 2;
        !           131:                genclass(s);
        !           132:                s->str = p + 2;
        !           133:                return (1);
        !           134:        case '=':                               /* "[=equiv=]" */
        !           135:                if ((p = strstr(s->str + 2, "=]")) == NULL)
        !           136:                        return (0);
        !           137:                s->str += 2;
        !           138:                genequiv(s);
        !           139:                return (1);
        !           140:        default:                                /* "[\###*n]" or "[#*n]" */
        !           141:                if ((p = strpbrk(s->str + 2, "*]")) == NULL)
        !           142:                        return (0);
        !           143:                if (p[0] != '*' || index(p, ']') == NULL)
        !           144:                        return (0);
        !           145:                s->str += 1;
        !           146:                genseq(s);
        !           147:                return (1);
        !           148:        }
        !           149:        /* NOTREACHED */
        !           150: }
        !           151:
        !           152: typedef struct {
        !           153:        char *name;
        !           154:        int (*func) __P((int));
        !           155:        int *set;
        !           156: } CLASS;
        !           157:
        !           158: static CLASS classes[] = {
        !           159:        { "alnum",  isalnum,  },
        !           160:        { "alpha",  isalpha,  },
        !           161:        { "blank",  isblank,  },
        !           162:        { "cntrl",  iscntrl,  },
        !           163:        { "digit",  isdigit,  },
        !           164:        { "graph",  isgraph,  },
        !           165:        { "lower",  islower,  },
        !           166:        { "print",  isupper,  },
        !           167:        { "punct",  ispunct,  },
        !           168:        { "space",  isspace,  },
        !           169:        { "upper",  isupper,  },
        !           170:        { "xdigit", isxdigit, },
        !           171: };
        !           172:
        !           173: static void
        !           174: genclass(s)
        !           175:        STR *s;
        !           176: {
        !           177:        register int cnt, (*func) __P((int));
        !           178:        CLASS *cp, tmp;
        !           179:        int *p;
        !           180:
        !           181:        tmp.name = s->str;
        !           182:        if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
        !           183:            sizeof(CLASS), sizeof(CLASS), c_class)) == NULL)
        !           184:                err("unknown class %s", s->str);
        !           185:
        !           186:        if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
        !           187:                err("%s", strerror(errno));
        !           188:        bzero(p, (NCHARS + 1) * sizeof(int));
        !           189:        for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
        !           190:                if ((func)(cnt))
        !           191:                        *p++ = cnt;
        !           192:        *p = OOBCH;
        !           193:
        !           194:        s->cnt = 0;
        !           195:        s->state = SET;
        !           196:        s->set = cp->set;
        !           197: }
        !           198:
        !           199: static int
        !           200: c_class(a, b)
        !           201:        const void *a, *b;
        !           202: {
        !           203:        return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name));
        !           204: }
        !           205:
        !           206: /*
        !           207:  * English doesn't have any equivalence classes, so for now
        !           208:  * we just syntax check and grab the character.
        !           209:  */
        !           210: static void
        !           211: genequiv(s)
        !           212:        STR *s;
        !           213: {
        !           214:        if (*s->str == '\\') {
        !           215:                s->equiv[0] = backslash(s);
        !           216:                if (*s->str != '=')
        !           217:                        err("misplaced equivalence equals sign");
        !           218:        } else {
        !           219:                s->equiv[0] = s->str[0];
        !           220:                if (s->str[1] != '=')
        !           221:                        err("misplaced equivalence equals sign");
        !           222:        }
        !           223:        s->str += 2;
        !           224:        s->cnt = 0;
        !           225:        s->state = SET;
        !           226:        s->set = s->equiv;
        !           227: }
        !           228:
        !           229: static int
        !           230: genrange(s)
        !           231:        STR *s;
        !           232: {
        !           233:        int stopval;
        !           234:        char *savestart;
        !           235:
        !           236:        savestart = s->str;
        !           237:        stopval = *++s->str == '\\' ? backslash(s) : *s->str++;
        !           238:        if (stopval < (u_char)s->lastch) {
        !           239:                s->str = savestart;
        !           240:                return (0);
        !           241:        }
        !           242:        s->cnt = stopval - s->lastch + 1;
        !           243:        s->state = RANGE;
        !           244:        --s->lastch;
        !           245:        return (1);
        !           246: }
        !           247:
        !           248: static void
        !           249: genseq(s)
        !           250:        STR *s;
        !           251: {
        !           252:        char *ep;
        !           253:
        !           254:        if (s->which == STRING1)
        !           255:                err("sequences only valid in string2");
        !           256:
        !           257:        if (*s->str == '\\')
        !           258:                s->lastch = backslash(s);
        !           259:        else
        !           260:                s->lastch = *s->str++;
        !           261:        if (*s->str != '*')
        !           262:                err("misplaced sequence asterisk");
        !           263:
        !           264:        switch (*++s->str) {
        !           265:        case '\\':
        !           266:                s->cnt = backslash(s);
        !           267:                break;
        !           268:        case ']':
        !           269:                s->cnt = 0;
        !           270:                ++s->str;
        !           271:                break;
        !           272:        default:
        !           273:                if (isdigit(*s->str)) {
        !           274:                        s->cnt = strtol(s->str, &ep, 0);
        !           275:                        if (*ep == ']') {
        !           276:                                s->str = ep + 1;
        !           277:                                break;
        !           278:                        }
        !           279:                }
        !           280:                err("illegal sequence count");
        !           281:                /* NOTREACHED */
        !           282:        }
        !           283:
        !           284:        s->state = s->cnt ? SEQUENCE : INFINITE;
        !           285: }
        !           286:
        !           287: /*
        !           288:  * Translate \??? into a character.  Up to 3 octal digits, if no digits either
        !           289:  * an escape code or a literal character.
        !           290:  */
        !           291: static int
        !           292: backslash(s)
        !           293:        register STR *s;
        !           294: {
        !           295:        register int ch, cnt, val;
        !           296:
        !           297:        for (cnt = val = 0;;) {
        !           298:                ch = *++s->str;
        !           299:                if (!isascii(ch) || !isdigit(ch))
        !           300:                        break;
        !           301:                val = val * 8 + ch - '0';
        !           302:                if (++cnt == 3) {
        !           303:                        ++s->str;
        !           304:                        break;
        !           305:                }
        !           306:        }
        !           307:        if (cnt)
        !           308:                return (val);
        !           309:        if (ch != '\0')
        !           310:                ++s->str;
        !           311:        switch (ch) {
        !           312:                case 'a':                       /* escape characters */
        !           313:                        return ('\7');
        !           314:                case 'b':
        !           315:                        return ('\b');
        !           316:                case 'f':
        !           317:                        return ('\f');
        !           318:                case 'n':
        !           319:                        return ('\n');
        !           320:                case 'r':
        !           321:                        return ('\r');
        !           322:                case 't':
        !           323:                        return ('\t');
        !           324:                case 'v':
        !           325:                        return ('\13');
        !           326:                case '\0':                      /*  \" -> \ */
        !           327:                        s->state = EOS;
        !           328:                        return ('\\');
        !           329:                default:                        /* \x" -> x */
        !           330:                        return (ch);
        !           331:        }
        !           332: }