[BACK]Return to str.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / tr

Annotation of src/usr.bin/tr/str.c, Revision 1.7

1.7     ! millert     1: /*     $OpenBSD: str.c,v 1.6 2001/11/19 19:02:17 mpech Exp $   */
1.1       deraadt     2: /*     $NetBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $      */
                      3:
                      4: /*-
                      5:  * Copyright (c) 1991, 1993
                      6:  *     The Regents of the University of California.  All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  * 3. All advertising materials mentioning features or use of this software
                     17:  *    must display the following acknowledgement:
                     18:  *     This product includes software developed by the University of
                     19:  *     California, Berkeley and its contributors.
                     20:  * 4. Neither the name of the University nor the names of its contributors
                     21:  *    may be used to endorse or promote products derived from this software
                     22:  *    without specific prior written permission.
                     23:  *
                     24:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     25:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     26:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     27:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     28:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     29:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     30:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     31:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     32:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     33:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     34:  * SUCH DAMAGE.
                     35:  */
                     36:
                     37: #ifndef lint
                     38: #if 0
                     39: static char sccsid[] = "@(#)str.c      8.2 (Berkeley) 4/28/95";
                     40: #endif
1.7     ! millert    41: static char rcsid[] = "$OpenBSD: str.c,v 1.6 2001/11/19 19:02:17 mpech Exp $";
1.1       deraadt    42: #endif /* not lint */
                     43:
                     44: #include <sys/cdefs.h>
                     45: #include <sys/types.h>
                     46:
                     47: #include <errno.h>
                     48: #include <stddef.h>
                     49: #include <stdio.h>
                     50: #include <stdlib.h>
                     51: #include <string.h>
                     52: #include <ctype.h>
1.4       mickey     53: #include <err.h>
1.1       deraadt    54:
                     55: #include "extern.h"
                     56:
1.7     ! millert    57: static int     backslash(STR *);
        !            58: static int     bracket(STR *);
        !            59: static int     c_class(const void *, const void *);
        !            60: static void    genclass(STR *);
        !            61: static void    genequiv(STR *);
        !            62: static int     genrange(STR *);
        !            63: static void    genseq(STR *);
1.1       deraadt    64:
                     65: int
                     66: next(s)
1.6       mpech      67:        STR *s;
1.1       deraadt    68: {
1.6       mpech      69:        int ch;
1.1       deraadt    70:
                     71:        switch (s->state) {
                     72:        case EOS:
                     73:                return (0);
                     74:        case INFINITE:
                     75:                return (1);
                     76:        case NORMAL:
                     77:                switch (ch = *s->str) {
                     78:                case '\0':
                     79:                        s->state = EOS;
                     80:                        return (0);
                     81:                case '\\':
                     82:                        s->lastch = backslash(s);
                     83:                        break;
                     84:                case '[':
                     85:                        if (bracket(s))
                     86:                                return (next(s));
                     87:                        /* FALLTHROUGH */
                     88:                default:
                     89:                        ++s->str;
                     90:                        s->lastch = ch;
                     91:                        break;
                     92:                }
                     93:
                     94:                /* We can start a range at any time. */
                     95:                if (s->str[0] == '-' && genrange(s))
                     96:                        return (next(s));
                     97:                return (1);
                     98:        case RANGE:
                     99:                if (s->cnt-- == 0) {
                    100:                        s->state = NORMAL;
                    101:                        return (next(s));
                    102:                }
                    103:                ++s->lastch;
                    104:                return (1);
                    105:        case SEQUENCE:
                    106:                if (s->cnt-- == 0) {
                    107:                        s->state = NORMAL;
                    108:                        return (next(s));
                    109:                }
                    110:                return (1);
                    111:        case SET:
                    112:                if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
                    113:                        s->state = NORMAL;
                    114:                        return (next(s));
                    115:                }
                    116:                return (1);
1.4       mickey    117:        default:
                    118:                return 0;
1.1       deraadt   119:        }
                    120:        /* NOTREACHED */
                    121: }
                    122:
                    123: static int
                    124: bracket(s)
1.6       mpech     125:        STR *s;
1.1       deraadt   126: {
1.6       mpech     127:        char *p;
1.1       deraadt   128:
                    129:        switch (s->str[1]) {
                    130:        case ':':                               /* "[:class:]" */
                    131:                if ((p = strstr(s->str + 2, ":]")) == NULL)
                    132:                        return (0);
                    133:                *p = '\0';
                    134:                s->str += 2;
                    135:                genclass(s);
                    136:                s->str = p + 2;
                    137:                return (1);
                    138:        case '=':                               /* "[=equiv=]" */
                    139:                if ((p = strstr(s->str + 2, "=]")) == NULL)
                    140:                        return (0);
                    141:                s->str += 2;
                    142:                genequiv(s);
                    143:                return (1);
                    144:        default:                                /* "[\###*n]" or "[#*n]" */
                    145:                if ((p = strpbrk(s->str + 2, "*]")) == NULL)
                    146:                        return (0);
1.3       millert   147:                if (p[0] != '*' || strchr(p, ']') == NULL)
1.1       deraadt   148:                        return (0);
                    149:                s->str += 1;
                    150:                genseq(s);
                    151:                return (1);
                    152:        }
                    153:        /* NOTREACHED */
                    154: }
                    155:
                    /*
                     * One entry per character class name accepted in "[:name:]".
                     */
                    typedef struct {
                            char *name;             /* class name, the bsearch() key */
                            int (*func)(int);       /* <ctype.h> membership predicate */
                            int *set;               /* member table, filled in by genclass() */
                    } CLASS;

                    /*
                     * The table is searched with bsearch() using a strcmp() comparison on
                     * the name, so the entries must stay sorted by name.
                     */
                    static CLASS classes[] = {
                            { "alnum",  isalnum,  NULL },
                            { "alpha",  isalpha,  NULL },
                            { "blank",  isblank,  NULL },
                            { "cntrl",  iscntrl,  NULL },
                            { "digit",  isdigit,  NULL },
                            { "graph",  isgraph,  NULL },
                            { "lower",  islower,  NULL },
                            { "print",  isprint,  NULL },
                            { "punct",  ispunct,  NULL },
                            { "space",  isspace,  NULL },
                            { "upper",  isupper,  NULL },
                            { "xdigit", isxdigit, NULL },
                    };
                    176:
                    177: static void
                    178: genclass(s)
                    179:        STR *s;
                    180: {
1.7     ! millert   181:        int cnt, (*func)(int);
1.1       deraadt   182:        CLASS *cp, tmp;
                    183:        int *p;
                    184:
                    185:        tmp.name = s->str;
                    186:        if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
                    187:            sizeof(CLASS), sizeof(CLASS), c_class)) == NULL)
1.4       mickey    188:                errx(1, "unknown class %s", s->str);
1.1       deraadt   189:
                    190:        if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
1.4       mickey    191:                errx(1, "no memory for a class");
1.1       deraadt   192:        bzero(p, (NCHARS + 1) * sizeof(int));
                    193:        for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
                    194:                if ((func)(cnt))
                    195:                        *p++ = cnt;
                    196:        *p = OOBCH;
                    197:
                    198:        s->cnt = 0;
                    199:        s->state = SET;
                    200:        s->set = cp->set;
                    201: }
                    202:
                    203: static int
                    204: c_class(a, b)
                    205:        const void *a, *b;
                    206: {
                    207:        return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name));
                    208: }
                    209:
                    210: /*
                    211:  * English doesn't have any equivalence classes, so for now
                    212:  * we just syntax check and grab the character.
                    213:  */
                    214: static void
                    215: genequiv(s)
                    216:        STR *s;
                    217: {
                    218:        if (*s->str == '\\') {
                    219:                s->equiv[0] = backslash(s);
                    220:                if (*s->str != '=')
1.4       mickey    221:                        errx(1, "misplaced equivalence equals sign");
1.1       deraadt   222:        } else {
                    223:                s->equiv[0] = s->str[0];
                    224:                if (s->str[1] != '=')
1.4       mickey    225:                        errx(1, "misplaced equivalence equals sign");
1.1       deraadt   226:        }
                    227:        s->str += 2;
                    228:        s->cnt = 0;
                    229:        s->state = SET;
                    230:        s->set = s->equiv;
                    231: }
                    232:
                    233: static int
                    234: genrange(s)
                    235:        STR *s;
                    236: {
                    237:        int stopval;
                    238:        char *savestart;
                    239:
                    240:        savestart = s->str;
                    241:        stopval = *++s->str == '\\' ? backslash(s) : *s->str++;
                    242:        if (stopval < (u_char)s->lastch) {
                    243:                s->str = savestart;
                    244:                return (0);
                    245:        }
                    246:        s->cnt = stopval - s->lastch + 1;
                    247:        s->state = RANGE;
                    248:        --s->lastch;
                    249:        return (1);
                    250: }
                    251:
                    252: static void
                    253: genseq(s)
                    254:        STR *s;
                    255: {
                    256:        char *ep;
                    257:
                    258:        if (s->which == STRING1)
1.4       mickey    259:                errx(1, "sequences only valid in string2");
1.1       deraadt   260:
                    261:        if (*s->str == '\\')
                    262:                s->lastch = backslash(s);
                    263:        else
                    264:                s->lastch = *s->str++;
                    265:        if (*s->str != '*')
1.4       mickey    266:                errx(1, "misplaced sequence asterisk");
1.1       deraadt   267:
                    268:        switch (*++s->str) {
                    269:        case '\\':
                    270:                s->cnt = backslash(s);
                    271:                break;
                    272:        case ']':
                    273:                s->cnt = 0;
                    274:                ++s->str;
                    275:                break;
                    276:        default:
                    277:                if (isdigit(*s->str)) {
                    278:                        s->cnt = strtol(s->str, &ep, 0);
                    279:                        if (*ep == ']') {
                    280:                                s->str = ep + 1;
                    281:                                break;
                    282:                        }
                    283:                }
1.4       mickey    284:                errx(1, "illegal sequence count");
1.1       deraadt   285:                /* NOTREACHED */
                    286:        }
                    287:
                    288:        s->state = s->cnt ? SEQUENCE : INFINITE;
                    289: }
                    290:
                    291: /*
                    292:  * Translate \??? into a character.  Up to 3 octal digits, if no digits either
                    293:  * an escape code or a literal character.
                    294:  */
                    295: static int
                    296: backslash(s)
1.6       mpech     297:        STR *s;
1.1       deraadt   298: {
1.6       mpech     299:        int ch, cnt, val;
1.1       deraadt   300:
                    301:        for (cnt = val = 0;;) {
                    302:                ch = *++s->str;
                    303:                if (!isascii(ch) || !isdigit(ch))
                    304:                        break;
                    305:                val = val * 8 + ch - '0';
                    306:                if (++cnt == 3) {
                    307:                        ++s->str;
                    308:                        break;
                    309:                }
                    310:        }
                    311:        if (cnt)
                    312:                return (val);
                    313:        if (ch != '\0')
                    314:                ++s->str;
                    315:        switch (ch) {
                    316:                case 'a':                       /* escape characters */
                    317:                        return ('\7');
                    318:                case 'b':
                    319:                        return ('\b');
                    320:                case 'f':
                    321:                        return ('\f');
                    322:                case 'n':
                    323:                        return ('\n');
                    324:                case 'r':
                    325:                        return ('\r');
                    326:                case 't':
                    327:                        return ('\t');
                    328:                case 'v':
                    329:                        return ('\13');
                    330:                case '\0':                      /*  \" -> \ */
                    331:                        s->state = EOS;
                    332:                        return ('\\');
                    333:                default:                        /* \x" -> x */
                    334:                        return (ch);
                    335:        }
                    336: }