[BACK]Return to str.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / tr

Annotation of src/usr.bin/tr/str.c, Revision 1.2

1.2     ! deraadt     1: /*     $OpenBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $     */
1.1       deraadt     2: /*     $NetBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $      */
                      3:
                      4: /*-
                      5:  * Copyright (c) 1991, 1993
                      6:  *     The Regents of the University of California.  All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  * 3. All advertising materials mentioning features or use of this software
                     17:  *    must display the following acknowledgement:
                     18:  *     This product includes software developed by the University of
                     19:  *     California, Berkeley and its contributors.
                     20:  * 4. Neither the name of the University nor the names of its contributors
                     21:  *    may be used to endorse or promote products derived from this software
                     22:  *    without specific prior written permission.
                     23:  *
                     24:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     25:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     26:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     27:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     28:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     29:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     30:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     31:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     32:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     33:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     34:  * SUCH DAMAGE.
                     35:  */
                     36:
                     37: #ifndef lint
                     38: #if 0
                     39: static char sccsid[] = "@(#)str.c      8.2 (Berkeley) 4/28/95";
                     40: #endif
1.2     ! deraadt    41: static char rcsid[] = "$OpenBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $";
1.1       deraadt    42: #endif /* not lint */
                     43:
                     44: #include <sys/cdefs.h>
                     45: #include <sys/types.h>
                     46:
                     47: #include <errno.h>
                     48: #include <stddef.h>
                     49: #include <stdio.h>
                     50: #include <stdlib.h>
                     51: #include <string.h>
                     52: #include <ctype.h>
                     53:
                     54: #include "extern.h"
                     55:
                     56: static int     backslash __P((STR *));
                     57: static int     bracket __P((STR *));
                     58: static int     c_class __P((const void *, const void *));
                     59: static void    genclass __P((STR *));
                     60: static void    genequiv __P((STR *));
                     61: static int     genrange __P((STR *));
                     62: static void    genseq __P((STR *));
                     63:
                     64: int
                     65: next(s)
                     66:        register STR *s;
                     67: {
                     68:        register int ch;
                     69:
                     70:        switch (s->state) {
                     71:        case EOS:
                     72:                return (0);
                     73:        case INFINITE:
                     74:                return (1);
                     75:        case NORMAL:
                     76:                switch (ch = *s->str) {
                     77:                case '\0':
                     78:                        s->state = EOS;
                     79:                        return (0);
                     80:                case '\\':
                     81:                        s->lastch = backslash(s);
                     82:                        break;
                     83:                case '[':
                     84:                        if (bracket(s))
                     85:                                return (next(s));
                     86:                        /* FALLTHROUGH */
                     87:                default:
                     88:                        ++s->str;
                     89:                        s->lastch = ch;
                     90:                        break;
                     91:                }
                     92:
                     93:                /* We can start a range at any time. */
                     94:                if (s->str[0] == '-' && genrange(s))
                     95:                        return (next(s));
                     96:                return (1);
                     97:        case RANGE:
                     98:                if (s->cnt-- == 0) {
                     99:                        s->state = NORMAL;
                    100:                        return (next(s));
                    101:                }
                    102:                ++s->lastch;
                    103:                return (1);
                    104:        case SEQUENCE:
                    105:                if (s->cnt-- == 0) {
                    106:                        s->state = NORMAL;
                    107:                        return (next(s));
                    108:                }
                    109:                return (1);
                    110:        case SET:
                    111:                if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
                    112:                        s->state = NORMAL;
                    113:                        return (next(s));
                    114:                }
                    115:                return (1);
                    116:        }
                    117:        /* NOTREACHED */
                    118: }
                    119:
                    120: static int
                    121: bracket(s)
                    122:        register STR *s;
                    123: {
                    124:        register char *p;
                    125:
                    126:        switch (s->str[1]) {
                    127:        case ':':                               /* "[:class:]" */
                    128:                if ((p = strstr(s->str + 2, ":]")) == NULL)
                    129:                        return (0);
                    130:                *p = '\0';
                    131:                s->str += 2;
                    132:                genclass(s);
                    133:                s->str = p + 2;
                    134:                return (1);
                    135:        case '=':                               /* "[=equiv=]" */
                    136:                if ((p = strstr(s->str + 2, "=]")) == NULL)
                    137:                        return (0);
                    138:                s->str += 2;
                    139:                genequiv(s);
                    140:                return (1);
                    141:        default:                                /* "[\###*n]" or "[#*n]" */
                    142:                if ((p = strpbrk(s->str + 2, "*]")) == NULL)
                    143:                        return (0);
                    144:                if (p[0] != '*' || index(p, ']') == NULL)
                    145:                        return (0);
                    146:                s->str += 1;
                    147:                genseq(s);
                    148:                return (1);
                    149:        }
                    150:        /* NOTREACHED */
                    151: }
                    152:
                    153: typedef struct {
                    154:        char *name;
                    155:        int (*func) __P((int));
                    156:        int *set;
                    157: } CLASS;
                    158:
                    159: static CLASS classes[] = {
                    160:        { "alnum",  isalnum,  },
                    161:        { "alpha",  isalpha,  },
                    162:        { "blank",  isblank,  },
                    163:        { "cntrl",  iscntrl,  },
                    164:        { "digit",  isdigit,  },
                    165:        { "graph",  isgraph,  },
                    166:        { "lower",  islower,  },
                    167:        { "print",  isupper,  },
                    168:        { "punct",  ispunct,  },
                    169:        { "space",  isspace,  },
                    170:        { "upper",  isupper,  },
                    171:        { "xdigit", isxdigit, },
                    172: };
                    173:
                    174: static void
                    175: genclass(s)
                    176:        STR *s;
                    177: {
                    178:        register int cnt, (*func) __P((int));
                    179:        CLASS *cp, tmp;
                    180:        int *p;
                    181:
                    182:        tmp.name = s->str;
                    183:        if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
                    184:            sizeof(CLASS), sizeof(CLASS), c_class)) == NULL)
                    185:                err("unknown class %s", s->str);
                    186:
                    187:        if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
                    188:                err("%s", strerror(errno));
                    189:        bzero(p, (NCHARS + 1) * sizeof(int));
                    190:        for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
                    191:                if ((func)(cnt))
                    192:                        *p++ = cnt;
                    193:        *p = OOBCH;
                    194:
                    195:        s->cnt = 0;
                    196:        s->state = SET;
                    197:        s->set = cp->set;
                    198: }
                    199:
                    200: static int
                    201: c_class(a, b)
                    202:        const void *a, *b;
                    203: {
                    204:        return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name));
                    205: }
                    206:
                    207: /*
                    208:  * English doesn't have any equivalence classes, so for now
                    209:  * we just syntax check and grab the character.
                    210:  */
                    211: static void
                    212: genequiv(s)
                    213:        STR *s;
                    214: {
                    215:        if (*s->str == '\\') {
                    216:                s->equiv[0] = backslash(s);
                    217:                if (*s->str != '=')
                    218:                        err("misplaced equivalence equals sign");
                    219:        } else {
                    220:                s->equiv[0] = s->str[0];
                    221:                if (s->str[1] != '=')
                    222:                        err("misplaced equivalence equals sign");
                    223:        }
                    224:        s->str += 2;
                    225:        s->cnt = 0;
                    226:        s->state = SET;
                    227:        s->set = s->equiv;
                    228: }
                    229:
                    230: static int
                    231: genrange(s)
                    232:        STR *s;
                    233: {
                    234:        int stopval;
                    235:        char *savestart;
                    236:
                    237:        savestart = s->str;
                    238:        stopval = *++s->str == '\\' ? backslash(s) : *s->str++;
                    239:        if (stopval < (u_char)s->lastch) {
                    240:                s->str = savestart;
                    241:                return (0);
                    242:        }
                    243:        s->cnt = stopval - s->lastch + 1;
                    244:        s->state = RANGE;
                    245:        --s->lastch;
                    246:        return (1);
                    247: }
                    248:
                    249: static void
                    250: genseq(s)
                    251:        STR *s;
                    252: {
                    253:        char *ep;
                    254:
                    255:        if (s->which == STRING1)
                    256:                err("sequences only valid in string2");
                    257:
                    258:        if (*s->str == '\\')
                    259:                s->lastch = backslash(s);
                    260:        else
                    261:                s->lastch = *s->str++;
                    262:        if (*s->str != '*')
                    263:                err("misplaced sequence asterisk");
                    264:
                    265:        switch (*++s->str) {
                    266:        case '\\':
                    267:                s->cnt = backslash(s);
                    268:                break;
                    269:        case ']':
                    270:                s->cnt = 0;
                    271:                ++s->str;
                    272:                break;
                    273:        default:
                    274:                if (isdigit(*s->str)) {
                    275:                        s->cnt = strtol(s->str, &ep, 0);
                    276:                        if (*ep == ']') {
                    277:                                s->str = ep + 1;
                    278:                                break;
                    279:                        }
                    280:                }
                    281:                err("illegal sequence count");
                    282:                /* NOTREACHED */
                    283:        }
                    284:
                    285:        s->state = s->cnt ? SEQUENCE : INFINITE;
                    286: }
                    287:
                    288: /*
                    289:  * Translate \??? into a character.  Up to 3 octal digits, if no digits either
                    290:  * an escape code or a literal character.
                    291:  */
                    292: static int
                    293: backslash(s)
                    294:        register STR *s;
                    295: {
                    296:        register int ch, cnt, val;
                    297:
                    298:        for (cnt = val = 0;;) {
                    299:                ch = *++s->str;
                    300:                if (!isascii(ch) || !isdigit(ch))
                    301:                        break;
                    302:                val = val * 8 + ch - '0';
                    303:                if (++cnt == 3) {
                    304:                        ++s->str;
                    305:                        break;
                    306:                }
                    307:        }
                    308:        if (cnt)
                    309:                return (val);
                    310:        if (ch != '\0')
                    311:                ++s->str;
                    312:        switch (ch) {
                    313:                case 'a':                       /* escape characters */
                    314:                        return ('\7');
                    315:                case 'b':
                    316:                        return ('\b');
                    317:                case 'f':
                    318:                        return ('\f');
                    319:                case 'n':
                    320:                        return ('\n');
                    321:                case 'r':
                    322:                        return ('\r');
                    323:                case 't':
                    324:                        return ('\t');
                    325:                case 'v':
                    326:                        return ('\13');
                    327:                case '\0':                      /*  \" -> \ */
                    328:                        s->state = EOS;
                    329:                        return ('\\');
                    330:                default:                        /* \x" -> x */
                    331:                        return (ch);
                    332:        }
                    333: }