Annotation of src/usr.bin/tr/str.c, Revision 1.1
1.1 ! deraadt 1: /* $NetBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $ */
! 2:
! 3: /*-
! 4: * Copyright (c) 1991, 1993
! 5: * The Regents of the University of California. All rights reserved.
! 6: *
! 7: * Redistribution and use in source and binary forms, with or without
! 8: * modification, are permitted provided that the following conditions
! 9: * are met:
! 10: * 1. Redistributions of source code must retain the above copyright
! 11: * notice, this list of conditions and the following disclaimer.
! 12: * 2. Redistributions in binary form must reproduce the above copyright
! 13: * notice, this list of conditions and the following disclaimer in the
! 14: * documentation and/or other materials provided with the distribution.
! 15: * 3. All advertising materials mentioning features or use of this software
! 16: * must display the following acknowledgement:
! 17: * This product includes software developed by the University of
! 18: * California, Berkeley and its contributors.
! 19: * 4. Neither the name of the University nor the names of its contributors
! 20: * may be used to endorse or promote products derived from this software
! 21: * without specific prior written permission.
! 22: *
! 23: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
! 24: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 25: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 26: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
! 27: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 33: * SUCH DAMAGE.
! 34: */
! 35:
! 36: #ifndef lint
! 37: #if 0
! 38: static char sccsid[] = "@(#)str.c 8.2 (Berkeley) 4/28/95";
! 39: #endif
! 40: static char rcsid[] = "$NetBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $";
! 41: #endif /* not lint */
! 42:
! 43: #include <sys/cdefs.h>
! 44: #include <sys/types.h>
! 45:
! 46: #include <errno.h>
! 47: #include <stddef.h>
! 48: #include <stdio.h>
! 49: #include <stdlib.h>
! 50: #include <string.h>
! 51: #include <ctype.h>
! 52:
! 53: #include "extern.h"
! 54:
! 55: static int backslash __P((STR *));
! 56: static int bracket __P((STR *));
! 57: static int c_class __P((const void *, const void *));
! 58: static void genclass __P((STR *));
! 59: static void genequiv __P((STR *));
! 60: static int genrange __P((STR *));
! 61: static void genseq __P((STR *));
! 62:
! 63: int
! 64: next(s)
! 65: register STR *s;
! 66: {
! 67: register int ch;
! 68:
! 69: switch (s->state) {
! 70: case EOS:
! 71: return (0);
! 72: case INFINITE:
! 73: return (1);
! 74: case NORMAL:
! 75: switch (ch = *s->str) {
! 76: case '\0':
! 77: s->state = EOS;
! 78: return (0);
! 79: case '\\':
! 80: s->lastch = backslash(s);
! 81: break;
! 82: case '[':
! 83: if (bracket(s))
! 84: return (next(s));
! 85: /* FALLTHROUGH */
! 86: default:
! 87: ++s->str;
! 88: s->lastch = ch;
! 89: break;
! 90: }
! 91:
! 92: /* We can start a range at any time. */
! 93: if (s->str[0] == '-' && genrange(s))
! 94: return (next(s));
! 95: return (1);
! 96: case RANGE:
! 97: if (s->cnt-- == 0) {
! 98: s->state = NORMAL;
! 99: return (next(s));
! 100: }
! 101: ++s->lastch;
! 102: return (1);
! 103: case SEQUENCE:
! 104: if (s->cnt-- == 0) {
! 105: s->state = NORMAL;
! 106: return (next(s));
! 107: }
! 108: return (1);
! 109: case SET:
! 110: if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
! 111: s->state = NORMAL;
! 112: return (next(s));
! 113: }
! 114: return (1);
! 115: }
! 116: /* NOTREACHED */
! 117: }
! 118:
! 119: static int
! 120: bracket(s)
! 121: register STR *s;
! 122: {
! 123: register char *p;
! 124:
! 125: switch (s->str[1]) {
! 126: case ':': /* "[:class:]" */
! 127: if ((p = strstr(s->str + 2, ":]")) == NULL)
! 128: return (0);
! 129: *p = '\0';
! 130: s->str += 2;
! 131: genclass(s);
! 132: s->str = p + 2;
! 133: return (1);
! 134: case '=': /* "[=equiv=]" */
! 135: if ((p = strstr(s->str + 2, "=]")) == NULL)
! 136: return (0);
! 137: s->str += 2;
! 138: genequiv(s);
! 139: return (1);
! 140: default: /* "[\###*n]" or "[#*n]" */
! 141: if ((p = strpbrk(s->str + 2, "*]")) == NULL)
! 142: return (0);
! 143: if (p[0] != '*' || index(p, ']') == NULL)
! 144: return (0);
! 145: s->str += 1;
! 146: genseq(s);
! 147: return (1);
! 148: }
! 149: /* NOTREACHED */
! 150: }
! 151:
/*
 * Description of one "[:class:]" character class.
 */
typedef struct {
	char *name;			/* class name as written by the user */
	int (*func) __P((int));		/* <ctype.h> membership predicate */
	int *set;			/* member list, filled in by genclass() */
} CLASS;

/*
 * Known character classes.  genclass() looks names up with bsearch(), so
 * the entries MUST stay sorted by name.
 */
static CLASS classes[] = {
	{ "alnum",  isalnum,  },
	{ "alpha",  isalpha,  },
	{ "blank",  isblank,  },
	{ "cntrl",  iscntrl,  },
	{ "digit",  isdigit,  },
	{ "graph",  isgraph,  },
	{ "lower",  islower,  },
	{ "print",  isprint,  },
	{ "punct",  ispunct,  },
	{ "space",  isspace,  },
	{ "upper",  isupper,  },
	{ "xdigit", isxdigit, },
};
! 172:
! 173: static void
! 174: genclass(s)
! 175: STR *s;
! 176: {
! 177: register int cnt, (*func) __P((int));
! 178: CLASS *cp, tmp;
! 179: int *p;
! 180:
! 181: tmp.name = s->str;
! 182: if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
! 183: sizeof(CLASS), sizeof(CLASS), c_class)) == NULL)
! 184: err("unknown class %s", s->str);
! 185:
! 186: if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
! 187: err("%s", strerror(errno));
! 188: bzero(p, (NCHARS + 1) * sizeof(int));
! 189: for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
! 190: if ((func)(cnt))
! 191: *p++ = cnt;
! 192: *p = OOBCH;
! 193:
! 194: s->cnt = 0;
! 195: s->state = SET;
! 196: s->set = cp->set;
! 197: }
! 198:
! 199: static int
! 200: c_class(a, b)
! 201: const void *a, *b;
! 202: {
! 203: return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name));
! 204: }
! 205:
! 206: /*
! 207: * English doesn't have any equivalence classes, so for now
! 208: * we just syntax check and grab the character.
! 209: */
! 210: static void
! 211: genequiv(s)
! 212: STR *s;
! 213: {
! 214: if (*s->str == '\\') {
! 215: s->equiv[0] = backslash(s);
! 216: if (*s->str != '=')
! 217: err("misplaced equivalence equals sign");
! 218: } else {
! 219: s->equiv[0] = s->str[0];
! 220: if (s->str[1] != '=')
! 221: err("misplaced equivalence equals sign");
! 222: }
! 223: s->str += 2;
! 224: s->cnt = 0;
! 225: s->state = SET;
! 226: s->set = s->equiv;
! 227: }
! 228:
! 229: static int
! 230: genrange(s)
! 231: STR *s;
! 232: {
! 233: int stopval;
! 234: char *savestart;
! 235:
! 236: savestart = s->str;
! 237: stopval = *++s->str == '\\' ? backslash(s) : *s->str++;
! 238: if (stopval < (u_char)s->lastch) {
! 239: s->str = savestart;
! 240: return (0);
! 241: }
! 242: s->cnt = stopval - s->lastch + 1;
! 243: s->state = RANGE;
! 244: --s->lastch;
! 245: return (1);
! 246: }
! 247:
! 248: static void
! 249: genseq(s)
! 250: STR *s;
! 251: {
! 252: char *ep;
! 253:
! 254: if (s->which == STRING1)
! 255: err("sequences only valid in string2");
! 256:
! 257: if (*s->str == '\\')
! 258: s->lastch = backslash(s);
! 259: else
! 260: s->lastch = *s->str++;
! 261: if (*s->str != '*')
! 262: err("misplaced sequence asterisk");
! 263:
! 264: switch (*++s->str) {
! 265: case '\\':
! 266: s->cnt = backslash(s);
! 267: break;
! 268: case ']':
! 269: s->cnt = 0;
! 270: ++s->str;
! 271: break;
! 272: default:
! 273: if (isdigit(*s->str)) {
! 274: s->cnt = strtol(s->str, &ep, 0);
! 275: if (*ep == ']') {
! 276: s->str = ep + 1;
! 277: break;
! 278: }
! 279: }
! 280: err("illegal sequence count");
! 281: /* NOTREACHED */
! 282: }
! 283:
! 284: s->state = s->cnt ? SEQUENCE : INFINITE;
! 285: }
! 286:
! 287: /*
! 288: * Translate \??? into a character. Up to 3 octal digits, if no digits either
! 289: * an escape code or a literal character.
! 290: */
! 291: static int
! 292: backslash(s)
! 293: register STR *s;
! 294: {
! 295: register int ch, cnt, val;
! 296:
! 297: for (cnt = val = 0;;) {
! 298: ch = *++s->str;
! 299: if (!isascii(ch) || !isdigit(ch))
! 300: break;
! 301: val = val * 8 + ch - '0';
! 302: if (++cnt == 3) {
! 303: ++s->str;
! 304: break;
! 305: }
! 306: }
! 307: if (cnt)
! 308: return (val);
! 309: if (ch != '\0')
! 310: ++s->str;
! 311: switch (ch) {
! 312: case 'a': /* escape characters */
! 313: return ('\7');
! 314: case 'b':
! 315: return ('\b');
! 316: case 'f':
! 317: return ('\f');
! 318: case 'n':
! 319: return ('\n');
! 320: case 'r':
! 321: return ('\r');
! 322: case 't':
! 323: return ('\t');
! 324: case 'v':
! 325: return ('\13');
! 326: case '\0': /* \" -> \ */
! 327: s->state = EOS;
! 328: return ('\\');
! 329: default: /* \x" -> x */
! 330: return (ch);
! 331: }
! 332: }