Annotation of src/usr.bin/tr/str.c, Revision 1.7
1.7 ! millert 1: /* $OpenBSD: str.c,v 1.6 2001/11/19 19:02:17 mpech Exp $ */
1.1 deraadt 2: /* $NetBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $ */
3:
4: /*-
5: * Copyright (c) 1991, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. All advertising materials mentioning features or use of this software
17: * must display the following acknowledgement:
18: * This product includes software developed by the University of
19: * California, Berkeley and its contributors.
20: * 4. Neither the name of the University nor the names of its contributors
21: * may be used to endorse or promote products derived from this software
22: * without specific prior written permission.
23: *
24: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34: * SUCH DAMAGE.
35: */
36:
/*
 * Version identification strings embedded in the object file.  They are
 * omitted when compiling under lint; the historical SCCS id is kept for
 * reference only and is permanently disabled.
 */
#ifndef lint
#if 0
static char sccsid[] = "@(#)str.c 8.2 (Berkeley) 4/28/95";
#endif
static char rcsid[] =
    "$OpenBSD: str.c,v 1.6 2001/11/19 19:02:17 mpech Exp $";
#endif /* not lint */
43:
44: #include <sys/cdefs.h>
45: #include <sys/types.h>
46:
47: #include <errno.h>
48: #include <stddef.h>
49: #include <stdio.h>
50: #include <stdlib.h>
51: #include <string.h>
52: #include <ctype.h>
1.4 mickey 53: #include <err.h>
1.1 deraadt 54:
55: #include "extern.h"
56:
1.7 ! millert 57: static int backslash(STR *);
! 58: static int bracket(STR *);
! 59: static int c_class(const void *, const void *);
! 60: static void genclass(STR *);
! 61: static void genequiv(STR *);
! 62: static int genrange(STR *);
! 63: static void genseq(STR *);
1.1 deraadt 64:
65: int
66: next(s)
1.6 mpech 67: STR *s;
1.1 deraadt 68: {
1.6 mpech 69: int ch;
1.1 deraadt 70:
71: switch (s->state) {
72: case EOS:
73: return (0);
74: case INFINITE:
75: return (1);
76: case NORMAL:
77: switch (ch = *s->str) {
78: case '\0':
79: s->state = EOS;
80: return (0);
81: case '\\':
82: s->lastch = backslash(s);
83: break;
84: case '[':
85: if (bracket(s))
86: return (next(s));
87: /* FALLTHROUGH */
88: default:
89: ++s->str;
90: s->lastch = ch;
91: break;
92: }
93:
94: /* We can start a range at any time. */
95: if (s->str[0] == '-' && genrange(s))
96: return (next(s));
97: return (1);
98: case RANGE:
99: if (s->cnt-- == 0) {
100: s->state = NORMAL;
101: return (next(s));
102: }
103: ++s->lastch;
104: return (1);
105: case SEQUENCE:
106: if (s->cnt-- == 0) {
107: s->state = NORMAL;
108: return (next(s));
109: }
110: return (1);
111: case SET:
112: if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
113: s->state = NORMAL;
114: return (next(s));
115: }
116: return (1);
1.4 mickey 117: default:
118: return 0;
1.1 deraadt 119: }
120: /* NOTREACHED */
121: }
122:
123: static int
124: bracket(s)
1.6 mpech 125: STR *s;
1.1 deraadt 126: {
1.6 mpech 127: char *p;
1.1 deraadt 128:
129: switch (s->str[1]) {
130: case ':': /* "[:class:]" */
131: if ((p = strstr(s->str + 2, ":]")) == NULL)
132: return (0);
133: *p = '\0';
134: s->str += 2;
135: genclass(s);
136: s->str = p + 2;
137: return (1);
138: case '=': /* "[=equiv=]" */
139: if ((p = strstr(s->str + 2, "=]")) == NULL)
140: return (0);
141: s->str += 2;
142: genequiv(s);
143: return (1);
144: default: /* "[\###*n]" or "[#*n]" */
145: if ((p = strpbrk(s->str + 2, "*]")) == NULL)
146: return (0);
1.3 millert 147: if (p[0] != '*' || strchr(p, ']') == NULL)
1.1 deraadt 148: return (0);
149: s->str += 1;
150: genseq(s);
151: return (1);
152: }
153: /* NOTREACHED */
154: }
155:
/* One entry of the character class table. */
typedef struct {
	char	*name;			/* class name as written in "[:name:]" */
	int	(*func)(int);		/* <ctype.h> membership predicate */
	int	*set;			/* member list built by genclass() */
} CLASS;
161:
162: static CLASS classes[] = {
163: { "alnum", isalnum, },
164: { "alpha", isalpha, },
165: { "blank", isblank, },
166: { "cntrl", iscntrl, },
167: { "digit", isdigit, },
168: { "graph", isgraph, },
169: { "lower", islower, },
1.5 deraadt 170: { "print", isprint, },
1.1 deraadt 171: { "punct", ispunct, },
172: { "space", isspace, },
173: { "upper", isupper, },
174: { "xdigit", isxdigit, },
175: };
176:
177: static void
178: genclass(s)
179: STR *s;
180: {
1.7 ! millert 181: int cnt, (*func)(int);
1.1 deraadt 182: CLASS *cp, tmp;
183: int *p;
184:
185: tmp.name = s->str;
186: if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
187: sizeof(CLASS), sizeof(CLASS), c_class)) == NULL)
1.4 mickey 188: errx(1, "unknown class %s", s->str);
1.1 deraadt 189:
190: if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
1.4 mickey 191: errx(1, "no memory for a class");
1.1 deraadt 192: bzero(p, (NCHARS + 1) * sizeof(int));
193: for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
194: if ((func)(cnt))
195: *p++ = cnt;
196: *p = OOBCH;
197:
198: s->cnt = 0;
199: s->state = SET;
200: s->set = cp->set;
201: }
202:
203: static int
204: c_class(a, b)
205: const void *a, *b;
206: {
207: return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name));
208: }
209:
210: /*
211: * English doesn't have any equivalence classes, so for now
212: * we just syntax check and grab the character.
213: */
214: static void
215: genequiv(s)
216: STR *s;
217: {
218: if (*s->str == '\\') {
219: s->equiv[0] = backslash(s);
220: if (*s->str != '=')
1.4 mickey 221: errx(1, "misplaced equivalence equals sign");
1.1 deraadt 222: } else {
223: s->equiv[0] = s->str[0];
224: if (s->str[1] != '=')
1.4 mickey 225: errx(1, "misplaced equivalence equals sign");
1.1 deraadt 226: }
227: s->str += 2;
228: s->cnt = 0;
229: s->state = SET;
230: s->set = s->equiv;
231: }
232:
233: static int
234: genrange(s)
235: STR *s;
236: {
237: int stopval;
238: char *savestart;
239:
240: savestart = s->str;
241: stopval = *++s->str == '\\' ? backslash(s) : *s->str++;
242: if (stopval < (u_char)s->lastch) {
243: s->str = savestart;
244: return (0);
245: }
246: s->cnt = stopval - s->lastch + 1;
247: s->state = RANGE;
248: --s->lastch;
249: return (1);
250: }
251:
252: static void
253: genseq(s)
254: STR *s;
255: {
256: char *ep;
257:
258: if (s->which == STRING1)
1.4 mickey 259: errx(1, "sequences only valid in string2");
1.1 deraadt 260:
261: if (*s->str == '\\')
262: s->lastch = backslash(s);
263: else
264: s->lastch = *s->str++;
265: if (*s->str != '*')
1.4 mickey 266: errx(1, "misplaced sequence asterisk");
1.1 deraadt 267:
268: switch (*++s->str) {
269: case '\\':
270: s->cnt = backslash(s);
271: break;
272: case ']':
273: s->cnt = 0;
274: ++s->str;
275: break;
276: default:
277: if (isdigit(*s->str)) {
278: s->cnt = strtol(s->str, &ep, 0);
279: if (*ep == ']') {
280: s->str = ep + 1;
281: break;
282: }
283: }
1.4 mickey 284: errx(1, "illegal sequence count");
1.1 deraadt 285: /* NOTREACHED */
286: }
287:
288: s->state = s->cnt ? SEQUENCE : INFINITE;
289: }
290:
291: /*
292: * Translate \??? into a character. Up to 3 octal digits, if no digits either
293: * an escape code or a literal character.
294: */
295: static int
296: backslash(s)
1.6 mpech 297: STR *s;
1.1 deraadt 298: {
1.6 mpech 299: int ch, cnt, val;
1.1 deraadt 300:
301: for (cnt = val = 0;;) {
302: ch = *++s->str;
303: if (!isascii(ch) || !isdigit(ch))
304: break;
305: val = val * 8 + ch - '0';
306: if (++cnt == 3) {
307: ++s->str;
308: break;
309: }
310: }
311: if (cnt)
312: return (val);
313: if (ch != '\0')
314: ++s->str;
315: switch (ch) {
316: case 'a': /* escape characters */
317: return ('\7');
318: case 'b':
319: return ('\b');
320: case 'f':
321: return ('\f');
322: case 'n':
323: return ('\n');
324: case 'r':
325: return ('\r');
326: case 't':
327: return ('\t');
328: case 'v':
329: return ('\13');
330: case '\0': /* \" -> \ */
331: s->state = EOS;
332: return ('\\');
333: default: /* \x" -> x */
334: return (ch);
335: }
336: }