Annotation of src/usr.bin/tr/str.c, Revision 1.2
1.2 ! deraadt 1: /* $OpenBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $ */
1.1 deraadt 2: /* $NetBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $ */
3:
4: /*-
5: * Copyright (c) 1991, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. All advertising materials mentioning features or use of this software
17: * must display the following acknowledgement:
18: * This product includes software developed by the University of
19: * California, Berkeley and its contributors.
20: * 4. Neither the name of the University nor the names of its contributors
21: * may be used to endorse or promote products derived from this software
22: * without specific prior written permission.
23: *
24: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34: * SUCH DAMAGE.
35: */
36:
37: #ifndef lint
38: #if 0
39: static char sccsid[] = "@(#)str.c 8.2 (Berkeley) 4/28/95";
40: #endif
1.2 ! deraadt 41: static char rcsid[] = "$OpenBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $";
1.1 deraadt 42: #endif /* not lint */
43:
44: #include <sys/cdefs.h>
45: #include <sys/types.h>
46:
47: #include <errno.h>
48: #include <stddef.h>
49: #include <stdio.h>
50: #include <stdlib.h>
51: #include <string.h>
52: #include <ctype.h>
53:
54: #include "extern.h"
55:
56: static int backslash __P((STR *));
57: static int bracket __P((STR *));
58: static int c_class __P((const void *, const void *));
59: static void genclass __P((STR *));
60: static void genequiv __P((STR *));
61: static int genrange __P((STR *));
62: static void genseq __P((STR *));
63:
64: int
65: next(s)
66: register STR *s;
67: {
68: register int ch;
69:
70: switch (s->state) {
71: case EOS:
72: return (0);
73: case INFINITE:
74: return (1);
75: case NORMAL:
76: switch (ch = *s->str) {
77: case '\0':
78: s->state = EOS;
79: return (0);
80: case '\\':
81: s->lastch = backslash(s);
82: break;
83: case '[':
84: if (bracket(s))
85: return (next(s));
86: /* FALLTHROUGH */
87: default:
88: ++s->str;
89: s->lastch = ch;
90: break;
91: }
92:
93: /* We can start a range at any time. */
94: if (s->str[0] == '-' && genrange(s))
95: return (next(s));
96: return (1);
97: case RANGE:
98: if (s->cnt-- == 0) {
99: s->state = NORMAL;
100: return (next(s));
101: }
102: ++s->lastch;
103: return (1);
104: case SEQUENCE:
105: if (s->cnt-- == 0) {
106: s->state = NORMAL;
107: return (next(s));
108: }
109: return (1);
110: case SET:
111: if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
112: s->state = NORMAL;
113: return (next(s));
114: }
115: return (1);
116: }
117: /* NOTREACHED */
118: }
119:
120: static int
121: bracket(s)
122: register STR *s;
123: {
124: register char *p;
125:
126: switch (s->str[1]) {
127: case ':': /* "[:class:]" */
128: if ((p = strstr(s->str + 2, ":]")) == NULL)
129: return (0);
130: *p = '\0';
131: s->str += 2;
132: genclass(s);
133: s->str = p + 2;
134: return (1);
135: case '=': /* "[=equiv=]" */
136: if ((p = strstr(s->str + 2, "=]")) == NULL)
137: return (0);
138: s->str += 2;
139: genequiv(s);
140: return (1);
141: default: /* "[\###*n]" or "[#*n]" */
142: if ((p = strpbrk(s->str + 2, "*]")) == NULL)
143: return (0);
144: if (p[0] != '*' || index(p, ']') == NULL)
145: return (0);
146: s->str += 1;
147: genseq(s);
148: return (1);
149: }
150: /* NOTREACHED */
151: }
152:
/*
 * Character class descriptor: the class name as written between "[:"
 * and ":]", the <ctype.h> predicate that decides membership, and the
 * OOBCH-terminated list of member characters filled in by genclass().
 */
typedef struct {
	char *name;
	int (*func) __P((int));
	int *set;
} CLASS;

/*
 * Known character classes.  The table is searched with bsearch(), so
 * the entries must stay sorted by name.
 */
static CLASS classes[] = {
	{ "alnum",  isalnum,  },
	{ "alpha",  isalpha,  },
	{ "blank",  isblank,  },
	{ "cntrl",  iscntrl,  },
	{ "digit",  isdigit,  },
	{ "graph",  isgraph,  },
	{ "lower",  islower,  },
	{ "print",  isprint,  },
	{ "punct",  ispunct,  },
	{ "space",  isspace,  },
	{ "upper",  isupper,  },
	{ "xdigit", isxdigit, },
};
173:
174: static void
175: genclass(s)
176: STR *s;
177: {
178: register int cnt, (*func) __P((int));
179: CLASS *cp, tmp;
180: int *p;
181:
182: tmp.name = s->str;
183: if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
184: sizeof(CLASS), sizeof(CLASS), c_class)) == NULL)
185: err("unknown class %s", s->str);
186:
187: if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
188: err("%s", strerror(errno));
189: bzero(p, (NCHARS + 1) * sizeof(int));
190: for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
191: if ((func)(cnt))
192: *p++ = cnt;
193: *p = OOBCH;
194:
195: s->cnt = 0;
196: s->state = SET;
197: s->set = cp->set;
198: }
199:
200: static int
201: c_class(a, b)
202: const void *a, *b;
203: {
204: return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name));
205: }
206:
207: /*
208: * English doesn't have any equivalence classes, so for now
209: * we just syntax check and grab the character.
210: */
211: static void
212: genequiv(s)
213: STR *s;
214: {
215: if (*s->str == '\\') {
216: s->equiv[0] = backslash(s);
217: if (*s->str != '=')
218: err("misplaced equivalence equals sign");
219: } else {
220: s->equiv[0] = s->str[0];
221: if (s->str[1] != '=')
222: err("misplaced equivalence equals sign");
223: }
224: s->str += 2;
225: s->cnt = 0;
226: s->state = SET;
227: s->set = s->equiv;
228: }
229:
230: static int
231: genrange(s)
232: STR *s;
233: {
234: int stopval;
235: char *savestart;
236:
237: savestart = s->str;
238: stopval = *++s->str == '\\' ? backslash(s) : *s->str++;
239: if (stopval < (u_char)s->lastch) {
240: s->str = savestart;
241: return (0);
242: }
243: s->cnt = stopval - s->lastch + 1;
244: s->state = RANGE;
245: --s->lastch;
246: return (1);
247: }
248:
249: static void
250: genseq(s)
251: STR *s;
252: {
253: char *ep;
254:
255: if (s->which == STRING1)
256: err("sequences only valid in string2");
257:
258: if (*s->str == '\\')
259: s->lastch = backslash(s);
260: else
261: s->lastch = *s->str++;
262: if (*s->str != '*')
263: err("misplaced sequence asterisk");
264:
265: switch (*++s->str) {
266: case '\\':
267: s->cnt = backslash(s);
268: break;
269: case ']':
270: s->cnt = 0;
271: ++s->str;
272: break;
273: default:
274: if (isdigit(*s->str)) {
275: s->cnt = strtol(s->str, &ep, 0);
276: if (*ep == ']') {
277: s->str = ep + 1;
278: break;
279: }
280: }
281: err("illegal sequence count");
282: /* NOTREACHED */
283: }
284:
285: s->state = s->cnt ? SEQUENCE : INFINITE;
286: }
287:
288: /*
289: * Translate \??? into a character. Up to 3 octal digits, if no digits either
290: * an escape code or a literal character.
291: */
292: static int
293: backslash(s)
294: register STR *s;
295: {
296: register int ch, cnt, val;
297:
298: for (cnt = val = 0;;) {
299: ch = *++s->str;
300: if (!isascii(ch) || !isdigit(ch))
301: break;
302: val = val * 8 + ch - '0';
303: if (++cnt == 3) {
304: ++s->str;
305: break;
306: }
307: }
308: if (cnt)
309: return (val);
310: if (ch != '\0')
311: ++s->str;
312: switch (ch) {
313: case 'a': /* escape characters */
314: return ('\7');
315: case 'b':
316: return ('\b');
317: case 'f':
318: return ('\f');
319: case 'n':
320: return ('\n');
321: case 'r':
322: return ('\r');
323: case 't':
324: return ('\t');
325: case 'v':
326: return ('\13');
327: case '\0': /* \" -> \ */
328: s->state = EOS;
329: return ('\\');
330: default: /* \x" -> x */
331: return (ch);
332: }
333: }