Annotation of src/usr.bin/tr/str.c, Revision 1.11
1.11 ! deraadt 1: /* $OpenBSD: str.c,v 1.10 2006/04/03 01:31:11 djm Exp $ */
1.1 deraadt 2: /* $NetBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $ */
3:
4: /*-
5: * Copyright (c) 1991, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
1.8 millert 16: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: */
32:
33: #include <sys/cdefs.h>
34: #include <sys/types.h>
35:
36: #include <errno.h>
37: #include <stddef.h>
38: #include <stdio.h>
39: #include <stdlib.h>
40: #include <string.h>
41: #include <ctype.h>
1.4 mickey 42: #include <err.h>
1.1 deraadt 43:
44: #include "extern.h"
45:
1.7 millert 46: static int backslash(STR *);
47: static int bracket(STR *);
48: static int c_class(const void *, const void *);
49: static void genclass(STR *);
50: static void genequiv(STR *);
51: static int genrange(STR *);
52: static void genseq(STR *);
1.1 deraadt 53:
54: int
55: next(s)
1.6 mpech 56: STR *s;
1.1 deraadt 57: {
1.6 mpech 58: int ch;
1.1 deraadt 59:
60: switch (s->state) {
61: case EOS:
62: return (0);
63: case INFINITE:
64: return (1);
65: case NORMAL:
66: switch (ch = *s->str) {
67: case '\0':
68: s->state = EOS;
69: return (0);
70: case '\\':
71: s->lastch = backslash(s);
72: break;
73: case '[':
74: if (bracket(s))
75: return (next(s));
76: /* FALLTHROUGH */
77: default:
78: ++s->str;
79: s->lastch = ch;
80: break;
81: }
82:
83: /* We can start a range at any time. */
84: if (s->str[0] == '-' && genrange(s))
85: return (next(s));
86: return (1);
87: case RANGE:
88: if (s->cnt-- == 0) {
89: s->state = NORMAL;
90: return (next(s));
91: }
92: ++s->lastch;
93: return (1);
94: case SEQUENCE:
95: if (s->cnt-- == 0) {
96: s->state = NORMAL;
97: return (next(s));
98: }
99: return (1);
100: case SET:
101: if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
102: s->state = NORMAL;
103: return (next(s));
104: }
105: return (1);
1.4 mickey 106: default:
107: return 0;
1.1 deraadt 108: }
109: /* NOTREACHED */
110: }
111:
112: static int
113: bracket(s)
1.6 mpech 114: STR *s;
1.1 deraadt 115: {
1.6 mpech 116: char *p;
1.1 deraadt 117:
118: switch (s->str[1]) {
119: case ':': /* "[:class:]" */
1.9 deraadt 120: if ((p = strstr((char *)s->str + 2, ":]")) == NULL)
1.1 deraadt 121: return (0);
122: *p = '\0';
123: s->str += 2;
124: genclass(s);
1.9 deraadt 125: s->str = (unsigned char *)p + 2;
1.1 deraadt 126: return (1);
127: case '=': /* "[=equiv=]" */
1.9 deraadt 128: if ((p = strstr((char *)s->str + 2, "=]")) == NULL)
1.1 deraadt 129: return (0);
130: s->str += 2;
131: genequiv(s);
132: return (1);
133: default: /* "[\###*n]" or "[#*n]" */
1.9 deraadt 134: if ((p = strpbrk((char *)s->str + 2, "*]")) == NULL)
1.1 deraadt 135: return (0);
1.3 millert 136: if (p[0] != '*' || strchr(p, ']') == NULL)
1.1 deraadt 137: return (0);
138: s->str += 1;
139: genseq(s);
140: return (1);
141: }
142: /* NOTREACHED */
143: }
144:
/* One entry per character class name accepted in "[:name:]". */
typedef struct {
	char *name;		/* class name, used as the lookup key */
	int (*func)(int);	/* <ctype.h> predicate that defines the class */
	int *set;		/* member list filled in by genclass(), else NULL */
} CLASS;

/*
 * Table of known classes.  genclass() searches it with bsearch(), so the
 * entries must stay sorted by name in strcmp() order.
 */
static CLASS classes[] = {
	{ "alnum",  isalnum,  NULL },
	{ "alpha",  isalpha,  NULL },
	{ "blank",  isblank,  NULL },
	{ "cntrl",  iscntrl,  NULL },
	{ "digit",  isdigit,  NULL },
	{ "graph",  isgraph,  NULL },
	{ "lower",  islower,  NULL },
	{ "print",  isprint,  NULL },
	{ "punct",  ispunct,  NULL },
	{ "space",  isspace,  NULL },
	{ "upper",  isupper,  NULL },
	{ "xdigit", isxdigit, NULL },
};
165:
166: static void
167: genclass(s)
168: STR *s;
169: {
1.7 millert 170: int cnt, (*func)(int);
1.1 deraadt 171: CLASS *cp, tmp;
172: int *p;
173:
1.9 deraadt 174: tmp.name = (char *)s->str;
1.1 deraadt 175: if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
176: sizeof(CLASS), sizeof(CLASS), c_class)) == NULL)
1.4 mickey 177: errx(1, "unknown class %s", s->str);
1.1 deraadt 178:
1.10 djm 179: if ((cp->set = p = calloc(NCHARS + 1, sizeof(int))) == NULL)
1.4 mickey 180: errx(1, "no memory for a class");
1.1 deraadt 181: for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
182: if ((func)(cnt))
183: *p++ = cnt;
184: *p = OOBCH;
185:
186: s->cnt = 0;
187: s->state = SET;
188: s->set = cp->set;
189: }
190:
191: static int
192: c_class(a, b)
193: const void *a, *b;
194: {
195: return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name));
196: }
197:
198: /*
199: * English doesn't have any equivalence classes, so for now
200: * we just syntax check and grab the character.
201: */
202: static void
203: genequiv(s)
204: STR *s;
205: {
206: if (*s->str == '\\') {
207: s->equiv[0] = backslash(s);
208: if (*s->str != '=')
1.4 mickey 209: errx(1, "misplaced equivalence equals sign");
1.1 deraadt 210: } else {
211: s->equiv[0] = s->str[0];
212: if (s->str[1] != '=')
1.4 mickey 213: errx(1, "misplaced equivalence equals sign");
1.1 deraadt 214: }
215: s->str += 2;
216: s->cnt = 0;
217: s->state = SET;
218: s->set = s->equiv;
219: }
220:
221: static int
222: genrange(s)
223: STR *s;
224: {
225: int stopval;
1.9 deraadt 226: unsigned char *savestart;
1.1 deraadt 227:
228: savestart = s->str;
229: stopval = *++s->str == '\\' ? backslash(s) : *s->str++;
230: if (stopval < (u_char)s->lastch) {
231: s->str = savestart;
232: return (0);
233: }
234: s->cnt = stopval - s->lastch + 1;
235: s->state = RANGE;
236: --s->lastch;
237: return (1);
238: }
239:
240: static void
241: genseq(s)
242: STR *s;
243: {
244: char *ep;
245:
246: if (s->which == STRING1)
1.4 mickey 247: errx(1, "sequences only valid in string2");
1.1 deraadt 248:
249: if (*s->str == '\\')
250: s->lastch = backslash(s);
251: else
252: s->lastch = *s->str++;
253: if (*s->str != '*')
1.4 mickey 254: errx(1, "misplaced sequence asterisk");
1.1 deraadt 255:
256: switch (*++s->str) {
257: case '\\':
258: s->cnt = backslash(s);
259: break;
260: case ']':
261: s->cnt = 0;
262: ++s->str;
263: break;
264: default:
265: if (isdigit(*s->str)) {
1.9 deraadt 266: s->cnt = strtol((char *)s->str, &ep, 0);
1.1 deraadt 267: if (*ep == ']') {
1.9 deraadt 268: s->str = (unsigned char *)ep + 1;
1.1 deraadt 269: break;
270: }
271: }
1.4 mickey 272: errx(1, "illegal sequence count");
1.1 deraadt 273: /* NOTREACHED */
274: }
275:
276: s->state = s->cnt ? SEQUENCE : INFINITE;
277: }
278:
279: /*
280: * Translate \??? into a character. Up to 3 octal digits, if no digits either
281: * an escape code or a literal character.
282: */
283: static int
284: backslash(s)
1.6 mpech 285: STR *s;
1.1 deraadt 286: {
1.6 mpech 287: int ch, cnt, val;
1.1 deraadt 288:
289: for (cnt = val = 0;;) {
290: ch = *++s->str;
291: if (!isascii(ch) || !isdigit(ch))
292: break;
293: val = val * 8 + ch - '0';
294: if (++cnt == 3) {
295: ++s->str;
296: break;
297: }
298: }
299: if (cnt)
300: return (val);
301: if (ch != '\0')
302: ++s->str;
303: switch (ch) {
304: case 'a': /* escape characters */
305: return ('\7');
306: case 'b':
307: return ('\b');
308: case 'f':
309: return ('\f');
310: case 'n':
311: return ('\n');
312: case 'r':
313: return ('\r');
314: case 't':
315: return ('\t');
316: case 'v':
317: return ('\13');
318: case '\0': /* \" -> \ */
319: s->state = EOS;
320: return ('\\');
321: default: /* \x" -> x */
322: return (ch);
323: }
324: }