Annotation of src/usr.bin/tr/str.c, Revision 1.12
1.12 ! deraadt 1: /* $OpenBSD: str.c,v 1.11 2009/10/27 23:59:46 deraadt Exp $ */
1.1 deraadt 2: /* $NetBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $ */
3:
4: /*-
5: * Copyright (c) 1991, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
1.8 millert 16: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: */
32:
33: #include <sys/types.h>
34:
35: #include <errno.h>
36: #include <stddef.h>
37: #include <stdio.h>
38: #include <stdlib.h>
39: #include <string.h>
40: #include <ctype.h>
1.4 mickey 41: #include <err.h>
1.1 deraadt 42:
43: #include "extern.h"
44:
1.7 millert 45: static int backslash(STR *);
46: static int bracket(STR *);
47: static int c_class(const void *, const void *);
48: static void genclass(STR *);
49: static void genequiv(STR *);
50: static int genrange(STR *);
51: static void genseq(STR *);
1.1 deraadt 52:
53: int
54: next(s)
1.6 mpech 55: STR *s;
1.1 deraadt 56: {
1.6 mpech 57: int ch;
1.1 deraadt 58:
59: switch (s->state) {
60: case EOS:
61: return (0);
62: case INFINITE:
63: return (1);
64: case NORMAL:
65: switch (ch = *s->str) {
66: case '\0':
67: s->state = EOS;
68: return (0);
69: case '\\':
70: s->lastch = backslash(s);
71: break;
72: case '[':
73: if (bracket(s))
74: return (next(s));
75: /* FALLTHROUGH */
76: default:
77: ++s->str;
78: s->lastch = ch;
79: break;
80: }
81:
82: /* We can start a range at any time. */
83: if (s->str[0] == '-' && genrange(s))
84: return (next(s));
85: return (1);
86: case RANGE:
87: if (s->cnt-- == 0) {
88: s->state = NORMAL;
89: return (next(s));
90: }
91: ++s->lastch;
92: return (1);
93: case SEQUENCE:
94: if (s->cnt-- == 0) {
95: s->state = NORMAL;
96: return (next(s));
97: }
98: return (1);
99: case SET:
100: if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
101: s->state = NORMAL;
102: return (next(s));
103: }
104: return (1);
1.4 mickey 105: default:
106: return 0;
1.1 deraadt 107: }
108: /* NOTREACHED */
109: }
110:
111: static int
112: bracket(s)
1.6 mpech 113: STR *s;
1.1 deraadt 114: {
1.6 mpech 115: char *p;
1.1 deraadt 116:
117: switch (s->str[1]) {
118: case ':': /* "[:class:]" */
1.9 deraadt 119: if ((p = strstr((char *)s->str + 2, ":]")) == NULL)
1.1 deraadt 120: return (0);
121: *p = '\0';
122: s->str += 2;
123: genclass(s);
1.9 deraadt 124: s->str = (unsigned char *)p + 2;
1.1 deraadt 125: return (1);
126: case '=': /* "[=equiv=]" */
1.9 deraadt 127: if ((p = strstr((char *)s->str + 2, "=]")) == NULL)
1.1 deraadt 128: return (0);
129: s->str += 2;
130: genequiv(s);
131: return (1);
132: default: /* "[\###*n]" or "[#*n]" */
1.9 deraadt 133: if ((p = strpbrk((char *)s->str + 2, "*]")) == NULL)
1.1 deraadt 134: return (0);
1.3 millert 135: if (p[0] != '*' || strchr(p, ']') == NULL)
1.1 deraadt 136: return (0);
137: s->str += 1;
138: genseq(s);
139: return (1);
140: }
141: /* NOTREACHED */
142: }
143:
/*
 * Character class descriptor: the name used inside "[:" ... ":]", the
 * <ctype.h> predicate that decides membership, and a slot for the
 * generated member list (filled in by genclass()).
 */
typedef struct {
	char *name;
	int (*func)(int);
	int *set;
} CLASS;

/* Must stay sorted by name: genclass() looks entries up with bsearch(). */
static CLASS classes[] = {
	{ "alnum",	isalnum,	NULL },
	{ "alpha",	isalpha,	NULL },
	{ "blank",	isblank,	NULL },
	{ "cntrl",	iscntrl,	NULL },
	{ "digit",	isdigit,	NULL },
	{ "graph",	isgraph,	NULL },
	{ "lower",	islower,	NULL },
	{ "print",	isprint,	NULL },
	{ "punct",	ispunct,	NULL },
	{ "space",	isspace,	NULL },
	{ "upper",	isupper,	NULL },
	{ "xdigit",	isxdigit,	NULL },
};
164:
165: static void
166: genclass(s)
167: STR *s;
168: {
1.7 millert 169: int cnt, (*func)(int);
1.1 deraadt 170: CLASS *cp, tmp;
171: int *p;
172:
1.9 deraadt 173: tmp.name = (char *)s->str;
1.1 deraadt 174: if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
175: sizeof(CLASS), sizeof(CLASS), c_class)) == NULL)
1.4 mickey 176: errx(1, "unknown class %s", s->str);
1.1 deraadt 177:
1.10 djm 178: if ((cp->set = p = calloc(NCHARS + 1, sizeof(int))) == NULL)
1.4 mickey 179: errx(1, "no memory for a class");
1.1 deraadt 180: for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
181: if ((func)(cnt))
182: *p++ = cnt;
183: *p = OOBCH;
184:
185: s->cnt = 0;
186: s->state = SET;
187: s->set = cp->set;
188: }
189:
190: static int
191: c_class(a, b)
192: const void *a, *b;
193: {
194: return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name));
195: }
196:
197: /*
198: * English doesn't have any equivalence classes, so for now
199: * we just syntax check and grab the character.
200: */
201: static void
202: genequiv(s)
203: STR *s;
204: {
205: if (*s->str == '\\') {
206: s->equiv[0] = backslash(s);
207: if (*s->str != '=')
1.4 mickey 208: errx(1, "misplaced equivalence equals sign");
1.1 deraadt 209: } else {
210: s->equiv[0] = s->str[0];
211: if (s->str[1] != '=')
1.4 mickey 212: errx(1, "misplaced equivalence equals sign");
1.1 deraadt 213: }
214: s->str += 2;
215: s->cnt = 0;
216: s->state = SET;
217: s->set = s->equiv;
218: }
219:
220: static int
221: genrange(s)
222: STR *s;
223: {
224: int stopval;
1.9 deraadt 225: unsigned char *savestart;
1.1 deraadt 226:
227: savestart = s->str;
228: stopval = *++s->str == '\\' ? backslash(s) : *s->str++;
229: if (stopval < (u_char)s->lastch) {
230: s->str = savestart;
231: return (0);
232: }
233: s->cnt = stopval - s->lastch + 1;
234: s->state = RANGE;
235: --s->lastch;
236: return (1);
237: }
238:
239: static void
240: genseq(s)
241: STR *s;
242: {
243: char *ep;
244:
245: if (s->which == STRING1)
1.4 mickey 246: errx(1, "sequences only valid in string2");
1.1 deraadt 247:
248: if (*s->str == '\\')
249: s->lastch = backslash(s);
250: else
251: s->lastch = *s->str++;
252: if (*s->str != '*')
1.4 mickey 253: errx(1, "misplaced sequence asterisk");
1.1 deraadt 254:
255: switch (*++s->str) {
256: case '\\':
257: s->cnt = backslash(s);
258: break;
259: case ']':
260: s->cnt = 0;
261: ++s->str;
262: break;
263: default:
264: if (isdigit(*s->str)) {
1.9 deraadt 265: s->cnt = strtol((char *)s->str, &ep, 0);
1.1 deraadt 266: if (*ep == ']') {
1.9 deraadt 267: s->str = (unsigned char *)ep + 1;
1.1 deraadt 268: break;
269: }
270: }
1.4 mickey 271: errx(1, "illegal sequence count");
1.1 deraadt 272: /* NOTREACHED */
273: }
274:
275: s->state = s->cnt ? SEQUENCE : INFINITE;
276: }
277:
278: /*
279: * Translate \??? into a character. Up to 3 octal digits, if no digits either
280: * an escape code or a literal character.
281: */
282: static int
283: backslash(s)
1.6 mpech 284: STR *s;
1.1 deraadt 285: {
1.6 mpech 286: int ch, cnt, val;
1.1 deraadt 287:
288: for (cnt = val = 0;;) {
289: ch = *++s->str;
290: if (!isascii(ch) || !isdigit(ch))
291: break;
292: val = val * 8 + ch - '0';
293: if (++cnt == 3) {
294: ++s->str;
295: break;
296: }
297: }
298: if (cnt)
299: return (val);
300: if (ch != '\0')
301: ++s->str;
302: switch (ch) {
303: case 'a': /* escape characters */
304: return ('\7');
305: case 'b':
306: return ('\b');
307: case 'f':
308: return ('\f');
309: case 'n':
310: return ('\n');
311: case 'r':
312: return ('\r');
313: case 't':
314: return ('\t');
315: case 'v':
316: return ('\13');
317: case '\0': /* \" -> \ */
318: s->state = EOS;
319: return ('\\');
320: default: /* \x" -> x */
321: return (ch);
322: }
323: }