Annotation of src/usr.bin/awk/b.c, Revision 1.9
1.9 ! deraadt 1: /* $OpenBSD: b.c,v 1.8 1999/12/08 23:09:45 millert Exp $ */
1.1 tholo 2: /****************************************************************
1.5 kstailey 3: Copyright (C) Lucent Technologies 1997
1.1 tholo 4: All Rights Reserved
5:
6: Permission to use, copy, modify, and distribute this software and
7: its documentation for any purpose and without fee is hereby
8: granted, provided that the above copyright notice appear in all
9: copies and that both that the copyright notice and this
10: permission notice and warranty disclaimer appear in supporting
1.5 kstailey 11: documentation, and that the name Lucent Technologies or any of
12: its entities not be used in advertising or publicity pertaining
13: to distribution of the software without specific, written prior
14: permission.
15:
16: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23: THIS SOFTWARE.
1.1 tholo 24: ****************************************************************/
25:
26: /* lasciate ogne speranza, voi ch'entrate. */
27:
28: #define DEBUG
29:
30: #include <ctype.h>
31: #include <stdio.h>
32: #include <string.h>
33: #include <stdlib.h>
34: #include "awk.h"
1.5 kstailey 35: #include "ytab.h"
1.1 tholo 36:
37: #define HAT (NCHARS-1) /* matches ^ in regular expr */
38: /* NCHARS is 2**n */
39: #define MAXLIN 22
40:
1.7 millert 41: #define type(v) (v)->nobj /* badly overloaded here */
42: #define info(v) (v)->ntype /* badly overloaded here */
1.1 tholo 43: #define left(v) (v)->narg[0]
44: #define right(v) (v)->narg[1]
45: #define parent(v) (v)->nnext
46:
47: #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
48: #define UNARY case STAR: case PLUS: case QUEST:
49:
50: /* encoding in tree Nodes:
51: leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
52: left is index, right contains value or pointer to value
53: unary (STAR, PLUS, QUEST): left is child, right is null
54: binary (CAT, OR): left and right are children
55: parent contains pointer to parent
56: */
57:
58:
59: int *setvec;
60: int *tmpset;
61: int maxsetvec = 0;
62:
63: int rtok; /* next token in current re */
1.4 millert 64: int rlxval;
1.1 tholo 65: char *rlxstr;
66: char *prestr; /* current position in current re */
67: char *lastre; /* origin of last re */
68:
1.4 millert 69: static int setcnt;
70: static int poscnt;
1.1 tholo 71:
72: char *patbeg;
73: int patlen;
74:
75: #define NFA 20 /* cache this many dynamic fa's */
76: fa *fatab[NFA];
77: int nfatab = 0; /* entries in fatab */
78:
79: fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */
80: {
81: int i, use, nuse;
82: fa *pfa;
1.6 millert 83: static int now = 1;
1.1 tholo 84:
85: if (setvec == 0) { /* first time through any RE */
86: maxsetvec = MAXLIN;
87: setvec = (int *) malloc(maxsetvec * sizeof(int));
88: tmpset = (int *) malloc(maxsetvec * sizeof(int));
89: if (setvec == 0 || tmpset == 0)
90: overflo("out of space initializing makedfa");
91: }
92:
93: if (compile_time) /* a constant for sure */
94: return mkdfa(s, anchor);
95: for (i = 0; i < nfatab; i++) /* is it there already? */
96: if (fatab[i]->anchor == anchor
97: && strcmp(fatab[i]->restr, s) == 0) {
1.6 millert 98: fatab[i]->use = now++;
1.1 tholo 99: return fatab[i];
100: }
101: pfa = mkdfa(s, anchor);
102: if (nfatab < NFA) { /* room for another */
103: fatab[nfatab] = pfa;
1.6 millert 104: fatab[nfatab]->use = now++;
1.1 tholo 105: nfatab++;
106: return pfa;
107: }
108: use = fatab[0]->use; /* replace least-recently used */
109: nuse = 0;
110: for (i = 1; i < nfatab; i++)
111: if (fatab[i]->use < use) {
112: use = fatab[i]->use;
113: nuse = i;
114: }
115: freefa(fatab[nuse]);
116: fatab[nuse] = pfa;
1.6 millert 117: pfa->use = now++;
1.1 tholo 118: return pfa;
119: }
120:
121: fa *mkdfa(char *s, int anchor) /* does the real work of making a dfa */
122: /* anchor = 1 for anchored matches, else 0 */
123: {
124: Node *p, *p1;
125: fa *f;
126:
127: p = reparse(s);
128: p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
129: /* put ALL STAR in front of reg. exp. */
130: p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
131: /* put FINAL after reg. exp. */
132:
133: poscnt = 0;
134: penter(p1); /* enter parent pointers and leaf indices */
135: if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
136: overflo("out of space for fa");
137: f->accept = poscnt-1; /* penter has computed number of positions in re */
138: cfoll(f, p1); /* set up follow sets */
139: freetr(p1);
1.4 millert 140: if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
1.1 tholo 141: overflo("out of space in makedfa");
1.4 millert 142: if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
1.1 tholo 143: overflo("out of space in makedfa");
144: *f->posns[1] = 0;
145: f->initstat = makeinit(f, anchor);
146: f->anchor = anchor;
147: f->restr = tostring(s);
148: return f;
149: }
150:
151: int makeinit(fa *f, int anchor)
152: {
153: int i, k;
154:
155: f->curstat = 2;
156: f->out[2] = 0;
157: f->reset = 0;
158: k = *(f->re[0].lfollow);
159: xfree(f->posns[2]);
1.4 millert 160: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
1.1 tholo 161: overflo("out of space in makeinit");
162: for (i=0; i <= k; i++) {
163: (f->posns[2])[i] = (f->re[0].lfollow)[i];
164: }
165: if ((f->posns[2])[1] == f->accept)
166: f->out[2] = 1;
167: for (i=0; i < NCHARS; i++)
168: f->gototab[2][i] = 0;
169: f->curstat = cgoto(f, 2, HAT);
170: if (anchor) {
171: *f->posns[2] = k-1; /* leave out position 0 */
172: for (i=0; i < k; i++) {
173: (f->posns[0])[i] = (f->posns[2])[i];
174: }
175:
176: f->out[0] = f->out[2];
177: if (f->curstat != 2)
178: --(*f->posns[f->curstat]);
179: }
180: return f->curstat;
181: }
182:
183: void penter(Node *p) /* set up parent pointers and leaf indices */
184: {
185: switch (type(p)) {
186: LEAF
1.7 millert 187: info(p) = poscnt;
1.1 tholo 188: poscnt++;
189: break;
190: UNARY
191: penter(left(p));
192: parent(left(p)) = p;
193: break;
194: case CAT:
195: case OR:
196: penter(left(p));
197: penter(right(p));
198: parent(left(p)) = p;
199: parent(right(p)) = p;
200: break;
201: default: /* can't happen */
1.8 millert 202: FATAL("can't happen: unknown type %d in penter", type(p));
1.1 tholo 203: break;
204: }
205: }
206:
207: void freetr(Node *p) /* free parse tree */
208: {
209: switch (type(p)) {
210: LEAF
211: xfree(p);
212: break;
213: UNARY
214: freetr(left(p));
215: xfree(p);
216: break;
217: case CAT:
218: case OR:
219: freetr(left(p));
220: freetr(right(p));
221: xfree(p);
222: break;
223: default: /* can't happen */
1.8 millert 224: FATAL("can't happen: unknown type %d in freetr", type(p));
1.1 tholo 225: break;
226: }
227: }
228:
229: /* in the parsing of regular expressions, metacharacters like . have */
230: /* to be seen literally; \056 is not a metacharacter. */
231:
232: int hexstr(char **pp) /* find and eval hex string at pp, return new p */
1.2 millert 233: { /* only pick up one 8-bit byte (2 chars) */
1.1 tholo 234: char *p;
235: int n = 0;
1.2 millert 236: int i;
1.1 tholo 237:
1.2 millert 238: for (i = 0, p = *pp; i < 2 && isxdigit(*p); i++, p++) {
1.1 tholo 239: if (isdigit(*p))
240: n = 16 * n + *p - '0';
241: else if (*p >= 'a' && *p <= 'f')
242: n = 16 * n + *p - 'a' + 10;
243: else if (*p >= 'A' && *p <= 'F')
244: n = 16 * n + *p - 'A' + 10;
245: }
246: *pp = p;
247: return n;
248: }
249:
1.2 millert 250: #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
1.1 tholo 251:
252: int quoted(char **pp) /* pick up next thing after a \\ */
253: /* and increment *pp */
254: {
255: char *p = *pp;
256: int c;
257:
258: if ((c = *p++) == 't')
259: c = '\t';
260: else if (c == 'n')
261: c = '\n';
262: else if (c == 'f')
263: c = '\f';
264: else if (c == 'r')
265: c = '\r';
266: else if (c == 'b')
267: c = '\b';
268: else if (c == '\\')
269: c = '\\';
270: else if (c == 'x') { /* hexadecimal goo follows */
271: c = hexstr(&p); /* this adds a null if number is invalid */
272: } else if (isoctdigit(c)) { /* \d \dd \ddd */
273: int n = c - '0';
274: if (isoctdigit(*p)) {
275: n = 8 * n + *p++ - '0';
276: if (isoctdigit(*p))
277: n = 8 * n + *p++ - '0';
278: }
279: c = n;
280: } /* else */
281: /* c = c; */
282: *pp = p;
283: return c;
284: }
285:
286: char *cclenter(char *p) /* add a character class */
287: {
288: int i, c, c2;
1.5 kstailey 289: char *op, *bp;
290: static char *buf = 0;
291: static int bufsz = 100;
1.1 tholo 292:
293: op = p;
1.5 kstailey 294: if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
1.8 millert 295: FATAL("out of space for character class [%.10s...] 1", p);
1.5 kstailey 296: bp = buf;
297: for (i = 0; (c = *p++) != 0; ) {
1.1 tholo 298: if (c == '\\') {
299: c = quoted(&p);
1.5 kstailey 300: } else if (c == '-' && i > 0 && bp[-1] != 0) {
1.1 tholo 301: if (*p != 0) {
1.5 kstailey 302: c = bp[-1];
1.1 tholo 303: c2 = *p++;
304: if (c2 == '\\')
305: c2 = quoted(&p);
306: if (c > c2) { /* empty; ignore */
1.5 kstailey 307: bp--;
1.1 tholo 308: i--;
309: continue;
310: }
311: while (c < c2) {
1.5 kstailey 312: if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, 0))
1.8 millert 313: FATAL("out of space for character class [%.10s...] 2", p);
1.5 kstailey 314: *bp++ = ++c;
1.1 tholo 315: i++;
316: }
317: continue;
318: }
319: }
1.5 kstailey 320: if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, 0))
1.8 millert 321: FATAL("out of space for character class [%.10s...] 3", p);
1.5 kstailey 322: *bp++ = c;
1.1 tholo 323: i++;
324: }
1.5 kstailey 325: *bp = 0;
326: dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
1.1 tholo 327: xfree(op);
1.5 kstailey 328: return(tostring(buf));
1.1 tholo 329: }
330:
331: void overflo(char *s)
332: {
1.8 millert 333: FATAL("regular expression too big: %.30s...", s);
1.1 tholo 334: }
335:
336: void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
337: {
338: int i;
1.4 millert 339: int *p;
1.1 tholo 340:
341: switch (type(v)) {
342: LEAF
1.7 millert 343: f->re[info(v)].ltype = type(v);
344: f->re[info(v)].lval.np = right(v);
1.1 tholo 345: while (f->accept >= maxsetvec) { /* guessing here! */
346: maxsetvec *= 4;
347: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
348: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
1.5 kstailey 349: if (setvec == 0 || tmpset == 0)
1.1 tholo 350: overflo("out of space in cfoll()");
351: }
352: for (i = 0; i <= f->accept; i++)
353: setvec[i] = 0;
354: setcnt = 0;
355: follow(v); /* computes setvec and setcnt */
1.4 millert 356: if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
1.1 tholo 357: overflo("out of space building follow set");
1.7 millert 358: f->re[info(v)].lfollow = p;
1.1 tholo 359: *p = setcnt;
360: for (i = f->accept; i >= 0; i--)
361: if (setvec[i] == 1)
362: *++p = i;
363: break;
364: UNARY
365: cfoll(f,left(v));
366: break;
367: case CAT:
368: case OR:
369: cfoll(f,left(v));
370: cfoll(f,right(v));
371: break;
372: default: /* can't happen */
1.8 millert 373: FATAL("can't happen: unknown type %d in cfoll", type(v));
1.1 tholo 374: }
375: }
376:
377: int first(Node *p) /* collects initially active leaves of p into setvec */
378: /* returns 1 if p matches empty string */
379: {
1.4 millert 380: int b, lp;
1.1 tholo 381:
382: switch (type(p)) {
383: LEAF
1.7 millert 384: lp = info(p); /* look for high-water mark of subscripts */
1.1 tholo 385: while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
386: maxsetvec *= 4;
387: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
388: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
1.7 millert 389: if (setvec == 0 || tmpset == 0)
1.1 tholo 390: overflo("out of space in first()");
391: }
392: if (setvec[lp] != 1) {
393: setvec[lp] = 1;
394: setcnt++;
395: }
396: if (type(p) == CCL && (*(char *) right(p)) == '\0')
397: return(0); /* empty CCL */
398: else return(1);
399: case PLUS:
400: if (first(left(p)) == 0) return(0);
401: return(1);
402: case STAR:
403: case QUEST:
404: first(left(p));
405: return(0);
406: case CAT:
407: if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
408: return(1);
409: case OR:
410: b = first(right(p));
411: if (first(left(p)) == 0 || b == 0) return(0);
412: return(1);
413: }
1.8 millert 414: FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
1.1 tholo 415: return(-1);
416: }
417:
418: void follow(Node *v) /* collects leaves that can follow v into setvec */
419: {
420: Node *p;
421:
422: if (type(v) == FINAL)
423: return;
424: p = parent(v);
425: switch (type(p)) {
426: case STAR:
427: case PLUS:
428: first(v);
429: follow(p);
430: return;
431:
432: case OR:
433: case QUEST:
434: follow(p);
435: return;
436:
437: case CAT:
438: if (v == left(p)) { /* v is left child of p */
439: if (first(right(p)) == 0) {
440: follow(p);
441: return;
442: }
443: } else /* v is right child */
444: follow(p);
445: return;
446: }
447: }
448:
449: int member(int c, char *s) /* is c in s? */
450: {
451: while (*s)
452: if (c == *s++)
453: return(1);
454: return(0);
455: }
456:
457: int match(fa *f, char *p0) /* shortest match ? */
458: {
459: int s, ns;
460: uschar *p = (uschar *) p0;
461:
462: s = f->reset ? makeinit(f,0) : f->initstat;
463: if (f->out[s])
464: return(1);
465: do {
466: if ((ns = f->gototab[s][*p]) != 0)
467: s = ns;
468: else
469: s = cgoto(f, s, *p);
470: if (f->out[s])
471: return(1);
472: } while (*p++ != 0);
473: return(0);
474: }
475:
476: int pmatch(fa *f, char *p0) /* longest match, for sub */
477: {
478: int s, ns;
479: uschar *p = (uschar *) p0;
480: uschar *q;
481: int i, k;
482:
483: s = f->reset ? makeinit(f,1) : f->initstat;
484: patbeg = (char *) p;
485: patlen = -1;
486: do {
487: q = p;
488: do {
489: if (f->out[s]) /* final state */
490: patlen = q-p;
491: if ((ns = f->gototab[s][*q]) != 0)
492: s = ns;
493: else
494: s = cgoto(f, s, *q);
1.9 ! deraadt 495: if (s == 1) { /* no transition */
1.1 tholo 496: if (patlen >= 0) {
497: patbeg = (char *) p;
498: return(1);
499: }
500: else
501: goto nextin; /* no match */
1.9 ! deraadt 502: }
1.1 tholo 503: } while (*q++ != 0);
504: if (f->out[s])
505: patlen = q-p-1; /* don't count $ */
506: if (patlen >= 0) {
507: patbeg = (char *) p;
508: return(1);
509: }
510: nextin:
511: s = 2;
512: if (f->reset) {
513: for (i = 2; i <= f->curstat; i++)
514: xfree(f->posns[i]);
515: k = *f->posns[0];
1.4 millert 516: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
1.1 tholo 517: overflo("out of space in pmatch");
518: for (i = 0; i <= k; i++)
519: (f->posns[2])[i] = (f->posns[0])[i];
520: f->initstat = f->curstat = 2;
521: f->out[2] = f->out[0];
522: for (i = 0; i < NCHARS; i++)
523: f->gototab[2][i] = 0;
524: }
525: } while (*p++ != 0);
526: return (0);
527: }
528:
529: int nematch(fa *f, char *p0) /* non-empty match, for sub */
530: {
531: int s, ns;
532: uschar *p = (uschar *) p0;
533: uschar *q;
534: int i, k;
535:
536: s = f->reset ? makeinit(f,1) : f->initstat;
537: patlen = -1;
538: while (*p) {
539: q = p;
540: do {
541: if (f->out[s]) /* final state */
542: patlen = q-p;
543: if ((ns = f->gototab[s][*q]) != 0)
544: s = ns;
545: else
546: s = cgoto(f, s, *q);
1.9 ! deraadt 547: if (s == 1) { /* no transition */
1.1 tholo 548: if (patlen > 0) {
549: patbeg = (char *) p;
550: return(1);
551: } else
552: goto nnextin; /* no nonempty match */
1.9 ! deraadt 553: }
1.1 tholo 554: } while (*q++ != 0);
555: if (f->out[s])
556: patlen = q-p-1; /* don't count $ */
557: if (patlen > 0 ) {
558: patbeg = (char *) p;
559: return(1);
560: }
561: nnextin:
562: s = 2;
563: if (f->reset) {
564: for (i = 2; i <= f->curstat; i++)
565: xfree(f->posns[i]);
566: k = *f->posns[0];
1.4 millert 567: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
1.1 tholo 568: overflo("out of state space");
569: for (i = 0; i <= k; i++)
570: (f->posns[2])[i] = (f->posns[0])[i];
571: f->initstat = f->curstat = 2;
572: f->out[2] = f->out[0];
573: for (i = 0; i < NCHARS; i++)
574: f->gototab[2][i] = 0;
575: }
576: p++;
577: }
578: return (0);
579: }
580:
581: Node *reparse(char *p) /* parses regular expression pointed to by p */
582: { /* uses relex() to scan regular expression */
583: Node *np;
584:
585: dprintf( ("reparse <%s>\n", p) );
586: lastre = prestr = p; /* prestr points to string to be parsed */
587: rtok = relex();
588: if (rtok == '\0')
1.8 millert 589: FATAL("empty regular expression");
1.1 tholo 590: np = regexp();
591: if (rtok != '\0')
1.8 millert 592: FATAL("syntax error in regular expression %s at %s", lastre, prestr);
1.1 tholo 593: return(np);
594: }
595:
596: Node *regexp(void) /* top-level parse of reg expr */
597: {
598: return (alt(concat(primary())));
599: }
600:
601: Node *primary(void)
602: {
603: Node *np;
604:
605: switch (rtok) {
606: case CHAR:
1.7 millert 607: np = op2(CHAR, NIL, itonp(rlxval));
1.1 tholo 608: rtok = relex();
609: return (unary(np));
610: case ALL:
611: rtok = relex();
612: return (unary(op2(ALL, NIL, NIL)));
613: case DOT:
614: rtok = relex();
615: return (unary(op2(DOT, NIL, NIL)));
616: case CCL:
617: np = op2(CCL, NIL, (Node*) cclenter(rlxstr));
618: rtok = relex();
619: return (unary(np));
620: case NCCL:
621: np = op2(NCCL, NIL, (Node *) cclenter(rlxstr));
622: rtok = relex();
623: return (unary(np));
624: case '^':
625: rtok = relex();
1.7 millert 626: return (unary(op2(CHAR, NIL, itonp(HAT))));
1.1 tholo 627: case '$':
628: rtok = relex();
629: return (unary(op2(CHAR, NIL, NIL)));
630: case '(':
631: rtok = relex();
632: if (rtok == ')') { /* special pleading for () */
633: rtok = relex();
634: return unary(op2(CCL, NIL, (Node *) tostring("")));
635: }
636: np = regexp();
637: if (rtok == ')') {
638: rtok = relex();
639: return (unary(np));
640: }
641: else
1.8 millert 642: FATAL("syntax error in regular expression %s at %s", lastre, prestr);
1.1 tholo 643: default:
1.8 millert 644: FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
1.1 tholo 645: }
646: return 0; /*NOTREACHED*/
647: }
648:
649: Node *concat(Node *np)
650: {
651: switch (rtok) {
652: case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
653: return (concat(op2(CAT, np, primary())));
654: }
655: return (np);
656: }
657:
658: Node *alt(Node *np)
659: {
660: if (rtok == OR) {
661: rtok = relex();
662: return (alt(op2(OR, np, concat(primary()))));
663: }
664: return (np);
665: }
666:
667: Node *unary(Node *np)
668: {
669: switch (rtok) {
670: case STAR:
671: rtok = relex();
672: return (unary(op2(STAR, np, NIL)));
673: case PLUS:
674: rtok = relex();
675: return (unary(op2(PLUS, np, NIL)));
676: case QUEST:
677: rtok = relex();
678: return (unary(op2(QUEST, np, NIL)));
679: default:
680: return (np);
681: }
682: }
683:
684: int relex(void) /* lexical analyzer for reparse */
685: {
1.5 kstailey 686: int c, n;
1.1 tholo 687: int cflag;
1.5 kstailey 688: static char *buf = 0;
689: static int bufsz = 100;
690: char *bp;
1.1 tholo 691:
692: switch (c = *prestr++) {
693: case '|': return OR;
694: case '*': return STAR;
695: case '+': return PLUS;
696: case '?': return QUEST;
697: case '.': return DOT;
698: case '\0': prestr--; return '\0';
699: case '^':
700: case '$':
701: case '(':
702: case ')':
703: return c;
704: case '\\':
705: rlxval = quoted(&prestr);
706: return CHAR;
707: default:
708: rlxval = c;
709: return CHAR;
710: case '[':
1.5 kstailey 711: if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
1.8 millert 712: FATAL("out of space in reg expr %.10s..", lastre);
1.5 kstailey 713: bp = buf;
1.1 tholo 714: if (*prestr == '^') {
715: cflag = 1;
716: prestr++;
717: }
718: else
719: cflag = 0;
1.5 kstailey 720: n = 2 * strlen(prestr)+1;
721: if (!adjbuf(&buf, &bufsz, n, n, &bp, 0))
1.8 millert 722: FATAL("out of space for reg expr %.10s...", lastre);
1.1 tholo 723: for (; ; ) {
724: if ((c = *prestr++) == '\\') {
1.5 kstailey 725: *bp++ = '\\';
1.1 tholo 726: if ((c = *prestr++) == '\0')
1.8 millert 727: FATAL("nonterminated character class %.20s...", lastre);
1.5 kstailey 728: *bp++ = c;
1.1 tholo 729: } else if (c == '\n') {
1.8 millert 730: FATAL("newline in character class %.20s...", lastre);
1.1 tholo 731: } else if (c == '\0') {
1.8 millert 732: FATAL("nonterminated character class %.20s", lastre);
1.5 kstailey 733: } else if (bp == buf) { /* 1st char is special */
734: *bp++ = c;
1.1 tholo 735: } else if (c == ']') {
1.5 kstailey 736: *bp++ = 0;
737: rlxstr = tostring(buf);
1.1 tholo 738: if (cflag == 0)
739: return CCL;
740: else
741: return NCCL;
742: } else
1.5 kstailey 743: *bp++ = c;
1.1 tholo 744: }
745: }
746: }
747:
748: int cgoto(fa *f, int s, int c)
749: {
750: int i, j, k;
1.4 millert 751: int *p, *q;
1.1 tholo 752:
753: if (c < 0)
1.8 millert 754: FATAL("can't happen: neg char %d in cgoto", c);
1.1 tholo 755: while (f->accept >= maxsetvec) { /* guessing here! */
756: maxsetvec *= 4;
757: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
758: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
1.7 millert 759: if (setvec == 0 || tmpset == 0)
1.1 tholo 760: overflo("out of space in cgoto()");
761: }
762: for (i = 0; i <= f->accept; i++)
763: setvec[i] = 0;
764: setcnt = 0;
765: /* compute positions of gototab[s,c] into setvec */
766: p = f->posns[s];
767: for (i = 1; i <= *p; i++) {
768: if ((k = f->re[p[i]].ltype) != FINAL) {
1.7 millert 769: if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
1.1 tholo 770: || (k == DOT && c != 0 && c != HAT)
771: || (k == ALL && c != 0)
772: || (k == CCL && member(c, f->re[p[i]].lval.up))
773: || (k == NCCL && !member(c, f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
774: q = f->re[p[i]].lfollow;
775: for (j = 1; j <= *q; j++) {
776: if (q[j] >= maxsetvec) {
777: maxsetvec *= 4;
778: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
779: tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
780: if (setvec == 0 || tmpset == 0)
781: overflo("cgoto overflow");
782: }
783: if (setvec[q[j]] == 0) {
784: setcnt++;
785: setvec[q[j]] = 1;
786: }
787: }
788: }
789: }
790: }
791: /* determine if setvec is a previous state */
792: tmpset[0] = setcnt;
793: j = 1;
794: for (i = f->accept; i >= 0; i--)
795: if (setvec[i]) {
796: tmpset[j++] = i;
797: }
798: /* tmpset == previous state? */
799: for (i = 1; i <= f->curstat; i++) {
800: p = f->posns[i];
801: if ((k = tmpset[0]) != p[0])
802: goto different;
803: for (j = 1; j <= k; j++)
804: if (tmpset[j] != p[j])
805: goto different;
806: /* setvec is state i */
807: f->gototab[s][c] = i;
808: return i;
809: different:;
810: }
811:
812: /* add tmpset to current set of states */
813: if (f->curstat >= NSTATES-1) {
814: f->curstat = 2;
815: f->reset = 1;
816: for (i = 2; i < NSTATES; i++)
817: xfree(f->posns[i]);
818: } else
819: ++(f->curstat);
820: for (i = 0; i < NCHARS; i++)
821: f->gototab[f->curstat][i] = 0;
822: xfree(f->posns[f->curstat]);
1.4 millert 823: if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
1.1 tholo 824: overflo("out of space in cgoto");
825:
826: f->posns[f->curstat] = p;
827: f->gototab[s][c] = f->curstat;
828: for (i = 0; i <= setcnt; i++)
829: p[i] = tmpset[i];
830: if (setvec[f->accept])
831: f->out[f->curstat] = 1;
832: else
833: f->out[f->curstat] = 0;
834: return f->curstat;
835: }
836:
837:
838: void freefa(fa *f) /* free a finite automaton */
839: {
840: int i;
841:
842: if (f == NULL)
843: return;
844: for (i = 0; i <= f->curstat; i++)
845: xfree(f->posns[i]);
846: for (i = 0; i <= f->accept; i++) {
847: xfree(f->re[i].lfollow);
848: if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
849: xfree((f->re[i].lval.np));
850: }
851: xfree(f->restr);
852: xfree(f);
853: }