Annotation of src/usr.bin/awk/b.c, Revision 1.3
1.1 tholo 1: /****************************************************************
2: Copyright (C) AT&T and Lucent Technologies 1996
3: All Rights Reserved
4:
5: Permission to use, copy, modify, and distribute this software and
6: its documentation for any purpose and without fee is hereby
7: granted, provided that the above copyright notice appear in all
8: copies and that both that the copyright notice and this
9: permission notice and warranty disclaimer appear in supporting
10: documentation, and that the names of AT&T or Lucent Technologies
11: or any of their entities not be used in advertising or publicity
12: pertaining to distribution of the software without specific,
13: written prior permission.
14:
15: AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
16: SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17: FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
18: ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
19: DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20: DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
21: OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
22: USE OR PERFORMANCE OF THIS SOFTWARE.
23: ****************************************************************/
24:
25: /* lasciate ogne speranza, voi ch'entrate. */
26:
27: #define DEBUG
28:
29: #include <ctype.h>
30: #include <stdio.h>
31: #include <string.h>
32: #include <stdlib.h>
33: #include "awk.h"
34: #include "awkgram.h"
35:
36: #define HAT (NCHARS-1) /* matches ^ in regular expr */
37: /* NCHARS is 2**n */
38: #define MAXLIN 22
39:
40: #define type(v) (v)->nobj
41: #define left(v) (v)->narg[0]
42: #define right(v) (v)->narg[1]
43: #define parent(v) (v)->nnext
44:
45: #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
46: #define UNARY case STAR: case PLUS: case QUEST:
47:
48: /* encoding in tree Nodes:
49: leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
50: left is index, right contains value or pointer to value
51: unary (STAR, PLUS, QUEST): left is child, right is null
52: binary (CAT, OR): left and right are children
53: parent contains pointer to parent
54: */
55:
56:
57: int *setvec;
58: int *tmpset;
59: int maxsetvec = 0;
60:
61: int rtok; /* next token in current re */
1.3 ! millert 62: long rlxval;
1.1 tholo 63: char *rlxstr;
64: char *prestr; /* current position in current re */
65: char *lastre; /* origin of last re */
66:
1.3 ! millert 67: static long setcnt;
! 68: static long poscnt;
1.1 tholo 69:
70: char *patbeg;
71: int patlen;
72:
73: #define NFA 20 /* cache this many dynamic fa's */
74: fa *fatab[NFA];
75: int nfatab = 0; /* entries in fatab */
76:
77: fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */
78: {
79: int i, use, nuse;
80: fa *pfa;
81:
82: if (setvec == 0) { /* first time through any RE */
83: maxsetvec = MAXLIN;
84: setvec = (int *) malloc(maxsetvec * sizeof(int));
85: tmpset = (int *) malloc(maxsetvec * sizeof(int));
86: if (setvec == 0 || tmpset == 0)
87: overflo("out of space initializing makedfa");
88: }
89:
90: if (compile_time) /* a constant for sure */
91: return mkdfa(s, anchor);
92: for (i = 0; i < nfatab; i++) /* is it there already? */
93: if (fatab[i]->anchor == anchor
94: && strcmp(fatab[i]->restr, s) == 0) {
95: fatab[i]->use++;
96: return fatab[i];
97: }
98: pfa = mkdfa(s, anchor);
99: if (nfatab < NFA) { /* room for another */
100: fatab[nfatab] = pfa;
101: fatab[nfatab]->use = 1;
102: nfatab++;
103: return pfa;
104: }
105: use = fatab[0]->use; /* replace least-recently used */
106: nuse = 0;
107: for (i = 1; i < nfatab; i++)
108: if (fatab[i]->use < use) {
109: use = fatab[i]->use;
110: nuse = i;
111: }
112: freefa(fatab[nuse]);
113: fatab[nuse] = pfa;
114: pfa->use = 1;
115: return pfa;
116: }
117:
118: fa *mkdfa(char *s, int anchor) /* does the real work of making a dfa */
119: /* anchor = 1 for anchored matches, else 0 */
120: {
121: Node *p, *p1;
122: fa *f;
123:
124: p = reparse(s);
125: p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
126: /* put ALL STAR in front of reg. exp. */
127: p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
128: /* put FINAL after reg. exp. */
129:
130: poscnt = 0;
131: penter(p1); /* enter parent pointers and leaf indices */
132: if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
133: overflo("out of space for fa");
134: f->accept = poscnt-1; /* penter has computed number of positions in re */
135: cfoll(f, p1); /* set up follow sets */
136: freetr(p1);
1.3 ! millert 137: if ((f->posns[0] = (long *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
1.1 tholo 138: overflo("out of space in makedfa");
1.3 ! millert 139: if ((f->posns[1] = (long *) calloc(1, sizeof(int))) == NULL)
1.1 tholo 140: overflo("out of space in makedfa");
141: *f->posns[1] = 0;
142: f->initstat = makeinit(f, anchor);
143: f->anchor = anchor;
144: f->restr = tostring(s);
145: return f;
146: }
147:
148: int makeinit(fa *f, int anchor)
149: {
150: int i, k;
151:
152: f->curstat = 2;
153: f->out[2] = 0;
154: f->reset = 0;
155: k = *(f->re[0].lfollow);
156: xfree(f->posns[2]);
1.3 ! millert 157: if ((f->posns[2] = (long *) calloc(1, (k+1)*sizeof(int))) == NULL)
1.1 tholo 158: overflo("out of space in makeinit");
159: for (i=0; i <= k; i++) {
160: (f->posns[2])[i] = (f->re[0].lfollow)[i];
161: }
162: if ((f->posns[2])[1] == f->accept)
163: f->out[2] = 1;
164: for (i=0; i < NCHARS; i++)
165: f->gototab[2][i] = 0;
166: f->curstat = cgoto(f, 2, HAT);
167: if (anchor) {
168: *f->posns[2] = k-1; /* leave out position 0 */
169: for (i=0; i < k; i++) {
170: (f->posns[0])[i] = (f->posns[2])[i];
171: }
172:
173: f->out[0] = f->out[2];
174: if (f->curstat != 2)
175: --(*f->posns[f->curstat]);
176: }
177: return f->curstat;
178: }
179:
180: void penter(Node *p) /* set up parent pointers and leaf indices */
181: {
182: switch (type(p)) {
183: LEAF
184: left(p) = (Node *) poscnt;
185: poscnt++;
186: break;
187: UNARY
188: penter(left(p));
189: parent(left(p)) = p;
190: break;
191: case CAT:
192: case OR:
193: penter(left(p));
194: penter(right(p));
195: parent(left(p)) = p;
196: parent(right(p)) = p;
197: break;
198: default: /* can't happen */
199: ERROR "can't happen: unknown type %d in penter", type(p) FATAL;
200: break;
201: }
202: }
203:
204: void freetr(Node *p) /* free parse tree */
205: {
206: switch (type(p)) {
207: LEAF
208: xfree(p);
209: break;
210: UNARY
211: freetr(left(p));
212: xfree(p);
213: break;
214: case CAT:
215: case OR:
216: freetr(left(p));
217: freetr(right(p));
218: xfree(p);
219: break;
220: default: /* can't happen */
221: ERROR "can't happen: unknown type %d in freetr", type(p) FATAL;
222: break;
223: }
224: }
225:
226: /* in the parsing of regular expressions, metacharacters like . have */
227: /* to be seen literally; \056 is not a metacharacter. */
228:
229: int hexstr(char **pp) /* find and eval hex string at pp, return new p */
1.2 millert 230: { /* only pick up one 8-bit byte (2 chars) */
1.1 tholo 231: char *p;
232: int n = 0;
1.2 millert 233: int i;
1.1 tholo 234:
1.2 millert 235: for (i = 0, p = *pp; i < 2 && isxdigit(*p); i++, p++) {
1.1 tholo 236: if (isdigit(*p))
237: n = 16 * n + *p - '0';
238: else if (*p >= 'a' && *p <= 'f')
239: n = 16 * n + *p - 'a' + 10;
240: else if (*p >= 'A' && *p <= 'F')
241: n = 16 * n + *p - 'A' + 10;
242: }
243: *pp = p;
244: return n;
245: }
246:
1.2 millert 247: #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
1.1 tholo 248:
249: int quoted(char **pp) /* pick up next thing after a \\ */
250: /* and increment *pp */
251: {
252: char *p = *pp;
253: int c;
254:
255: if ((c = *p++) == 't')
256: c = '\t';
257: else if (c == 'n')
258: c = '\n';
259: else if (c == 'f')
260: c = '\f';
261: else if (c == 'r')
262: c = '\r';
263: else if (c == 'b')
264: c = '\b';
265: else if (c == '\\')
266: c = '\\';
267: else if (c == 'x') { /* hexadecimal goo follows */
268: c = hexstr(&p); /* this adds a null if number is invalid */
269: } else if (isoctdigit(c)) { /* \d \dd \ddd */
270: int n = c - '0';
271: if (isoctdigit(*p)) {
272: n = 8 * n + *p++ - '0';
273: if (isoctdigit(*p))
274: n = 8 * n + *p++ - '0';
275: }
276: c = n;
277: } /* else */
278: /* c = c; */
279: *pp = p;
280: return c;
281: }
282:
283: char *cclenter(char *p) /* add a character class */
284: {
285: int i, c, c2;
286: char *op;
287: static Gstring *cgp = 0;
288:
289: op = p;
290: if (cgp == 0)
291: cgp = newGstring();
292: caddreset(cgp);
293: i = 0;
294: while ((c = *p++) != 0) {
295: if (c == '\\') {
296: c = quoted(&p);
297: } else if (c == '-' && i > 0 && cgp->cbuf[i-1] != 0) {
298: if (*p != 0) {
299: c = cgp->cbuf[i-1];
300: c2 = *p++;
301: if (c2 == '\\')
302: c2 = quoted(&p);
303: if (c > c2) { /* empty; ignore */
304: cunadd(cgp);
305: i--;
306: continue;
307: }
308: while (c < c2) {
309: cadd(cgp, ++c);
310: i++;
311: }
312: continue;
313: }
314: }
315: cadd(cgp, c);
316: i++;
317: }
318: cadd(cgp, 0);
319: dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, cgp->cbuf) );
320: xfree(op);
321: return(tostring(cgp->cbuf));
322: }
323:
324: void overflo(char *s)
325: {
326: ERROR "regular expression too big: %.30s...", s FATAL;
327: }
328:
329: void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
330: {
331: int i;
1.3 ! millert 332: long *p;
1.1 tholo 333:
334: switch (type(v)) {
335: LEAF
1.3 ! millert 336: f->re[(long) left(v)].ltype = type(v);
! 337: f->re[(long) left(v)].lval.np = right(v);
1.1 tholo 338: while (f->accept >= maxsetvec) { /* guessing here! */
339: maxsetvec *= 4;
340: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
341: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
342: if (setvec == 0 || tmpset == 0) { abort();
343: overflo("out of space in cfoll()");
344: }
345: }
346: for (i = 0; i <= f->accept; i++)
347: setvec[i] = 0;
348: setcnt = 0;
349: follow(v); /* computes setvec and setcnt */
1.3 ! millert 350: if ((p = (long *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
1.1 tholo 351: overflo("out of space building follow set");
1.3 ! millert 352: f->re[(long) left(v)].lfollow = p;
1.1 tholo 353: *p = setcnt;
354: for (i = f->accept; i >= 0; i--)
355: if (setvec[i] == 1)
356: *++p = i;
357: break;
358: UNARY
359: cfoll(f,left(v));
360: break;
361: case CAT:
362: case OR:
363: cfoll(f,left(v));
364: cfoll(f,right(v));
365: break;
366: default: /* can't happen */
367: ERROR "can't happen: unknown type %d in cfoll", type(v) FATAL;
368: }
369: }
370:
371: int first(Node *p) /* collects initially active leaves of p into setvec */
372: /* returns 1 if p matches empty string */
373: {
1.3 ! millert 374: int b;
! 375: long lp;
1.1 tholo 376:
377: switch (type(p)) {
378: LEAF
1.3 ! millert 379: lp = (long) left(p); /* look for high-water mark of subscripts */
1.1 tholo 380: while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
381: maxsetvec *= 4;
382: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
383: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
384: if (setvec == 0 || tmpset == 0) { abort();
385: overflo("out of space in first()");
386: }
387: }
388: if (setvec[lp] != 1) {
389: setvec[lp] = 1;
390: setcnt++;
391: }
392: if (type(p) == CCL && (*(char *) right(p)) == '\0')
393: return(0); /* empty CCL */
394: else return(1);
395: case PLUS:
396: if (first(left(p)) == 0) return(0);
397: return(1);
398: case STAR:
399: case QUEST:
400: first(left(p));
401: return(0);
402: case CAT:
403: if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
404: return(1);
405: case OR:
406: b = first(right(p));
407: if (first(left(p)) == 0 || b == 0) return(0);
408: return(1);
409: }
410: ERROR "can't happen: unknown type %d in first", type(p) FATAL; /* can't happen */
411: return(-1);
412: }
413:
414: void follow(Node *v) /* collects leaves that can follow v into setvec */
415: {
416: Node *p;
417:
418: if (type(v) == FINAL)
419: return;
420: p = parent(v);
421: switch (type(p)) {
422: case STAR:
423: case PLUS:
424: first(v);
425: follow(p);
426: return;
427:
428: case OR:
429: case QUEST:
430: follow(p);
431: return;
432:
433: case CAT:
434: if (v == left(p)) { /* v is left child of p */
435: if (first(right(p)) == 0) {
436: follow(p);
437: return;
438: }
439: } else /* v is right child */
440: follow(p);
441: return;
442: }
443: }
444:
445: int member(int c, char *s) /* is c in s? */
446: {
447: while (*s)
448: if (c == *s++)
449: return(1);
450: return(0);
451: }
452:
453: int match(fa *f, char *p0) /* shortest match ? */
454: {
455: int s, ns;
456: uschar *p = (uschar *) p0;
457:
458: s = f->reset ? makeinit(f,0) : f->initstat;
459: if (f->out[s])
460: return(1);
461: do {
462: if ((ns = f->gototab[s][*p]) != 0)
463: s = ns;
464: else
465: s = cgoto(f, s, *p);
466: if (f->out[s])
467: return(1);
468: } while (*p++ != 0);
469: return(0);
470: }
471:
472: int pmatch(fa *f, char *p0) /* longest match, for sub */
473: {
474: int s, ns;
475: uschar *p = (uschar *) p0;
476: uschar *q;
477: int i, k;
478:
479: s = f->reset ? makeinit(f,1) : f->initstat;
480: patbeg = (char *) p;
481: patlen = -1;
482: do {
483: q = p;
484: do {
485: if (f->out[s]) /* final state */
486: patlen = q-p;
487: if ((ns = f->gototab[s][*q]) != 0)
488: s = ns;
489: else
490: s = cgoto(f, s, *q);
491: if (s == 1) /* no transition */
492: if (patlen >= 0) {
493: patbeg = (char *) p;
494: return(1);
495: }
496: else
497: goto nextin; /* no match */
498: } while (*q++ != 0);
499: if (f->out[s])
500: patlen = q-p-1; /* don't count $ */
501: if (patlen >= 0) {
502: patbeg = (char *) p;
503: return(1);
504: }
505: nextin:
506: s = 2;
507: if (f->reset) {
508: for (i = 2; i <= f->curstat; i++)
509: xfree(f->posns[i]);
510: k = *f->posns[0];
1.3 ! millert 511: if ((f->posns[2] = (long *) calloc(1, (k+1)*sizeof(int))) == NULL)
1.1 tholo 512: overflo("out of space in pmatch");
513: for (i = 0; i <= k; i++)
514: (f->posns[2])[i] = (f->posns[0])[i];
515: f->initstat = f->curstat = 2;
516: f->out[2] = f->out[0];
517: for (i = 0; i < NCHARS; i++)
518: f->gototab[2][i] = 0;
519: }
520: } while (*p++ != 0);
521: return (0);
522: }
523:
524: int nematch(fa *f, char *p0) /* non-empty match, for sub */
525: {
526: int s, ns;
527: uschar *p = (uschar *) p0;
528: uschar *q;
529: int i, k;
530:
531: s = f->reset ? makeinit(f,1) : f->initstat;
532: patlen = -1;
533: while (*p) {
534: q = p;
535: do {
536: if (f->out[s]) /* final state */
537: patlen = q-p;
538: if ((ns = f->gototab[s][*q]) != 0)
539: s = ns;
540: else
541: s = cgoto(f, s, *q);
542: if (s == 1) /* no transition */
543: if (patlen > 0) {
544: patbeg = (char *) p;
545: return(1);
546: } else
547: goto nnextin; /* no nonempty match */
548: } while (*q++ != 0);
549: if (f->out[s])
550: patlen = q-p-1; /* don't count $ */
551: if (patlen > 0 ) {
552: patbeg = (char *) p;
553: return(1);
554: }
555: nnextin:
556: s = 2;
557: if (f->reset) {
558: for (i = 2; i <= f->curstat; i++)
559: xfree(f->posns[i]);
560: k = *f->posns[0];
1.3 ! millert 561: if ((f->posns[2] = (long *) calloc(1, (k+1)*sizeof(int))) == NULL)
1.1 tholo 562: overflo("out of state space");
563: for (i = 0; i <= k; i++)
564: (f->posns[2])[i] = (f->posns[0])[i];
565: f->initstat = f->curstat = 2;
566: f->out[2] = f->out[0];
567: for (i = 0; i < NCHARS; i++)
568: f->gototab[2][i] = 0;
569: }
570: p++;
571: }
572: return (0);
573: }
574:
575: Node *reparse(char *p) /* parses regular expression pointed to by p */
576: { /* uses relex() to scan regular expression */
577: Node *np;
578:
579: dprintf( ("reparse <%s>\n", p) );
580: lastre = prestr = p; /* prestr points to string to be parsed */
581: rtok = relex();
582: if (rtok == '\0')
583: ERROR "empty regular expression" FATAL;
584: np = regexp();
585: if (rtok != '\0')
586: ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL;
587: return(np);
588: }
589:
590: Node *regexp(void) /* top-level parse of reg expr */
591: {
592: return (alt(concat(primary())));
593: }
594:
595: Node *primary(void)
596: {
597: Node *np;
598:
599: switch (rtok) {
600: case CHAR:
601: np = op2(CHAR, NIL, (Node *) rlxval);
602: rtok = relex();
603: return (unary(np));
604: case ALL:
605: rtok = relex();
606: return (unary(op2(ALL, NIL, NIL)));
607: case DOT:
608: rtok = relex();
609: return (unary(op2(DOT, NIL, NIL)));
610: case CCL:
611: np = op2(CCL, NIL, (Node*) cclenter(rlxstr));
612: rtok = relex();
613: return (unary(np));
614: case NCCL:
615: np = op2(NCCL, NIL, (Node *) cclenter(rlxstr));
616: rtok = relex();
617: return (unary(np));
618: case '^':
619: rtok = relex();
620: return (unary(op2(CHAR, NIL, (Node *) HAT)));
621: case '$':
622: rtok = relex();
623: return (unary(op2(CHAR, NIL, NIL)));
624: case '(':
625: rtok = relex();
626: if (rtok == ')') { /* special pleading for () */
627: rtok = relex();
628: return unary(op2(CCL, NIL, (Node *) tostring("")));
629: }
630: np = regexp();
631: if (rtok == ')') {
632: rtok = relex();
633: return (unary(np));
634: }
635: else
636: ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL;
637: default:
638: ERROR "illegal primary in regular expression %s at %s", lastre, prestr FATAL;
639: }
640: return 0; /*NOTREACHED*/
641: }
642:
643: Node *concat(Node *np)
644: {
645: switch (rtok) {
646: case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
647: return (concat(op2(CAT, np, primary())));
648: }
649: return (np);
650: }
651:
652: Node *alt(Node *np)
653: {
654: if (rtok == OR) {
655: rtok = relex();
656: return (alt(op2(OR, np, concat(primary()))));
657: }
658: return (np);
659: }
660:
661: Node *unary(Node *np)
662: {
663: switch (rtok) {
664: case STAR:
665: rtok = relex();
666: return (unary(op2(STAR, np, NIL)));
667: case PLUS:
668: rtok = relex();
669: return (unary(op2(PLUS, np, NIL)));
670: case QUEST:
671: rtok = relex();
672: return (unary(op2(QUEST, np, NIL)));
673: default:
674: return (np);
675: }
676: }
677:
678: int relex(void) /* lexical analyzer for reparse */
679: {
680: int c;
681: int cflag;
682: static Gstring *gp = 0;
683:
684: switch (c = *prestr++) {
685: case '|': return OR;
686: case '*': return STAR;
687: case '+': return PLUS;
688: case '?': return QUEST;
689: case '.': return DOT;
690: case '\0': prestr--; return '\0';
691: case '^':
692: case '$':
693: case '(':
694: case ')':
695: return c;
696: case '\\':
697: rlxval = quoted(&prestr);
698: return CHAR;
699: default:
700: rlxval = c;
701: return CHAR;
702: case '[':
703: if (gp == 0)
704: gp = newGstring();
705: caddreset(gp);
706: if (*prestr == '^') {
707: cflag = 1;
708: prestr++;
709: }
710: else
711: cflag = 0;
712: for (; ; ) {
713: if ((c = *prestr++) == '\\') {
714: cadd(gp, '\\');
715: if ((c = *prestr++) == '\0')
716: ERROR "nonterminated character class %.20s...", lastre FATAL;
717: cadd(gp, c);
718: } else if (c == '\n') {
719: ERROR "newline in character class %.20s...", lastre FATAL;
720: } else if (c == '\0') {
721: ERROR "nonterminated character class %.20s", lastre FATAL;
722: } else if (gp->clen == 0) { /* 1st char is special */
723: cadd(gp, c);
724: } else if (c == ']') {
725: cadd(gp, 0);
726: rlxstr = tostring(gp->cbuf);
727: if (cflag == 0)
728: return CCL;
729: else
730: return NCCL;
731: } else
732: cadd(gp, c);
733: }
734: }
735: }
736:
737: int cgoto(fa *f, int s, int c)
738: {
739: int i, j, k;
1.3 ! millert 740: long *p, *q;
1.1 tholo 741:
742: if (c < 0)
743: ERROR "can't happen: neg char %d in cgoto", c FATAL;
744: while (f->accept >= maxsetvec) { /* guessing here! */
745: maxsetvec *= 4;
746: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
747: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
748: if (setvec == 0 || tmpset == 0) { abort();
749: overflo("out of space in cgoto()");
750: }
751: }
752: for (i = 0; i <= f->accept; i++)
753: setvec[i] = 0;
754: setcnt = 0;
755: /* compute positions of gototab[s,c] into setvec */
756: p = f->posns[s];
757: for (i = 1; i <= *p; i++) {
758: if ((k = f->re[p[i]].ltype) != FINAL) {
759: if ((k == CHAR && c == f->re[p[i]].lval.i)
760: || (k == DOT && c != 0 && c != HAT)
761: || (k == ALL && c != 0)
762: || (k == CCL && member(c, f->re[p[i]].lval.up))
763: || (k == NCCL && !member(c, f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
764: q = f->re[p[i]].lfollow;
765: for (j = 1; j <= *q; j++) {
766: if (q[j] >= maxsetvec) {
767: maxsetvec *= 4;
768: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
769: tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
770: if (setvec == 0 || tmpset == 0)
771: overflo("cgoto overflow");
772: }
773: if (setvec[q[j]] == 0) {
774: setcnt++;
775: setvec[q[j]] = 1;
776: }
777: }
778: }
779: }
780: }
781: /* determine if setvec is a previous state */
782: tmpset[0] = setcnt;
783: j = 1;
784: for (i = f->accept; i >= 0; i--)
785: if (setvec[i]) {
786: tmpset[j++] = i;
787: }
788: /* tmpset == previous state? */
789: for (i = 1; i <= f->curstat; i++) {
790: p = f->posns[i];
791: if ((k = tmpset[0]) != p[0])
792: goto different;
793: for (j = 1; j <= k; j++)
794: if (tmpset[j] != p[j])
795: goto different;
796: /* setvec is state i */
797: f->gototab[s][c] = i;
798: return i;
799: different:;
800: }
801:
802: /* add tmpset to current set of states */
803: if (f->curstat >= NSTATES-1) {
804: f->curstat = 2;
805: f->reset = 1;
806: for (i = 2; i < NSTATES; i++)
807: xfree(f->posns[i]);
808: } else
809: ++(f->curstat);
810: for (i = 0; i < NCHARS; i++)
811: f->gototab[f->curstat][i] = 0;
812: xfree(f->posns[f->curstat]);
1.3 ! millert 813: if ((p = (long *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
1.1 tholo 814: overflo("out of space in cgoto");
815:
816: f->posns[f->curstat] = p;
817: f->gototab[s][c] = f->curstat;
818: for (i = 0; i <= setcnt; i++)
819: p[i] = tmpset[i];
820: if (setvec[f->accept])
821: f->out[f->curstat] = 1;
822: else
823: f->out[f->curstat] = 0;
824: return f->curstat;
825: }
826:
827:
828: void freefa(fa *f) /* free a finite automaton */
829: {
830: int i;
831:
832: if (f == NULL)
833: return;
834: for (i = 0; i <= f->curstat; i++)
835: xfree(f->posns[i]);
836: for (i = 0; i <= f->accept; i++) {
837: xfree(f->re[i].lfollow);
838: if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
839: xfree((f->re[i].lval.np));
840: }
841: xfree(f->restr);
842: xfree(f);
843: }