Annotation of src/usr.bin/awk/b.c, Revision 1.2
1.1 tholo 1: /****************************************************************
2: Copyright (C) AT&T and Lucent Technologies 1996
3: All Rights Reserved
4:
5: Permission to use, copy, modify, and distribute this software and
6: its documentation for any purpose and without fee is hereby
7: granted, provided that the above copyright notice appear in all
8: copies and that both that the copyright notice and this
9: permission notice and warranty disclaimer appear in supporting
10: documentation, and that the names of AT&T or Lucent Technologies
11: or any of their entities not be used in advertising or publicity
12: pertaining to distribution of the software without specific,
13: written prior permission.
14:
15: AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
16: SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17: FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
18: ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
19: DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20: DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
21: OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
22: USE OR PERFORMANCE OF THIS SOFTWARE.
23: ****************************************************************/
24:
25: /* lasciate ogne speranza, voi ch'entrate. */
26:
27: #define DEBUG
28:
29: #include <ctype.h>
30: #include <stdio.h>
31: #include <string.h>
32: #include <stdlib.h>
33: #include "awk.h"
34: #include "awkgram.h"
35:
36: #define HAT (NCHARS-1) /* matches ^ in regular expr */
37: /* NCHARS is 2**n */
38: #define MAXLIN 22
39:
40: #define type(v) (v)->nobj
41: #define left(v) (v)->narg[0]
42: #define right(v) (v)->narg[1]
43: #define parent(v) (v)->nnext
44:
45: #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
46: #define UNARY case STAR: case PLUS: case QUEST:
47:
48: /* encoding in tree Nodes:
49: leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
50: left is index, right contains value or pointer to value
51: unary (STAR, PLUS, QUEST): left is child, right is null
52: binary (CAT, OR): left and right are children
53: parent contains pointer to parent
54: */
55:
56:
57: int *setvec;
58: int *tmpset;
59: int maxsetvec = 0;
60:
61: int rtok; /* next token in current re */
62: int rlxval;
63: char *rlxstr;
64: char *prestr; /* current position in current re */
65: char *lastre; /* origin of last re */
66:
67: static int setcnt;
68: static int poscnt;
69:
70: char *patbeg;
71: int patlen;
72:
73: #define NFA 20 /* cache this many dynamic fa's */
74: fa *fatab[NFA];
75: int nfatab = 0; /* entries in fatab */
76:
77: fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */
78: {
79: int i, use, nuse;
80: fa *pfa;
81:
82: if (setvec == 0) { /* first time through any RE */
83: maxsetvec = MAXLIN;
84: setvec = (int *) malloc(maxsetvec * sizeof(int));
85: tmpset = (int *) malloc(maxsetvec * sizeof(int));
86: if (setvec == 0 || tmpset == 0)
87: overflo("out of space initializing makedfa");
88: }
89:
90: if (compile_time) /* a constant for sure */
91: return mkdfa(s, anchor);
92: for (i = 0; i < nfatab; i++) /* is it there already? */
93: if (fatab[i]->anchor == anchor
94: && strcmp(fatab[i]->restr, s) == 0) {
95: fatab[i]->use++;
96: return fatab[i];
97: }
98: pfa = mkdfa(s, anchor);
99: if (nfatab < NFA) { /* room for another */
100: fatab[nfatab] = pfa;
101: fatab[nfatab]->use = 1;
102: nfatab++;
103: return pfa;
104: }
105: use = fatab[0]->use; /* replace least-recently used */
106: nuse = 0;
107: for (i = 1; i < nfatab; i++)
108: if (fatab[i]->use < use) {
109: use = fatab[i]->use;
110: nuse = i;
111: }
112: freefa(fatab[nuse]);
113: fatab[nuse] = pfa;
114: pfa->use = 1;
115: return pfa;
116: }
117:
118: fa *mkdfa(char *s, int anchor) /* does the real work of making a dfa */
119: /* anchor = 1 for anchored matches, else 0 */
120: {
121: Node *p, *p1;
122: fa *f;
123:
124: p = reparse(s);
125: p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
126: /* put ALL STAR in front of reg. exp. */
127: p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
128: /* put FINAL after reg. exp. */
129:
130: poscnt = 0;
131: penter(p1); /* enter parent pointers and leaf indices */
132: if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
133: overflo("out of space for fa");
134: f->accept = poscnt-1; /* penter has computed number of positions in re */
135: cfoll(f, p1); /* set up follow sets */
136: freetr(p1);
137: if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
138: overflo("out of space in makedfa");
139: if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
140: overflo("out of space in makedfa");
141: *f->posns[1] = 0;
142: f->initstat = makeinit(f, anchor);
143: f->anchor = anchor;
144: f->restr = tostring(s);
145: return f;
146: }
147:
148: int makeinit(fa *f, int anchor)
149: {
150: int i, k;
151:
152: f->curstat = 2;
153: f->out[2] = 0;
154: f->reset = 0;
155: k = *(f->re[0].lfollow);
156: xfree(f->posns[2]);
157: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
158: overflo("out of space in makeinit");
159: for (i=0; i <= k; i++) {
160: (f->posns[2])[i] = (f->re[0].lfollow)[i];
161: }
162: if ((f->posns[2])[1] == f->accept)
163: f->out[2] = 1;
164: for (i=0; i < NCHARS; i++)
165: f->gototab[2][i] = 0;
166: f->curstat = cgoto(f, 2, HAT);
167: if (anchor) {
168: *f->posns[2] = k-1; /* leave out position 0 */
169: for (i=0; i < k; i++) {
170: (f->posns[0])[i] = (f->posns[2])[i];
171: }
172:
173: f->out[0] = f->out[2];
174: if (f->curstat != 2)
175: --(*f->posns[f->curstat]);
176: }
177: return f->curstat;
178: }
179:
180: void penter(Node *p) /* set up parent pointers and leaf indices */
181: {
182: switch (type(p)) {
183: LEAF
184: left(p) = (Node *) poscnt;
185: poscnt++;
186: break;
187: UNARY
188: penter(left(p));
189: parent(left(p)) = p;
190: break;
191: case CAT:
192: case OR:
193: penter(left(p));
194: penter(right(p));
195: parent(left(p)) = p;
196: parent(right(p)) = p;
197: break;
198: default: /* can't happen */
199: ERROR "can't happen: unknown type %d in penter", type(p) FATAL;
200: break;
201: }
202: }
203:
204: void freetr(Node *p) /* free parse tree */
205: {
206: switch (type(p)) {
207: LEAF
208: xfree(p);
209: break;
210: UNARY
211: freetr(left(p));
212: xfree(p);
213: break;
214: case CAT:
215: case OR:
216: freetr(left(p));
217: freetr(right(p));
218: xfree(p);
219: break;
220: default: /* can't happen */
221: ERROR "can't happen: unknown type %d in freetr", type(p) FATAL;
222: break;
223: }
224: }
225:
226: /* in the parsing of regular expressions, metacharacters like . have */
227: /* to be seen literally; \056 is not a metacharacter. */
228:
229: int hexstr(char **pp) /* find and eval hex string at pp, return new p */
1.2 ! millert 230: { /* only pick up one 8-bit byte (2 chars) */
1.1 tholo 231: char *p;
232: int n = 0;
1.2 ! millert 233: int i;
1.1 tholo 234:
1.2 ! millert 235: for (i = 0, p = *pp; i < 2 && isxdigit(*p); i++, p++) {
1.1 tholo 236: if (isdigit(*p))
237: n = 16 * n + *p - '0';
238: else if (*p >= 'a' && *p <= 'f')
239: n = 16 * n + *p - 'a' + 10;
240: else if (*p >= 'A' && *p <= 'F')
241: n = 16 * n + *p - 'A' + 10;
242: }
243: *pp = p;
244: return n;
245: }
246:
1.2 ! millert 247: #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
1.1 tholo 248:
249: int quoted(char **pp) /* pick up next thing after a \\ */
250: /* and increment *pp */
251: {
252: char *p = *pp;
253: int c;
254:
255: if ((c = *p++) == 't')
256: c = '\t';
257: else if (c == 'n')
258: c = '\n';
259: else if (c == 'f')
260: c = '\f';
261: else if (c == 'r')
262: c = '\r';
263: else if (c == 'b')
264: c = '\b';
265: else if (c == '\\')
266: c = '\\';
267: else if (c == 'x') { /* hexadecimal goo follows */
268: c = hexstr(&p); /* this adds a null if number is invalid */
269: } else if (isoctdigit(c)) { /* \d \dd \ddd */
270: int n = c - '0';
271: if (isoctdigit(*p)) {
272: n = 8 * n + *p++ - '0';
273: if (isoctdigit(*p))
274: n = 8 * n + *p++ - '0';
275: }
276: c = n;
277: } /* else */
278: /* c = c; */
279: *pp = p;
280: return c;
281: }
282:
283: char *cclenter(char *p) /* add a character class */
284: {
285: int i, c, c2;
286: char *op;
287: static Gstring *cgp = 0;
288:
289: op = p;
290: if (cgp == 0)
291: cgp = newGstring();
292: caddreset(cgp);
293: i = 0;
294: while ((c = *p++) != 0) {
295: if (c == '\\') {
296: c = quoted(&p);
297: } else if (c == '-' && i > 0 && cgp->cbuf[i-1] != 0) {
298: if (*p != 0) {
299: c = cgp->cbuf[i-1];
300: c2 = *p++;
301: if (c2 == '\\')
302: c2 = quoted(&p);
303: if (c > c2) { /* empty; ignore */
304: cunadd(cgp);
305: i--;
306: continue;
307: }
308: while (c < c2) {
309: cadd(cgp, ++c);
310: i++;
311: }
312: continue;
313: }
314: }
315: cadd(cgp, c);
316: i++;
317: }
318: cadd(cgp, 0);
319: dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, cgp->cbuf) );
320: xfree(op);
321: return(tostring(cgp->cbuf));
322: }
323:
324: void overflo(char *s)
325: {
326: ERROR "regular expression too big: %.30s...", s FATAL;
327: }
328:
329: void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
330: {
331: int i;
332: int *p;
333:
334: switch (type(v)) {
335: LEAF
336: f->re[(int) left(v)].ltype = type(v);
337: f->re[(int) left(v)].lval.np = right(v);
338: while (f->accept >= maxsetvec) { /* guessing here! */
339: maxsetvec *= 4;
340: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
341: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
342: if (setvec == 0 || tmpset == 0) { abort();
343: overflo("out of space in cfoll()");
344: }
345: }
346: for (i = 0; i <= f->accept; i++)
347: setvec[i] = 0;
348: setcnt = 0;
349: follow(v); /* computes setvec and setcnt */
350: if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
351: overflo("out of space building follow set");
352: f->re[(int) left(v)].lfollow = p;
353: *p = setcnt;
354: for (i = f->accept; i >= 0; i--)
355: if (setvec[i] == 1)
356: *++p = i;
357: break;
358: UNARY
359: cfoll(f,left(v));
360: break;
361: case CAT:
362: case OR:
363: cfoll(f,left(v));
364: cfoll(f,right(v));
365: break;
366: default: /* can't happen */
367: ERROR "can't happen: unknown type %d in cfoll", type(v) FATAL;
368: }
369: }
370:
371: int first(Node *p) /* collects initially active leaves of p into setvec */
372: /* returns 1 if p matches empty string */
373: {
374: int b, lp;
375:
376: switch (type(p)) {
377: LEAF
378: lp = (int) left(p); /* look for high-water mark of subscripts */
379: while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
380: maxsetvec *= 4;
381: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
382: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
383: if (setvec == 0 || tmpset == 0) { abort();
384: overflo("out of space in first()");
385: }
386: }
387: if (setvec[lp] != 1) {
388: setvec[lp] = 1;
389: setcnt++;
390: }
391: if (type(p) == CCL && (*(char *) right(p)) == '\0')
392: return(0); /* empty CCL */
393: else return(1);
394: case PLUS:
395: if (first(left(p)) == 0) return(0);
396: return(1);
397: case STAR:
398: case QUEST:
399: first(left(p));
400: return(0);
401: case CAT:
402: if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
403: return(1);
404: case OR:
405: b = first(right(p));
406: if (first(left(p)) == 0 || b == 0) return(0);
407: return(1);
408: }
409: ERROR "can't happen: unknown type %d in first", type(p) FATAL; /* can't happen */
410: return(-1);
411: }
412:
413: void follow(Node *v) /* collects leaves that can follow v into setvec */
414: {
415: Node *p;
416:
417: if (type(v) == FINAL)
418: return;
419: p = parent(v);
420: switch (type(p)) {
421: case STAR:
422: case PLUS:
423: first(v);
424: follow(p);
425: return;
426:
427: case OR:
428: case QUEST:
429: follow(p);
430: return;
431:
432: case CAT:
433: if (v == left(p)) { /* v is left child of p */
434: if (first(right(p)) == 0) {
435: follow(p);
436: return;
437: }
438: } else /* v is right child */
439: follow(p);
440: return;
441: }
442: }
443:
444: int member(int c, char *s) /* is c in s? */
445: {
446: while (*s)
447: if (c == *s++)
448: return(1);
449: return(0);
450: }
451:
452: int match(fa *f, char *p0) /* shortest match ? */
453: {
454: int s, ns;
455: uschar *p = (uschar *) p0;
456:
457: s = f->reset ? makeinit(f,0) : f->initstat;
458: if (f->out[s])
459: return(1);
460: do {
461: if ((ns = f->gototab[s][*p]) != 0)
462: s = ns;
463: else
464: s = cgoto(f, s, *p);
465: if (f->out[s])
466: return(1);
467: } while (*p++ != 0);
468: return(0);
469: }
470:
471: int pmatch(fa *f, char *p0) /* longest match, for sub */
472: {
473: int s, ns;
474: uschar *p = (uschar *) p0;
475: uschar *q;
476: int i, k;
477:
478: s = f->reset ? makeinit(f,1) : f->initstat;
479: patbeg = (char *) p;
480: patlen = -1;
481: do {
482: q = p;
483: do {
484: if (f->out[s]) /* final state */
485: patlen = q-p;
486: if ((ns = f->gototab[s][*q]) != 0)
487: s = ns;
488: else
489: s = cgoto(f, s, *q);
490: if (s == 1) /* no transition */
491: if (patlen >= 0) {
492: patbeg = (char *) p;
493: return(1);
494: }
495: else
496: goto nextin; /* no match */
497: } while (*q++ != 0);
498: if (f->out[s])
499: patlen = q-p-1; /* don't count $ */
500: if (patlen >= 0) {
501: patbeg = (char *) p;
502: return(1);
503: }
504: nextin:
505: s = 2;
506: if (f->reset) {
507: for (i = 2; i <= f->curstat; i++)
508: xfree(f->posns[i]);
509: k = *f->posns[0];
510: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
511: overflo("out of space in pmatch");
512: for (i = 0; i <= k; i++)
513: (f->posns[2])[i] = (f->posns[0])[i];
514: f->initstat = f->curstat = 2;
515: f->out[2] = f->out[0];
516: for (i = 0; i < NCHARS; i++)
517: f->gototab[2][i] = 0;
518: }
519: } while (*p++ != 0);
520: return (0);
521: }
522:
523: int nematch(fa *f, char *p0) /* non-empty match, for sub */
524: {
525: int s, ns;
526: uschar *p = (uschar *) p0;
527: uschar *q;
528: int i, k;
529:
530: s = f->reset ? makeinit(f,1) : f->initstat;
531: patlen = -1;
532: while (*p) {
533: q = p;
534: do {
535: if (f->out[s]) /* final state */
536: patlen = q-p;
537: if ((ns = f->gototab[s][*q]) != 0)
538: s = ns;
539: else
540: s = cgoto(f, s, *q);
541: if (s == 1) /* no transition */
542: if (patlen > 0) {
543: patbeg = (char *) p;
544: return(1);
545: } else
546: goto nnextin; /* no nonempty match */
547: } while (*q++ != 0);
548: if (f->out[s])
549: patlen = q-p-1; /* don't count $ */
550: if (patlen > 0 ) {
551: patbeg = (char *) p;
552: return(1);
553: }
554: nnextin:
555: s = 2;
556: if (f->reset) {
557: for (i = 2; i <= f->curstat; i++)
558: xfree(f->posns[i]);
559: k = *f->posns[0];
560: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
561: overflo("out of state space");
562: for (i = 0; i <= k; i++)
563: (f->posns[2])[i] = (f->posns[0])[i];
564: f->initstat = f->curstat = 2;
565: f->out[2] = f->out[0];
566: for (i = 0; i < NCHARS; i++)
567: f->gototab[2][i] = 0;
568: }
569: p++;
570: }
571: return (0);
572: }
573:
574: Node *reparse(char *p) /* parses regular expression pointed to by p */
575: { /* uses relex() to scan regular expression */
576: Node *np;
577:
578: dprintf( ("reparse <%s>\n", p) );
579: lastre = prestr = p; /* prestr points to string to be parsed */
580: rtok = relex();
581: if (rtok == '\0')
582: ERROR "empty regular expression" FATAL;
583: np = regexp();
584: if (rtok != '\0')
585: ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL;
586: return(np);
587: }
588:
589: Node *regexp(void) /* top-level parse of reg expr */
590: {
591: return (alt(concat(primary())));
592: }
593:
594: Node *primary(void)
595: {
596: Node *np;
597:
598: switch (rtok) {
599: case CHAR:
600: np = op2(CHAR, NIL, (Node *) rlxval);
601: rtok = relex();
602: return (unary(np));
603: case ALL:
604: rtok = relex();
605: return (unary(op2(ALL, NIL, NIL)));
606: case DOT:
607: rtok = relex();
608: return (unary(op2(DOT, NIL, NIL)));
609: case CCL:
610: np = op2(CCL, NIL, (Node*) cclenter(rlxstr));
611: rtok = relex();
612: return (unary(np));
613: case NCCL:
614: np = op2(NCCL, NIL, (Node *) cclenter(rlxstr));
615: rtok = relex();
616: return (unary(np));
617: case '^':
618: rtok = relex();
619: return (unary(op2(CHAR, NIL, (Node *) HAT)));
620: case '$':
621: rtok = relex();
622: return (unary(op2(CHAR, NIL, NIL)));
623: case '(':
624: rtok = relex();
625: if (rtok == ')') { /* special pleading for () */
626: rtok = relex();
627: return unary(op2(CCL, NIL, (Node *) tostring("")));
628: }
629: np = regexp();
630: if (rtok == ')') {
631: rtok = relex();
632: return (unary(np));
633: }
634: else
635: ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL;
636: default:
637: ERROR "illegal primary in regular expression %s at %s", lastre, prestr FATAL;
638: }
639: return 0; /*NOTREACHED*/
640: }
641:
642: Node *concat(Node *np)
643: {
644: switch (rtok) {
645: case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
646: return (concat(op2(CAT, np, primary())));
647: }
648: return (np);
649: }
650:
651: Node *alt(Node *np)
652: {
653: if (rtok == OR) {
654: rtok = relex();
655: return (alt(op2(OR, np, concat(primary()))));
656: }
657: return (np);
658: }
659:
660: Node *unary(Node *np)
661: {
662: switch (rtok) {
663: case STAR:
664: rtok = relex();
665: return (unary(op2(STAR, np, NIL)));
666: case PLUS:
667: rtok = relex();
668: return (unary(op2(PLUS, np, NIL)));
669: case QUEST:
670: rtok = relex();
671: return (unary(op2(QUEST, np, NIL)));
672: default:
673: return (np);
674: }
675: }
676:
677: int relex(void) /* lexical analyzer for reparse */
678: {
679: int c;
680: int cflag;
681: static Gstring *gp = 0;
682:
683: switch (c = *prestr++) {
684: case '|': return OR;
685: case '*': return STAR;
686: case '+': return PLUS;
687: case '?': return QUEST;
688: case '.': return DOT;
689: case '\0': prestr--; return '\0';
690: case '^':
691: case '$':
692: case '(':
693: case ')':
694: return c;
695: case '\\':
696: rlxval = quoted(&prestr);
697: return CHAR;
698: default:
699: rlxval = c;
700: return CHAR;
701: case '[':
702: if (gp == 0)
703: gp = newGstring();
704: caddreset(gp);
705: if (*prestr == '^') {
706: cflag = 1;
707: prestr++;
708: }
709: else
710: cflag = 0;
711: for (; ; ) {
712: if ((c = *prestr++) == '\\') {
713: cadd(gp, '\\');
714: if ((c = *prestr++) == '\0')
715: ERROR "nonterminated character class %.20s...", lastre FATAL;
716: cadd(gp, c);
717: } else if (c == '\n') {
718: ERROR "newline in character class %.20s...", lastre FATAL;
719: } else if (c == '\0') {
720: ERROR "nonterminated character class %.20s", lastre FATAL;
721: } else if (gp->clen == 0) { /* 1st char is special */
722: cadd(gp, c);
723: } else if (c == ']') {
724: cadd(gp, 0);
725: rlxstr = tostring(gp->cbuf);
726: if (cflag == 0)
727: return CCL;
728: else
729: return NCCL;
730: } else
731: cadd(gp, c);
732: }
733: }
734: }
735:
736: int cgoto(fa *f, int s, int c)
737: {
738: int i, j, k;
739: int *p, *q;
740:
741: if (c < 0)
742: ERROR "can't happen: neg char %d in cgoto", c FATAL;
743: while (f->accept >= maxsetvec) { /* guessing here! */
744: maxsetvec *= 4;
745: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
746: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
747: if (setvec == 0 || tmpset == 0) { abort();
748: overflo("out of space in cgoto()");
749: }
750: }
751: for (i = 0; i <= f->accept; i++)
752: setvec[i] = 0;
753: setcnt = 0;
754: /* compute positions of gototab[s,c] into setvec */
755: p = f->posns[s];
756: for (i = 1; i <= *p; i++) {
757: if ((k = f->re[p[i]].ltype) != FINAL) {
758: if ((k == CHAR && c == f->re[p[i]].lval.i)
759: || (k == DOT && c != 0 && c != HAT)
760: || (k == ALL && c != 0)
761: || (k == CCL && member(c, f->re[p[i]].lval.up))
762: || (k == NCCL && !member(c, f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
763: q = f->re[p[i]].lfollow;
764: for (j = 1; j <= *q; j++) {
765: if (q[j] >= maxsetvec) {
766: maxsetvec *= 4;
767: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
768: tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
769: if (setvec == 0 || tmpset == 0)
770: overflo("cgoto overflow");
771: }
772: if (setvec[q[j]] == 0) {
773: setcnt++;
774: setvec[q[j]] = 1;
775: }
776: }
777: }
778: }
779: }
780: /* determine if setvec is a previous state */
781: tmpset[0] = setcnt;
782: j = 1;
783: for (i = f->accept; i >= 0; i--)
784: if (setvec[i]) {
785: tmpset[j++] = i;
786: }
787: /* tmpset == previous state? */
788: for (i = 1; i <= f->curstat; i++) {
789: p = f->posns[i];
790: if ((k = tmpset[0]) != p[0])
791: goto different;
792: for (j = 1; j <= k; j++)
793: if (tmpset[j] != p[j])
794: goto different;
795: /* setvec is state i */
796: f->gototab[s][c] = i;
797: return i;
798: different:;
799: }
800:
801: /* add tmpset to current set of states */
802: if (f->curstat >= NSTATES-1) {
803: f->curstat = 2;
804: f->reset = 1;
805: for (i = 2; i < NSTATES; i++)
806: xfree(f->posns[i]);
807: } else
808: ++(f->curstat);
809: for (i = 0; i < NCHARS; i++)
810: f->gototab[f->curstat][i] = 0;
811: xfree(f->posns[f->curstat]);
812: if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
813: overflo("out of space in cgoto");
814:
815: f->posns[f->curstat] = p;
816: f->gototab[s][c] = f->curstat;
817: for (i = 0; i <= setcnt; i++)
818: p[i] = tmpset[i];
819: if (setvec[f->accept])
820: f->out[f->curstat] = 1;
821: else
822: f->out[f->curstat] = 0;
823: return f->curstat;
824: }
825:
826:
827: void freefa(fa *f) /* free a finite automaton */
828: {
829: int i;
830:
831: if (f == NULL)
832: return;
833: for (i = 0; i <= f->curstat; i++)
834: xfree(f->posns[i]);
835: for (i = 0; i <= f->accept; i++) {
836: xfree(f->re[i].lfollow);
837: if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
838: xfree((f->re[i].lval.np));
839: }
840: xfree(f->restr);
841: xfree(f);
842: }