Annotation of src/usr.bin/awk/b.c, Revision 1.6
1.6 ! millert 1: /* $OpenBSD: b.c,v 1.5 1997/08/25 16:17:10 kstailey Exp $ */
1.1 tholo 2: /****************************************************************
1.5 kstailey 3: Copyright (C) Lucent Technologies 1997
1.1 tholo 4: All Rights Reserved
5:
6: Permission to use, copy, modify, and distribute this software and
7: its documentation for any purpose and without fee is hereby
8: granted, provided that the above copyright notice appear in all
9: copies and that both that the copyright notice and this
10: permission notice and warranty disclaimer appear in supporting
1.5 kstailey 11: documentation, and that the name Lucent Technologies or any of
12: its entities not be used in advertising or publicity pertaining
13: to distribution of the software without specific, written prior
14: permission.
15:
16: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23: THIS SOFTWARE.
1.1 tholo 24: ****************************************************************/
25:
26: /* lasciate ogne speranza, voi ch'entrate. */
27:
28: #define DEBUG
29:
30: #include <ctype.h>
31: #include <stdio.h>
32: #include <string.h>
33: #include <stdlib.h>
34: #include "awk.h"
1.5 kstailey 35: #include "ytab.h"
1.1 tholo 36:
37: #define HAT (NCHARS-1) /* matches ^ in regular expr */
38: /* NCHARS is 2**n */
39: #define MAXLIN 22
40:
41: #define type(v) (v)->nobj
42: #define left(v) (v)->narg[0]
43: #define right(v) (v)->narg[1]
44: #define parent(v) (v)->nnext
45:
46: #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47: #define UNARY case STAR: case PLUS: case QUEST:
48:
49: /* encoding in tree Nodes:
50: leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
51: left is index, right contains value or pointer to value
52: unary (STAR, PLUS, QUEST): left is child, right is null
53: binary (CAT, OR): left and right are children
54: parent contains pointer to parent
55: */
56:
57:
58: int *setvec;
59: int *tmpset;
60: int maxsetvec = 0;
61:
62: int rtok; /* next token in current re */
1.4 millert 63: int rlxval;
1.1 tholo 64: char *rlxstr;
65: char *prestr; /* current position in current re */
66: char *lastre; /* origin of last re */
67:
1.4 millert 68: static int setcnt;
69: static int poscnt;
1.1 tholo 70:
71: char *patbeg;
72: int patlen;
73:
74: #define NFA 20 /* cache this many dynamic fa's */
75: fa *fatab[NFA];
76: int nfatab = 0; /* entries in fatab */
77:
78: fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */
79: {
80: int i, use, nuse;
81: fa *pfa;
1.6 ! millert 82: static int now = 1;
1.1 tholo 83:
84: if (setvec == 0) { /* first time through any RE */
85: maxsetvec = MAXLIN;
86: setvec = (int *) malloc(maxsetvec * sizeof(int));
87: tmpset = (int *) malloc(maxsetvec * sizeof(int));
88: if (setvec == 0 || tmpset == 0)
89: overflo("out of space initializing makedfa");
90: }
91:
92: if (compile_time) /* a constant for sure */
93: return mkdfa(s, anchor);
94: for (i = 0; i < nfatab; i++) /* is it there already? */
95: if (fatab[i]->anchor == anchor
96: && strcmp(fatab[i]->restr, s) == 0) {
1.6 ! millert 97: fatab[i]->use = now++;
1.1 tholo 98: return fatab[i];
99: }
100: pfa = mkdfa(s, anchor);
101: if (nfatab < NFA) { /* room for another */
102: fatab[nfatab] = pfa;
1.6 ! millert 103: fatab[nfatab]->use = now++;
1.1 tholo 104: nfatab++;
105: return pfa;
106: }
107: use = fatab[0]->use; /* replace least-recently used */
108: nuse = 0;
109: for (i = 1; i < nfatab; i++)
110: if (fatab[i]->use < use) {
111: use = fatab[i]->use;
112: nuse = i;
113: }
114: freefa(fatab[nuse]);
115: fatab[nuse] = pfa;
1.6 ! millert 116: pfa->use = now++;
1.1 tholo 117: return pfa;
118: }
119:
120: fa *mkdfa(char *s, int anchor) /* does the real work of making a dfa */
121: /* anchor = 1 for anchored matches, else 0 */
122: {
123: Node *p, *p1;
124: fa *f;
125:
126: p = reparse(s);
127: p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
128: /* put ALL STAR in front of reg. exp. */
129: p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
130: /* put FINAL after reg. exp. */
131:
132: poscnt = 0;
133: penter(p1); /* enter parent pointers and leaf indices */
134: if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
135: overflo("out of space for fa");
136: f->accept = poscnt-1; /* penter has computed number of positions in re */
137: cfoll(f, p1); /* set up follow sets */
138: freetr(p1);
1.4 millert 139: if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
1.1 tholo 140: overflo("out of space in makedfa");
1.4 millert 141: if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
1.1 tholo 142: overflo("out of space in makedfa");
143: *f->posns[1] = 0;
144: f->initstat = makeinit(f, anchor);
145: f->anchor = anchor;
146: f->restr = tostring(s);
147: return f;
148: }
149:
150: int makeinit(fa *f, int anchor)
151: {
152: int i, k;
153:
154: f->curstat = 2;
155: f->out[2] = 0;
156: f->reset = 0;
157: k = *(f->re[0].lfollow);
158: xfree(f->posns[2]);
1.4 millert 159: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
1.1 tholo 160: overflo("out of space in makeinit");
161: for (i=0; i <= k; i++) {
162: (f->posns[2])[i] = (f->re[0].lfollow)[i];
163: }
164: if ((f->posns[2])[1] == f->accept)
165: f->out[2] = 1;
166: for (i=0; i < NCHARS; i++)
167: f->gototab[2][i] = 0;
168: f->curstat = cgoto(f, 2, HAT);
169: if (anchor) {
170: *f->posns[2] = k-1; /* leave out position 0 */
171: for (i=0; i < k; i++) {
172: (f->posns[0])[i] = (f->posns[2])[i];
173: }
174:
175: f->out[0] = f->out[2];
176: if (f->curstat != 2)
177: --(*f->posns[f->curstat]);
178: }
179: return f->curstat;
180: }
181:
182: void penter(Node *p) /* set up parent pointers and leaf indices */
183: {
184: switch (type(p)) {
185: LEAF
186: left(p) = (Node *) poscnt;
187: poscnt++;
188: break;
189: UNARY
190: penter(left(p));
191: parent(left(p)) = p;
192: break;
193: case CAT:
194: case OR:
195: penter(left(p));
196: penter(right(p));
197: parent(left(p)) = p;
198: parent(right(p)) = p;
199: break;
200: default: /* can't happen */
201: ERROR "can't happen: unknown type %d in penter", type(p) FATAL;
202: break;
203: }
204: }
205:
206: void freetr(Node *p) /* free parse tree */
207: {
208: switch (type(p)) {
209: LEAF
210: xfree(p);
211: break;
212: UNARY
213: freetr(left(p));
214: xfree(p);
215: break;
216: case CAT:
217: case OR:
218: freetr(left(p));
219: freetr(right(p));
220: xfree(p);
221: break;
222: default: /* can't happen */
223: ERROR "can't happen: unknown type %d in freetr", type(p) FATAL;
224: break;
225: }
226: }
227:
228: /* in the parsing of regular expressions, metacharacters like . have */
229: /* to be seen literally; \056 is not a metacharacter. */
230:
231: int hexstr(char **pp) /* find and eval hex string at pp, return new p */
1.2 millert 232: { /* only pick up one 8-bit byte (2 chars) */
1.1 tholo 233: char *p;
234: int n = 0;
1.2 millert 235: int i;
1.1 tholo 236:
1.2 millert 237: for (i = 0, p = *pp; i < 2 && isxdigit(*p); i++, p++) {
1.1 tholo 238: if (isdigit(*p))
239: n = 16 * n + *p - '0';
240: else if (*p >= 'a' && *p <= 'f')
241: n = 16 * n + *p - 'a' + 10;
242: else if (*p >= 'A' && *p <= 'F')
243: n = 16 * n + *p - 'A' + 10;
244: }
245: *pp = p;
246: return n;
247: }
248:
1.2 millert 249: #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
1.1 tholo 250:
251: int quoted(char **pp) /* pick up next thing after a \\ */
252: /* and increment *pp */
253: {
254: char *p = *pp;
255: int c;
256:
257: if ((c = *p++) == 't')
258: c = '\t';
259: else if (c == 'n')
260: c = '\n';
261: else if (c == 'f')
262: c = '\f';
263: else if (c == 'r')
264: c = '\r';
265: else if (c == 'b')
266: c = '\b';
267: else if (c == '\\')
268: c = '\\';
269: else if (c == 'x') { /* hexadecimal goo follows */
270: c = hexstr(&p); /* this adds a null if number is invalid */
271: } else if (isoctdigit(c)) { /* \d \dd \ddd */
272: int n = c - '0';
273: if (isoctdigit(*p)) {
274: n = 8 * n + *p++ - '0';
275: if (isoctdigit(*p))
276: n = 8 * n + *p++ - '0';
277: }
278: c = n;
279: } /* else */
280: /* c = c; */
281: *pp = p;
282: return c;
283: }
284:
285: char *cclenter(char *p) /* add a character class */
286: {
287: int i, c, c2;
1.5 kstailey 288: char *op, *bp;
289: static char *buf = 0;
290: static int bufsz = 100;
1.1 tholo 291:
292: op = p;
1.5 kstailey 293: if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
294: ERROR "out of space for character class [%.10s...] 1", p FATAL;
295: bp = buf;
296: for (i = 0; (c = *p++) != 0; ) {
1.1 tholo 297: if (c == '\\') {
298: c = quoted(&p);
1.5 kstailey 299: } else if (c == '-' && i > 0 && bp[-1] != 0) {
1.1 tholo 300: if (*p != 0) {
1.5 kstailey 301: c = bp[-1];
1.1 tholo 302: c2 = *p++;
303: if (c2 == '\\')
304: c2 = quoted(&p);
305: if (c > c2) { /* empty; ignore */
1.5 kstailey 306: bp--;
1.1 tholo 307: i--;
308: continue;
309: }
310: while (c < c2) {
1.5 kstailey 311: if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, 0))
312: ERROR "out of space for character class [%.10s...] 2", p FATAL;
313: *bp++ = ++c;
1.1 tholo 314: i++;
315: }
316: continue;
317: }
318: }
1.5 kstailey 319: if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, 0))
320: ERROR "out of space for character class [%.10s...] 3", p FATAL;
321: *bp++ = c;
1.1 tholo 322: i++;
323: }
1.5 kstailey 324: *bp = 0;
325: dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
1.1 tholo 326: xfree(op);
1.5 kstailey 327: return(tostring(buf));
1.1 tholo 328: }
329:
330: void overflo(char *s)
331: {
332: ERROR "regular expression too big: %.30s...", s FATAL;
333: }
334:
335: void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
336: {
337: int i;
1.4 millert 338: int *p;
1.1 tholo 339:
340: switch (type(v)) {
341: LEAF
1.4 millert 342: f->re[(int) left(v)].ltype = type(v);
343: f->re[(int) left(v)].lval.np = right(v);
1.1 tholo 344: while (f->accept >= maxsetvec) { /* guessing here! */
345: maxsetvec *= 4;
346: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
347: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
1.5 kstailey 348: if (setvec == 0 || tmpset == 0)
1.1 tholo 349: overflo("out of space in cfoll()");
350: }
351: for (i = 0; i <= f->accept; i++)
352: setvec[i] = 0;
353: setcnt = 0;
354: follow(v); /* computes setvec and setcnt */
1.4 millert 355: if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
1.1 tholo 356: overflo("out of space building follow set");
1.4 millert 357: f->re[(int) left(v)].lfollow = p;
1.1 tholo 358: *p = setcnt;
359: for (i = f->accept; i >= 0; i--)
360: if (setvec[i] == 1)
361: *++p = i;
362: break;
363: UNARY
364: cfoll(f,left(v));
365: break;
366: case CAT:
367: case OR:
368: cfoll(f,left(v));
369: cfoll(f,right(v));
370: break;
371: default: /* can't happen */
372: ERROR "can't happen: unknown type %d in cfoll", type(v) FATAL;
373: }
374: }
375:
376: int first(Node *p) /* collects initially active leaves of p into setvec */
377: /* returns 1 if p matches empty string */
378: {
1.4 millert 379: int b, lp;
1.1 tholo 380:
381: switch (type(p)) {
382: LEAF
1.4 millert 383: lp = (int) left(p); /* look for high-water mark of subscripts */
1.1 tholo 384: while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
385: maxsetvec *= 4;
386: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
387: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
388: if (setvec == 0 || tmpset == 0) { abort();
389: overflo("out of space in first()");
390: }
391: }
392: if (setvec[lp] != 1) {
393: setvec[lp] = 1;
394: setcnt++;
395: }
396: if (type(p) == CCL && (*(char *) right(p)) == '\0')
397: return(0); /* empty CCL */
398: else return(1);
399: case PLUS:
400: if (first(left(p)) == 0) return(0);
401: return(1);
402: case STAR:
403: case QUEST:
404: first(left(p));
405: return(0);
406: case CAT:
407: if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
408: return(1);
409: case OR:
410: b = first(right(p));
411: if (first(left(p)) == 0 || b == 0) return(0);
412: return(1);
413: }
414: ERROR "can't happen: unknown type %d in first", type(p) FATAL; /* can't happen */
415: return(-1);
416: }
417:
418: void follow(Node *v) /* collects leaves that can follow v into setvec */
419: {
420: Node *p;
421:
422: if (type(v) == FINAL)
423: return;
424: p = parent(v);
425: switch (type(p)) {
426: case STAR:
427: case PLUS:
428: first(v);
429: follow(p);
430: return;
431:
432: case OR:
433: case QUEST:
434: follow(p);
435: return;
436:
437: case CAT:
438: if (v == left(p)) { /* v is left child of p */
439: if (first(right(p)) == 0) {
440: follow(p);
441: return;
442: }
443: } else /* v is right child */
444: follow(p);
445: return;
446: }
447: }
448:
449: int member(int c, char *s) /* is c in s? */
450: {
451: while (*s)
452: if (c == *s++)
453: return(1);
454: return(0);
455: }
456:
457: int match(fa *f, char *p0) /* shortest match ? */
458: {
459: int s, ns;
460: uschar *p = (uschar *) p0;
461:
462: s = f->reset ? makeinit(f,0) : f->initstat;
463: if (f->out[s])
464: return(1);
465: do {
466: if ((ns = f->gototab[s][*p]) != 0)
467: s = ns;
468: else
469: s = cgoto(f, s, *p);
470: if (f->out[s])
471: return(1);
472: } while (*p++ != 0);
473: return(0);
474: }
475:
476: int pmatch(fa *f, char *p0) /* longest match, for sub */
477: {
478: int s, ns;
479: uschar *p = (uschar *) p0;
480: uschar *q;
481: int i, k;
482:
483: s = f->reset ? makeinit(f,1) : f->initstat;
484: patbeg = (char *) p;
485: patlen = -1;
486: do {
487: q = p;
488: do {
489: if (f->out[s]) /* final state */
490: patlen = q-p;
491: if ((ns = f->gototab[s][*q]) != 0)
492: s = ns;
493: else
494: s = cgoto(f, s, *q);
495: if (s == 1) /* no transition */
496: if (patlen >= 0) {
497: patbeg = (char *) p;
498: return(1);
499: }
500: else
501: goto nextin; /* no match */
502: } while (*q++ != 0);
503: if (f->out[s])
504: patlen = q-p-1; /* don't count $ */
505: if (patlen >= 0) {
506: patbeg = (char *) p;
507: return(1);
508: }
509: nextin:
510: s = 2;
511: if (f->reset) {
512: for (i = 2; i <= f->curstat; i++)
513: xfree(f->posns[i]);
514: k = *f->posns[0];
1.4 millert 515: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
1.1 tholo 516: overflo("out of space in pmatch");
517: for (i = 0; i <= k; i++)
518: (f->posns[2])[i] = (f->posns[0])[i];
519: f->initstat = f->curstat = 2;
520: f->out[2] = f->out[0];
521: for (i = 0; i < NCHARS; i++)
522: f->gototab[2][i] = 0;
523: }
524: } while (*p++ != 0);
525: return (0);
526: }
527:
528: int nematch(fa *f, char *p0) /* non-empty match, for sub */
529: {
530: int s, ns;
531: uschar *p = (uschar *) p0;
532: uschar *q;
533: int i, k;
534:
535: s = f->reset ? makeinit(f,1) : f->initstat;
536: patlen = -1;
537: while (*p) {
538: q = p;
539: do {
540: if (f->out[s]) /* final state */
541: patlen = q-p;
542: if ((ns = f->gototab[s][*q]) != 0)
543: s = ns;
544: else
545: s = cgoto(f, s, *q);
546: if (s == 1) /* no transition */
547: if (patlen > 0) {
548: patbeg = (char *) p;
549: return(1);
550: } else
551: goto nnextin; /* no nonempty match */
552: } while (*q++ != 0);
553: if (f->out[s])
554: patlen = q-p-1; /* don't count $ */
555: if (patlen > 0 ) {
556: patbeg = (char *) p;
557: return(1);
558: }
559: nnextin:
560: s = 2;
561: if (f->reset) {
562: for (i = 2; i <= f->curstat; i++)
563: xfree(f->posns[i]);
564: k = *f->posns[0];
1.4 millert 565: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
1.1 tholo 566: overflo("out of state space");
567: for (i = 0; i <= k; i++)
568: (f->posns[2])[i] = (f->posns[0])[i];
569: f->initstat = f->curstat = 2;
570: f->out[2] = f->out[0];
571: for (i = 0; i < NCHARS; i++)
572: f->gototab[2][i] = 0;
573: }
574: p++;
575: }
576: return (0);
577: }
578:
579: Node *reparse(char *p) /* parses regular expression pointed to by p */
580: { /* uses relex() to scan regular expression */
581: Node *np;
582:
583: dprintf( ("reparse <%s>\n", p) );
584: lastre = prestr = p; /* prestr points to string to be parsed */
585: rtok = relex();
586: if (rtok == '\0')
587: ERROR "empty regular expression" FATAL;
588: np = regexp();
589: if (rtok != '\0')
590: ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL;
591: return(np);
592: }
593:
594: Node *regexp(void) /* top-level parse of reg expr */
595: {
596: return (alt(concat(primary())));
597: }
598:
599: Node *primary(void)
600: {
601: Node *np;
602:
603: switch (rtok) {
604: case CHAR:
605: np = op2(CHAR, NIL, (Node *) rlxval);
606: rtok = relex();
607: return (unary(np));
608: case ALL:
609: rtok = relex();
610: return (unary(op2(ALL, NIL, NIL)));
611: case DOT:
612: rtok = relex();
613: return (unary(op2(DOT, NIL, NIL)));
614: case CCL:
615: np = op2(CCL, NIL, (Node*) cclenter(rlxstr));
616: rtok = relex();
617: return (unary(np));
618: case NCCL:
619: np = op2(NCCL, NIL, (Node *) cclenter(rlxstr));
620: rtok = relex();
621: return (unary(np));
622: case '^':
623: rtok = relex();
624: return (unary(op2(CHAR, NIL, (Node *) HAT)));
625: case '$':
626: rtok = relex();
627: return (unary(op2(CHAR, NIL, NIL)));
628: case '(':
629: rtok = relex();
630: if (rtok == ')') { /* special pleading for () */
631: rtok = relex();
632: return unary(op2(CCL, NIL, (Node *) tostring("")));
633: }
634: np = regexp();
635: if (rtok == ')') {
636: rtok = relex();
637: return (unary(np));
638: }
639: else
640: ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL;
641: default:
642: ERROR "illegal primary in regular expression %s at %s", lastre, prestr FATAL;
643: }
644: return 0; /*NOTREACHED*/
645: }
646:
647: Node *concat(Node *np)
648: {
649: switch (rtok) {
650: case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
651: return (concat(op2(CAT, np, primary())));
652: }
653: return (np);
654: }
655:
656: Node *alt(Node *np)
657: {
658: if (rtok == OR) {
659: rtok = relex();
660: return (alt(op2(OR, np, concat(primary()))));
661: }
662: return (np);
663: }
664:
665: Node *unary(Node *np)
666: {
667: switch (rtok) {
668: case STAR:
669: rtok = relex();
670: return (unary(op2(STAR, np, NIL)));
671: case PLUS:
672: rtok = relex();
673: return (unary(op2(PLUS, np, NIL)));
674: case QUEST:
675: rtok = relex();
676: return (unary(op2(QUEST, np, NIL)));
677: default:
678: return (np);
679: }
680: }
681:
682: int relex(void) /* lexical analyzer for reparse */
683: {
1.5 kstailey 684: int c, n;
1.1 tholo 685: int cflag;
1.5 kstailey 686: static char *buf = 0;
687: static int bufsz = 100;
688: char *bp;
1.1 tholo 689:
690: switch (c = *prestr++) {
691: case '|': return OR;
692: case '*': return STAR;
693: case '+': return PLUS;
694: case '?': return QUEST;
695: case '.': return DOT;
696: case '\0': prestr--; return '\0';
697: case '^':
698: case '$':
699: case '(':
700: case ')':
701: return c;
702: case '\\':
703: rlxval = quoted(&prestr);
704: return CHAR;
705: default:
706: rlxval = c;
707: return CHAR;
708: case '[':
1.5 kstailey 709: if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
710: ERROR "out of space in reg expr %.10s..", lastre FATAL;
711: bp = buf;
1.1 tholo 712: if (*prestr == '^') {
713: cflag = 1;
714: prestr++;
715: }
716: else
717: cflag = 0;
1.5 kstailey 718: n = 2 * strlen(prestr)+1;
719: if (!adjbuf(&buf, &bufsz, n, n, &bp, 0))
720: ERROR "out of space for reg expr %.10s...", lastre FATAL;
1.1 tholo 721: for (; ; ) {
722: if ((c = *prestr++) == '\\') {
1.5 kstailey 723: *bp++ = '\\';
1.1 tholo 724: if ((c = *prestr++) == '\0')
725: ERROR "nonterminated character class %.20s...", lastre FATAL;
1.5 kstailey 726: *bp++ = c;
1.1 tholo 727: } else if (c == '\n') {
728: ERROR "newline in character class %.20s...", lastre FATAL;
729: } else if (c == '\0') {
730: ERROR "nonterminated character class %.20s", lastre FATAL;
1.5 kstailey 731: } else if (bp == buf) { /* 1st char is special */
732: *bp++ = c;
1.1 tholo 733: } else if (c == ']') {
1.5 kstailey 734: *bp++ = 0;
735: rlxstr = tostring(buf);
1.1 tholo 736: if (cflag == 0)
737: return CCL;
738: else
739: return NCCL;
740: } else
1.5 kstailey 741: *bp++ = c;
1.1 tholo 742: }
743: }
744: }
745:
746: int cgoto(fa *f, int s, int c)
747: {
748: int i, j, k;
1.4 millert 749: int *p, *q;
1.1 tholo 750:
751: if (c < 0)
752: ERROR "can't happen: neg char %d in cgoto", c FATAL;
753: while (f->accept >= maxsetvec) { /* guessing here! */
754: maxsetvec *= 4;
755: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
756: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
757: if (setvec == 0 || tmpset == 0) { abort();
758: overflo("out of space in cgoto()");
759: }
760: }
761: for (i = 0; i <= f->accept; i++)
762: setvec[i] = 0;
763: setcnt = 0;
764: /* compute positions of gototab[s,c] into setvec */
765: p = f->posns[s];
766: for (i = 1; i <= *p; i++) {
767: if ((k = f->re[p[i]].ltype) != FINAL) {
768: if ((k == CHAR && c == f->re[p[i]].lval.i)
769: || (k == DOT && c != 0 && c != HAT)
770: || (k == ALL && c != 0)
771: || (k == CCL && member(c, f->re[p[i]].lval.up))
772: || (k == NCCL && !member(c, f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
773: q = f->re[p[i]].lfollow;
774: for (j = 1; j <= *q; j++) {
775: if (q[j] >= maxsetvec) {
776: maxsetvec *= 4;
777: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
778: tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
779: if (setvec == 0 || tmpset == 0)
780: overflo("cgoto overflow");
781: }
782: if (setvec[q[j]] == 0) {
783: setcnt++;
784: setvec[q[j]] = 1;
785: }
786: }
787: }
788: }
789: }
790: /* determine if setvec is a previous state */
791: tmpset[0] = setcnt;
792: j = 1;
793: for (i = f->accept; i >= 0; i--)
794: if (setvec[i]) {
795: tmpset[j++] = i;
796: }
797: /* tmpset == previous state? */
798: for (i = 1; i <= f->curstat; i++) {
799: p = f->posns[i];
800: if ((k = tmpset[0]) != p[0])
801: goto different;
802: for (j = 1; j <= k; j++)
803: if (tmpset[j] != p[j])
804: goto different;
805: /* setvec is state i */
806: f->gototab[s][c] = i;
807: return i;
808: different:;
809: }
810:
811: /* add tmpset to current set of states */
812: if (f->curstat >= NSTATES-1) {
813: f->curstat = 2;
814: f->reset = 1;
815: for (i = 2; i < NSTATES; i++)
816: xfree(f->posns[i]);
817: } else
818: ++(f->curstat);
819: for (i = 0; i < NCHARS; i++)
820: f->gototab[f->curstat][i] = 0;
821: xfree(f->posns[f->curstat]);
1.4 millert 822: if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
1.1 tholo 823: overflo("out of space in cgoto");
824:
825: f->posns[f->curstat] = p;
826: f->gototab[s][c] = f->curstat;
827: for (i = 0; i <= setcnt; i++)
828: p[i] = tmpset[i];
829: if (setvec[f->accept])
830: f->out[f->curstat] = 1;
831: else
832: f->out[f->curstat] = 0;
833: return f->curstat;
834: }
835:
836:
837: void freefa(fa *f) /* free a finite automaton */
838: {
839: int i;
840:
841: if (f == NULL)
842: return;
843: for (i = 0; i <= f->curstat; i++)
844: xfree(f->posns[i]);
845: for (i = 0; i <= f->accept; i++) {
846: xfree(f->re[i].lfollow);
847: if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
848: xfree((f->re[i].lval.np));
849: }
850: xfree(f->restr);
851: xfree(f);
852: }