Annotation of src/usr.bin/awk/b.c, Revision 1.5
1.5 ! kstailey 1: /* $OpenBSD$ */
1.1 tholo 2: /****************************************************************
1.5 ! kstailey 3: Copyright (C) Lucent Technologies 1997
1.1 tholo 4: All Rights Reserved
5:
6: Permission to use, copy, modify, and distribute this software and
7: its documentation for any purpose and without fee is hereby
8: granted, provided that the above copyright notice appear in all
9: copies and that both that the copyright notice and this
10: permission notice and warranty disclaimer appear in supporting
1.5 ! kstailey 11: documentation, and that the name Lucent Technologies or any of
! 12: its entities not be used in advertising or publicity pertaining
! 13: to distribution of the software without specific, written prior
! 14: permission.
! 15:
! 16: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
! 17: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
! 18: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
! 19: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
! 20: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
! 21: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
! 22: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
! 23: THIS SOFTWARE.
1.1 tholo 24: ****************************************************************/
25:
26: /* lasciate ogne speranza, voi ch'entrate. */
27:
28: #define DEBUG
29:
30: #include <ctype.h>
31: #include <stdio.h>
32: #include <string.h>
33: #include <stdlib.h>
34: #include "awk.h"
1.5 ! kstailey 35: #include "ytab.h"
1.1 tholo 36:
37: #define HAT (NCHARS-1) /* matches ^ in regular expr */
38: /* NCHARS is 2**n */
39: #define MAXLIN 22
40:
41: #define type(v) (v)->nobj
42: #define left(v) (v)->narg[0]
43: #define right(v) (v)->narg[1]
44: #define parent(v) (v)->nnext
45:
46: #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47: #define UNARY case STAR: case PLUS: case QUEST:
48:
49: /* encoding in tree Nodes:
50: leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
51: left is index, right contains value or pointer to value
52: unary (STAR, PLUS, QUEST): left is child, right is null
53: binary (CAT, OR): left and right are children
54: parent contains pointer to parent
55: */
56:
57:
58: int *setvec;
59: int *tmpset;
60: int maxsetvec = 0;
61:
62: int rtok; /* next token in current re */
1.4 millert 63: int rlxval;
1.1 tholo 64: char *rlxstr;
65: char *prestr; /* current position in current re */
66: char *lastre; /* origin of last re */
67:
1.4 millert 68: static int setcnt;
69: static int poscnt;
1.1 tholo 70:
71: char *patbeg;
72: int patlen;
73:
74: #define NFA 20 /* cache this many dynamic fa's */
75: fa *fatab[NFA];
76: int nfatab = 0; /* entries in fatab */
77:
78: fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */
79: {
80: int i, use, nuse;
81: fa *pfa;
82:
83: if (setvec == 0) { /* first time through any RE */
84: maxsetvec = MAXLIN;
85: setvec = (int *) malloc(maxsetvec * sizeof(int));
86: tmpset = (int *) malloc(maxsetvec * sizeof(int));
87: if (setvec == 0 || tmpset == 0)
88: overflo("out of space initializing makedfa");
89: }
90:
91: if (compile_time) /* a constant for sure */
92: return mkdfa(s, anchor);
93: for (i = 0; i < nfatab; i++) /* is it there already? */
94: if (fatab[i]->anchor == anchor
95: && strcmp(fatab[i]->restr, s) == 0) {
96: fatab[i]->use++;
97: return fatab[i];
98: }
99: pfa = mkdfa(s, anchor);
100: if (nfatab < NFA) { /* room for another */
101: fatab[nfatab] = pfa;
102: fatab[nfatab]->use = 1;
103: nfatab++;
104: return pfa;
105: }
106: use = fatab[0]->use; /* replace least-recently used */
107: nuse = 0;
108: for (i = 1; i < nfatab; i++)
109: if (fatab[i]->use < use) {
110: use = fatab[i]->use;
111: nuse = i;
112: }
113: freefa(fatab[nuse]);
114: fatab[nuse] = pfa;
115: pfa->use = 1;
116: return pfa;
117: }
118:
119: fa *mkdfa(char *s, int anchor) /* does the real work of making a dfa */
120: /* anchor = 1 for anchored matches, else 0 */
121: {
122: Node *p, *p1;
123: fa *f;
124:
125: p = reparse(s);
126: p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
127: /* put ALL STAR in front of reg. exp. */
128: p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
129: /* put FINAL after reg. exp. */
130:
131: poscnt = 0;
132: penter(p1); /* enter parent pointers and leaf indices */
133: if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
134: overflo("out of space for fa");
135: f->accept = poscnt-1; /* penter has computed number of positions in re */
136: cfoll(f, p1); /* set up follow sets */
137: freetr(p1);
1.4 millert 138: if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
1.1 tholo 139: overflo("out of space in makedfa");
1.4 millert 140: if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
1.1 tholo 141: overflo("out of space in makedfa");
142: *f->posns[1] = 0;
143: f->initstat = makeinit(f, anchor);
144: f->anchor = anchor;
145: f->restr = tostring(s);
146: return f;
147: }
148:
149: int makeinit(fa *f, int anchor)
150: {
151: int i, k;
152:
153: f->curstat = 2;
154: f->out[2] = 0;
155: f->reset = 0;
156: k = *(f->re[0].lfollow);
157: xfree(f->posns[2]);
1.4 millert 158: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
1.1 tholo 159: overflo("out of space in makeinit");
160: for (i=0; i <= k; i++) {
161: (f->posns[2])[i] = (f->re[0].lfollow)[i];
162: }
163: if ((f->posns[2])[1] == f->accept)
164: f->out[2] = 1;
165: for (i=0; i < NCHARS; i++)
166: f->gototab[2][i] = 0;
167: f->curstat = cgoto(f, 2, HAT);
168: if (anchor) {
169: *f->posns[2] = k-1; /* leave out position 0 */
170: for (i=0; i < k; i++) {
171: (f->posns[0])[i] = (f->posns[2])[i];
172: }
173:
174: f->out[0] = f->out[2];
175: if (f->curstat != 2)
176: --(*f->posns[f->curstat]);
177: }
178: return f->curstat;
179: }
180:
181: void penter(Node *p) /* set up parent pointers and leaf indices */
182: {
183: switch (type(p)) {
184: LEAF
185: left(p) = (Node *) poscnt;
186: poscnt++;
187: break;
188: UNARY
189: penter(left(p));
190: parent(left(p)) = p;
191: break;
192: case CAT:
193: case OR:
194: penter(left(p));
195: penter(right(p));
196: parent(left(p)) = p;
197: parent(right(p)) = p;
198: break;
199: default: /* can't happen */
200: ERROR "can't happen: unknown type %d in penter", type(p) FATAL;
201: break;
202: }
203: }
204:
205: void freetr(Node *p) /* free parse tree */
206: {
207: switch (type(p)) {
208: LEAF
209: xfree(p);
210: break;
211: UNARY
212: freetr(left(p));
213: xfree(p);
214: break;
215: case CAT:
216: case OR:
217: freetr(left(p));
218: freetr(right(p));
219: xfree(p);
220: break;
221: default: /* can't happen */
222: ERROR "can't happen: unknown type %d in freetr", type(p) FATAL;
223: break;
224: }
225: }
226:
227: /* in the parsing of regular expressions, metacharacters like . have */
228: /* to be seen literally; \056 is not a metacharacter. */
229:
230: int hexstr(char **pp) /* find and eval hex string at pp, return new p */
1.2 millert 231: { /* only pick up one 8-bit byte (2 chars) */
1.1 tholo 232: char *p;
233: int n = 0;
1.2 millert 234: int i;
1.1 tholo 235:
1.2 millert 236: for (i = 0, p = *pp; i < 2 && isxdigit(*p); i++, p++) {
1.1 tholo 237: if (isdigit(*p))
238: n = 16 * n + *p - '0';
239: else if (*p >= 'a' && *p <= 'f')
240: n = 16 * n + *p - 'a' + 10;
241: else if (*p >= 'A' && *p <= 'F')
242: n = 16 * n + *p - 'A' + 10;
243: }
244: *pp = p;
245: return n;
246: }
247:
1.2 millert 248: #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
1.1 tholo 249:
250: int quoted(char **pp) /* pick up next thing after a \\ */
251: /* and increment *pp */
252: {
253: char *p = *pp;
254: int c;
255:
256: if ((c = *p++) == 't')
257: c = '\t';
258: else if (c == 'n')
259: c = '\n';
260: else if (c == 'f')
261: c = '\f';
262: else if (c == 'r')
263: c = '\r';
264: else if (c == 'b')
265: c = '\b';
266: else if (c == '\\')
267: c = '\\';
268: else if (c == 'x') { /* hexadecimal goo follows */
269: c = hexstr(&p); /* this adds a null if number is invalid */
270: } else if (isoctdigit(c)) { /* \d \dd \ddd */
271: int n = c - '0';
272: if (isoctdigit(*p)) {
273: n = 8 * n + *p++ - '0';
274: if (isoctdigit(*p))
275: n = 8 * n + *p++ - '0';
276: }
277: c = n;
278: } /* else */
279: /* c = c; */
280: *pp = p;
281: return c;
282: }
283:
284: char *cclenter(char *p) /* add a character class */
285: {
286: int i, c, c2;
1.5 ! kstailey 287: char *op, *bp;
! 288: static char *buf = 0;
! 289: static int bufsz = 100;
1.1 tholo 290:
291: op = p;
1.5 ! kstailey 292: if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
! 293: ERROR "out of space for character class [%.10s...] 1", p FATAL;
! 294: bp = buf;
! 295: for (i = 0; (c = *p++) != 0; ) {
1.1 tholo 296: if (c == '\\') {
297: c = quoted(&p);
1.5 ! kstailey 298: } else if (c == '-' && i > 0 && bp[-1] != 0) {
1.1 tholo 299: if (*p != 0) {
1.5 ! kstailey 300: c = bp[-1];
1.1 tholo 301: c2 = *p++;
302: if (c2 == '\\')
303: c2 = quoted(&p);
304: if (c > c2) { /* empty; ignore */
1.5 ! kstailey 305: bp--;
1.1 tholo 306: i--;
307: continue;
308: }
309: while (c < c2) {
1.5 ! kstailey 310: if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, 0))
! 311: ERROR "out of space for character class [%.10s...] 2", p FATAL;
! 312: *bp++ = ++c;
1.1 tholo 313: i++;
314: }
315: continue;
316: }
317: }
1.5 ! kstailey 318: if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, 0))
! 319: ERROR "out of space for character class [%.10s...] 3", p FATAL;
! 320: *bp++ = c;
1.1 tholo 321: i++;
322: }
1.5 ! kstailey 323: *bp = 0;
! 324: dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
1.1 tholo 325: xfree(op);
1.5 ! kstailey 326: return(tostring(buf));
1.1 tholo 327: }
328:
329: void overflo(char *s)
330: {
331: ERROR "regular expression too big: %.30s...", s FATAL;
332: }
333:
334: void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
335: {
336: int i;
1.4 millert 337: int *p;
1.1 tholo 338:
339: switch (type(v)) {
340: LEAF
1.4 millert 341: f->re[(int) left(v)].ltype = type(v);
342: f->re[(int) left(v)].lval.np = right(v);
1.1 tholo 343: while (f->accept >= maxsetvec) { /* guessing here! */
344: maxsetvec *= 4;
345: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
346: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
1.5 ! kstailey 347: if (setvec == 0 || tmpset == 0)
1.1 tholo 348: overflo("out of space in cfoll()");
349: }
350: for (i = 0; i <= f->accept; i++)
351: setvec[i] = 0;
352: setcnt = 0;
353: follow(v); /* computes setvec and setcnt */
1.4 millert 354: if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
1.1 tholo 355: overflo("out of space building follow set");
1.4 millert 356: f->re[(int) left(v)].lfollow = p;
1.1 tholo 357: *p = setcnt;
358: for (i = f->accept; i >= 0; i--)
359: if (setvec[i] == 1)
360: *++p = i;
361: break;
362: UNARY
363: cfoll(f,left(v));
364: break;
365: case CAT:
366: case OR:
367: cfoll(f,left(v));
368: cfoll(f,right(v));
369: break;
370: default: /* can't happen */
371: ERROR "can't happen: unknown type %d in cfoll", type(v) FATAL;
372: }
373: }
374:
375: int first(Node *p) /* collects initially active leaves of p into setvec */
376: /* returns 1 if p matches empty string */
377: {
1.4 millert 378: int b, lp;
1.1 tholo 379:
380: switch (type(p)) {
381: LEAF
1.4 millert 382: lp = (int) left(p); /* look for high-water mark of subscripts */
1.1 tholo 383: while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
384: maxsetvec *= 4;
385: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
386: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
387: if (setvec == 0 || tmpset == 0) { abort();
388: overflo("out of space in first()");
389: }
390: }
391: if (setvec[lp] != 1) {
392: setvec[lp] = 1;
393: setcnt++;
394: }
395: if (type(p) == CCL && (*(char *) right(p)) == '\0')
396: return(0); /* empty CCL */
397: else return(1);
398: case PLUS:
399: if (first(left(p)) == 0) return(0);
400: return(1);
401: case STAR:
402: case QUEST:
403: first(left(p));
404: return(0);
405: case CAT:
406: if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
407: return(1);
408: case OR:
409: b = first(right(p));
410: if (first(left(p)) == 0 || b == 0) return(0);
411: return(1);
412: }
413: ERROR "can't happen: unknown type %d in first", type(p) FATAL; /* can't happen */
414: return(-1);
415: }
416:
417: void follow(Node *v) /* collects leaves that can follow v into setvec */
418: {
419: Node *p;
420:
421: if (type(v) == FINAL)
422: return;
423: p = parent(v);
424: switch (type(p)) {
425: case STAR:
426: case PLUS:
427: first(v);
428: follow(p);
429: return;
430:
431: case OR:
432: case QUEST:
433: follow(p);
434: return;
435:
436: case CAT:
437: if (v == left(p)) { /* v is left child of p */
438: if (first(right(p)) == 0) {
439: follow(p);
440: return;
441: }
442: } else /* v is right child */
443: follow(p);
444: return;
445: }
446: }
447:
448: int member(int c, char *s) /* is c in s? */
449: {
450: while (*s)
451: if (c == *s++)
452: return(1);
453: return(0);
454: }
455:
456: int match(fa *f, char *p0) /* shortest match ? */
457: {
458: int s, ns;
459: uschar *p = (uschar *) p0;
460:
461: s = f->reset ? makeinit(f,0) : f->initstat;
462: if (f->out[s])
463: return(1);
464: do {
465: if ((ns = f->gototab[s][*p]) != 0)
466: s = ns;
467: else
468: s = cgoto(f, s, *p);
469: if (f->out[s])
470: return(1);
471: } while (*p++ != 0);
472: return(0);
473: }
474:
475: int pmatch(fa *f, char *p0) /* longest match, for sub */
476: {
477: int s, ns;
478: uschar *p = (uschar *) p0;
479: uschar *q;
480: int i, k;
481:
482: s = f->reset ? makeinit(f,1) : f->initstat;
483: patbeg = (char *) p;
484: patlen = -1;
485: do {
486: q = p;
487: do {
488: if (f->out[s]) /* final state */
489: patlen = q-p;
490: if ((ns = f->gototab[s][*q]) != 0)
491: s = ns;
492: else
493: s = cgoto(f, s, *q);
494: if (s == 1) /* no transition */
495: if (patlen >= 0) {
496: patbeg = (char *) p;
497: return(1);
498: }
499: else
500: goto nextin; /* no match */
501: } while (*q++ != 0);
502: if (f->out[s])
503: patlen = q-p-1; /* don't count $ */
504: if (patlen >= 0) {
505: patbeg = (char *) p;
506: return(1);
507: }
508: nextin:
509: s = 2;
510: if (f->reset) {
511: for (i = 2; i <= f->curstat; i++)
512: xfree(f->posns[i]);
513: k = *f->posns[0];
1.4 millert 514: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
1.1 tholo 515: overflo("out of space in pmatch");
516: for (i = 0; i <= k; i++)
517: (f->posns[2])[i] = (f->posns[0])[i];
518: f->initstat = f->curstat = 2;
519: f->out[2] = f->out[0];
520: for (i = 0; i < NCHARS; i++)
521: f->gototab[2][i] = 0;
522: }
523: } while (*p++ != 0);
524: return (0);
525: }
526:
527: int nematch(fa *f, char *p0) /* non-empty match, for sub */
528: {
529: int s, ns;
530: uschar *p = (uschar *) p0;
531: uschar *q;
532: int i, k;
533:
534: s = f->reset ? makeinit(f,1) : f->initstat;
535: patlen = -1;
536: while (*p) {
537: q = p;
538: do {
539: if (f->out[s]) /* final state */
540: patlen = q-p;
541: if ((ns = f->gototab[s][*q]) != 0)
542: s = ns;
543: else
544: s = cgoto(f, s, *q);
545: if (s == 1) /* no transition */
546: if (patlen > 0) {
547: patbeg = (char *) p;
548: return(1);
549: } else
550: goto nnextin; /* no nonempty match */
551: } while (*q++ != 0);
552: if (f->out[s])
553: patlen = q-p-1; /* don't count $ */
554: if (patlen > 0 ) {
555: patbeg = (char *) p;
556: return(1);
557: }
558: nnextin:
559: s = 2;
560: if (f->reset) {
561: for (i = 2; i <= f->curstat; i++)
562: xfree(f->posns[i]);
563: k = *f->posns[0];
1.4 millert 564: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
1.1 tholo 565: overflo("out of state space");
566: for (i = 0; i <= k; i++)
567: (f->posns[2])[i] = (f->posns[0])[i];
568: f->initstat = f->curstat = 2;
569: f->out[2] = f->out[0];
570: for (i = 0; i < NCHARS; i++)
571: f->gototab[2][i] = 0;
572: }
573: p++;
574: }
575: return (0);
576: }
577:
578: Node *reparse(char *p) /* parses regular expression pointed to by p */
579: { /* uses relex() to scan regular expression */
580: Node *np;
581:
582: dprintf( ("reparse <%s>\n", p) );
583: lastre = prestr = p; /* prestr points to string to be parsed */
584: rtok = relex();
585: if (rtok == '\0')
586: ERROR "empty regular expression" FATAL;
587: np = regexp();
588: if (rtok != '\0')
589: ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL;
590: return(np);
591: }
592:
593: Node *regexp(void) /* top-level parse of reg expr */
594: {
595: return (alt(concat(primary())));
596: }
597:
598: Node *primary(void)
599: {
600: Node *np;
601:
602: switch (rtok) {
603: case CHAR:
604: np = op2(CHAR, NIL, (Node *) rlxval);
605: rtok = relex();
606: return (unary(np));
607: case ALL:
608: rtok = relex();
609: return (unary(op2(ALL, NIL, NIL)));
610: case DOT:
611: rtok = relex();
612: return (unary(op2(DOT, NIL, NIL)));
613: case CCL:
614: np = op2(CCL, NIL, (Node*) cclenter(rlxstr));
615: rtok = relex();
616: return (unary(np));
617: case NCCL:
618: np = op2(NCCL, NIL, (Node *) cclenter(rlxstr));
619: rtok = relex();
620: return (unary(np));
621: case '^':
622: rtok = relex();
623: return (unary(op2(CHAR, NIL, (Node *) HAT)));
624: case '$':
625: rtok = relex();
626: return (unary(op2(CHAR, NIL, NIL)));
627: case '(':
628: rtok = relex();
629: if (rtok == ')') { /* special pleading for () */
630: rtok = relex();
631: return unary(op2(CCL, NIL, (Node *) tostring("")));
632: }
633: np = regexp();
634: if (rtok == ')') {
635: rtok = relex();
636: return (unary(np));
637: }
638: else
639: ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL;
640: default:
641: ERROR "illegal primary in regular expression %s at %s", lastre, prestr FATAL;
642: }
643: return 0; /*NOTREACHED*/
644: }
645:
646: Node *concat(Node *np)
647: {
648: switch (rtok) {
649: case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
650: return (concat(op2(CAT, np, primary())));
651: }
652: return (np);
653: }
654:
655: Node *alt(Node *np)
656: {
657: if (rtok == OR) {
658: rtok = relex();
659: return (alt(op2(OR, np, concat(primary()))));
660: }
661: return (np);
662: }
663:
664: Node *unary(Node *np)
665: {
666: switch (rtok) {
667: case STAR:
668: rtok = relex();
669: return (unary(op2(STAR, np, NIL)));
670: case PLUS:
671: rtok = relex();
672: return (unary(op2(PLUS, np, NIL)));
673: case QUEST:
674: rtok = relex();
675: return (unary(op2(QUEST, np, NIL)));
676: default:
677: return (np);
678: }
679: }
680:
681: int relex(void) /* lexical analyzer for reparse */
682: {
1.5 ! kstailey 683: int c, n;
1.1 tholo 684: int cflag;
1.5 ! kstailey 685: static char *buf = 0;
! 686: static int bufsz = 100;
! 687: char *bp;
1.1 tholo 688:
689: switch (c = *prestr++) {
690: case '|': return OR;
691: case '*': return STAR;
692: case '+': return PLUS;
693: case '?': return QUEST;
694: case '.': return DOT;
695: case '\0': prestr--; return '\0';
696: case '^':
697: case '$':
698: case '(':
699: case ')':
700: return c;
701: case '\\':
702: rlxval = quoted(&prestr);
703: return CHAR;
704: default:
705: rlxval = c;
706: return CHAR;
707: case '[':
1.5 ! kstailey 708: if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
! 709: ERROR "out of space in reg expr %.10s..", lastre FATAL;
! 710: bp = buf;
1.1 tholo 711: if (*prestr == '^') {
712: cflag = 1;
713: prestr++;
714: }
715: else
716: cflag = 0;
1.5 ! kstailey 717: n = 2 * strlen(prestr)+1;
! 718: if (!adjbuf(&buf, &bufsz, n, n, &bp, 0))
! 719: ERROR "out of space for reg expr %.10s...", lastre FATAL;
1.1 tholo 720: for (; ; ) {
721: if ((c = *prestr++) == '\\') {
1.5 ! kstailey 722: *bp++ = '\\';
1.1 tholo 723: if ((c = *prestr++) == '\0')
724: ERROR "nonterminated character class %.20s...", lastre FATAL;
1.5 ! kstailey 725: *bp++ = c;
1.1 tholo 726: } else if (c == '\n') {
727: ERROR "newline in character class %.20s...", lastre FATAL;
728: } else if (c == '\0') {
729: ERROR "nonterminated character class %.20s", lastre FATAL;
1.5 ! kstailey 730: } else if (bp == buf) { /* 1st char is special */
! 731: *bp++ = c;
1.1 tholo 732: } else if (c == ']') {
1.5 ! kstailey 733: *bp++ = 0;
! 734: rlxstr = tostring(buf);
1.1 tholo 735: if (cflag == 0)
736: return CCL;
737: else
738: return NCCL;
739: } else
1.5 ! kstailey 740: *bp++ = c;
1.1 tholo 741: }
742: }
743: }
744:
745: int cgoto(fa *f, int s, int c)
746: {
747: int i, j, k;
1.4 millert 748: int *p, *q;
1.1 tholo 749:
750: if (c < 0)
751: ERROR "can't happen: neg char %d in cgoto", c FATAL;
752: while (f->accept >= maxsetvec) { /* guessing here! */
753: maxsetvec *= 4;
754: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
755: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
756: if (setvec == 0 || tmpset == 0) { abort();
757: overflo("out of space in cgoto()");
758: }
759: }
760: for (i = 0; i <= f->accept; i++)
761: setvec[i] = 0;
762: setcnt = 0;
763: /* compute positions of gototab[s,c] into setvec */
764: p = f->posns[s];
765: for (i = 1; i <= *p; i++) {
766: if ((k = f->re[p[i]].ltype) != FINAL) {
767: if ((k == CHAR && c == f->re[p[i]].lval.i)
768: || (k == DOT && c != 0 && c != HAT)
769: || (k == ALL && c != 0)
770: || (k == CCL && member(c, f->re[p[i]].lval.up))
771: || (k == NCCL && !member(c, f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
772: q = f->re[p[i]].lfollow;
773: for (j = 1; j <= *q; j++) {
774: if (q[j] >= maxsetvec) {
775: maxsetvec *= 4;
776: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
777: tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
778: if (setvec == 0 || tmpset == 0)
779: overflo("cgoto overflow");
780: }
781: if (setvec[q[j]] == 0) {
782: setcnt++;
783: setvec[q[j]] = 1;
784: }
785: }
786: }
787: }
788: }
789: /* determine if setvec is a previous state */
790: tmpset[0] = setcnt;
791: j = 1;
792: for (i = f->accept; i >= 0; i--)
793: if (setvec[i]) {
794: tmpset[j++] = i;
795: }
796: /* tmpset == previous state? */
797: for (i = 1; i <= f->curstat; i++) {
798: p = f->posns[i];
799: if ((k = tmpset[0]) != p[0])
800: goto different;
801: for (j = 1; j <= k; j++)
802: if (tmpset[j] != p[j])
803: goto different;
804: /* setvec is state i */
805: f->gototab[s][c] = i;
806: return i;
807: different:;
808: }
809:
810: /* add tmpset to current set of states */
811: if (f->curstat >= NSTATES-1) {
812: f->curstat = 2;
813: f->reset = 1;
814: for (i = 2; i < NSTATES; i++)
815: xfree(f->posns[i]);
816: } else
817: ++(f->curstat);
818: for (i = 0; i < NCHARS; i++)
819: f->gototab[f->curstat][i] = 0;
820: xfree(f->posns[f->curstat]);
1.4 millert 821: if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
1.1 tholo 822: overflo("out of space in cgoto");
823:
824: f->posns[f->curstat] = p;
825: f->gototab[s][c] = f->curstat;
826: for (i = 0; i <= setcnt; i++)
827: p[i] = tmpset[i];
828: if (setvec[f->accept])
829: f->out[f->curstat] = 1;
830: else
831: f->out[f->curstat] = 0;
832: return f->curstat;
833: }
834:
835:
836: void freefa(fa *f) /* free a finite automaton */
837: {
838: int i;
839:
840: if (f == NULL)
841: return;
842: for (i = 0; i <= f->curstat; i++)
843: xfree(f->posns[i]);
844: for (i = 0; i <= f->accept; i++) {
845: xfree(f->re[i].lfollow);
846: if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
847: xfree((f->re[i].lval.np));
848: }
849: xfree(f->restr);
850: xfree(f);
851: }