Annotation of src/usr.bin/awk/b.c, Revision 1.29
1.29 ! millert 1: /* $OpenBSD: b.c,v 1.28 2020/06/10 21:03:36 millert Exp $ */
1.1 tholo 2: /****************************************************************
1.5 kstailey 3: Copyright (C) Lucent Technologies 1997
1.1 tholo 4: All Rights Reserved
5:
6: Permission to use, copy, modify, and distribute this software and
7: its documentation for any purpose and without fee is hereby
8: granted, provided that the above copyright notice appear in all
9: copies and that both that the copyright notice and this
10: permission notice and warranty disclaimer appear in supporting
1.5 kstailey 11: documentation, and that the name Lucent Technologies or any of
12: its entities not be used in advertising or publicity pertaining
13: to distribution of the software without specific, written prior
14: permission.
15:
16: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23: THIS SOFTWARE.
1.1 tholo 24: ****************************************************************/
25:
1.15 millert 26: /* lasciate ogne speranza, voi ch'intrate. */
1.1 tholo 27:
28: #define DEBUG
29:
30: #include <ctype.h>
1.23 millert 31: #include <limits.h>
1.1 tholo 32: #include <stdio.h>
33: #include <string.h>
34: #include <stdlib.h>
35: #include "awk.h"
1.5 kstailey 36: #include "ytab.h"
1.1 tholo 37:
38: #define MAXLIN 22
39:
1.7 millert 40: #define type(v) (v)->nobj /* badly overloaded here */
41: #define info(v) (v)->ntype /* badly overloaded here */
1.1 tholo 42: #define left(v) (v)->narg[0]
43: #define right(v) (v)->narg[1]
44: #define parent(v) (v)->nnext
45:
46: #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
1.15 millert 47: #define ELEAF case EMPTYRE: /* empty string in regexp */
1.1 tholo 48: #define UNARY case STAR: case PLUS: case QUEST:
49:
50: /* encoding in tree Nodes:
1.15 millert 51: leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
1.1 tholo 52: left is index, right contains value or pointer to value
53: unary (STAR, PLUS, QUEST): left is child, right is null
54: binary (CAT, OR): left and right are children
55: parent contains pointer to parent
56: */
57:
58:
59: int *setvec;
60: int *tmpset;
61: int maxsetvec = 0;
62:
63: int rtok; /* next token in current re */
1.4 millert 64: int rlxval;
1.28 millert 65: static const uschar *rlxstr;
66: static const uschar *prestr; /* current position in current re */
67: static const uschar *lastre; /* origin of last re */
68: static const uschar *lastatom; /* origin of last Atom */
69: static const uschar *starttok;
70: static const uschar *basestr; /* starts with original, replaced during
1.23 millert 71: repetition processing */
1.28 millert 72: static const uschar *firstbasestr;
1.1 tholo 73:
1.4 millert 74: static int setcnt;
75: static int poscnt;
1.1 tholo 76:
1.28 millert 77: const char *patbeg;
1.1 tholo 78: int patlen;
79:
1.27 millert 80: #define NFA 128 /* cache this many dynamic fa's */
1.1 tholo 81: fa *fatab[NFA];
82: int nfatab = 0; /* entries in fatab */
83:
1.27 millert 84: static int *
85: intalloc(size_t n, const char *f)
86: {
87: void *p = calloc(n, sizeof(int));
88: if (p == NULL)
89: overflo(f);
90: return p;
91: }
92:
93: static void
94: allocsetvec(const char *f)
95: {
96: maxsetvec = MAXLIN;
97: setvec = reallocarray(setvec, maxsetvec, sizeof(*setvec));
98: tmpset = reallocarray(tmpset, maxsetvec, sizeof(*tmpset));
99: if (setvec == NULL || tmpset == NULL)
100: overflo(f);
101: }
102:
103: static void
104: resizesetvec(const char *f)
105: {
106: setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(*setvec));
107: tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(*tmpset));
108: if (setvec == NULL || tmpset == NULL)
109: overflo(f);
110: maxsetvec *= 4;
111: }
112:
113: static void
114: resize_state(fa *f, int state)
115: {
116: void *p;
117: int i, new_count;
118:
119: if (++state < f->state_count)
120: return;
121:
122: new_count = state + 10; /* needs to be tuned */
123:
124: p = reallocarray(f->gototab, new_count, sizeof(f->gototab[0]));
125: if (p == NULL)
126: goto out;
127: f->gototab = p;
128:
129: p = reallocarray(f->out, new_count, sizeof(f->out[0]));
130: if (p == NULL)
131: goto out;
132: f->out = p;
133:
134: p = reallocarray(f->posns, new_count, sizeof(f->posns[0]));
135: if (p == NULL)
136: goto out;
137: f->posns = p;
138:
139: for (i = f->state_count; i < new_count; ++i) {
140: f->gototab[i] = calloc(NCHARS, sizeof(**f->gototab));
141: if (f->gototab[i] == NULL)
142: goto out;
143: f->out[i] = 0;
144: f->posns[i] = NULL;
145: }
146: f->state_count = new_count;
147: return;
148: out:
149: overflo(__func__);
150: }
151:
1.29 ! millert 152: fa *makedfa(const char *s, bool anchor) /* returns dfa for reg expr s */
1.1 tholo 153: {
154: int i, use, nuse;
155: fa *pfa;
1.6 millert 156: static int now = 1;
1.1 tholo 157:
1.25 millert 158: if (setvec == NULL) { /* first time through any RE */
1.27 millert 159: allocsetvec(__func__);
1.1 tholo 160: }
161:
1.29 ! millert 162: if (compile_time != RUNNING) /* a constant for sure */
1.1 tholo 163: return mkdfa(s, anchor);
164: for (i = 0; i < nfatab; i++) /* is it there already? */
165: if (fatab[i]->anchor == anchor
1.11 millert 166: && strcmp((const char *) fatab[i]->restr, s) == 0) {
1.6 millert 167: fatab[i]->use = now++;
1.1 tholo 168: return fatab[i];
1.10 millert 169: }
1.1 tholo 170: pfa = mkdfa(s, anchor);
171: if (nfatab < NFA) { /* room for another */
172: fatab[nfatab] = pfa;
1.6 millert 173: fatab[nfatab]->use = now++;
1.1 tholo 174: nfatab++;
175: return pfa;
176: }
177: use = fatab[0]->use; /* replace least-recently used */
178: nuse = 0;
179: for (i = 1; i < nfatab; i++)
180: if (fatab[i]->use < use) {
181: use = fatab[i]->use;
182: nuse = i;
183: }
184: freefa(fatab[nuse]);
185: fatab[nuse] = pfa;
1.6 millert 186: pfa->use = now++;
1.1 tholo 187: return pfa;
188: }
189:
1.29 ! millert 190: fa *mkdfa(const char *s, bool anchor) /* does the real work of making a dfa */
1.1 tholo 191: /* anchor = 1 for anchored matches, else 0 */
192: {
193: Node *p, *p1;
194: fa *f;
195:
1.28 millert 196: firstbasestr = (const uschar *) s;
1.23 millert 197: basestr = firstbasestr;
1.1 tholo 198: p = reparse(s);
199: p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
200: /* put ALL STAR in front of reg. exp. */
201: p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
202: /* put FINAL after reg. exp. */
203:
204: poscnt = 0;
205: penter(p1); /* enter parent pointers and leaf indices */
1.28 millert 206: if ((f = calloc(1, sizeof(fa) + poscnt * sizeof(rrow))) == NULL)
1.27 millert 207: overflo(__func__);
1.1 tholo 208: f->accept = poscnt-1; /* penter has computed number of positions in re */
209: cfoll(f, p1); /* set up follow sets */
210: freetr(p1);
1.27 millert 211: resize_state(f, 1);
212: f->posns[0] = intalloc(*(f->re[0].lfollow), __func__);
213: f->posns[1] = intalloc(1, __func__);
1.1 tholo 214: *f->posns[1] = 0;
215: f->initstat = makeinit(f, anchor);
216: f->anchor = anchor;
1.10 millert 217: f->restr = (uschar *) tostring(s);
1.23 millert 218: if (firstbasestr != basestr) {
219: if (basestr)
220: xfree(basestr);
221: }
1.1 tholo 222: return f;
223: }
224:
1.29 ! millert 225: int makeinit(fa *f, bool anchor)
1.1 tholo 226: {
227: int i, k;
228:
229: f->curstat = 2;
230: f->out[2] = 0;
231: k = *(f->re[0].lfollow);
1.25 millert 232: xfree(f->posns[2]);
1.27 millert 233: f->posns[2] = intalloc(k + 1, __func__);
1.1 tholo 234: for (i=0; i <= k; i++) {
235: (f->posns[2])[i] = (f->re[0].lfollow)[i];
236: }
237: if ((f->posns[2])[1] == f->accept)
238: f->out[2] = 1;
239: for (i=0; i < NCHARS; i++)
240: f->gototab[2][i] = 0;
241: f->curstat = cgoto(f, 2, HAT);
242: if (anchor) {
243: *f->posns[2] = k-1; /* leave out position 0 */
244: for (i=0; i < k; i++) {
245: (f->posns[0])[i] = (f->posns[2])[i];
246: }
247:
248: f->out[0] = f->out[2];
249: if (f->curstat != 2)
250: --(*f->posns[f->curstat]);
251: }
252: return f->curstat;
253: }
254:
255: void penter(Node *p) /* set up parent pointers and leaf indices */
256: {
257: switch (type(p)) {
1.15 millert 258: ELEAF
1.1 tholo 259: LEAF
1.7 millert 260: info(p) = poscnt;
1.1 tholo 261: poscnt++;
262: break;
263: UNARY
264: penter(left(p));
265: parent(left(p)) = p;
266: break;
267: case CAT:
268: case OR:
269: penter(left(p));
270: penter(right(p));
271: parent(left(p)) = p;
272: parent(right(p)) = p;
273: break;
274: default: /* can't happen */
1.8 millert 275: FATAL("can't happen: unknown type %d in penter", type(p));
1.1 tholo 276: break;
277: }
278: }
279:
280: void freetr(Node *p) /* free parse tree */
281: {
282: switch (type(p)) {
1.15 millert 283: ELEAF
1.1 tholo 284: LEAF
285: xfree(p);
286: break;
287: UNARY
288: freetr(left(p));
289: xfree(p);
290: break;
291: case CAT:
292: case OR:
293: freetr(left(p));
294: freetr(right(p));
295: xfree(p);
296: break;
297: default: /* can't happen */
1.8 millert 298: FATAL("can't happen: unknown type %d in freetr", type(p));
1.1 tholo 299: break;
300: }
301: }
302:
303: /* in the parsing of regular expressions, metacharacters like . have */
304: /* to be seen literally; \056 is not a metacharacter. */
305:
1.28 millert 306: int hexstr(const uschar **pp) /* find and eval hex string at pp, return new p */
1.2 millert 307: { /* only pick up one 8-bit byte (2 chars) */
1.28 millert 308: const uschar *p;
1.1 tholo 309: int n = 0;
1.2 millert 310: int i;
1.1 tholo 311:
1.28 millert 312: for (i = 0, p = *pp; i < 2 && isxdigit(*p); i++, p++) {
1.1 tholo 313: if (isdigit(*p))
314: n = 16 * n + *p - '0';
315: else if (*p >= 'a' && *p <= 'f')
316: n = 16 * n + *p - 'a' + 10;
317: else if (*p >= 'A' && *p <= 'F')
318: n = 16 * n + *p - 'A' + 10;
319: }
1.28 millert 320: *pp = p;
1.1 tholo 321: return n;
322: }
323:
1.2 millert 324: #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
1.1 tholo 325:
1.28 millert 326: int quoted(const uschar **pp) /* pick up next thing after a \\ */
1.1 tholo 327: /* and increment *pp */
328: {
1.28 millert 329: const uschar *p = *pp;
1.1 tholo 330: int c;
331:
332: if ((c = *p++) == 't')
333: c = '\t';
334: else if (c == 'n')
335: c = '\n';
336: else if (c == 'f')
337: c = '\f';
338: else if (c == 'r')
339: c = '\r';
340: else if (c == 'b')
341: c = '\b';
1.25 millert 342: else if (c == 'v')
343: c = '\v';
1.20 millert 344: else if (c == 'a')
1.25 millert 345: c = '\a';
1.1 tholo 346: else if (c == '\\')
347: c = '\\';
348: else if (c == 'x') { /* hexadecimal goo follows */
349: c = hexstr(&p); /* this adds a null if number is invalid */
350: } else if (isoctdigit(c)) { /* \d \dd \ddd */
351: int n = c - '0';
352: if (isoctdigit(*p)) {
353: n = 8 * n + *p++ - '0';
354: if (isoctdigit(*p))
355: n = 8 * n + *p++ - '0';
356: }
357: c = n;
358: } /* else */
359: /* c = c; */
360: *pp = p;
361: return c;
362: }
363:
1.11 millert 364: char *cclenter(const char *argp) /* add a character class */
1.1 tholo 365: {
366: int i, c, c2;
1.28 millert 367: const uschar *op, *p = (const uschar *) argp;
368: uschar *bp;
1.25 millert 369: static uschar *buf = NULL;
1.5 kstailey 370: static int bufsz = 100;
1.1 tholo 371:
372: op = p;
1.27 millert 373: if (buf == NULL && (buf = malloc(bufsz)) == NULL)
1.8 millert 374: FATAL("out of space for character class [%.10s...] 1", p);
1.5 kstailey 375: bp = buf;
376: for (i = 0; (c = *p++) != 0; ) {
1.1 tholo 377: if (c == '\\') {
1.17 millert 378: c = quoted(&p);
1.5 kstailey 379: } else if (c == '-' && i > 0 && bp[-1] != 0) {
1.1 tholo 380: if (*p != 0) {
1.5 kstailey 381: c = bp[-1];
1.1 tholo 382: c2 = *p++;
383: if (c2 == '\\')
1.17 millert 384: c2 = quoted(&p);
1.1 tholo 385: if (c > c2) { /* empty; ignore */
1.5 kstailey 386: bp--;
1.1 tholo 387: i--;
388: continue;
389: }
390: while (c < c2) {
1.15 millert 391: if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
1.8 millert 392: FATAL("out of space for character class [%.10s...] 2", p);
1.5 kstailey 393: *bp++ = ++c;
1.1 tholo 394: i++;
395: }
396: continue;
397: }
398: }
1.15 millert 399: if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
1.8 millert 400: FATAL("out of space for character class [%.10s...] 3", p);
1.5 kstailey 401: *bp++ = c;
1.1 tholo 402: i++;
403: }
1.5 kstailey 404: *bp = 0;
1.19 deraadt 405: DPRINTF( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
1.1 tholo 406: xfree(op);
1.10 millert 407: return (char *) tostring((char *) buf);
1.1 tholo 408: }
409:
1.11 millert 410: void overflo(const char *s)
1.1 tholo 411: {
1.27 millert 412: FATAL("regular expression too big: out of space in %.30s...", s);
1.1 tholo 413: }
414:
415: void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
416: {
417: int i;
1.4 millert 418: int *p;
1.1 tholo 419:
420: switch (type(v)) {
1.15 millert 421: ELEAF
1.1 tholo 422: LEAF
1.7 millert 423: f->re[info(v)].ltype = type(v);
424: f->re[info(v)].lval.np = right(v);
1.1 tholo 425: while (f->accept >= maxsetvec) { /* guessing here! */
1.27 millert 426: resizesetvec(__func__);
1.1 tholo 427: }
428: for (i = 0; i <= f->accept; i++)
429: setvec[i] = 0;
430: setcnt = 0;
431: follow(v); /* computes setvec and setcnt */
1.27 millert 432: p = intalloc(setcnt + 1, __func__);
1.7 millert 433: f->re[info(v)].lfollow = p;
1.1 tholo 434: *p = setcnt;
435: for (i = f->accept; i >= 0; i--)
436: if (setvec[i] == 1)
437: *++p = i;
438: break;
439: UNARY
440: cfoll(f,left(v));
441: break;
442: case CAT:
443: case OR:
444: cfoll(f,left(v));
445: cfoll(f,right(v));
446: break;
447: default: /* can't happen */
1.8 millert 448: FATAL("can't happen: unknown type %d in cfoll", type(v));
1.1 tholo 449: }
450: }
451:
452: int first(Node *p) /* collects initially active leaves of p into setvec */
1.15 millert 453: /* returns 0 if p matches empty string */
1.1 tholo 454: {
1.4 millert 455: int b, lp;
1.1 tholo 456:
457: switch (type(p)) {
1.15 millert 458: ELEAF
1.1 tholo 459: LEAF
1.7 millert 460: lp = info(p); /* look for high-water mark of subscripts */
1.1 tholo 461: while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
1.27 millert 462: resizesetvec(__func__);
1.1 tholo 463: }
1.15 millert 464: if (type(p) == EMPTYRE) {
465: setvec[lp] = 0;
466: return(0);
467: }
1.1 tholo 468: if (setvec[lp] != 1) {
469: setvec[lp] = 1;
470: setcnt++;
471: }
472: if (type(p) == CCL && (*(char *) right(p)) == '\0')
473: return(0); /* empty CCL */
474: else return(1);
475: case PLUS:
476: if (first(left(p)) == 0) return(0);
477: return(1);
478: case STAR:
479: case QUEST:
480: first(left(p));
481: return(0);
482: case CAT:
483: if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
484: return(1);
485: case OR:
486: b = first(right(p));
487: if (first(left(p)) == 0 || b == 0) return(0);
488: return(1);
489: }
1.8 millert 490: FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
1.1 tholo 491: return(-1);
492: }
493:
494: void follow(Node *v) /* collects leaves that can follow v into setvec */
495: {
496: Node *p;
497:
498: if (type(v) == FINAL)
499: return;
500: p = parent(v);
501: switch (type(p)) {
502: case STAR:
503: case PLUS:
504: first(v);
505: follow(p);
506: return;
507:
508: case OR:
509: case QUEST:
510: follow(p);
511: return;
512:
513: case CAT:
514: if (v == left(p)) { /* v is left child of p */
515: if (first(right(p)) == 0) {
516: follow(p);
517: return;
518: }
519: } else /* v is right child */
520: follow(p);
521: return;
522: }
523: }
524:
1.11 millert 525: int member(int c, const char *sarg) /* is c in s? */
1.1 tholo 526: {
1.28 millert 527: const uschar *s = (const uschar *) sarg;
1.10 millert 528:
1.1 tholo 529: while (*s)
530: if (c == *s++)
531: return(1);
532: return(0);
533: }
534:
1.11 millert 535: int match(fa *f, const char *p0) /* shortest match ? */
1.1 tholo 536: {
537: int s, ns;
1.28 millert 538: const uschar *p = (const uschar *) p0;
1.1 tholo 539:
1.27 millert 540: s = f->initstat;
541: assert (s < f->state_count);
542:
1.1 tholo 543: if (f->out[s])
544: return(1);
545: do {
1.15 millert 546: /* assert(*p < NCHARS); */
1.1 tholo 547: if ((ns = f->gototab[s][*p]) != 0)
548: s = ns;
549: else
550: s = cgoto(f, s, *p);
551: if (f->out[s])
552: return(1);
553: } while (*p++ != 0);
554: return(0);
555: }
556:
1.11 millert 557: int pmatch(fa *f, const char *p0) /* longest match, for sub */
1.1 tholo 558: {
559: int s, ns;
1.28 millert 560: const uschar *p = (const uschar *) p0;
561: const uschar *q;
1.1 tholo 562:
1.27 millert 563: s = f->initstat;
564: assert(s < f->state_count);
565:
1.28 millert 566: patbeg = (const char *)p;
1.1 tholo 567: patlen = -1;
568: do {
569: q = p;
570: do {
571: if (f->out[s]) /* final state */
572: patlen = q-p;
1.15 millert 573: /* assert(*q < NCHARS); */
1.1 tholo 574: if ((ns = f->gototab[s][*q]) != 0)
575: s = ns;
576: else
577: s = cgoto(f, s, *q);
1.27 millert 578:
579: assert(s < f->state_count);
580:
1.9 deraadt 581: if (s == 1) { /* no transition */
1.1 tholo 582: if (patlen >= 0) {
1.28 millert 583: patbeg = (const char *) p;
1.1 tholo 584: return(1);
585: }
586: else
587: goto nextin; /* no match */
1.9 deraadt 588: }
1.1 tholo 589: } while (*q++ != 0);
590: if (f->out[s])
591: patlen = q-p-1; /* don't count $ */
592: if (patlen >= 0) {
1.28 millert 593: patbeg = (const char *) p;
1.1 tholo 594: return(1);
595: }
596: nextin:
597: s = 2;
1.27 millert 598: } while (*p++);
1.1 tholo 599: return (0);
600: }
601:
1.11 millert 602: int nematch(fa *f, const char *p0) /* non-empty match, for sub */
1.1 tholo 603: {
604: int s, ns;
1.28 millert 605: const uschar *p = (const uschar *) p0;
606: const uschar *q;
1.1 tholo 607:
1.27 millert 608: s = f->initstat;
609: assert(s < f->state_count);
610:
1.28 millert 611: patbeg = (const char *)p;
1.1 tholo 612: patlen = -1;
613: while (*p) {
614: q = p;
615: do {
616: if (f->out[s]) /* final state */
617: patlen = q-p;
1.15 millert 618: /* assert(*q < NCHARS); */
1.1 tholo 619: if ((ns = f->gototab[s][*q]) != 0)
620: s = ns;
621: else
622: s = cgoto(f, s, *q);
1.9 deraadt 623: if (s == 1) { /* no transition */
1.1 tholo 624: if (patlen > 0) {
1.28 millert 625: patbeg = (const char *) p;
1.1 tholo 626: return(1);
627: } else
628: goto nnextin; /* no nonempty match */
1.9 deraadt 629: }
1.1 tholo 630: } while (*q++ != 0);
631: if (f->out[s])
632: patlen = q-p-1; /* don't count $ */
633: if (patlen > 0 ) {
1.28 millert 634: patbeg = (const char *) p;
1.1 tholo 635: return(1);
636: }
637: nnextin:
638: s = 2;
639: p++;
640: }
641: return (0);
642: }
643:
1.26 millert 644:
645: /*
646: * NAME
647: * fnematch
648: *
649: * DESCRIPTION
650: * A stream-fed version of nematch which transfers characters to a
651: * null-terminated buffer. All characters up to and including the last
652: * character of the matching text or EOF are placed in the buffer. If
653: * a match is found, patbeg and patlen are set appropriately.
654: *
655: * RETURN VALUES
1.29 ! millert 656: * false No match found.
! 657: * true Match found.
1.26 millert 658: */
659:
1.29 ! millert 660: bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
1.26 millert 661: {
662: char *buf = *pbuf;
663: int bufsize = *pbufsize;
664: int c, i, j, k, ns, s;
665:
666: s = pfa->initstat;
667: patlen = 0;
668:
669: /*
670: * All indices relative to buf.
671: * i <= j <= k <= bufsize
672: *
673: * i: origin of active substring
674: * j: current character
675: * k: destination of next getc()
676: */
677: i = -1, k = 0;
678: do {
679: j = i++;
680: do {
681: if (++j == k) {
682: if (k == bufsize)
683: if (!adjbuf(&buf, &bufsize, bufsize+1, quantum, 0, "fnematch"))
684: FATAL("stream '%.30s...' too long", buf);
685: buf[k++] = (c = getc(f)) != EOF ? c : 0;
686: }
687: c = buf[j];
688: /* assert(c < NCHARS); */
689:
690: if ((ns = pfa->gototab[s][c]) != 0)
691: s = ns;
692: else
693: s = cgoto(pfa, s, c);
694:
695: if (pfa->out[s]) { /* final state */
696: patlen = j - i + 1;
697: if (c == 0) /* don't count $ */
698: patlen--;
699: }
700: } while (buf[j] && s != 1);
701: s = 2;
702: } while (buf[i] && !patlen);
703:
704: /* adjbuf() may have relocated a resized buffer. Inform the world. */
705: *pbuf = buf;
706: *pbufsize = bufsize;
707:
708: if (patlen) {
709: patbeg = buf + i;
710: /*
711: * Under no circumstances is the last character fed to
712: * the automaton part of the match. It is EOF's nullbyte,
713: * or it sent the automaton into a state with no further
714: * transitions available (s==1), or both. Room for a
715: * terminating nullbyte is guaranteed.
716: *
717: * ungetc any chars after the end of matching text
718: * (except for EOF's nullbyte, if present) and null
719: * terminate the buffer.
720: */
721: do
722: if (buf[--k] && ungetc(buf[k], f) == EOF)
723: FATAL("unable to ungetc '%c'", buf[k]);
724: while (k > i + patlen);
725: buf[k] = 0;
1.29 ! millert 726: return true;
1.26 millert 727: }
728: else
1.29 ! millert 729: return false;
1.26 millert 730: }
731:
1.11 millert 732: Node *reparse(const char *p) /* parses regular expression pointed to by p */
1.1 tholo 733: { /* uses relex() to scan regular expression */
734: Node *np;
735:
1.19 deraadt 736: DPRINTF( ("reparse <%s>\n", p) );
1.28 millert 737: lastre = prestr = (const uschar *) p; /* prestr points to string to be parsed */
1.1 tholo 738: rtok = relex();
1.11 millert 739: /* GNU compatibility: an empty regexp matches anything */
1.15 millert 740: if (rtok == '\0') {
1.11 millert 741: /* FATAL("empty regular expression"); previous */
1.15 millert 742: return(op2(EMPTYRE, NIL, NIL));
743: }
1.1 tholo 744: np = regexp();
745: if (rtok != '\0')
1.8 millert 746: FATAL("syntax error in regular expression %s at %s", lastre, prestr);
1.1 tholo 747: return(np);
748: }
749:
750: Node *regexp(void) /* top-level parse of reg expr */
751: {
752: return (alt(concat(primary())));
753: }
754:
755: Node *primary(void)
756: {
757: Node *np;
1.23 millert 758: int savelastatom;
1.1 tholo 759:
760: switch (rtok) {
761: case CHAR:
1.23 millert 762: lastatom = starttok;
1.7 millert 763: np = op2(CHAR, NIL, itonp(rlxval));
1.1 tholo 764: rtok = relex();
765: return (unary(np));
766: case ALL:
767: rtok = relex();
768: return (unary(op2(ALL, NIL, NIL)));
1.15 millert 769: case EMPTYRE:
770: rtok = relex();
1.23 millert 771: return (unary(op2(EMPTYRE, NIL, NIL)));
1.1 tholo 772: case DOT:
1.23 millert 773: lastatom = starttok;
1.1 tholo 774: rtok = relex();
775: return (unary(op2(DOT, NIL, NIL)));
776: case CCL:
1.28 millert 777: np = op2(CCL, NIL, (Node*) cclenter((const char *) rlxstr));
1.23 millert 778: lastatom = starttok;
1.1 tholo 779: rtok = relex();
780: return (unary(np));
781: case NCCL:
1.28 millert 782: np = op2(NCCL, NIL, (Node *) cclenter((const char *) rlxstr));
1.23 millert 783: lastatom = starttok;
1.1 tholo 784: rtok = relex();
785: return (unary(np));
786: case '^':
787: rtok = relex();
1.7 millert 788: return (unary(op2(CHAR, NIL, itonp(HAT))));
1.1 tholo 789: case '$':
790: rtok = relex();
791: return (unary(op2(CHAR, NIL, NIL)));
792: case '(':
1.23 millert 793: lastatom = starttok;
794: savelastatom = starttok - basestr; /* Retain over recursion */
1.1 tholo 795: rtok = relex();
796: if (rtok == ')') { /* special pleading for () */
797: rtok = relex();
798: return unary(op2(CCL, NIL, (Node *) tostring("")));
799: }
800: np = regexp();
801: if (rtok == ')') {
1.23 millert 802: lastatom = basestr + savelastatom; /* Restore */
1.1 tholo 803: rtok = relex();
804: return (unary(np));
805: }
806: else
1.8 millert 807: FATAL("syntax error in regular expression %s at %s", lastre, prestr);
1.1 tholo 808: default:
1.8 millert 809: FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
1.1 tholo 810: }
811: return 0; /*NOTREACHED*/
812: }
813:
814: Node *concat(Node *np)
815: {
816: switch (rtok) {
1.23 millert 817: case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
1.1 tholo 818: return (concat(op2(CAT, np, primary())));
1.23 millert 819: case EMPTYRE:
820: rtok = relex();
821: return (concat(op2(CAT, op2(CCL, NIL, (Node *) tostring("")),
822: primary())));
1.1 tholo 823: }
824: return (np);
825: }
826:
827: Node *alt(Node *np)
828: {
829: if (rtok == OR) {
830: rtok = relex();
831: return (alt(op2(OR, np, concat(primary()))));
832: }
833: return (np);
834: }
835:
836: Node *unary(Node *np)
837: {
838: switch (rtok) {
839: case STAR:
840: rtok = relex();
841: return (unary(op2(STAR, np, NIL)));
842: case PLUS:
843: rtok = relex();
844: return (unary(op2(PLUS, np, NIL)));
845: case QUEST:
846: rtok = relex();
847: return (unary(op2(QUEST, np, NIL)));
848: default:
849: return (np);
850: }
851: }
852:
1.11 millert 853: /*
854: * Character class definitions conformant to the POSIX locale as
855: * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
856: * and operating character sets are both ASCII (ISO646) or supersets
857: * thereof.
858: *
859: * Note that to avoid overflowing the temporary buffer used in
860: * relex(), the expanded character class (prior to range expansion)
861: * must be less than twice the size of their full name.
862: */
1.12 millert 863:
864: /* Because isblank doesn't show up in any of the header files on any
865: * system i use, it's defined here. if some other locale has a richer
866: * definition of "blank", define HAS_ISBLANK and provide your own
867: * version.
868: * the parentheses here are an attempt to find a path through the maze
869: * of macro definition and/or function and/or version provided. thanks
870: * to nelson beebe for the suggestion; let's see if it works everywhere.
871: */
872:
873: #ifndef HAS_ISBLANK
874:
1.17 millert 875: int (xisblank)(int c)
1.12 millert 876: {
877: return c==' ' || c=='\t';
878: }
879:
880: #endif
881:
1.11 millert 882: struct charclass {
883: const char *cc_name;
884: int cc_namelen;
1.12 millert 885: int (*cc_func)(int);
1.11 millert 886: } charclasses[] = {
1.12 millert 887: { "alnum", 5, isalnum },
888: { "alpha", 5, isalpha },
1.17 millert 889: #ifndef HAS_ISBLANK
1.21 millert 890: { "blank", 5, xisblank },
1.17 millert 891: #else
1.12 millert 892: { "blank", 5, isblank },
1.17 millert 893: #endif
1.12 millert 894: { "cntrl", 5, iscntrl },
895: { "digit", 5, isdigit },
896: { "graph", 5, isgraph },
897: { "lower", 5, islower },
898: { "print", 5, isprint },
899: { "punct", 5, ispunct },
900: { "space", 5, isspace },
901: { "upper", 5, isupper },
902: { "xdigit", 6, isxdigit },
1.11 millert 903: { NULL, 0, NULL },
904: };
905:
1.23 millert 906: #define REPEAT_SIMPLE 0
907: #define REPEAT_PLUS_APPENDED 1
908: #define REPEAT_WITH_Q 2
909: #define REPEAT_ZERO 3
910:
911: static int
912: replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
913: int atomlen, int firstnum, int secondnum, int special_case)
914: {
915: int i, j;
1.26 millert 916: uschar *buf = NULL;
1.23 millert 917: int ret = 1;
1.29 ! millert 918: int init_q = (firstnum == 0); /* first added char will be ? */
1.23 millert 919: int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */
920: int prefix_length = reptok - basestr; /* prefix includes first rep */
1.28 millert 921: int suffix_length = strlen((const char *) reptok) - reptoklen; /* string after rep specifier */
1.23 millert 922: int size = prefix_length + suffix_length;
923:
924: if (firstnum > 1) { /* add room for reps 2 through firstnum */
925: size += atomlen*(firstnum-1);
926: }
927:
928: /* Adjust size of buffer for special cases */
929: if (special_case == REPEAT_PLUS_APPENDED) {
930: size++; /* for the final + */
931: } else if (special_case == REPEAT_WITH_Q) {
932: size += init_q + (atomlen+1)* n_q_reps;
933: } else if (special_case == REPEAT_ZERO) {
934: size += 2; /* just a null ERE: () */
935: }
1.27 millert 936: if ((buf = malloc(size + 1)) == NULL)
1.23 millert 937: FATAL("out of space in reg expr %.10s..", lastre);
938: memcpy(buf, basestr, prefix_length); /* copy prefix */
939: j = prefix_length;
940: if (special_case == REPEAT_ZERO) {
941: j -= atomlen;
942: buf[j++] = '(';
943: buf[j++] = ')';
944: }
945: for (i=1; i < firstnum; i++) { /* copy x reps */
946: memcpy(&buf[j], atom, atomlen);
947: j += atomlen;
948: }
949: if (special_case == REPEAT_PLUS_APPENDED) {
950: buf[j++] = '+';
951: } else if (special_case == REPEAT_WITH_Q) {
1.29 ! millert 952: if (init_q)
! 953: buf[j++] = '?';
! 954: for (i = 0; i < n_q_reps; i++) { /* copy x? reps */
1.23 millert 955: memcpy(&buf[j], atom, atomlen);
956: j += atomlen;
957: buf[j++] = '?';
958: }
959: }
960: memcpy(&buf[j], reptok+reptoklen, suffix_length);
961: if (special_case == REPEAT_ZERO) {
962: buf[j+suffix_length] = '\0';
963: } else {
964: buf[size] = '\0';
965: }
966: /* free old basestr */
967: if (firstbasestr != basestr) {
968: if (basestr)
969: xfree(basestr);
970: }
971: basestr = buf;
972: prestr = buf + prefix_length;
973: if (special_case == REPEAT_ZERO) {
974: prestr -= atomlen;
975: ret++;
976: }
977: return ret;
978: }
979:
980: static int repeat(const uschar *reptok, int reptoklen, const uschar *atom,
981: int atomlen, int firstnum, int secondnum)
982: {
983: /*
984: In general, the repetition specifier or "bound" is replaced here
985: by an equivalent ERE string, repeating the immediately previous atom
986: and appending ? and + as needed. Note that the first copy of the
987: atom is left in place, except in the special_case of a zero-repeat
988: (i.e., {0}).
989: */
990: if (secondnum < 0) { /* means {n,} -> repeat n-1 times followed by PLUS */
991: if (firstnum < 2) {
992: /* 0 or 1: should be handled before you get here */
993: FATAL("internal error");
994: } else {
995: return replace_repeat(reptok, reptoklen, atom, atomlen,
996: firstnum, secondnum, REPEAT_PLUS_APPENDED);
997: }
998: } else if (firstnum == secondnum) { /* {n} or {n,n} -> simply repeat n-1 times */
999: if (firstnum == 0) { /* {0} or {0,0} */
1000: /* This case is unusual because the resulting
1001: replacement string might actually be SMALLER than
1002: the original ERE */
1003: return replace_repeat(reptok, reptoklen, atom, atomlen,
1004: firstnum, secondnum, REPEAT_ZERO);
1005: } else { /* (firstnum >= 1) */
1006: return replace_repeat(reptok, reptoklen, atom, atomlen,
1007: firstnum, secondnum, REPEAT_SIMPLE);
1008: }
1009: } else if (firstnum < secondnum) { /* {n,m} -> repeat n-1 times then alternate */
1010: /* x{n,m} => xx...x{1, m-n+1} => xx...x?x?x?..x? */
1011: return replace_repeat(reptok, reptoklen, atom, atomlen,
1012: firstnum, secondnum, REPEAT_WITH_Q);
1013: } else { /* Error - shouldn't be here (n>m) */
1014: FATAL("internal error");
1015: }
1016: return 0;
1017: }
1.11 millert 1018:
1.1 tholo 1019: int relex(void) /* lexical analyzer for reparse */
1020: {
1.5 kstailey 1021: int c, n;
1.1 tholo 1022: int cflag;
1.25 millert 1023: static uschar *buf = NULL;
1.5 kstailey 1024: static int bufsz = 100;
1.10 millert 1025: uschar *bp;
1.11 millert 1026: struct charclass *cc;
1.12 millert 1027: int i;
1.29 ! millert 1028: int num, m;
! 1029: bool commafound, digitfound;
1.23 millert 1030: const uschar *startreptok;
1.24 millert 1031: static int parens = 0;
1.23 millert 1032:
1033: rescan:
1034: starttok = prestr;
1.1 tholo 1035:
1036: switch (c = *prestr++) {
1037: case '|': return OR;
1038: case '*': return STAR;
1039: case '+': return PLUS;
1040: case '?': return QUEST;
1041: case '.': return DOT;
1042: case '\0': prestr--; return '\0';
1043: case '^':
1044: case '$':
1.24 millert 1045: return c;
1.1 tholo 1046: case '(':
1.24 millert 1047: parens++;
1048: return c;
1.1 tholo 1049: case ')':
1.24 millert 1050: if (parens) {
1051: parens--;
1052: return c;
1053: }
1054: /* unmatched close parenthesis; per POSIX, treat as literal */
1055: rlxval = c;
1056: return CHAR;
1.1 tholo 1057: case '\\':
1.17 millert 1058: rlxval = quoted(&prestr);
1.1 tholo 1059: return CHAR;
1060: default:
1061: rlxval = c;
1062: return CHAR;
1.25 millert 1063: case '[':
1.27 millert 1064: if (buf == NULL && (buf = malloc(bufsz)) == NULL)
1.8 millert 1065: FATAL("out of space in reg expr %.10s..", lastre);
1.5 kstailey 1066: bp = buf;
1.1 tholo 1067: if (*prestr == '^') {
1068: cflag = 1;
1069: prestr++;
1070: }
1071: else
1072: cflag = 0;
1.11 millert 1073: n = 2 * strlen((const char *) prestr)+1;
1.15 millert 1074: if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
1.8 millert 1075: FATAL("out of space for reg expr %.10s...", lastre);
1.1 tholo 1076: for (; ; ) {
1077: if ((c = *prestr++) == '\\') {
1.5 kstailey 1078: *bp++ = '\\';
1.1 tholo 1079: if ((c = *prestr++) == '\0')
1.8 millert 1080: FATAL("nonterminated character class %.20s...", lastre);
1.5 kstailey 1081: *bp++ = c;
1.10 millert 1082: /* } else if (c == '\n') { */
1083: /* FATAL("newline in character class %.20s...", lastre); */
1.11 millert 1084: } else if (c == '[' && *prestr == ':') {
1085: /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
1086: for (cc = charclasses; cc->cc_name; cc++)
1087: if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
1088: break;
1089: if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
1090: prestr[2 + cc->cc_namelen] == ']') {
1091: prestr += cc->cc_namelen + 3;
1.22 millert 1092: /*
1093: * BUG: We begin at 1, instead of 0, since we
1094: * would otherwise prematurely terminate the
1095: * string for classes like [[:cntrl:]]. This
1096: * means that we can't match the NUL character,
1097: * not without first adapting the entire
1098: * program to track each string's length.
1099: */
1.23 millert 1100: for (i = 1; i <= UCHAR_MAX; i++) {
1.15 millert 1101: if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
1.12 millert 1102: FATAL("out of space for reg expr %.10s...", lastre);
1103: if (cc->cc_func(i)) {
1104: *bp++ = i;
1105: n++;
1106: }
1107: }
1.11 millert 1108: } else
1109: *bp++ = c;
1.23 millert 1110: } else if (c == '[' && *prestr == '.') {
1111: char collate_char;
1112: prestr++;
1113: collate_char = *prestr++;
1114: if (*prestr == '.' && prestr[1] == ']') {
1115: prestr += 2;
1116: /* Found it: map via locale TBD: for
1117: now, simply return this char. This
1118: is sufficient to pass conformance
1119: test awk.ex 156
1120: */
1121: if (*prestr == ']') {
1122: prestr++;
1123: rlxval = collate_char;
1124: return CHAR;
1125: }
1126: }
1127: } else if (c == '[' && *prestr == '=') {
1128: char equiv_char;
1129: prestr++;
1130: equiv_char = *prestr++;
1131: if (*prestr == '=' && prestr[1] == ']') {
1132: prestr += 2;
1133: /* Found it: map via locale TBD: for now
1134: simply return this char. This is
1135: sufficient to pass conformance test
1136: awk.ex 156
1137: */
1138: if (*prestr == ']') {
1139: prestr++;
1140: rlxval = equiv_char;
1141: return CHAR;
1142: }
1143: }
1.1 tholo 1144: } else if (c == '\0') {
1.8 millert 1145: FATAL("nonterminated character class %.20s", lastre);
1.5 kstailey 1146: } else if (bp == buf) { /* 1st char is special */
1147: *bp++ = c;
1.1 tholo 1148: } else if (c == ']') {
1.5 kstailey 1149: *bp++ = 0;
1.10 millert 1150: rlxstr = (uschar *) tostring((char *) buf);
1.1 tholo 1151: if (cflag == 0)
1152: return CCL;
1153: else
1154: return NCCL;
1155: } else
1.5 kstailey 1156: *bp++ = c;
1.1 tholo 1157: }
1.23 millert 1158: break;
1159: case '{':
1160: if (isdigit(*(prestr))) {
1161: num = 0; /* Process as a repetition */
1162: n = -1; m = -1;
1.29 ! millert 1163: commafound = false;
! 1164: digitfound = false;
1.23 millert 1165: startreptok = prestr-1;
1166: /* Remember start of previous atom here ? */
1167: } else { /* just a { char, not a repetition */
1168: rlxval = c;
1169: return CHAR;
1170: }
1171: for (; ; ) {
1172: if ((c = *prestr++) == '}') {
1173: if (commafound) {
1174: if (digitfound) { /* {n,m} */
1175: m = num;
1176: if (m<n)
1177: FATAL("illegal repetition expression: class %.20s",
1178: lastre);
1179: if ((n==0) && (m==1)) {
1180: return QUEST;
1181: }
1182: } else { /* {n,} */
1183: if (n==0) return STAR;
1184: if (n==1) return PLUS;
1185: }
1186: } else {
1187: if (digitfound) { /* {n} same as {n,n} */
1188: n = num;
1189: m = num;
1190: } else { /* {} */
1191: FATAL("illegal repetition expression: class %.20s",
1192: lastre);
1193: }
1194: }
1195: if (repeat(starttok, prestr-starttok, lastatom,
1196: startreptok - lastatom, n, m) > 0) {
1197: if ((n==0) && (m==0)) {
1198: return EMPTYRE;
1199: }
1200: /* must rescan input for next token */
1201: goto rescan;
1202: }
1203: /* Failed to replace: eat up {...} characters
1204: and treat like just PLUS */
1205: return PLUS;
1206: } else if (c == '\0') {
1207: FATAL("nonterminated character class %.20s",
1208: lastre);
1209: } else if (isdigit(c)) {
1210: num = 10 * num + c - '0';
1.29 ! millert 1211: digitfound = true;
1.23 millert 1212: } else if (c == ',') {
1213: if (commafound)
1214: FATAL("illegal repetition expression: class %.20s",
1215: lastre);
1216: /* looking for {n,} or {n,m} */
1.29 ! millert 1217: commafound = true;
1.23 millert 1218: n = num;
1.29 ! millert 1219: digitfound = false; /* reset */
1.23 millert 1220: num = 0;
1221: } else {
1222: FATAL("illegal repetition expression: class %.20s",
1223: lastre);
1224: }
1225: }
1226: break;
1.1 tholo 1227: }
1228: }
1229:
1230: int cgoto(fa *f, int s, int c)
1231: {
1.27 millert 1232: int *p, *q;
1.1 tholo 1233: int i, j, k;
1234:
1.12 millert 1235: assert(c == HAT || c < NCHARS);
1.1 tholo 1236: while (f->accept >= maxsetvec) { /* guessing here! */
1.27 millert 1237: resizesetvec(__func__);
1.1 tholo 1238: }
1239: for (i = 0; i <= f->accept; i++)
1240: setvec[i] = 0;
1241: setcnt = 0;
1.27 millert 1242: resize_state(f, s);
1.1 tholo 1243: /* compute positions of gototab[s,c] into setvec */
1244: p = f->posns[s];
1245: for (i = 1; i <= *p; i++) {
1246: if ((k = f->re[p[i]].ltype) != FINAL) {
1.7 millert 1247: if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
1.1 tholo 1248: || (k == DOT && c != 0 && c != HAT)
1249: || (k == ALL && c != 0)
1.15 millert 1250: || (k == EMPTYRE && c != 0)
1.10 millert 1251: || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
1252: || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
1.1 tholo 1253: q = f->re[p[i]].lfollow;
1254: for (j = 1; j <= *q; j++) {
1255: if (q[j] >= maxsetvec) {
1.27 millert 1256: resizesetvec(__func__);
1.1 tholo 1257: }
1258: if (setvec[q[j]] == 0) {
1259: setcnt++;
1260: setvec[q[j]] = 1;
1261: }
1262: }
1263: }
1264: }
1265: }
1266: /* determine if setvec is a previous state */
1267: tmpset[0] = setcnt;
1268: j = 1;
1269: for (i = f->accept; i >= 0; i--)
1270: if (setvec[i]) {
1271: tmpset[j++] = i;
1272: }
1.27 millert 1273: resize_state(f, f->curstat > s ? f->curstat : s);
1.1 tholo 1274: /* tmpset == previous state? */
1275: for (i = 1; i <= f->curstat; i++) {
1276: p = f->posns[i];
1277: if ((k = tmpset[0]) != p[0])
1278: goto different;
1279: for (j = 1; j <= k; j++)
1280: if (tmpset[j] != p[j])
1281: goto different;
1282: /* setvec is state i */
1283: f->gototab[s][c] = i;
1284: return i;
1285: different:;
1286: }
1287:
1288: /* add tmpset to current set of states */
1.27 millert 1289: ++(f->curstat);
1290: resize_state(f, f->curstat);
1.1 tholo 1291: for (i = 0; i < NCHARS; i++)
1292: f->gototab[f->curstat][i] = 0;
1293: xfree(f->posns[f->curstat]);
1.27 millert 1294: p = intalloc(setcnt + 1, __func__);
1.1 tholo 1295:
1296: f->posns[f->curstat] = p;
1297: f->gototab[s][c] = f->curstat;
1298: for (i = 0; i <= setcnt; i++)
1299: p[i] = tmpset[i];
1300: if (setvec[f->accept])
1301: f->out[f->curstat] = 1;
1302: else
1303: f->out[f->curstat] = 0;
1304: return f->curstat;
1305: }
1306:
1307:
1308: void freefa(fa *f) /* free a finite automaton */
1309: {
1310: int i;
1311:
1312: if (f == NULL)
1313: return;
1.27 millert 1314: for (i = 0; i < f->state_count; i++)
1315: xfree(f->gototab[i])
1.1 tholo 1316: for (i = 0; i <= f->curstat; i++)
1317: xfree(f->posns[i]);
1318: for (i = 0; i <= f->accept; i++) {
1319: xfree(f->re[i].lfollow);
1320: if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
1321: xfree((f->re[i].lval.np));
1322: }
1323: xfree(f->restr);
1.27 millert 1324: xfree(f->out);
1325: xfree(f->posns);
1326: xfree(f->gototab);
1.1 tholo 1327: xfree(f);
1328: }