Annotation of src/usr.bin/awk/b.c, Revision 1.30
1.30 ! millert 1: /* $OpenBSD: b.c,v 1.29 2020/06/10 21:03:56 millert Exp $ */
1.1 tholo 2: /****************************************************************
1.5 kstailey 3: Copyright (C) Lucent Technologies 1997
1.1 tholo 4: All Rights Reserved
5:
6: Permission to use, copy, modify, and distribute this software and
7: its documentation for any purpose and without fee is hereby
8: granted, provided that the above copyright notice appear in all
9: copies and that both that the copyright notice and this
10: permission notice and warranty disclaimer appear in supporting
1.5 kstailey 11: documentation, and that the name Lucent Technologies or any of
12: its entities not be used in advertising or publicity pertaining
13: to distribution of the software without specific, written prior
14: permission.
15:
16: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23: THIS SOFTWARE.
1.1 tholo 24: ****************************************************************/
25:
1.15 millert 26: /* lasciate ogne speranza, voi ch'intrate. */
1.1 tholo 27:
28: #define DEBUG
29:
30: #include <ctype.h>
1.23 millert 31: #include <limits.h>
1.1 tholo 32: #include <stdio.h>
33: #include <string.h>
34: #include <stdlib.h>
35: #include "awk.h"
1.5 kstailey 36: #include "ytab.h"
1.1 tholo 37:
38: #define MAXLIN 22
39:
1.7 millert 40: #define type(v) (v)->nobj /* badly overloaded here */
41: #define info(v) (v)->ntype /* badly overloaded here */
1.1 tholo 42: #define left(v) (v)->narg[0]
43: #define right(v) (v)->narg[1]
44: #define parent(v) (v)->nnext
45:
46: #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
1.15 millert 47: #define ELEAF case EMPTYRE: /* empty string in regexp */
1.1 tholo 48: #define UNARY case STAR: case PLUS: case QUEST:
49:
50: /* encoding in tree Nodes:
1.15 millert 51: leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
1.1 tholo 52: left is index, right contains value or pointer to value
53: unary (STAR, PLUS, QUEST): left is child, right is null
54: binary (CAT, OR): left and right are children
55: parent contains pointer to parent
56: */
57:
58:
59: int *setvec;
60: int *tmpset;
61: int maxsetvec = 0;
62:
63: int rtok; /* next token in current re */
1.4 millert 64: int rlxval;
1.28 millert 65: static const uschar *rlxstr;
66: static const uschar *prestr; /* current position in current re */
67: static const uschar *lastre; /* origin of last re */
68: static const uschar *lastatom; /* origin of last Atom */
69: static const uschar *starttok;
70: static const uschar *basestr; /* starts with original, replaced during
1.23 millert 71: repetition processing */
1.28 millert 72: static const uschar *firstbasestr;
1.1 tholo 73:
1.4 millert 74: static int setcnt;
75: static int poscnt;
1.1 tholo 76:
1.28 millert 77: const char *patbeg;
1.1 tholo 78: int patlen;
79:
1.27 millert 80: #define NFA 128 /* cache this many dynamic fa's */
1.1 tholo 81: fa *fatab[NFA];
82: int nfatab = 0; /* entries in fatab */
83:
1.27 millert 84: static int *
85: intalloc(size_t n, const char *f)
86: {
87: void *p = calloc(n, sizeof(int));
88: if (p == NULL)
89: overflo(f);
90: return p;
91: }
92:
93: static void
94: allocsetvec(const char *f)
95: {
96: maxsetvec = MAXLIN;
97: setvec = reallocarray(setvec, maxsetvec, sizeof(*setvec));
98: tmpset = reallocarray(tmpset, maxsetvec, sizeof(*tmpset));
99: if (setvec == NULL || tmpset == NULL)
100: overflo(f);
101: }
102:
103: static void
104: resizesetvec(const char *f)
105: {
106: setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(*setvec));
107: tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(*tmpset));
108: if (setvec == NULL || tmpset == NULL)
109: overflo(f);
110: maxsetvec *= 4;
111: }
112:
113: static void
114: resize_state(fa *f, int state)
115: {
116: void *p;
117: int i, new_count;
118:
119: if (++state < f->state_count)
120: return;
121:
122: new_count = state + 10; /* needs to be tuned */
123:
124: p = reallocarray(f->gototab, new_count, sizeof(f->gototab[0]));
125: if (p == NULL)
126: goto out;
127: f->gototab = p;
128:
129: p = reallocarray(f->out, new_count, sizeof(f->out[0]));
130: if (p == NULL)
131: goto out;
132: f->out = p;
133:
134: p = reallocarray(f->posns, new_count, sizeof(f->posns[0]));
135: if (p == NULL)
136: goto out;
137: f->posns = p;
138:
139: for (i = f->state_count; i < new_count; ++i) {
140: f->gototab[i] = calloc(NCHARS, sizeof(**f->gototab));
141: if (f->gototab[i] == NULL)
142: goto out;
143: f->out[i] = 0;
144: f->posns[i] = NULL;
145: }
146: f->state_count = new_count;
147: return;
148: out:
149: overflo(__func__);
150: }
151:
1.29 millert 152: fa *makedfa(const char *s, bool anchor) /* returns dfa for reg expr s */
1.1 tholo 153: {
154: int i, use, nuse;
155: fa *pfa;
1.6 millert 156: static int now = 1;
1.1 tholo 157:
1.25 millert 158: if (setvec == NULL) { /* first time through any RE */
1.27 millert 159: allocsetvec(__func__);
1.1 tholo 160: }
161:
1.29 millert 162: if (compile_time != RUNNING) /* a constant for sure */
1.1 tholo 163: return mkdfa(s, anchor);
164: for (i = 0; i < nfatab; i++) /* is it there already? */
165: if (fatab[i]->anchor == anchor
1.11 millert 166: && strcmp((const char *) fatab[i]->restr, s) == 0) {
1.6 millert 167: fatab[i]->use = now++;
1.1 tholo 168: return fatab[i];
1.10 millert 169: }
1.1 tholo 170: pfa = mkdfa(s, anchor);
171: if (nfatab < NFA) { /* room for another */
172: fatab[nfatab] = pfa;
1.6 millert 173: fatab[nfatab]->use = now++;
1.1 tholo 174: nfatab++;
175: return pfa;
176: }
177: use = fatab[0]->use; /* replace least-recently used */
178: nuse = 0;
179: for (i = 1; i < nfatab; i++)
180: if (fatab[i]->use < use) {
181: use = fatab[i]->use;
182: nuse = i;
183: }
184: freefa(fatab[nuse]);
185: fatab[nuse] = pfa;
1.6 millert 186: pfa->use = now++;
1.1 tholo 187: return pfa;
188: }
189:
1.29 millert 190: fa *mkdfa(const char *s, bool anchor) /* does the real work of making a dfa */
1.30 ! millert 191: /* anchor = true for anchored matches, else false */
1.1 tholo 192: {
193: Node *p, *p1;
194: fa *f;
195:
1.28 millert 196: firstbasestr = (const uschar *) s;
1.23 millert 197: basestr = firstbasestr;
1.1 tholo 198: p = reparse(s);
199: p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
200: /* put ALL STAR in front of reg. exp. */
201: p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
202: /* put FINAL after reg. exp. */
203:
204: poscnt = 0;
205: penter(p1); /* enter parent pointers and leaf indices */
1.28 millert 206: if ((f = calloc(1, sizeof(fa) + poscnt * sizeof(rrow))) == NULL)
1.27 millert 207: overflo(__func__);
1.1 tholo 208: f->accept = poscnt-1; /* penter has computed number of positions in re */
209: cfoll(f, p1); /* set up follow sets */
210: freetr(p1);
1.27 millert 211: resize_state(f, 1);
212: f->posns[0] = intalloc(*(f->re[0].lfollow), __func__);
213: f->posns[1] = intalloc(1, __func__);
1.1 tholo 214: *f->posns[1] = 0;
215: f->initstat = makeinit(f, anchor);
216: f->anchor = anchor;
1.10 millert 217: f->restr = (uschar *) tostring(s);
1.23 millert 218: if (firstbasestr != basestr) {
219: if (basestr)
220: xfree(basestr);
221: }
1.1 tholo 222: return f;
223: }
224:
1.29 millert 225: int makeinit(fa *f, bool anchor)
1.1 tholo 226: {
227: int i, k;
228:
229: f->curstat = 2;
230: f->out[2] = 0;
231: k = *(f->re[0].lfollow);
1.25 millert 232: xfree(f->posns[2]);
1.27 millert 233: f->posns[2] = intalloc(k + 1, __func__);
1.30 ! millert 234: for (i = 0; i <= k; i++) {
1.1 tholo 235: (f->posns[2])[i] = (f->re[0].lfollow)[i];
236: }
237: if ((f->posns[2])[1] == f->accept)
238: f->out[2] = 1;
1.30 ! millert 239: for (i = 0; i < NCHARS; i++)
1.1 tholo 240: f->gototab[2][i] = 0;
241: f->curstat = cgoto(f, 2, HAT);
242: if (anchor) {
243: *f->posns[2] = k-1; /* leave out position 0 */
1.30 ! millert 244: for (i = 0; i < k; i++) {
1.1 tholo 245: (f->posns[0])[i] = (f->posns[2])[i];
246: }
247:
248: f->out[0] = f->out[2];
249: if (f->curstat != 2)
250: --(*f->posns[f->curstat]);
251: }
252: return f->curstat;
253: }
254:
255: void penter(Node *p) /* set up parent pointers and leaf indices */
256: {
257: switch (type(p)) {
1.15 millert 258: ELEAF
1.1 tholo 259: LEAF
1.7 millert 260: info(p) = poscnt;
1.1 tholo 261: poscnt++;
262: break;
263: UNARY
264: penter(left(p));
265: parent(left(p)) = p;
266: break;
267: case CAT:
268: case OR:
269: penter(left(p));
270: penter(right(p));
271: parent(left(p)) = p;
272: parent(right(p)) = p;
273: break;
274: default: /* can't happen */
1.8 millert 275: FATAL("can't happen: unknown type %d in penter", type(p));
1.1 tholo 276: break;
277: }
278: }
279:
280: void freetr(Node *p) /* free parse tree */
281: {
282: switch (type(p)) {
1.15 millert 283: ELEAF
1.1 tholo 284: LEAF
285: xfree(p);
286: break;
287: UNARY
288: freetr(left(p));
289: xfree(p);
290: break;
291: case CAT:
292: case OR:
293: freetr(left(p));
294: freetr(right(p));
295: xfree(p);
296: break;
297: default: /* can't happen */
1.8 millert 298: FATAL("can't happen: unknown type %d in freetr", type(p));
1.1 tholo 299: break;
300: }
301: }
302:
303: /* in the parsing of regular expressions, metacharacters like . have */
304: /* to be seen literally; \056 is not a metacharacter. */
305:
1.28 millert 306: int hexstr(const uschar **pp) /* find and eval hex string at pp, return new p */
1.2 millert 307: { /* only pick up one 8-bit byte (2 chars) */
1.28 millert 308: const uschar *p;
1.1 tholo 309: int n = 0;
1.2 millert 310: int i;
1.1 tholo 311:
1.28 millert 312: for (i = 0, p = *pp; i < 2 && isxdigit(*p); i++, p++) {
1.1 tholo 313: if (isdigit(*p))
314: n = 16 * n + *p - '0';
315: else if (*p >= 'a' && *p <= 'f')
316: n = 16 * n + *p - 'a' + 10;
317: else if (*p >= 'A' && *p <= 'F')
318: n = 16 * n + *p - 'A' + 10;
319: }
1.28 millert 320: *pp = p;
1.1 tholo 321: return n;
322: }
323:
1.2 millert 324: #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
1.1 tholo 325:
1.28 millert 326: int quoted(const uschar **pp) /* pick up next thing after a \\ */
1.1 tholo 327: /* and increment *pp */
328: {
1.28 millert 329: const uschar *p = *pp;
1.1 tholo 330: int c;
331:
332: if ((c = *p++) == 't')
333: c = '\t';
334: else if (c == 'n')
335: c = '\n';
336: else if (c == 'f')
337: c = '\f';
338: else if (c == 'r')
339: c = '\r';
340: else if (c == 'b')
341: c = '\b';
1.25 millert 342: else if (c == 'v')
343: c = '\v';
1.20 millert 344: else if (c == 'a')
1.25 millert 345: c = '\a';
1.1 tholo 346: else if (c == '\\')
347: c = '\\';
348: else if (c == 'x') { /* hexadecimal goo follows */
349: c = hexstr(&p); /* this adds a null if number is invalid */
350: } else if (isoctdigit(c)) { /* \d \dd \ddd */
351: int n = c - '0';
352: if (isoctdigit(*p)) {
353: n = 8 * n + *p++ - '0';
354: if (isoctdigit(*p))
355: n = 8 * n + *p++ - '0';
356: }
357: c = n;
358: } /* else */
359: /* c = c; */
360: *pp = p;
361: return c;
362: }
363:
1.11 millert 364: char *cclenter(const char *argp) /* add a character class */
1.1 tholo 365: {
366: int i, c, c2;
1.28 millert 367: const uschar *op, *p = (const uschar *) argp;
368: uschar *bp;
1.25 millert 369: static uschar *buf = NULL;
1.5 kstailey 370: static int bufsz = 100;
1.1 tholo 371:
372: op = p;
1.27 millert 373: if (buf == NULL && (buf = malloc(bufsz)) == NULL)
1.8 millert 374: FATAL("out of space for character class [%.10s...] 1", p);
1.5 kstailey 375: bp = buf;
376: for (i = 0; (c = *p++) != 0; ) {
1.1 tholo 377: if (c == '\\') {
1.17 millert 378: c = quoted(&p);
1.5 kstailey 379: } else if (c == '-' && i > 0 && bp[-1] != 0) {
1.1 tholo 380: if (*p != 0) {
1.5 kstailey 381: c = bp[-1];
1.1 tholo 382: c2 = *p++;
383: if (c2 == '\\')
1.17 millert 384: c2 = quoted(&p);
1.1 tholo 385: if (c > c2) { /* empty; ignore */
1.5 kstailey 386: bp--;
1.1 tholo 387: i--;
388: continue;
389: }
390: while (c < c2) {
1.15 millert 391: if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
1.8 millert 392: FATAL("out of space for character class [%.10s...] 2", p);
1.5 kstailey 393: *bp++ = ++c;
1.1 tholo 394: i++;
395: }
396: continue;
397: }
398: }
1.15 millert 399: if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
1.8 millert 400: FATAL("out of space for character class [%.10s...] 3", p);
1.5 kstailey 401: *bp++ = c;
1.1 tholo 402: i++;
403: }
1.5 kstailey 404: *bp = 0;
1.19 deraadt 405: DPRINTF( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
1.1 tholo 406: xfree(op);
1.10 millert 407: return (char *) tostring((char *) buf);
1.1 tholo 408: }
409:
1.11 millert 410: void overflo(const char *s)
1.1 tholo 411: {
1.27 millert 412: FATAL("regular expression too big: out of space in %.30s...", s);
1.1 tholo 413: }
414:
415: void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
416: {
417: int i;
1.4 millert 418: int *p;
1.1 tholo 419:
420: switch (type(v)) {
1.15 millert 421: ELEAF
1.1 tholo 422: LEAF
1.7 millert 423: f->re[info(v)].ltype = type(v);
424: f->re[info(v)].lval.np = right(v);
1.1 tholo 425: while (f->accept >= maxsetvec) { /* guessing here! */
1.27 millert 426: resizesetvec(__func__);
1.1 tholo 427: }
428: for (i = 0; i <= f->accept; i++)
429: setvec[i] = 0;
430: setcnt = 0;
431: follow(v); /* computes setvec and setcnt */
1.27 millert 432: p = intalloc(setcnt + 1, __func__);
1.7 millert 433: f->re[info(v)].lfollow = p;
1.1 tholo 434: *p = setcnt;
435: for (i = f->accept; i >= 0; i--)
436: if (setvec[i] == 1)
437: *++p = i;
438: break;
439: UNARY
440: cfoll(f,left(v));
441: break;
442: case CAT:
443: case OR:
444: cfoll(f,left(v));
445: cfoll(f,right(v));
446: break;
447: default: /* can't happen */
1.8 millert 448: FATAL("can't happen: unknown type %d in cfoll", type(v));
1.1 tholo 449: }
450: }
451:
452: int first(Node *p) /* collects initially active leaves of p into setvec */
1.15 millert 453: /* returns 0 if p matches empty string */
1.1 tholo 454: {
1.4 millert 455: int b, lp;
1.1 tholo 456:
457: switch (type(p)) {
1.15 millert 458: ELEAF
1.1 tholo 459: LEAF
1.7 millert 460: lp = info(p); /* look for high-water mark of subscripts */
1.1 tholo 461: while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
1.27 millert 462: resizesetvec(__func__);
1.1 tholo 463: }
1.15 millert 464: if (type(p) == EMPTYRE) {
465: setvec[lp] = 0;
466: return(0);
467: }
1.1 tholo 468: if (setvec[lp] != 1) {
469: setvec[lp] = 1;
470: setcnt++;
471: }
472: if (type(p) == CCL && (*(char *) right(p)) == '\0')
473: return(0); /* empty CCL */
1.30 ! millert 474: return(1);
1.1 tholo 475: case PLUS:
1.30 ! millert 476: if (first(left(p)) == 0)
! 477: return(0);
1.1 tholo 478: return(1);
479: case STAR:
480: case QUEST:
481: first(left(p));
482: return(0);
483: case CAT:
484: if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
485: return(1);
486: case OR:
487: b = first(right(p));
488: if (first(left(p)) == 0 || b == 0) return(0);
489: return(1);
490: }
1.8 millert 491: FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
1.1 tholo 492: return(-1);
493: }
494:
495: void follow(Node *v) /* collects leaves that can follow v into setvec */
496: {
497: Node *p;
498:
499: if (type(v) == FINAL)
500: return;
501: p = parent(v);
502: switch (type(p)) {
503: case STAR:
504: case PLUS:
505: first(v);
506: follow(p);
507: return;
508:
509: case OR:
510: case QUEST:
511: follow(p);
512: return;
513:
514: case CAT:
515: if (v == left(p)) { /* v is left child of p */
516: if (first(right(p)) == 0) {
517: follow(p);
518: return;
519: }
520: } else /* v is right child */
521: follow(p);
522: return;
523: }
524: }
525:
1.11 millert 526: int member(int c, const char *sarg) /* is c in s? */
1.1 tholo 527: {
1.28 millert 528: const uschar *s = (const uschar *) sarg;
1.10 millert 529:
1.1 tholo 530: while (*s)
531: if (c == *s++)
532: return(1);
533: return(0);
534: }
535:
1.11 millert 536: int match(fa *f, const char *p0) /* shortest match ? */
1.1 tholo 537: {
538: int s, ns;
1.28 millert 539: const uschar *p = (const uschar *) p0;
1.1 tholo 540:
1.27 millert 541: s = f->initstat;
542: assert (s < f->state_count);
543:
1.1 tholo 544: if (f->out[s])
545: return(1);
546: do {
1.15 millert 547: /* assert(*p < NCHARS); */
1.1 tholo 548: if ((ns = f->gototab[s][*p]) != 0)
549: s = ns;
550: else
551: s = cgoto(f, s, *p);
552: if (f->out[s])
553: return(1);
554: } while (*p++ != 0);
555: return(0);
556: }
557:
1.11 millert 558: int pmatch(fa *f, const char *p0) /* longest match, for sub */
1.1 tholo 559: {
560: int s, ns;
1.28 millert 561: const uschar *p = (const uschar *) p0;
562: const uschar *q;
1.1 tholo 563:
1.27 millert 564: s = f->initstat;
565: assert(s < f->state_count);
566:
1.28 millert 567: patbeg = (const char *)p;
1.1 tholo 568: patlen = -1;
569: do {
570: q = p;
571: do {
572: if (f->out[s]) /* final state */
573: patlen = q-p;
1.15 millert 574: /* assert(*q < NCHARS); */
1.1 tholo 575: if ((ns = f->gototab[s][*q]) != 0)
576: s = ns;
577: else
578: s = cgoto(f, s, *q);
1.27 millert 579:
580: assert(s < f->state_count);
581:
1.9 deraadt 582: if (s == 1) { /* no transition */
1.1 tholo 583: if (patlen >= 0) {
1.28 millert 584: patbeg = (const char *) p;
1.1 tholo 585: return(1);
586: }
587: else
588: goto nextin; /* no match */
1.9 deraadt 589: }
1.1 tholo 590: } while (*q++ != 0);
591: if (f->out[s])
592: patlen = q-p-1; /* don't count $ */
593: if (patlen >= 0) {
1.28 millert 594: patbeg = (const char *) p;
1.1 tholo 595: return(1);
596: }
597: nextin:
598: s = 2;
1.27 millert 599: } while (*p++);
1.1 tholo 600: return (0);
601: }
602:
1.11 millert 603: int nematch(fa *f, const char *p0) /* non-empty match, for sub */
1.1 tholo 604: {
605: int s, ns;
1.28 millert 606: const uschar *p = (const uschar *) p0;
607: const uschar *q;
1.1 tholo 608:
1.27 millert 609: s = f->initstat;
610: assert(s < f->state_count);
611:
1.28 millert 612: patbeg = (const char *)p;
1.1 tholo 613: patlen = -1;
614: while (*p) {
615: q = p;
616: do {
617: if (f->out[s]) /* final state */
618: patlen = q-p;
1.15 millert 619: /* assert(*q < NCHARS); */
1.1 tholo 620: if ((ns = f->gototab[s][*q]) != 0)
621: s = ns;
622: else
623: s = cgoto(f, s, *q);
1.9 deraadt 624: if (s == 1) { /* no transition */
1.1 tholo 625: if (patlen > 0) {
1.28 millert 626: patbeg = (const char *) p;
1.1 tholo 627: return(1);
628: } else
629: goto nnextin; /* no nonempty match */
1.9 deraadt 630: }
1.1 tholo 631: } while (*q++ != 0);
632: if (f->out[s])
633: patlen = q-p-1; /* don't count $ */
634: if (patlen > 0 ) {
1.28 millert 635: patbeg = (const char *) p;
1.1 tholo 636: return(1);
637: }
638: nnextin:
639: s = 2;
640: p++;
641: }
642: return (0);
643: }
644:
1.26 millert 645:
646: /*
647: * NAME
648: * fnematch
649: *
650: * DESCRIPTION
651: * A stream-fed version of nematch which transfers characters to a
652: * null-terminated buffer. All characters up to and including the last
653: * character of the matching text or EOF are placed in the buffer. If
654: * a match is found, patbeg and patlen are set appropriately.
655: *
656: * RETURN VALUES
1.29 millert 657: * false No match found.
658: * true Match found.
1.26 millert 659: */
660:
1.29 millert 661: bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
1.26 millert 662: {
663: char *buf = *pbuf;
664: int bufsize = *pbufsize;
665: int c, i, j, k, ns, s;
666:
667: s = pfa->initstat;
668: patlen = 0;
669:
670: /*
671: * All indices relative to buf.
672: * i <= j <= k <= bufsize
673: *
674: * i: origin of active substring
675: * j: current character
676: * k: destination of next getc()
677: */
678: i = -1, k = 0;
679: do {
680: j = i++;
681: do {
682: if (++j == k) {
683: if (k == bufsize)
684: if (!adjbuf(&buf, &bufsize, bufsize+1, quantum, 0, "fnematch"))
685: FATAL("stream '%.30s...' too long", buf);
686: buf[k++] = (c = getc(f)) != EOF ? c : 0;
687: }
688: c = buf[j];
689: /* assert(c < NCHARS); */
690:
691: if ((ns = pfa->gototab[s][c]) != 0)
692: s = ns;
693: else
694: s = cgoto(pfa, s, c);
695:
696: if (pfa->out[s]) { /* final state */
697: patlen = j - i + 1;
698: if (c == 0) /* don't count $ */
699: patlen--;
700: }
701: } while (buf[j] && s != 1);
702: s = 2;
703: } while (buf[i] && !patlen);
704:
705: /* adjbuf() may have relocated a resized buffer. Inform the world. */
706: *pbuf = buf;
707: *pbufsize = bufsize;
708:
709: if (patlen) {
710: patbeg = buf + i;
711: /*
712: * Under no circumstances is the last character fed to
713: * the automaton part of the match. It is EOF's nullbyte,
714: * or it sent the automaton into a state with no further
715: * transitions available (s==1), or both. Room for a
716: * terminating nullbyte is guaranteed.
717: *
718: * ungetc any chars after the end of matching text
719: * (except for EOF's nullbyte, if present) and null
720: * terminate the buffer.
721: */
722: do
723: if (buf[--k] && ungetc(buf[k], f) == EOF)
724: FATAL("unable to ungetc '%c'", buf[k]);
725: while (k > i + patlen);
1.30 ! millert 726: buf[k] = '\0';
1.29 millert 727: return true;
1.26 millert 728: }
729: else
1.29 millert 730: return false;
1.26 millert 731: }
732:
1.11 millert 733: Node *reparse(const char *p) /* parses regular expression pointed to by p */
1.1 tholo 734: { /* uses relex() to scan regular expression */
735: Node *np;
736:
1.19 deraadt 737: DPRINTF( ("reparse <%s>\n", p) );
1.28 millert 738: lastre = prestr = (const uschar *) p; /* prestr points to string to be parsed */
1.1 tholo 739: rtok = relex();
1.11 millert 740: /* GNU compatibility: an empty regexp matches anything */
1.15 millert 741: if (rtok == '\0') {
1.11 millert 742: /* FATAL("empty regular expression"); previous */
1.15 millert 743: return(op2(EMPTYRE, NIL, NIL));
744: }
1.1 tholo 745: np = regexp();
746: if (rtok != '\0')
1.8 millert 747: FATAL("syntax error in regular expression %s at %s", lastre, prestr);
1.1 tholo 748: return(np);
749: }
750:
751: Node *regexp(void) /* top-level parse of reg expr */
752: {
753: return (alt(concat(primary())));
754: }
755:
756: Node *primary(void)
757: {
758: Node *np;
1.23 millert 759: int savelastatom;
1.1 tholo 760:
761: switch (rtok) {
762: case CHAR:
1.23 millert 763: lastatom = starttok;
1.7 millert 764: np = op2(CHAR, NIL, itonp(rlxval));
1.1 tholo 765: rtok = relex();
766: return (unary(np));
767: case ALL:
768: rtok = relex();
769: return (unary(op2(ALL, NIL, NIL)));
1.15 millert 770: case EMPTYRE:
771: rtok = relex();
1.23 millert 772: return (unary(op2(EMPTYRE, NIL, NIL)));
1.1 tholo 773: case DOT:
1.23 millert 774: lastatom = starttok;
1.1 tholo 775: rtok = relex();
776: return (unary(op2(DOT, NIL, NIL)));
777: case CCL:
1.28 millert 778: np = op2(CCL, NIL, (Node*) cclenter((const char *) rlxstr));
1.23 millert 779: lastatom = starttok;
1.1 tholo 780: rtok = relex();
781: return (unary(np));
782: case NCCL:
1.28 millert 783: np = op2(NCCL, NIL, (Node *) cclenter((const char *) rlxstr));
1.23 millert 784: lastatom = starttok;
1.1 tholo 785: rtok = relex();
786: return (unary(np));
787: case '^':
788: rtok = relex();
1.7 millert 789: return (unary(op2(CHAR, NIL, itonp(HAT))));
1.1 tholo 790: case '$':
791: rtok = relex();
792: return (unary(op2(CHAR, NIL, NIL)));
793: case '(':
1.23 millert 794: lastatom = starttok;
795: savelastatom = starttok - basestr; /* Retain over recursion */
1.1 tholo 796: rtok = relex();
797: if (rtok == ')') { /* special pleading for () */
798: rtok = relex();
799: return unary(op2(CCL, NIL, (Node *) tostring("")));
800: }
801: np = regexp();
802: if (rtok == ')') {
1.23 millert 803: lastatom = basestr + savelastatom; /* Restore */
1.1 tholo 804: rtok = relex();
805: return (unary(np));
806: }
807: else
1.8 millert 808: FATAL("syntax error in regular expression %s at %s", lastre, prestr);
1.1 tholo 809: default:
1.8 millert 810: FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
1.1 tholo 811: }
812: return 0; /*NOTREACHED*/
813: }
814:
815: Node *concat(Node *np)
816: {
817: switch (rtok) {
1.23 millert 818: case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
1.1 tholo 819: return (concat(op2(CAT, np, primary())));
1.23 millert 820: case EMPTYRE:
821: rtok = relex();
822: return (concat(op2(CAT, op2(CCL, NIL, (Node *) tostring("")),
823: primary())));
1.1 tholo 824: }
825: return (np);
826: }
827:
828: Node *alt(Node *np)
829: {
830: if (rtok == OR) {
831: rtok = relex();
832: return (alt(op2(OR, np, concat(primary()))));
833: }
834: return (np);
835: }
836:
837: Node *unary(Node *np)
838: {
839: switch (rtok) {
840: case STAR:
841: rtok = relex();
842: return (unary(op2(STAR, np, NIL)));
843: case PLUS:
844: rtok = relex();
845: return (unary(op2(PLUS, np, NIL)));
846: case QUEST:
847: rtok = relex();
848: return (unary(op2(QUEST, np, NIL)));
849: default:
850: return (np);
851: }
852: }
853:
1.11 millert 854: /*
855: * Character class definitions conformant to the POSIX locale as
856: * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
857: * and operating character sets are both ASCII (ISO646) or supersets
858: * thereof.
859: *
860: * Note that to avoid overflowing the temporary buffer used in
861: * relex(), the expanded character class (prior to range expansion)
862: * must be less than twice the size of their full name.
863: */
1.12 millert 864:
865: /* Because isblank doesn't show up in any of the header files on any
866: * system i use, it's defined here. if some other locale has a richer
867: * definition of "blank", define HAS_ISBLANK and provide your own
868: * version.
869: * the parentheses here are an attempt to find a path through the maze
870: * of macro definition and/or function and/or version provided. thanks
871: * to nelson beebe for the suggestion; let's see if it works everywhere.
872: */
873:
874: #ifndef HAS_ISBLANK
875:
1.17 millert 876: int (xisblank)(int c)
1.12 millert 877: {
878: return c==' ' || c=='\t';
879: }
880:
881: #endif
882:
1.11 millert 883: struct charclass {
884: const char *cc_name;
885: int cc_namelen;
1.12 millert 886: int (*cc_func)(int);
1.11 millert 887: } charclasses[] = {
1.12 millert 888: { "alnum", 5, isalnum },
889: { "alpha", 5, isalpha },
1.17 millert 890: #ifndef HAS_ISBLANK
1.21 millert 891: { "blank", 5, xisblank },
1.17 millert 892: #else
1.12 millert 893: { "blank", 5, isblank },
1.17 millert 894: #endif
1.12 millert 895: { "cntrl", 5, iscntrl },
896: { "digit", 5, isdigit },
897: { "graph", 5, isgraph },
898: { "lower", 5, islower },
899: { "print", 5, isprint },
900: { "punct", 5, ispunct },
901: { "space", 5, isspace },
902: { "upper", 5, isupper },
903: { "xdigit", 6, isxdigit },
1.11 millert 904: { NULL, 0, NULL },
905: };
906:
1.23 millert 907: #define REPEAT_SIMPLE 0
908: #define REPEAT_PLUS_APPENDED 1
909: #define REPEAT_WITH_Q 2
910: #define REPEAT_ZERO 3
911:
912: static int
913: replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
914: int atomlen, int firstnum, int secondnum, int special_case)
915: {
916: int i, j;
1.26 millert 917: uschar *buf = NULL;
1.23 millert 918: int ret = 1;
1.30 ! millert 919: bool init_q = (firstnum == 0); /* first added char will be ? */
1.23 millert 920: int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */
921: int prefix_length = reptok - basestr; /* prefix includes first rep */
1.28 millert 922: int suffix_length = strlen((const char *) reptok) - reptoklen; /* string after rep specifier */
1.23 millert 923: int size = prefix_length + suffix_length;
924:
925: if (firstnum > 1) { /* add room for reps 2 through firstnum */
926: size += atomlen*(firstnum-1);
927: }
928:
929: /* Adjust size of buffer for special cases */
930: if (special_case == REPEAT_PLUS_APPENDED) {
931: size++; /* for the final + */
932: } else if (special_case == REPEAT_WITH_Q) {
933: size += init_q + (atomlen+1)* n_q_reps;
934: } else if (special_case == REPEAT_ZERO) {
935: size += 2; /* just a null ERE: () */
936: }
1.27 millert 937: if ((buf = malloc(size + 1)) == NULL)
1.23 millert 938: FATAL("out of space in reg expr %.10s..", lastre);
939: memcpy(buf, basestr, prefix_length); /* copy prefix */
940: j = prefix_length;
941: if (special_case == REPEAT_ZERO) {
942: j -= atomlen;
943: buf[j++] = '(';
944: buf[j++] = ')';
945: }
1.30 ! millert 946: for (i = 1; i < firstnum; i++) { /* copy x reps */
1.23 millert 947: memcpy(&buf[j], atom, atomlen);
948: j += atomlen;
949: }
950: if (special_case == REPEAT_PLUS_APPENDED) {
951: buf[j++] = '+';
952: } else if (special_case == REPEAT_WITH_Q) {
1.29 millert 953: if (init_q)
954: buf[j++] = '?';
1.30 ! millert 955: for (i = init_q; i < n_q_reps; i++) { /* copy x? reps */
1.23 millert 956: memcpy(&buf[j], atom, atomlen);
957: j += atomlen;
958: buf[j++] = '?';
959: }
960: }
961: memcpy(&buf[j], reptok+reptoklen, suffix_length);
962: if (special_case == REPEAT_ZERO) {
963: buf[j+suffix_length] = '\0';
964: } else {
965: buf[size] = '\0';
966: }
967: /* free old basestr */
968: if (firstbasestr != basestr) {
969: if (basestr)
970: xfree(basestr);
971: }
972: basestr = buf;
973: prestr = buf + prefix_length;
974: if (special_case == REPEAT_ZERO) {
975: prestr -= atomlen;
976: ret++;
977: }
978: return ret;
979: }
980:
981: static int repeat(const uschar *reptok, int reptoklen, const uschar *atom,
982: int atomlen, int firstnum, int secondnum)
983: {
984: /*
985: In general, the repetition specifier or "bound" is replaced here
986: by an equivalent ERE string, repeating the immediately previous atom
987: and appending ? and + as needed. Note that the first copy of the
988: atom is left in place, except in the special_case of a zero-repeat
989: (i.e., {0}).
990: */
991: if (secondnum < 0) { /* means {n,} -> repeat n-1 times followed by PLUS */
992: if (firstnum < 2) {
993: /* 0 or 1: should be handled before you get here */
994: FATAL("internal error");
995: } else {
996: return replace_repeat(reptok, reptoklen, atom, atomlen,
997: firstnum, secondnum, REPEAT_PLUS_APPENDED);
998: }
999: } else if (firstnum == secondnum) { /* {n} or {n,n} -> simply repeat n-1 times */
1000: if (firstnum == 0) { /* {0} or {0,0} */
1001: /* This case is unusual because the resulting
1002: replacement string might actually be SMALLER than
1003: the original ERE */
1004: return replace_repeat(reptok, reptoklen, atom, atomlen,
1005: firstnum, secondnum, REPEAT_ZERO);
1006: } else { /* (firstnum >= 1) */
1007: return replace_repeat(reptok, reptoklen, atom, atomlen,
1008: firstnum, secondnum, REPEAT_SIMPLE);
1009: }
1010: } else if (firstnum < secondnum) { /* {n,m} -> repeat n-1 times then alternate */
1011: /* x{n,m} => xx...x{1, m-n+1} => xx...x?x?x?..x? */
1012: return replace_repeat(reptok, reptoklen, atom, atomlen,
1013: firstnum, secondnum, REPEAT_WITH_Q);
1014: } else { /* Error - shouldn't be here (n>m) */
1015: FATAL("internal error");
1016: }
1017: return 0;
1018: }
1.11 millert 1019:
1.1 tholo 1020: int relex(void) /* lexical analyzer for reparse */
1021: {
1.5 kstailey 1022: int c, n;
1.1 tholo 1023: int cflag;
1.25 millert 1024: static uschar *buf = NULL;
1.5 kstailey 1025: static int bufsz = 100;
1.10 millert 1026: uschar *bp;
1.11 millert 1027: struct charclass *cc;
1.12 millert 1028: int i;
1.29 millert 1029: int num, m;
1030: bool commafound, digitfound;
1.23 millert 1031: const uschar *startreptok;
1.24 millert 1032: static int parens = 0;
1.23 millert 1033:
1034: rescan:
1035: starttok = prestr;
1.1 tholo 1036:
1037: switch (c = *prestr++) {
1038: case '|': return OR;
1039: case '*': return STAR;
1040: case '+': return PLUS;
1041: case '?': return QUEST;
1042: case '.': return DOT;
1043: case '\0': prestr--; return '\0';
1044: case '^':
1045: case '$':
1.24 millert 1046: return c;
1.1 tholo 1047: case '(':
1.24 millert 1048: parens++;
1049: return c;
1.1 tholo 1050: case ')':
1.24 millert 1051: if (parens) {
1052: parens--;
1053: return c;
1054: }
1055: /* unmatched close parenthesis; per POSIX, treat as literal */
1056: rlxval = c;
1057: return CHAR;
1.1 tholo 1058: case '\\':
1.17 millert 1059: rlxval = quoted(&prestr);
1.1 tholo 1060: return CHAR;
1061: default:
1062: rlxval = c;
1063: return CHAR;
1.25 millert 1064: case '[':
1.27 millert 1065: if (buf == NULL && (buf = malloc(bufsz)) == NULL)
1.8 millert 1066: FATAL("out of space in reg expr %.10s..", lastre);
1.5 kstailey 1067: bp = buf;
1.1 tholo 1068: if (*prestr == '^') {
1069: cflag = 1;
1070: prestr++;
1071: }
1072: else
1073: cflag = 0;
1.11 millert 1074: n = 2 * strlen((const char *) prestr)+1;
1.15 millert 1075: if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
1.8 millert 1076: FATAL("out of space for reg expr %.10s...", lastre);
1.1 tholo 1077: for (; ; ) {
1078: if ((c = *prestr++) == '\\') {
1.5 kstailey 1079: *bp++ = '\\';
1.1 tholo 1080: if ((c = *prestr++) == '\0')
1.8 millert 1081: FATAL("nonterminated character class %.20s...", lastre);
1.5 kstailey 1082: *bp++ = c;
1.10 millert 1083: /* } else if (c == '\n') { */
1084: /* FATAL("newline in character class %.20s...", lastre); */
1.11 millert 1085: } else if (c == '[' && *prestr == ':') {
1086: /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
1087: for (cc = charclasses; cc->cc_name; cc++)
1088: if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
1089: break;
1090: if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
1091: prestr[2 + cc->cc_namelen] == ']') {
1092: prestr += cc->cc_namelen + 3;
1.22 millert 1093: /*
1094: * BUG: We begin at 1, instead of 0, since we
1095: * would otherwise prematurely terminate the
1096: * string for classes like [[:cntrl:]]. This
1097: * means that we can't match the NUL character,
1098: * not without first adapting the entire
1099: * program to track each string's length.
1100: */
1.23 millert 1101: for (i = 1; i <= UCHAR_MAX; i++) {
1.15 millert 1102: if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
1.12 millert 1103: FATAL("out of space for reg expr %.10s...", lastre);
1104: if (cc->cc_func(i)) {
1105: *bp++ = i;
1106: n++;
1107: }
1108: }
1.11 millert 1109: } else
1110: *bp++ = c;
1.23 millert 1111: } else if (c == '[' && *prestr == '.') {
1112: char collate_char;
1113: prestr++;
1114: collate_char = *prestr++;
1115: if (*prestr == '.' && prestr[1] == ']') {
1116: prestr += 2;
1117: /* Found it: map via locale TBD: for
1118: now, simply return this char. This
1119: is sufficient to pass conformance
1120: test awk.ex 156
1121: */
1122: if (*prestr == ']') {
1123: prestr++;
1124: rlxval = collate_char;
1125: return CHAR;
1126: }
1127: }
1128: } else if (c == '[' && *prestr == '=') {
1129: char equiv_char;
1130: prestr++;
1131: equiv_char = *prestr++;
1132: if (*prestr == '=' && prestr[1] == ']') {
1133: prestr += 2;
1134: /* Found it: map via locale TBD: for now
1135: simply return this char. This is
1136: sufficient to pass conformance test
1137: awk.ex 156
1138: */
1139: if (*prestr == ']') {
1140: prestr++;
1141: rlxval = equiv_char;
1142: return CHAR;
1143: }
1144: }
1.1 tholo 1145: } else if (c == '\0') {
1.8 millert 1146: FATAL("nonterminated character class %.20s", lastre);
1.5 kstailey 1147: } else if (bp == buf) { /* 1st char is special */
1148: *bp++ = c;
1.1 tholo 1149: } else if (c == ']') {
1.5 kstailey 1150: *bp++ = 0;
1.10 millert 1151: rlxstr = (uschar *) tostring((char *) buf);
1.1 tholo 1152: if (cflag == 0)
1153: return CCL;
1154: else
1155: return NCCL;
1156: } else
1.5 kstailey 1157: *bp++ = c;
1.1 tholo 1158: }
1.23 millert 1159: break;
1160: case '{':
1161: if (isdigit(*(prestr))) {
1162: num = 0; /* Process as a repetition */
1163: n = -1; m = -1;
1.29 millert 1164: commafound = false;
1165: digitfound = false;
1.23 millert 1166: startreptok = prestr-1;
1167: /* Remember start of previous atom here ? */
1168: } else { /* just a { char, not a repetition */
1169: rlxval = c;
1170: return CHAR;
1171: }
1172: for (; ; ) {
1173: if ((c = *prestr++) == '}') {
1174: if (commafound) {
1175: if (digitfound) { /* {n,m} */
1176: m = num;
1.30 ! millert 1177: if (m < n)
1.23 millert 1178: FATAL("illegal repetition expression: class %.20s",
1179: lastre);
1.30 ! millert 1180: if (n == 0 && m == 1) {
1.23 millert 1181: return QUEST;
1182: }
1183: } else { /* {n,} */
1.30 ! millert 1184: if (n == 0)
! 1185: return STAR;
! 1186: else if (n == 1)
! 1187: return PLUS;
1.23 millert 1188: }
1189: } else {
1190: if (digitfound) { /* {n} same as {n,n} */
1191: n = num;
1192: m = num;
1193: } else { /* {} */
1194: FATAL("illegal repetition expression: class %.20s",
1195: lastre);
1196: }
1197: }
1198: if (repeat(starttok, prestr-starttok, lastatom,
1199: startreptok - lastatom, n, m) > 0) {
1.30 ! millert 1200: if (n == 0 && m == 0) {
1.23 millert 1201: return EMPTYRE;
1202: }
1203: /* must rescan input for next token */
1204: goto rescan;
1205: }
1206: /* Failed to replace: eat up {...} characters
1207: and treat like just PLUS */
1208: return PLUS;
1209: } else if (c == '\0') {
1210: FATAL("nonterminated character class %.20s",
1211: lastre);
1212: } else if (isdigit(c)) {
1213: num = 10 * num + c - '0';
1.29 millert 1214: digitfound = true;
1.23 millert 1215: } else if (c == ',') {
1216: if (commafound)
1217: FATAL("illegal repetition expression: class %.20s",
1218: lastre);
1219: /* looking for {n,} or {n,m} */
1.29 millert 1220: commafound = true;
1.23 millert 1221: n = num;
1.29 millert 1222: digitfound = false; /* reset */
1.23 millert 1223: num = 0;
1224: } else {
1225: FATAL("illegal repetition expression: class %.20s",
1226: lastre);
1227: }
1228: }
1229: break;
1.1 tholo 1230: }
1231: }
1232:
1233: int cgoto(fa *f, int s, int c)
1234: {
1.27 millert 1235: int *p, *q;
1.1 tholo 1236: int i, j, k;
1237:
1.12 millert 1238: assert(c == HAT || c < NCHARS);
1.1 tholo 1239: while (f->accept >= maxsetvec) { /* guessing here! */
1.27 millert 1240: resizesetvec(__func__);
1.1 tholo 1241: }
1242: for (i = 0; i <= f->accept; i++)
1243: setvec[i] = 0;
1244: setcnt = 0;
1.27 millert 1245: resize_state(f, s);
1.1 tholo 1246: /* compute positions of gototab[s,c] into setvec */
1247: p = f->posns[s];
1248: for (i = 1; i <= *p; i++) {
1249: if ((k = f->re[p[i]].ltype) != FINAL) {
1.7 millert 1250: if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
1.1 tholo 1251: || (k == DOT && c != 0 && c != HAT)
1252: || (k == ALL && c != 0)
1.15 millert 1253: || (k == EMPTYRE && c != 0)
1.10 millert 1254: || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
1255: || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
1.1 tholo 1256: q = f->re[p[i]].lfollow;
1257: for (j = 1; j <= *q; j++) {
1258: if (q[j] >= maxsetvec) {
1.27 millert 1259: resizesetvec(__func__);
1.1 tholo 1260: }
1261: if (setvec[q[j]] == 0) {
1262: setcnt++;
1263: setvec[q[j]] = 1;
1264: }
1265: }
1266: }
1267: }
1268: }
1269: /* determine if setvec is a previous state */
1270: tmpset[0] = setcnt;
1271: j = 1;
1272: for (i = f->accept; i >= 0; i--)
1273: if (setvec[i]) {
1274: tmpset[j++] = i;
1275: }
1.27 millert 1276: resize_state(f, f->curstat > s ? f->curstat : s);
1.1 tholo 1277: /* tmpset == previous state? */
1278: for (i = 1; i <= f->curstat; i++) {
1279: p = f->posns[i];
1280: if ((k = tmpset[0]) != p[0])
1281: goto different;
1282: for (j = 1; j <= k; j++)
1283: if (tmpset[j] != p[j])
1284: goto different;
1285: /* setvec is state i */
1.30 ! millert 1286: if (c != HAT)
! 1287: f->gototab[s][c] = i;
1.1 tholo 1288: return i;
1289: different:;
1290: }
1291:
1292: /* add tmpset to current set of states */
1.27 millert 1293: ++(f->curstat);
1294: resize_state(f, f->curstat);
1.1 tholo 1295: for (i = 0; i < NCHARS; i++)
1296: f->gototab[f->curstat][i] = 0;
1297: xfree(f->posns[f->curstat]);
1.27 millert 1298: p = intalloc(setcnt + 1, __func__);
1.1 tholo 1299:
1300: f->posns[f->curstat] = p;
1.30 ! millert 1301: if (c != HAT)
! 1302: f->gototab[s][c] = f->curstat;
1.1 tholo 1303: for (i = 0; i <= setcnt; i++)
1304: p[i] = tmpset[i];
1305: if (setvec[f->accept])
1306: f->out[f->curstat] = 1;
1307: else
1308: f->out[f->curstat] = 0;
1309: return f->curstat;
1310: }
1311:
1312:
1313: void freefa(fa *f) /* free a finite automaton */
1314: {
1315: int i;
1316:
1317: if (f == NULL)
1318: return;
1.27 millert 1319: for (i = 0; i < f->state_count; i++)
1320: xfree(f->gototab[i])
1.1 tholo 1321: for (i = 0; i <= f->curstat; i++)
1322: xfree(f->posns[i]);
1323: for (i = 0; i <= f->accept; i++) {
1324: xfree(f->re[i].lfollow);
1325: if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
1.30 ! millert 1326: xfree(f->re[i].lval.np);
1.1 tholo 1327: }
1328: xfree(f->restr);
1.27 millert 1329: xfree(f->out);
1330: xfree(f->posns);
1331: xfree(f->gototab);
1.1 tholo 1332: xfree(f);
1333: }