Annotation of src/usr.bin/awk/b.c, Revision 1.32
1.32 ! millert 1: /* $OpenBSD: b.c,v 1.31 2020/06/10 21:05:02 millert Exp $ */
1.1 tholo 2: /****************************************************************
1.5 kstailey 3: Copyright (C) Lucent Technologies 1997
1.1 tholo 4: All Rights Reserved
5:
6: Permission to use, copy, modify, and distribute this software and
7: its documentation for any purpose and without fee is hereby
8: granted, provided that the above copyright notice appear in all
9: copies and that both that the copyright notice and this
10: permission notice and warranty disclaimer appear in supporting
1.5 kstailey 11: documentation, and that the name Lucent Technologies or any of
12: its entities not be used in advertising or publicity pertaining
13: to distribution of the software without specific, written prior
14: permission.
15:
16: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23: THIS SOFTWARE.
1.1 tholo 24: ****************************************************************/
25:
1.15 millert 26: /* lasciate ogne speranza, voi ch'intrate. */
1.1 tholo 27:
28: #define DEBUG
29:
30: #include <ctype.h>
1.23 millert 31: #include <limits.h>
1.1 tholo 32: #include <stdio.h>
33: #include <string.h>
34: #include <stdlib.h>
35: #include "awk.h"
1.5 kstailey 36: #include "ytab.h"
1.1 tholo 37:
38: #define MAXLIN 22
39:
1.7 millert 40: #define type(v) (v)->nobj /* badly overloaded here */
41: #define info(v) (v)->ntype /* badly overloaded here */
1.1 tholo 42: #define left(v) (v)->narg[0]
43: #define right(v) (v)->narg[1]
44: #define parent(v) (v)->nnext
45:
46: #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
1.15 millert 47: #define ELEAF case EMPTYRE: /* empty string in regexp */
1.1 tholo 48: #define UNARY case STAR: case PLUS: case QUEST:
49:
50: /* encoding in tree Nodes:
1.15 millert 51: leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
1.1 tholo 52: left is index, right contains value or pointer to value
53: unary (STAR, PLUS, QUEST): left is child, right is null
54: binary (CAT, OR): left and right are children
55: parent contains pointer to parent
56: */
57:
58:
59: int *setvec;
60: int *tmpset;
61: int maxsetvec = 0;
62:
63: int rtok; /* next token in current re */
1.4 millert 64: int rlxval;
1.28 millert 65: static const uschar *rlxstr;
66: static const uschar *prestr; /* current position in current re */
67: static const uschar *lastre; /* origin of last re */
68: static const uschar *lastatom; /* origin of last Atom */
69: static const uschar *starttok;
70: static const uschar *basestr; /* starts with original, replaced during
1.23 millert 71: repetition processing */
1.28 millert 72: static const uschar *firstbasestr;
1.1 tholo 73:
1.4 millert 74: static int setcnt;
75: static int poscnt;
1.1 tholo 76:
1.28 millert 77: const char *patbeg;
1.1 tholo 78: int patlen;
79:
1.27 millert 80: #define NFA 128 /* cache this many dynamic fa's */
1.1 tholo 81: fa *fatab[NFA];
82: int nfatab = 0; /* entries in fatab */
83:
1.27 millert 84: static int *
85: intalloc(size_t n, const char *f)
86: {
87: void *p = calloc(n, sizeof(int));
88: if (p == NULL)
89: overflo(f);
90: return p;
91: }
92:
93: static void
94: allocsetvec(const char *f)
95: {
96: maxsetvec = MAXLIN;
97: setvec = reallocarray(setvec, maxsetvec, sizeof(*setvec));
98: tmpset = reallocarray(tmpset, maxsetvec, sizeof(*tmpset));
99: if (setvec == NULL || tmpset == NULL)
100: overflo(f);
101: }
102:
103: static void
104: resizesetvec(const char *f)
105: {
106: setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(*setvec));
107: tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(*tmpset));
108: if (setvec == NULL || tmpset == NULL)
109: overflo(f);
110: maxsetvec *= 4;
111: }
112:
113: static void
114: resize_state(fa *f, int state)
115: {
116: void *p;
117: int i, new_count;
118:
119: if (++state < f->state_count)
120: return;
121:
122: new_count = state + 10; /* needs to be tuned */
123:
124: p = reallocarray(f->gototab, new_count, sizeof(f->gototab[0]));
125: if (p == NULL)
126: goto out;
127: f->gototab = p;
128:
129: p = reallocarray(f->out, new_count, sizeof(f->out[0]));
130: if (p == NULL)
131: goto out;
132: f->out = p;
133:
134: p = reallocarray(f->posns, new_count, sizeof(f->posns[0]));
135: if (p == NULL)
136: goto out;
137: f->posns = p;
138:
139: for (i = f->state_count; i < new_count; ++i) {
140: f->gototab[i] = calloc(NCHARS, sizeof(**f->gototab));
141: if (f->gototab[i] == NULL)
142: goto out;
143: f->out[i] = 0;
144: f->posns[i] = NULL;
145: }
146: f->state_count = new_count;
147: return;
148: out:
149: overflo(__func__);
150: }
151:
1.29 millert 152: fa *makedfa(const char *s, bool anchor) /* returns dfa for reg expr s */
1.1 tholo 153: {
154: int i, use, nuse;
155: fa *pfa;
1.6 millert 156: static int now = 1;
1.1 tholo 157:
1.25 millert 158: if (setvec == NULL) { /* first time through any RE */
1.27 millert 159: allocsetvec(__func__);
1.1 tholo 160: }
161:
1.29 millert 162: if (compile_time != RUNNING) /* a constant for sure */
1.1 tholo 163: return mkdfa(s, anchor);
164: for (i = 0; i < nfatab; i++) /* is it there already? */
165: if (fatab[i]->anchor == anchor
1.11 millert 166: && strcmp((const char *) fatab[i]->restr, s) == 0) {
1.6 millert 167: fatab[i]->use = now++;
1.1 tholo 168: return fatab[i];
1.10 millert 169: }
1.1 tholo 170: pfa = mkdfa(s, anchor);
171: if (nfatab < NFA) { /* room for another */
172: fatab[nfatab] = pfa;
1.6 millert 173: fatab[nfatab]->use = now++;
1.1 tholo 174: nfatab++;
175: return pfa;
176: }
177: use = fatab[0]->use; /* replace least-recently used */
178: nuse = 0;
179: for (i = 1; i < nfatab; i++)
180: if (fatab[i]->use < use) {
181: use = fatab[i]->use;
182: nuse = i;
183: }
184: freefa(fatab[nuse]);
185: fatab[nuse] = pfa;
1.6 millert 186: pfa->use = now++;
1.1 tholo 187: return pfa;
188: }
189:
1.29 millert 190: fa *mkdfa(const char *s, bool anchor) /* does the real work of making a dfa */
1.30 millert 191: /* anchor = true for anchored matches, else false */
1.1 tholo 192: {
193: Node *p, *p1;
194: fa *f;
195:
1.28 millert 196: firstbasestr = (const uschar *) s;
1.23 millert 197: basestr = firstbasestr;
1.1 tholo 198: p = reparse(s);
199: p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
200: /* put ALL STAR in front of reg. exp. */
201: p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
202: /* put FINAL after reg. exp. */
203:
204: poscnt = 0;
205: penter(p1); /* enter parent pointers and leaf indices */
1.28 millert 206: if ((f = calloc(1, sizeof(fa) + poscnt * sizeof(rrow))) == NULL)
1.27 millert 207: overflo(__func__);
1.1 tholo 208: f->accept = poscnt-1; /* penter has computed number of positions in re */
209: cfoll(f, p1); /* set up follow sets */
210: freetr(p1);
1.27 millert 211: resize_state(f, 1);
212: f->posns[0] = intalloc(*(f->re[0].lfollow), __func__);
213: f->posns[1] = intalloc(1, __func__);
1.1 tholo 214: *f->posns[1] = 0;
215: f->initstat = makeinit(f, anchor);
216: f->anchor = anchor;
1.10 millert 217: f->restr = (uschar *) tostring(s);
1.23 millert 218: if (firstbasestr != basestr) {
219: if (basestr)
220: xfree(basestr);
221: }
1.1 tholo 222: return f;
223: }
224:
1.29 millert 225: int makeinit(fa *f, bool anchor)
1.1 tholo 226: {
227: int i, k;
228:
229: f->curstat = 2;
230: f->out[2] = 0;
231: k = *(f->re[0].lfollow);
1.25 millert 232: xfree(f->posns[2]);
1.27 millert 233: f->posns[2] = intalloc(k + 1, __func__);
1.30 millert 234: for (i = 0; i <= k; i++) {
1.1 tholo 235: (f->posns[2])[i] = (f->re[0].lfollow)[i];
236: }
237: if ((f->posns[2])[1] == f->accept)
238: f->out[2] = 1;
1.30 millert 239: for (i = 0; i < NCHARS; i++)
1.1 tholo 240: f->gototab[2][i] = 0;
241: f->curstat = cgoto(f, 2, HAT);
242: if (anchor) {
243: *f->posns[2] = k-1; /* leave out position 0 */
1.30 millert 244: for (i = 0; i < k; i++) {
1.1 tholo 245: (f->posns[0])[i] = (f->posns[2])[i];
246: }
247:
248: f->out[0] = f->out[2];
249: if (f->curstat != 2)
250: --(*f->posns[f->curstat]);
251: }
252: return f->curstat;
253: }
254:
255: void penter(Node *p) /* set up parent pointers and leaf indices */
256: {
257: switch (type(p)) {
1.15 millert 258: ELEAF
1.1 tholo 259: LEAF
1.7 millert 260: info(p) = poscnt;
1.1 tholo 261: poscnt++;
262: break;
263: UNARY
264: penter(left(p));
265: parent(left(p)) = p;
266: break;
267: case CAT:
268: case OR:
269: penter(left(p));
270: penter(right(p));
271: parent(left(p)) = p;
272: parent(right(p)) = p;
273: break;
1.31 millert 274: case ZERO:
275: break;
1.1 tholo 276: default: /* can't happen */
1.8 millert 277: FATAL("can't happen: unknown type %d in penter", type(p));
1.1 tholo 278: break;
279: }
280: }
281:
282: void freetr(Node *p) /* free parse tree */
283: {
284: switch (type(p)) {
1.15 millert 285: ELEAF
1.1 tholo 286: LEAF
287: xfree(p);
288: break;
289: UNARY
1.31 millert 290: case ZERO:
1.1 tholo 291: freetr(left(p));
292: xfree(p);
293: break;
294: case CAT:
295: case OR:
296: freetr(left(p));
297: freetr(right(p));
298: xfree(p);
299: break;
300: default: /* can't happen */
1.8 millert 301: FATAL("can't happen: unknown type %d in freetr", type(p));
1.1 tholo 302: break;
303: }
304: }
305:
306: /* in the parsing of regular expressions, metacharacters like . have */
307: /* to be seen literally; \056 is not a metacharacter. */
308:
1.28 millert 309: int hexstr(const uschar **pp) /* find and eval hex string at pp, return new p */
1.2 millert 310: { /* only pick up one 8-bit byte (2 chars) */
1.28 millert 311: const uschar *p;
1.1 tholo 312: int n = 0;
1.2 millert 313: int i;
1.1 tholo 314:
1.28 millert 315: for (i = 0, p = *pp; i < 2 && isxdigit(*p); i++, p++) {
1.1 tholo 316: if (isdigit(*p))
317: n = 16 * n + *p - '0';
318: else if (*p >= 'a' && *p <= 'f')
319: n = 16 * n + *p - 'a' + 10;
320: else if (*p >= 'A' && *p <= 'F')
321: n = 16 * n + *p - 'A' + 10;
322: }
1.28 millert 323: *pp = p;
1.1 tholo 324: return n;
325: }
326:
1.2 millert 327: #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
1.1 tholo 328:
1.28 millert 329: int quoted(const uschar **pp) /* pick up next thing after a \\ */
1.1 tholo 330: /* and increment *pp */
331: {
1.28 millert 332: const uschar *p = *pp;
1.1 tholo 333: int c;
334:
335: if ((c = *p++) == 't')
336: c = '\t';
337: else if (c == 'n')
338: c = '\n';
339: else if (c == 'f')
340: c = '\f';
341: else if (c == 'r')
342: c = '\r';
343: else if (c == 'b')
344: c = '\b';
1.25 millert 345: else if (c == 'v')
346: c = '\v';
1.20 millert 347: else if (c == 'a')
1.25 millert 348: c = '\a';
1.1 tholo 349: else if (c == '\\')
350: c = '\\';
351: else if (c == 'x') { /* hexadecimal goo follows */
352: c = hexstr(&p); /* this adds a null if number is invalid */
353: } else if (isoctdigit(c)) { /* \d \dd \ddd */
354: int n = c - '0';
355: if (isoctdigit(*p)) {
356: n = 8 * n + *p++ - '0';
357: if (isoctdigit(*p))
358: n = 8 * n + *p++ - '0';
359: }
360: c = n;
361: } /* else */
362: /* c = c; */
363: *pp = p;
364: return c;
365: }
366:
1.11 millert 367: char *cclenter(const char *argp) /* add a character class */
1.1 tholo 368: {
369: int i, c, c2;
1.28 millert 370: const uschar *op, *p = (const uschar *) argp;
371: uschar *bp;
1.25 millert 372: static uschar *buf = NULL;
1.5 kstailey 373: static int bufsz = 100;
1.1 tholo 374:
375: op = p;
1.27 millert 376: if (buf == NULL && (buf = malloc(bufsz)) == NULL)
1.8 millert 377: FATAL("out of space for character class [%.10s...] 1", p);
1.5 kstailey 378: bp = buf;
379: for (i = 0; (c = *p++) != 0; ) {
1.1 tholo 380: if (c == '\\') {
1.17 millert 381: c = quoted(&p);
1.5 kstailey 382: } else if (c == '-' && i > 0 && bp[-1] != 0) {
1.1 tholo 383: if (*p != 0) {
1.5 kstailey 384: c = bp[-1];
1.1 tholo 385: c2 = *p++;
386: if (c2 == '\\')
1.17 millert 387: c2 = quoted(&p);
1.1 tholo 388: if (c > c2) { /* empty; ignore */
1.5 kstailey 389: bp--;
1.1 tholo 390: i--;
391: continue;
392: }
393: while (c < c2) {
1.15 millert 394: if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
1.8 millert 395: FATAL("out of space for character class [%.10s...] 2", p);
1.5 kstailey 396: *bp++ = ++c;
1.1 tholo 397: i++;
398: }
399: continue;
400: }
401: }
1.15 millert 402: if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
1.8 millert 403: FATAL("out of space for character class [%.10s...] 3", p);
1.5 kstailey 404: *bp++ = c;
1.1 tholo 405: i++;
406: }
1.5 kstailey 407: *bp = 0;
1.32 ! millert 408: DPRINTF("cclenter: in = |%s|, out = |%s|\n", op, buf);
1.1 tholo 409: xfree(op);
1.10 millert 410: return (char *) tostring((char *) buf);
1.1 tholo 411: }
412:
1.11 millert 413: void overflo(const char *s)
1.1 tholo 414: {
1.27 millert 415: FATAL("regular expression too big: out of space in %.30s...", s);
1.1 tholo 416: }
417:
418: void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
419: {
420: int i;
1.4 millert 421: int *p;
1.1 tholo 422:
423: switch (type(v)) {
1.15 millert 424: ELEAF
1.1 tholo 425: LEAF
1.7 millert 426: f->re[info(v)].ltype = type(v);
427: f->re[info(v)].lval.np = right(v);
1.1 tholo 428: while (f->accept >= maxsetvec) { /* guessing here! */
1.27 millert 429: resizesetvec(__func__);
1.1 tholo 430: }
431: for (i = 0; i <= f->accept; i++)
432: setvec[i] = 0;
433: setcnt = 0;
434: follow(v); /* computes setvec and setcnt */
1.27 millert 435: p = intalloc(setcnt + 1, __func__);
1.7 millert 436: f->re[info(v)].lfollow = p;
1.1 tholo 437: *p = setcnt;
438: for (i = f->accept; i >= 0; i--)
439: if (setvec[i] == 1)
440: *++p = i;
441: break;
442: UNARY
443: cfoll(f,left(v));
444: break;
445: case CAT:
446: case OR:
447: cfoll(f,left(v));
448: cfoll(f,right(v));
449: break;
1.31 millert 450: case ZERO:
451: break;
1.1 tholo 452: default: /* can't happen */
1.8 millert 453: FATAL("can't happen: unknown type %d in cfoll", type(v));
1.1 tholo 454: }
455: }
456:
457: int first(Node *p) /* collects initially active leaves of p into setvec */
1.15 millert 458: /* returns 0 if p matches empty string */
1.1 tholo 459: {
1.4 millert 460: int b, lp;
1.1 tholo 461:
462: switch (type(p)) {
1.15 millert 463: ELEAF
1.1 tholo 464: LEAF
1.7 millert 465: lp = info(p); /* look for high-water mark of subscripts */
1.1 tholo 466: while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
1.27 millert 467: resizesetvec(__func__);
1.1 tholo 468: }
1.15 millert 469: if (type(p) == EMPTYRE) {
470: setvec[lp] = 0;
471: return(0);
472: }
1.1 tholo 473: if (setvec[lp] != 1) {
474: setvec[lp] = 1;
475: setcnt++;
476: }
477: if (type(p) == CCL && (*(char *) right(p)) == '\0')
478: return(0); /* empty CCL */
1.30 millert 479: return(1);
1.1 tholo 480: case PLUS:
1.30 millert 481: if (first(left(p)) == 0)
482: return(0);
1.1 tholo 483: return(1);
484: case STAR:
485: case QUEST:
486: first(left(p));
487: return(0);
488: case CAT:
489: if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
490: return(1);
491: case OR:
492: b = first(right(p));
493: if (first(left(p)) == 0 || b == 0) return(0);
494: return(1);
1.31 millert 495: case ZERO:
496: return 0;
1.1 tholo 497: }
1.8 millert 498: FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
1.1 tholo 499: return(-1);
500: }
501:
502: void follow(Node *v) /* collects leaves that can follow v into setvec */
503: {
504: Node *p;
505:
506: if (type(v) == FINAL)
507: return;
508: p = parent(v);
509: switch (type(p)) {
510: case STAR:
511: case PLUS:
512: first(v);
513: follow(p);
514: return;
515:
516: case OR:
517: case QUEST:
518: follow(p);
519: return;
520:
521: case CAT:
522: if (v == left(p)) { /* v is left child of p */
523: if (first(right(p)) == 0) {
524: follow(p);
525: return;
526: }
527: } else /* v is right child */
528: follow(p);
529: return;
530: }
531: }
532:
1.11 millert 533: int member(int c, const char *sarg) /* is c in s? */
1.1 tholo 534: {
1.28 millert 535: const uschar *s = (const uschar *) sarg;
1.10 millert 536:
1.1 tholo 537: while (*s)
538: if (c == *s++)
539: return(1);
540: return(0);
541: }
542:
1.11 millert 543: int match(fa *f, const char *p0) /* shortest match ? */
1.1 tholo 544: {
545: int s, ns;
1.28 millert 546: const uschar *p = (const uschar *) p0;
1.1 tholo 547:
1.27 millert 548: s = f->initstat;
549: assert (s < f->state_count);
550:
1.1 tholo 551: if (f->out[s])
552: return(1);
553: do {
1.15 millert 554: /* assert(*p < NCHARS); */
1.1 tholo 555: if ((ns = f->gototab[s][*p]) != 0)
556: s = ns;
557: else
558: s = cgoto(f, s, *p);
559: if (f->out[s])
560: return(1);
561: } while (*p++ != 0);
562: return(0);
563: }
564:
1.11 millert 565: int pmatch(fa *f, const char *p0) /* longest match, for sub */
1.1 tholo 566: {
567: int s, ns;
1.28 millert 568: const uschar *p = (const uschar *) p0;
569: const uschar *q;
1.1 tholo 570:
1.27 millert 571: s = f->initstat;
572: assert(s < f->state_count);
573:
1.28 millert 574: patbeg = (const char *)p;
1.1 tholo 575: patlen = -1;
576: do {
577: q = p;
578: do {
579: if (f->out[s]) /* final state */
580: patlen = q-p;
1.15 millert 581: /* assert(*q < NCHARS); */
1.1 tholo 582: if ((ns = f->gototab[s][*q]) != 0)
583: s = ns;
584: else
585: s = cgoto(f, s, *q);
1.27 millert 586:
587: assert(s < f->state_count);
588:
1.9 deraadt 589: if (s == 1) { /* no transition */
1.1 tholo 590: if (patlen >= 0) {
1.28 millert 591: patbeg = (const char *) p;
1.1 tholo 592: return(1);
593: }
594: else
595: goto nextin; /* no match */
1.9 deraadt 596: }
1.1 tholo 597: } while (*q++ != 0);
598: if (f->out[s])
599: patlen = q-p-1; /* don't count $ */
600: if (patlen >= 0) {
1.28 millert 601: patbeg = (const char *) p;
1.1 tholo 602: return(1);
603: }
604: nextin:
605: s = 2;
1.27 millert 606: } while (*p++);
1.1 tholo 607: return (0);
608: }
609:
1.11 millert 610: int nematch(fa *f, const char *p0) /* non-empty match, for sub */
1.1 tholo 611: {
612: int s, ns;
1.28 millert 613: const uschar *p = (const uschar *) p0;
614: const uschar *q;
1.1 tholo 615:
1.27 millert 616: s = f->initstat;
617: assert(s < f->state_count);
618:
1.28 millert 619: patbeg = (const char *)p;
1.1 tholo 620: patlen = -1;
621: while (*p) {
622: q = p;
623: do {
624: if (f->out[s]) /* final state */
625: patlen = q-p;
1.15 millert 626: /* assert(*q < NCHARS); */
1.1 tholo 627: if ((ns = f->gototab[s][*q]) != 0)
628: s = ns;
629: else
630: s = cgoto(f, s, *q);
1.9 deraadt 631: if (s == 1) { /* no transition */
1.1 tholo 632: if (patlen > 0) {
1.28 millert 633: patbeg = (const char *) p;
1.1 tholo 634: return(1);
635: } else
636: goto nnextin; /* no nonempty match */
1.9 deraadt 637: }
1.1 tholo 638: } while (*q++ != 0);
639: if (f->out[s])
640: patlen = q-p-1; /* don't count $ */
641: if (patlen > 0 ) {
1.28 millert 642: patbeg = (const char *) p;
1.1 tholo 643: return(1);
644: }
645: nnextin:
646: s = 2;
647: p++;
648: }
649: return (0);
650: }
651:
1.26 millert 652:
653: /*
654: * NAME
655: * fnematch
656: *
657: * DESCRIPTION
658: * A stream-fed version of nematch which transfers characters to a
659: * null-terminated buffer. All characters up to and including the last
660: * character of the matching text or EOF are placed in the buffer. If
661: * a match is found, patbeg and patlen are set appropriately.
662: *
663: * RETURN VALUES
1.29 millert 664: * false No match found.
665: * true Match found.
1.26 millert 666: */
667:
1.29 millert 668: bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
1.26 millert 669: {
670: char *buf = *pbuf;
671: int bufsize = *pbufsize;
672: int c, i, j, k, ns, s;
673:
674: s = pfa->initstat;
675: patlen = 0;
676:
677: /*
678: * All indices relative to buf.
679: * i <= j <= k <= bufsize
680: *
681: * i: origin of active substring
682: * j: current character
683: * k: destination of next getc()
684: */
685: i = -1, k = 0;
686: do {
687: j = i++;
688: do {
689: if (++j == k) {
690: if (k == bufsize)
691: if (!adjbuf(&buf, &bufsize, bufsize+1, quantum, 0, "fnematch"))
692: FATAL("stream '%.30s...' too long", buf);
693: buf[k++] = (c = getc(f)) != EOF ? c : 0;
694: }
695: c = buf[j];
696: /* assert(c < NCHARS); */
697:
698: if ((ns = pfa->gototab[s][c]) != 0)
699: s = ns;
700: else
701: s = cgoto(pfa, s, c);
702:
703: if (pfa->out[s]) { /* final state */
704: patlen = j - i + 1;
705: if (c == 0) /* don't count $ */
706: patlen--;
707: }
708: } while (buf[j] && s != 1);
709: s = 2;
710: } while (buf[i] && !patlen);
711:
712: /* adjbuf() may have relocated a resized buffer. Inform the world. */
713: *pbuf = buf;
714: *pbufsize = bufsize;
715:
716: if (patlen) {
717: patbeg = buf + i;
718: /*
719: * Under no circumstances is the last character fed to
720: * the automaton part of the match. It is EOF's nullbyte,
721: * or it sent the automaton into a state with no further
722: * transitions available (s==1), or both. Room for a
723: * terminating nullbyte is guaranteed.
724: *
725: * ungetc any chars after the end of matching text
726: * (except for EOF's nullbyte, if present) and null
727: * terminate the buffer.
728: */
729: do
730: if (buf[--k] && ungetc(buf[k], f) == EOF)
731: FATAL("unable to ungetc '%c'", buf[k]);
732: while (k > i + patlen);
1.30 millert 733: buf[k] = '\0';
1.29 millert 734: return true;
1.26 millert 735: }
736: else
1.29 millert 737: return false;
1.26 millert 738: }
739:
1.11 millert 740: Node *reparse(const char *p) /* parses regular expression pointed to by p */
1.1 tholo 741: { /* uses relex() to scan regular expression */
742: Node *np;
743:
1.32 ! millert 744: DPRINTF("reparse <%s>\n", p);
1.28 millert 745: lastre = prestr = (const uschar *) p; /* prestr points to string to be parsed */
1.1 tholo 746: rtok = relex();
1.11 millert 747: /* GNU compatibility: an empty regexp matches anything */
1.15 millert 748: if (rtok == '\0') {
1.11 millert 749: /* FATAL("empty regular expression"); previous */
1.15 millert 750: return(op2(EMPTYRE, NIL, NIL));
751: }
1.1 tholo 752: np = regexp();
753: if (rtok != '\0')
1.8 millert 754: FATAL("syntax error in regular expression %s at %s", lastre, prestr);
1.1 tholo 755: return(np);
756: }
757:
758: Node *regexp(void) /* top-level parse of reg expr */
759: {
760: return (alt(concat(primary())));
761: }
762:
763: Node *primary(void)
764: {
765: Node *np;
1.23 millert 766: int savelastatom;
1.1 tholo 767:
768: switch (rtok) {
769: case CHAR:
1.23 millert 770: lastatom = starttok;
1.7 millert 771: np = op2(CHAR, NIL, itonp(rlxval));
1.1 tholo 772: rtok = relex();
773: return (unary(np));
774: case ALL:
775: rtok = relex();
776: return (unary(op2(ALL, NIL, NIL)));
1.15 millert 777: case EMPTYRE:
778: rtok = relex();
1.23 millert 779: return (unary(op2(EMPTYRE, NIL, NIL)));
1.1 tholo 780: case DOT:
1.23 millert 781: lastatom = starttok;
1.1 tholo 782: rtok = relex();
783: return (unary(op2(DOT, NIL, NIL)));
784: case CCL:
1.28 millert 785: np = op2(CCL, NIL, (Node*) cclenter((const char *) rlxstr));
1.23 millert 786: lastatom = starttok;
1.1 tholo 787: rtok = relex();
788: return (unary(np));
789: case NCCL:
1.28 millert 790: np = op2(NCCL, NIL, (Node *) cclenter((const char *) rlxstr));
1.23 millert 791: lastatom = starttok;
1.1 tholo 792: rtok = relex();
793: return (unary(np));
794: case '^':
795: rtok = relex();
1.7 millert 796: return (unary(op2(CHAR, NIL, itonp(HAT))));
1.1 tholo 797: case '$':
798: rtok = relex();
799: return (unary(op2(CHAR, NIL, NIL)));
800: case '(':
1.23 millert 801: lastatom = starttok;
802: savelastatom = starttok - basestr; /* Retain over recursion */
1.1 tholo 803: rtok = relex();
804: if (rtok == ')') { /* special pleading for () */
805: rtok = relex();
806: return unary(op2(CCL, NIL, (Node *) tostring("")));
807: }
808: np = regexp();
809: if (rtok == ')') {
1.23 millert 810: lastatom = basestr + savelastatom; /* Restore */
1.1 tholo 811: rtok = relex();
812: return (unary(np));
813: }
814: else
1.8 millert 815: FATAL("syntax error in regular expression %s at %s", lastre, prestr);
1.1 tholo 816: default:
1.8 millert 817: FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
1.1 tholo 818: }
819: return 0; /*NOTREACHED*/
820: }
821:
822: Node *concat(Node *np)
823: {
824: switch (rtok) {
1.23 millert 825: case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
1.1 tholo 826: return (concat(op2(CAT, np, primary())));
1.23 millert 827: case EMPTYRE:
828: rtok = relex();
829: return (concat(op2(CAT, op2(CCL, NIL, (Node *) tostring("")),
830: primary())));
1.1 tholo 831: }
832: return (np);
833: }
834:
835: Node *alt(Node *np)
836: {
837: if (rtok == OR) {
838: rtok = relex();
839: return (alt(op2(OR, np, concat(primary()))));
840: }
841: return (np);
842: }
843:
844: Node *unary(Node *np)
845: {
846: switch (rtok) {
847: case STAR:
848: rtok = relex();
849: return (unary(op2(STAR, np, NIL)));
850: case PLUS:
851: rtok = relex();
852: return (unary(op2(PLUS, np, NIL)));
853: case QUEST:
854: rtok = relex();
855: return (unary(op2(QUEST, np, NIL)));
1.31 millert 856: case ZERO:
857: rtok = relex();
858: return (unary(op2(ZERO, np, NIL)));
1.1 tholo 859: default:
860: return (np);
861: }
862: }
863:
1.11 millert 864: /*
865: * Character class definitions conformant to the POSIX locale as
866: * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
867: * and operating character sets are both ASCII (ISO646) or supersets
868: * thereof.
869: *
870: * Note that to avoid overflowing the temporary buffer used in
871: * relex(), the expanded character class (prior to range expansion)
872: * must be less than twice the size of their full name.
873: */
1.12 millert 874:
875: /* Because isblank doesn't show up in any of the header files on any
876: * system i use, it's defined here. if some other locale has a richer
877: * definition of "blank", define HAS_ISBLANK and provide your own
878: * version.
879: * the parentheses here are an attempt to find a path through the maze
880: * of macro definition and/or function and/or version provided. thanks
881: * to nelson beebe for the suggestion; let's see if it works everywhere.
882: */
883:
884: #ifndef HAS_ISBLANK
885:
1.17 millert 886: int (xisblank)(int c)
1.12 millert 887: {
888: return c==' ' || c=='\t';
889: }
890:
891: #endif
892:
1.31 millert 893: static const struct charclass {
1.11 millert 894: const char *cc_name;
895: int cc_namelen;
1.12 millert 896: int (*cc_func)(int);
1.11 millert 897: } charclasses[] = {
1.12 millert 898: { "alnum", 5, isalnum },
899: { "alpha", 5, isalpha },
1.17 millert 900: #ifndef HAS_ISBLANK
1.21 millert 901: { "blank", 5, xisblank },
1.17 millert 902: #else
1.12 millert 903: { "blank", 5, isblank },
1.17 millert 904: #endif
1.12 millert 905: { "cntrl", 5, iscntrl },
906: { "digit", 5, isdigit },
907: { "graph", 5, isgraph },
908: { "lower", 5, islower },
909: { "print", 5, isprint },
910: { "punct", 5, ispunct },
911: { "space", 5, isspace },
912: { "upper", 5, isupper },
913: { "xdigit", 6, isxdigit },
1.11 millert 914: { NULL, 0, NULL },
915: };
916:
1.23 millert 917: #define REPEAT_SIMPLE 0
918: #define REPEAT_PLUS_APPENDED 1
919: #define REPEAT_WITH_Q 2
920: #define REPEAT_ZERO 3
921:
922: static int
923: replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
924: int atomlen, int firstnum, int secondnum, int special_case)
925: {
926: int i, j;
1.26 millert 927: uschar *buf = NULL;
1.23 millert 928: int ret = 1;
1.31 millert 929: int init_q = (firstnum == 0); /* first added char will be ? */
1.23 millert 930: int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */
931: int prefix_length = reptok - basestr; /* prefix includes first rep */
1.28 millert 932: int suffix_length = strlen((const char *) reptok) - reptoklen; /* string after rep specifier */
1.23 millert 933: int size = prefix_length + suffix_length;
934:
935: if (firstnum > 1) { /* add room for reps 2 through firstnum */
936: size += atomlen*(firstnum-1);
937: }
938:
939: /* Adjust size of buffer for special cases */
940: if (special_case == REPEAT_PLUS_APPENDED) {
941: size++; /* for the final + */
942: } else if (special_case == REPEAT_WITH_Q) {
943: size += init_q + (atomlen+1)* n_q_reps;
944: } else if (special_case == REPEAT_ZERO) {
945: size += 2; /* just a null ERE: () */
946: }
1.27 millert 947: if ((buf = malloc(size + 1)) == NULL)
1.23 millert 948: FATAL("out of space in reg expr %.10s..", lastre);
949: memcpy(buf, basestr, prefix_length); /* copy prefix */
950: j = prefix_length;
951: if (special_case == REPEAT_ZERO) {
952: j -= atomlen;
953: buf[j++] = '(';
954: buf[j++] = ')';
955: }
1.30 millert 956: for (i = 1; i < firstnum; i++) { /* copy x reps */
1.23 millert 957: memcpy(&buf[j], atom, atomlen);
958: j += atomlen;
959: }
960: if (special_case == REPEAT_PLUS_APPENDED) {
961: buf[j++] = '+';
962: } else if (special_case == REPEAT_WITH_Q) {
1.29 millert 963: if (init_q)
964: buf[j++] = '?';
1.30 millert 965: for (i = init_q; i < n_q_reps; i++) { /* copy x? reps */
1.23 millert 966: memcpy(&buf[j], atom, atomlen);
967: j += atomlen;
968: buf[j++] = '?';
969: }
970: }
971: memcpy(&buf[j], reptok+reptoklen, suffix_length);
972: if (special_case == REPEAT_ZERO) {
973: buf[j+suffix_length] = '\0';
974: } else {
975: buf[size] = '\0';
976: }
977: /* free old basestr */
978: if (firstbasestr != basestr) {
979: if (basestr)
980: xfree(basestr);
981: }
982: basestr = buf;
983: prestr = buf + prefix_length;
984: if (special_case == REPEAT_ZERO) {
985: prestr -= atomlen;
986: ret++;
987: }
988: return ret;
989: }
990:
991: static int repeat(const uschar *reptok, int reptoklen, const uschar *atom,
992: int atomlen, int firstnum, int secondnum)
993: {
994: /*
995: In general, the repetition specifier or "bound" is replaced here
996: by an equivalent ERE string, repeating the immediately previous atom
997: and appending ? and + as needed. Note that the first copy of the
998: atom is left in place, except in the special_case of a zero-repeat
999: (i.e., {0}).
1000: */
1001: if (secondnum < 0) { /* means {n,} -> repeat n-1 times followed by PLUS */
1002: if (firstnum < 2) {
1003: /* 0 or 1: should be handled before you get here */
1004: FATAL("internal error");
1005: } else {
1006: return replace_repeat(reptok, reptoklen, atom, atomlen,
1007: firstnum, secondnum, REPEAT_PLUS_APPENDED);
1008: }
1009: } else if (firstnum == secondnum) { /* {n} or {n,n} -> simply repeat n-1 times */
1010: if (firstnum == 0) { /* {0} or {0,0} */
1011: /* This case is unusual because the resulting
1012: replacement string might actually be SMALLER than
1013: the original ERE */
1014: return replace_repeat(reptok, reptoklen, atom, atomlen,
1015: firstnum, secondnum, REPEAT_ZERO);
1016: } else { /* (firstnum >= 1) */
1017: return replace_repeat(reptok, reptoklen, atom, atomlen,
1018: firstnum, secondnum, REPEAT_SIMPLE);
1019: }
1020: } else if (firstnum < secondnum) { /* {n,m} -> repeat n-1 times then alternate */
1021: /* x{n,m} => xx...x{1, m-n+1} => xx...x?x?x?..x? */
1022: return replace_repeat(reptok, reptoklen, atom, atomlen,
1023: firstnum, secondnum, REPEAT_WITH_Q);
1024: } else { /* Error - shouldn't be here (n>m) */
1025: FATAL("internal error");
1026: }
1027: return 0;
1028: }
1.11 millert 1029:
1.1 tholo 1030: int relex(void) /* lexical analyzer for reparse */
1031: {
1.5 kstailey 1032: int c, n;
1.1 tholo 1033: int cflag;
1.25 millert 1034: static uschar *buf = NULL;
1.5 kstailey 1035: static int bufsz = 100;
1.10 millert 1036: uschar *bp;
1.31 millert 1037: const struct charclass *cc;
1.12 millert 1038: int i;
1.29 millert 1039: int num, m;
1040: bool commafound, digitfound;
1.23 millert 1041: const uschar *startreptok;
1.24 millert 1042: static int parens = 0;
1.23 millert 1043:
1044: rescan:
1045: starttok = prestr;
1.1 tholo 1046:
1047: switch (c = *prestr++) {
1048: case '|': return OR;
1049: case '*': return STAR;
1050: case '+': return PLUS;
1051: case '?': return QUEST;
1052: case '.': return DOT;
1053: case '\0': prestr--; return '\0';
1054: case '^':
1055: case '$':
1.24 millert 1056: return c;
1.1 tholo 1057: case '(':
1.24 millert 1058: parens++;
1059: return c;
1.1 tholo 1060: case ')':
1.24 millert 1061: if (parens) {
1062: parens--;
1063: return c;
1064: }
1065: /* unmatched close parenthesis; per POSIX, treat as literal */
1066: rlxval = c;
1067: return CHAR;
1.1 tholo 1068: case '\\':
1.17 millert 1069: rlxval = quoted(&prestr);
1.1 tholo 1070: return CHAR;
1071: default:
1072: rlxval = c;
1073: return CHAR;
1.25 millert 1074: case '[':
1.27 millert 1075: if (buf == NULL && (buf = malloc(bufsz)) == NULL)
1.8 millert 1076: FATAL("out of space in reg expr %.10s..", lastre);
1.5 kstailey 1077: bp = buf;
1.1 tholo 1078: if (*prestr == '^') {
1079: cflag = 1;
1080: prestr++;
1081: }
1082: else
1083: cflag = 0;
1.11 millert 1084: n = 2 * strlen((const char *) prestr)+1;
1.15 millert 1085: if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
1.8 millert 1086: FATAL("out of space for reg expr %.10s...", lastre);
1.1 tholo 1087: for (; ; ) {
1088: if ((c = *prestr++) == '\\') {
1.5 kstailey 1089: *bp++ = '\\';
1.1 tholo 1090: if ((c = *prestr++) == '\0')
1.8 millert 1091: FATAL("nonterminated character class %.20s...", lastre);
1.5 kstailey 1092: *bp++ = c;
1.10 millert 1093: /* } else if (c == '\n') { */
1094: /* FATAL("newline in character class %.20s...", lastre); */
1.11 millert 1095: } else if (c == '[' && *prestr == ':') {
1096: /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
1097: for (cc = charclasses; cc->cc_name; cc++)
1098: if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
1099: break;
1100: if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
1101: prestr[2 + cc->cc_namelen] == ']') {
1102: prestr += cc->cc_namelen + 3;
1.22 millert 1103: /*
1104: * BUG: We begin at 1, instead of 0, since we
1105: * would otherwise prematurely terminate the
1106: * string for classes like [[:cntrl:]]. This
1107: * means that we can't match the NUL character,
1108: * not without first adapting the entire
1109: * program to track each string's length.
1110: */
1.23 millert 1111: for (i = 1; i <= UCHAR_MAX; i++) {
1.15 millert 1112: if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
1.12 millert 1113: FATAL("out of space for reg expr %.10s...", lastre);
1114: if (cc->cc_func(i)) {
1.32 ! millert 1115: /* escape backslash */
! 1116: if (i == '\\') {
! 1117: *bp++ = '\\';
! 1118: n++;
! 1119: }
! 1120:
1.12 millert 1121: *bp++ = i;
1122: n++;
1123: }
1124: }
1.11 millert 1125: } else
1126: *bp++ = c;
1.23 millert 1127: } else if (c == '[' && *prestr == '.') {
1128: char collate_char;
1129: prestr++;
1130: collate_char = *prestr++;
1131: if (*prestr == '.' && prestr[1] == ']') {
1132: prestr += 2;
1133: /* Found it: map via locale TBD: for
1134: now, simply return this char. This
1135: is sufficient to pass conformance
1136: test awk.ex 156
1137: */
1138: if (*prestr == ']') {
1139: prestr++;
1140: rlxval = collate_char;
1141: return CHAR;
1142: }
1143: }
1144: } else if (c == '[' && *prestr == '=') {
1145: char equiv_char;
1146: prestr++;
1147: equiv_char = *prestr++;
1148: if (*prestr == '=' && prestr[1] == ']') {
1149: prestr += 2;
1150: /* Found it: map via locale TBD: for now
1151: simply return this char. This is
1152: sufficient to pass conformance test
1153: awk.ex 156
1154: */
1155: if (*prestr == ']') {
1156: prestr++;
1157: rlxval = equiv_char;
1158: return CHAR;
1159: }
1160: }
1.1 tholo 1161: } else if (c == '\0') {
1.8 millert 1162: FATAL("nonterminated character class %.20s", lastre);
1.5 kstailey 1163: } else if (bp == buf) { /* 1st char is special */
1164: *bp++ = c;
1.1 tholo 1165: } else if (c == ']') {
1.5 kstailey 1166: *bp++ = 0;
1.10 millert 1167: rlxstr = (uschar *) tostring((char *) buf);
1.1 tholo 1168: if (cflag == 0)
1169: return CCL;
1170: else
1171: return NCCL;
1172: } else
1.5 kstailey 1173: *bp++ = c;
1.1 tholo 1174: }
1.23 millert 1175: break;
1176: case '{':
1177: if (isdigit(*(prestr))) {
1178: num = 0; /* Process as a repetition */
1179: n = -1; m = -1;
1.29 millert 1180: commafound = false;
1181: digitfound = false;
1.23 millert 1182: startreptok = prestr-1;
1183: /* Remember start of previous atom here ? */
1184: } else { /* just a { char, not a repetition */
1185: rlxval = c;
1186: return CHAR;
1187: }
1188: for (; ; ) {
1189: if ((c = *prestr++) == '}') {
1190: if (commafound) {
1191: if (digitfound) { /* {n,m} */
1192: m = num;
1.30 millert 1193: if (m < n)
1.23 millert 1194: FATAL("illegal repetition expression: class %.20s",
1195: lastre);
1.30 millert 1196: if (n == 0 && m == 1) {
1.23 millert 1197: return QUEST;
1198: }
1199: } else { /* {n,} */
1.30 millert 1200: if (n == 0)
1201: return STAR;
1202: else if (n == 1)
1203: return PLUS;
1.23 millert 1204: }
1205: } else {
1206: if (digitfound) { /* {n} same as {n,n} */
1207: n = num;
1208: m = num;
1209: } else { /* {} */
1210: FATAL("illegal repetition expression: class %.20s",
1211: lastre);
1212: }
1213: }
1214: if (repeat(starttok, prestr-starttok, lastatom,
1215: startreptok - lastatom, n, m) > 0) {
1.30 millert 1216: if (n == 0 && m == 0) {
1.31 millert 1217: return ZERO;
1.23 millert 1218: }
1219: /* must rescan input for next token */
1220: goto rescan;
1221: }
1222: /* Failed to replace: eat up {...} characters
1223: and treat like just PLUS */
1224: return PLUS;
1225: } else if (c == '\0') {
1226: FATAL("nonterminated character class %.20s",
1227: lastre);
1228: } else if (isdigit(c)) {
1229: num = 10 * num + c - '0';
1.29 millert 1230: digitfound = true;
1.23 millert 1231: } else if (c == ',') {
1232: if (commafound)
1233: FATAL("illegal repetition expression: class %.20s",
1234: lastre);
1235: /* looking for {n,} or {n,m} */
1.29 millert 1236: commafound = true;
1.23 millert 1237: n = num;
1.29 millert 1238: digitfound = false; /* reset */
1.23 millert 1239: num = 0;
1240: } else {
1241: FATAL("illegal repetition expression: class %.20s",
1242: lastre);
1243: }
1244: }
1245: break;
1.1 tholo 1246: }
1247: }
1248:
1249: int cgoto(fa *f, int s, int c)
1250: {
1.27 millert 1251: int *p, *q;
1.1 tholo 1252: int i, j, k;
1253:
1.12 millert 1254: assert(c == HAT || c < NCHARS);
1.1 tholo 1255: while (f->accept >= maxsetvec) { /* guessing here! */
1.27 millert 1256: resizesetvec(__func__);
1.1 tholo 1257: }
1258: for (i = 0; i <= f->accept; i++)
1259: setvec[i] = 0;
1260: setcnt = 0;
1.27 millert 1261: resize_state(f, s);
1.1 tholo 1262: /* compute positions of gototab[s,c] into setvec */
1263: p = f->posns[s];
1264: for (i = 1; i <= *p; i++) {
1265: if ((k = f->re[p[i]].ltype) != FINAL) {
1.7 millert 1266: if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
1.1 tholo 1267: || (k == DOT && c != 0 && c != HAT)
1268: || (k == ALL && c != 0)
1.15 millert 1269: || (k == EMPTYRE && c != 0)
1.10 millert 1270: || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
1271: || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
1.1 tholo 1272: q = f->re[p[i]].lfollow;
1273: for (j = 1; j <= *q; j++) {
1274: if (q[j] >= maxsetvec) {
1.27 millert 1275: resizesetvec(__func__);
1.1 tholo 1276: }
1277: if (setvec[q[j]] == 0) {
1278: setcnt++;
1279: setvec[q[j]] = 1;
1280: }
1281: }
1282: }
1283: }
1284: }
1285: /* determine if setvec is a previous state */
1286: tmpset[0] = setcnt;
1287: j = 1;
1288: for (i = f->accept; i >= 0; i--)
1289: if (setvec[i]) {
1290: tmpset[j++] = i;
1291: }
1.27 millert 1292: resize_state(f, f->curstat > s ? f->curstat : s);
1.1 tholo 1293: /* tmpset == previous state? */
1294: for (i = 1; i <= f->curstat; i++) {
1295: p = f->posns[i];
1296: if ((k = tmpset[0]) != p[0])
1297: goto different;
1298: for (j = 1; j <= k; j++)
1299: if (tmpset[j] != p[j])
1300: goto different;
1301: /* setvec is state i */
1.30 millert 1302: if (c != HAT)
1303: f->gototab[s][c] = i;
1.1 tholo 1304: return i;
1305: different:;
1306: }
1307:
1308: /* add tmpset to current set of states */
1.27 millert 1309: ++(f->curstat);
1310: resize_state(f, f->curstat);
1.1 tholo 1311: for (i = 0; i < NCHARS; i++)
1312: f->gototab[f->curstat][i] = 0;
1313: xfree(f->posns[f->curstat]);
1.27 millert 1314: p = intalloc(setcnt + 1, __func__);
1.1 tholo 1315:
1316: f->posns[f->curstat] = p;
1.30 millert 1317: if (c != HAT)
1318: f->gototab[s][c] = f->curstat;
1.1 tholo 1319: for (i = 0; i <= setcnt; i++)
1320: p[i] = tmpset[i];
1321: if (setvec[f->accept])
1322: f->out[f->curstat] = 1;
1323: else
1324: f->out[f->curstat] = 0;
1325: return f->curstat;
1326: }
1327:
1328:
1329: void freefa(fa *f) /* free a finite automaton */
1330: {
1331: int i;
1332:
1333: if (f == NULL)
1334: return;
1.27 millert 1335: for (i = 0; i < f->state_count; i++)
1336: xfree(f->gototab[i])
1.1 tholo 1337: for (i = 0; i <= f->curstat; i++)
1338: xfree(f->posns[i]);
1339: for (i = 0; i <= f->accept; i++) {
1340: xfree(f->re[i].lfollow);
1341: if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
1.30 millert 1342: xfree(f->re[i].lval.np);
1.1 tholo 1343: }
1344: xfree(f->restr);
1.27 millert 1345: xfree(f->out);
1346: xfree(f->posns);
1347: xfree(f->gototab);
1.1 tholo 1348: xfree(f);
1349: }