Annotation of src/usr.bin/awk/b.c, Revision 1.10
1.10 ! millert 1: /* $OpenBSD: b.c,v 1.9 2001/07/12 05:16:53 deraadt Exp $ */
1.1 tholo 2: /****************************************************************
1.5 kstailey 3: Copyright (C) Lucent Technologies 1997
1.1 tholo 4: All Rights Reserved
5:
6: Permission to use, copy, modify, and distribute this software and
7: its documentation for any purpose and without fee is hereby
8: granted, provided that the above copyright notice appear in all
9: copies and that both that the copyright notice and this
10: permission notice and warranty disclaimer appear in supporting
1.5 kstailey 11: documentation, and that the name Lucent Technologies or any of
12: its entities not be used in advertising or publicity pertaining
13: to distribution of the software without specific, written prior
14: permission.
15:
16: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23: THIS SOFTWARE.
1.1 tholo 24: ****************************************************************/
25:
26: /* lasciate ogne speranza, voi ch'entrate. */
27:
28: #define DEBUG
29:
30: #include <ctype.h>
31: #include <stdio.h>
32: #include <string.h>
33: #include <stdlib.h>
34: #include "awk.h"
1.5 kstailey 35: #include "ytab.h"
1.1 tholo 36:
1.10 ! millert 37: #define HAT (NCHARS-2) /* matches ^ in regular expr */
1.1 tholo 38: /* NCHARS is 2**n */
39: #define MAXLIN 22
40:
1.7 millert 41: #define type(v) (v)->nobj /* badly overloaded here */
42: #define info(v) (v)->ntype /* badly overloaded here */
1.1 tholo 43: #define left(v) (v)->narg[0]
44: #define right(v) (v)->narg[1]
45: #define parent(v) (v)->nnext
46:
47: #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
48: #define UNARY case STAR: case PLUS: case QUEST:
49:
50: /* encoding in tree Nodes:
51: leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
52: left is index, right contains value or pointer to value
53: unary (STAR, PLUS, QUEST): left is child, right is null
54: binary (CAT, OR): left and right are children
55: parent contains pointer to parent
56: */
57:
58:
59: int *setvec;
60: int *tmpset;
61: int maxsetvec = 0;
62:
63: int rtok; /* next token in current re */
1.4 millert 64: int rlxval;
1.10 ! millert 65: static uschar *rlxstr;
! 66: static uschar *prestr; /* current position in current re */
! 67: static uschar *lastre; /* origin of last re */
1.1 tholo 68:
1.4 millert 69: static int setcnt;
70: static int poscnt;
1.1 tholo 71:
72: char *patbeg;
73: int patlen;
74:
75: #define NFA 20 /* cache this many dynamic fa's */
76: fa *fatab[NFA];
77: int nfatab = 0; /* entries in fatab */
78:
79: fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */
80: {
81: int i, use, nuse;
82: fa *pfa;
1.6 millert 83: static int now = 1;
1.1 tholo 84:
85: if (setvec == 0) { /* first time through any RE */
86: maxsetvec = MAXLIN;
87: setvec = (int *) malloc(maxsetvec * sizeof(int));
88: tmpset = (int *) malloc(maxsetvec * sizeof(int));
89: if (setvec == 0 || tmpset == 0)
90: overflo("out of space initializing makedfa");
91: }
92:
93: if (compile_time) /* a constant for sure */
94: return mkdfa(s, anchor);
95: for (i = 0; i < nfatab; i++) /* is it there already? */
96: if (fatab[i]->anchor == anchor
97: && strcmp(fatab[i]->restr, s) == 0) {
1.6 millert 98: fatab[i]->use = now++;
1.1 tholo 99: return fatab[i];
1.10 ! millert 100: }
1.1 tholo 101: pfa = mkdfa(s, anchor);
102: if (nfatab < NFA) { /* room for another */
103: fatab[nfatab] = pfa;
1.6 millert 104: fatab[nfatab]->use = now++;
1.1 tholo 105: nfatab++;
106: return pfa;
107: }
108: use = fatab[0]->use; /* replace least-recently used */
109: nuse = 0;
110: for (i = 1; i < nfatab; i++)
111: if (fatab[i]->use < use) {
112: use = fatab[i]->use;
113: nuse = i;
114: }
115: freefa(fatab[nuse]);
116: fatab[nuse] = pfa;
1.6 millert 117: pfa->use = now++;
1.1 tholo 118: return pfa;
119: }
120:
121: fa *mkdfa(char *s, int anchor) /* does the real work of making a dfa */
122: /* anchor = 1 for anchored matches, else 0 */
123: {
124: Node *p, *p1;
125: fa *f;
126:
127: p = reparse(s);
128: p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
129: /* put ALL STAR in front of reg. exp. */
130: p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
131: /* put FINAL after reg. exp. */
132:
133: poscnt = 0;
134: penter(p1); /* enter parent pointers and leaf indices */
135: if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
136: overflo("out of space for fa");
137: f->accept = poscnt-1; /* penter has computed number of positions in re */
138: cfoll(f, p1); /* set up follow sets */
139: freetr(p1);
1.4 millert 140: if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
1.1 tholo 141: overflo("out of space in makedfa");
1.4 millert 142: if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
1.1 tholo 143: overflo("out of space in makedfa");
144: *f->posns[1] = 0;
145: f->initstat = makeinit(f, anchor);
146: f->anchor = anchor;
1.10 ! millert 147: f->restr = (uschar *) tostring(s);
1.1 tholo 148: return f;
149: }
150:
151: int makeinit(fa *f, int anchor)
152: {
153: int i, k;
154:
155: f->curstat = 2;
156: f->out[2] = 0;
157: f->reset = 0;
158: k = *(f->re[0].lfollow);
159: xfree(f->posns[2]);
1.4 millert 160: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
1.1 tholo 161: overflo("out of space in makeinit");
162: for (i=0; i <= k; i++) {
163: (f->posns[2])[i] = (f->re[0].lfollow)[i];
164: }
165: if ((f->posns[2])[1] == f->accept)
166: f->out[2] = 1;
167: for (i=0; i < NCHARS; i++)
168: f->gototab[2][i] = 0;
169: f->curstat = cgoto(f, 2, HAT);
170: if (anchor) {
171: *f->posns[2] = k-1; /* leave out position 0 */
172: for (i=0; i < k; i++) {
173: (f->posns[0])[i] = (f->posns[2])[i];
174: }
175:
176: f->out[0] = f->out[2];
177: if (f->curstat != 2)
178: --(*f->posns[f->curstat]);
179: }
180: return f->curstat;
181: }
182:
183: void penter(Node *p) /* set up parent pointers and leaf indices */
184: {
185: switch (type(p)) {
186: LEAF
1.7 millert 187: info(p) = poscnt;
1.1 tholo 188: poscnt++;
189: break;
190: UNARY
191: penter(left(p));
192: parent(left(p)) = p;
193: break;
194: case CAT:
195: case OR:
196: penter(left(p));
197: penter(right(p));
198: parent(left(p)) = p;
199: parent(right(p)) = p;
200: break;
201: default: /* can't happen */
1.8 millert 202: FATAL("can't happen: unknown type %d in penter", type(p));
1.1 tholo 203: break;
204: }
205: }
206:
207: void freetr(Node *p) /* free parse tree */
208: {
209: switch (type(p)) {
210: LEAF
211: xfree(p);
212: break;
213: UNARY
214: freetr(left(p));
215: xfree(p);
216: break;
217: case CAT:
218: case OR:
219: freetr(left(p));
220: freetr(right(p));
221: xfree(p);
222: break;
223: default: /* can't happen */
1.8 millert 224: FATAL("can't happen: unknown type %d in freetr", type(p));
1.1 tholo 225: break;
226: }
227: }
228:
229: /* in the parsing of regular expressions, metacharacters like . have */
230: /* to be seen literally; \056 is not a metacharacter. */
231:
232: int hexstr(char **pp) /* find and eval hex string at pp, return new p */
1.2 millert 233: { /* only pick up one 8-bit byte (2 chars) */
1.10 ! millert 234: uschar *p;
1.1 tholo 235: int n = 0;
1.2 millert 236: int i;
1.1 tholo 237:
1.10 ! millert 238: for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
1.1 tholo 239: if (isdigit(*p))
240: n = 16 * n + *p - '0';
241: else if (*p >= 'a' && *p <= 'f')
242: n = 16 * n + *p - 'a' + 10;
243: else if (*p >= 'A' && *p <= 'F')
244: n = 16 * n + *p - 'A' + 10;
245: }
1.10 ! millert 246: *pp = (char *) p;
1.1 tholo 247: return n;
248: }
249:
1.2 millert 250: #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
1.1 tholo 251:
252: int quoted(char **pp) /* pick up next thing after a \\ */
253: /* and increment *pp */
254: {
255: char *p = *pp;
256: int c;
257:
258: if ((c = *p++) == 't')
259: c = '\t';
260: else if (c == 'n')
261: c = '\n';
262: else if (c == 'f')
263: c = '\f';
264: else if (c == 'r')
265: c = '\r';
266: else if (c == 'b')
267: c = '\b';
268: else if (c == '\\')
269: c = '\\';
270: else if (c == 'x') { /* hexadecimal goo follows */
271: c = hexstr(&p); /* this adds a null if number is invalid */
272: } else if (isoctdigit(c)) { /* \d \dd \ddd */
273: int n = c - '0';
274: if (isoctdigit(*p)) {
275: n = 8 * n + *p++ - '0';
276: if (isoctdigit(*p))
277: n = 8 * n + *p++ - '0';
278: }
279: c = n;
280: } /* else */
281: /* c = c; */
282: *pp = p;
283: return c;
284: }
285:
1.10 ! millert 286: char *cclenter(char *argp) /* add a character class */
1.1 tholo 287: {
288: int i, c, c2;
1.10 ! millert 289: uschar *p = (uschar *) argp;
! 290: uschar *op, *bp;
! 291: static uschar *buf = 0;
1.5 kstailey 292: static int bufsz = 100;
1.1 tholo 293:
294: op = p;
1.10 ! millert 295: if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
1.8 millert 296: FATAL("out of space for character class [%.10s...] 1", p);
1.5 kstailey 297: bp = buf;
298: for (i = 0; (c = *p++) != 0; ) {
1.1 tholo 299: if (c == '\\') {
1.10 ! millert 300: c = quoted((char **) &p);
1.5 kstailey 301: } else if (c == '-' && i > 0 && bp[-1] != 0) {
1.1 tholo 302: if (*p != 0) {
1.5 kstailey 303: c = bp[-1];
1.1 tholo 304: c2 = *p++;
305: if (c2 == '\\')
1.10 ! millert 306: c2 = quoted((char **) &p);
1.1 tholo 307: if (c > c2) { /* empty; ignore */
1.5 kstailey 308: bp--;
1.1 tholo 309: i--;
310: continue;
311: }
312: while (c < c2) {
1.10 ! millert 313: if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
1.8 millert 314: FATAL("out of space for character class [%.10s...] 2", p);
1.5 kstailey 315: *bp++ = ++c;
1.1 tholo 316: i++;
317: }
318: continue;
319: }
320: }
1.10 ! millert 321: if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
1.8 millert 322: FATAL("out of space for character class [%.10s...] 3", p);
1.5 kstailey 323: *bp++ = c;
1.1 tholo 324: i++;
325: }
1.5 kstailey 326: *bp = 0;
327: dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
1.1 tholo 328: xfree(op);
1.10 ! millert 329: return (char *) tostring((char *) buf);
1.1 tholo 330: }
331:
332: void overflo(char *s)
333: {
1.8 millert 334: FATAL("regular expression too big: %.30s...", s);
1.1 tholo 335: }
336:
337: void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
338: {
339: int i;
1.4 millert 340: int *p;
1.1 tholo 341:
342: switch (type(v)) {
343: LEAF
1.7 millert 344: f->re[info(v)].ltype = type(v);
345: f->re[info(v)].lval.np = right(v);
1.1 tholo 346: while (f->accept >= maxsetvec) { /* guessing here! */
347: maxsetvec *= 4;
348: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
349: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
1.5 kstailey 350: if (setvec == 0 || tmpset == 0)
1.1 tholo 351: overflo("out of space in cfoll()");
352: }
353: for (i = 0; i <= f->accept; i++)
354: setvec[i] = 0;
355: setcnt = 0;
356: follow(v); /* computes setvec and setcnt */
1.4 millert 357: if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
1.1 tholo 358: overflo("out of space building follow set");
1.7 millert 359: f->re[info(v)].lfollow = p;
1.1 tholo 360: *p = setcnt;
361: for (i = f->accept; i >= 0; i--)
362: if (setvec[i] == 1)
363: *++p = i;
364: break;
365: UNARY
366: cfoll(f,left(v));
367: break;
368: case CAT:
369: case OR:
370: cfoll(f,left(v));
371: cfoll(f,right(v));
372: break;
373: default: /* can't happen */
1.8 millert 374: FATAL("can't happen: unknown type %d in cfoll", type(v));
1.1 tholo 375: }
376: }
377:
378: int first(Node *p) /* collects initially active leaves of p into setvec */
379: /* returns 1 if p matches empty string */
380: {
1.4 millert 381: int b, lp;
1.1 tholo 382:
383: switch (type(p)) {
384: LEAF
1.7 millert 385: lp = info(p); /* look for high-water mark of subscripts */
1.1 tholo 386: while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
387: maxsetvec *= 4;
388: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
389: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
1.7 millert 390: if (setvec == 0 || tmpset == 0)
1.1 tholo 391: overflo("out of space in first()");
392: }
393: if (setvec[lp] != 1) {
394: setvec[lp] = 1;
395: setcnt++;
396: }
397: if (type(p) == CCL && (*(char *) right(p)) == '\0')
398: return(0); /* empty CCL */
399: else return(1);
400: case PLUS:
401: if (first(left(p)) == 0) return(0);
402: return(1);
403: case STAR:
404: case QUEST:
405: first(left(p));
406: return(0);
407: case CAT:
408: if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
409: return(1);
410: case OR:
411: b = first(right(p));
412: if (first(left(p)) == 0 || b == 0) return(0);
413: return(1);
414: }
1.8 millert 415: FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
1.1 tholo 416: return(-1);
417: }
418:
419: void follow(Node *v) /* collects leaves that can follow v into setvec */
420: {
421: Node *p;
422:
423: if (type(v) == FINAL)
424: return;
425: p = parent(v);
426: switch (type(p)) {
427: case STAR:
428: case PLUS:
429: first(v);
430: follow(p);
431: return;
432:
433: case OR:
434: case QUEST:
435: follow(p);
436: return;
437:
438: case CAT:
439: if (v == left(p)) { /* v is left child of p */
440: if (first(right(p)) == 0) {
441: follow(p);
442: return;
443: }
444: } else /* v is right child */
445: follow(p);
446: return;
447: }
448: }
449:
1.10 ! millert 450: int member(int c, char *sarg) /* is c in s? */
1.1 tholo 451: {
1.10 ! millert 452: uschar *s = (uschar *) sarg;
! 453:
1.1 tholo 454: while (*s)
455: if (c == *s++)
456: return(1);
457: return(0);
458: }
459:
460: int match(fa *f, char *p0) /* shortest match ? */
461: {
462: int s, ns;
463: uschar *p = (uschar *) p0;
464:
465: s = f->reset ? makeinit(f,0) : f->initstat;
466: if (f->out[s])
467: return(1);
468: do {
469: if ((ns = f->gototab[s][*p]) != 0)
470: s = ns;
471: else
472: s = cgoto(f, s, *p);
473: if (f->out[s])
474: return(1);
475: } while (*p++ != 0);
476: return(0);
477: }
478:
479: int pmatch(fa *f, char *p0) /* longest match, for sub */
480: {
481: int s, ns;
482: uschar *p = (uschar *) p0;
483: uschar *q;
484: int i, k;
485:
486: s = f->reset ? makeinit(f,1) : f->initstat;
487: patbeg = (char *) p;
488: patlen = -1;
489: do {
490: q = p;
491: do {
492: if (f->out[s]) /* final state */
493: patlen = q-p;
494: if ((ns = f->gototab[s][*q]) != 0)
495: s = ns;
496: else
497: s = cgoto(f, s, *q);
1.9 deraadt 498: if (s == 1) { /* no transition */
1.1 tholo 499: if (patlen >= 0) {
500: patbeg = (char *) p;
501: return(1);
502: }
503: else
504: goto nextin; /* no match */
1.9 deraadt 505: }
1.1 tholo 506: } while (*q++ != 0);
507: if (f->out[s])
508: patlen = q-p-1; /* don't count $ */
509: if (patlen >= 0) {
510: patbeg = (char *) p;
511: return(1);
512: }
513: nextin:
514: s = 2;
515: if (f->reset) {
516: for (i = 2; i <= f->curstat; i++)
517: xfree(f->posns[i]);
518: k = *f->posns[0];
1.4 millert 519: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
1.1 tholo 520: overflo("out of space in pmatch");
521: for (i = 0; i <= k; i++)
522: (f->posns[2])[i] = (f->posns[0])[i];
523: f->initstat = f->curstat = 2;
524: f->out[2] = f->out[0];
525: for (i = 0; i < NCHARS; i++)
526: f->gototab[2][i] = 0;
527: }
528: } while (*p++ != 0);
529: return (0);
530: }
531:
532: int nematch(fa *f, char *p0) /* non-empty match, for sub */
533: {
534: int s, ns;
535: uschar *p = (uschar *) p0;
536: uschar *q;
537: int i, k;
538:
539: s = f->reset ? makeinit(f,1) : f->initstat;
540: patlen = -1;
541: while (*p) {
542: q = p;
543: do {
544: if (f->out[s]) /* final state */
545: patlen = q-p;
546: if ((ns = f->gototab[s][*q]) != 0)
547: s = ns;
548: else
549: s = cgoto(f, s, *q);
1.9 deraadt 550: if (s == 1) { /* no transition */
1.1 tholo 551: if (patlen > 0) {
552: patbeg = (char *) p;
553: return(1);
554: } else
555: goto nnextin; /* no nonempty match */
1.9 deraadt 556: }
1.1 tholo 557: } while (*q++ != 0);
558: if (f->out[s])
559: patlen = q-p-1; /* don't count $ */
560: if (patlen > 0 ) {
561: patbeg = (char *) p;
562: return(1);
563: }
564: nnextin:
565: s = 2;
566: if (f->reset) {
567: for (i = 2; i <= f->curstat; i++)
568: xfree(f->posns[i]);
569: k = *f->posns[0];
1.4 millert 570: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
1.1 tholo 571: overflo("out of state space");
572: for (i = 0; i <= k; i++)
573: (f->posns[2])[i] = (f->posns[0])[i];
574: f->initstat = f->curstat = 2;
575: f->out[2] = f->out[0];
576: for (i = 0; i < NCHARS; i++)
577: f->gototab[2][i] = 0;
578: }
579: p++;
580: }
581: return (0);
582: }
583:
584: Node *reparse(char *p) /* parses regular expression pointed to by p */
585: { /* uses relex() to scan regular expression */
586: Node *np;
587:
588: dprintf( ("reparse <%s>\n", p) );
1.10 ! millert 589: lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
1.1 tholo 590: rtok = relex();
591: if (rtok == '\0')
1.8 millert 592: FATAL("empty regular expression");
1.1 tholo 593: np = regexp();
594: if (rtok != '\0')
1.8 millert 595: FATAL("syntax error in regular expression %s at %s", lastre, prestr);
1.1 tholo 596: return(np);
597: }
598:
599: Node *regexp(void) /* top-level parse of reg expr */
600: {
601: return (alt(concat(primary())));
602: }
603:
604: Node *primary(void)
605: {
606: Node *np;
607:
608: switch (rtok) {
609: case CHAR:
1.7 millert 610: np = op2(CHAR, NIL, itonp(rlxval));
1.1 tholo 611: rtok = relex();
612: return (unary(np));
613: case ALL:
614: rtok = relex();
615: return (unary(op2(ALL, NIL, NIL)));
616: case DOT:
617: rtok = relex();
618: return (unary(op2(DOT, NIL, NIL)));
619: case CCL:
1.10 ! millert 620: np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
1.1 tholo 621: rtok = relex();
622: return (unary(np));
623: case NCCL:
1.10 ! millert 624: np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
1.1 tholo 625: rtok = relex();
626: return (unary(np));
627: case '^':
628: rtok = relex();
1.7 millert 629: return (unary(op2(CHAR, NIL, itonp(HAT))));
1.1 tholo 630: case '$':
631: rtok = relex();
632: return (unary(op2(CHAR, NIL, NIL)));
633: case '(':
634: rtok = relex();
635: if (rtok == ')') { /* special pleading for () */
636: rtok = relex();
637: return unary(op2(CCL, NIL, (Node *) tostring("")));
638: }
639: np = regexp();
640: if (rtok == ')') {
641: rtok = relex();
642: return (unary(np));
643: }
644: else
1.8 millert 645: FATAL("syntax error in regular expression %s at %s", lastre, prestr);
1.1 tholo 646: default:
1.8 millert 647: FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
1.1 tholo 648: }
649: return 0; /*NOTREACHED*/
650: }
651:
652: Node *concat(Node *np)
653: {
654: switch (rtok) {
655: case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
656: return (concat(op2(CAT, np, primary())));
657: }
658: return (np);
659: }
660:
661: Node *alt(Node *np)
662: {
663: if (rtok == OR) {
664: rtok = relex();
665: return (alt(op2(OR, np, concat(primary()))));
666: }
667: return (np);
668: }
669:
670: Node *unary(Node *np)
671: {
672: switch (rtok) {
673: case STAR:
674: rtok = relex();
675: return (unary(op2(STAR, np, NIL)));
676: case PLUS:
677: rtok = relex();
678: return (unary(op2(PLUS, np, NIL)));
679: case QUEST:
680: rtok = relex();
681: return (unary(op2(QUEST, np, NIL)));
682: default:
683: return (np);
684: }
685: }
686:
687: int relex(void) /* lexical analyzer for reparse */
688: {
1.5 kstailey 689: int c, n;
1.1 tholo 690: int cflag;
1.10 ! millert 691: static uschar *buf = 0;
1.5 kstailey 692: static int bufsz = 100;
1.10 ! millert 693: uschar *bp;
1.1 tholo 694:
695: switch (c = *prestr++) {
696: case '|': return OR;
697: case '*': return STAR;
698: case '+': return PLUS;
699: case '?': return QUEST;
700: case '.': return DOT;
701: case '\0': prestr--; return '\0';
702: case '^':
703: case '$':
704: case '(':
705: case ')':
706: return c;
707: case '\\':
1.10 ! millert 708: rlxval = quoted((char **) &prestr);
1.1 tholo 709: return CHAR;
710: default:
711: rlxval = c;
712: return CHAR;
713: case '[':
1.10 ! millert 714: if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
1.8 millert 715: FATAL("out of space in reg expr %.10s..", lastre);
1.5 kstailey 716: bp = buf;
1.1 tholo 717: if (*prestr == '^') {
718: cflag = 1;
719: prestr++;
720: }
721: else
722: cflag = 0;
1.5 kstailey 723: n = 2 * strlen(prestr)+1;
1.10 ! millert 724: if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
1.8 millert 725: FATAL("out of space for reg expr %.10s...", lastre);
1.1 tholo 726: for (; ; ) {
727: if ((c = *prestr++) == '\\') {
1.5 kstailey 728: *bp++ = '\\';
1.1 tholo 729: if ((c = *prestr++) == '\0')
1.8 millert 730: FATAL("nonterminated character class %.20s...", lastre);
1.5 kstailey 731: *bp++ = c;
1.10 ! millert 732: /* } else if (c == '\n') { */
! 733: /* FATAL("newline in character class %.20s...", lastre); */
1.1 tholo 734: } else if (c == '\0') {
1.8 millert 735: FATAL("nonterminated character class %.20s", lastre);
1.5 kstailey 736: } else if (bp == buf) { /* 1st char is special */
737: *bp++ = c;
1.1 tholo 738: } else if (c == ']') {
1.5 kstailey 739: *bp++ = 0;
1.10 ! millert 740: rlxstr = (uschar *) tostring((char *) buf);
1.1 tholo 741: if (cflag == 0)
742: return CCL;
743: else
744: return NCCL;
745: } else
1.5 kstailey 746: *bp++ = c;
1.1 tholo 747: }
748: }
749: }
750:
751: int cgoto(fa *f, int s, int c)
752: {
753: int i, j, k;
1.4 millert 754: int *p, *q;
1.1 tholo 755:
1.10 ! millert 756: if (c < 0 || c > 255)
1.8 millert 757: FATAL("can't happen: neg char %d in cgoto", c);
1.1 tholo 758: while (f->accept >= maxsetvec) { /* guessing here! */
759: maxsetvec *= 4;
760: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
761: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
1.7 millert 762: if (setvec == 0 || tmpset == 0)
1.1 tholo 763: overflo("out of space in cgoto()");
764: }
765: for (i = 0; i <= f->accept; i++)
766: setvec[i] = 0;
767: setcnt = 0;
768: /* compute positions of gototab[s,c] into setvec */
769: p = f->posns[s];
770: for (i = 1; i <= *p; i++) {
771: if ((k = f->re[p[i]].ltype) != FINAL) {
1.7 millert 772: if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
1.1 tholo 773: || (k == DOT && c != 0 && c != HAT)
774: || (k == ALL && c != 0)
1.10 ! millert 775: || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
! 776: || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
1.1 tholo 777: q = f->re[p[i]].lfollow;
778: for (j = 1; j <= *q; j++) {
779: if (q[j] >= maxsetvec) {
780: maxsetvec *= 4;
781: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
782: tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
783: if (setvec == 0 || tmpset == 0)
784: overflo("cgoto overflow");
785: }
786: if (setvec[q[j]] == 0) {
787: setcnt++;
788: setvec[q[j]] = 1;
789: }
790: }
791: }
792: }
793: }
794: /* determine if setvec is a previous state */
795: tmpset[0] = setcnt;
796: j = 1;
797: for (i = f->accept; i >= 0; i--)
798: if (setvec[i]) {
799: tmpset[j++] = i;
800: }
801: /* tmpset == previous state? */
802: for (i = 1; i <= f->curstat; i++) {
803: p = f->posns[i];
804: if ((k = tmpset[0]) != p[0])
805: goto different;
806: for (j = 1; j <= k; j++)
807: if (tmpset[j] != p[j])
808: goto different;
809: /* setvec is state i */
810: f->gototab[s][c] = i;
811: return i;
812: different:;
813: }
814:
815: /* add tmpset to current set of states */
816: if (f->curstat >= NSTATES-1) {
817: f->curstat = 2;
818: f->reset = 1;
819: for (i = 2; i < NSTATES; i++)
820: xfree(f->posns[i]);
821: } else
822: ++(f->curstat);
823: for (i = 0; i < NCHARS; i++)
824: f->gototab[f->curstat][i] = 0;
825: xfree(f->posns[f->curstat]);
1.4 millert 826: if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
1.1 tholo 827: overflo("out of space in cgoto");
828:
829: f->posns[f->curstat] = p;
830: f->gototab[s][c] = f->curstat;
831: for (i = 0; i <= setcnt; i++)
832: p[i] = tmpset[i];
833: if (setvec[f->accept])
834: f->out[f->curstat] = 1;
835: else
836: f->out[f->curstat] = 0;
837: return f->curstat;
838: }
839:
840:
841: void freefa(fa *f) /* free a finite automaton */
842: {
843: int i;
844:
845: if (f == NULL)
846: return;
847: for (i = 0; i <= f->curstat; i++)
848: xfree(f->posns[i]);
849: for (i = 0; i <= f->accept; i++) {
850: xfree(f->re[i].lfollow);
851: if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
852: xfree((f->re[i].lval.np));
853: }
854: xfree(f->restr);
855: xfree(f);
856: }