Annotation of src/usr.bin/awk/b.c, Revision 1.1
1.1 ! tholo 1: /****************************************************************
! 2: Copyright (C) AT&T and Lucent Technologies 1996
! 3: All Rights Reserved
! 4:
! 5: Permission to use, copy, modify, and distribute this software and
! 6: its documentation for any purpose and without fee is hereby
! 7: granted, provided that the above copyright notice appear in all
! 8: copies and that both that the copyright notice and this
! 9: permission notice and warranty disclaimer appear in supporting
! 10: documentation, and that the names of AT&T or Lucent Technologies
! 11: or any of their entities not be used in advertising or publicity
! 12: pertaining to distribution of the software without specific,
! 13: written prior permission.
! 14:
! 15: AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
! 16: SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
! 17: FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
! 18: ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
! 19: DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
! 20: DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
! 21: OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
! 22: USE OR PERFORMANCE OF THIS SOFTWARE.
! 23: ****************************************************************/
! 24:
! 25: /* lasciate ogne speranza, voi ch'entrate. */
! 26:
! 27: #define DEBUG
! 28:
! 29: #include <ctype.h>
! 30: #include <stdio.h>
! 31: #include <string.h>
! 32: #include <stdlib.h>
! 33: #include "awk.h"
! 34: #include "awkgram.h"
! 35:
! 36: #define HAT (NCHARS-1) /* matches ^ in regular expr */
! 37: /* NCHARS is 2**n */
! 38: #define MAXLIN 22
! 39:
! 40: #define type(v) (v)->nobj
! 41: #define left(v) (v)->narg[0]
! 42: #define right(v) (v)->narg[1]
! 43: #define parent(v) (v)->nnext
! 44:
! 45: #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
! 46: #define UNARY case STAR: case PLUS: case QUEST:
! 47:
! 48: /* encoding in tree Nodes:
! 49: leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
! 50: left is index, right contains value or pointer to value
! 51: unary (STAR, PLUS, QUEST): left is child, right is null
! 52: binary (CAT, OR): left and right are children
! 53: parent contains pointer to parent
! 54: */
! 55:
! 56:
! 57: int *setvec;
! 58: int *tmpset;
! 59: int maxsetvec = 0;
! 60:
! 61: int rtok; /* next token in current re */
! 62: int rlxval;
! 63: char *rlxstr;
! 64: char *prestr; /* current position in current re */
! 65: char *lastre; /* origin of last re */
! 66:
! 67: static int setcnt;
! 68: static int poscnt;
! 69:
! 70: char *patbeg;
! 71: int patlen;
! 72:
! 73: #define NFA 20 /* cache this many dynamic fa's */
! 74: fa *fatab[NFA];
! 75: int nfatab = 0; /* entries in fatab */
! 76:
! 77: fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */
! 78: {
! 79: int i, use, nuse;
! 80: fa *pfa;
! 81:
! 82: if (setvec == 0) { /* first time through any RE */
! 83: maxsetvec = MAXLIN;
! 84: setvec = (int *) malloc(maxsetvec * sizeof(int));
! 85: tmpset = (int *) malloc(maxsetvec * sizeof(int));
! 86: if (setvec == 0 || tmpset == 0)
! 87: overflo("out of space initializing makedfa");
! 88: }
! 89:
! 90: if (compile_time) /* a constant for sure */
! 91: return mkdfa(s, anchor);
! 92: for (i = 0; i < nfatab; i++) /* is it there already? */
! 93: if (fatab[i]->anchor == anchor
! 94: && strcmp(fatab[i]->restr, s) == 0) {
! 95: fatab[i]->use++;
! 96: return fatab[i];
! 97: }
! 98: pfa = mkdfa(s, anchor);
! 99: if (nfatab < NFA) { /* room for another */
! 100: fatab[nfatab] = pfa;
! 101: fatab[nfatab]->use = 1;
! 102: nfatab++;
! 103: return pfa;
! 104: }
! 105: use = fatab[0]->use; /* replace least-recently used */
! 106: nuse = 0;
! 107: for (i = 1; i < nfatab; i++)
! 108: if (fatab[i]->use < use) {
! 109: use = fatab[i]->use;
! 110: nuse = i;
! 111: }
! 112: freefa(fatab[nuse]);
! 113: fatab[nuse] = pfa;
! 114: pfa->use = 1;
! 115: return pfa;
! 116: }
! 117:
! 118: fa *mkdfa(char *s, int anchor) /* does the real work of making a dfa */
! 119: /* anchor = 1 for anchored matches, else 0 */
! 120: {
! 121: Node *p, *p1;
! 122: fa *f;
! 123:
! 124: p = reparse(s);
! 125: p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
! 126: /* put ALL STAR in front of reg. exp. */
! 127: p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
! 128: /* put FINAL after reg. exp. */
! 129:
! 130: poscnt = 0;
! 131: penter(p1); /* enter parent pointers and leaf indices */
! 132: if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
! 133: overflo("out of space for fa");
! 134: f->accept = poscnt-1; /* penter has computed number of positions in re */
! 135: cfoll(f, p1); /* set up follow sets */
! 136: freetr(p1);
! 137: if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
! 138: overflo("out of space in makedfa");
! 139: if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
! 140: overflo("out of space in makedfa");
! 141: *f->posns[1] = 0;
! 142: f->initstat = makeinit(f, anchor);
! 143: f->anchor = anchor;
! 144: f->restr = tostring(s);
! 145: return f;
! 146: }
! 147:
! 148: int makeinit(fa *f, int anchor)
! 149: {
! 150: int i, k;
! 151:
! 152: f->curstat = 2;
! 153: f->out[2] = 0;
! 154: f->reset = 0;
! 155: k = *(f->re[0].lfollow);
! 156: xfree(f->posns[2]);
! 157: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
! 158: overflo("out of space in makeinit");
! 159: for (i=0; i <= k; i++) {
! 160: (f->posns[2])[i] = (f->re[0].lfollow)[i];
! 161: }
! 162: if ((f->posns[2])[1] == f->accept)
! 163: f->out[2] = 1;
! 164: for (i=0; i < NCHARS; i++)
! 165: f->gototab[2][i] = 0;
! 166: f->curstat = cgoto(f, 2, HAT);
! 167: if (anchor) {
! 168: *f->posns[2] = k-1; /* leave out position 0 */
! 169: for (i=0; i < k; i++) {
! 170: (f->posns[0])[i] = (f->posns[2])[i];
! 171: }
! 172:
! 173: f->out[0] = f->out[2];
! 174: if (f->curstat != 2)
! 175: --(*f->posns[f->curstat]);
! 176: }
! 177: return f->curstat;
! 178: }
! 179:
! 180: void penter(Node *p) /* set up parent pointers and leaf indices */
! 181: {
! 182: switch (type(p)) {
! 183: LEAF
! 184: left(p) = (Node *) poscnt;
! 185: poscnt++;
! 186: break;
! 187: UNARY
! 188: penter(left(p));
! 189: parent(left(p)) = p;
! 190: break;
! 191: case CAT:
! 192: case OR:
! 193: penter(left(p));
! 194: penter(right(p));
! 195: parent(left(p)) = p;
! 196: parent(right(p)) = p;
! 197: break;
! 198: default: /* can't happen */
! 199: ERROR "can't happen: unknown type %d in penter", type(p) FATAL;
! 200: break;
! 201: }
! 202: }
! 203:
! 204: void freetr(Node *p) /* free parse tree */
! 205: {
! 206: switch (type(p)) {
! 207: LEAF
! 208: xfree(p);
! 209: break;
! 210: UNARY
! 211: freetr(left(p));
! 212: xfree(p);
! 213: break;
! 214: case CAT:
! 215: case OR:
! 216: freetr(left(p));
! 217: freetr(right(p));
! 218: xfree(p);
! 219: break;
! 220: default: /* can't happen */
! 221: ERROR "can't happen: unknown type %d in freetr", type(p) FATAL;
! 222: break;
! 223: }
! 224: }
! 225:
! 226: /* in the parsing of regular expressions, metacharacters like . have */
! 227: /* to be seen literally; \056 is not a metacharacter. */
! 228:
! 229: int hexstr(char **pp) /* find and eval hex string at pp, return new p */
! 230: {
! 231: char *p;
! 232: int n = 0;
! 233:
! 234: for (p = *pp; isxdigit(*p); p++) {
! 235: if (isdigit(*p))
! 236: n = 16 * n + *p - '0';
! 237: else if (*p >= 'a' && *p <= 'f')
! 238: n = 16 * n + *p - 'a' + 10;
! 239: else if (*p >= 'A' && *p <= 'F')
! 240: n = 16 * n + *p - 'A' + 10;
! 241: }
! 242: *pp = p;
! 243: return n;
! 244: }
! 245:
! 246: #define isoctdigit(c) ((c) >= '0' && (c) <= '8') /* multiple use of arg */
! 247:
! 248: int quoted(char **pp) /* pick up next thing after a \\ */
! 249: /* and increment *pp */
! 250: {
! 251: char *p = *pp;
! 252: int c;
! 253:
! 254: if ((c = *p++) == 't')
! 255: c = '\t';
! 256: else if (c == 'n')
! 257: c = '\n';
! 258: else if (c == 'f')
! 259: c = '\f';
! 260: else if (c == 'r')
! 261: c = '\r';
! 262: else if (c == 'b')
! 263: c = '\b';
! 264: else if (c == '\\')
! 265: c = '\\';
! 266: else if (c == 'x') { /* hexadecimal goo follows */
! 267: c = hexstr(&p); /* this adds a null if number is invalid */
! 268: } else if (isoctdigit(c)) { /* \d \dd \ddd */
! 269: int n = c - '0';
! 270: if (isoctdigit(*p)) {
! 271: n = 8 * n + *p++ - '0';
! 272: if (isoctdigit(*p))
! 273: n = 8 * n + *p++ - '0';
! 274: }
! 275: c = n;
! 276: } /* else */
! 277: /* c = c; */
! 278: *pp = p;
! 279: return c;
! 280: }
! 281:
! 282: char *cclenter(char *p) /* add a character class */
! 283: {
! 284: int i, c, c2;
! 285: char *op;
! 286: static Gstring *cgp = 0;
! 287:
! 288: op = p;
! 289: if (cgp == 0)
! 290: cgp = newGstring();
! 291: caddreset(cgp);
! 292: i = 0;
! 293: while ((c = *p++) != 0) {
! 294: if (c == '\\') {
! 295: c = quoted(&p);
! 296: } else if (c == '-' && i > 0 && cgp->cbuf[i-1] != 0) {
! 297: if (*p != 0) {
! 298: c = cgp->cbuf[i-1];
! 299: c2 = *p++;
! 300: if (c2 == '\\')
! 301: c2 = quoted(&p);
! 302: if (c > c2) { /* empty; ignore */
! 303: cunadd(cgp);
! 304: i--;
! 305: continue;
! 306: }
! 307: while (c < c2) {
! 308: cadd(cgp, ++c);
! 309: i++;
! 310: }
! 311: continue;
! 312: }
! 313: }
! 314: cadd(cgp, c);
! 315: i++;
! 316: }
! 317: cadd(cgp, 0);
! 318: dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, cgp->cbuf) );
! 319: xfree(op);
! 320: return(tostring(cgp->cbuf));
! 321: }
! 322:
! 323: void overflo(char *s)
! 324: {
! 325: ERROR "regular expression too big: %.30s...", s FATAL;
! 326: }
! 327:
! 328: void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
! 329: {
! 330: int i;
! 331: int *p;
! 332:
! 333: switch (type(v)) {
! 334: LEAF
! 335: f->re[(int) left(v)].ltype = type(v);
! 336: f->re[(int) left(v)].lval.np = right(v);
! 337: while (f->accept >= maxsetvec) { /* guessing here! */
! 338: maxsetvec *= 4;
! 339: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
! 340: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
! 341: if (setvec == 0 || tmpset == 0) { abort();
! 342: overflo("out of space in cfoll()");
! 343: }
! 344: }
! 345: for (i = 0; i <= f->accept; i++)
! 346: setvec[i] = 0;
! 347: setcnt = 0;
! 348: follow(v); /* computes setvec and setcnt */
! 349: if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
! 350: overflo("out of space building follow set");
! 351: f->re[(int) left(v)].lfollow = p;
! 352: *p = setcnt;
! 353: for (i = f->accept; i >= 0; i--)
! 354: if (setvec[i] == 1)
! 355: *++p = i;
! 356: break;
! 357: UNARY
! 358: cfoll(f,left(v));
! 359: break;
! 360: case CAT:
! 361: case OR:
! 362: cfoll(f,left(v));
! 363: cfoll(f,right(v));
! 364: break;
! 365: default: /* can't happen */
! 366: ERROR "can't happen: unknown type %d in cfoll", type(v) FATAL;
! 367: }
! 368: }
! 369:
! 370: int first(Node *p) /* collects initially active leaves of p into setvec */
! 371: /* returns 1 if p matches empty string */
! 372: {
! 373: int b, lp;
! 374:
! 375: switch (type(p)) {
! 376: LEAF
! 377: lp = (int) left(p); /* look for high-water mark of subscripts */
! 378: while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
! 379: maxsetvec *= 4;
! 380: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
! 381: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
! 382: if (setvec == 0 || tmpset == 0) { abort();
! 383: overflo("out of space in first()");
! 384: }
! 385: }
! 386: if (setvec[lp] != 1) {
! 387: setvec[lp] = 1;
! 388: setcnt++;
! 389: }
! 390: if (type(p) == CCL && (*(char *) right(p)) == '\0')
! 391: return(0); /* empty CCL */
! 392: else return(1);
! 393: case PLUS:
! 394: if (first(left(p)) == 0) return(0);
! 395: return(1);
! 396: case STAR:
! 397: case QUEST:
! 398: first(left(p));
! 399: return(0);
! 400: case CAT:
! 401: if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
! 402: return(1);
! 403: case OR:
! 404: b = first(right(p));
! 405: if (first(left(p)) == 0 || b == 0) return(0);
! 406: return(1);
! 407: }
! 408: ERROR "can't happen: unknown type %d in first", type(p) FATAL; /* can't happen */
! 409: return(-1);
! 410: }
! 411:
! 412: void follow(Node *v) /* collects leaves that can follow v into setvec */
! 413: {
! 414: Node *p;
! 415:
! 416: if (type(v) == FINAL)
! 417: return;
! 418: p = parent(v);
! 419: switch (type(p)) {
! 420: case STAR:
! 421: case PLUS:
! 422: first(v);
! 423: follow(p);
! 424: return;
! 425:
! 426: case OR:
! 427: case QUEST:
! 428: follow(p);
! 429: return;
! 430:
! 431: case CAT:
! 432: if (v == left(p)) { /* v is left child of p */
! 433: if (first(right(p)) == 0) {
! 434: follow(p);
! 435: return;
! 436: }
! 437: } else /* v is right child */
! 438: follow(p);
! 439: return;
! 440: }
! 441: }
! 442:
! 443: int member(int c, char *s) /* is c in s? */
! 444: {
! 445: while (*s)
! 446: if (c == *s++)
! 447: return(1);
! 448: return(0);
! 449: }
! 450:
! 451: int match(fa *f, char *p0) /* shortest match ? */
! 452: {
! 453: int s, ns;
! 454: uschar *p = (uschar *) p0;
! 455:
! 456: s = f->reset ? makeinit(f,0) : f->initstat;
! 457: if (f->out[s])
! 458: return(1);
! 459: do {
! 460: if ((ns = f->gototab[s][*p]) != 0)
! 461: s = ns;
! 462: else
! 463: s = cgoto(f, s, *p);
! 464: if (f->out[s])
! 465: return(1);
! 466: } while (*p++ != 0);
! 467: return(0);
! 468: }
! 469:
! 470: int pmatch(fa *f, char *p0) /* longest match, for sub */
! 471: {
! 472: int s, ns;
! 473: uschar *p = (uschar *) p0;
! 474: uschar *q;
! 475: int i, k;
! 476:
! 477: s = f->reset ? makeinit(f,1) : f->initstat;
! 478: patbeg = (char *) p;
! 479: patlen = -1;
! 480: do {
! 481: q = p;
! 482: do {
! 483: if (f->out[s]) /* final state */
! 484: patlen = q-p;
! 485: if ((ns = f->gototab[s][*q]) != 0)
! 486: s = ns;
! 487: else
! 488: s = cgoto(f, s, *q);
! 489: if (s == 1) /* no transition */
! 490: if (patlen >= 0) {
! 491: patbeg = (char *) p;
! 492: return(1);
! 493: }
! 494: else
! 495: goto nextin; /* no match */
! 496: } while (*q++ != 0);
! 497: if (f->out[s])
! 498: patlen = q-p-1; /* don't count $ */
! 499: if (patlen >= 0) {
! 500: patbeg = (char *) p;
! 501: return(1);
! 502: }
! 503: nextin:
! 504: s = 2;
! 505: if (f->reset) {
! 506: for (i = 2; i <= f->curstat; i++)
! 507: xfree(f->posns[i]);
! 508: k = *f->posns[0];
! 509: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
! 510: overflo("out of space in pmatch");
! 511: for (i = 0; i <= k; i++)
! 512: (f->posns[2])[i] = (f->posns[0])[i];
! 513: f->initstat = f->curstat = 2;
! 514: f->out[2] = f->out[0];
! 515: for (i = 0; i < NCHARS; i++)
! 516: f->gototab[2][i] = 0;
! 517: }
! 518: } while (*p++ != 0);
! 519: return (0);
! 520: }
! 521:
! 522: int nematch(fa *f, char *p0) /* non-empty match, for sub */
! 523: {
! 524: int s, ns;
! 525: uschar *p = (uschar *) p0;
! 526: uschar *q;
! 527: int i, k;
! 528:
! 529: s = f->reset ? makeinit(f,1) : f->initstat;
! 530: patlen = -1;
! 531: while (*p) {
! 532: q = p;
! 533: do {
! 534: if (f->out[s]) /* final state */
! 535: patlen = q-p;
! 536: if ((ns = f->gototab[s][*q]) != 0)
! 537: s = ns;
! 538: else
! 539: s = cgoto(f, s, *q);
! 540: if (s == 1) /* no transition */
! 541: if (patlen > 0) {
! 542: patbeg = (char *) p;
! 543: return(1);
! 544: } else
! 545: goto nnextin; /* no nonempty match */
! 546: } while (*q++ != 0);
! 547: if (f->out[s])
! 548: patlen = q-p-1; /* don't count $ */
! 549: if (patlen > 0 ) {
! 550: patbeg = (char *) p;
! 551: return(1);
! 552: }
! 553: nnextin:
! 554: s = 2;
! 555: if (f->reset) {
! 556: for (i = 2; i <= f->curstat; i++)
! 557: xfree(f->posns[i]);
! 558: k = *f->posns[0];
! 559: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
! 560: overflo("out of state space");
! 561: for (i = 0; i <= k; i++)
! 562: (f->posns[2])[i] = (f->posns[0])[i];
! 563: f->initstat = f->curstat = 2;
! 564: f->out[2] = f->out[0];
! 565: for (i = 0; i < NCHARS; i++)
! 566: f->gototab[2][i] = 0;
! 567: }
! 568: p++;
! 569: }
! 570: return (0);
! 571: }
! 572:
! 573: Node *reparse(char *p) /* parses regular expression pointed to by p */
! 574: { /* uses relex() to scan regular expression */
! 575: Node *np;
! 576:
! 577: dprintf( ("reparse <%s>\n", p) );
! 578: lastre = prestr = p; /* prestr points to string to be parsed */
! 579: rtok = relex();
! 580: if (rtok == '\0')
! 581: ERROR "empty regular expression" FATAL;
! 582: np = regexp();
! 583: if (rtok != '\0')
! 584: ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL;
! 585: return(np);
! 586: }
! 587:
! 588: Node *regexp(void) /* top-level parse of reg expr */
! 589: {
! 590: return (alt(concat(primary())));
! 591: }
! 592:
! 593: Node *primary(void)
! 594: {
! 595: Node *np;
! 596:
! 597: switch (rtok) {
! 598: case CHAR:
! 599: np = op2(CHAR, NIL, (Node *) rlxval);
! 600: rtok = relex();
! 601: return (unary(np));
! 602: case ALL:
! 603: rtok = relex();
! 604: return (unary(op2(ALL, NIL, NIL)));
! 605: case DOT:
! 606: rtok = relex();
! 607: return (unary(op2(DOT, NIL, NIL)));
! 608: case CCL:
! 609: np = op2(CCL, NIL, (Node*) cclenter(rlxstr));
! 610: rtok = relex();
! 611: return (unary(np));
! 612: case NCCL:
! 613: np = op2(NCCL, NIL, (Node *) cclenter(rlxstr));
! 614: rtok = relex();
! 615: return (unary(np));
! 616: case '^':
! 617: rtok = relex();
! 618: return (unary(op2(CHAR, NIL, (Node *) HAT)));
! 619: case '$':
! 620: rtok = relex();
! 621: return (unary(op2(CHAR, NIL, NIL)));
! 622: case '(':
! 623: rtok = relex();
! 624: if (rtok == ')') { /* special pleading for () */
! 625: rtok = relex();
! 626: return unary(op2(CCL, NIL, (Node *) tostring("")));
! 627: }
! 628: np = regexp();
! 629: if (rtok == ')') {
! 630: rtok = relex();
! 631: return (unary(np));
! 632: }
! 633: else
! 634: ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL;
! 635: default:
! 636: ERROR "illegal primary in regular expression %s at %s", lastre, prestr FATAL;
! 637: }
! 638: return 0; /*NOTREACHED*/
! 639: }
! 640:
! 641: Node *concat(Node *np)
! 642: {
! 643: switch (rtok) {
! 644: case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
! 645: return (concat(op2(CAT, np, primary())));
! 646: }
! 647: return (np);
! 648: }
! 649:
! 650: Node *alt(Node *np)
! 651: {
! 652: if (rtok == OR) {
! 653: rtok = relex();
! 654: return (alt(op2(OR, np, concat(primary()))));
! 655: }
! 656: return (np);
! 657: }
! 658:
! 659: Node *unary(Node *np)
! 660: {
! 661: switch (rtok) {
! 662: case STAR:
! 663: rtok = relex();
! 664: return (unary(op2(STAR, np, NIL)));
! 665: case PLUS:
! 666: rtok = relex();
! 667: return (unary(op2(PLUS, np, NIL)));
! 668: case QUEST:
! 669: rtok = relex();
! 670: return (unary(op2(QUEST, np, NIL)));
! 671: default:
! 672: return (np);
! 673: }
! 674: }
! 675:
! 676: int relex(void) /* lexical analyzer for reparse */
! 677: {
! 678: int c;
! 679: int cflag;
! 680: static Gstring *gp = 0;
! 681:
! 682: switch (c = *prestr++) {
! 683: case '|': return OR;
! 684: case '*': return STAR;
! 685: case '+': return PLUS;
! 686: case '?': return QUEST;
! 687: case '.': return DOT;
! 688: case '\0': prestr--; return '\0';
! 689: case '^':
! 690: case '$':
! 691: case '(':
! 692: case ')':
! 693: return c;
! 694: case '\\':
! 695: rlxval = quoted(&prestr);
! 696: return CHAR;
! 697: default:
! 698: rlxval = c;
! 699: return CHAR;
! 700: case '[':
! 701: if (gp == 0)
! 702: gp = newGstring();
! 703: caddreset(gp);
! 704: if (*prestr == '^') {
! 705: cflag = 1;
! 706: prestr++;
! 707: }
! 708: else
! 709: cflag = 0;
! 710: for (; ; ) {
! 711: if ((c = *prestr++) == '\\') {
! 712: cadd(gp, '\\');
! 713: if ((c = *prestr++) == '\0')
! 714: ERROR "nonterminated character class %.20s...", lastre FATAL;
! 715: cadd(gp, c);
! 716: } else if (c == '\n') {
! 717: ERROR "newline in character class %.20s...", lastre FATAL;
! 718: } else if (c == '\0') {
! 719: ERROR "nonterminated character class %.20s", lastre FATAL;
! 720: } else if (gp->clen == 0) { /* 1st char is special */
! 721: cadd(gp, c);
! 722: } else if (c == ']') {
! 723: cadd(gp, 0);
! 724: rlxstr = tostring(gp->cbuf);
! 725: if (cflag == 0)
! 726: return CCL;
! 727: else
! 728: return NCCL;
! 729: } else
! 730: cadd(gp, c);
! 731: }
! 732: }
! 733: }
! 734:
! 735: int cgoto(fa *f, int s, int c)
! 736: {
! 737: int i, j, k;
! 738: int *p, *q;
! 739:
! 740: if (c < 0)
! 741: ERROR "can't happen: neg char %d in cgoto", c FATAL;
! 742: while (f->accept >= maxsetvec) { /* guessing here! */
! 743: maxsetvec *= 4;
! 744: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
! 745: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
! 746: if (setvec == 0 || tmpset == 0) { abort();
! 747: overflo("out of space in cgoto()");
! 748: }
! 749: }
! 750: for (i = 0; i <= f->accept; i++)
! 751: setvec[i] = 0;
! 752: setcnt = 0;
! 753: /* compute positions of gototab[s,c] into setvec */
! 754: p = f->posns[s];
! 755: for (i = 1; i <= *p; i++) {
! 756: if ((k = f->re[p[i]].ltype) != FINAL) {
! 757: if ((k == CHAR && c == f->re[p[i]].lval.i)
! 758: || (k == DOT && c != 0 && c != HAT)
! 759: || (k == ALL && c != 0)
! 760: || (k == CCL && member(c, f->re[p[i]].lval.up))
! 761: || (k == NCCL && !member(c, f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
! 762: q = f->re[p[i]].lfollow;
! 763: for (j = 1; j <= *q; j++) {
! 764: if (q[j] >= maxsetvec) {
! 765: maxsetvec *= 4;
! 766: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
! 767: tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
! 768: if (setvec == 0 || tmpset == 0)
! 769: overflo("cgoto overflow");
! 770: }
! 771: if (setvec[q[j]] == 0) {
! 772: setcnt++;
! 773: setvec[q[j]] = 1;
! 774: }
! 775: }
! 776: }
! 777: }
! 778: }
! 779: /* determine if setvec is a previous state */
! 780: tmpset[0] = setcnt;
! 781: j = 1;
! 782: for (i = f->accept; i >= 0; i--)
! 783: if (setvec[i]) {
! 784: tmpset[j++] = i;
! 785: }
! 786: /* tmpset == previous state? */
! 787: for (i = 1; i <= f->curstat; i++) {
! 788: p = f->posns[i];
! 789: if ((k = tmpset[0]) != p[0])
! 790: goto different;
! 791: for (j = 1; j <= k; j++)
! 792: if (tmpset[j] != p[j])
! 793: goto different;
! 794: /* setvec is state i */
! 795: f->gototab[s][c] = i;
! 796: return i;
! 797: different:;
! 798: }
! 799:
! 800: /* add tmpset to current set of states */
! 801: if (f->curstat >= NSTATES-1) {
! 802: f->curstat = 2;
! 803: f->reset = 1;
! 804: for (i = 2; i < NSTATES; i++)
! 805: xfree(f->posns[i]);
! 806: } else
! 807: ++(f->curstat);
! 808: for (i = 0; i < NCHARS; i++)
! 809: f->gototab[f->curstat][i] = 0;
! 810: xfree(f->posns[f->curstat]);
! 811: if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
! 812: overflo("out of space in cgoto");
! 813:
! 814: f->posns[f->curstat] = p;
! 815: f->gototab[s][c] = f->curstat;
! 816: for (i = 0; i <= setcnt; i++)
! 817: p[i] = tmpset[i];
! 818: if (setvec[f->accept])
! 819: f->out[f->curstat] = 1;
! 820: else
! 821: f->out[f->curstat] = 0;
! 822: return f->curstat;
! 823: }
! 824:
! 825:
! 826: void freefa(fa *f) /* free a finite automaton */
! 827: {
! 828: int i;
! 829:
! 830: if (f == NULL)
! 831: return;
! 832: for (i = 0; i <= f->curstat; i++)
! 833: xfree(f->posns[i]);
! 834: for (i = 0; i <= f->accept; i++) {
! 835: xfree(f->re[i].lfollow);
! 836: if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
! 837: xfree((f->re[i].lval.np));
! 838: }
! 839: xfree(f->restr);
! 840: xfree(f);
! 841: }