=================================================================== RCS file: /cvsrepo/anoncvs/cvs/src/usr.bin/awk/b.c,v retrieving revision 1.26 retrieving revision 1.27 diff -u -r1.26 -r1.27 --- src/usr.bin/awk/b.c 2020/06/10 21:02:53 1.26 +++ src/usr.bin/awk/b.c 2020/06/10 21:03:12 1.27 @@ -1,4 +1,4 @@ -/* $OpenBSD: b.c,v 1.26 2020/06/10 21:02:53 millert Exp $ */ +/* $OpenBSD: b.c,v 1.27 2020/06/10 21:03:12 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -77,10 +77,78 @@ char *patbeg; int patlen; -#define NFA 20 /* cache this many dynamic fa's */ +#define NFA 128 /* cache this many dynamic fa's */ fa *fatab[NFA]; int nfatab = 0; /* entries in fatab */ +static int * +intalloc(size_t n, const char *f) +{ + void *p = calloc(n, sizeof(int)); + if (p == NULL) + overflo(f); + return p; +} + +static void +allocsetvec(const char *f) +{ + maxsetvec = MAXLIN; + setvec = reallocarray(setvec, maxsetvec, sizeof(*setvec)); + tmpset = reallocarray(tmpset, maxsetvec, sizeof(*tmpset)); + if (setvec == NULL || tmpset == NULL) + overflo(f); +} + +static void +resizesetvec(const char *f) +{ + setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(*setvec)); + tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(*tmpset)); + if (setvec == NULL || tmpset == NULL) + overflo(f); + maxsetvec *= 4; +} + +static void +resize_state(fa *f, int state) +{ + void *p; + int i, new_count; + + if (++state < f->state_count) + return; + + new_count = state + 10; /* needs to be tuned */ + + p = reallocarray(f->gototab, new_count, sizeof(f->gototab[0])); + if (p == NULL) + goto out; + f->gototab = p; + + p = reallocarray(f->out, new_count, sizeof(f->out[0])); + if (p == NULL) + goto out; + f->out = p; + + p = reallocarray(f->posns, new_count, sizeof(f->posns[0])); + if (p == NULL) + goto out; + f->posns = p; + + for (i = f->state_count; i < new_count; ++i) { + f->gototab[i] = calloc(NCHARS, sizeof(**f->gototab)); + if (f->gototab[i] == NULL) + goto out; + f->out[i] = 0; + f->posns[i] = NULL; + } + f->state_count = new_count; + return; +out: + overflo(__func__); +} + fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ { int i, use, nuse; @@ -88,11 +156,7 @@ static int now = 1; if (setvec == NULL) { /* first time through any RE */ - maxsetvec = MAXLIN; - setvec = (int *) calloc(maxsetvec, sizeof(int)); - tmpset = (int *) calloc(maxsetvec, sizeof(int)); - if (setvec == NULL || tmpset == NULL) - overflo("out of space initializing makedfa"); + allocsetvec(__func__); } if (compile_time) /* a constant for sure */ @@ -140,14 +204,13 @@ poscnt = 0; penter(p1); /* enter parent pointers and leaf indices */ if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) - overflo("out of space for fa"); + overflo(__func__); f->accept = poscnt-1; /* penter has computed number of positions in re */ cfoll(f, p1); /* set up follow sets */ freetr(p1); - if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL) - overflo("out of space in makedfa"); - if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) - overflo("out of space in makedfa"); + resize_state(f, 1); + f->posns[0] = intalloc(*(f->re[0].lfollow), __func__); + f->posns[1] = intalloc(1, __func__); *f->posns[1] = 0; f->initstat = makeinit(f, anchor); f->anchor = anchor; @@ -165,11 +228,9 @@ f->curstat = 2; f->out[2] = 0; - f->reset = 0; k = *(f->re[0].lfollow); xfree(f->posns[2]); - if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) - overflo("out of space in makeinit"); + f->posns[2] = intalloc(k + 1, __func__); for (i=0; i <= k; i++) { (f->posns[2])[i] = (f->re[0].lfollow)[i]; } @@ -309,7 +370,7 @@ static int bufsz = 100; op = p; - if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL) + if (buf == NULL && (buf = malloc(bufsz)) == NULL) FATAL("out of space for character class [%.10s...] 1", p); bp = buf; for (i = 0; (c = *p++) != 0; ) { @@ -348,7 +409,7 @@ void overflo(const char *s) { - FATAL("regular expression too big: %.30s...", s); + FATAL("regular expression too big: out of space in %.30s...", s); } void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ @@ -362,20 +423,13 @@ f->re[info(v)].ltype = type(v); f->re[info(v)].lval.np = right(v); while (f->accept >= maxsetvec) { /* guessing here! */ - setvec = reallocarray(setvec, maxsetvec, - 4 * sizeof(int)); - tmpset = reallocarray(tmpset, maxsetvec, - 4 * sizeof(int)); - if (setvec == NULL || tmpset == NULL) - overflo("out of space in cfoll()"); - maxsetvec *= 4; + resizesetvec(__func__); } for (i = 0; i <= f->accept; i++) setvec[i] = 0; setcnt = 0; follow(v); /* computes setvec and setcnt */ - if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL) - overflo("out of space building follow set"); + p = intalloc(setcnt + 1, __func__); f->re[info(v)].lfollow = p; *p = setcnt; for (i = f->accept; i >= 0; i--) @@ -405,13 +459,7 @@ LEAF lp = info(p); /* look for high-water mark of subscripts */ while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */ - setvec = reallocarray(setvec, maxsetvec, - 4 * sizeof(int)); - tmpset = reallocarray(tmpset, maxsetvec, - 4 * sizeof(int)); - if (setvec == NULL || tmpset == NULL) - overflo("out of space in first()"); - maxsetvec *= 4; + resizesetvec(__func__); } if (type(p) == EMPTYRE) { setvec[lp] = 0; @@ -489,7 +537,9 @@ int s, ns; uschar *p = (uschar *) p0; - s = f->reset ? makeinit(f,0) : f->initstat; + s = f->initstat; + assert (s < f->state_count); + if (f->out[s]) return(1); do { @@ -509,15 +559,11 @@ int s, ns; uschar *p = (uschar *) p0; uschar *q; - int i, k; - /* s = f->reset ? makeinit(f,1) : f->initstat; */ - if (f->reset) { - f->initstat = s = makeinit(f,1); - } else { - s = f->initstat; - } - patbeg = (char *) p; + s = f->initstat; + assert(s < f->state_count); + + patbeg = (char *)p; patlen = -1; do { q = p; @@ -529,6 +575,9 @@ s = ns; else s = cgoto(f, s, *q); + + assert(s < f->state_count); + if (s == 1) { /* no transition */ if (patlen >= 0) { patbeg = (char *) p; @@ -546,20 +595,7 @@ } nextin: s = 2; - if (f->reset) { - for (i = 2; i <= f->curstat; i++) - xfree(f->posns[i]); - k = *f->posns[0]; - if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) - overflo("out of space in pmatch"); - for (i = 0; i <= k; i++) - (f->posns[2])[i] = (f->posns[0])[i]; - f->initstat = f->curstat = 2; - f->out[2] = f->out[0]; - for (i = 0; i < NCHARS; i++) - f->gototab[2][i] = 0; - } - } while (*p++ != 0); + } while (*p++); return (0); } @@ -568,14 +604,11 @@ int s, ns; uschar *p = (uschar *) p0; uschar *q; - int i, k; - /* s = f->reset ? makeinit(f,1) : f->initstat; */ - if (f->reset) { - f->initstat = s = makeinit(f,1); - } else { - s = f->initstat; - } + s = f->initstat; + assert(s < f->state_count); + + patbeg = (char *)p; patlen = -1; while (*p) { q = p; @@ -603,19 +636,6 @@ } nnextin: s = 2; - if (f->reset) { - for (i = 2; i <= f->curstat; i++) - xfree(f->posns[i]); - k = *f->posns[0]; - if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) - overflo("out of state space"); - for (i = 0; i <= k; i++) - (f->posns[2])[i] = (f->posns[0])[i]; - f->initstat = f->curstat = 2; - f->out[2] = f->out[0]; - for (i = 0; i < NCHARS; i++) - f->gototab[2][i] = 0; - } p++; } return (0); @@ -913,7 +933,7 @@ } else if (special_case == REPEAT_ZERO) { size += 2; /* just a null ERE: () */ } - if ((buf = (uschar *) malloc(size+1)) == NULL) + if ((buf = malloc(size + 1)) == NULL) FATAL("out of space in reg expr %.10s..", lastre); memcpy(buf, basestr, prefix_length); /* copy prefix */ j = prefix_length; @@ -1039,7 +1059,7 @@ rlxval = c; return CHAR; case '[': - if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL) + if (buf == NULL && (buf = malloc(bufsz)) == NULL) FATAL("out of space in reg expr %.10s..", lastre); bp = buf; if (*prestr == '^') { @@ -1207,20 +1227,17 @@ int cgoto(fa *f, int s, int c) { - int i, j, k; int *p, *q; + int i, j, k; assert(c == HAT || c < NCHARS); while (f->accept >= maxsetvec) { /* guessing here! */ - setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(int)); - tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(int)); - if (setvec == NULL || tmpset == NULL) - overflo("out of space in cgoto()"); - maxsetvec *= 4; + resizesetvec(__func__); } for (i = 0; i <= f->accept; i++) setvec[i] = 0; setcnt = 0; + resize_state(f, s); /* compute positions of gototab[s,c] into setvec */ p = f->posns[s]; for (i = 1; i <= *p; i++) { @@ -1234,13 +1251,7 @@ q = f->re[p[i]].lfollow; for (j = 1; j <= *q; j++) { if (q[j] >= maxsetvec) { - setvec = reallocarray(setvec, - maxsetvec, 4 * sizeof(int)); - tmpset = reallocarray(tmpset, - maxsetvec, 4 * sizeof(int)); - if (setvec == 0 || tmpset == 0) - overflo("cgoto overflow"); - maxsetvec *= 4; + resizesetvec(__func__); } if (setvec[q[j]] == 0) { setcnt++; @@ -1257,6 +1268,7 @@ if (setvec[i]) { tmpset[j++] = i; } + resize_state(f, f->curstat > s ? f->curstat : s); /* tmpset == previous state? */ for (i = 1; i <= f->curstat; i++) { p = f->posns[i]; @@ -1272,18 +1284,12 @@ } /* add tmpset to current set of states */ - if (f->curstat >= NSTATES-1) { - f->curstat = 2; - f->reset = 1; - for (i = 2; i < NSTATES; i++) - xfree(f->posns[i]); - } else - ++(f->curstat); + ++(f->curstat); + resize_state(f, f->curstat); for (i = 0; i < NCHARS; i++) f->gototab[f->curstat][i] = 0; xfree(f->posns[f->curstat]); - if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL) - overflo("out of space in cgoto"); + p = intalloc(setcnt + 1, __func__); f->posns[f->curstat] = p; f->gototab[s][c] = f->curstat; @@ -1303,6 +1309,8 @@ if (f == NULL) return; + for (i = 0; i < f->state_count; i++) + xfree(f->gototab[i]) for (i = 0; i <= f->curstat; i++) xfree(f->posns[i]); for (i = 0; i <= f->accept; i++) { @@ -1311,5 +1319,8 @@ xfree((f->re[i].lval.np)); } xfree(f->restr); + xfree(f->out); + xfree(f->posns); + xfree(f->gototab); xfree(f); }