version 1.9, 2001/07/12 05:16:53 |
version 1.10, 2001/09/08 00:12:40 |
|
|
#include "awk.h" |
#include "awk.h" |
#include "ytab.h" |
#include "ytab.h" |
|
|
#define HAT (NCHARS-1) /* matches ^ in regular expr */ |
#define HAT (NCHARS-2) /* matches ^ in regular expr */ |
/* NCHARS is 2**n */ |
/* NCHARS is 2**n */ |
#define MAXLIN 22 |
#define MAXLIN 22 |
|
|
|
|
|
|
int rtok; /* next token in current re */ |
int rtok; /* next token in current re */ |
int rlxval; |
int rlxval; |
char *rlxstr; |
static uschar *rlxstr; |
char *prestr; /* current position in current re */ |
static uschar *prestr; /* current position in current re */ |
char *lastre; /* origin of last re */ |
static uschar *lastre; /* origin of last re */ |
|
|
static int setcnt; |
static int setcnt; |
static int poscnt; |
static int poscnt; |
|
|
&& strcmp(fatab[i]->restr, s) == 0) { |
&& strcmp(fatab[i]->restr, s) == 0) { |
fatab[i]->use = now++; |
fatab[i]->use = now++; |
return fatab[i]; |
return fatab[i]; |
} |
} |
pfa = mkdfa(s, anchor); |
pfa = mkdfa(s, anchor); |
if (nfatab < NFA) { /* room for another */ |
if (nfatab < NFA) { /* room for another */ |
fatab[nfatab] = pfa; |
fatab[nfatab] = pfa; |
|
|
*f->posns[1] = 0; |
*f->posns[1] = 0; |
f->initstat = makeinit(f, anchor); |
f->initstat = makeinit(f, anchor); |
f->anchor = anchor; |
f->anchor = anchor; |
f->restr = tostring(s); |
f->restr = (uschar *) tostring(s); |
return f; |
return f; |
} |
} |
|
|
|
|
|
|
int hexstr(char **pp) /* find and eval hex string at pp, return new p */ |
int hexstr(char **pp) /* find and eval hex string at pp, return new p */ |
{ /* only pick up one 8-bit byte (2 chars) */ |
{ /* only pick up one 8-bit byte (2 chars) */ |
char *p; |
uschar *p; |
int n = 0; |
int n = 0; |
int i; |
int i; |
|
|
for (i = 0, p = *pp; i < 2 && isxdigit(*p); i++, p++) { |
for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) { |
if (isdigit(*p)) |
if (isdigit(*p)) |
n = 16 * n + *p - '0'; |
n = 16 * n + *p - '0'; |
else if (*p >= 'a' && *p <= 'f') |
else if (*p >= 'a' && *p <= 'f') |
|
|
else if (*p >= 'A' && *p <= 'F') |
else if (*p >= 'A' && *p <= 'F') |
n = 16 * n + *p - 'A' + 10; |
n = 16 * n + *p - 'A' + 10; |
} |
} |
*pp = p; |
*pp = (char *) p; |
return n; |
return n; |
} |
} |
|
|
|
|
return c; |
return c; |
} |
} |
|
|
char *cclenter(char *p) /* add a character class */ |
char *cclenter(char *argp) /* add a character class */ |
{ |
{ |
int i, c, c2; |
int i, c, c2; |
char *op, *bp; |
uschar *p = (uschar *) argp; |
static char *buf = 0; |
uschar *op, *bp; |
|
static uschar *buf = 0; |
static int bufsz = 100; |
static int bufsz = 100; |
|
|
op = p; |
op = p; |
if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) |
if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) |
FATAL("out of space for character class [%.10s...] 1", p); |
FATAL("out of space for character class [%.10s...] 1", p); |
bp = buf; |
bp = buf; |
for (i = 0; (c = *p++) != 0; ) { |
for (i = 0; (c = *p++) != 0; ) { |
if (c == '\\') { |
if (c == '\\') { |
c = quoted(&p); |
c = quoted((char **) &p); |
} else if (c == '-' && i > 0 && bp[-1] != 0) { |
} else if (c == '-' && i > 0 && bp[-1] != 0) { |
if (*p != 0) { |
if (*p != 0) { |
c = bp[-1]; |
c = bp[-1]; |
c2 = *p++; |
c2 = *p++; |
if (c2 == '\\') |
if (c2 == '\\') |
c2 = quoted(&p); |
c2 = quoted((char **) &p); |
if (c > c2) { /* empty; ignore */ |
if (c > c2) { /* empty; ignore */ |
bp--; |
bp--; |
i--; |
i--; |
continue; |
continue; |
} |
} |
while (c < c2) { |
while (c < c2) { |
if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, 0)) |
if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0)) |
FATAL("out of space for character class [%.10s...] 2", p); |
FATAL("out of space for character class [%.10s...] 2", p); |
*bp++ = ++c; |
*bp++ = ++c; |
i++; |
i++; |
|
|
continue; |
continue; |
} |
} |
} |
} |
if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, 0)) |
if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0)) |
FATAL("out of space for character class [%.10s...] 3", p); |
FATAL("out of space for character class [%.10s...] 3", p); |
*bp++ = c; |
*bp++ = c; |
i++; |
i++; |
|
|
*bp = 0; |
*bp = 0; |
dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) ); |
dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) ); |
xfree(op); |
xfree(op); |
return(tostring(buf)); |
return (char *) tostring((char *) buf); |
} |
} |
|
|
void overflo(char *s) |
void overflo(char *s) |
|
|
} |
} |
} |
} |
|
|
int member(int c, char *s) /* is c in s? */ |
int member(int c, char *sarg) /* is c in s? */ |
{ |
{ |
|
uschar *s = (uschar *) sarg; |
|
|
while (*s) |
while (*s) |
if (c == *s++) |
if (c == *s++) |
return(1); |
return(1); |
|
|
Node *np; |
Node *np; |
|
|
dprintf( ("reparse <%s>\n", p) ); |
dprintf( ("reparse <%s>\n", p) ); |
lastre = prestr = p; /* prestr points to string to be parsed */ |
lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */ |
rtok = relex(); |
rtok = relex(); |
if (rtok == '\0') |
if (rtok == '\0') |
FATAL("empty regular expression"); |
FATAL("empty regular expression"); |
|
|
rtok = relex(); |
rtok = relex(); |
return (unary(op2(DOT, NIL, NIL))); |
return (unary(op2(DOT, NIL, NIL))); |
case CCL: |
case CCL: |
np = op2(CCL, NIL, (Node*) cclenter(rlxstr)); |
np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr)); |
rtok = relex(); |
rtok = relex(); |
return (unary(np)); |
return (unary(np)); |
case NCCL: |
case NCCL: |
np = op2(NCCL, NIL, (Node *) cclenter(rlxstr)); |
np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr)); |
rtok = relex(); |
rtok = relex(); |
return (unary(np)); |
return (unary(np)); |
case '^': |
case '^': |
|
|
{ |
{ |
int c, n; |
int c, n; |
int cflag; |
int cflag; |
static char *buf = 0; |
static uschar *buf = 0; |
static int bufsz = 100; |
static int bufsz = 100; |
char *bp; |
uschar *bp; |
|
|
switch (c = *prestr++) { |
switch (c = *prestr++) { |
case '|': return OR; |
case '|': return OR; |
|
|
case ')': |
case ')': |
return c; |
return c; |
case '\\': |
case '\\': |
rlxval = quoted(&prestr); |
rlxval = quoted((char **) &prestr); |
return CHAR; |
return CHAR; |
default: |
default: |
rlxval = c; |
rlxval = c; |
return CHAR; |
return CHAR; |
case '[': |
case '[': |
if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) |
if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) |
FATAL("out of space in reg expr %.10s..", lastre); |
FATAL("out of space in reg expr %.10s..", lastre); |
bp = buf; |
bp = buf; |
if (*prestr == '^') { |
if (*prestr == '^') { |
|
|
else |
else |
cflag = 0; |
cflag = 0; |
n = 2 * strlen(prestr)+1; |
n = 2 * strlen(prestr)+1; |
if (!adjbuf(&buf, &bufsz, n, n, &bp, 0)) |
if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0)) |
FATAL("out of space for reg expr %.10s...", lastre); |
FATAL("out of space for reg expr %.10s...", lastre); |
for (; ; ) { |
for (; ; ) { |
if ((c = *prestr++) == '\\') { |
if ((c = *prestr++) == '\\') { |
|
|
if ((c = *prestr++) == '\0') |
if ((c = *prestr++) == '\0') |
FATAL("nonterminated character class %.20s...", lastre); |
FATAL("nonterminated character class %.20s...", lastre); |
*bp++ = c; |
*bp++ = c; |
} else if (c == '\n') { |
/* } else if (c == '\n') { */ |
FATAL("newline in character class %.20s...", lastre); |
/* FATAL("newline in character class %.20s...", lastre); */ |
} else if (c == '\0') { |
} else if (c == '\0') { |
FATAL("nonterminated character class %.20s", lastre); |
FATAL("nonterminated character class %.20s", lastre); |
} else if (bp == buf) { /* 1st char is special */ |
} else if (bp == buf) { /* 1st char is special */ |
*bp++ = c; |
*bp++ = c; |
} else if (c == ']') { |
} else if (c == ']') { |
*bp++ = 0; |
*bp++ = 0; |
rlxstr = tostring(buf); |
rlxstr = (uschar *) tostring((char *) buf); |
if (cflag == 0) |
if (cflag == 0) |
return CCL; |
return CCL; |
else |
else |
|
|
int i, j, k; |
int i, j, k; |
int *p, *q; |
int *p, *q; |
|
|
if (c < 0) |
if (c < 0 || c > 255) |
FATAL("can't happen: neg char %d in cgoto", c); |
FATAL("can't happen: neg char %d in cgoto", c); |
while (f->accept >= maxsetvec) { /* guessing here! */ |
while (f->accept >= maxsetvec) { /* guessing here! */ |
maxsetvec *= 4; |
maxsetvec *= 4; |
|
|
if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) |
if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) |
|| (k == DOT && c != 0 && c != HAT) |
|| (k == DOT && c != 0 && c != HAT) |
|| (k == ALL && c != 0) |
|| (k == ALL && c != 0) |
|| (k == CCL && member(c, f->re[p[i]].lval.up)) |
|| (k == CCL && member(c, (char *) f->re[p[i]].lval.up)) |
|| (k == NCCL && !member(c, f->re[p[i]].lval.up) && c != 0 && c != HAT)) { |
|| (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) { |
q = f->re[p[i]].lfollow; |
q = f->re[p[i]].lfollow; |
for (j = 1; j <= *q; j++) { |
for (j = 1; j <= *q; j++) { |
if (q[j] >= maxsetvec) { |
if (q[j] >= maxsetvec) { |