version 1.11, 2002/12/19 21:24:28 |
version 1.12, 2004/12/30 01:52:48 |
|
|
#include "awk.h" |
#include "awk.h" |
#include "ytab.h" |
#include "ytab.h" |
|
|
#define HAT (NCHARS-2) /* matches ^ in regular expr */ |
#define HAT (NCHARS+2) /* matches ^ in regular expr */ |
/* NCHARS is 2**n */ |
/* NCHARS is 2**n */ |
#define MAXLIN 22 |
#define MAXLIN 22 |
|
|
|
|
if (f->out[s]) |
if (f->out[s]) |
return(1); |
return(1); |
do { |
do { |
|
assert(*p < NCHARS); |
if ((ns = f->gototab[s][*p]) != 0) |
if ((ns = f->gototab[s][*p]) != 0) |
s = ns; |
s = ns; |
else |
else |
|
|
uschar *q; |
uschar *q; |
int i, k; |
int i, k; |
|
|
s = f->reset ? makeinit(f,1) : f->initstat; |
/* s = f->reset ? makeinit(f,1) : f->initstat; */ |
|
if (f->reset) { |
|
f->initstat = s = makeinit(f,1); |
|
} else { |
|
s = f->initstat; |
|
} |
patbeg = (char *) p; |
patbeg = (char *) p; |
patlen = -1; |
patlen = -1; |
do { |
do { |
|
|
do { |
do { |
if (f->out[s]) /* final state */ |
if (f->out[s]) /* final state */ |
patlen = q-p; |
patlen = q-p; |
|
assert(*q < NCHARS); |
if ((ns = f->gototab[s][*q]) != 0) |
if ((ns = f->gototab[s][*q]) != 0) |
s = ns; |
s = ns; |
else |
else |
|
|
uschar *q; |
uschar *q; |
int i, k; |
int i, k; |
|
|
s = f->reset ? makeinit(f,1) : f->initstat; |
/* s = f->reset ? makeinit(f,1) : f->initstat; */ |
|
if (f->reset) { |
|
f->initstat = s = makeinit(f,1); |
|
} else { |
|
s = f->initstat; |
|
} |
patlen = -1; |
patlen = -1; |
while (*p) { |
while (*p) { |
q = p; |
q = p; |
do { |
do { |
if (f->out[s]) /* final state */ |
if (f->out[s]) /* final state */ |
patlen = q-p; |
patlen = q-p; |
|
assert(*q < NCHARS); |
if ((ns = f->gototab[s][*q]) != 0) |
if ((ns = f->gototab[s][*q]) != 0) |
s = ns; |
s = ns; |
else |
else |
|
|
* relex(), the expanded character class (prior to range expansion) |
* relex(), the expanded character class (prior to range expansion) |
* must be less than twice the size of their full name. |
* must be less than twice the size of their full name. |
*/ |
*/ |
|
|
|
/* Because isblank doesn't show up in any of the header files on any |
|
* system i use, it's defined here. if some other locale has a richer |
|
* definition of "blank", define HAS_ISBLANK and provide your own |
|
* version. |
|
* the parentheses here are an attempt to find a path through the maze |
|
* of macro definition and/or function and/or version provided. thanks |
|
* to nelson beebe for the suggestion; let's see if it works everywhere. |
|
*/ |
|
|
|
#ifndef HAS_ISBLANK |
|
|
|
/*
 * Fallback isblank(): returns nonzero when c is a space or a horizontal
 * tab, and zero for every other value.  The function name is wrapped in
 * parentheses so that a function-like macro called isblank, if one is
 * visible at this point, is not expanded in the definition.
 */
int (isblank)(int c) |

{ |

return c==' ' || c=='\t'; |

} |
|
|
|
#endif |
|
|
struct charclass { |
struct charclass { |
const char *cc_name; |
const char *cc_name; |
int cc_namelen; |
int cc_namelen; |
const char *cc_expand; |
int (*cc_func)(int); |
} charclasses[] = { |
} charclasses[] = { |
{ "alnum", 5, "0-9A-Za-z" }, |
{ "alnum", 5, isalnum }, |
{ "alpha", 5, "A-Za-z" }, |
{ "alpha", 5, isalpha }, |
{ "blank", 5, " \t" }, |
{ "blank", 5, isblank }, |
{ "cntrl", 5, "\000-\037\177" }, |
{ "cntrl", 5, iscntrl }, |
{ "digit", 5, "0-9" }, |
{ "digit", 5, isdigit }, |
{ "graph", 5, "\041-\176" }, |
{ "graph", 5, isgraph }, |
{ "lower", 5, "a-z" }, |
{ "lower", 5, islower }, |
{ "print", 5, " \041-\176" }, |
{ "print", 5, isprint }, |
{ "punct", 5, "\041-\057\072-\100\133-\140\173-\176" }, |
{ "punct", 5, ispunct }, |
{ "space", 5, " \f\n\r\t\v" }, |
{ "space", 5, isspace }, |
{ "upper", 5, "A-Z" }, |
{ "upper", 5, isupper }, |
{ "xdigit", 6, "0-9A-Fa-f" }, |
{ "xdigit", 6, isxdigit }, |
{ NULL, 0, NULL }, |
{ NULL, 0, NULL }, |
}; |
}; |
|
|
|
|
static int bufsz = 100; |
static int bufsz = 100; |
uschar *bp; |
uschar *bp; |
struct charclass *cc; |
struct charclass *cc; |
const uschar *p; |
int i; |
|
|
switch (c = *prestr++) { |
switch (c = *prestr++) { |
case '|': return OR; |
case '|': return OR; |
|
|
if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && |
if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && |
prestr[2 + cc->cc_namelen] == ']') { |
prestr[2 + cc->cc_namelen] == ']') { |
prestr += cc->cc_namelen + 3; |
prestr += cc->cc_namelen + 3; |
for (p = (const uschar *) cc->cc_expand; *p; p++) |
for (i = 0; i < NCHARS; i++) { |
*bp++ = *p; |
if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0)) |
|
FATAL("out of space for reg expr %.10s...", lastre); |
|
if (cc->cc_func(i)) { |
|
*bp++ = i; |
|
n++; |
|
} |
|
} |
} else |
} else |
*bp++ = c; |
*bp++ = c; |
} else if (c == '\0') { |
} else if (c == '\0') { |
|
|
int i, j, k; |
int i, j, k; |
int *p, *q; |
int *p, *q; |
|
|
if (c < 0 || c > 255) |
assert(c == HAT || c < NCHARS); |
FATAL("can't happen: neg char %d in cgoto", c); |
|
while (f->accept >= maxsetvec) { /* guessing here! */ |
while (f->accept >= maxsetvec) { /* guessing here! */ |
maxsetvec *= 4; |
maxsetvec *= 4; |
setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); |
setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); |