version 1.48, 2023/11/22 01:01:21 |
version 1.49, 2023/11/25 16:31:33 |
|
|
mechanism of the goto table used 8-bit byte indices into the |
mechanism of the goto table used 8-bit byte indices into the |
gototab entries to compute the next state. Unicode is a lot |
gototab entries to compute the next state. Unicode is a lot |
bigger, so the gototab entries are now structs with a character |
bigger, so the gototab entries are now structs with a character |
and a next state, and there is a linear search of the characters |
and a next state. These are sorted by code point and binary |
to find the state. (Yes, this is slower, by a significant |
searched. |
amount. Tough.) |
|
|
|
Throughout the RE mechanism in b.c, utf-8 characters are |
Throughout the RE mechanism in b.c, utf-8 characters are |
converted to their utf-32 value. This mostly shows up in |
converted to their utf-32 value. This mostly shows up in |
|
|
|
|
*/ |
*/ |
|
|
|
static int entry_cmp(const void *l, const void *r); |
static int get_gototab(fa*, int, int); |
static int get_gototab(fa*, int, int); |
static int set_gototab(fa*, int, int, int); |
static int set_gototab(fa*, int, int, int); |
static void reset_gototab(fa*, int); |
static void clear_gototab(fa*, int); |
extern int u8_rune(int *, const uschar *); |
extern int u8_rune(int *, const uschar *); |
|
|
static int * |
static int * |
|
|
static void |
static void |
resize_state(fa *f, int state) |
resize_state(fa *f, int state) |
{ |
{ |
gtt **p; |
gtt *p; |
uschar *p2; |
uschar *p2; |
int **p3; |
int **p3; |
int i, new_count; |
int i, new_count; |
|
|
|
|
new_count = state + 10; /* needs to be tuned */ |
new_count = state + 10; /* needs to be tuned */ |
|
|
p = (gtt **) reallocarray(f->gototab, new_count, sizeof(f->gototab[0])); |
p = (gtt *) reallocarray(f->gototab, new_count, sizeof(gtt)); |
if (p == NULL) |
if (p == NULL) |
goto out; |
goto out; |
f->gototab = p; |
f->gototab = p; |
|
|
f->posns = p3; |
f->posns = p3; |
|
|
for (i = f->state_count; i < new_count; ++i) { |
for (i = f->state_count; i < new_count; ++i) { |
f->gototab[i] = (gtt *) calloc(NCHARS, sizeof(**f->gototab)); |
f->gototab[i].entries = (gtte *) calloc(NCHARS, sizeof(gtte)); |
if (f->gototab[i] == NULL) |
if (f->gototab[i].entries == NULL) |
goto out; |
goto out; |
f->out[i] = 0; |
f->gototab[i].allocated = NCHARS; |
|
f->gototab[i].inuse = 0; |
|
f->out[i] = 0; |
f->posns[i] = NULL; |
f->posns[i] = NULL; |
} |
} |
f->gototab_len = NCHARS; /* should be variable, growable */ |
|
f->state_count = new_count; |
f->state_count = new_count; |
return; |
return; |
out: |
out: |
|
|
} |
} |
if ((f->posns[2])[1] == f->accept) |
if ((f->posns[2])[1] == f->accept) |
f->out[2] = 1; |
f->out[2] = 1; |
reset_gototab(f, 2); |
clear_gototab(f, 2); |
f->curstat = cgoto(f, 2, HAT); |
f->curstat = cgoto(f, 2, HAT); |
if (anchor) { |
if (anchor) { |
*f->posns[2] = k-1; /* leave out position 0 */ |
*f->posns[2] = k-1; /* leave out position 0 */ |
|
|
return(0); |
return(0); |
} |
} |
|
|
|
/*
 * resize_gototab: double the capacity of the transition table that
 * belongs to `state`.
 *
 * The existing entries are carried over by the reallocation and the
 * newly added tail is zero-filled, so unused slots read as empty.
 * Allocation failure is reported through overflo().
 */
static void resize_gototab(fa *f, int state)
{
	gtt *tab = &f->gototab[state];
	size_t orig_size = tab->allocated;
	size_t new_size = orig_size * 2;
	gtte *p;

	/*
	 * reallocarray() refuses an element-count * element-size product
	 * that would wrap, which a hand-written multiplication passed to
	 * realloc() would silently accept.
	 */
	p = (gtte *) reallocarray(tab->entries, new_size, sizeof(gtte));
	if (p == NULL)
		overflo(__func__);

	/* only the second half is new memory; clean it out */
	memset(p + orig_size, 0, (new_size - orig_size) * sizeof(gtte));

	tab->allocated = new_size;	/* update gototab info */
	tab->entries = p;
}
|
|
/*
 * get_gototab: look up the transition recorded for character `ch`
 * in the table of `state`.
 *
 * The first `inuse` entries of the table are binary-searched with
 * entry_cmp, so they must be ordered by `ch`.
 *
 * Returns the stored next state, or 0 when no entry exists for `ch`.
 */
static int get_gototab(fa *f, int state, int ch) /* hide gototab implementation */
{
	gtte key;
	gtte *item;

	key.ch = ch;
	key.state = 0;	/* irrelevant: entry_cmp looks at ch only */

	item = bsearch(&key, f->gototab[state].entries,
		f->gototab[state].inuse, sizeof(gtte),
		entry_cmp);

	if (item == NULL)
		return 0;
	return item->state;
}
|
|
static void reset_gototab(fa *f, int state) /* hide gototab inplementation */ |
static int entry_cmp(const void *l, const void *r) |
{ |
{ |
memset(f->gototab[state], 0, f->gototab_len * sizeof(**f->gototab)); |
const gtte *left, *right; |
|
|
|
left = (const gtte *) l; |
|
right = (const gtte *) r; |
|
|
|
return left->ch - right->ch; |
} |
} |
|
|
/*
 * set_gototab: record that `state` goes to `val` on character `ch`.
 *
 * The table is kept ordered by `ch` so that lookups can binary-search
 * it.  Four cases:
 *   - empty table: store in slot 0;
 *   - `ch` greater than the current last entry: append, order holds;
 *   - `ch` already present: overwrite its state;
 *   - otherwise: append and re-sort.
 * The table is grown through resize_gototab() when it is about to fill.
 *
 * Returns the state now stored for `ch`.
 */
static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab implementation */
{
	gtt *tab = &f->gototab[state];
	gtte key;
	gtte *item;

	if (tab->inuse == 0) {
		tab->entries[0].ch = ch;
		tab->entries[0].state = val;
		tab->inuse++;
		return val;
	}

	if (ch > tab->entries[tab->inuse - 1].ch) {
		/* not seen yet and larger than everything stored: append */
		if (tab->inuse + 1 >= tab->allocated)
			resize_gototab(f, state);

		/*
		 * Write to the first free slot (index inuse), not inuse-1,
		 * which would clobber the current largest entry.
		 */
		tab->entries[tab->inuse].ch = ch;
		tab->entries[tab->inuse].state = val;
		tab->inuse++;
		return val;
	}

	/* maybe we have it, maybe we don't */
	key.ch = ch;
	key.state = 0;	/* irrelevant: entry_cmp looks at ch only */
	item = bsearch(&key, tab->entries, tab->inuse, sizeof(gtte),
		entry_cmp);
	if (item != NULL) {
		/* we have it, update state and return */
		item->state = val;
		return item->state;
	}

	/* new character that belongs in the middle: append, then re-sort */
	if (tab->inuse + 1 >= tab->allocated)
		resize_gototab(f, state);

	/*
	 * Store first, count afterwards: bumping inuse before the store
	 * would leave a zeroed hole inside the sorted range and put the
	 * new entry just outside it.
	 */
	tab->entries[tab->inuse].ch = ch;
	tab->entries[tab->inuse].state = val;
	tab->inuse++;

	qsort(tab->entries, tab->inuse, sizeof(gtte), entry_cmp);

	return val; /* not used anywhere at the moment */
}
|
|
|
/*
 * clear_gototab: drop every transition recorded for `state`.
 *
 * All allocated slots are zeroed and the in-use count goes back to 0;
 * the allocation itself is kept.
 */
static void clear_gototab(fa *f, int state)
{
	gtt *tab = &f->gototab[state];

	memset(tab->entries, 0, tab->allocated * sizeof(gtte));
	tab->inuse = 0;
}
|
|
int match(fa *f, const char *p0) /* shortest match ? */ |
int match(fa *f, const char *p0) /* shortest match ? */ |
{ |
{ |
int s, ns; |
int s, ns; |
|
|
/* add tmpset to current set of states */ |
/* add tmpset to current set of states */ |
++(f->curstat); |
++(f->curstat); |
resize_state(f, f->curstat); |
resize_state(f, f->curstat); |
|
clear_gototab(f, f->curstat); |
xfree(f->posns[f->curstat]); |
xfree(f->posns[f->curstat]); |
p = intalloc(setcnt + 1, __func__); |
p = intalloc(setcnt + 1, __func__); |
|
|
|
|
if (f == NULL) |
if (f == NULL) |
return; |
return; |
for (i = 0; i < f->state_count; i++) |
for (i = 0; i < f->state_count; i++) |
xfree(f->gototab[i]) |
xfree(f->gototab[i].entries); |
|
xfree(f->gototab); |
for (i = 0; i <= f->curstat; i++) |
for (i = 0; i <= f->curstat; i++) |
xfree(f->posns[i]); |
xfree(f->posns[i]); |
for (i = 0; i <= f->accept; i++) { |
for (i = 0; i <= f->accept; i++) { |