Annotation of src/usr.bin/lex/dfa.c, Revision 1.8
1.8 ! tedu 1: /* $OpenBSD: dfa.c,v 1.7 2015/11/19 19:43:40 tedu Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /* dfa - DFA construction routines */
4:
1.7 tedu 5: /* Copyright (c) 1990 The Regents of the University of California. */
6: /* All rights reserved. */
7:
8: /* This code is derived from software contributed to Berkeley by */
9: /* Vern Paxson. */
1.1 deraadt 10:
1.7 tedu 11: /* The United States Government has rights in this work pursuant */
12: /* to contract no. DE-AC03-76SF00098 between the United States */
13: /* Department of Energy and the University of California. */
14:
15: /* Redistribution and use in source and binary forms, with or without */
16: /* modification, are permitted provided that the following conditions */
17: /* are met: */
18:
19: /* 1. Redistributions of source code must retain the above copyright */
20: /* notice, this list of conditions and the following disclaimer. */
21: /* 2. Redistributions in binary form must reproduce the above copyright */
22: /* notice, this list of conditions and the following disclaimer in the */
23: /* documentation and/or other materials provided with the distribution. */
24:
25: /* Neither the name of the University nor the names of its contributors */
26: /* may be used to endorse or promote products derived from this software */
27: /* without specific prior written permission. */
28:
29: /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30: /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31: /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32: /* PURPOSE. */
1.1 deraadt 33:
34: #include "flexdef.h"
1.7 tedu 35: #include "tables.h"
1.1 deraadt 36:
37: /* declare functions that have forward references */
38:
1.7 tedu 39: void dump_associated_rules PROTO ((FILE *, int));
40: void dump_transitions PROTO ((FILE *, int[]));
41: void sympartition PROTO ((int[], int, int[], int[]));
42: int symfollowset PROTO ((int[], int, int, int[]));
1.1 deraadt 43:
44:
45: /* check_for_backing_up - check a DFA state for backing up
46: *
47: * synopsis
48: * void check_for_backing_up( int ds, int state[numecs] );
49: *
50: * ds is the number of the state to check and state[] is its out-transitions,
51: * indexed by equivalence class.
52: */
53:
1.7 tedu 54: void check_for_backing_up (ds, state)
55: int ds;
56: int state[];
57: {
58: if ((reject && !dfaacc[ds].dfaacc_set) || (!reject && !dfaacc[ds].dfaacc_state)) { /* state is non-accepting */
1.1 deraadt 59: ++num_backing_up;
60:
1.7 tedu 61: if (backing_up_report) {
62: fprintf (backing_up_file,
63: _("State #%d is non-accepting -\n"), ds);
1.1 deraadt 64:
65: /* identify the state */
1.7 tedu 66: dump_associated_rules (backing_up_file, ds);
1.1 deraadt 67:
68: /* Now identify it further using the out- and
69: * jam-transitions.
70: */
1.7 tedu 71: dump_transitions (backing_up_file, state);
1.1 deraadt 72:
1.7 tedu 73: putc ('\n', backing_up_file);
1.1 deraadt 74: }
75: }
1.7 tedu 76: }
1.1 deraadt 77:
78:
79: /* check_trailing_context - check to see if NFA state set constitutes
80: * "dangerous" trailing context
81: *
82: * synopsis
83: * void check_trailing_context( int nfa_states[num_states+1], int num_states,
84: * int accset[nacc+1], int nacc );
85: *
86: * NOTES
87: * Trailing context is "dangerous" if both the head and the trailing
88: * part are of variable size \and/ there's a DFA state which contains
89: * both an accepting state for the head part of the rule and NFA states
90: * which occur after the beginning of the trailing context.
91: *
92: * When such a rule is matched, it's impossible to tell if having been
93: * in the DFA state indicates the beginning of the trailing context or
94: * further-along scanning of the pattern. In these cases, a warning
95: * message is issued.
96: *
97: * nfa_states[1 .. num_states] is the list of NFA states in the DFA.
98: * accset[1 .. nacc] is the list of accepting numbers for the DFA state.
99: */
100:
1.7 tedu 101: void check_trailing_context (nfa_states, num_states, accset, nacc)
102: int *nfa_states, num_states;
103: int *accset;
104: int nacc;
105: {
1.5 mpech 106: int i, j;
1.1 deraadt 107:
1.7 tedu 108: for (i = 1; i <= num_states; ++i) {
109: int ns = nfa_states[i];
1.5 mpech 110: int type = state_type[ns];
111: int ar = assoc_rule[ns];
1.1 deraadt 112:
1.7 tedu 113: if (type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE) { /* do nothing */
114: }
1.1 deraadt 115:
1.7 tedu 116: else if (type == STATE_TRAILING_CONTEXT) {
1.1 deraadt 117: /* Potential trouble. Scan set of accepting numbers
118: * for the one marking the end of the "head". We
119: * assume that this looping will be fairly cheap
120: * since it's rare that an accepting number set
121: * is large.
122: */
1.7 tedu 123: for (j = 1; j <= nacc; ++j)
124: if (accset[j] & YY_TRAILING_HEAD_MASK) {
125: line_warning (_
126: ("dangerous trailing context"),
127: rule_linenum[ar]);
1.1 deraadt 128: return;
1.7 tedu 129: }
1.1 deraadt 130: }
131: }
1.7 tedu 132: }
1.1 deraadt 133:
134:
135: /* dump_associated_rules - list the rules associated with a DFA state
136: *
137: * Goes through the set of NFA states associated with the DFA and
138: * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
139: * and writes a report to the given file.
140: */
141:
1.7 tedu 142: void dump_associated_rules (file, ds)
143: FILE *file;
144: int ds;
145: {
1.5 mpech 146: int i, j;
147: int num_associated_rules = 0;
1.7 tedu 148: int rule_set[MAX_ASSOC_RULES + 1];
149: int *dset = dss[ds];
150: int size = dfasiz[ds];
1.1 deraadt 151:
1.7 tedu 152: for (i = 1; i <= size; ++i) {
1.5 mpech 153: int rule_num = rule_linenum[assoc_rule[dset[i]]];
1.1 deraadt 154:
1.7 tedu 155: for (j = 1; j <= num_associated_rules; ++j)
156: if (rule_num == rule_set[j])
1.1 deraadt 157: break;
158:
1.7 tedu 159: if (j > num_associated_rules) { /* new rule */
160: if (num_associated_rules < MAX_ASSOC_RULES)
161: rule_set[++num_associated_rules] =
162: rule_num;
1.1 deraadt 163: }
1.7 tedu 164: }
1.1 deraadt 165:
1.7 tedu 166: qsort (&rule_set [1], num_associated_rules, sizeof (rule_set [1]), intcmp);
1.1 deraadt 167:
1.7 tedu 168: fprintf (file, _(" associated rule line numbers:"));
1.1 deraadt 169:
1.7 tedu 170: for (i = 1; i <= num_associated_rules; ++i) {
171: if (i % 8 == 1)
172: putc ('\n', file);
1.1 deraadt 173:
1.7 tedu 174: fprintf (file, "\t%d", rule_set[i]);
175: }
1.1 deraadt 176:
1.7 tedu 177: putc ('\n', file);
178: }
1.1 deraadt 179:
180:
181: /* dump_transitions - list the transitions associated with a DFA state
182: *
183: * synopsis
184: * dump_transitions( FILE *file, int state[numecs] );
185: *
186: * Goes through the set of out-transitions and lists them in human-readable
187: * form (i.e., not as equivalence classes); also lists jam transitions
188: * (i.e., all those which are not out-transitions, plus EOF). The dump
189: * is done to the given file.
190: */
191:
1.7 tedu 192: void dump_transitions (file, state)
193: FILE *file;
194: int state[];
195: {
1.5 mpech 196: int i, ec;
1.7 tedu 197: int out_char_set[CSIZE];
1.1 deraadt 198:
1.7 tedu 199: for (i = 0; i < csize; ++i) {
200: ec = ABS (ecgroup[i]);
1.1 deraadt 201: out_char_set[i] = state[ec];
1.7 tedu 202: }
1.1 deraadt 203:
1.7 tedu 204: fprintf (file, _(" out-transitions: "));
1.1 deraadt 205:
1.7 tedu 206: list_character_set (file, out_char_set);
1.1 deraadt 207:
208: /* now invert the members of the set to get the jam transitions */
1.7 tedu 209: for (i = 0; i < csize; ++i)
210: out_char_set[i] = !out_char_set[i];
1.1 deraadt 211:
1.7 tedu 212: fprintf (file, _("\n jam-transitions: EOF "));
1.1 deraadt 213:
1.7 tedu 214: list_character_set (file, out_char_set);
1.1 deraadt 215:
1.7 tedu 216: putc ('\n', file);
217: }
1.1 deraadt 218:
219:
220: /* epsclosure - construct the epsilon closure of a set of ndfa states
221: *
222: * synopsis
223: * int *epsclosure( int t[num_states], int *numstates_addr,
224: * int accset[num_rules+1], int *nacc_addr,
225: * int *hashval_addr );
226: *
227: * NOTES
228: * The epsilon closure is the set of all states reachable by an arbitrary
229: * number of epsilon transitions, which themselves do not have epsilon
230: * transitions going out, unioned with the set of states which have non-null
231: * accepting numbers. t is an array of size numstates of nfa state numbers.
232: * Upon return, t holds the epsilon closure and *numstates_addr is updated.
233: * accset holds a list of the accepting numbers, and the size of accset is
234: * given by *nacc_addr. t may be subjected to reallocation if it is not
235: * large enough to hold the epsilon closure.
236: *
237: * hashval is the hash value for the dfa corresponding to the state set.
238: */
239:
1.7 tedu 240: int *epsclosure (t, ns_addr, accset, nacc_addr, hv_addr)
241: int *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
242: {
1.5 mpech 243: int stkpos, ns, tsp;
1.7 tedu 244: int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
245: int stkend, nstate;
246: static int did_stk_init = false, *stk;
1.1 deraadt 247:
248: #define MARK_STATE(state) \
1.7 tedu 249: do{ trans1[state] = trans1[state] - MARKER_DIFFERENCE;} while(0)
1.1 deraadt 250:
251: #define IS_MARKED(state) (trans1[state] < 0)
252:
253: #define UNMARK_STATE(state) \
1.7 tedu 254: do{ trans1[state] = trans1[state] + MARKER_DIFFERENCE;} while(0)
1.1 deraadt 255:
256: #define CHECK_ACCEPT(state) \
1.7 tedu 257: do{ \
1.1 deraadt 258: nfaccnum = accptnum[state]; \
259: if ( nfaccnum != NIL ) \
260: accset[++nacc] = nfaccnum; \
1.7 tedu 261: }while(0)
1.1 deraadt 262:
1.7 tedu 263: #define DO_REALLOCATION() \
264: do { \
1.1 deraadt 265: current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
266: ++num_reallocs; \
267: t = reallocate_integer_array( t, current_max_dfa_size ); \
268: stk = reallocate_integer_array( stk, current_max_dfa_size ); \
1.7 tedu 269: }while(0) \
1.1 deraadt 270:
271: #define PUT_ON_STACK(state) \
1.7 tedu 272: do { \
1.1 deraadt 273: if ( ++stkend >= current_max_dfa_size ) \
1.7 tedu 274: DO_REALLOCATION(); \
1.1 deraadt 275: stk[stkend] = state; \
1.7 tedu 276: MARK_STATE(state); \
277: }while(0)
1.1 deraadt 278:
279: #define ADD_STATE(state) \
1.7 tedu 280: do { \
1.1 deraadt 281: if ( ++numstates >= current_max_dfa_size ) \
1.7 tedu 282: DO_REALLOCATION(); \
1.1 deraadt 283: t[numstates] = state; \
284: hashval += state; \
1.7 tedu 285: }while(0)
1.1 deraadt 286:
287: #define STACK_STATE(state) \
1.7 tedu 288: do { \
289: PUT_ON_STACK(state); \
290: CHECK_ACCEPT(state); \
1.1 deraadt 291: if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
1.7 tedu 292: ADD_STATE(state); \
293: }while(0)
1.1 deraadt 294:
295:
1.7 tedu 296: if (!did_stk_init) {
297: stk = allocate_integer_array (current_max_dfa_size);
1.1 deraadt 298: did_stk_init = true;
1.7 tedu 299: }
1.1 deraadt 300:
301: nacc = stkend = hashval = 0;
302:
1.7 tedu 303: for (nstate = 1; nstate <= numstates; ++nstate) {
1.1 deraadt 304: ns = t[nstate];
305:
306: /* The state could be marked if we've already pushed it onto
307: * the stack.
308: */
1.7 tedu 309: if (!IS_MARKED (ns)) {
310: PUT_ON_STACK (ns);
311: CHECK_ACCEPT (ns);
1.1 deraadt 312: hashval += ns;
313: }
1.7 tedu 314: }
1.1 deraadt 315:
1.7 tedu 316: for (stkpos = 1; stkpos <= stkend; ++stkpos) {
1.1 deraadt 317: ns = stk[stkpos];
318: transsym = transchar[ns];
319:
1.7 tedu 320: if (transsym == SYM_EPSILON) {
1.1 deraadt 321: tsp = trans1[ns] + MARKER_DIFFERENCE;
322:
1.7 tedu 323: if (tsp != NO_TRANSITION) {
324: if (!IS_MARKED (tsp))
325: STACK_STATE (tsp);
1.1 deraadt 326:
327: tsp = trans2[ns];
328:
1.7 tedu 329: if (tsp != NO_TRANSITION
330: && !IS_MARKED (tsp))
331: STACK_STATE (tsp);
1.1 deraadt 332: }
333: }
1.7 tedu 334: }
1.1 deraadt 335:
336: /* Clear out "visit" markers. */
337:
1.7 tedu 338: for (stkpos = 1; stkpos <= stkend; ++stkpos) {
339: if (IS_MARKED (stk[stkpos]))
340: UNMARK_STATE (stk[stkpos]);
1.1 deraadt 341: else
1.7 tedu 342: flexfatal (_
343: ("consistency check failed in epsclosure()"));
344: }
1.1 deraadt 345:
346: *ns_addr = numstates;
347: *hv_addr = hashval;
348: *nacc_addr = nacc;
349:
350: return t;
1.7 tedu 351: }
1.1 deraadt 352:
353:
354: /* increase_max_dfas - increase the maximum number of DFAs */
355:
1.7 tedu 356: void increase_max_dfas ()
357: {
1.1 deraadt 358: current_max_dfas += MAX_DFAS_INCREMENT;
359:
360: ++num_reallocs;
361:
1.7 tedu 362: base = reallocate_integer_array (base, current_max_dfas);
363: def = reallocate_integer_array (def, current_max_dfas);
364: dfasiz = reallocate_integer_array (dfasiz, current_max_dfas);
365: accsiz = reallocate_integer_array (accsiz, current_max_dfas);
366: dhash = reallocate_integer_array (dhash, current_max_dfas);
367: dss = reallocate_int_ptr_array (dss, current_max_dfas);
368: dfaacc = reallocate_dfaacc_union (dfaacc, current_max_dfas);
1.1 deraadt 369:
1.7 tedu 370: if (nultrans)
1.1 deraadt 371: nultrans =
1.7 tedu 372: reallocate_integer_array (nultrans,
373: current_max_dfas);
374: }
1.1 deraadt 375:
376:
377: /* ntod - convert an ndfa to a dfa
378: *
379: * Creates the dfa corresponding to the ndfa we've constructed. The
380: * dfa starts out in state #1.
381: */
382:
1.7 tedu 383: void ntod ()
384: {
385: int *accset, ds, nacc, newds;
386: int sym, hashval, numstates, dsize;
387: int num_full_table_rows=0; /* used only for -f */
388: int *nset, *dset;
389: int targptr, totaltrans, i, comstate, comfreq, targ;
390: int symlist[CSIZE + 1];
391: int num_start_states;
392: int todo_head, todo_next;
393:
394: struct yytbl_data *yynxt_tbl = 0;
395: flex_int32_t *yynxt_data = 0, yynxt_curr = 0;
1.1 deraadt 396:
397: /* Note that the following are indexed by *equivalence classes*
398: * and not by characters. Since equivalence classes are indexed
399: * beginning with 1, even if the scanner accepts NUL's, this
400: * means that (since every character is potentially in its own
401: * equivalence class) these arrays must have room for indices
402: * from 1 to CSIZE, so their size must be CSIZE + 1.
403: */
1.7 tedu 404: int duplist[CSIZE + 1], state[CSIZE + 1];
405: int targfreq[CSIZE + 1], targstate[CSIZE + 1];
1.1 deraadt 406:
1.7 tedu 407: /* accset needs to be large enough to hold all of the rules present
408: * in the input, *plus* their YY_TRAILING_HEAD_MASK variants.
409: */
410: accset = allocate_integer_array ((num_rules + 1) * 2);
411: nset = allocate_integer_array (current_max_dfa_size);
1.1 deraadt 412:
413: /* The "todo" queue is represented by the head, which is the DFA
414: * state currently being processed, and the "next", which is the
415: * next DFA state number available (not in use). We depend on the
416: * fact that snstods() returns DFA's \in increasing order/, and thus
417: * need only know the bounds of the dfas to be processed.
418: */
419: todo_head = todo_next = 0;
420:
1.7 tedu 421: for (i = 0; i <= csize; ++i) {
1.1 deraadt 422: duplist[i] = NIL;
423: symlist[i] = false;
1.7 tedu 424: }
1.1 deraadt 425:
1.7 tedu 426: for (i = 0; i <= num_rules; ++i)
1.1 deraadt 427: accset[i] = NIL;
428:
1.7 tedu 429: if (trace) {
430: dumpnfa (scset[1]);
431: fputs (_("\n\nDFA Dump:\n\n"), stderr);
432: }
1.1 deraadt 433:
1.7 tedu 434: inittbl ();
1.1 deraadt 435:
436: /* Check to see whether we should build a separate table for
437: * transitions on NUL characters. We don't do this for full-speed
438: * (-F) scanners, since for them we don't have a simple state
439: * number lying around with which to index the table. We also
440: * don't bother doing it for scanners unless (1) NUL is in its own
441: * equivalence class (indicated by a positive value of
442: * ecgroup[NUL]), (2) NUL's equivalence class is the last
443: * equivalence class, and (3) the number of equivalence classes is
444: * the same as the number of characters. This latter case comes
445: * about when useecs is false or when it's true but every character
446: * still manages to land in its own class (unlikely, but it's
447: * cheap to check for). If all these things are true then the
448: * character code needed to represent NUL's equivalence class for
449: * indexing the tables is going to take one more bit than the
450: * number of characters, and therefore we won't be assured of
451: * being able to fit it into a YY_CHAR variable. This rules out
452: * storing the transitions in a compressed table, since the code
453: * for interpreting them uses a YY_CHAR variable (perhaps it
454: * should just use an integer, though; this is worth pondering ...
455: * ###).
456: *
457: * Finally, for full tables, we want the number of entries in the
458: * table to be a power of two so the array references go fast (it
459: * will just take a shift to compute the major index). If
460: * encoding NUL's transitions in the table will spoil this, we
461: * give it its own table (note that this will be the case if we're
462: * not using equivalence classes).
463: */
464:
465: /* Note that the test for ecgroup[0] == numecs below accomplishes
466: * both (1) and (2) above
467: */
1.7 tedu 468: if (!fullspd && ecgroup[0] == numecs) {
1.1 deraadt 469: /* NUL is alone in its equivalence class, which is the
470: * last one.
471: */
1.7 tedu 472: int use_NUL_table = (numecs == csize);
1.1 deraadt 473:
1.7 tedu 474: if (fulltbl && !use_NUL_table) {
1.1 deraadt 475: /* We still may want to use the table if numecs
476: * is a power of 2.
477: */
1.7 tedu 478: int power_of_two;
1.1 deraadt 479:
1.7 tedu 480: for (power_of_two = 1; power_of_two <= csize;
481: power_of_two *= 2)
482: if (numecs == power_of_two) {
1.1 deraadt 483: use_NUL_table = true;
484: break;
1.7 tedu 485: }
486: }
1.1 deraadt 487:
1.7 tedu 488: if (use_NUL_table)
489: nultrans =
490: allocate_integer_array (current_max_dfas);
1.1 deraadt 491:
492: /* From now on, nultrans != nil indicates that we're
493: * saving null transitions for later, separate encoding.
494: */
1.7 tedu 495: }
1.1 deraadt 496:
497:
1.7 tedu 498: if (fullspd) {
499: for (i = 0; i <= numecs; ++i)
1.1 deraadt 500: state[i] = 0;
501:
1.7 tedu 502: place_state (state, 0, 0);
1.1 deraadt 503: dfaacc[0].dfaacc_state = 0;
1.7 tedu 504: }
1.1 deraadt 505:
1.7 tedu 506: else if (fulltbl) {
507: if (nultrans)
1.1 deraadt 508: /* We won't be including NUL's transitions in the
509: * table, so build it for entries from 0 .. numecs - 1.
510: */
511: num_full_table_rows = numecs;
512:
513: else
514: /* Take into account the fact that we'll be including
515: * the NUL entries in the transition table. Build it
516: * from 0 .. numecs.
517: */
518: num_full_table_rows = numecs + 1;
519:
1.7 tedu 520: /* Begin generating yy_nxt[][]
521: * This spans the entire LONG function.
522: * This table is tricky because we don't know how big it will be.
523: * So we'll have to realloc() on the way...
524: * we'll wait until we can calculate yynxt_tbl->td_hilen.
525: */
526: yynxt_tbl =
527: (struct yytbl_data *) calloc (1,
528: sizeof (struct
529: yytbl_data));
530: yytbl_data_init (yynxt_tbl, YYTD_ID_NXT);
531: yynxt_tbl->td_hilen = 1;
532: yynxt_tbl->td_lolen = num_full_table_rows;
533: yynxt_tbl->td_data = yynxt_data =
534: (flex_int32_t *) calloc (yynxt_tbl->td_lolen *
535: yynxt_tbl->td_hilen,
536: sizeof (flex_int32_t));
537: yynxt_curr = 0;
538:
539: buf_prints (&yydmap_buf,
540: "\t{YYTD_ID_NXT, (void**)&yy_nxt, sizeof(%s)},\n",
541: long_align ? "flex_int32_t" : "flex_int16_t");
542:
1.1 deraadt 543: /* Unless -Ca, declare it "short" because it's a real
544: * long-shot that that won't be large enough.
545: */
1.7 tedu 546: if (gentables)
547: out_str_dec
548: ("static yyconst %s yy_nxt[][%d] =\n {\n",
549: long_align ? "flex_int32_t" : "flex_int16_t",
550: num_full_table_rows);
551: else {
552: out_dec ("#undef YY_NXT_LOLEN\n#define YY_NXT_LOLEN (%d)\n", num_full_table_rows);
553: out_str ("static yyconst %s *yy_nxt =0;\n",
554: long_align ? "flex_int32_t" : "flex_int16_t");
555: }
556:
1.1 deraadt 557:
1.7 tedu 558: if (gentables)
559: outn (" {");
1.1 deraadt 560:
561: /* Generate 0 entries for state #0. */
1.7 tedu 562: for (i = 0; i < num_full_table_rows; ++i) {
563: mk2data (0);
564: yynxt_data[yynxt_curr++] = 0;
565: }
1.1 deraadt 566:
1.7 tedu 567: dataflush ();
568: if (gentables)
569: outn (" },\n");
570: }
1.1 deraadt 571:
572: /* Create the first states. */
573:
574: num_start_states = lastsc * 2;
575:
1.7 tedu 576: for (i = 1; i <= num_start_states; ++i) {
1.1 deraadt 577: numstates = 1;
578:
579: /* For each start condition, make one state for the case when
580: * we're at the beginning of the line (the '^' operator) and
581: * one for the case when we're not.
582: */
1.7 tedu 583: if (i % 2 == 1)
1.1 deraadt 584: nset[numstates] = scset[(i / 2) + 1];
585: else
586: nset[numstates] =
1.7 tedu 587: mkbranch (scbol[i / 2], scset[i / 2]);
1.1 deraadt 588:
1.7 tedu 589: nset = epsclosure (nset, &numstates, accset, &nacc,
590: &hashval);
1.1 deraadt 591:
1.7 tedu 592: if (snstods (nset, numstates, accset, nacc, hashval, &ds)) {
1.1 deraadt 593: numas += nacc;
594: totnst += numstates;
595: ++todo_next;
596:
1.7 tedu 597: if (variable_trailing_context_rules && nacc > 0)
598: check_trailing_context (nset, numstates,
599: accset, nacc);
1.1 deraadt 600: }
1.7 tedu 601: }
1.1 deraadt 602:
1.7 tedu 603: if (!fullspd) {
604: if (!snstods (nset, 0, accset, 0, 0, &end_of_buffer_state))
605: flexfatal (_
606: ("could not create unique end-of-buffer state"));
1.1 deraadt 607:
608: ++numas;
609: ++num_start_states;
610: ++todo_next;
1.7 tedu 611: }
612:
1.1 deraadt 613:
1.7 tedu 614: while (todo_head < todo_next) {
1.1 deraadt 615: targptr = 0;
616: totaltrans = 0;
617:
1.7 tedu 618: for (i = 1; i <= numecs; ++i)
1.1 deraadt 619: state[i] = 0;
620:
621: ds = ++todo_head;
622:
623: dset = dss[ds];
624: dsize = dfasiz[ds];
625:
1.7 tedu 626: if (trace)
627: fprintf (stderr, _("state # %d:\n"), ds);
1.1 deraadt 628:
1.7 tedu 629: sympartition (dset, dsize, symlist, duplist);
1.1 deraadt 630:
1.7 tedu 631: for (sym = 1; sym <= numecs; ++sym) {
632: if (symlist[sym]) {
1.1 deraadt 633: symlist[sym] = 0;
634:
1.7 tedu 635: if (duplist[sym] == NIL) {
1.1 deraadt 636: /* Symbol has unique out-transitions. */
1.7 tedu 637: numstates =
638: symfollowset (dset, dsize,
639: sym, nset);
640: nset = epsclosure (nset,
641: &numstates,
642: accset, &nacc,
643: &hashval);
644:
645: if (snstods
646: (nset, numstates, accset, nacc,
647: hashval, &newds)) {
648: totnst = totnst +
649: numstates;
1.1 deraadt 650: ++todo_next;
651: numas += nacc;
652:
1.7 tedu 653: if (variable_trailing_context_rules && nacc > 0)
654: check_trailing_context
655: (nset,
656: numstates,
657: accset,
658: nacc);
659: }
1.1 deraadt 660:
661: state[sym] = newds;
662:
1.7 tedu 663: if (trace)
664: fprintf (stderr,
665: "\t%d\t%d\n", sym,
666: newds);
1.1 deraadt 667:
668: targfreq[++targptr] = 1;
669: targstate[targptr] = newds;
670: ++numuniq;
1.7 tedu 671: }
1.1 deraadt 672:
1.7 tedu 673: else {
1.1 deraadt 674: /* sym's equivalence class has the same
675: * transitions as duplist(sym)'s
676: * equivalence class.
677: */
678: targ = state[duplist[sym]];
679: state[sym] = targ;
680:
1.7 tedu 681: if (trace)
682: fprintf (stderr,
683: "\t%d\t%d\n", sym,
684: targ);
1.1 deraadt 685:
686: /* Update frequency count for
687: * destination state.
688: */
689:
690: i = 0;
1.7 tedu 691: while (targstate[++i] != targ) ;
1.1 deraadt 692:
693: ++targfreq[i];
694: ++numdup;
1.7 tedu 695: }
1.1 deraadt 696:
697: ++totaltrans;
698: duplist[sym] = NIL;
699: }
1.7 tedu 700: }
1.1 deraadt 701:
702:
703: numsnpairs += totaltrans;
704:
1.7 tedu 705: if (ds > num_start_states)
706: check_for_backing_up (ds, state);
1.1 deraadt 707:
1.7 tedu 708: if (nultrans) {
1.1 deraadt 709: nultrans[ds] = state[NUL_ec];
710: state[NUL_ec] = 0; /* remove transition */
1.7 tedu 711: }
712:
713: if (fulltbl) {
714:
715: /* Each time we hit here, it's another td_hilen, so we realloc. */
716: yynxt_tbl->td_hilen++;
717: yynxt_tbl->td_data = yynxt_data =
718: (flex_int32_t *) realloc (yynxt_data,
719: yynxt_tbl->td_hilen *
720: yynxt_tbl->td_lolen *
721: sizeof (flex_int32_t));
722:
1.1 deraadt 723:
1.7 tedu 724: if (gentables)
725: outn (" {");
1.1 deraadt 726:
727: /* Supply array's 0-element. */
1.7 tedu 728: if (ds == end_of_buffer_state) {
729: mk2data (-end_of_buffer_state);
730: yynxt_data[yynxt_curr++] =
731: -end_of_buffer_state;
732: }
733: else {
734: mk2data (end_of_buffer_state);
735: yynxt_data[yynxt_curr++] =
736: end_of_buffer_state;
737: }
1.1 deraadt 738:
1.7 tedu 739: for (i = 1; i < num_full_table_rows; ++i) {
1.1 deraadt 740: /* Jams are marked by negative of state
741: * number.
742: */
1.7 tedu 743: mk2data (state[i] ? state[i] : -ds);
744: yynxt_data[yynxt_curr++] =
745: state[i] ? state[i] : -ds;
746: }
1.1 deraadt 747:
1.7 tedu 748: dataflush ();
749: if (gentables)
750: outn (" },\n");
751: }
1.1 deraadt 752:
1.7 tedu 753: else if (fullspd)
754: place_state (state, ds, totaltrans);
1.1 deraadt 755:
1.7 tedu 756: else if (ds == end_of_buffer_state)
1.1 deraadt 757: /* Special case this state to make sure it does what
758: * it's supposed to, i.e., jam on end-of-buffer.
759: */
1.7 tedu 760: stack1 (ds, 0, 0, JAMSTATE);
761:
762: else { /* normal, compressed state */
1.1 deraadt 763:
764: /* Determine which destination state is the most
765: * common, and how many transitions to it there are.
766: */
767:
768: comfreq = 0;
769: comstate = 0;
770:
1.7 tedu 771: for (i = 1; i <= targptr; ++i)
772: if (targfreq[i] > comfreq) {
1.1 deraadt 773: comfreq = targfreq[i];
774: comstate = targstate[i];
1.7 tedu 775: }
1.1 deraadt 776:
1.7 tedu 777: bldtbl (state, ds, totaltrans, comstate, comfreq);
1.1 deraadt 778: }
1.7 tedu 779: }
1.1 deraadt 780:
1.7 tedu 781: if (fulltbl) {
782: dataend ();
783: if (tablesext) {
784: yytbl_data_compress (yynxt_tbl);
785: if (yytbl_data_fwrite (&tableswr, yynxt_tbl) < 0)
786: flexerror (_
787: ("Could not write yynxt_tbl[][]"));
788: }
789: if (yynxt_tbl) {
790: yytbl_data_destroy (yynxt_tbl);
791: yynxt_tbl = 0;
792: }
793: }
1.1 deraadt 794:
1.7 tedu 795: else if (!fullspd) {
796: cmptmps (); /* create compressed template entries */
1.1 deraadt 797:
798: /* Create tables for all the states with only one
799: * out-transition.
800: */
1.7 tedu 801: while (onesp > 0) {
802: mk1tbl (onestate[onesp], onesym[onesp],
803: onenext[onesp], onedef[onesp]);
1.1 deraadt 804: --onesp;
805: }
806:
1.7 tedu 807: mkdeftbl ();
1.1 deraadt 808: }
809:
1.8 ! tedu 810: free ((void *) accset);
! 811: free ((void *) nset);
1.7 tedu 812: }
813:
1.1 deraadt 814:
815: /* snstods - converts a set of ndfa states into a dfa state
816: *
817: * synopsis
818: * is_new_state = snstods( int sns[numstates], int numstates,
819: * int accset[num_rules+1], int nacc,
820: * int hashval, int *newds_addr );
821: *
822: * On return, the dfa state number is in newds.
823: */
824:
1.7 tedu 825: int snstods (sns, numstates, accset, nacc, hashval, newds_addr)
826: int sns[], numstates, accset[], nacc, hashval, *newds_addr;
827: {
828: int didsort = 0;
1.5 mpech 829: int i, j;
1.7 tedu 830: int newds, *oldsns;
1.1 deraadt 831:
1.7 tedu 832: for (i = 1; i <= lastdfa; ++i)
833: if (hashval == dhash[i]) {
834: if (numstates == dfasiz[i]) {
1.1 deraadt 835: oldsns = dss[i];
836:
1.7 tedu 837: if (!didsort) {
1.1 deraadt 838: /* We sort the states in sns so we
839: * can compare it to oldsns quickly.
840: */
1.7 tedu 841: qsort (&sns [1], numstates, sizeof (sns [1]), intcmp);
1.1 deraadt 842: didsort = 1;
1.7 tedu 843: }
1.1 deraadt 844:
1.7 tedu 845: for (j = 1; j <= numstates; ++j)
846: if (sns[j] != oldsns[j])
1.1 deraadt 847: break;
848:
1.7 tedu 849: if (j > numstates) {
1.1 deraadt 850: ++dfaeql;
851: *newds_addr = i;
852: return 0;
1.7 tedu 853: }
1.1 deraadt 854:
855: ++hshcol;
1.7 tedu 856: }
1.1 deraadt 857:
858: else
859: ++hshsave;
1.7 tedu 860: }
1.1 deraadt 861:
862: /* Make a new dfa. */
863:
1.7 tedu 864: if (++lastdfa >= current_max_dfas)
865: increase_max_dfas ();
1.1 deraadt 866:
867: newds = lastdfa;
868:
1.7 tedu 869: dss[newds] = allocate_integer_array (numstates + 1);
1.1 deraadt 870:
871: /* If we haven't already sorted the states in sns, we do so now,
872: * so that future comparisons with it can be made quickly.
873: */
874:
1.7 tedu 875: if (!didsort)
876: qsort (&sns [1], numstates, sizeof (sns [1]), intcmp);
1.1 deraadt 877:
1.7 tedu 878: for (i = 1; i <= numstates; ++i)
1.1 deraadt 879: dss[newds][i] = sns[i];
880:
881: dfasiz[newds] = numstates;
882: dhash[newds] = hashval;
883:
1.7 tedu 884: if (nacc == 0) {
885: if (reject)
1.1 deraadt 886: dfaacc[newds].dfaacc_set = (int *) 0;
887: else
888: dfaacc[newds].dfaacc_state = 0;
889:
890: accsiz[newds] = 0;
1.7 tedu 891: }
1.1 deraadt 892:
1.7 tedu 893: else if (reject) {
1.1 deraadt 894: /* We sort the accepting set in increasing order so the
895: * disambiguating rule that the first rule listed is considered
1.7 tedu 896: * match in the event of ties will work.
1.1 deraadt 897: */
898:
1.7 tedu 899: qsort (&accset [1], nacc, sizeof (accset [1]), intcmp);
1.1 deraadt 900:
1.7 tedu 901: dfaacc[newds].dfaacc_set =
902: allocate_integer_array (nacc + 1);
1.1 deraadt 903:
904: /* Save the accepting set for later */
1.7 tedu 905: for (i = 1; i <= nacc; ++i) {
1.1 deraadt 906: dfaacc[newds].dfaacc_set[i] = accset[i];
907:
1.7 tedu 908: if (accset[i] <= num_rules)
1.1 deraadt 909: /* Who knows, perhaps a REJECT can yield
910: * this rule.
911: */
912: rule_useful[accset[i]] = true;
1.7 tedu 913: }
1.1 deraadt 914:
915: accsiz[newds] = nacc;
1.7 tedu 916: }
1.1 deraadt 917:
1.7 tedu 918: else {
1.1 deraadt 919: /* Find lowest numbered rule so the disambiguating rule
920: * will work.
921: */
922: j = num_rules + 1;
923:
1.7 tedu 924: for (i = 1; i <= nacc; ++i)
925: if (accset[i] < j)
1.1 deraadt 926: j = accset[i];
927:
928: dfaacc[newds].dfaacc_state = j;
929:
1.7 tedu 930: if (j <= num_rules)
1.1 deraadt 931: rule_useful[j] = true;
1.7 tedu 932: }
1.1 deraadt 933:
934: *newds_addr = newds;
935:
936: return 1;
1.7 tedu 937: }
1.1 deraadt 938:
939:
940: /* symfollowset - follow the symbol transitions one step
941: *
942: * synopsis
943: * numstates = symfollowset( int ds[current_max_dfa_size], int dsize,
944: * int transsym, int nset[current_max_dfa_size] );
945: */
946:
1.7 tedu 947: int symfollowset (ds, dsize, transsym, nset)
948: int ds[], dsize, transsym, nset[];
949: {
950: int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist;
1.1 deraadt 951:
952: numstates = 0;
953:
1.7 tedu 954: for (i = 1; i <= dsize; ++i) { /* for each nfa state ns in the state set of ds */
1.1 deraadt 955: ns = ds[i];
956: sym = transchar[ns];
957: tsp = trans1[ns];
958:
1.7 tedu 959: if (sym < 0) { /* it's a character class */
1.1 deraadt 960: sym = -sym;
961: ccllist = cclmap[sym];
962: lenccl = ccllen[sym];
963:
1.7 tedu 964: if (cclng[sym]) {
965: for (j = 0; j < lenccl; ++j) {
1.1 deraadt 966: /* Loop through negated character
967: * class.
968: */
969: ch = ccltbl[ccllist + j];
970:
1.7 tedu 971: if (ch == 0)
1.1 deraadt 972: ch = NUL_ec;
973:
1.7 tedu 974: if (ch > transsym)
1.1 deraadt 975: /* Transsym isn't in negated
976: * ccl.
977: */
978: break;
979:
1.7 tedu 980: else if (ch == transsym)
981: /* next 2 */
982: goto bottom;
983: }
1.1 deraadt 984:
985: /* Didn't find transsym in ccl. */
986: nset[++numstates] = tsp;
1.7 tedu 987: }
1.1 deraadt 988:
989: else
1.7 tedu 990: for (j = 0; j < lenccl; ++j) {
1.1 deraadt 991: ch = ccltbl[ccllist + j];
992:
1.7 tedu 993: if (ch == 0)
1.1 deraadt 994: ch = NUL_ec;
995:
1.7 tedu 996: if (ch > transsym)
1.1 deraadt 997: break;
1.7 tedu 998: else if (ch == transsym) {
1.1 deraadt 999: nset[++numstates] = tsp;
1000: break;
1001: }
1.7 tedu 1002: }
1003: }
1.1 deraadt 1004:
1.7 tedu 1005: else if (sym == SYM_EPSILON) { /* do nothing */
1006: }
1.1 deraadt 1007:
1.7 tedu 1008: else if (ABS (ecgroup[sym]) == transsym)
1.1 deraadt 1009: nset[++numstates] = tsp;
1010:
1.7 tedu 1011: bottom:;
1012: }
1.1 deraadt 1013:
1014: return numstates;
1.7 tedu 1015: }
1.1 deraadt 1016:
1017:
1018: /* sympartition - partition characters with same out-transitions
1019: *
1020: * synopsis
1021: * sympartition( int ds[current_max_dfa_size], int numstates,
1022: * int symlist[numecs], int duplist[numecs] );
1023: */
1024:
1.7 tedu 1025: void sympartition (ds, numstates, symlist, duplist)
1026: int ds[], numstates;
1027: int symlist[], duplist[];
1028: {
1029: int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
1.1 deraadt 1030:
1031: /* Partitioning is done by creating equivalence classes for those
1032: * characters which have out-transitions from the given state. Thus
1033: * we are really creating equivalence classes of equivalence classes.
1034: */
1035:
1.7 tedu 1036: for (i = 1; i <= numecs; ++i) { /* initialize equivalence class list */
1.1 deraadt 1037: duplist[i] = i - 1;
1038: dupfwd[i] = i + 1;
1.7 tedu 1039: }
1.1 deraadt 1040:
1041: duplist[1] = NIL;
1042: dupfwd[numecs] = NIL;
1043:
1.7 tedu 1044: for (i = 1; i <= numstates; ++i) {
1.1 deraadt 1045: ns = ds[i];
1046: tch = transchar[ns];
1047:
1.7 tedu 1048: if (tch != SYM_EPSILON) {
1049: if (tch < -lastccl || tch >= csize) {
1050: flexfatal (_
1051: ("bad transition character detected in sympartition()"));
1052: }
1.1 deraadt 1053:
1.7 tedu 1054: if (tch >= 0) { /* character transition */
1055: int ec = ecgroup[tch];
1.1 deraadt 1056:
1.7 tedu 1057: mkechar (ec, dupfwd, duplist);
1.1 deraadt 1058: symlist[ec] = 1;
1.7 tedu 1059: }
1.1 deraadt 1060:
1.7 tedu 1061: else { /* character class */
1.1 deraadt 1062: tch = -tch;
1063:
1064: lenccl = ccllen[tch];
1065: cclp = cclmap[tch];
1.7 tedu 1066: mkeccl (ccltbl + cclp, lenccl, dupfwd,
1067: duplist, numecs, NUL_ec);
1.1 deraadt 1068:
1.7 tedu 1069: if (cclng[tch]) {
1.1 deraadt 1070: j = 0;
1071:
1.7 tedu 1072: for (k = 0; k < lenccl; ++k) {
1.1 deraadt 1073: ich = ccltbl[cclp + k];
1074:
1.7 tedu 1075: if (ich == 0)
1.1 deraadt 1076: ich = NUL_ec;
1077:
1.7 tedu 1078: for (++j; j < ich; ++j)
1.1 deraadt 1079: symlist[j] = 1;
1.7 tedu 1080: }
1.1 deraadt 1081:
1.7 tedu 1082: for (++j; j <= numecs; ++j)
1.1 deraadt 1083: symlist[j] = 1;
1.7 tedu 1084: }
1.1 deraadt 1085:
1086: else
1.7 tedu 1087: for (k = 0; k < lenccl; ++k) {
1.1 deraadt 1088: ich = ccltbl[cclp + k];
1089:
1.7 tedu 1090: if (ich == 0)
1.1 deraadt 1091: ich = NUL_ec;
1092:
1093: symlist[ich] = 1;
1.7 tedu 1094: }
1.1 deraadt 1095: }
1096: }
1097: }
1.7 tedu 1098: }