Annotation of src/usr.bin/lex/dfa.c, Revision 1.7
1.7 ! tedu 1: /* $OpenBSD: dfa.c,v 1.6 2003/06/04 17:34:44 millert Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /* dfa - DFA construction routines */
4:
1.7 ! tedu 5: /* Copyright (c) 1990 The Regents of the University of California. */
! 6: /* All rights reserved. */
! 7:
! 8: /* This code is derived from software contributed to Berkeley by */
! 9: /* Vern Paxson. */
1.1 deraadt 10:
1.7 ! tedu 11: /* The United States Government has rights in this work pursuant */
! 12: /* to contract no. DE-AC03-76SF00098 between the United States */
! 13: /* Department of Energy and the University of California. */
! 14:
! 15: /* Redistribution and use in source and binary forms, with or without */
! 16: /* modification, are permitted provided that the following conditions */
! 17: /* are met: */
! 18:
! 19: /* 1. Redistributions of source code must retain the above copyright */
! 20: /* notice, this list of conditions and the following disclaimer. */
! 21: /* 2. Redistributions in binary form must reproduce the above copyright */
! 22: /* notice, this list of conditions and the following disclaimer in the */
! 23: /* documentation and/or other materials provided with the distribution. */
! 24:
! 25: /* Neither the name of the University nor the names of its contributors */
! 26: /* may be used to endorse or promote products derived from this software */
! 27: /* without specific prior written permission. */
! 28:
! 29: /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
! 30: /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
! 31: /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
! 32: /* PURPOSE. */
1.1 deraadt 33:
34: #include "flexdef.h"
1.7 ! tedu 35: #include "tables.h"
1.1 deraadt 36:
37: /* declare functions that have forward references */
38:
1.7 ! tedu 39: void dump_associated_rules PROTO ((FILE *, int));
! 40: void dump_transitions PROTO ((FILE *, int[]));
! 41: void sympartition PROTO ((int[], int, int[], int[]));
! 42: int symfollowset PROTO ((int[], int, int, int[]));
1.1 deraadt 43:
44:
45: /* check_for_backing_up - check a DFA state for backing up
46: *
47: * synopsis
48: * void check_for_backing_up( int ds, int state[numecs] );
49: *
50: * ds is the number of the state to check and state[] is its out-transitions,
51: * indexed by equivalence class.
52: */
53:
1.7 ! tedu 54: void check_for_backing_up (ds, state)
! 55: int ds;
! 56: int state[];
! 57: {
! 58: if ((reject && !dfaacc[ds].dfaacc_set) || (!reject && !dfaacc[ds].dfaacc_state)) { /* state is non-accepting */
1.1 deraadt 59: ++num_backing_up;
60:
1.7 ! tedu 61: if (backing_up_report) {
! 62: fprintf (backing_up_file,
! 63: _("State #%d is non-accepting -\n"), ds);
1.1 deraadt 64:
65: /* identify the state */
1.7 ! tedu 66: dump_associated_rules (backing_up_file, ds);
1.1 deraadt 67:
68: /* Now identify it further using the out- and
69: * jam-transitions.
70: */
1.7 ! tedu 71: dump_transitions (backing_up_file, state);
1.1 deraadt 72:
1.7 ! tedu 73: putc ('\n', backing_up_file);
1.1 deraadt 74: }
75: }
1.7 ! tedu 76: }
1.1 deraadt 77:
78:
79: /* check_trailing_context - check to see if NFA state set constitutes
80: * "dangerous" trailing context
81: *
82: * synopsis
83: * void check_trailing_context( int nfa_states[num_states+1], int num_states,
84: * int accset[nacc+1], int nacc );
85: *
86: * NOTES
87: * Trailing context is "dangerous" if both the head and the trailing
88: * part are of variable size \and/ there's a DFA state which contains
89: * both an accepting state for the head part of the rule and NFA states
90: * which occur after the beginning of the trailing context.
91: *
92: * When such a rule is matched, it's impossible to tell if having been
93: * in the DFA state indicates the beginning of the trailing context or
94: * further-along scanning of the pattern. In these cases, a warning
95: * message is issued.
96: *
97: * nfa_states[1 .. num_states] is the list of NFA states in the DFA.
98: * accset[1 .. nacc] is the list of accepting numbers for the DFA state.
99: */
100:
1.7 ! tedu 101: void check_trailing_context (nfa_states, num_states, accset, nacc)
! 102: int *nfa_states, num_states;
! 103: int *accset;
! 104: int nacc;
! 105: {
1.5 mpech 106: int i, j;
1.1 deraadt 107:
1.7 ! tedu 108: for (i = 1; i <= num_states; ++i) {
! 109: int ns = nfa_states[i];
1.5 mpech 110: int type = state_type[ns];
111: int ar = assoc_rule[ns];
1.1 deraadt 112:
1.7 ! tedu 113: if (type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE) { /* do nothing */
! 114: }
1.1 deraadt 115:
1.7 ! tedu 116: else if (type == STATE_TRAILING_CONTEXT) {
1.1 deraadt 117: /* Potential trouble. Scan set of accepting numbers
118: * for the one marking the end of the "head". We
119: * assume that this looping will be fairly cheap
120: * since it's rare that an accepting number set
121: * is large.
122: */
1.7 ! tedu 123: for (j = 1; j <= nacc; ++j)
! 124: if (accset[j] & YY_TRAILING_HEAD_MASK) {
! 125: line_warning (_
! 126: ("dangerous trailing context"),
! 127: rule_linenum[ar]);
1.1 deraadt 128: return;
1.7 ! tedu 129: }
1.1 deraadt 130: }
131: }
1.7 ! tedu 132: }
1.1 deraadt 133:
134:
135: /* dump_associated_rules - list the rules associated with a DFA state
136: *
137: * Goes through the set of NFA states associated with the DFA and
138: * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
139: * and writes a report to the given file.
140: */
141:
1.7 ! tedu 142: void dump_associated_rules (file, ds)
! 143: FILE *file;
! 144: int ds;
! 145: {
1.5 mpech 146: int i, j;
147: int num_associated_rules = 0;
1.7 ! tedu 148: int rule_set[MAX_ASSOC_RULES + 1];
! 149: int *dset = dss[ds];
! 150: int size = dfasiz[ds];
1.1 deraadt 151:
1.7 ! tedu 152: for (i = 1; i <= size; ++i) {
1.5 mpech 153: int rule_num = rule_linenum[assoc_rule[dset[i]]];
1.1 deraadt 154:
1.7 ! tedu 155: for (j = 1; j <= num_associated_rules; ++j)
! 156: if (rule_num == rule_set[j])
1.1 deraadt 157: break;
158:
1.7 ! tedu 159: if (j > num_associated_rules) { /* new rule */
! 160: if (num_associated_rules < MAX_ASSOC_RULES)
! 161: rule_set[++num_associated_rules] =
! 162: rule_num;
1.1 deraadt 163: }
1.7 ! tedu 164: }
1.1 deraadt 165:
1.7 ! tedu 166: qsort (&rule_set [1], num_associated_rules, sizeof (rule_set [1]), intcmp);
1.1 deraadt 167:
1.7 ! tedu 168: fprintf (file, _(" associated rule line numbers:"));
1.1 deraadt 169:
1.7 ! tedu 170: for (i = 1; i <= num_associated_rules; ++i) {
! 171: if (i % 8 == 1)
! 172: putc ('\n', file);
1.1 deraadt 173:
1.7 ! tedu 174: fprintf (file, "\t%d", rule_set[i]);
! 175: }
1.1 deraadt 176:
1.7 ! tedu 177: putc ('\n', file);
! 178: }
1.1 deraadt 179:
180:
181: /* dump_transitions - list the transitions associated with a DFA state
182: *
183: * synopsis
184: * dump_transitions( FILE *file, int state[numecs] );
185: *
186: * Goes through the set of out-transitions and lists them in human-readable
187: * form (i.e., not as equivalence classes); also lists jam transitions
188: * (i.e., all those which are not out-transitions, plus EOF). The dump
189: * is done to the given file.
190: */
191:
1.7 ! tedu 192: void dump_transitions (file, state)
! 193: FILE *file;
! 194: int state[];
! 195: {
1.5 mpech 196: int i, ec;
1.7 ! tedu 197: int out_char_set[CSIZE];
1.1 deraadt 198:
1.7 ! tedu 199: for (i = 0; i < csize; ++i) {
! 200: ec = ABS (ecgroup[i]);
1.1 deraadt 201: out_char_set[i] = state[ec];
1.7 ! tedu 202: }
1.1 deraadt 203:
1.7 ! tedu 204: fprintf (file, _(" out-transitions: "));
1.1 deraadt 205:
1.7 ! tedu 206: list_character_set (file, out_char_set);
1.1 deraadt 207:
208: /* now invert the members of the set to get the jam transitions */
1.7 ! tedu 209: for (i = 0; i < csize; ++i)
! 210: out_char_set[i] = !out_char_set[i];
1.1 deraadt 211:
1.7 ! tedu 212: fprintf (file, _("\n jam-transitions: EOF "));
1.1 deraadt 213:
1.7 ! tedu 214: list_character_set (file, out_char_set);
1.1 deraadt 215:
1.7 ! tedu 216: putc ('\n', file);
! 217: }
1.1 deraadt 218:
219:
220: /* epsclosure - construct the epsilon closure of a set of ndfa states
221: *
222: * synopsis
223: * int *epsclosure( int t[num_states], int *numstates_addr,
224: * int accset[num_rules+1], int *nacc_addr,
225: * int *hashval_addr );
226: *
227: * NOTES
228: * The epsilon closure is the set of all states reachable by an arbitrary
229: * number of epsilon transitions, which themselves do not have epsilon
230: * transitions going out, unioned with the set of states which have non-null
231: * accepting numbers. t is an array of size numstates of nfa state numbers.
232: * Upon return, t holds the epsilon closure and *numstates_addr is updated.
233: * accset holds a list of the accepting numbers, and the size of accset is
234: * given by *nacc_addr. t may be subjected to reallocation if it is not
235: * large enough to hold the epsilon closure.
236: *
237: * hashval is the hash value for the dfa corresponding to the state set.
238: */
239:
1.7 ! tedu 240: int *epsclosure (t, ns_addr, accset, nacc_addr, hv_addr)
! 241: int *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
! 242: {
1.5 mpech 243: int stkpos, ns, tsp;
1.7 ! tedu 244: int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
! 245: int stkend, nstate;
! 246: static int did_stk_init = false, *stk;
1.1 deraadt 247:
248: #define MARK_STATE(state) \
1.7 ! tedu 249: do{ trans1[state] = trans1[state] - MARKER_DIFFERENCE;} while(0)
1.1 deraadt 250:
251: #define IS_MARKED(state) (trans1[state] < 0)
252:
253: #define UNMARK_STATE(state) \
1.7 ! tedu 254: do{ trans1[state] = trans1[state] + MARKER_DIFFERENCE;} while(0)
1.1 deraadt 255:
256: #define CHECK_ACCEPT(state) \
1.7 ! tedu 257: do{ \
1.1 deraadt 258: nfaccnum = accptnum[state]; \
259: if ( nfaccnum != NIL ) \
260: accset[++nacc] = nfaccnum; \
1.7 ! tedu 261: }while(0)
1.1 deraadt 262:
1.7 ! tedu 263: #define DO_REALLOCATION() \
! 264: do { \
1.1 deraadt 265: current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
266: ++num_reallocs; \
267: t = reallocate_integer_array( t, current_max_dfa_size ); \
268: stk = reallocate_integer_array( stk, current_max_dfa_size ); \
1.7 ! tedu 269: }while(0) \
1.1 deraadt 270:
271: #define PUT_ON_STACK(state) \
1.7 ! tedu 272: do { \
1.1 deraadt 273: if ( ++stkend >= current_max_dfa_size ) \
1.7 ! tedu 274: DO_REALLOCATION(); \
1.1 deraadt 275: stk[stkend] = state; \
1.7 ! tedu 276: MARK_STATE(state); \
! 277: }while(0)
1.1 deraadt 278:
279: #define ADD_STATE(state) \
1.7 ! tedu 280: do { \
1.1 deraadt 281: if ( ++numstates >= current_max_dfa_size ) \
1.7 ! tedu 282: DO_REALLOCATION(); \
1.1 deraadt 283: t[numstates] = state; \
284: hashval += state; \
1.7 ! tedu 285: }while(0)
1.1 deraadt 286:
287: #define STACK_STATE(state) \
1.7 ! tedu 288: do { \
! 289: PUT_ON_STACK(state); \
! 290: CHECK_ACCEPT(state); \
1.1 deraadt 291: if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
1.7 ! tedu 292: ADD_STATE(state); \
! 293: }while(0)
1.1 deraadt 294:
295:
1.7 ! tedu 296: if (!did_stk_init) {
! 297: stk = allocate_integer_array (current_max_dfa_size);
1.1 deraadt 298: did_stk_init = true;
1.7 ! tedu 299: }
1.1 deraadt 300:
301: nacc = stkend = hashval = 0;
302:
1.7 ! tedu 303: for (nstate = 1; nstate <= numstates; ++nstate) {
1.1 deraadt 304: ns = t[nstate];
305:
306: /* The state could be marked if we've already pushed it onto
307: * the stack.
308: */
1.7 ! tedu 309: if (!IS_MARKED (ns)) {
! 310: PUT_ON_STACK (ns);
! 311: CHECK_ACCEPT (ns);
1.1 deraadt 312: hashval += ns;
313: }
1.7 ! tedu 314: }
1.1 deraadt 315:
1.7 ! tedu 316: for (stkpos = 1; stkpos <= stkend; ++stkpos) {
1.1 deraadt 317: ns = stk[stkpos];
318: transsym = transchar[ns];
319:
1.7 ! tedu 320: if (transsym == SYM_EPSILON) {
1.1 deraadt 321: tsp = trans1[ns] + MARKER_DIFFERENCE;
322:
1.7 ! tedu 323: if (tsp != NO_TRANSITION) {
! 324: if (!IS_MARKED (tsp))
! 325: STACK_STATE (tsp);
1.1 deraadt 326:
327: tsp = trans2[ns];
328:
1.7 ! tedu 329: if (tsp != NO_TRANSITION
! 330: && !IS_MARKED (tsp))
! 331: STACK_STATE (tsp);
1.1 deraadt 332: }
333: }
1.7 ! tedu 334: }
1.1 deraadt 335:
336: /* Clear out "visit" markers. */
337:
1.7 ! tedu 338: for (stkpos = 1; stkpos <= stkend; ++stkpos) {
! 339: if (IS_MARKED (stk[stkpos]))
! 340: UNMARK_STATE (stk[stkpos]);
1.1 deraadt 341: else
1.7 ! tedu 342: flexfatal (_
! 343: ("consistency check failed in epsclosure()"));
! 344: }
1.1 deraadt 345:
346: *ns_addr = numstates;
347: *hv_addr = hashval;
348: *nacc_addr = nacc;
349:
350: return t;
1.7 ! tedu 351: }
1.1 deraadt 352:
353:
354: /* increase_max_dfas - increase the maximum number of DFAs */
355:
1.7 ! tedu 356: void increase_max_dfas ()
! 357: {
1.1 deraadt 358: current_max_dfas += MAX_DFAS_INCREMENT;
359:
360: ++num_reallocs;
361:
1.7 ! tedu 362: base = reallocate_integer_array (base, current_max_dfas);
! 363: def = reallocate_integer_array (def, current_max_dfas);
! 364: dfasiz = reallocate_integer_array (dfasiz, current_max_dfas);
! 365: accsiz = reallocate_integer_array (accsiz, current_max_dfas);
! 366: dhash = reallocate_integer_array (dhash, current_max_dfas);
! 367: dss = reallocate_int_ptr_array (dss, current_max_dfas);
! 368: dfaacc = reallocate_dfaacc_union (dfaacc, current_max_dfas);
1.1 deraadt 369:
1.7 ! tedu 370: if (nultrans)
1.1 deraadt 371: nultrans =
1.7 ! tedu 372: reallocate_integer_array (nultrans,
! 373: current_max_dfas);
! 374: }
1.1 deraadt 375:
376:
377: /* ntod - convert an ndfa to a dfa
378: *
379: * Creates the dfa corresponding to the ndfa we've constructed. The
380: * dfa starts out in state #1.
381: */
382:
1.7 ! tedu 383: void ntod ()
! 384: {
! 385: int *accset, ds, nacc, newds;
! 386: int sym, hashval, numstates, dsize;
! 387: int num_full_table_rows=0; /* used only for -f */
! 388: int *nset, *dset;
! 389: int targptr, totaltrans, i, comstate, comfreq, targ;
! 390: int symlist[CSIZE + 1];
! 391: int num_start_states;
! 392: int todo_head, todo_next;
! 393:
! 394: struct yytbl_data *yynxt_tbl = 0;
! 395: flex_int32_t *yynxt_data = 0, yynxt_curr = 0;
1.1 deraadt 396:
397: /* Note that the following are indexed by *equivalence classes*
398: * and not by characters. Since equivalence classes are indexed
399: * beginning with 1, even if the scanner accepts NUL's, this
400: * means that (since every character is potentially in its own
401: * equivalence class) these arrays must have room for indices
402: * from 1 to CSIZE, so their size must be CSIZE + 1.
403: */
1.7 ! tedu 404: int duplist[CSIZE + 1], state[CSIZE + 1];
! 405: int targfreq[CSIZE + 1], targstate[CSIZE + 1];
1.1 deraadt 406:
1.7 ! tedu 407: /* accset needs to be large enough to hold all of the rules present
! 408: * in the input, *plus* their YY_TRAILING_HEAD_MASK variants.
! 409: */
! 410: accset = allocate_integer_array ((num_rules + 1) * 2);
! 411: nset = allocate_integer_array (current_max_dfa_size);
1.1 deraadt 412:
413: /* The "todo" queue is represented by the head, which is the DFA
414: * state currently being processed, and the "next", which is the
415: * next DFA state number available (not in use). We depend on the
416: * fact that snstods() returns DFA's \in increasing order/, and thus
417: * need only know the bounds of the dfas to be processed.
418: */
419: todo_head = todo_next = 0;
420:
1.7 ! tedu 421: for (i = 0; i <= csize; ++i) {
1.1 deraadt 422: duplist[i] = NIL;
423: symlist[i] = false;
1.7 ! tedu 424: }
1.1 deraadt 425:
1.7 ! tedu 426: for (i = 0; i <= num_rules; ++i)
1.1 deraadt 427: accset[i] = NIL;
428:
1.7 ! tedu 429: if (trace) {
! 430: dumpnfa (scset[1]);
! 431: fputs (_("\n\nDFA Dump:\n\n"), stderr);
! 432: }
1.1 deraadt 433:
1.7 ! tedu 434: inittbl ();
1.1 deraadt 435:
436: /* Check to see whether we should build a separate table for
437: * transitions on NUL characters. We don't do this for full-speed
438: * (-F) scanners, since for them we don't have a simple state
439: * number lying around with which to index the table. We also
440: * don't bother doing it for scanners unless (1) NUL is in its own
441: * equivalence class (indicated by a positive value of
442: * ecgroup[NUL]), (2) NUL's equivalence class is the last
443: * equivalence class, and (3) the number of equivalence classes is
444: * the same as the number of characters. This latter case comes
445: * about when useecs is false or when it's true but every character
446: * still manages to land in its own class (unlikely, but it's
447: * cheap to check for). If all these things are true then the
448: * character code needed to represent NUL's equivalence class for
449: * indexing the tables is going to take one more bit than the
450: * number of characters, and therefore we won't be assured of
451: * being able to fit it into a YY_CHAR variable. This rules out
452: * storing the transitions in a compressed table, since the code
453: * for interpreting them uses a YY_CHAR variable (perhaps it
454: * should just use an integer, though; this is worth pondering ...
455: * ###).
456: *
457: * Finally, for full tables, we want the number of entries in the
458: * table to be a power of two so the array references go fast (it
459: * will just take a shift to compute the major index). If
460: * encoding NUL's transitions in the table will spoil this, we
461: * give it its own table (note that this will be the case if we're
462: * not using equivalence classes).
463: */
464:
465: /* Note that the test for ecgroup[0] == numecs below accomplishes
466: * both (1) and (2) above
467: */
1.7 ! tedu 468: if (!fullspd && ecgroup[0] == numecs) {
1.1 deraadt 469: /* NUL is alone in its equivalence class, which is the
470: * last one.
471: */
1.7 ! tedu 472: int use_NUL_table = (numecs == csize);
1.1 deraadt 473:
1.7 ! tedu 474: if (fulltbl && !use_NUL_table) {
1.1 deraadt 475: /* We still may want to use the table if numecs
476: * is a power of 2.
477: */
1.7 ! tedu 478: int power_of_two;
1.1 deraadt 479:
1.7 ! tedu 480: for (power_of_two = 1; power_of_two <= csize;
! 481: power_of_two *= 2)
! 482: if (numecs == power_of_two) {
1.1 deraadt 483: use_NUL_table = true;
484: break;
1.7 ! tedu 485: }
! 486: }
1.1 deraadt 487:
1.7 ! tedu 488: if (use_NUL_table)
! 489: nultrans =
! 490: allocate_integer_array (current_max_dfas);
1.1 deraadt 491:
492: /* From now on, nultrans != nil indicates that we're
493: * saving null transitions for later, separate encoding.
494: */
1.7 ! tedu 495: }
1.1 deraadt 496:
497:
1.7 ! tedu 498: if (fullspd) {
! 499: for (i = 0; i <= numecs; ++i)
1.1 deraadt 500: state[i] = 0;
501:
1.7 ! tedu 502: place_state (state, 0, 0);
1.1 deraadt 503: dfaacc[0].dfaacc_state = 0;
1.7 ! tedu 504: }
1.1 deraadt 505:
1.7 ! tedu 506: else if (fulltbl) {
! 507: if (nultrans)
1.1 deraadt 508: /* We won't be including NUL's transitions in the
509: * table, so build it for entries from 0 .. numecs - 1.
510: */
511: num_full_table_rows = numecs;
512:
513: else
514: /* Take into account the fact that we'll be including
515: * the NUL entries in the transition table. Build it
516: * from 0 .. numecs.
517: */
518: num_full_table_rows = numecs + 1;
519:
1.7 ! tedu 520: /* Begin generating yy_nxt[][]
! 521: * This spans the entire LONG function.
! 522: * This table is tricky because we don't know how big it will be.
! 523: * So we'll have to realloc() on the way...
! 524: * we'll wait until we can calculate yynxt_tbl->td_hilen.
! 525: */
! 526: yynxt_tbl =
! 527: (struct yytbl_data *) calloc (1,
! 528: sizeof (struct
! 529: yytbl_data));
! 530: yytbl_data_init (yynxt_tbl, YYTD_ID_NXT);
! 531: yynxt_tbl->td_hilen = 1;
! 532: yynxt_tbl->td_lolen = num_full_table_rows;
! 533: yynxt_tbl->td_data = yynxt_data =
! 534: (flex_int32_t *) calloc (yynxt_tbl->td_lolen *
! 535: yynxt_tbl->td_hilen,
! 536: sizeof (flex_int32_t));
! 537: yynxt_curr = 0;
! 538:
! 539: buf_prints (&yydmap_buf,
! 540: "\t{YYTD_ID_NXT, (void**)&yy_nxt, sizeof(%s)},\n",
! 541: long_align ? "flex_int32_t" : "flex_int16_t");
! 542:
1.1 deraadt 543: /* Unless -Ca, declare it "short" because it's a real
544: * long-shot that that won't be large enough.
545: */
1.7 ! tedu 546: if (gentables)
! 547: out_str_dec
! 548: ("static yyconst %s yy_nxt[][%d] =\n {\n",
! 549: long_align ? "flex_int32_t" : "flex_int16_t",
! 550: num_full_table_rows);
! 551: else {
! 552: out_dec ("#undef YY_NXT_LOLEN\n#define YY_NXT_LOLEN (%d)\n", num_full_table_rows);
! 553: out_str ("static yyconst %s *yy_nxt =0;\n",
! 554: long_align ? "flex_int32_t" : "flex_int16_t");
! 555: }
! 556:
1.1 deraadt 557:
1.7 ! tedu 558: if (gentables)
! 559: outn (" {");
1.1 deraadt 560:
561: /* Generate 0 entries for state #0. */
1.7 ! tedu 562: for (i = 0; i < num_full_table_rows; ++i) {
! 563: mk2data (0);
! 564: yynxt_data[yynxt_curr++] = 0;
! 565: }
1.1 deraadt 566:
1.7 ! tedu 567: dataflush ();
! 568: if (gentables)
! 569: outn (" },\n");
! 570: }
1.1 deraadt 571:
572: /* Create the first states. */
573:
574: num_start_states = lastsc * 2;
575:
1.7 ! tedu 576: for (i = 1; i <= num_start_states; ++i) {
1.1 deraadt 577: numstates = 1;
578:
579: /* For each start condition, make one state for the case when
580: * we're at the beginning of the line (the '^' operator) and
581: * one for the case when we're not.
582: */
1.7 ! tedu 583: if (i % 2 == 1)
1.1 deraadt 584: nset[numstates] = scset[(i / 2) + 1];
585: else
586: nset[numstates] =
1.7 ! tedu 587: mkbranch (scbol[i / 2], scset[i / 2]);
1.1 deraadt 588:
1.7 ! tedu 589: nset = epsclosure (nset, &numstates, accset, &nacc,
! 590: &hashval);
1.1 deraadt 591:
1.7 ! tedu 592: if (snstods (nset, numstates, accset, nacc, hashval, &ds)) {
1.1 deraadt 593: numas += nacc;
594: totnst += numstates;
595: ++todo_next;
596:
1.7 ! tedu 597: if (variable_trailing_context_rules && nacc > 0)
! 598: check_trailing_context (nset, numstates,
! 599: accset, nacc);
1.1 deraadt 600: }
1.7 ! tedu 601: }
1.1 deraadt 602:
1.7 ! tedu 603: if (!fullspd) {
! 604: if (!snstods (nset, 0, accset, 0, 0, &end_of_buffer_state))
! 605: flexfatal (_
! 606: ("could not create unique end-of-buffer state"));
1.1 deraadt 607:
608: ++numas;
609: ++num_start_states;
610: ++todo_next;
1.7 ! tedu 611: }
! 612:
1.1 deraadt 613:
1.7 ! tedu 614: while (todo_head < todo_next) {
1.1 deraadt 615: targptr = 0;
616: totaltrans = 0;
617:
1.7 ! tedu 618: for (i = 1; i <= numecs; ++i)
1.1 deraadt 619: state[i] = 0;
620:
621: ds = ++todo_head;
622:
623: dset = dss[ds];
624: dsize = dfasiz[ds];
625:
1.7 ! tedu 626: if (trace)
! 627: fprintf (stderr, _("state # %d:\n"), ds);
1.1 deraadt 628:
1.7 ! tedu 629: sympartition (dset, dsize, symlist, duplist);
1.1 deraadt 630:
1.7 ! tedu 631: for (sym = 1; sym <= numecs; ++sym) {
! 632: if (symlist[sym]) {
1.1 deraadt 633: symlist[sym] = 0;
634:
1.7 ! tedu 635: if (duplist[sym] == NIL) {
1.1 deraadt 636: /* Symbol has unique out-transitions. */
1.7 ! tedu 637: numstates =
! 638: symfollowset (dset, dsize,
! 639: sym, nset);
! 640: nset = epsclosure (nset,
! 641: &numstates,
! 642: accset, &nacc,
! 643: &hashval);
! 644:
! 645: if (snstods
! 646: (nset, numstates, accset, nacc,
! 647: hashval, &newds)) {
! 648: totnst = totnst +
! 649: numstates;
1.1 deraadt 650: ++todo_next;
651: numas += nacc;
652:
1.7 ! tedu 653: if (variable_trailing_context_rules && nacc > 0)
! 654: check_trailing_context
! 655: (nset,
! 656: numstates,
! 657: accset,
! 658: nacc);
! 659: }
1.1 deraadt 660:
661: state[sym] = newds;
662:
1.7 ! tedu 663: if (trace)
! 664: fprintf (stderr,
! 665: "\t%d\t%d\n", sym,
! 666: newds);
1.1 deraadt 667:
668: targfreq[++targptr] = 1;
669: targstate[targptr] = newds;
670: ++numuniq;
1.7 ! tedu 671: }
1.1 deraadt 672:
1.7 ! tedu 673: else {
1.1 deraadt 674: /* sym's equivalence class has the same
675: * transitions as duplist(sym)'s
676: * equivalence class.
677: */
678: targ = state[duplist[sym]];
679: state[sym] = targ;
680:
1.7 ! tedu 681: if (trace)
! 682: fprintf (stderr,
! 683: "\t%d\t%d\n", sym,
! 684: targ);
1.1 deraadt 685:
686: /* Update frequency count for
687: * destination state.
688: */
689:
690: i = 0;
1.7 ! tedu 691: while (targstate[++i] != targ) ;
1.1 deraadt 692:
693: ++targfreq[i];
694: ++numdup;
1.7 ! tedu 695: }
1.1 deraadt 696:
697: ++totaltrans;
698: duplist[sym] = NIL;
699: }
1.7 ! tedu 700: }
1.1 deraadt 701:
702:
703: numsnpairs += totaltrans;
704:
1.7 ! tedu 705: if (ds > num_start_states)
! 706: check_for_backing_up (ds, state);
1.1 deraadt 707:
1.7 ! tedu 708: if (nultrans) {
1.1 deraadt 709: nultrans[ds] = state[NUL_ec];
710: state[NUL_ec] = 0; /* remove transition */
1.7 ! tedu 711: }
! 712:
! 713: if (fulltbl) {
! 714:
! 715: /* Each time we hit here, it's another td_hilen, so we realloc. */
! 716: yynxt_tbl->td_hilen++;
! 717: yynxt_tbl->td_data = yynxt_data =
! 718: (flex_int32_t *) realloc (yynxt_data,
! 719: yynxt_tbl->td_hilen *
! 720: yynxt_tbl->td_lolen *
! 721: sizeof (flex_int32_t));
! 722:
1.1 deraadt 723:
1.7 ! tedu 724: if (gentables)
! 725: outn (" {");
1.1 deraadt 726:
727: /* Supply array's 0-element. */
1.7 ! tedu 728: if (ds == end_of_buffer_state) {
! 729: mk2data (-end_of_buffer_state);
! 730: yynxt_data[yynxt_curr++] =
! 731: -end_of_buffer_state;
! 732: }
! 733: else {
! 734: mk2data (end_of_buffer_state);
! 735: yynxt_data[yynxt_curr++] =
! 736: end_of_buffer_state;
! 737: }
1.1 deraadt 738:
1.7 ! tedu 739: for (i = 1; i < num_full_table_rows; ++i) {
1.1 deraadt 740: /* Jams are marked by negative of state
741: * number.
742: */
1.7 ! tedu 743: mk2data (state[i] ? state[i] : -ds);
! 744: yynxt_data[yynxt_curr++] =
! 745: state[i] ? state[i] : -ds;
! 746: }
1.1 deraadt 747:
1.7 ! tedu 748: dataflush ();
! 749: if (gentables)
! 750: outn (" },\n");
! 751: }
1.1 deraadt 752:
1.7 ! tedu 753: else if (fullspd)
! 754: place_state (state, ds, totaltrans);
1.1 deraadt 755:
1.7 ! tedu 756: else if (ds == end_of_buffer_state)
1.1 deraadt 757: /* Special case this state to make sure it does what
758: * it's supposed to, i.e., jam on end-of-buffer.
759: */
1.7 ! tedu 760: stack1 (ds, 0, 0, JAMSTATE);
! 761:
! 762: else { /* normal, compressed state */
1.1 deraadt 763:
764: /* Determine which destination state is the most
765: * common, and how many transitions to it there are.
766: */
767:
768: comfreq = 0;
769: comstate = 0;
770:
1.7 ! tedu 771: for (i = 1; i <= targptr; ++i)
! 772: if (targfreq[i] > comfreq) {
1.1 deraadt 773: comfreq = targfreq[i];
774: comstate = targstate[i];
1.7 ! tedu 775: }
1.1 deraadt 776:
1.7 ! tedu 777: bldtbl (state, ds, totaltrans, comstate, comfreq);
1.1 deraadt 778: }
1.7 ! tedu 779: }
1.1 deraadt 780:
1.7 ! tedu 781: if (fulltbl) {
! 782: dataend ();
! 783: if (tablesext) {
! 784: yytbl_data_compress (yynxt_tbl);
! 785: if (yytbl_data_fwrite (&tableswr, yynxt_tbl) < 0)
! 786: flexerror (_
! 787: ("Could not write yynxt_tbl[][]"));
! 788: }
! 789: if (yynxt_tbl) {
! 790: yytbl_data_destroy (yynxt_tbl);
! 791: yynxt_tbl = 0;
! 792: }
! 793: }
1.1 deraadt 794:
1.7 ! tedu 795: else if (!fullspd) {
! 796: cmptmps (); /* create compressed template entries */
1.1 deraadt 797:
798: /* Create tables for all the states with only one
799: * out-transition.
800: */
1.7 ! tedu 801: while (onesp > 0) {
! 802: mk1tbl (onestate[onesp], onesym[onesp],
! 803: onenext[onesp], onedef[onesp]);
1.1 deraadt 804: --onesp;
805: }
806:
1.7 ! tedu 807: mkdeftbl ();
1.1 deraadt 808: }
809:
1.7 ! tedu 810: flex_free ((void *) accset);
! 811: flex_free ((void *) nset);
! 812: }
! 813:
1.1 deraadt 814:
815: /* snstods - converts a set of ndfa states into a dfa state
816: *
817: * synopsis
818: * is_new_state = snstods( int sns[numstates], int numstates,
819: * int accset[num_rules+1], int nacc,
820: * int hashval, int *newds_addr );
821: *
822: * On return, the dfa state number is in newds.
823: */
824:
1.7 ! tedu 825: int snstods (sns, numstates, accset, nacc, hashval, newds_addr)
! 826: int sns[], numstates, accset[], nacc, hashval, *newds_addr;
! 827: {
! 828: int didsort = 0;
1.5 mpech 829: int i, j;
1.7 ! tedu 830: int newds, *oldsns;
1.1 deraadt 831:
1.7 ! tedu 832: for (i = 1; i <= lastdfa; ++i)
! 833: if (hashval == dhash[i]) {
! 834: if (numstates == dfasiz[i]) {
1.1 deraadt 835: oldsns = dss[i];
836:
1.7 ! tedu 837: if (!didsort) {
1.1 deraadt 838: /* We sort the states in sns so we
839: * can compare it to oldsns quickly.
840: */
1.7 ! tedu 841: qsort (&sns [1], numstates, sizeof (sns [1]), intcmp);
1.1 deraadt 842: didsort = 1;
1.7 ! tedu 843: }
1.1 deraadt 844:
1.7 ! tedu 845: for (j = 1; j <= numstates; ++j)
! 846: if (sns[j] != oldsns[j])
1.1 deraadt 847: break;
848:
1.7 ! tedu 849: if (j > numstates) {
1.1 deraadt 850: ++dfaeql;
851: *newds_addr = i;
852: return 0;
1.7 ! tedu 853: }
1.1 deraadt 854:
855: ++hshcol;
1.7 ! tedu 856: }
1.1 deraadt 857:
858: else
859: ++hshsave;
1.7 ! tedu 860: }
1.1 deraadt 861:
862: /* Make a new dfa. */
863:
1.7 ! tedu 864: if (++lastdfa >= current_max_dfas)
! 865: increase_max_dfas ();
1.1 deraadt 866:
867: newds = lastdfa;
868:
1.7 ! tedu 869: dss[newds] = allocate_integer_array (numstates + 1);
1.1 deraadt 870:
871: /* If we haven't already sorted the states in sns, we do so now,
872: * so that future comparisons with it can be made quickly.
873: */
874:
1.7 ! tedu 875: if (!didsort)
! 876: qsort (&sns [1], numstates, sizeof (sns [1]), intcmp);
1.1 deraadt 877:
1.7 ! tedu 878: for (i = 1; i <= numstates; ++i)
1.1 deraadt 879: dss[newds][i] = sns[i];
880:
881: dfasiz[newds] = numstates;
882: dhash[newds] = hashval;
883:
1.7 ! tedu 884: if (nacc == 0) {
! 885: if (reject)
1.1 deraadt 886: dfaacc[newds].dfaacc_set = (int *) 0;
887: else
888: dfaacc[newds].dfaacc_state = 0;
889:
890: accsiz[newds] = 0;
1.7 ! tedu 891: }
1.1 deraadt 892:
1.7 ! tedu 893: else if (reject) {
1.1 deraadt 894: /* We sort the accepting set in increasing order so the
895: * disambiguating rule that the first rule listed is considered
1.7 ! tedu 896: * match in the event of ties will work.
1.1 deraadt 897: */
898:
1.7 ! tedu 899: qsort (&accset [1], nacc, sizeof (accset [1]), intcmp);
1.1 deraadt 900:
1.7 ! tedu 901: dfaacc[newds].dfaacc_set =
! 902: allocate_integer_array (nacc + 1);
1.1 deraadt 903:
904: /* Save the accepting set for later */
1.7 ! tedu 905: for (i = 1; i <= nacc; ++i) {
1.1 deraadt 906: dfaacc[newds].dfaacc_set[i] = accset[i];
907:
1.7 ! tedu 908: if (accset[i] <= num_rules)
1.1 deraadt 909: /* Who knows, perhaps a REJECT can yield
910: * this rule.
911: */
912: rule_useful[accset[i]] = true;
1.7 ! tedu 913: }
1.1 deraadt 914:
915: accsiz[newds] = nacc;
1.7 ! tedu 916: }
1.1 deraadt 917:
1.7 ! tedu 918: else {
1.1 deraadt 919: /* Find lowest numbered rule so the disambiguating rule
920: * will work.
921: */
922: j = num_rules + 1;
923:
1.7 ! tedu 924: for (i = 1; i <= nacc; ++i)
! 925: if (accset[i] < j)
1.1 deraadt 926: j = accset[i];
927:
928: dfaacc[newds].dfaacc_state = j;
929:
1.7 ! tedu 930: if (j <= num_rules)
1.1 deraadt 931: rule_useful[j] = true;
1.7 ! tedu 932: }
1.1 deraadt 933:
934: *newds_addr = newds;
935:
936: return 1;
1.7 ! tedu 937: }
1.1 deraadt 938:
939:
940: /* symfollowset - follow the symbol transitions one step
941: *
942: * synopsis
943: * numstates = symfollowset( int ds[current_max_dfa_size], int dsize,
944: * int transsym, int nset[current_max_dfa_size] );
945: */
946:
1.7 ! tedu 947: int symfollowset (ds, dsize, transsym, nset)
! 948: int ds[], dsize, transsym, nset[];
! 949: {
! 950: int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist;
1.1 deraadt 951:
952: numstates = 0;
953:
1.7 ! tedu 954: for (i = 1; i <= dsize; ++i) { /* for each nfa state ns in the state set of ds */
1.1 deraadt 955: ns = ds[i];
956: sym = transchar[ns];
957: tsp = trans1[ns];
958:
1.7 ! tedu 959: if (sym < 0) { /* it's a character class */
1.1 deraadt 960: sym = -sym;
961: ccllist = cclmap[sym];
962: lenccl = ccllen[sym];
963:
1.7 ! tedu 964: if (cclng[sym]) {
! 965: for (j = 0; j < lenccl; ++j) {
1.1 deraadt 966: /* Loop through negated character
967: * class.
968: */
969: ch = ccltbl[ccllist + j];
970:
1.7 ! tedu 971: if (ch == 0)
1.1 deraadt 972: ch = NUL_ec;
973:
1.7 ! tedu 974: if (ch > transsym)
1.1 deraadt 975: /* Transsym isn't in negated
976: * ccl.
977: */
978: break;
979:
1.7 ! tedu 980: else if (ch == transsym)
! 981: /* next 2 */
! 982: goto bottom;
! 983: }
1.1 deraadt 984:
985: /* Didn't find transsym in ccl. */
986: nset[++numstates] = tsp;
1.7 ! tedu 987: }
1.1 deraadt 988:
989: else
1.7 ! tedu 990: for (j = 0; j < lenccl; ++j) {
1.1 deraadt 991: ch = ccltbl[ccllist + j];
992:
1.7 ! tedu 993: if (ch == 0)
1.1 deraadt 994: ch = NUL_ec;
995:
1.7 ! tedu 996: if (ch > transsym)
1.1 deraadt 997: break;
1.7 ! tedu 998: else if (ch == transsym) {
1.1 deraadt 999: nset[++numstates] = tsp;
1000: break;
1001: }
1.7 ! tedu 1002: }
! 1003: }
1.1 deraadt 1004:
1.7 ! tedu 1005: else if (sym == SYM_EPSILON) { /* do nothing */
! 1006: }
1.1 deraadt 1007:
1.7 ! tedu 1008: else if (ABS (ecgroup[sym]) == transsym)
1.1 deraadt 1009: nset[++numstates] = tsp;
1010:
1.7 ! tedu 1011: bottom:;
! 1012: }
1.1 deraadt 1013:
1014: return numstates;
1.7 ! tedu 1015: }
1.1 deraadt 1016:
1017:
1018: /* sympartition - partition characters with same out-transitions
1019: *
1020: * synopsis
1021: * sympartition( int ds[current_max_dfa_size], int numstates,
1022: * int symlist[numecs], int duplist[numecs] );
1023: */
1024:
1.7 ! tedu 1025: void sympartition (ds, numstates, symlist, duplist)
! 1026: int ds[], numstates;
! 1027: int symlist[], duplist[];
! 1028: {
! 1029: int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
1.1 deraadt 1030:
1031: /* Partitioning is done by creating equivalence classes for those
1032: * characters which have out-transitions from the given state. Thus
1033: * we are really creating equivalence classes of equivalence classes.
1034: */
1035:
1.7 ! tedu 1036: for (i = 1; i <= numecs; ++i) { /* initialize equivalence class list */
1.1 deraadt 1037: duplist[i] = i - 1;
1038: dupfwd[i] = i + 1;
1.7 ! tedu 1039: }
1.1 deraadt 1040:
1041: duplist[1] = NIL;
1042: dupfwd[numecs] = NIL;
1043:
1.7 ! tedu 1044: for (i = 1; i <= numstates; ++i) {
1.1 deraadt 1045: ns = ds[i];
1046: tch = transchar[ns];
1047:
1.7 ! tedu 1048: if (tch != SYM_EPSILON) {
! 1049: if (tch < -lastccl || tch >= csize) {
! 1050: flexfatal (_
! 1051: ("bad transition character detected in sympartition()"));
! 1052: }
1.1 deraadt 1053:
1.7 ! tedu 1054: if (tch >= 0) { /* character transition */
! 1055: int ec = ecgroup[tch];
1.1 deraadt 1056:
1.7 ! tedu 1057: mkechar (ec, dupfwd, duplist);
1.1 deraadt 1058: symlist[ec] = 1;
1.7 ! tedu 1059: }
1.1 deraadt 1060:
1.7 ! tedu 1061: else { /* character class */
1.1 deraadt 1062: tch = -tch;
1063:
1064: lenccl = ccllen[tch];
1065: cclp = cclmap[tch];
1.7 ! tedu 1066: mkeccl (ccltbl + cclp, lenccl, dupfwd,
! 1067: duplist, numecs, NUL_ec);
1.1 deraadt 1068:
1.7 ! tedu 1069: if (cclng[tch]) {
1.1 deraadt 1070: j = 0;
1071:
1.7 ! tedu 1072: for (k = 0; k < lenccl; ++k) {
1.1 deraadt 1073: ich = ccltbl[cclp + k];
1074:
1.7 ! tedu 1075: if (ich == 0)
1.1 deraadt 1076: ich = NUL_ec;
1077:
1.7 ! tedu 1078: for (++j; j < ich; ++j)
1.1 deraadt 1079: symlist[j] = 1;
1.7 ! tedu 1080: }
1.1 deraadt 1081:
1.7 ! tedu 1082: for (++j; j <= numecs; ++j)
1.1 deraadt 1083: symlist[j] = 1;
1.7 ! tedu 1084: }
1.1 deraadt 1085:
1086: else
1.7 ! tedu 1087: for (k = 0; k < lenccl; ++k) {
1.1 deraadt 1088: ich = ccltbl[cclp + k];
1089:
1.7 ! tedu 1090: if (ich == 0)
1.1 deraadt 1091: ich = NUL_ec;
1092:
1093: symlist[ich] = 1;
1.7 ! tedu 1094: }
1.1 deraadt 1095: }
1096: }
1097: }
1.7 ! tedu 1098: }