Annotation of src/usr.bin/lex/dfa.c, Revision 1.1
1.1 ! deraadt 1: /* dfa - DFA construction routines */
! 2:
! 3: /*-
! 4: * Copyright (c) 1990 The Regents of the University of California.
! 5: * All rights reserved.
! 6: *
! 7: * This code is derived from software contributed to Berkeley by
! 8: * Vern Paxson.
! 9: *
! 10: * The United States Government has rights in this work pursuant
! 11: * to contract no. DE-AC03-76SF00098 between the United States
! 12: * Department of Energy and the University of California.
! 13: *
! 14: * Redistribution and use in source and binary forms are permitted provided
! 15: * that: (1) source distributions retain this entire copyright notice and
! 16: * comment, and (2) distributions including binaries display the following
! 17: * acknowledgement: ``This product includes software developed by the
! 18: * University of California, Berkeley and its contributors'' in the
! 19: * documentation or other materials provided with the distribution and in
! 20: * all advertising materials mentioning features or use of this software.
! 21: * Neither the name of the University nor the names of its contributors may
! 22: * be used to endorse or promote products derived from this software without
! 23: * specific prior written permission.
! 24: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
! 25: * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
! 26: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
! 27: */
! 28:
! 29: /* $Header: /a/cvsroot/src/usr.bin/lex/dfa.c,v 1.9 1995/05/05 05:35:14 jtc Exp $ */
! 30:
! 31: #include "flexdef.h"
! 32:
! 33:
! 34: /* declare functions that have forward references */
! 35:
! 36: void dump_associated_rules PROTO((FILE*, int));
! 37: void dump_transitions PROTO((FILE*, int[]));
! 38: void sympartition PROTO((int[], int, int[], int[]));
! 39: int symfollowset PROTO((int[], int, int, int[]));
! 40:
! 41:
! 42: /* check_for_backing_up - check a DFA state for backing up
! 43: *
! 44: * synopsis
! 45: * void check_for_backing_up( int ds, int state[numecs] );
! 46: *
! 47: * ds is the number of the state to check and state[] is its out-transitions,
! 48: * indexed by equivalence class.
! 49: */
! 50:
! 51: void check_for_backing_up( ds, state )
! 52: int ds;
! 53: int state[];
! 54: {
! 55: if ( (reject && ! dfaacc[ds].dfaacc_set) ||
! 56: (! reject && ! dfaacc[ds].dfaacc_state) )
! 57: { /* state is non-accepting */
! 58: ++num_backing_up;
! 59:
! 60: if ( backing_up_report )
! 61: {
! 62: fprintf( backing_up_file,
! 63: _( "State #%d is non-accepting -\n" ), ds );
! 64:
! 65: /* identify the state */
! 66: dump_associated_rules( backing_up_file, ds );
! 67:
! 68: /* Now identify it further using the out- and
! 69: * jam-transitions.
! 70: */
! 71: dump_transitions( backing_up_file, state );
! 72:
! 73: putc( '\n', backing_up_file );
! 74: }
! 75: }
! 76: }
! 77:
! 78:
! 79: /* check_trailing_context - check to see if NFA state set constitutes
! 80: * "dangerous" trailing context
! 81: *
! 82: * synopsis
! 83: * void check_trailing_context( int nfa_states[num_states+1], int num_states,
! 84: * int accset[nacc+1], int nacc );
! 85: *
! 86: * NOTES
! 87: * Trailing context is "dangerous" if both the head and the trailing
! 88: * part are of variable size \and/ there's a DFA state which contains
! 89: * both an accepting state for the head part of the rule and NFA states
! 90: * which occur after the beginning of the trailing context.
! 91: *
! 92: * When such a rule is matched, it's impossible to tell if having been
! 93: * in the DFA state indicates the beginning of the trailing context or
! 94: * further-along scanning of the pattern. In these cases, a warning
! 95: * message is issued.
! 96: *
! 97: * nfa_states[1 .. num_states] is the list of NFA states in the DFA.
! 98: * accset[1 .. nacc] is the list of accepting numbers for the DFA state.
! 99: */
! 100:
! 101: void check_trailing_context( nfa_states, num_states, accset, nacc )
! 102: int *nfa_states, num_states;
! 103: int *accset;
! 104: int nacc;
! 105: {
! 106: register int i, j;
! 107:
! 108: for ( i = 1; i <= num_states; ++i )
! 109: {
! 110: int ns = nfa_states[i];
! 111: register int type = state_type[ns];
! 112: register int ar = assoc_rule[ns];
! 113:
! 114: if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE )
! 115: { /* do nothing */
! 116: }
! 117:
! 118: else if ( type == STATE_TRAILING_CONTEXT )
! 119: {
! 120: /* Potential trouble. Scan set of accepting numbers
! 121: * for the one marking the end of the "head". We
! 122: * assume that this looping will be fairly cheap
! 123: * since it's rare that an accepting number set
! 124: * is large.
! 125: */
! 126: for ( j = 1; j <= nacc; ++j )
! 127: if ( accset[j] & YY_TRAILING_HEAD_MASK )
! 128: {
! 129: line_warning(
! 130: _( "dangerous trailing context" ),
! 131: rule_linenum[ar] );
! 132: return;
! 133: }
! 134: }
! 135: }
! 136: }
! 137:
! 138:
! 139: /* dump_associated_rules - list the rules associated with a DFA state
! 140: *
! 141: * Goes through the set of NFA states associated with the DFA and
! 142: * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
! 143: * and writes a report to the given file.
! 144: */
! 145:
! 146: void dump_associated_rules( file, ds )
! 147: FILE *file;
! 148: int ds;
! 149: {
! 150: register int i, j;
! 151: register int num_associated_rules = 0;
! 152: int rule_set[MAX_ASSOC_RULES + 1];
! 153: int *dset = dss[ds];
! 154: int size = dfasiz[ds];
! 155:
! 156: for ( i = 1; i <= size; ++i )
! 157: {
! 158: register int rule_num = rule_linenum[assoc_rule[dset[i]]];
! 159:
! 160: for ( j = 1; j <= num_associated_rules; ++j )
! 161: if ( rule_num == rule_set[j] )
! 162: break;
! 163:
! 164: if ( j > num_associated_rules )
! 165: { /* new rule */
! 166: if ( num_associated_rules < MAX_ASSOC_RULES )
! 167: rule_set[++num_associated_rules] = rule_num;
! 168: }
! 169: }
! 170:
! 171: bubble( rule_set, num_associated_rules );
! 172:
! 173: fprintf( file, _( " associated rule line numbers:" ) );
! 174:
! 175: for ( i = 1; i <= num_associated_rules; ++i )
! 176: {
! 177: if ( i % 8 == 1 )
! 178: putc( '\n', file );
! 179:
! 180: fprintf( file, "\t%d", rule_set[i] );
! 181: }
! 182:
! 183: putc( '\n', file );
! 184: }
! 185:
! 186:
! 187: /* dump_transitions - list the transitions associated with a DFA state
! 188: *
! 189: * synopsis
! 190: * dump_transitions( FILE *file, int state[numecs] );
! 191: *
! 192: * Goes through the set of out-transitions and lists them in human-readable
! 193: * form (i.e., not as equivalence classes); also lists jam transitions
! 194: * (i.e., all those which are not out-transitions, plus EOF). The dump
! 195: * is done to the given file.
! 196: */
! 197:
! 198: void dump_transitions( file, state )
! 199: FILE *file;
! 200: int state[];
! 201: {
! 202: register int i, ec;
! 203: int out_char_set[CSIZE];
! 204:
! 205: for ( i = 0; i < csize; ++i )
! 206: {
! 207: ec = ABS( ecgroup[i] );
! 208: out_char_set[i] = state[ec];
! 209: }
! 210:
! 211: fprintf( file, _( " out-transitions: " ) );
! 212:
! 213: list_character_set( file, out_char_set );
! 214:
! 215: /* now invert the members of the set to get the jam transitions */
! 216: for ( i = 0; i < csize; ++i )
! 217: out_char_set[i] = ! out_char_set[i];
! 218:
! 219: fprintf( file, _( "\n jam-transitions: EOF " ) );
! 220:
! 221: list_character_set( file, out_char_set );
! 222:
! 223: putc( '\n', file );
! 224: }
! 225:
! 226:
! 227: /* epsclosure - construct the epsilon closure of a set of ndfa states
! 228: *
! 229: * synopsis
! 230: * int *epsclosure( int t[num_states], int *numstates_addr,
! 231: * int accset[num_rules+1], int *nacc_addr,
! 232: * int *hashval_addr );
! 233: *
! 234: * NOTES
! 235: * The epsilon closure is the set of all states reachable by an arbitrary
! 236: * number of epsilon transitions, which themselves do not have epsilon
! 237: * transitions going out, unioned with the set of states which have non-null
! 238: * accepting numbers. t is an array of size numstates of nfa state numbers.
! 239: * Upon return, t holds the epsilon closure and *numstates_addr is updated.
! 240: * accset holds a list of the accepting numbers, and the size of accset is
! 241: * given by *nacc_addr. t may be subjected to reallocation if it is not
! 242: * large enough to hold the epsilon closure.
! 243: *
! 244: * hashval is the hash value for the dfa corresponding to the state set.
! 245: */
! 246:
! 247: int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr )
! 248: int *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
! 249: {
! 250: register int stkpos, ns, tsp;
! 251: int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
! 252: int stkend, nstate;
! 253: static int did_stk_init = false, *stk;
! 254:
! 255: #define MARK_STATE(state) \
! 256: trans1[state] = trans1[state] - MARKER_DIFFERENCE;
! 257:
! 258: #define IS_MARKED(state) (trans1[state] < 0)
! 259:
! 260: #define UNMARK_STATE(state) \
! 261: trans1[state] = trans1[state] + MARKER_DIFFERENCE;
! 262:
! 263: #define CHECK_ACCEPT(state) \
! 264: { \
! 265: nfaccnum = accptnum[state]; \
! 266: if ( nfaccnum != NIL ) \
! 267: accset[++nacc] = nfaccnum; \
! 268: }
! 269:
! 270: #define DO_REALLOCATION \
! 271: { \
! 272: current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
! 273: ++num_reallocs; \
! 274: t = reallocate_integer_array( t, current_max_dfa_size ); \
! 275: stk = reallocate_integer_array( stk, current_max_dfa_size ); \
! 276: } \
! 277:
! 278: #define PUT_ON_STACK(state) \
! 279: { \
! 280: if ( ++stkend >= current_max_dfa_size ) \
! 281: DO_REALLOCATION \
! 282: stk[stkend] = state; \
! 283: MARK_STATE(state) \
! 284: }
! 285:
! 286: #define ADD_STATE(state) \
! 287: { \
! 288: if ( ++numstates >= current_max_dfa_size ) \
! 289: DO_REALLOCATION \
! 290: t[numstates] = state; \
! 291: hashval += state; \
! 292: }
! 293:
! 294: #define STACK_STATE(state) \
! 295: { \
! 296: PUT_ON_STACK(state) \
! 297: CHECK_ACCEPT(state) \
! 298: if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
! 299: ADD_STATE(state) \
! 300: }
! 301:
! 302:
! 303: if ( ! did_stk_init )
! 304: {
! 305: stk = allocate_integer_array( current_max_dfa_size );
! 306: did_stk_init = true;
! 307: }
! 308:
! 309: nacc = stkend = hashval = 0;
! 310:
! 311: for ( nstate = 1; nstate <= numstates; ++nstate )
! 312: {
! 313: ns = t[nstate];
! 314:
! 315: /* The state could be marked if we've already pushed it onto
! 316: * the stack.
! 317: */
! 318: if ( ! IS_MARKED(ns) )
! 319: {
! 320: PUT_ON_STACK(ns)
! 321: CHECK_ACCEPT(ns)
! 322: hashval += ns;
! 323: }
! 324: }
! 325:
! 326: for ( stkpos = 1; stkpos <= stkend; ++stkpos )
! 327: {
! 328: ns = stk[stkpos];
! 329: transsym = transchar[ns];
! 330:
! 331: if ( transsym == SYM_EPSILON )
! 332: {
! 333: tsp = trans1[ns] + MARKER_DIFFERENCE;
! 334:
! 335: if ( tsp != NO_TRANSITION )
! 336: {
! 337: if ( ! IS_MARKED(tsp) )
! 338: STACK_STATE(tsp)
! 339:
! 340: tsp = trans2[ns];
! 341:
! 342: if ( tsp != NO_TRANSITION && ! IS_MARKED(tsp) )
! 343: STACK_STATE(tsp)
! 344: }
! 345: }
! 346: }
! 347:
! 348: /* Clear out "visit" markers. */
! 349:
! 350: for ( stkpos = 1; stkpos <= stkend; ++stkpos )
! 351: {
! 352: if ( IS_MARKED(stk[stkpos]) )
! 353: UNMARK_STATE(stk[stkpos])
! 354: else
! 355: flexfatal(
! 356: _( "consistency check failed in epsclosure()" ) );
! 357: }
! 358:
! 359: *ns_addr = numstates;
! 360: *hv_addr = hashval;
! 361: *nacc_addr = nacc;
! 362:
! 363: return t;
! 364: }
! 365:
! 366:
! 367: /* increase_max_dfas - increase the maximum number of DFAs */
! 368:
! 369: void increase_max_dfas()
! 370: {
! 371: current_max_dfas += MAX_DFAS_INCREMENT;
! 372:
! 373: ++num_reallocs;
! 374:
! 375: base = reallocate_integer_array( base, current_max_dfas );
! 376: def = reallocate_integer_array( def, current_max_dfas );
! 377: dfasiz = reallocate_integer_array( dfasiz, current_max_dfas );
! 378: accsiz = reallocate_integer_array( accsiz, current_max_dfas );
! 379: dhash = reallocate_integer_array( dhash, current_max_dfas );
! 380: dss = reallocate_int_ptr_array( dss, current_max_dfas );
! 381: dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas );
! 382:
! 383: if ( nultrans )
! 384: nultrans =
! 385: reallocate_integer_array( nultrans, current_max_dfas );
! 386: }
! 387:
! 388:
! 389: /* ntod - convert an ndfa to a dfa
! 390: *
! 391: * Creates the dfa corresponding to the ndfa we've constructed. The
! 392: * dfa starts out in state #1.
! 393: */
! 394:
! 395: void ntod()
! 396: {
! 397: int *accset, ds, nacc, newds;
! 398: int sym, hashval, numstates, dsize;
! 399: int num_full_table_rows; /* used only for -f */
! 400: int *nset, *dset;
! 401: int targptr, totaltrans, i, comstate, comfreq, targ;
! 402: int symlist[CSIZE + 1];
! 403: int num_start_states;
! 404: int todo_head, todo_next;
! 405:
! 406: /* Note that the following are indexed by *equivalence classes*
! 407: * and not by characters. Since equivalence classes are indexed
! 408: * beginning with 1, even if the scanner accepts NUL's, this
! 409: * means that (since every character is potentially in its own
! 410: * equivalence class) these arrays must have room for indices
! 411: * from 1 to CSIZE, so their size must be CSIZE + 1.
! 412: */
! 413: int duplist[CSIZE + 1], state[CSIZE + 1];
! 414: int targfreq[CSIZE + 1], targstate[CSIZE + 1];
! 415:
! 416: accset = allocate_integer_array( num_rules + 1 );
! 417: nset = allocate_integer_array( current_max_dfa_size );
! 418:
! 419: /* The "todo" queue is represented by the head, which is the DFA
! 420: * state currently being processed, and the "next", which is the
! 421: * next DFA state number available (not in use). We depend on the
! 422: * fact that snstods() returns DFA's \in increasing order/, and thus
! 423: * need only know the bounds of the dfas to be processed.
! 424: */
! 425: todo_head = todo_next = 0;
! 426:
! 427: for ( i = 0; i <= csize; ++i )
! 428: {
! 429: duplist[i] = NIL;
! 430: symlist[i] = false;
! 431: }
! 432:
! 433: for ( i = 0; i <= num_rules; ++i )
! 434: accset[i] = NIL;
! 435:
! 436: if ( trace )
! 437: {
! 438: dumpnfa( scset[1] );
! 439: fputs( _( "\n\nDFA Dump:\n\n" ), stderr );
! 440: }
! 441:
! 442: inittbl();
! 443:
! 444: /* Check to see whether we should build a separate table for
! 445: * transitions on NUL characters. We don't do this for full-speed
! 446: * (-F) scanners, since for them we don't have a simple state
! 447: * number lying around with which to index the table. We also
! 448: * don't bother doing it for scanners unless (1) NUL is in its own
! 449: * equivalence class (indicated by a positive value of
! 450: * ecgroup[NUL]), (2) NUL's equivalence class is the last
! 451: * equivalence class, and (3) the number of equivalence classes is
! 452: * the same as the number of characters. This latter case comes
! 453: * about when useecs is false or when it's true but every character
! 454: * still manages to land in its own class (unlikely, but it's
! 455: * cheap to check for). If all these things are true then the
! 456: * character code needed to represent NUL's equivalence class for
! 457: * indexing the tables is going to take one more bit than the
! 458: * number of characters, and therefore we won't be assured of
! 459: * being able to fit it into a YY_CHAR variable. This rules out
! 460: * storing the transitions in a compressed table, since the code
! 461: * for interpreting them uses a YY_CHAR variable (perhaps it
! 462: * should just use an integer, though; this is worth pondering ...
! 463: * ###).
! 464: *
! 465: * Finally, for full tables, we want the number of entries in the
! 466: * table to be a power of two so the array references go fast (it
! 467: * will just take a shift to compute the major index). If
! 468: * encoding NUL's transitions in the table will spoil this, we
! 469: * give it its own table (note that this will be the case if we're
! 470: * not using equivalence classes).
! 471: */
! 472:
! 473: /* Note that the test for ecgroup[0] == numecs below accomplishes
! 474: * both (1) and (2) above
! 475: */
! 476: if ( ! fullspd && ecgroup[0] == numecs )
! 477: {
! 478: /* NUL is alone in its equivalence class, which is the
! 479: * last one.
! 480: */
! 481: int use_NUL_table = (numecs == csize);
! 482:
! 483: if ( fulltbl && ! use_NUL_table )
! 484: {
! 485: /* We still may want to use the table if numecs
! 486: * is a power of 2.
! 487: */
! 488: int power_of_two;
! 489:
! 490: for ( power_of_two = 1; power_of_two <= csize;
! 491: power_of_two *= 2 )
! 492: if ( numecs == power_of_two )
! 493: {
! 494: use_NUL_table = true;
! 495: break;
! 496: }
! 497: }
! 498:
! 499: if ( use_NUL_table )
! 500: nultrans = allocate_integer_array( current_max_dfas );
! 501:
! 502: /* From now on, nultrans != nil indicates that we're
! 503: * saving null transitions for later, separate encoding.
! 504: */
! 505: }
! 506:
! 507:
! 508: if ( fullspd )
! 509: {
! 510: for ( i = 0; i <= numecs; ++i )
! 511: state[i] = 0;
! 512:
! 513: place_state( state, 0, 0 );
! 514: dfaacc[0].dfaacc_state = 0;
! 515: }
! 516:
! 517: else if ( fulltbl )
! 518: {
! 519: if ( nultrans )
! 520: /* We won't be including NUL's transitions in the
! 521: * table, so build it for entries from 0 .. numecs - 1.
! 522: */
! 523: num_full_table_rows = numecs;
! 524:
! 525: else
! 526: /* Take into account the fact that we'll be including
! 527: * the NUL entries in the transition table. Build it
! 528: * from 0 .. numecs.
! 529: */
! 530: num_full_table_rows = numecs + 1;
! 531:
! 532: /* Unless -Ca, declare it "short" because it's a real
! 533: * long-shot that that won't be large enough.
! 534: */
! 535: out_str_dec( "static yyconst %s yy_nxt[][%d] =\n {\n",
! 536: /* '}' so vi doesn't get too confused */
! 537: long_align ? "long" : "short", num_full_table_rows );
! 538:
! 539: outn( " {" );
! 540:
! 541: /* Generate 0 entries for state #0. */
! 542: for ( i = 0; i < num_full_table_rows; ++i )
! 543: mk2data( 0 );
! 544:
! 545: dataflush();
! 546: outn( " },\n" );
! 547: }
! 548:
! 549: /* Create the first states. */
! 550:
! 551: num_start_states = lastsc * 2;
! 552:
! 553: for ( i = 1; i <= num_start_states; ++i )
! 554: {
! 555: numstates = 1;
! 556:
! 557: /* For each start condition, make one state for the case when
! 558: * we're at the beginning of the line (the '^' operator) and
! 559: * one for the case when we're not.
! 560: */
! 561: if ( i % 2 == 1 )
! 562: nset[numstates] = scset[(i / 2) + 1];
! 563: else
! 564: nset[numstates] =
! 565: mkbranch( scbol[i / 2], scset[i / 2] );
! 566:
! 567: nset = epsclosure( nset, &numstates, accset, &nacc, &hashval );
! 568:
! 569: if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) )
! 570: {
! 571: numas += nacc;
! 572: totnst += numstates;
! 573: ++todo_next;
! 574:
! 575: if ( variable_trailing_context_rules && nacc > 0 )
! 576: check_trailing_context( nset, numstates,
! 577: accset, nacc );
! 578: }
! 579: }
! 580:
! 581: if ( ! fullspd )
! 582: {
! 583: if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) )
! 584: flexfatal(
! 585: _( "could not create unique end-of-buffer state" ) );
! 586:
! 587: ++numas;
! 588: ++num_start_states;
! 589: ++todo_next;
! 590: }
! 591:
! 592: while ( todo_head < todo_next )
! 593: {
! 594: targptr = 0;
! 595: totaltrans = 0;
! 596:
! 597: for ( i = 1; i <= numecs; ++i )
! 598: state[i] = 0;
! 599:
! 600: ds = ++todo_head;
! 601:
! 602: dset = dss[ds];
! 603: dsize = dfasiz[ds];
! 604:
! 605: if ( trace )
! 606: fprintf( stderr, _( "state # %d:\n" ), ds );
! 607:
! 608: sympartition( dset, dsize, symlist, duplist );
! 609:
! 610: for ( sym = 1; sym <= numecs; ++sym )
! 611: {
! 612: if ( symlist[sym] )
! 613: {
! 614: symlist[sym] = 0;
! 615:
! 616: if ( duplist[sym] == NIL )
! 617: {
! 618: /* Symbol has unique out-transitions. */
! 619: numstates = symfollowset( dset, dsize,
! 620: sym, nset );
! 621: nset = epsclosure( nset, &numstates,
! 622: accset, &nacc, &hashval );
! 623:
! 624: if ( snstods( nset, numstates, accset,
! 625: nacc, hashval, &newds ) )
! 626: {
! 627: totnst = totnst + numstates;
! 628: ++todo_next;
! 629: numas += nacc;
! 630:
! 631: if (
! 632: variable_trailing_context_rules &&
! 633: nacc > 0 )
! 634: check_trailing_context(
! 635: nset, numstates,
! 636: accset, nacc );
! 637: }
! 638:
! 639: state[sym] = newds;
! 640:
! 641: if ( trace )
! 642: fprintf( stderr, "\t%d\t%d\n",
! 643: sym, newds );
! 644:
! 645: targfreq[++targptr] = 1;
! 646: targstate[targptr] = newds;
! 647: ++numuniq;
! 648: }
! 649:
! 650: else
! 651: {
! 652: /* sym's equivalence class has the same
! 653: * transitions as duplist(sym)'s
! 654: * equivalence class.
! 655: */
! 656: targ = state[duplist[sym]];
! 657: state[sym] = targ;
! 658:
! 659: if ( trace )
! 660: fprintf( stderr, "\t%d\t%d\n",
! 661: sym, targ );
! 662:
! 663: /* Update frequency count for
! 664: * destination state.
! 665: */
! 666:
! 667: i = 0;
! 668: while ( targstate[++i] != targ )
! 669: ;
! 670:
! 671: ++targfreq[i];
! 672: ++numdup;
! 673: }
! 674:
! 675: ++totaltrans;
! 676: duplist[sym] = NIL;
! 677: }
! 678: }
! 679:
! 680: if ( caseins && ! useecs )
! 681: {
! 682: register int j;
! 683:
! 684: for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j )
! 685: {
! 686: if ( state[i] == 0 && state[j] != 0 )
! 687: /* We're adding a transition. */
! 688: ++totaltrans;
! 689:
! 690: else if ( state[i] != 0 && state[j] == 0 )
! 691: /* We're taking away a transition. */
! 692: --totaltrans;
! 693:
! 694: state[i] = state[j];
! 695: }
! 696: }
! 697:
! 698: numsnpairs += totaltrans;
! 699:
! 700: if ( ds > num_start_states )
! 701: check_for_backing_up( ds, state );
! 702:
! 703: if ( nultrans )
! 704: {
! 705: nultrans[ds] = state[NUL_ec];
! 706: state[NUL_ec] = 0; /* remove transition */
! 707: }
! 708:
! 709: if ( fulltbl )
! 710: {
! 711: outn( " {" );
! 712:
! 713: /* Supply array's 0-element. */
! 714: if ( ds == end_of_buffer_state )
! 715: mk2data( -end_of_buffer_state );
! 716: else
! 717: mk2data( end_of_buffer_state );
! 718:
! 719: for ( i = 1; i < num_full_table_rows; ++i )
! 720: /* Jams are marked by negative of state
! 721: * number.
! 722: */
! 723: mk2data( state[i] ? state[i] : -ds );
! 724:
! 725: dataflush();
! 726: outn( " },\n" );
! 727: }
! 728:
! 729: else if ( fullspd )
! 730: place_state( state, ds, totaltrans );
! 731:
! 732: else if ( ds == end_of_buffer_state )
! 733: /* Special case this state to make sure it does what
! 734: * it's supposed to, i.e., jam on end-of-buffer.
! 735: */
! 736: stack1( ds, 0, 0, JAMSTATE );
! 737:
! 738: else /* normal, compressed state */
! 739: {
! 740: /* Determine which destination state is the most
! 741: * common, and how many transitions to it there are.
! 742: */
! 743:
! 744: comfreq = 0;
! 745: comstate = 0;
! 746:
! 747: for ( i = 1; i <= targptr; ++i )
! 748: if ( targfreq[i] > comfreq )
! 749: {
! 750: comfreq = targfreq[i];
! 751: comstate = targstate[i];
! 752: }
! 753:
! 754: bldtbl( state, ds, totaltrans, comstate, comfreq );
! 755: }
! 756: }
! 757:
! 758: if ( fulltbl )
! 759: dataend();
! 760:
! 761: else if ( ! fullspd )
! 762: {
! 763: cmptmps(); /* create compressed template entries */
! 764:
! 765: /* Create tables for all the states with only one
! 766: * out-transition.
! 767: */
! 768: while ( onesp > 0 )
! 769: {
! 770: mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp],
! 771: onedef[onesp] );
! 772: --onesp;
! 773: }
! 774:
! 775: mkdeftbl();
! 776: }
! 777:
! 778: flex_free( (void *) accset );
! 779: flex_free( (void *) nset );
! 780: }
! 781:
! 782:
! 783: /* snstods - converts a set of ndfa states into a dfa state
! 784: *
! 785: * synopsis
! 786: * is_new_state = snstods( int sns[numstates], int numstates,
! 787: * int accset[num_rules+1], int nacc,
! 788: * int hashval, int *newds_addr );
! 789: *
! 790: * On return, the dfa state number is in newds.
! 791: */
! 792:
! 793: int snstods( sns, numstates, accset, nacc, hashval, newds_addr )
! 794: int sns[], numstates, accset[], nacc, hashval, *newds_addr;
! 795: {
! 796: int didsort = 0;
! 797: register int i, j;
! 798: int newds, *oldsns;
! 799:
! 800: for ( i = 1; i <= lastdfa; ++i )
! 801: if ( hashval == dhash[i] )
! 802: {
! 803: if ( numstates == dfasiz[i] )
! 804: {
! 805: oldsns = dss[i];
! 806:
! 807: if ( ! didsort )
! 808: {
! 809: /* We sort the states in sns so we
! 810: * can compare it to oldsns quickly.
! 811: * We use bubble because there probably
! 812: * aren't very many states.
! 813: */
! 814: bubble( sns, numstates );
! 815: didsort = 1;
! 816: }
! 817:
! 818: for ( j = 1; j <= numstates; ++j )
! 819: if ( sns[j] != oldsns[j] )
! 820: break;
! 821:
! 822: if ( j > numstates )
! 823: {
! 824: ++dfaeql;
! 825: *newds_addr = i;
! 826: return 0;
! 827: }
! 828:
! 829: ++hshcol;
! 830: }
! 831:
! 832: else
! 833: ++hshsave;
! 834: }
! 835:
! 836: /* Make a new dfa. */
! 837:
! 838: if ( ++lastdfa >= current_max_dfas )
! 839: increase_max_dfas();
! 840:
! 841: newds = lastdfa;
! 842:
! 843: dss[newds] = allocate_integer_array( numstates + 1 );
! 844:
! 845: /* If we haven't already sorted the states in sns, we do so now,
! 846: * so that future comparisons with it can be made quickly.
! 847: */
! 848:
! 849: if ( ! didsort )
! 850: bubble( sns, numstates );
! 851:
! 852: for ( i = 1; i <= numstates; ++i )
! 853: dss[newds][i] = sns[i];
! 854:
! 855: dfasiz[newds] = numstates;
! 856: dhash[newds] = hashval;
! 857:
! 858: if ( nacc == 0 )
! 859: {
! 860: if ( reject )
! 861: dfaacc[newds].dfaacc_set = (int *) 0;
! 862: else
! 863: dfaacc[newds].dfaacc_state = 0;
! 864:
! 865: accsiz[newds] = 0;
! 866: }
! 867:
! 868: else if ( reject )
! 869: {
! 870: /* We sort the accepting set in increasing order so the
! 871: * disambiguating rule that the first rule listed is considered
! 872: * match in the event of ties will work. We use a bubble
! 873: * sort since the list is probably quite small.
! 874: */
! 875:
! 876: bubble( accset, nacc );
! 877:
! 878: dfaacc[newds].dfaacc_set = allocate_integer_array( nacc + 1 );
! 879:
! 880: /* Save the accepting set for later */
! 881: for ( i = 1; i <= nacc; ++i )
! 882: {
! 883: dfaacc[newds].dfaacc_set[i] = accset[i];
! 884:
! 885: if ( accset[i] <= num_rules )
! 886: /* Who knows, perhaps a REJECT can yield
! 887: * this rule.
! 888: */
! 889: rule_useful[accset[i]] = true;
! 890: }
! 891:
! 892: accsiz[newds] = nacc;
! 893: }
! 894:
! 895: else
! 896: {
! 897: /* Find lowest numbered rule so the disambiguating rule
! 898: * will work.
! 899: */
! 900: j = num_rules + 1;
! 901:
! 902: for ( i = 1; i <= nacc; ++i )
! 903: if ( accset[i] < j )
! 904: j = accset[i];
! 905:
! 906: dfaacc[newds].dfaacc_state = j;
! 907:
! 908: if ( j <= num_rules )
! 909: rule_useful[j] = true;
! 910: }
! 911:
! 912: *newds_addr = newds;
! 913:
! 914: return 1;
! 915: }
! 916:
! 917:
! 918: /* symfollowset - follow the symbol transitions one step
! 919: *
! 920: * synopsis
! 921: * numstates = symfollowset( int ds[current_max_dfa_size], int dsize,
! 922: * int transsym, int nset[current_max_dfa_size] );
! 923: */
! 924:
! 925: int symfollowset( ds, dsize, transsym, nset )
! 926: int ds[], dsize, transsym, nset[];
! 927: {
! 928: int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist;
! 929:
! 930: numstates = 0;
! 931:
! 932: for ( i = 1; i <= dsize; ++i )
! 933: { /* for each nfa state ns in the state set of ds */
! 934: ns = ds[i];
! 935: sym = transchar[ns];
! 936: tsp = trans1[ns];
! 937:
! 938: if ( sym < 0 )
! 939: { /* it's a character class */
! 940: sym = -sym;
! 941: ccllist = cclmap[sym];
! 942: lenccl = ccllen[sym];
! 943:
! 944: if ( cclng[sym] )
! 945: {
! 946: for ( j = 0; j < lenccl; ++j )
! 947: {
! 948: /* Loop through negated character
! 949: * class.
! 950: */
! 951: ch = ccltbl[ccllist + j];
! 952:
! 953: if ( ch == 0 )
! 954: ch = NUL_ec;
! 955:
! 956: if ( ch > transsym )
! 957: /* Transsym isn't in negated
! 958: * ccl.
! 959: */
! 960: break;
! 961:
! 962: else if ( ch == transsym )
! 963: /* next 2 */ goto bottom;
! 964: }
! 965:
! 966: /* Didn't find transsym in ccl. */
! 967: nset[++numstates] = tsp;
! 968: }
! 969:
! 970: else
! 971: for ( j = 0; j < lenccl; ++j )
! 972: {
! 973: ch = ccltbl[ccllist + j];
! 974:
! 975: if ( ch == 0 )
! 976: ch = NUL_ec;
! 977:
! 978: if ( ch > transsym )
! 979: break;
! 980: else if ( ch == transsym )
! 981: {
! 982: nset[++numstates] = tsp;
! 983: break;
! 984: }
! 985: }
! 986: }
! 987:
! 988: else if ( sym >= 'A' && sym <= 'Z' && caseins )
! 989: flexfatal(
! 990: _( "consistency check failed in symfollowset" ) );
! 991:
! 992: else if ( sym == SYM_EPSILON )
! 993: { /* do nothing */
! 994: }
! 995:
! 996: else if ( ABS( ecgroup[sym] ) == transsym )
! 997: nset[++numstates] = tsp;
! 998:
! 999: bottom: ;
! 1000: }
! 1001:
! 1002: return numstates;
! 1003: }
! 1004:
! 1005:
! 1006: /* sympartition - partition characters with same out-transitions
! 1007: *
! 1008: * synopsis
! 1009: * sympartition( int ds[current_max_dfa_size], int numstates,
! 1010: * int symlist[numecs], int duplist[numecs] );
! 1011: */
! 1012:
! 1013: void sympartition( ds, numstates, symlist, duplist )
! 1014: int ds[], numstates;
! 1015: int symlist[], duplist[];
! 1016: {
! 1017: int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
! 1018:
! 1019: /* Partitioning is done by creating equivalence classes for those
! 1020: * characters which have out-transitions from the given state. Thus
! 1021: * we are really creating equivalence classes of equivalence classes.
! 1022: */
! 1023:
! 1024: for ( i = 1; i <= numecs; ++i )
! 1025: { /* initialize equivalence class list */
! 1026: duplist[i] = i - 1;
! 1027: dupfwd[i] = i + 1;
! 1028: }
! 1029:
! 1030: duplist[1] = NIL;
! 1031: dupfwd[numecs] = NIL;
! 1032:
! 1033: for ( i = 1; i <= numstates; ++i )
! 1034: {
! 1035: ns = ds[i];
! 1036: tch = transchar[ns];
! 1037:
! 1038: if ( tch != SYM_EPSILON )
! 1039: {
! 1040: if ( tch < -lastccl || tch >= csize )
! 1041: {
! 1042: flexfatal(
! 1043: _( "bad transition character detected in sympartition()" ) );
! 1044: }
! 1045:
! 1046: if ( tch >= 0 )
! 1047: { /* character transition */
! 1048: int ec = ecgroup[tch];
! 1049:
! 1050: mkechar( ec, dupfwd, duplist );
! 1051: symlist[ec] = 1;
! 1052: }
! 1053:
! 1054: else
! 1055: { /* character class */
! 1056: tch = -tch;
! 1057:
! 1058: lenccl = ccllen[tch];
! 1059: cclp = cclmap[tch];
! 1060: mkeccl( ccltbl + cclp, lenccl, dupfwd,
! 1061: duplist, numecs, NUL_ec );
! 1062:
! 1063: if ( cclng[tch] )
! 1064: {
! 1065: j = 0;
! 1066:
! 1067: for ( k = 0; k < lenccl; ++k )
! 1068: {
! 1069: ich = ccltbl[cclp + k];
! 1070:
! 1071: if ( ich == 0 )
! 1072: ich = NUL_ec;
! 1073:
! 1074: for ( ++j; j < ich; ++j )
! 1075: symlist[j] = 1;
! 1076: }
! 1077:
! 1078: for ( ++j; j <= numecs; ++j )
! 1079: symlist[j] = 1;
! 1080: }
! 1081:
! 1082: else
! 1083: for ( k = 0; k < lenccl; ++k )
! 1084: {
! 1085: ich = ccltbl[cclp + k];
! 1086:
! 1087: if ( ich == 0 )
! 1088: ich = NUL_ec;
! 1089:
! 1090: symlist[ich] = 1;
! 1091: }
! 1092: }
! 1093: }
! 1094: }
! 1095: }