src/usr.bin/lex/dfa.c - annotate

Return to dfa.c CVS log
Up to [local] / src / usr.bin / lex
Annotation of src/usr.bin/lex/dfa.c, Revision 1.1.1.1

1.1       deraadt     1: /* dfa - DFA construction routines */
                      2:
                      3: /*-
                      4:  * Copyright (c) 1990 The Regents of the University of California.
                      5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to Berkeley by
                      8:  * Vern Paxson.
                      9:  *
                     10:  * The United States Government has rights in this work pursuant
                     11:  * to contract no. DE-AC03-76SF00098 between the United States
                     12:  * Department of Energy and the University of California.
                     13:  *
                     14:  * Redistribution and use in source and binary forms are permitted provided
                     15:  * that: (1) source distributions retain this entire copyright notice and
                     16:  * comment, and (2) distributions including binaries display the following
                     17:  * acknowledgement:  ``This product includes software developed by the
                     18:  * University of California, Berkeley and its contributors'' in the
                     19:  * documentation or other materials provided with the distribution and in
                     20:  * all advertising materials mentioning features or use of this software.
                     21:  * Neither the name of the University nor the names of its contributors may
                     22:  * be used to endorse or promote products derived from this software without
                     23:  * specific prior written permission.
                     24:  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
                     25:  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
                     26:  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
                     27:  */
                     28:
                     29: /* $Header: /a/cvsroot/src/usr.bin/lex/dfa.c,v 1.9 1995/05/05 05:35:14 jtc Exp $ */
                     30:
                     31: #include "flexdef.h"
                     32:
                     33:
                     34: /* declare functions that have forward references */
                     35:
                     36: void dump_associated_rules PROTO((FILE*, int));
                     37: void dump_transitions PROTO((FILE*, int[]));
                     38: void sympartition PROTO((int[], int, int[], int[]));
                     39: int symfollowset PROTO((int[], int, int, int[]));
                     40:
                     41:
                     42: /* check_for_backing_up - check a DFA state for backing up
                     43:  *
                     44:  * synopsis
                     45:  *     void check_for_backing_up( int ds, int state[numecs] );
                     46:  *
                     47:  * ds is the number of the state to check and state[] is its out-transitions,
                     48:  * indexed by equivalence class.
                     49:  */
                     50:
                     51: void check_for_backing_up( ds, state )
                     52: int ds;
                     53: int state[];
                     54:        {
                     55:        if ( (reject && ! dfaacc[ds].dfaacc_set) ||
                     56:             (! reject && ! dfaacc[ds].dfaacc_state) )
                     57:                { /* state is non-accepting */
                     58:                ++num_backing_up;
                     59:
                     60:                if ( backing_up_report )
                     61:                        {
                     62:                        fprintf( backing_up_file,
                     63:                                _( "State #%d is non-accepting -\n" ), ds );
                     64:
                     65:                        /* identify the state */
                     66:                        dump_associated_rules( backing_up_file, ds );
                     67:
                     68:                        /* Now identify it further using the out- and
                     69:                         * jam-transitions.
                     70:                         */
                     71:                        dump_transitions( backing_up_file, state );
                     72:
                     73:                        putc( '\n', backing_up_file );
                     74:                        }
                     75:                }
                     76:        }
                     77:
                     78:
                     79: /* check_trailing_context - check to see if NFA state set constitutes
                     80:  *                          "dangerous" trailing context
                     81:  *
                     82:  * synopsis
                     83:  *    void check_trailing_context( int nfa_states[num_states+1], int num_states,
                     84:  *                             int accset[nacc+1], int nacc );
                     85:  *
                     86:  * NOTES
                     87:  *  Trailing context is "dangerous" if both the head and the trailing
                     88:  *  part are of variable size \and/ there's a DFA state which contains
                     89:  *  both an accepting state for the head part of the rule and NFA states
                     90:  *  which occur after the beginning of the trailing context.
                     91:  *
                     92:  *  When such a rule is matched, it's impossible to tell if having been
                     93:  *  in the DFA state indicates the beginning of the trailing context or
                     94:  *  further-along scanning of the pattern.  In these cases, a warning
                     95:  *  message is issued.
                     96:  *
                     97:  *    nfa_states[1 .. num_states] is the list of NFA states in the DFA.
                     98:  *    accset[1 .. nacc] is the list of accepting numbers for the DFA state.
                     99:  */
                    100:
                    101: void check_trailing_context( nfa_states, num_states, accset, nacc )
                    102: int *nfa_states, num_states;
                    103: int *accset;
                    104: int nacc;
                    105:        {
                    106:        register int i, j;
                    107:
                    108:        for ( i = 1; i <= num_states; ++i )
                    109:                {
                    110:                int ns = nfa_states[i];
                    111:                register int type = state_type[ns];
                    112:                register int ar = assoc_rule[ns];
                    113:
                    114:                if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE )
                    115:                        { /* do nothing */
                    116:                        }
                    117:
                    118:                else if ( type == STATE_TRAILING_CONTEXT )
                    119:                        {
                    120:                        /* Potential trouble.  Scan set of accepting numbers
                    121:                         * for the one marking the end of the "head".  We
                    122:                         * assume that this looping will be fairly cheap
                    123:                         * since it's rare that an accepting number set
                    124:                         * is large.
                    125:                         */
                    126:                        for ( j = 1; j <= nacc; ++j )
                    127:                                if ( accset[j] & YY_TRAILING_HEAD_MASK )
                    128:                                        {
                    129:                                        line_warning(
                    130:                                        _( "dangerous trailing context" ),
                    131:                                                rule_linenum[ar] );
                    132:                                        return;
                    133:                                        }
                    134:                        }
                    135:                }
                    136:        }
                    137:
                    138:
                    139: /* dump_associated_rules - list the rules associated with a DFA state
                    140:  *
                    141:  * Goes through the set of NFA states associated with the DFA and
                    142:  * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
                    143:  * and writes a report to the given file.
                    144:  */
                    145:
                    146: void dump_associated_rules( file, ds )
                    147: FILE *file;
                    148: int ds;
                    149:        {
                    150:        register int i, j;
                    151:        register int num_associated_rules = 0;
                    152:        int rule_set[MAX_ASSOC_RULES + 1];
                    153:        int *dset = dss[ds];
                    154:        int size = dfasiz[ds];
                    155:
                    156:        for ( i = 1; i <= size; ++i )
                    157:                {
                    158:                register int rule_num = rule_linenum[assoc_rule[dset[i]]];
                    159:
                    160:                for ( j = 1; j <= num_associated_rules; ++j )
                    161:                        if ( rule_num == rule_set[j] )
                    162:                                break;
                    163:
                    164:                if ( j > num_associated_rules )
                    165:                        { /* new rule */
                    166:                        if ( num_associated_rules < MAX_ASSOC_RULES )
                    167:                                rule_set[++num_associated_rules] = rule_num;
                    168:                        }
                    169:                }
                    170:
                    171:        bubble( rule_set, num_associated_rules );
                    172:
                    173:        fprintf( file, _( " associated rule line numbers:" ) );
                    174:
                    175:        for ( i = 1; i <= num_associated_rules; ++i )
                    176:                {
                    177:                if ( i % 8 == 1 )
                    178:                        putc( '\n', file );
                    179:
                    180:                fprintf( file, "\t%d", rule_set[i] );
                    181:                }
                    182:
                    183:        putc( '\n', file );
                    184:        }
                    185:
                    186:
                    187: /* dump_transitions - list the transitions associated with a DFA state
                    188:  *
                    189:  * synopsis
                    190:  *     dump_transitions( FILE *file, int state[numecs] );
                    191:  *
                    192:  * Goes through the set of out-transitions and lists them in human-readable
                    193:  * form (i.e., not as equivalence classes); also lists jam transitions
                    194:  * (i.e., all those which are not out-transitions, plus EOF).  The dump
                    195:  * is done to the given file.
                    196:  */
                    197:
                    198: void dump_transitions( file, state )
                    199: FILE *file;
                    200: int state[];
                    201:        {
                    202:        register int i, ec;
                    203:        int out_char_set[CSIZE];
                    204:
                    205:        for ( i = 0; i < csize; ++i )
                    206:                {
                    207:                ec = ABS( ecgroup[i] );
                    208:                out_char_set[i] = state[ec];
                    209:                }
                    210:
                    211:        fprintf( file, _( " out-transitions: " ) );
                    212:
                    213:        list_character_set( file, out_char_set );
                    214:
                    215:        /* now invert the members of the set to get the jam transitions */
                    216:        for ( i = 0; i < csize; ++i )
                    217:                out_char_set[i] = ! out_char_set[i];
                    218:
                    219:        fprintf( file, _( "\n jam-transitions: EOF " ) );
                    220:
                    221:        list_character_set( file, out_char_set );
                    222:
                    223:        putc( '\n', file );
                    224:        }
                    225:
                    226:
                    227: /* epsclosure - construct the epsilon closure of a set of ndfa states
                    228:  *
                    229:  * synopsis
                    230:  *    int *epsclosure( int t[num_states], int *numstates_addr,
                    231:  *                     int accset[num_rules+1], int *nacc_addr,
                    232:  *                     int *hashval_addr );
                    233:  *
                    234:  * NOTES
                    235:  *  The epsilon closure is the set of all states reachable by an arbitrary
                    236:  *  number of epsilon transitions, which themselves do not have epsilon
                    237:  *  transitions going out, unioned with the set of states which have non-null
                    238:  *  accepting numbers.  t is an array of size numstates of nfa state numbers.
                    239:  *  Upon return, t holds the epsilon closure and *numstates_addr is updated.
                    240:  *  accset holds a list of the accepting numbers, and the size of accset is
                    241:  *  given by *nacc_addr.  t may be subjected to reallocation if it is not
                    242:  *  large enough to hold the epsilon closure.
                    243:  *
                    244:  *  hashval is the hash value for the dfa corresponding to the state set.
                    245:  */
                    246:
                    247: int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr )
                    248: int *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
                    249:        {
                    250:        register int stkpos, ns, tsp;
                    251:        int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
                    252:        int stkend, nstate;
                    253:        static int did_stk_init = false, *stk;
                    254:
                    255: #define MARK_STATE(state) \
                    256: trans1[state] = trans1[state] - MARKER_DIFFERENCE;
                    257:
                    258: #define IS_MARKED(state) (trans1[state] < 0)
                    259:
                    260: #define UNMARK_STATE(state) \
                    261: trans1[state] = trans1[state] + MARKER_DIFFERENCE;
                    262:
                    263: #define CHECK_ACCEPT(state) \
                    264: { \
                    265: nfaccnum = accptnum[state]; \
                    266: if ( nfaccnum != NIL ) \
                    267: accset[++nacc] = nfaccnum; \
                    268: }
                    269:
                    270: #define DO_REALLOCATION \
                    271: { \
                    272: current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
                    273: ++num_reallocs; \
                    274: t = reallocate_integer_array( t, current_max_dfa_size ); \
                    275: stk = reallocate_integer_array( stk, current_max_dfa_size ); \
                    276: } \
                    277:
                    278: #define PUT_ON_STACK(state) \
                    279: { \
                    280: if ( ++stkend >= current_max_dfa_size ) \
                    281: DO_REALLOCATION \
                    282: stk[stkend] = state; \
                    283: MARK_STATE(state) \
                    284: }
                    285:
                    286: #define ADD_STATE(state) \
                    287: { \
                    288: if ( ++numstates >= current_max_dfa_size ) \
                    289: DO_REALLOCATION \
                    290: t[numstates] = state; \
                    291: hashval += state; \
                    292: }
                    293:
                    294: #define STACK_STATE(state) \
                    295: { \
                    296: PUT_ON_STACK(state) \
                    297: CHECK_ACCEPT(state) \
                    298: if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
                    299: ADD_STATE(state) \
                    300: }
                    301:
                    302:
                    303:        if ( ! did_stk_init )
                    304:                {
                    305:                stk = allocate_integer_array( current_max_dfa_size );
                    306:                did_stk_init = true;
                    307:                }
                    308:
                    309:        nacc = stkend = hashval = 0;
                    310:
                    311:        for ( nstate = 1; nstate <= numstates; ++nstate )
                    312:                {
                    313:                ns = t[nstate];
                    314:
                    315:                /* The state could be marked if we've already pushed it onto
                    316:                 * the stack.
                    317:                 */
                    318:                if ( ! IS_MARKED(ns) )
                    319:                        {
                    320:                        PUT_ON_STACK(ns)
                    321:                        CHECK_ACCEPT(ns)
                    322:                        hashval += ns;
                    323:                        }
                    324:                }
                    325:
                    326:        for ( stkpos = 1; stkpos <= stkend; ++stkpos )
                    327:                {
                    328:                ns = stk[stkpos];
                    329:                transsym = transchar[ns];
                    330:
                    331:                if ( transsym == SYM_EPSILON )
                    332:                        {
                    333:                        tsp = trans1[ns] + MARKER_DIFFERENCE;
                    334:
                    335:                        if ( tsp != NO_TRANSITION )
                    336:                                {
                    337:                                if ( ! IS_MARKED(tsp) )
                    338:                                        STACK_STATE(tsp)
                    339:
                    340:                                tsp = trans2[ns];
                    341:
                    342:                                if ( tsp != NO_TRANSITION && ! IS_MARKED(tsp) )
                    343:                                        STACK_STATE(tsp)
                    344:                                }
                    345:                        }
                    346:                }
                    347:
                    348:        /* Clear out "visit" markers. */
                    349:
                    350:        for ( stkpos = 1; stkpos <= stkend; ++stkpos )
                    351:                {
                    352:                if ( IS_MARKED(stk[stkpos]) )
                    353:                        UNMARK_STATE(stk[stkpos])
                    354:                else
                    355:                        flexfatal(
                    356:                        _( "consistency check failed in epsclosure()" ) );
                    357:                }
                    358:
                    359:        *ns_addr = numstates;
                    360:        *hv_addr = hashval;
                    361:        *nacc_addr = nacc;
                    362:
                    363:        return t;
                    364:        }
                    365:
                    366:
                    367: /* increase_max_dfas - increase the maximum number of DFAs */
                    368:
                    369: void increase_max_dfas()
                    370:        {
                    371:        current_max_dfas += MAX_DFAS_INCREMENT;
                    372:
                    373:        ++num_reallocs;
                    374:
                    375:        base = reallocate_integer_array( base, current_max_dfas );
                    376:        def = reallocate_integer_array( def, current_max_dfas );
                    377:        dfasiz = reallocate_integer_array( dfasiz, current_max_dfas );
                    378:        accsiz = reallocate_integer_array( accsiz, current_max_dfas );
                    379:        dhash = reallocate_integer_array( dhash, current_max_dfas );
                    380:        dss = reallocate_int_ptr_array( dss, current_max_dfas );
                    381:        dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas );
                    382:
                    383:        if ( nultrans )
                    384:                nultrans =
                    385:                        reallocate_integer_array( nultrans, current_max_dfas );
                    386:        }
                    387:
                    388:
                    389: /* ntod - convert an ndfa to a dfa
                    390:  *
                    391:  * Creates the dfa corresponding to the ndfa we've constructed.  The
                    392:  * dfa starts out in state #1.
                    393:  */
                    394:
                    395: void ntod()
                    396:        {
                    397:        int *accset, ds, nacc, newds;
                    398:        int sym, hashval, numstates, dsize;
                    399:        int num_full_table_rows;        /* used only for -f */
                    400:        int *nset, *dset;
                    401:        int targptr, totaltrans, i, comstate, comfreq, targ;
                    402:        int symlist[CSIZE + 1];
                    403:        int num_start_states;
                    404:        int todo_head, todo_next;
                    405:
                    406:        /* Note that the following are indexed by *equivalence classes*
                    407:         * and not by characters.  Since equivalence classes are indexed
                    408:         * beginning with 1, even if the scanner accepts NUL's, this
                    409:         * means that (since every character is potentially in its own
                    410:         * equivalence class) these arrays must have room for indices
                    411:         * from 1 to CSIZE, so their size must be CSIZE + 1.
                    412:         */
                    413:        int duplist[CSIZE + 1], state[CSIZE + 1];
                    414:        int targfreq[CSIZE + 1], targstate[CSIZE + 1];
                    415:
                    416:        accset = allocate_integer_array( num_rules + 1 );
                    417:        nset = allocate_integer_array( current_max_dfa_size );
                    418:
                    419:        /* The "todo" queue is represented by the head, which is the DFA
                    420:         * state currently being processed, and the "next", which is the
                    421:         * next DFA state number available (not in use).  We depend on the
                    422:         * fact that snstods() returns DFA's \in increasing order/, and thus
                    423:         * need only know the bounds of the dfas to be processed.
                    424:         */
                    425:        todo_head = todo_next = 0;
                    426:
                    427:        for ( i = 0; i <= csize; ++i )
                    428:                {
                    429:                duplist[i] = NIL;
                    430:                symlist[i] = false;
                    431:                }
                    432:
                    433:        for ( i = 0; i <= num_rules; ++i )
                    434:                accset[i] = NIL;
                    435:
                    436:        if ( trace )
                    437:                {
                    438:                dumpnfa( scset[1] );
                    439:                fputs( _( "\n\nDFA Dump:\n\n" ), stderr );
                    440:                }
                    441:
                    442:        inittbl();
                    443:
                    444:        /* Check to see whether we should build a separate table for
                    445:         * transitions on NUL characters.  We don't do this for full-speed
                    446:         * (-F) scanners, since for them we don't have a simple state
                    447:         * number lying around with which to index the table.  We also
                    448:         * don't bother doing it for scanners unless (1) NUL is in its own
                    449:         * equivalence class (indicated by a positive value of
                    450:         * ecgroup[NUL]), (2) NUL's equivalence class is the last
                    451:         * equivalence class, and (3) the number of equivalence classes is
                    452:         * the same as the number of characters.  This latter case comes
                    453:         * about when useecs is false or when it's true but every character
                    454:         * still manages to land in its own class (unlikely, but it's
                    455:         * cheap to check for).  If all these things are true then the
                    456:         * character code needed to represent NUL's equivalence class for
                    457:         * indexing the tables is going to take one more bit than the
                    458:         * number of characters, and therefore we won't be assured of
                    459:         * being able to fit it into a YY_CHAR variable.  This rules out
                    460:         * storing the transitions in a compressed table, since the code
                    461:         * for interpreting them uses a YY_CHAR variable (perhaps it
                    462:         * should just use an integer, though; this is worth pondering ...
                    463:         * ###).
                    464:         *
                    465:         * Finally, for full tables, we want the number of entries in the
                    466:         * table to be a power of two so the array references go fast (it
                    467:         * will just take a shift to compute the major index).  If
                    468:         * encoding NUL's transitions in the table will spoil this, we
                    469:         * give it its own table (note that this will be the case if we're
                    470:         * not using equivalence classes).
                    471:         */
                    472:
                    473:        /* Note that the test for ecgroup[0] == numecs below accomplishes
                    474:         * both (1) and (2) above
                    475:         */
                    476:        if ( ! fullspd && ecgroup[0] == numecs )
                    477:                {
                    478:                /* NUL is alone in its equivalence class, which is the
                    479:                 * last one.
                    480:                 */
                    481:                int use_NUL_table = (numecs == csize);
                    482:
                    483:                if ( fulltbl && ! use_NUL_table )
                    484:                        {
                    485:                        /* We still may want to use the table if numecs
                    486:                         * is a power of 2.
                    487:                         */
                    488:                        int power_of_two;
                    489:
                    490:                        for ( power_of_two = 1; power_of_two <= csize;
                    491:                              power_of_two *= 2 )
                    492:                                if ( numecs == power_of_two )
                    493:                                        {
                    494:                                        use_NUL_table = true;
                    495:                                        break;
                    496:                                        }
                    497:                        }
                    498:
                    499:                if ( use_NUL_table )
                    500:                        nultrans = allocate_integer_array( current_max_dfas );
                    501:
                    502:                /* From now on, nultrans != nil indicates that we're
                    503:                 * saving null transitions for later, separate encoding.
                    504:                 */
                    505:                }
                    506:
                    507:
                    508:        if ( fullspd )
                    509:                {
                    510:                for ( i = 0; i <= numecs; ++i )
                    511:                        state[i] = 0;
                    512:
                    513:                place_state( state, 0, 0 );
                    514:                dfaacc[0].dfaacc_state = 0;
                    515:                }
                    516:
                    517:        else if ( fulltbl )
                    518:                {
                    519:                if ( nultrans )
                    520:                        /* We won't be including NUL's transitions in the
                    521:                         * table, so build it for entries from 0 .. numecs - 1.
                    522:                         */
                    523:                        num_full_table_rows = numecs;
                    524:
                    525:                else
                    526:                        /* Take into account the fact that we'll be including
                    527:                         * the NUL entries in the transition table.  Build it
                    528:                         * from 0 .. numecs.
                    529:                         */
                    530:                        num_full_table_rows = numecs + 1;
                    531:
                    532:                /* Unless -Ca, declare it "short" because it's a real
                    533:                 * long-shot that that won't be large enough.
                    534:                 */
                    535:                out_str_dec( "static yyconst %s yy_nxt[][%d] =\n    {\n",
                    536:                        /* '}' so vi doesn't get too confused */
                    537:                        long_align ? "long" : "short", num_full_table_rows );
                    538:
                    539:                outn( "    {" );
                    540:
                    541:                /* Generate 0 entries for state #0. */
                    542:                for ( i = 0; i < num_full_table_rows; ++i )
                    543:                        mk2data( 0 );
                    544:
                    545:                dataflush();
                    546:                outn( "    },\n" );
                    547:                }
                    548:
                    549:        /* Create the first states. */
                    550:
                    551:        num_start_states = lastsc * 2;
                    552:
                    553:        for ( i = 1; i <= num_start_states; ++i )
                    554:                {
                    555:                numstates = 1;
                    556:
                    557:                /* For each start condition, make one state for the case when
                    558:                 * we're at the beginning of the line (the '^' operator) and
                    559:                 * one for the case when we're not.
                    560:                 */
                    561:                if ( i % 2 == 1 )
                    562:                        nset[numstates] = scset[(i / 2) + 1];
                    563:                else
                    564:                        nset[numstates] =
                    565:                                mkbranch( scbol[i / 2], scset[i / 2] );
                    566:
                    567:                nset = epsclosure( nset, &numstates, accset, &nacc, &hashval );
                    568:
                    569:                if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) )
                    570:                        {
                    571:                        numas += nacc;
                    572:                        totnst += numstates;
                    573:                        ++todo_next;
                    574:
                    575:                        if ( variable_trailing_context_rules && nacc > 0 )
                    576:                                check_trailing_context( nset, numstates,
                    577:                                                        accset, nacc );
                    578:                        }
                    579:                }
                    580:
                    581:        if ( ! fullspd )
                    582:                {
                    583:                if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) )
                    584:                        flexfatal(
                    585:                        _( "could not create unique end-of-buffer state" ) );
                    586:
                    587:                ++numas;
                    588:                ++num_start_states;
                    589:                ++todo_next;
                    590:                }
                    591:
                    592:        while ( todo_head < todo_next )
                    593:                {
                    594:                targptr = 0;
                    595:                totaltrans = 0;
                    596:
                    597:                for ( i = 1; i <= numecs; ++i )
                    598:                        state[i] = 0;
                    599:
                    600:                ds = ++todo_head;
                    601:
                    602:                dset = dss[ds];
                    603:                dsize = dfasiz[ds];
                    604:
                    605:                if ( trace )
                    606:                        fprintf( stderr, _( "state # %d:\n" ), ds );
                    607:
                    608:                sympartition( dset, dsize, symlist, duplist );
                    609:
                    610:                for ( sym = 1; sym <= numecs; ++sym )
                    611:                        {
                    612:                        if ( symlist[sym] )
                    613:                                {
                    614:                                symlist[sym] = 0;
                    615:
                    616:                                if ( duplist[sym] == NIL )
                    617:                                        {
                    618:                                        /* Symbol has unique out-transitions. */
                    619:                                        numstates = symfollowset( dset, dsize,
                    620:                                                                sym, nset );
                    621:                                        nset = epsclosure( nset, &numstates,
                    622:                                                accset, &nacc, &hashval );
                    623:
                    624:                                        if ( snstods( nset, numstates, accset,
                    625:                                                nacc, hashval, &newds ) )
                    626:                                                {
                    627:                                                totnst = totnst + numstates;
                    628:                                                ++todo_next;
                    629:                                                numas += nacc;
                    630:
                    631:                                                if (
                    632:                                        variable_trailing_context_rules &&
                    633:                                                        nacc > 0 )
                    634:                                                        check_trailing_context(
                    635:                                                                nset, numstates,
                    636:                                                                accset, nacc );
                    637:                                                }
                    638:
                    639:                                        state[sym] = newds;
                    640:
                    641:                                        if ( trace )
                    642:                                                fprintf( stderr, "\t%d\t%d\n",
                    643:                                                        sym, newds );
                    644:
                    645:                                        targfreq[++targptr] = 1;
                    646:                                        targstate[targptr] = newds;
                    647:                                        ++numuniq;
                    648:                                        }
                    649:
                    650:                                else
                    651:                                        {
                    652:                                        /* sym's equivalence class has the same
                    653:                                         * transitions as duplist(sym)'s
                    654:                                         * equivalence class.
                    655:                                         */
                    656:                                        targ = state[duplist[sym]];
                    657:                                        state[sym] = targ;
                    658:
                    659:                                        if ( trace )
                    660:                                                fprintf( stderr, "\t%d\t%d\n",
                    661:                                                        sym, targ );
                    662:
                    663:                                        /* Update frequency count for
                    664:                                         * destination state.
                    665:                                         */
                    666:
                    667:                                        i = 0;
                    668:                                        while ( targstate[++i] != targ )
                    669:                                                ;
                    670:
                    671:                                        ++targfreq[i];
                    672:                                        ++numdup;
                    673:                                        }
                    674:
                    675:                                ++totaltrans;
                    676:                                duplist[sym] = NIL;
                    677:                                }
                    678:                        }
                    679:
                    680:                if ( caseins && ! useecs )
                    681:                        {
                    682:                        register int j;
                    683:
                    684:                        for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j )
                    685:                                {
                    686:                                if ( state[i] == 0 && state[j] != 0 )
                    687:                                        /* We're adding a transition. */
                    688:                                        ++totaltrans;
                    689:
                    690:                                else if ( state[i] != 0 && state[j] == 0 )
                    691:                                        /* We're taking away a transition. */
                    692:                                        --totaltrans;
                    693:
                    694:                                state[i] = state[j];
                    695:                                }
                    696:                        }
                    697:
                    698:                numsnpairs += totaltrans;
                    699:
                    700:                if ( ds > num_start_states )
                    701:                        check_for_backing_up( ds, state );
                    702:
                    703:                if ( nultrans )
                    704:                        {
                    705:                        nultrans[ds] = state[NUL_ec];
                    706:                        state[NUL_ec] = 0;      /* remove transition */
                    707:                        }
                    708:
                    709:                if ( fulltbl )
                    710:                        {
                    711:                        outn( "    {" );
                    712:
                    713:                        /* Supply array's 0-element. */
                    714:                        if ( ds == end_of_buffer_state )
                    715:                                mk2data( -end_of_buffer_state );
                    716:                        else
                    717:                                mk2data( end_of_buffer_state );
                    718:
                    719:                        for ( i = 1; i < num_full_table_rows; ++i )
                    720:                                /* Jams are marked by negative of state
                    721:                                 * number.
                    722:                                 */
                    723:                                mk2data( state[i] ? state[i] : -ds );
                    724:
                    725:                        dataflush();
                    726:                        outn( "    },\n" );
                    727:                        }
                    728:
                    729:                else if ( fullspd )
                    730:                        place_state( state, ds, totaltrans );
                    731:
                    732:                else if ( ds == end_of_buffer_state )
                    733:                        /* Special case this state to make sure it does what
                    734:                         * it's supposed to, i.e., jam on end-of-buffer.
                    735:                         */
                    736:                        stack1( ds, 0, 0, JAMSTATE );
                    737:
                    738:                else /* normal, compressed state */
                    739:                        {
                    740:                        /* Determine which destination state is the most
                    741:                         * common, and how many transitions to it there are.
                    742:                         */
                    743:
                    744:                        comfreq = 0;
                    745:                        comstate = 0;
                    746:
                    747:                        for ( i = 1; i <= targptr; ++i )
                    748:                                if ( targfreq[i] > comfreq )
                    749:                                        {
                    750:                                        comfreq = targfreq[i];
                    751:                                        comstate = targstate[i];
                    752:                                        }
                    753:
                    754:                        bldtbl( state, ds, totaltrans, comstate, comfreq );
                    755:                        }
                    756:                }
                    757:
                    758:        if ( fulltbl )
                    759:                dataend();
                    760:
                    761:        else if ( ! fullspd )
                    762:                {
                    763:                cmptmps();  /* create compressed template entries */
                    764:
                    765:                /* Create tables for all the states with only one
                    766:                 * out-transition.
                    767:                 */
                    768:                while ( onesp > 0 )
                    769:                        {
                    770:                        mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp],
                    771:                        onedef[onesp] );
                    772:                        --onesp;
                    773:                        }
                    774:
                    775:                mkdeftbl();
                    776:                }
                    777:
                    778:        flex_free( (void *) accset );
                    779:        flex_free( (void *) nset );
                    780:        }
                    781:
                    782:
                    783: /* snstods - converts a set of ndfa states into a dfa state
                    784:  *
                    785:  * synopsis
                    786:  *    is_new_state = snstods( int sns[numstates], int numstates,
                    787:  *                             int accset[num_rules+1], int nacc,
                    788:  *                             int hashval, int *newds_addr );
                    789:  *
                    790:  * On return, the dfa state number is in newds.
                    791:  */
                    792:
                    793: int snstods( sns, numstates, accset, nacc, hashval, newds_addr )
                    794: int sns[], numstates, accset[], nacc, hashval, *newds_addr;
                    795:        {
                    796:        int didsort = 0;
                    797:        register int i, j;
                    798:        int newds, *oldsns;
                    799:
                    800:        for ( i = 1; i <= lastdfa; ++i )
                    801:                if ( hashval == dhash[i] )
                    802:                        {
                    803:                        if ( numstates == dfasiz[i] )
                    804:                                {
                    805:                                oldsns = dss[i];
                    806:
                    807:                                if ( ! didsort )
                    808:                                        {
                    809:                                        /* We sort the states in sns so we
                    810:                                         * can compare it to oldsns quickly.
                    811:                                         * We use bubble because there probably
                    812:                                         * aren't very many states.
                    813:                                         */
                    814:                                        bubble( sns, numstates );
                    815:                                        didsort = 1;
                    816:                                        }
                    817:
                    818:                                for ( j = 1; j <= numstates; ++j )
                    819:                                        if ( sns[j] != oldsns[j] )
                    820:                                                break;
                    821:
                    822:                                if ( j > numstates )
                    823:                                        {
                    824:                                        ++dfaeql;
                    825:                                        *newds_addr = i;
                    826:                                        return 0;
                    827:                                        }
                    828:
                    829:                                ++hshcol;
                    830:                                }
                    831:
                    832:                        else
                    833:                                ++hshsave;
                    834:                        }
                    835:
                    836:        /* Make a new dfa. */
                    837:
                    838:        if ( ++lastdfa >= current_max_dfas )
                    839:                increase_max_dfas();
                    840:
                    841:        newds = lastdfa;
                    842:
                    843:        dss[newds] = allocate_integer_array( numstates + 1 );
                    844:
                    845:        /* If we haven't already sorted the states in sns, we do so now,
                    846:         * so that future comparisons with it can be made quickly.
                    847:         */
                    848:
                    849:        if ( ! didsort )
                    850:                bubble( sns, numstates );
                    851:
                    852:        for ( i = 1; i <= numstates; ++i )
                    853:                dss[newds][i] = sns[i];
                    854:
                    855:        dfasiz[newds] = numstates;
                    856:        dhash[newds] = hashval;
                    857:
                    858:        if ( nacc == 0 )
                    859:                {
                    860:                if ( reject )
                    861:                        dfaacc[newds].dfaacc_set = (int *) 0;
                    862:                else
                    863:                        dfaacc[newds].dfaacc_state = 0;
                    864:
                    865:                accsiz[newds] = 0;
                    866:                }
                    867:
                    868:        else if ( reject )
                    869:                {
                    870:                /* We sort the accepting set in increasing order so the
                    871:                 * disambiguating rule that the first rule listed is considered
                    872:                 * match in the event of ties will work.  We use a bubble
                    873:                 * sort since the list is probably quite small.
                    874:                 */
                    875:
                    876:                bubble( accset, nacc );
                    877:
                    878:                dfaacc[newds].dfaacc_set = allocate_integer_array( nacc + 1 );
                    879:
                    880:                /* Save the accepting set for later */
                    881:                for ( i = 1; i <= nacc; ++i )
                    882:                        {
                    883:                        dfaacc[newds].dfaacc_set[i] = accset[i];
                    884:
                    885:                        if ( accset[i] <= num_rules )
                    886:                                /* Who knows, perhaps a REJECT can yield
                    887:                                 * this rule.
                    888:                                 */
                    889:                                rule_useful[accset[i]] = true;
                    890:                        }
                    891:
                    892:                accsiz[newds] = nacc;
                    893:                }
                    894:
                    895:        else
                    896:                {
                    897:                /* Find lowest numbered rule so the disambiguating rule
                    898:                 * will work.
                    899:                 */
                    900:                j = num_rules + 1;
                    901:
                    902:                for ( i = 1; i <= nacc; ++i )
                    903:                        if ( accset[i] < j )
                    904:                                j = accset[i];
                    905:
                    906:                dfaacc[newds].dfaacc_state = j;
                    907:
                    908:                if ( j <= num_rules )
                    909:                        rule_useful[j] = true;
                    910:                }
                    911:
                    912:        *newds_addr = newds;
                    913:
                    914:        return 1;
                    915:        }
                    916:
                    917:
                    918: /* symfollowset - follow the symbol transitions one step
                    919:  *
                    920:  * synopsis
                    921:  *    numstates = symfollowset( int ds[current_max_dfa_size], int dsize,
                    922:  *                             int transsym, int nset[current_max_dfa_size] );
                    923:  */
                    924:
                    925: int symfollowset( ds, dsize, transsym, nset )
                    926: int ds[], dsize, transsym, nset[];
                    927:        {
                    928:        int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist;
                    929:
                    930:        numstates = 0;
                    931:
                    932:        for ( i = 1; i <= dsize; ++i )
                    933:                { /* for each nfa state ns in the state set of ds */
                    934:                ns = ds[i];
                    935:                sym = transchar[ns];
                    936:                tsp = trans1[ns];
                    937:
                    938:                if ( sym < 0 )
                    939:                        { /* it's a character class */
                    940:                        sym = -sym;
                    941:                        ccllist = cclmap[sym];
                    942:                        lenccl = ccllen[sym];
                    943:
                    944:                        if ( cclng[sym] )
                    945:                                {
                    946:                                for ( j = 0; j < lenccl; ++j )
                    947:                                        {
                    948:                                        /* Loop through negated character
                    949:                                         * class.
                    950:                                         */
                    951:                                        ch = ccltbl[ccllist + j];
                    952:
                    953:                                        if ( ch == 0 )
                    954:                                                ch = NUL_ec;
                    955:
                    956:                                        if ( ch > transsym )
                    957:                                                /* Transsym isn't in negated
                    958:                                                 * ccl.
                    959:                                                 */
                    960:                                                break;
                    961:
                    962:                                        else if ( ch == transsym )
                    963:                                                /* next 2 */ goto bottom;
                    964:                                        }
                    965:
                    966:                                /* Didn't find transsym in ccl. */
                    967:                                nset[++numstates] = tsp;
                    968:                                }
                    969:
                    970:                        else
                    971:                                for ( j = 0; j < lenccl; ++j )
                    972:                                        {
                    973:                                        ch = ccltbl[ccllist + j];
                    974:
                    975:                                        if ( ch == 0 )
                    976:                                                ch = NUL_ec;
                    977:
                    978:                                        if ( ch > transsym )
                    979:                                                break;
                    980:                                        else if ( ch == transsym )
                    981:                                                {
                    982:                                                nset[++numstates] = tsp;
                    983:                                                break;
                    984:                                                }
                    985:                                        }
                    986:                        }
                    987:
                    988:                else if ( sym >= 'A' && sym <= 'Z' && caseins )
                    989:                        flexfatal(
                    990:                        _( "consistency check failed in symfollowset" ) );
                    991:
                    992:                else if ( sym == SYM_EPSILON )
                    993:                        { /* do nothing */
                    994:                        }
                    995:
                    996:                else if ( ABS( ecgroup[sym] ) == transsym )
                    997:                        nset[++numstates] = tsp;
                    998:
                    999:                bottom: ;
                   1000:                }
                   1001:
                   1002:        return numstates;
                   1003:        }
                   1004:
                   1005:
                   1006: /* sympartition - partition characters with same out-transitions
                   1007:  *
                   1008:  * synopsis
                   1009:  *    sympartition( int ds[current_max_dfa_size], int numstates,
                   1010:  *                     int symlist[numecs], int duplist[numecs] );
                   1011:  */
                   1012:
                   1013: void sympartition( ds, numstates, symlist, duplist )
                   1014: int ds[], numstates;
                   1015: int symlist[], duplist[];
                   1016:        {
                   1017:        int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
                   1018:
                   1019:        /* Partitioning is done by creating equivalence classes for those
                   1020:         * characters which have out-transitions from the given state.  Thus
                   1021:         * we are really creating equivalence classes of equivalence classes.
                   1022:         */
                   1023:
                   1024:        for ( i = 1; i <= numecs; ++i )
                   1025:                { /* initialize equivalence class list */
                   1026:                duplist[i] = i - 1;
                   1027:                dupfwd[i] = i + 1;
                   1028:                }
                   1029:
                   1030:        duplist[1] = NIL;
                   1031:        dupfwd[numecs] = NIL;
                   1032:
                   1033:        for ( i = 1; i <= numstates; ++i )
                   1034:                {
                   1035:                ns = ds[i];
                   1036:                tch = transchar[ns];
                   1037:
                   1038:                if ( tch != SYM_EPSILON )
                   1039:                        {
                   1040:                        if ( tch < -lastccl || tch >= csize )
                   1041:                                {
                   1042:                                flexfatal(
                   1043:                _( "bad transition character detected in sympartition()" ) );
                   1044:                                }
                   1045:
                   1046:                        if ( tch >= 0 )
                   1047:                                { /* character transition */
                   1048:                                int ec = ecgroup[tch];
                   1049:
                   1050:                                mkechar( ec, dupfwd, duplist );
                   1051:                                symlist[ec] = 1;
                   1052:                                }
                   1053:
                   1054:                        else
                   1055:                                { /* character class */
                   1056:                                tch = -tch;
                   1057:
                   1058:                                lenccl = ccllen[tch];
                   1059:                                cclp = cclmap[tch];
                   1060:                                mkeccl( ccltbl + cclp, lenccl, dupfwd,
                   1061:                                        duplist, numecs, NUL_ec );
                   1062:
                   1063:                                if ( cclng[tch] )
                   1064:                                        {
                   1065:                                        j = 0;
                   1066:
                   1067:                                        for ( k = 0; k < lenccl; ++k )
                   1068:                                                {
                   1069:                                                ich = ccltbl[cclp + k];
                   1070:
                   1071:                                                if ( ich == 0 )
                   1072:                                                        ich = NUL_ec;
                   1073:
                   1074:                                                for ( ++j; j < ich; ++j )
                   1075:                                                        symlist[j] = 1;
                   1076:                                                }
                   1077:
                   1078:                                        for ( ++j; j <= numecs; ++j )
                   1079:                                                symlist[j] = 1;
                   1080:                                        }
                   1081:
                   1082:                                else
                   1083:                                        for ( k = 0; k < lenccl; ++k )
                   1084:                                                {
                   1085:                                                ich = ccltbl[cclp + k];
                   1086:
                   1087:                                                if ( ich == 0 )
                   1088:                                                        ich = NUL_ec;
                   1089:
                   1090:                                                symlist[ich] = 1;
                   1091:                                                }
                   1092:                                }
                   1093:                        }
                   1094:                }
                   1095:        }