src/usr.bin/lex/dfa.c - annotate

Return to dfa.c CVS log
Up to [local] / src / usr.bin / lex
Annotation of src/usr.bin/lex/dfa.c, Revision 1.4

1.4     ! deraadt     1: /*     $OpenBSD: dfa.c,v 1.3 1996/07/13 22:21:57 millert Exp $ */
1.2       deraadt     2:
1.1       deraadt     3: /* dfa - DFA construction routines */
                      4:
                      5: /*-
                      6:  * Copyright (c) 1990 The Regents of the University of California.
                      7:  * All rights reserved.
                      8:  *
                      9:  * This code is derived from software contributed to Berkeley by
                     10:  * Vern Paxson.
                     11:  *
                     12:  * The United States Government has rights in this work pursuant
                     13:  * to contract no. DE-AC03-76SF00098 between the United States
                     14:  * Department of Energy and the University of California.
                     15:  *
1.4     ! deraadt    16:  * Redistribution and use in source and binary forms, with or without
        !            17:  * modification, are permitted provided that: (1) source distributions
        !            18:  * retain this entire copyright notice and comment, and (2) distributions
        !            19:  * including binaries display the following acknowledgement:  ``This product
        !            20:  * includes software developed by the University of California, Berkeley
        !            21:  * and its contributors'' in the documentation or other materials provided
        !            22:  * with the distribution and in all advertising materials mentioning
        !            23:  * features or use of this software. Neither the name of the University nor
        !            24:  * the names of its contributors may be used to endorse or promote products
        !            25:  * derived from this software without specific prior written permission.
1.1       deraadt    26:  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
                     27:  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
                     28:  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
                     29:  */
                     30:
1.4     ! deraadt    31: /* $Header: /cvs/src/usr.bin/lex/dfa.c,v 1.3 1996/07/13 22:21:57 millert Exp $ */
1.1       deraadt    32:
                     33: #include "flexdef.h"
                     34:
                     35:
                     36: /* declare functions that have forward references */
                     37:
                     38: void dump_associated_rules PROTO((FILE*, int));
                     39: void dump_transitions PROTO((FILE*, int[]));
                     40: void sympartition PROTO((int[], int, int[], int[]));
                     41: int symfollowset PROTO((int[], int, int, int[]));
                     42:
                     43:
                     44: /* check_for_backing_up - check a DFA state for backing up
                     45:  *
                     46:  * synopsis
                     47:  *     void check_for_backing_up( int ds, int state[numecs] );
                     48:  *
                     49:  * ds is the number of the state to check and state[] is its out-transitions,
                     50:  * indexed by equivalence class.
                     51:  */
                     52:
                     53: void check_for_backing_up( ds, state )
                     54: int ds;
                     55: int state[];
                     56:        {
                     57:        if ( (reject && ! dfaacc[ds].dfaacc_set) ||
                     58:             (! reject && ! dfaacc[ds].dfaacc_state) )
                     59:                { /* state is non-accepting */
                     60:                ++num_backing_up;
                     61:
                     62:                if ( backing_up_report )
                     63:                        {
                     64:                        fprintf( backing_up_file,
                     65:                                _( "State #%d is non-accepting -\n" ), ds );
                     66:
                     67:                        /* identify the state */
                     68:                        dump_associated_rules( backing_up_file, ds );
                     69:
                     70:                        /* Now identify it further using the out- and
                     71:                         * jam-transitions.
                     72:                         */
                     73:                        dump_transitions( backing_up_file, state );
                     74:
                     75:                        putc( '\n', backing_up_file );
                     76:                        }
                     77:                }
                     78:        }
                     79:
                     80:
                     81: /* check_trailing_context - check to see if NFA state set constitutes
                     82:  *                          "dangerous" trailing context
                     83:  *
                     84:  * synopsis
                     85:  *    void check_trailing_context( int nfa_states[num_states+1], int num_states,
                     86:  *                             int accset[nacc+1], int nacc );
                     87:  *
                     88:  * NOTES
                     89:  *  Trailing context is "dangerous" if both the head and the trailing
                     90:  *  part are of variable size \and/ there's a DFA state which contains
                     91:  *  both an accepting state for the head part of the rule and NFA states
                     92:  *  which occur after the beginning of the trailing context.
                     93:  *
                     94:  *  When such a rule is matched, it's impossible to tell if having been
                     95:  *  in the DFA state indicates the beginning of the trailing context or
                     96:  *  further-along scanning of the pattern.  In these cases, a warning
                     97:  *  message is issued.
                     98:  *
                     99:  *    nfa_states[1 .. num_states] is the list of NFA states in the DFA.
                    100:  *    accset[1 .. nacc] is the list of accepting numbers for the DFA state.
                    101:  */
                    102:
                    103: void check_trailing_context( nfa_states, num_states, accset, nacc )
                    104: int *nfa_states, num_states;
                    105: int *accset;
                    106: int nacc;
                    107:        {
                    108:        register int i, j;
                    109:
                    110:        for ( i = 1; i <= num_states; ++i )
                    111:                {
                    112:                int ns = nfa_states[i];
                    113:                register int type = state_type[ns];
                    114:                register int ar = assoc_rule[ns];
                    115:
                    116:                if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE )
                    117:                        { /* do nothing */
                    118:                        }
                    119:
                    120:                else if ( type == STATE_TRAILING_CONTEXT )
                    121:                        {
                    122:                        /* Potential trouble.  Scan set of accepting numbers
                    123:                         * for the one marking the end of the "head".  We
                    124:                         * assume that this looping will be fairly cheap
                    125:                         * since it's rare that an accepting number set
                    126:                         * is large.
                    127:                         */
                    128:                        for ( j = 1; j <= nacc; ++j )
                    129:                                if ( accset[j] & YY_TRAILING_HEAD_MASK )
                    130:                                        {
                    131:                                        line_warning(
                    132:                                        _( "dangerous trailing context" ),
                    133:                                                rule_linenum[ar] );
                    134:                                        return;
                    135:                                        }
                    136:                        }
                    137:                }
                    138:        }
                    139:
                    140:
                    141: /* dump_associated_rules - list the rules associated with a DFA state
                    142:  *
                    143:  * Goes through the set of NFA states associated with the DFA and
                    144:  * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
                    145:  * and writes a report to the given file.
                    146:  */
                    147:
                    148: void dump_associated_rules( file, ds )
                    149: FILE *file;
                    150: int ds;
                    151:        {
                    152:        register int i, j;
                    153:        register int num_associated_rules = 0;
                    154:        int rule_set[MAX_ASSOC_RULES + 1];
                    155:        int *dset = dss[ds];
                    156:        int size = dfasiz[ds];
                    157:
                    158:        for ( i = 1; i <= size; ++i )
                    159:                {
                    160:                register int rule_num = rule_linenum[assoc_rule[dset[i]]];
                    161:
                    162:                for ( j = 1; j <= num_associated_rules; ++j )
                    163:                        if ( rule_num == rule_set[j] )
                    164:                                break;
                    165:
                    166:                if ( j > num_associated_rules )
                    167:                        { /* new rule */
                    168:                        if ( num_associated_rules < MAX_ASSOC_RULES )
                    169:                                rule_set[++num_associated_rules] = rule_num;
                    170:                        }
                    171:                }
                    172:
                    173:        bubble( rule_set, num_associated_rules );
                    174:
                    175:        fprintf( file, _( " associated rule line numbers:" ) );
                    176:
                    177:        for ( i = 1; i <= num_associated_rules; ++i )
                    178:                {
                    179:                if ( i % 8 == 1 )
                    180:                        putc( '\n', file );
                    181:
                    182:                fprintf( file, "\t%d", rule_set[i] );
                    183:                }
                    184:
                    185:        putc( '\n', file );
                    186:        }
                    187:
                    188:
                    189: /* dump_transitions - list the transitions associated with a DFA state
                    190:  *
                    191:  * synopsis
                    192:  *     dump_transitions( FILE *file, int state[numecs] );
                    193:  *
                    194:  * Goes through the set of out-transitions and lists them in human-readable
                    195:  * form (i.e., not as equivalence classes); also lists jam transitions
                    196:  * (i.e., all those which are not out-transitions, plus EOF).  The dump
                    197:  * is done to the given file.
                    198:  */
                    199:
                    200: void dump_transitions( file, state )
                    201: FILE *file;
                    202: int state[];
                    203:        {
                    204:        register int i, ec;
                    205:        int out_char_set[CSIZE];
                    206:
                    207:        for ( i = 0; i < csize; ++i )
                    208:                {
                    209:                ec = ABS( ecgroup[i] );
                    210:                out_char_set[i] = state[ec];
                    211:                }
                    212:
                    213:        fprintf( file, _( " out-transitions: " ) );
                    214:
                    215:        list_character_set( file, out_char_set );
                    216:
                    217:        /* now invert the members of the set to get the jam transitions */
                    218:        for ( i = 0; i < csize; ++i )
                    219:                out_char_set[i] = ! out_char_set[i];
                    220:
                    221:        fprintf( file, _( "\n jam-transitions: EOF " ) );
                    222:
                    223:        list_character_set( file, out_char_set );
                    224:
                    225:        putc( '\n', file );
                    226:        }
                    227:
                    228:
                    229: /* epsclosure - construct the epsilon closure of a set of ndfa states
                    230:  *
                    231:  * synopsis
                    232:  *    int *epsclosure( int t[num_states], int *numstates_addr,
                    233:  *                     int accset[num_rules+1], int *nacc_addr,
                    234:  *                     int *hashval_addr );
                    235:  *
                    236:  * NOTES
                    237:  *  The epsilon closure is the set of all states reachable by an arbitrary
                    238:  *  number of epsilon transitions, which themselves do not have epsilon
                    239:  *  transitions going out, unioned with the set of states which have non-null
                    240:  *  accepting numbers.  t is an array of size numstates of nfa state numbers.
                    241:  *  Upon return, t holds the epsilon closure and *numstates_addr is updated.
                    242:  *  accset holds a list of the accepting numbers, and the size of accset is
                    243:  *  given by *nacc_addr.  t may be subjected to reallocation if it is not
                    244:  *  large enough to hold the epsilon closure.
                    245:  *
                    246:  *  hashval is the hash value for the dfa corresponding to the state set.
                    247:  */
                    248:
                    249: int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr )
                    250: int *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
                    251:        {
                    252:        register int stkpos, ns, tsp;
                    253:        int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
                    254:        int stkend, nstate;
                    255:        static int did_stk_init = false, *stk;
                    256:
                    257: #define MARK_STATE(state) \
                    258: trans1[state] = trans1[state] - MARKER_DIFFERENCE;
                    259:
                    260: #define IS_MARKED(state) (trans1[state] < 0)
                    261:
                    262: #define UNMARK_STATE(state) \
                    263: trans1[state] = trans1[state] + MARKER_DIFFERENCE;
                    264:
                    265: #define CHECK_ACCEPT(state) \
                    266: { \
                    267: nfaccnum = accptnum[state]; \
                    268: if ( nfaccnum != NIL ) \
                    269: accset[++nacc] = nfaccnum; \
                    270: }
                    271:
                    272: #define DO_REALLOCATION \
                    273: { \
                    274: current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
                    275: ++num_reallocs; \
                    276: t = reallocate_integer_array( t, current_max_dfa_size ); \
                    277: stk = reallocate_integer_array( stk, current_max_dfa_size ); \
                    278: } \
                    279:
                    280: #define PUT_ON_STACK(state) \
                    281: { \
                    282: if ( ++stkend >= current_max_dfa_size ) \
                    283: DO_REALLOCATION \
                    284: stk[stkend] = state; \
                    285: MARK_STATE(state) \
                    286: }
                    287:
                    288: #define ADD_STATE(state) \
                    289: { \
                    290: if ( ++numstates >= current_max_dfa_size ) \
                    291: DO_REALLOCATION \
                    292: t[numstates] = state; \
                    293: hashval += state; \
                    294: }
                    295:
                    296: #define STACK_STATE(state) \
                    297: { \
                    298: PUT_ON_STACK(state) \
                    299: CHECK_ACCEPT(state) \
                    300: if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
                    301: ADD_STATE(state) \
                    302: }
                    303:
                    304:
                    305:        if ( ! did_stk_init )
                    306:                {
                    307:                stk = allocate_integer_array( current_max_dfa_size );
                    308:                did_stk_init = true;
                    309:                }
                    310:
                    311:        nacc = stkend = hashval = 0;
                    312:
                    313:        for ( nstate = 1; nstate <= numstates; ++nstate )
                    314:                {
                    315:                ns = t[nstate];
                    316:
                    317:                /* The state could be marked if we've already pushed it onto
                    318:                 * the stack.
                    319:                 */
                    320:                if ( ! IS_MARKED(ns) )
                    321:                        {
                    322:                        PUT_ON_STACK(ns)
                    323:                        CHECK_ACCEPT(ns)
                    324:                        hashval += ns;
                    325:                        }
                    326:                }
                    327:
                    328:        for ( stkpos = 1; stkpos <= stkend; ++stkpos )
                    329:                {
                    330:                ns = stk[stkpos];
                    331:                transsym = transchar[ns];
                    332:
                    333:                if ( transsym == SYM_EPSILON )
                    334:                        {
                    335:                        tsp = trans1[ns] + MARKER_DIFFERENCE;
                    336:
                    337:                        if ( tsp != NO_TRANSITION )
                    338:                                {
                    339:                                if ( ! IS_MARKED(tsp) )
                    340:                                        STACK_STATE(tsp)
                    341:
                    342:                                tsp = trans2[ns];
                    343:
                    344:                                if ( tsp != NO_TRANSITION && ! IS_MARKED(tsp) )
                    345:                                        STACK_STATE(tsp)
                    346:                                }
                    347:                        }
                    348:                }
                    349:
                    350:        /* Clear out "visit" markers. */
                    351:
                    352:        for ( stkpos = 1; stkpos <= stkend; ++stkpos )
                    353:                {
                    354:                if ( IS_MARKED(stk[stkpos]) )
                    355:                        UNMARK_STATE(stk[stkpos])
                    356:                else
                    357:                        flexfatal(
                    358:                        _( "consistency check failed in epsclosure()" ) );
                    359:                }
                    360:
                    361:        *ns_addr = numstates;
                    362:        *hv_addr = hashval;
                    363:        *nacc_addr = nacc;
                    364:
                    365:        return t;
                    366:        }
                    367:
                    368:
                    369: /* increase_max_dfas - increase the maximum number of DFAs */
                    370:
                    371: void increase_max_dfas()
                    372:        {
                    373:        current_max_dfas += MAX_DFAS_INCREMENT;
                    374:
                    375:        ++num_reallocs;
                    376:
                    377:        base = reallocate_integer_array( base, current_max_dfas );
                    378:        def = reallocate_integer_array( def, current_max_dfas );
                    379:        dfasiz = reallocate_integer_array( dfasiz, current_max_dfas );
                    380:        accsiz = reallocate_integer_array( accsiz, current_max_dfas );
                    381:        dhash = reallocate_integer_array( dhash, current_max_dfas );
                    382:        dss = reallocate_int_ptr_array( dss, current_max_dfas );
                    383:        dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas );
                    384:
                    385:        if ( nultrans )
                    386:                nultrans =
                    387:                        reallocate_integer_array( nultrans, current_max_dfas );
                    388:        }
                    389:
                    390:
                    391: /* ntod - convert an ndfa to a dfa
                    392:  *
                    393:  * Creates the dfa corresponding to the ndfa we've constructed.  The
                    394:  * dfa starts out in state #1.
                    395:  */
                    396:
                    397: void ntod()
                    398:        {
                    399:        int *accset, ds, nacc, newds;
                    400:        int sym, hashval, numstates, dsize;
                    401:        int num_full_table_rows;        /* used only for -f */
                    402:        int *nset, *dset;
                    403:        int targptr, totaltrans, i, comstate, comfreq, targ;
                    404:        int symlist[CSIZE + 1];
                    405:        int num_start_states;
                    406:        int todo_head, todo_next;
                    407:
                    408:        /* Note that the following are indexed by *equivalence classes*
                    409:         * and not by characters.  Since equivalence classes are indexed
                    410:         * beginning with 1, even if the scanner accepts NUL's, this
                    411:         * means that (since every character is potentially in its own
                    412:         * equivalence class) these arrays must have room for indices
                    413:         * from 1 to CSIZE, so their size must be CSIZE + 1.
                    414:         */
                    415:        int duplist[CSIZE + 1], state[CSIZE + 1];
                    416:        int targfreq[CSIZE + 1], targstate[CSIZE + 1];
                    417:
                    418:        accset = allocate_integer_array( num_rules + 1 );
                    419:        nset = allocate_integer_array( current_max_dfa_size );
                    420:
                    421:        /* The "todo" queue is represented by the head, which is the DFA
                    422:         * state currently being processed, and the "next", which is the
                    423:         * next DFA state number available (not in use).  We depend on the
                    424:         * fact that snstods() returns DFA's \in increasing order/, and thus
                    425:         * need only know the bounds of the dfas to be processed.
                    426:         */
                    427:        todo_head = todo_next = 0;
                    428:
                    429:        for ( i = 0; i <= csize; ++i )
                    430:                {
                    431:                duplist[i] = NIL;
                    432:                symlist[i] = false;
                    433:                }
                    434:
                    435:        for ( i = 0; i <= num_rules; ++i )
                    436:                accset[i] = NIL;
                    437:
                    438:        if ( trace )
                    439:                {
                    440:                dumpnfa( scset[1] );
                    441:                fputs( _( "\n\nDFA Dump:\n\n" ), stderr );
                    442:                }
                    443:
                    444:        inittbl();
                    445:
                    446:        /* Check to see whether we should build a separate table for
                    447:         * transitions on NUL characters.  We don't do this for full-speed
                    448:         * (-F) scanners, since for them we don't have a simple state
                    449:         * number lying around with which to index the table.  We also
                    450:         * don't bother doing it for scanners unless (1) NUL is in its own
                    451:         * equivalence class (indicated by a positive value of
                    452:         * ecgroup[NUL]), (2) NUL's equivalence class is the last
                    453:         * equivalence class, and (3) the number of equivalence classes is
                    454:         * the same as the number of characters.  This latter case comes
                    455:         * about when useecs is false or when it's true but every character
                    456:         * still manages to land in its own class (unlikely, but it's
                    457:         * cheap to check for).  If all these things are true then the
                    458:         * character code needed to represent NUL's equivalence class for
                    459:         * indexing the tables is going to take one more bit than the
                    460:         * number of characters, and therefore we won't be assured of
                    461:         * being able to fit it into a YY_CHAR variable.  This rules out
                    462:         * storing the transitions in a compressed table, since the code
                    463:         * for interpreting them uses a YY_CHAR variable (perhaps it
                    464:         * should just use an integer, though; this is worth pondering ...
                    465:         * ###).
                    466:         *
                    467:         * Finally, for full tables, we want the number of entries in the
                    468:         * table to be a power of two so the array references go fast (it
                    469:         * will just take a shift to compute the major index).  If
                    470:         * encoding NUL's transitions in the table will spoil this, we
                    471:         * give it its own table (note that this will be the case if we're
                    472:         * not using equivalence classes).
                    473:         */
                    474:
                    475:        /* Note that the test for ecgroup[0] == numecs below accomplishes
                    476:         * both (1) and (2) above
                    477:         */
                    478:        if ( ! fullspd && ecgroup[0] == numecs )
                    479:                {
                    480:                /* NUL is alone in its equivalence class, which is the
                    481:                 * last one.
                    482:                 */
                    483:                int use_NUL_table = (numecs == csize);
                    484:
                    485:                if ( fulltbl && ! use_NUL_table )
                    486:                        {
                    487:                        /* We still may want to use the table if numecs
                    488:                         * is a power of 2.
                    489:                         */
                    490:                        int power_of_two;
                    491:
                    492:                        for ( power_of_two = 1; power_of_two <= csize;
                    493:                              power_of_two *= 2 )
                    494:                                if ( numecs == power_of_two )
                    495:                                        {
                    496:                                        use_NUL_table = true;
                    497:                                        break;
                    498:                                        }
                    499:                        }
                    500:
                    501:                if ( use_NUL_table )
                    502:                        nultrans = allocate_integer_array( current_max_dfas );
                    503:
                    504:                /* From now on, nultrans != nil indicates that we're
                    505:                 * saving null transitions for later, separate encoding.
                    506:                 */
                    507:                }
                    508:
                    509:
                    510:        if ( fullspd )
                    511:                {
                    512:                for ( i = 0; i <= numecs; ++i )
                    513:                        state[i] = 0;
                    514:
                    515:                place_state( state, 0, 0 );
                    516:                dfaacc[0].dfaacc_state = 0;
                    517:                }
                    518:
                    519:        else if ( fulltbl )
                    520:                {
                    521:                if ( nultrans )
                    522:                        /* We won't be including NUL's transitions in the
                    523:                         * table, so build it for entries from 0 .. numecs - 1.
                    524:                         */
                    525:                        num_full_table_rows = numecs;
                    526:
                    527:                else
                    528:                        /* Take into account the fact that we'll be including
                    529:                         * the NUL entries in the transition table.  Build it
                    530:                         * from 0 .. numecs.
                    531:                         */
                    532:                        num_full_table_rows = numecs + 1;
                    533:
                    534:                /* Unless -Ca, declare it "short" because it's a real
                    535:                 * long-shot that that won't be large enough.
                    536:                 */
                    537:                out_str_dec( "static yyconst %s yy_nxt[][%d] =\n    {\n",
                    538:                        /* '}' so vi doesn't get too confused */
                    539:                        long_align ? "long" : "short", num_full_table_rows );
                    540:
                    541:                outn( "    {" );
                    542:
                    543:                /* Generate 0 entries for state #0. */
                    544:                for ( i = 0; i < num_full_table_rows; ++i )
                    545:                        mk2data( 0 );
                    546:
                    547:                dataflush();
                    548:                outn( "    },\n" );
                    549:                }
                    550:
                    551:        /* Create the first states. */
                    552:
                    553:        num_start_states = lastsc * 2;
                    554:
                    555:        for ( i = 1; i <= num_start_states; ++i )
                    556:                {
                    557:                numstates = 1;
                    558:
                    559:                /* For each start condition, make one state for the case when
                    560:                 * we're at the beginning of the line (the '^' operator) and
                    561:                 * one for the case when we're not.
                    562:                 */
                    563:                if ( i % 2 == 1 )
                    564:                        nset[numstates] = scset[(i / 2) + 1];
                    565:                else
                    566:                        nset[numstates] =
                    567:                                mkbranch( scbol[i / 2], scset[i / 2] );
                    568:
                    569:                nset = epsclosure( nset, &numstates, accset, &nacc, &hashval );
                    570:
                    571:                if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) )
                    572:                        {
                    573:                        numas += nacc;
                    574:                        totnst += numstates;
                    575:                        ++todo_next;
                    576:
                    577:                        if ( variable_trailing_context_rules && nacc > 0 )
                    578:                                check_trailing_context( nset, numstates,
                    579:                                                        accset, nacc );
                    580:                        }
                    581:                }
                    582:
                    583:        if ( ! fullspd )
                    584:                {
                    585:                if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) )
                    586:                        flexfatal(
                    587:                        _( "could not create unique end-of-buffer state" ) );
                    588:
                    589:                ++numas;
                    590:                ++num_start_states;
                    591:                ++todo_next;
                    592:                }
                    593:
                    594:        while ( todo_head < todo_next )
                    595:                {
                    596:                targptr = 0;
                    597:                totaltrans = 0;
                    598:
                    599:                for ( i = 1; i <= numecs; ++i )
                    600:                        state[i] = 0;
                    601:
                    602:                ds = ++todo_head;
                    603:
                    604:                dset = dss[ds];
                    605:                dsize = dfasiz[ds];
                    606:
                    607:                if ( trace )
                    608:                        fprintf( stderr, _( "state # %d:\n" ), ds );
                    609:
                    610:                sympartition( dset, dsize, symlist, duplist );
                    611:
                    612:                for ( sym = 1; sym <= numecs; ++sym )
                    613:                        {
                    614:                        if ( symlist[sym] )
                    615:                                {
                    616:                                symlist[sym] = 0;
                    617:
                    618:                                if ( duplist[sym] == NIL )
                    619:                                        {
                    620:                                        /* Symbol has unique out-transitions. */
                    621:                                        numstates = symfollowset( dset, dsize,
                    622:                                                                sym, nset );
                    623:                                        nset = epsclosure( nset, &numstates,
                    624:                                                accset, &nacc, &hashval );
                    625:
                    626:                                        if ( snstods( nset, numstates, accset,
                    627:                                                nacc, hashval, &newds ) )
                    628:                                                {
                    629:                                                totnst = totnst + numstates;
                    630:                                                ++todo_next;
                    631:                                                numas += nacc;
                    632:
                    633:                                                if (
                    634:                                        variable_trailing_context_rules &&
                    635:                                                        nacc > 0 )
                    636:                                                        check_trailing_context(
                    637:                                                                nset, numstates,
                    638:                                                                accset, nacc );
                    639:                                                }
                    640:
                    641:                                        state[sym] = newds;
                    642:
                    643:                                        if ( trace )
                    644:                                                fprintf( stderr, "\t%d\t%d\n",
                    645:                                                        sym, newds );
                    646:
                    647:                                        targfreq[++targptr] = 1;
                    648:                                        targstate[targptr] = newds;
                    649:                                        ++numuniq;
                    650:                                        }
                    651:
                    652:                                else
                    653:                                        {
                    654:                                        /* sym's equivalence class has the same
                    655:                                         * transitions as duplist(sym)'s
                    656:                                         * equivalence class.
                    657:                                         */
                    658:                                        targ = state[duplist[sym]];
                    659:                                        state[sym] = targ;
                    660:
                    661:                                        if ( trace )
                    662:                                                fprintf( stderr, "\t%d\t%d\n",
                    663:                                                        sym, targ );
                    664:
                    665:                                        /* Update frequency count for
                    666:                                         * destination state.
                    667:                                         */
                    668:
                    669:                                        i = 0;
                    670:                                        while ( targstate[++i] != targ )
                    671:                                                ;
                    672:
                    673:                                        ++targfreq[i];
                    674:                                        ++numdup;
                    675:                                        }
                    676:
                    677:                                ++totaltrans;
                    678:                                duplist[sym] = NIL;
                    679:                                }
                    680:                        }
                    681:
                    682:                if ( caseins && ! useecs )
                    683:                        {
                    684:                        register int j;
                    685:
                    686:                        for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j )
                    687:                                {
                    688:                                if ( state[i] == 0 && state[j] != 0 )
                    689:                                        /* We're adding a transition. */
                    690:                                        ++totaltrans;
                    691:
                    692:                                else if ( state[i] != 0 && state[j] == 0 )
                    693:                                        /* We're taking away a transition. */
                    694:                                        --totaltrans;
                    695:
                    696:                                state[i] = state[j];
                    697:                                }
                    698:                        }
                    699:
                    700:                numsnpairs += totaltrans;
                    701:
                    702:                if ( ds > num_start_states )
                    703:                        check_for_backing_up( ds, state );
                    704:
                    705:                if ( nultrans )
                    706:                        {
                    707:                        nultrans[ds] = state[NUL_ec];
                    708:                        state[NUL_ec] = 0;      /* remove transition */
                    709:                        }
                    710:
                    711:                if ( fulltbl )
                    712:                        {
                    713:                        outn( "    {" );
                    714:
                    715:                        /* Supply array's 0-element. */
                    716:                        if ( ds == end_of_buffer_state )
                    717:                                mk2data( -end_of_buffer_state );
                    718:                        else
                    719:                                mk2data( end_of_buffer_state );
                    720:
                    721:                        for ( i = 1; i < num_full_table_rows; ++i )
                    722:                                /* Jams are marked by negative of state
                    723:                                 * number.
                    724:                                 */
                    725:                                mk2data( state[i] ? state[i] : -ds );
                    726:
                    727:                        dataflush();
                    728:                        outn( "    },\n" );
                    729:                        }
                    730:
                    731:                else if ( fullspd )
                    732:                        place_state( state, ds, totaltrans );
                    733:
                    734:                else if ( ds == end_of_buffer_state )
                    735:                        /* Special case this state to make sure it does what
                    736:                         * it's supposed to, i.e., jam on end-of-buffer.
                    737:                         */
                    738:                        stack1( ds, 0, 0, JAMSTATE );
                    739:
                    740:                else /* normal, compressed state */
                    741:                        {
                    742:                        /* Determine which destination state is the most
                    743:                         * common, and how many transitions to it there are.
                    744:                         */
                    745:
                    746:                        comfreq = 0;
                    747:                        comstate = 0;
                    748:
                    749:                        for ( i = 1; i <= targptr; ++i )
                    750:                                if ( targfreq[i] > comfreq )
                    751:                                        {
                    752:                                        comfreq = targfreq[i];
                    753:                                        comstate = targstate[i];
                    754:                                        }
                    755:
                    756:                        bldtbl( state, ds, totaltrans, comstate, comfreq );
                    757:                        }
                    758:                }
                    759:
                    760:        if ( fulltbl )
                    761:                dataend();
                    762:
                    763:        else if ( ! fullspd )
                    764:                {
                    765:                cmptmps();  /* create compressed template entries */
                    766:
                    767:                /* Create tables for all the states with only one
                    768:                 * out-transition.
                    769:                 */
                    770:                while ( onesp > 0 )
                    771:                        {
                    772:                        mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp],
                    773:                        onedef[onesp] );
                    774:                        --onesp;
                    775:                        }
                    776:
                    777:                mkdeftbl();
                    778:                }
                    779:
                    780:        flex_free( (void *) accset );
                    781:        flex_free( (void *) nset );
                    782:        }
                    783:
                    784:
                    785: /* snstods - converts a set of ndfa states into a dfa state
                    786:  *
                    787:  * synopsis
                    788:  *    is_new_state = snstods( int sns[numstates], int numstates,
                    789:  *                             int accset[num_rules+1], int nacc,
                    790:  *                             int hashval, int *newds_addr );
                    791:  *
                    792:  * On return, the dfa state number is in newds.
                    793:  */
                    794:
                    795: int snstods( sns, numstates, accset, nacc, hashval, newds_addr )
                    796: int sns[], numstates, accset[], nacc, hashval, *newds_addr;
                    797:        {
                    798:        int didsort = 0;
                    799:        register int i, j;
                    800:        int newds, *oldsns;
                    801:
                    802:        for ( i = 1; i <= lastdfa; ++i )
                    803:                if ( hashval == dhash[i] )
                    804:                        {
                    805:                        if ( numstates == dfasiz[i] )
                    806:                                {
                    807:                                oldsns = dss[i];
                    808:
                    809:                                if ( ! didsort )
                    810:                                        {
                    811:                                        /* We sort the states in sns so we
                    812:                                         * can compare it to oldsns quickly.
                    813:                                         * We use bubble because there probably
                    814:                                         * aren't very many states.
                    815:                                         */
                    816:                                        bubble( sns, numstates );
                    817:                                        didsort = 1;
                    818:                                        }
                    819:
                    820:                                for ( j = 1; j <= numstates; ++j )
                    821:                                        if ( sns[j] != oldsns[j] )
                    822:                                                break;
                    823:
                    824:                                if ( j > numstates )
                    825:                                        {
                    826:                                        ++dfaeql;
                    827:                                        *newds_addr = i;
                    828:                                        return 0;
                    829:                                        }
                    830:
                    831:                                ++hshcol;
                    832:                                }
                    833:
                    834:                        else
                    835:                                ++hshsave;
                    836:                        }
                    837:
                    838:        /* Make a new dfa. */
                    839:
                    840:        if ( ++lastdfa >= current_max_dfas )
                    841:                increase_max_dfas();
                    842:
                    843:        newds = lastdfa;
                    844:
                    845:        dss[newds] = allocate_integer_array( numstates + 1 );
                    846:
                    847:        /* If we haven't already sorted the states in sns, we do so now,
                    848:         * so that future comparisons with it can be made quickly.
                    849:         */
                    850:
                    851:        if ( ! didsort )
                    852:                bubble( sns, numstates );
                    853:
                    854:        for ( i = 1; i <= numstates; ++i )
                    855:                dss[newds][i] = sns[i];
                    856:
                    857:        dfasiz[newds] = numstates;
                    858:        dhash[newds] = hashval;
                    859:
                    860:        if ( nacc == 0 )
                    861:                {
                    862:                if ( reject )
                    863:                        dfaacc[newds].dfaacc_set = (int *) 0;
                    864:                else
                    865:                        dfaacc[newds].dfaacc_state = 0;
                    866:
                    867:                accsiz[newds] = 0;
                    868:                }
                    869:
                    870:        else if ( reject )
                    871:                {
                    872:                /* We sort the accepting set in increasing order so the
                    873:                 * disambiguating rule that the first rule listed is considered
                    874:                 * match in the event of ties will work.  We use a bubble
                    875:                 * sort since the list is probably quite small.
                    876:                 */
                    877:
                    878:                bubble( accset, nacc );
                    879:
                    880:                dfaacc[newds].dfaacc_set = allocate_integer_array( nacc + 1 );
                    881:
                    882:                /* Save the accepting set for later */
                    883:                for ( i = 1; i <= nacc; ++i )
                    884:                        {
                    885:                        dfaacc[newds].dfaacc_set[i] = accset[i];
                    886:
                    887:                        if ( accset[i] <= num_rules )
                    888:                                /* Who knows, perhaps a REJECT can yield
                    889:                                 * this rule.
                    890:                                 */
                    891:                                rule_useful[accset[i]] = true;
                    892:                        }
                    893:
                    894:                accsiz[newds] = nacc;
                    895:                }
                    896:
                    897:        else
                    898:                {
                    899:                /* Find lowest numbered rule so the disambiguating rule
                    900:                 * will work.
                    901:                 */
                    902:                j = num_rules + 1;
                    903:
                    904:                for ( i = 1; i <= nacc; ++i )
                    905:                        if ( accset[i] < j )
                    906:                                j = accset[i];
                    907:
                    908:                dfaacc[newds].dfaacc_state = j;
                    909:
                    910:                if ( j <= num_rules )
                    911:                        rule_useful[j] = true;
                    912:                }
                    913:
                    914:        *newds_addr = newds;
                    915:
                    916:        return 1;
                    917:        }
                    918:
                    919:
                    920: /* symfollowset - follow the symbol transitions one step
                    921:  *
                    922:  * synopsis
                    923:  *    numstates = symfollowset( int ds[current_max_dfa_size], int dsize,
                    924:  *                             int transsym, int nset[current_max_dfa_size] );
                    925:  */
                    926:
                    927: int symfollowset( ds, dsize, transsym, nset )
                    928: int ds[], dsize, transsym, nset[];
                    929:        {
                    930:        int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist;
                    931:
                    932:        numstates = 0;
                    933:
                    934:        for ( i = 1; i <= dsize; ++i )
                    935:                { /* for each nfa state ns in the state set of ds */
                    936:                ns = ds[i];
                    937:                sym = transchar[ns];
                    938:                tsp = trans1[ns];
                    939:
                    940:                if ( sym < 0 )
                    941:                        { /* it's a character class */
                    942:                        sym = -sym;
                    943:                        ccllist = cclmap[sym];
                    944:                        lenccl = ccllen[sym];
                    945:
                    946:                        if ( cclng[sym] )
                    947:                                {
                    948:                                for ( j = 0; j < lenccl; ++j )
                    949:                                        {
                    950:                                        /* Loop through negated character
                    951:                                         * class.
                    952:                                         */
                    953:                                        ch = ccltbl[ccllist + j];
                    954:
                    955:                                        if ( ch == 0 )
                    956:                                                ch = NUL_ec;
                    957:
                    958:                                        if ( ch > transsym )
                    959:                                                /* Transsym isn't in negated
                    960:                                                 * ccl.
                    961:                                                 */
                    962:                                                break;
                    963:
                    964:                                        else if ( ch == transsym )
                    965:                                                /* next 2 */ goto bottom;
                    966:                                        }
                    967:
                    968:                                /* Didn't find transsym in ccl. */
                    969:                                nset[++numstates] = tsp;
                    970:                                }
                    971:
                    972:                        else
                    973:                                for ( j = 0; j < lenccl; ++j )
                    974:                                        {
                    975:                                        ch = ccltbl[ccllist + j];
                    976:
                    977:                                        if ( ch == 0 )
                    978:                                                ch = NUL_ec;
                    979:
                    980:                                        if ( ch > transsym )
                    981:                                                break;
                    982:                                        else if ( ch == transsym )
                    983:                                                {
                    984:                                                nset[++numstates] = tsp;
                    985:                                                break;
                    986:                                                }
                    987:                                        }
                    988:                        }
                    989:
                    990:                else if ( sym >= 'A' && sym <= 'Z' && caseins )
                    991:                        flexfatal(
                    992:                        _( "consistency check failed in symfollowset" ) );
                    993:
                    994:                else if ( sym == SYM_EPSILON )
                    995:                        { /* do nothing */
                    996:                        }
                    997:
                    998:                else if ( ABS( ecgroup[sym] ) == transsym )
                    999:                        nset[++numstates] = tsp;
                   1000:
                   1001:                bottom: ;
                   1002:                }
                   1003:
                   1004:        return numstates;
                   1005:        }
                   1006:
                   1007:
                   1008: /* sympartition - partition characters with same out-transitions
                   1009:  *
                   1010:  * synopsis
                   1011:  *    sympartition( int ds[current_max_dfa_size], int numstates,
                   1012:  *                     int symlist[numecs], int duplist[numecs] );
                   1013:  */
                   1014:
                   1015: void sympartition( ds, numstates, symlist, duplist )
                   1016: int ds[], numstates;
                   1017: int symlist[], duplist[];
                   1018:        {
                   1019:        int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
                   1020:
                   1021:        /* Partitioning is done by creating equivalence classes for those
                   1022:         * characters which have out-transitions from the given state.  Thus
                   1023:         * we are really creating equivalence classes of equivalence classes.
                   1024:         */
                   1025:
                   1026:        for ( i = 1; i <= numecs; ++i )
                   1027:                { /* initialize equivalence class list */
                   1028:                duplist[i] = i - 1;
                   1029:                dupfwd[i] = i + 1;
                   1030:                }
                   1031:
                   1032:        duplist[1] = NIL;
                   1033:        dupfwd[numecs] = NIL;
                   1034:
                   1035:        for ( i = 1; i <= numstates; ++i )
                   1036:                {
                   1037:                ns = ds[i];
                   1038:                tch = transchar[ns];
                   1039:
                   1040:                if ( tch != SYM_EPSILON )
                   1041:                        {
                   1042:                        if ( tch < -lastccl || tch >= csize )
                   1043:                                {
                   1044:                                flexfatal(
                   1045:                _( "bad transition character detected in sympartition()" ) );
                   1046:                                }
                   1047:
                   1048:                        if ( tch >= 0 )
                   1049:                                { /* character transition */
                   1050:                                int ec = ecgroup[tch];
                   1051:
                   1052:                                mkechar( ec, dupfwd, duplist );
                   1053:                                symlist[ec] = 1;
                   1054:                                }
                   1055:
                   1056:                        else
                   1057:                                { /* character class */
                   1058:                                tch = -tch;
                   1059:
                   1060:                                lenccl = ccllen[tch];
                   1061:                                cclp = cclmap[tch];
                   1062:                                mkeccl( ccltbl + cclp, lenccl, dupfwd,
                   1063:                                        duplist, numecs, NUL_ec );
                   1064:
                   1065:                                if ( cclng[tch] )
                   1066:                                        {
                   1067:                                        j = 0;
                   1068:
                   1069:                                        for ( k = 0; k < lenccl; ++k )
                   1070:                                                {
                   1071:                                                ich = ccltbl[cclp + k];
                   1072:
                   1073:                                                if ( ich == 0 )
                   1074:                                                        ich = NUL_ec;
                   1075:
                   1076:                                                for ( ++j; j < ich; ++j )
                   1077:                                                        symlist[j] = 1;
                   1078:                                                }
                   1079:
                   1080:                                        for ( ++j; j <= numecs; ++j )
                   1081:                                                symlist[j] = 1;
                   1082:                                        }
                   1083:
                   1084:                                else
                   1085:                                        for ( k = 0; k < lenccl; ++k )
                   1086:                                                {
                   1087:                                                ich = ccltbl[cclp + k];
                   1088:
                   1089:                                                if ( ich == 0 )
                   1090:                                                        ich = NUL_ec;
                   1091:
                   1092:                                                symlist[ich] = 1;
                   1093:                                                }
                   1094:                                }
                   1095:                        }
                   1096:                }
                   1097:        }