[BACK]Return to nfa.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / lex

Annotation of src/usr.bin/lex/nfa.c, Revision 1.10

1.10    ! tedu        1: /*     $OpenBSD: nfa.c,v 1.9 2003/06/04 17:34:44 millert Exp $ */
1.2       deraadt     2:
1.1       deraadt     3: /* nfa - NFA construction routines */
                      4:
1.10    ! tedu        5: /*  Copyright (c) 1990 The Regents of the University of California. */
        !             6: /*  All rights reserved. */
        !             7:
        !             8: /*  This code is derived from software contributed to Berkeley by */
        !             9: /*  Vern Paxson. */
1.1       deraadt    10:
1.10    ! tedu       11: /*  The United States Government has rights in this work pursuant */
        !            12: /*  to contract no. DE-AC03-76SF00098 between the United States */
        !            13: /*  Department of Energy and the University of California. */
        !            14:
        !            15: /*  This file is part of flex. */
        !            16:
        !            17: /*  Redistribution and use in source and binary forms, with or without */
        !            18: /*  modification, are permitted provided that the following conditions */
        !            19: /*  are met: */
        !            20:
        !            21: /*  1. Redistributions of source code must retain the above copyright */
        !            22: /*     notice, this list of conditions and the following disclaimer. */
        !            23: /*  2. Redistributions in binary form must reproduce the above copyright */
        !            24: /*     notice, this list of conditions and the following disclaimer in the */
        !            25: /*     documentation and/or other materials provided with the distribution. */
        !            26:
        !            27: /*  Neither the name of the University nor the names of its contributors */
        !            28: /*  may be used to endorse or promote products derived from this software */
        !            29: /*  without specific prior written permission. */
        !            30:
        !            31: /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
        !            32: /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
        !            33: /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
        !            34: /*  PURPOSE. */
1.1       deraadt    35:
                     36: #include "flexdef.h"
                     37:
                     38:
                     39: /* declare functions that have forward references */
                     40:
1.10    ! tedu       41: int dupmachine PROTO ((int));
        !            42: void mkxtion PROTO ((int, int));
1.1       deraadt    43:
                     44:
                     45: /* add_accept - add an accepting state to a machine
                     46:  *
                     47:  * accepting_number becomes mach's accepting number.
                     48:  */
                     49:
1.10    ! tedu       50: void    add_accept (mach, accepting_number)
        !            51:      int     mach, accepting_number;
        !            52: {
1.1       deraadt    53:        /* Hang the accepting number off an epsilon state.  if it is associated
                     54:         * with a state that has a non-epsilon out-transition, then the state
                     55:         * will accept BEFORE it makes that transition, i.e., one character
                     56:         * too soon.
                     57:         */
                     58:
1.10    ! tedu       59:        if (transchar[finalst[mach]] == SYM_EPSILON)
1.1       deraadt    60:                accptnum[finalst[mach]] = accepting_number;
                     61:
1.10    ! tedu       62:        else {
        !            63:                int     astate = mkstate (SYM_EPSILON);
        !            64:
1.1       deraadt    65:                accptnum[astate] = accepting_number;
1.10    ! tedu       66:                (void) link_machines (mach, astate);
1.1       deraadt    67:        }
1.10    ! tedu       68: }
1.1       deraadt    69:
                     70:
                     71: /* copysingl - make a given number of copies of a singleton machine
                     72:  *
                     73:  * synopsis
                     74:  *
                     75:  *   newsng = copysingl( singl, num );
                     76:  *
                     77:  *     newsng - a new singleton composed of num copies of singl
                     78:  *     singl  - a singleton machine
                     79:  *     num    - the number of copies of singl to be present in newsng
                     80:  */
                     81:
1.10    ! tedu       82: int     copysingl (singl, num)
        !            83:      int     singl, num;
        !            84: {
        !            85:        int     copy, i;
1.1       deraadt    86:
1.10    ! tedu       87:        copy = mkstate (SYM_EPSILON);
1.1       deraadt    88:
1.10    ! tedu       89:        for (i = 1; i <= num; ++i)
        !            90:                copy = link_machines (copy, dupmachine (singl));
1.1       deraadt    91:
                     92:        return copy;
1.10    ! tedu       93: }
1.1       deraadt    94:
                     95:
                     96: /* dumpnfa - debugging routine to write out an nfa */
                     97:
1.10    ! tedu       98: void    dumpnfa (state1)
        !            99:      int     state1;
1.1       deraadt   100:
1.10    ! tedu      101: {
        !           102:        int     sym, tsp1, tsp2, anum, ns;
1.1       deraadt   103:
1.10    ! tedu      104:        fprintf (stderr,
        !           105:                 _
        !           106:                 ("\n\n********** beginning dump of nfa with start state %d\n"),
        !           107:                 state1);
1.1       deraadt   108:
                    109:        /* We probably should loop starting at firstst[state1] and going to
                    110:         * lastst[state1], but they're not maintained properly when we "or"
                    111:         * all of the rules together.  So we use our knowledge that the machine
                    112:         * starts at state 1 and ends at lastnfa.
                    113:         */
                    114:
                    115:        /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
1.10    ! tedu      116:        for (ns = 1; ns <= lastnfa; ++ns) {
        !           117:                fprintf (stderr, _("state # %4d\t"), ns);
1.1       deraadt   118:
                    119:                sym = transchar[ns];
                    120:                tsp1 = trans1[ns];
                    121:                tsp2 = trans2[ns];
                    122:                anum = accptnum[ns];
                    123:
1.10    ! tedu      124:                fprintf (stderr, "%3d:  %4d, %4d", sym, tsp1, tsp2);
1.1       deraadt   125:
1.10    ! tedu      126:                if (anum != NIL)
        !           127:                        fprintf (stderr, "  [%d]", anum);
1.1       deraadt   128:
1.10    ! tedu      129:                fprintf (stderr, "\n");
        !           130:        }
1.1       deraadt   131:
1.10    ! tedu      132:        fprintf (stderr, _("********** end of dump\n"));
        !           133: }
1.1       deraadt   134:
                    135:
                    136: /* dupmachine - make a duplicate of a given machine
                    137:  *
                    138:  * synopsis
                    139:  *
                    140:  *   copy = dupmachine( mach );
                    141:  *
                    142:  *     copy - holds duplicate of mach
                    143:  *     mach - machine to be duplicated
                    144:  *
                    145:  * note that the copy of mach is NOT an exact duplicate; rather, all the
                    146:  * transition states values are adjusted so that the copy is self-contained,
                    147:  * as the original should have been.
                    148:  *
                    149:  * also note that the original MUST be contiguous, with its low and high
                    150:  * states accessible by the arrays firstst and lastst
                    151:  */
                    152:
1.10    ! tedu      153: int     dupmachine (mach)
        !           154:      int     mach;
        !           155: {
        !           156:        int     i, init, state_offset;
        !           157:        int     state = 0;
        !           158:        int     last = lastst[mach];
        !           159:
        !           160:        for (i = firstst[mach]; i <= last; ++i) {
        !           161:                state = mkstate (transchar[i]);
        !           162:
        !           163:                if (trans1[i] != NO_TRANSITION) {
        !           164:                        mkxtion (finalst[state], trans1[i] + state - i);
        !           165:
        !           166:                        if (transchar[i] == SYM_EPSILON &&
        !           167:                            trans2[i] != NO_TRANSITION)
        !           168:                                        mkxtion (finalst[state],
        !           169:                                                 trans2[i] + state - i);
        !           170:                }
1.1       deraadt   171:
                    172:                accptnum[state] = accptnum[i];
1.10    ! tedu      173:        }
1.1       deraadt   174:
1.10    ! tedu      175:        if (state == 0)
        !           176:                flexfatal (_("empty machine in dupmachine()"));
1.1       deraadt   177:
                    178:        state_offset = state - i + 1;
                    179:
                    180:        init = mach + state_offset;
                    181:        firstst[init] = firstst[mach] + state_offset;
                    182:        finalst[init] = finalst[mach] + state_offset;
                    183:        lastst[init] = lastst[mach] + state_offset;
                    184:
                    185:        return init;
1.10    ! tedu      186: }
1.1       deraadt   187:
                    188:
                    189: /* finish_rule - finish up the processing for a rule
                    190:  *
                    191:  * An accepting number is added to the given machine.  If variable_trail_rule
                    192:  * is true then the rule has trailing context and both the head and trail
                    193:  * are variable size.  Otherwise if headcnt or trailcnt is non-zero then
                    194:  * the machine recognizes a pattern with trailing context and headcnt is
                    195:  * the number of characters in the matched part of the pattern, or zero
                    196:  * if the matched part has variable length.  trailcnt is the number of
                    197:  * trailing context characters in the pattern, or zero if the trailing
                    198:  * context has variable length.
                    199:  */
                    200:
1.10    ! tedu      201: void    finish_rule (mach, variable_trail_rule, headcnt, trailcnt,
        !           202:                     pcont_act)
        !           203:      int     mach, variable_trail_rule, headcnt, trailcnt, pcont_act;
        !           204: {
        !           205:        char    action_text[MAXLINE];
1.1       deraadt   206:
1.10    ! tedu      207:        add_accept (mach, num_rules);
1.1       deraadt   208:
                    209:        /* We did this in new_rule(), but it often gets the wrong
                    210:         * number because we do it before we start parsing the current rule.
                    211:         */
                    212:        rule_linenum[num_rules] = linenum;
                    213:
                    214:        /* If this is a continued action, then the line-number has already
                    215:         * been updated, giving us the wrong number.
                    216:         */
1.10    ! tedu      217:        if (continued_action)
1.1       deraadt   218:                --rule_linenum[num_rules];
                    219:
                    220:
1.10    ! tedu      221:        /* If the previous rule was continued action, then we inherit the
        !           222:         * previous newline flag, possibly overriding the current one.
        !           223:         */
        !           224:        if (pcont_act && rule_has_nl[num_rules - 1])
        !           225:                rule_has_nl[num_rules] = true;
        !           226:
        !           227:        snprintf (action_text, sizeof(action_text), "case %d:\n", num_rules);
        !           228:        add_action (action_text);
        !           229:        if (rule_has_nl[num_rules]) {
        !           230:                snprintf (action_text, sizeof(action_text), "/* rule %d can match eol */\n",
        !           231:                         num_rules);
        !           232:                add_action (action_text);
        !           233:        }
        !           234:
        !           235:
        !           236:        if (variable_trail_rule) {
1.1       deraadt   237:                rule_type[num_rules] = RULE_VARIABLE;
                    238:
1.10    ! tedu      239:                if (performance_report > 0)
        !           240:                        fprintf (stderr,
        !           241:                                 _
        !           242:                                 ("Variable trailing context rule at line %d\n"),
        !           243:                                 rule_linenum[num_rules]);
1.1       deraadt   244:
                    245:                variable_trailing_context_rules = true;
1.10    ! tedu      246:        }
1.1       deraadt   247:
1.10    ! tedu      248:        else {
1.1       deraadt   249:                rule_type[num_rules] = RULE_NORMAL;
                    250:
1.10    ! tedu      251:                if (headcnt > 0 || trailcnt > 0) {
1.1       deraadt   252:                        /* Do trailing context magic to not match the trailing
                    253:                         * characters.
                    254:                         */
1.10    ! tedu      255:                        char   *scanner_cp = "YY_G(yy_c_buf_p) = yy_cp";
        !           256:                        char   *scanner_bp = "yy_bp";
1.1       deraadt   257:
1.10    ! tedu      258:                        add_action
        !           259:                                ("*yy_cp = YY_G(yy_hold_char); /* undo effects of setting up yytext */\n");
1.1       deraadt   260:
1.10    ! tedu      261:                        if (headcnt > 0) {
        !           262:                                if (rule_has_nl[num_rules]) {
        !           263:                                        snprintf (action_text, sizeof(action_text),
        !           264:                                                "YY_LINENO_REWIND_TO(%s + %d);\n", scanner_bp, headcnt);
        !           265:                                        add_action (action_text);
1.1       deraadt   266:                                }
1.10    ! tedu      267:                                snprintf (action_text, sizeof(action_text), "%s = %s + %d;\n",
        !           268:                                         scanner_cp, scanner_bp, headcnt);
        !           269:                                add_action (action_text);
        !           270:                        }
1.1       deraadt   271:
1.10    ! tedu      272:                        else {
        !           273:                                if (rule_has_nl[num_rules]) {
        !           274:                                        snprintf (action_text, sizeof(action_text),
        !           275:                                                 "YY_LINENO_REWIND_TO(yy_cp - %d);\n", trailcnt);
        !           276:                                        add_action (action_text);
1.1       deraadt   277:                                }
                    278:
1.10    ! tedu      279:                                snprintf (action_text, sizeof(action_text), "%s -= %d;\n",
        !           280:                                         scanner_cp, trailcnt);
        !           281:                                add_action (action_text);
1.1       deraadt   282:                        }
1.10    ! tedu      283:
        !           284:                        add_action
        !           285:                                ("YY_DO_BEFORE_ACTION; /* set up yytext again */\n");
1.1       deraadt   286:                }
1.10    ! tedu      287:        }
1.1       deraadt   288:
                    289:        /* Okay, in the action code at this point yytext and yyleng have
                    290:         * their proper final values for this rule, so here's the point
                    291:         * to do any user action.  But don't do it for continued actions,
                    292:         * as that'll result in multiple YY_RULE_SETUP's.
                    293:         */
1.10    ! tedu      294:        if (!continued_action)
        !           295:                add_action ("YY_RULE_SETUP\n");
1.1       deraadt   296:
1.10    ! tedu      297:        line_directive_out ((FILE *) 0, 1);
        !           298: }
1.1       deraadt   299:
                    300:
                    301: /* link_machines - connect two machines together
                    302:  *
                    303:  * synopsis
                    304:  *
                    305:  *   new = link_machines( first, last );
                    306:  *
                    307:  *     new    - a machine constructed by connecting first to last
                    308:  *     first  - the machine whose successor is to be last
                    309:  *     last   - the machine whose predecessor is to be first
                    310:  *
                    311:  * note: this routine concatenates the machine first with the machine
                    312:  *  last to produce a machine new which will pattern-match first first
                    313:  *  and then last, and will fail if either of the sub-patterns fails.
                    314:  *  FIRST is set to new by the operation.  last is unmolested.
                    315:  */
                    316:
1.10    ! tedu      317: int     link_machines (first, last)
        !           318:      int     first, last;
        !           319: {
        !           320:        if (first == NIL)
1.1       deraadt   321:                return last;
                    322:
1.10    ! tedu      323:        else if (last == NIL)
1.1       deraadt   324:                return first;
                    325:
1.10    ! tedu      326:        else {
        !           327:                mkxtion (finalst[first], last);
1.1       deraadt   328:                finalst[first] = finalst[last];
1.10    ! tedu      329:                lastst[first] = MAX (lastst[first], lastst[last]);
        !           330:                firstst[first] = MIN (firstst[first], firstst[last]);
1.1       deraadt   331:
                    332:                return first;
                    333:        }
1.10    ! tedu      334: }
1.1       deraadt   335:
                    336:
                    337: /* mark_beginning_as_normal - mark each "beginning" state in a machine
                    338:  *                            as being a "normal" (i.e., not trailing context-
                    339:  *                            associated) states
                    340:  *
                    341:  * The "beginning" states are the epsilon closure of the first state
                    342:  */
                    343:
1.10    ! tedu      344: void    mark_beginning_as_normal (mach)
        !           345:      int mach;
        !           346: {
        !           347:        switch (state_type[mach]) {
        !           348:        case STATE_NORMAL:
        !           349:                /* Oh, we've already visited here. */
        !           350:                return;
        !           351:
        !           352:        case STATE_TRAILING_CONTEXT:
        !           353:                state_type[mach] = STATE_NORMAL;
        !           354:
        !           355:                if (transchar[mach] == SYM_EPSILON) {
        !           356:                        if (trans1[mach] != NO_TRANSITION)
        !           357:                                mark_beginning_as_normal (trans1[mach]);
        !           358:
        !           359:                        if (trans2[mach] != NO_TRANSITION)
        !           360:                                mark_beginning_as_normal (trans2[mach]);
        !           361:                }
        !           362:                break;
        !           363:
        !           364:        default:
        !           365:                flexerror (_
        !           366:                           ("bad state type in mark_beginning_as_normal()"));
        !           367:                break;
1.1       deraadt   368:        }
1.10    ! tedu      369: }
1.1       deraadt   370:
                    371:
                    372: /* mkbranch - make a machine that branches to two machines
                    373:  *
                    374:  * synopsis
                    375:  *
                    376:  *   branch = mkbranch( first, second );
                    377:  *
                    378:  *     branch - a machine which matches either first's pattern or second's
                    379:  *     first, second - machines whose patterns are to be or'ed (the | operator)
                    380:  *
                    381:  * Note that first and second are NEITHER destroyed by the operation.  Also,
                    382:  * the resulting machine CANNOT be used with any other "mk" operation except
                    383:  * more mkbranch's.  Compare with mkor()
                    384:  */
                    385:
1.10    ! tedu      386: int     mkbranch (first, second)
        !           387:      int     first, second;
        !           388: {
        !           389:        int     eps;
1.1       deraadt   390:
1.10    ! tedu      391:        if (first == NO_TRANSITION)
1.1       deraadt   392:                return second;
                    393:
1.10    ! tedu      394:        else if (second == NO_TRANSITION)
1.1       deraadt   395:                return first;
                    396:
1.10    ! tedu      397:        eps = mkstate (SYM_EPSILON);
1.1       deraadt   398:
1.10    ! tedu      399:        mkxtion (eps, first);
        !           400:        mkxtion (eps, second);
1.1       deraadt   401:
                    402:        return eps;
1.10    ! tedu      403: }
1.1       deraadt   404:
                    405:
                    406: /* mkclos - convert a machine into a closure
                    407:  *
                    408:  * synopsis
                    409:  *   new = mkclos( state );
                    410:  *
                    411:  * new - a new state which matches the closure of "state"
                    412:  */
                    413:
1.10    ! tedu      414: int     mkclos (state)
        !           415:      int     state;
        !           416: {
        !           417:        return mkopt (mkposcl (state));
        !           418: }
1.1       deraadt   419:
                    420:
                    421: /* mkopt - make a machine optional
                    422:  *
                    423:  * synopsis
                    424:  *
                    425:  *   new = mkopt( mach );
                    426:  *
                    427:  *     new  - a machine which optionally matches whatever mach matched
                    428:  *     mach - the machine to make optional
                    429:  *
                    430:  * notes:
                    431:  *     1. mach must be the last machine created
                    432:  *     2. mach is destroyed by the call
                    433:  */
                    434:
1.10    ! tedu      435: int     mkopt (mach)
        !           436:      int     mach;
        !           437: {
        !           438:        int     eps;
        !           439:
        !           440:        if (!SUPER_FREE_EPSILON (finalst[mach])) {
        !           441:                eps = mkstate (SYM_EPSILON);
        !           442:                mach = link_machines (mach, eps);
        !           443:        }
1.1       deraadt   444:
                    445:        /* Can't skimp on the following if FREE_EPSILON(mach) is true because
                    446:         * some state interior to "mach" might point back to the beginning
                    447:         * for a closure.
                    448:         */
1.10    ! tedu      449:        eps = mkstate (SYM_EPSILON);
        !           450:        mach = link_machines (eps, mach);
1.1       deraadt   451:
1.10    ! tedu      452:        mkxtion (mach, finalst[mach]);
1.1       deraadt   453:
                    454:        return mach;
1.10    ! tedu      455: }
1.1       deraadt   456:
                    457:
                    458: /* mkor - make a machine that matches either one of two machines
                    459:  *
                    460:  * synopsis
                    461:  *
                    462:  *   new = mkor( first, second );
                    463:  *
                    464:  *     new - a machine which matches either first's pattern or second's
                    465:  *     first, second - machines whose patterns are to be or'ed (the | operator)
                    466:  *
                    467:  * note that first and second are both destroyed by the operation
                    468:  * the code is rather convoluted because an attempt is made to minimize
                    469:  * the number of epsilon states needed
                    470:  */
                    471:
1.10    ! tedu      472: int     mkor (first, second)
        !           473:      int     first, second;
        !           474: {
        !           475:        int     eps, orend;
1.1       deraadt   476:
1.10    ! tedu      477:        if (first == NIL)
1.1       deraadt   478:                return second;
                    479:
1.10    ! tedu      480:        else if (second == NIL)
1.1       deraadt   481:                return first;
                    482:
1.10    ! tedu      483:        else {
1.1       deraadt   484:                /* See comment in mkopt() about why we can't use the first
                    485:                 * state of "first" or "second" if they satisfy "FREE_EPSILON".
                    486:                 */
1.10    ! tedu      487:                eps = mkstate (SYM_EPSILON);
1.1       deraadt   488:
1.10    ! tedu      489:                first = link_machines (eps, first);
1.1       deraadt   490:
1.10    ! tedu      491:                mkxtion (first, second);
1.1       deraadt   492:
1.10    ! tedu      493:                if (SUPER_FREE_EPSILON (finalst[first]) &&
        !           494:                    accptnum[finalst[first]] == NIL) {
1.1       deraadt   495:                        orend = finalst[first];
1.10    ! tedu      496:                        mkxtion (finalst[second], orend);
        !           497:                }
1.1       deraadt   498:
1.10    ! tedu      499:                else if (SUPER_FREE_EPSILON (finalst[second]) &&
        !           500:                         accptnum[finalst[second]] == NIL) {
1.1       deraadt   501:                        orend = finalst[second];
1.10    ! tedu      502:                        mkxtion (finalst[first], orend);
        !           503:                }
1.1       deraadt   504:
1.10    ! tedu      505:                else {
        !           506:                        eps = mkstate (SYM_EPSILON);
1.1       deraadt   507:
1.10    ! tedu      508:                        first = link_machines (first, eps);
1.1       deraadt   509:                        orend = finalst[first];
                    510:
1.10    ! tedu      511:                        mkxtion (finalst[second], orend);
1.1       deraadt   512:                }
1.10    ! tedu      513:        }
1.1       deraadt   514:
                    515:        finalst[first] = orend;
                    516:        return first;
1.10    ! tedu      517: }
1.1       deraadt   518:
                    519:
                    520: /* mkposcl - convert a machine into a positive closure
                    521:  *
                    522:  * synopsis
                    523:  *   new = mkposcl( state );
                    524:  *
                    525:  *    new - a machine matching the positive closure of "state"
                    526:  */
                    527:
1.10    ! tedu      528: int     mkposcl (state)
        !           529:      int     state;
        !           530: {
        !           531:        int     eps;
        !           532:
        !           533:        if (SUPER_FREE_EPSILON (finalst[state])) {
        !           534:                mkxtion (finalst[state], state);
1.1       deraadt   535:                return state;
1.10    ! tedu      536:        }
1.1       deraadt   537:
1.10    ! tedu      538:        else {
        !           539:                eps = mkstate (SYM_EPSILON);
        !           540:                mkxtion (eps, state);
        !           541:                return link_machines (state, eps);
1.1       deraadt   542:        }
1.10    ! tedu      543: }
1.1       deraadt   544:
                    545:
                    546: /* mkrep - make a replicated machine
                    547:  *
                    548:  * synopsis
                    549:  *   new = mkrep( mach, lb, ub );
                    550:  *
                    551:  *    new - a machine that matches whatever "mach" matched from "lb"
                    552:  *          number of times to "ub" number of times
                    553:  *
                    554:  * note
1.10    ! tedu      555:  *   if "ub" is INFINITE_REPEAT then "new" matches "lb" or more occurrences of "mach"
1.1       deraadt   556:  */
                    557:
1.10    ! tedu      558: int     mkrep (mach, lb, ub)
        !           559:      int     mach, lb, ub;
        !           560: {
        !           561:        int     base_mach, tail, copy, i;
1.1       deraadt   562:
1.10    ! tedu      563:        base_mach = copysingl (mach, lb - 1);
1.1       deraadt   564:
1.10    ! tedu      565:        if (ub == INFINITE_REPEAT) {
        !           566:                copy = dupmachine (mach);
        !           567:                mach = link_machines (mach,
        !           568:                                      link_machines (base_mach,
        !           569:                                                     mkclos (copy)));
        !           570:        }
1.1       deraadt   571:
1.10    ! tedu      572:        else {
        !           573:                tail = mkstate (SYM_EPSILON);
1.1       deraadt   574:
1.10    ! tedu      575:                for (i = lb; i < ub; ++i) {
        !           576:                        copy = dupmachine (mach);
        !           577:                        tail = mkopt (link_machines (copy, tail));
1.1       deraadt   578:                }
                    579:
1.10    ! tedu      580:                mach =
        !           581:                        link_machines (mach,
        !           582:                                       link_machines (base_mach, tail));
        !           583:        }
        !           584:
1.1       deraadt   585:        return mach;
1.10    ! tedu      586: }
1.1       deraadt   587:
                    588:
                    589: /* mkstate - create a state with a transition on a given symbol
                    590:  *
                    591:  * synopsis
                    592:  *
                    593:  *   state = mkstate( sym );
                    594:  *
                    595:  *     state - a new state matching sym
                    596:  *     sym   - the symbol the new state is to have an out-transition on
                    597:  *
                    598:  * note that this routine makes new states in ascending order through the
                    599:  * state array (and increments LASTNFA accordingly).  The routine DUPMACHINE
                    600:  * relies on machines being made in ascending order and that they are
                    601:  * CONTIGUOUS.  Change it and you will have to rewrite DUPMACHINE (kludge
                    602:  * that it admittedly is)
                    603:  */
                    604:
1.10    ! tedu      605: int     mkstate (sym)
        !           606:      int     sym;
        !           607: {
        !           608:        if (++lastnfa >= current_mns) {
        !           609:                if ((current_mns += MNS_INCREMENT) >= maximum_mns)
        !           610:                        lerrif (_
        !           611:                                ("input rules are too complicated (>= %d NFA states)"),
        !           612: current_mns);
1.1       deraadt   613:
                    614:                ++num_reallocs;
                    615:
1.10    ! tedu      616:                firstst = reallocate_integer_array (firstst, current_mns);
        !           617:                lastst = reallocate_integer_array (lastst, current_mns);
        !           618:                finalst = reallocate_integer_array (finalst, current_mns);
        !           619:                transchar =
        !           620:                        reallocate_integer_array (transchar, current_mns);
        !           621:                trans1 = reallocate_integer_array (trans1, current_mns);
        !           622:                trans2 = reallocate_integer_array (trans2, current_mns);
        !           623:                accptnum =
        !           624:                        reallocate_integer_array (accptnum, current_mns);
1.1       deraadt   625:                assoc_rule =
1.10    ! tedu      626:                        reallocate_integer_array (assoc_rule, current_mns);
1.1       deraadt   627:                state_type =
1.10    ! tedu      628:                        reallocate_integer_array (state_type, current_mns);
        !           629:        }
1.1       deraadt   630:
                    631:        firstst[lastnfa] = lastnfa;
                    632:        finalst[lastnfa] = lastnfa;
                    633:        lastst[lastnfa] = lastnfa;
                    634:        transchar[lastnfa] = sym;
                    635:        trans1[lastnfa] = NO_TRANSITION;
                    636:        trans2[lastnfa] = NO_TRANSITION;
                    637:        accptnum[lastnfa] = NIL;
                    638:        assoc_rule[lastnfa] = num_rules;
                    639:        state_type[lastnfa] = current_state_type;
                    640:
                    641:        /* Fix up equivalence classes base on this transition.  Note that any
                    642:         * character which has its own transition gets its own equivalence
                    643:         * class.  Thus only characters which are only in character classes
                    644:         * have a chance at being in the same equivalence class.  E.g. "a|b"
                    645:         * puts 'a' and 'b' into two different equivalence classes.  "[ab]"
                    646:         * puts them in the same equivalence class (barring other differences
                    647:         * elsewhere in the input).
                    648:         */
                    649:
1.10    ! tedu      650:        if (sym < 0) {
1.1       deraadt   651:                /* We don't have to update the equivalence classes since
                    652:                 * that was already done when the ccl was created for the
                    653:                 * first time.
                    654:                 */
1.10    ! tedu      655:        }
1.1       deraadt   656:
1.10    ! tedu      657:        else if (sym == SYM_EPSILON)
1.1       deraadt   658:                ++numeps;
                    659:
1.10    ! tedu      660:        else {
        !           661:                check_char (sym);
1.1       deraadt   662:
1.10    ! tedu      663:                if (useecs)
1.1       deraadt   664:                        /* Map NUL's to csize. */
1.10    ! tedu      665:                        mkechar (sym ? sym : csize, nextecm, ecgroup);
        !           666:        }
1.1       deraadt   667:
                    668:        return lastnfa;
1.10    ! tedu      669: }
1.1       deraadt   670:
                    671:
                    672: /* mkxtion - make a transition from one state to another
                    673:  *
                    674:  * synopsis
                    675:  *
                    676:  *   mkxtion( statefrom, stateto );
                    677:  *
                    678:  *     statefrom - the state from which the transition is to be made
                    679:  *     stateto   - the state to which the transition is to be made
                    680:  */
                    681:
1.10    ! tedu      682: void    mkxtion (statefrom, stateto)
        !           683:      int     statefrom, stateto;
        !           684: {
        !           685:        if (trans1[statefrom] == NO_TRANSITION)
1.1       deraadt   686:                trans1[statefrom] = stateto;
                    687:
1.10    ! tedu      688:        else if ((transchar[statefrom] != SYM_EPSILON) ||
        !           689:                 (trans2[statefrom] != NO_TRANSITION))
        !           690:                flexfatal (_("found too many transitions in mkxtion()"));
1.1       deraadt   691:
1.10    ! tedu      692:        else {                  /* second out-transition for an epsilon state */
1.1       deraadt   693:                ++eps2;
                    694:                trans2[statefrom] = stateto;
                    695:        }
1.10    ! tedu      696: }
1.1       deraadt   697:
                    698: /* new_rule - initialize for a new rule */
                    699:
1.10    ! tedu      700: void    new_rule ()
        !           701: {
        !           702:        if (++num_rules >= current_max_rules) {
1.1       deraadt   703:                ++num_reallocs;
                    704:                current_max_rules += MAX_RULES_INCREMENT;
1.10    ! tedu      705:                rule_type = reallocate_integer_array (rule_type,
        !           706:                                                      current_max_rules);
        !           707:                rule_linenum = reallocate_integer_array (rule_linenum,
        !           708:                                                         current_max_rules);
        !           709:                rule_useful = reallocate_integer_array (rule_useful,
        !           710:                                                        current_max_rules);
        !           711:                rule_has_nl = reallocate_bool_array (rule_has_nl,
        !           712:                                                     current_max_rules);
        !           713:        }
1.1       deraadt   714:
1.10    ! tedu      715:        if (num_rules > MAX_RULE)
        !           716:                lerrif (_("too many rules (> %d)!"), MAX_RULE);
1.1       deraadt   717:
                    718:        rule_linenum[num_rules] = linenum;
                    719:        rule_useful[num_rules] = false;
1.10    ! tedu      720:        rule_has_nl[num_rules] = false;
        !           721: }