src/usr.bin/lex/parse.y - annotate

Return to parse.y CVS log
Up to [local] / src / usr.bin / lex
Annotation of src/usr.bin/lex/parse.y, Revision 1.10

1.10    ! millert     1: /*     $OpenBSD: parse.y,v 1.9 2015/11/19 19:43:40 tedu Exp $  */
1.2       deraadt     2:
1.1       deraadt     3: /* parse.y - parser for flex input */
                      4:
                      5: %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
1.9       tedu        6: %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
                      7: %token OPT_TABLES
1.1       deraadt     8:
                      9: %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
                     10: %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
                     11:
1.9       tedu       12: %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
                     13: %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
                     14:
                     15: %left CCL_OP_DIFF CCL_OP_UNION
                     16:
                     17: /*
                     18:  * POSIX and AT&T lex place the
                     19:  * precedence of the repeat operator, {}, below that of concatenation.
                     20:  * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
                     21:  * Regular Expression (ERE) precedence that has the repeat operator
                     22:  * higher than concatenation.  This causes ab{3} to yield abbb.
1.8       millert    23:  *
1.9       tedu       24:  * In order to support the POSIX and AT&T precedence and the flex
                     25:  * precedence we define two token sets for the begin and end tokens of
                     26:  * the repeat operator, '{' and '}'.  The lexical scanner chooses
                     27:  * which tokens to return based on whether posix_compat or lex_compat
                     28:  * is specified. Specifying either posix_compat or lex_compat will
                     29:  * cause flex to parse scanner files as per the AT&T and
                     30:  * POSIX-mandated behavior.
1.1       deraadt    31:  */
                     32:
1.9       tedu       33: %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
1.1       deraadt    34:
                     35:
1.9       tedu       36: %{
                     37: /*  Copyright (c) 1990 The Regents of the University of California. */
                     38: /*  All rights reserved. */
1.1       deraadt    39:
1.9       tedu       40: /*  This code is derived from software contributed to Berkeley by */
                     41: /*  Vern Paxson. */
1.1       deraadt    42:
1.9       tedu       43: /*  The United States Government has rights in this work pursuant */
                     44: /*  to contract no. DE-AC03-76SF00098 between the United States */
                     45: /*  Department of Energy and the University of California. */
                     46:
                     47: /*  This file is part of flex. */
                     48:
                     49: /*  Redistribution and use in source and binary forms, with or without */
                     50: /*  modification, are permitted provided that the following conditions */
                     51: /*  are met: */
                     52:
                     53: /*  1. Redistributions of source code must retain the above copyright */
                     54: /*     notice, this list of conditions and the following disclaimer. */
                     55: /*  2. Redistributions in binary form must reproduce the above copyright */
                     56: /*     notice, this list of conditions and the following disclaimer in the */
                     57: /*     documentation and/or other materials provided with the distribution. */
                     58:
                     59: /*  Neither the name of the University nor the names of its contributors */
                     60: /*  may be used to endorse or promote products derived from this software */
                     61: /*  without specific prior written permission. */
                     62:
                     63: /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
                     64: /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
                     65: /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
                     66: /*  PURPOSE. */
1.1       deraadt    67:
1.9       tedu       68: #include "flexdef.h"
                     69: #include "tables.h"
1.1       deraadt    70:
1.9       tedu       71: int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
1.1       deraadt    72: int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
                     73: /* The ints above are scratch state shared by the grammar actions below. */
                     74: int *scon_stk;  /* start condition numbers; allocated in the sect1end action */
                     75: int scon_stk_ptr;  /* index of the last used scon_stk slot; 0 when empty */
                     76:
                     77: static int madeany = false;  /* whether we've made the '.' character class */
1.9       tedu       78: static int ccldot, cclany;
1.1       deraadt    79: int previous_continued_action; /* whether the previous rule's action was '|' */
                     80: /* Format (fmt, a1, a2) into a MAXLINE-byte local buffer and pass it to warn(). */
1.9       tedu       81: #define format_warn3(fmt, a1, a2) \
                     82:        do{ \
                     83:         char fw3_msg[MAXLINE];\
                     84:         snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\
                     85:         warn( fw3_msg );\
                     86:        }while(0)
                     87:
1.1       deraadt    88: /* Expand a POSIX character class expression: add to currccl every c in [0, csize) that is ASCII and satisfies func(c). */
                     89: #define CCL_EXPR(func) \
1.9       tedu       90:        do{ \
1.1       deraadt    91:        int c; \
                     92:        for ( c = 0; c < csize; ++c ) \
                     93:                if ( isascii(c) && func(c) ) \
                     94:                        ccladd( currccl, c ); \
1.9       tedu       95:        }while(0)
                     96:
                     97: /* Negated class: add to currccl every c in [0, csize) for which func(c) is false (no isascii() filter here, unlike CCL_EXPR). */
                     98: #define CCL_NEG_EXPR(func) \
                     99:        do{ \
                    100:        int c; \
                    101:        for ( c = 0; c < csize; ++c ) \
                    102:                if ( !func(c) ) \
                    103:                        ccladd( currccl, c ); \
                    104:        }while(0)
1.1       deraadt   105:
                    106: /* On some over-ambitious machines, such as DEC Alpha's, the default
                    107:  * token type is "long" instead of "int"; this leads to problems with
                    108:  * declaring yylval in flexdef.h.  But so far, all the yacc's I've seen
                    109:  * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
                    110:  * following should ensure that the default token type is "int".
                    111:  */
                    112: #define YYSTYPE int
                    113:
                    114: %}
                    115:
                    116: %%
                    117: goal           :  initlex sect1 sect1end sect2 initforrule
                    118:                        { /* add default rule */
                    119:                        int def_rule;
                    120:                        /* The default rule's pattern is a freshly initialized, then negated, ccl. */
                    121:                        pat = cclinit();
                    122:                        cclnegate( pat );
                    123:
                    124:                        def_rule = mkstate( -pat );
                    125:
                    126:                        /* Remember the number of the default rule so we
                    127:                         * don't generate "can't match" warnings for it.
                    128:                         */
                    129:                        default_rule = num_rules;
                    130:
1.9       tedu      131:                        finish_rule( def_rule, false, 0, 0, 0);
1.1       deraadt   132:                        /* Branch the default rule into scset[] for every start condition 1..lastsc. */
                    133:                        for ( i = 1; i <= lastsc; ++i )
                    134:                                scset[i] = mkbranch( scset[i], def_rule );
                    135:                        /* Default action: a fatal error when spprdflt is set, otherwise ECHO. */
                    136:                        if ( spprdflt )
                    137:                                add_action(
                    138:                                "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
                    139:                        else
                    140:                                add_action( "ECHO" );
                    141:
                    142:                        add_action( ";\n\tYY_BREAK\n" );
                    143:                        }
                    144:                ;
                    145:
                    146: initlex                :  /* empty; first symbol of goal, so its action runs before sect1 */
                    147:                        { /* initialize for processing rules */
                    148:
                    149:                        /* Create default DFA start condition. */
                    150:                        scinstal( "INITIAL", false );
                    151:                        }
                    152:                ;
                    153:
                    154: sect1          :  sect1 startconddecl namelist1  /* start condition declaration */
                    155:                |  sect1 options
                    156:                |  /* empty */
                    157:                |  error
1.9       tedu      158:                        { synerr( _("unknown error processing section 1") ); }
1.1       deraadt   159:                ;
                    160:
                    161: sect1end       :  SECTEND
                    162:                        {
                    163:                        check_options();
                    164:                        scon_stk = allocate_integer_array( lastsc + 1 );  /* the actions in this file push with ++scon_stk_ptr, so slot 0 is never written */
                    165:                        scon_stk_ptr = 0;  /* stack starts empty */
                    166:                        }
                    167:                ;
                    168:
                    169: startconddecl  :  SCDECL
                    170:                        { xcluflg = false; }
                    171:                        /* xcluflg is the flag later passed to scinstal() by namelist1. */
                    172:                |  XSCDECL
                    173:                        { xcluflg = true; }
                    174:                ;
                    175:
                    176: namelist1      :  namelist1 NAME
                    177:                        { scinstal( nmstr, xcluflg ); }
                    178:                        /* Each NAME is installed via scinstal() using the text in nmstr. */
                    179:                |  NAME
                    180:                        { scinstal( nmstr, xcluflg ); }
                    181:
                    182:                |  error
1.9       tedu      183:                        { synerr( _("bad start condition list") ); }
1.1       deraadt   184:                ;
                    185:
                    186: options                :  OPTION_OP optionlist  /* OPTION_OP followed by zero or more option entries */
                    187:                ;
                    188:
                    189: optionlist     :  optionlist option
                    190:                |  /* empty */
                    191:                ;
                    192:
                    193: option         :  OPT_OUTFILE '=' NAME  /* every alternative stores a copy_string() copy of nmstr */
                    194:                        {
                    195:                        outfilename = copy_string( nmstr );
                    196:                        did_outfilename = 1;  /* record that an output file name was given */
                    197:                        }
1.9       tedu      198:                |  OPT_EXTRA_TYPE '=' NAME
                    199:                        { extra_type = copy_string( nmstr ); }
1.1       deraadt   200:                |  OPT_PREFIX '=' NAME
                    201:                        { prefix = copy_string( nmstr ); }
                    202:                |  OPT_YYCLASS '=' NAME
                    203:                        { yyclass = copy_string( nmstr ); }
1.9       tedu      204:                |  OPT_HEADER '=' NAME
                    205:                        { headerfilename = copy_string( nmstr ); }
                    206:            |  OPT_TABLES '=' NAME
                    207:             { tablesext = true; tablesfilename = copy_string( nmstr ); }  /* also sets tablesext */
1.1       deraadt   208:                ;
                    209:
                    210: sect2          :  sect2 scon initforrule flexrule '\n'
                    211:                        { scon_stk_ptr = $2; }  /* restore the stack depth that scon saved in its value */
                    212:                |  sect2 scon '{' sect2 '}'
                    213:                        { scon_stk_ptr = $2; }  /* same restore after a braced group of rules */
                    214:                |  /* empty */
                    215:                ;
                    216:
                    217: initforrule    :  /* empty; used only for its action */
                    218:                        {
                    219:                        /* Initialize for a parse of one rule. */
                    220:                        trlcontxt = variable_trail_rule = varlength = false;
                    221:                        trailcnt = headcnt = rulelen = 0;
                    222:                        current_state_type = STATE_NORMAL;
                    223:                        previous_continued_action = continued_action;  /* copy taken before the new rule is parsed */
                    224:                        in_rule = true;
                    225:
                    226:                        new_rule();
                    227:                        }
                    228:                ;
                    229:
                    230: flexrule       :  '^' rule
                    231:                        {
                    232:                        pat = $2;
                    233:                        finish_rule( pat, variable_trail_rule,
1.9       tedu      234:                                headcnt, trailcnt , previous_continued_action);
1.1       deraadt   235:                        /* With start conditions on the stack, branch pat into scbol[] for each of them. */
                    236:                        if ( scon_stk_ptr > 0 )
                    237:                                {
                    238:                                for ( i = 1; i <= scon_stk_ptr; ++i )
                    239:                                        scbol[scon_stk[i]] =
                    240:                                                mkbranch( scbol[scon_stk[i]],
                    241:                                                                pat );
                    242:                                }
                    243:
                    244:                        else
                    245:                                {
                    246:                                /* Add to all non-exclusive start conditions,
                    247:                                 * including the default (0) start condition.
                    248:                                 */
                    249:                                /* NOTE(review): the loop below covers 1..lastsc; confirm which index INITIAL gets. */
                    250:                                for ( i = 1; i <= lastsc; ++i )
                    251:                                        if ( ! scxclu[i] )
                    252:                                                scbol[i] = mkbranch( scbol[i],
                    253:                                                                        pat );
                    254:                                }
                    255:                        /* First '^' rule seen: set bol_needed and optionally report the performance cost. */
                    256:                        if ( ! bol_needed )
                    257:                                {
                    258:                                bol_needed = true;
                    259:
                    260:                                if ( performance_report > 1 )
                    261:                                        pinpoint_message(
                    262:                        "'^' operator results in sub-optimal performance" );
                    263:                                }
                    264:                        }
                    265:
                    266:                |  rule
                    267:                        {
                    268:                        pat = $1;
                    269:                        finish_rule( pat, variable_trail_rule,
1.9       tedu      270:                                headcnt, trailcnt , previous_continued_action);
1.1       deraadt   271:                        /* Same dispatch as the '^' case, but into scset[] instead of scbol[]. */
                    272:                        if ( scon_stk_ptr > 0 )
                    273:                                {
                    274:                                for ( i = 1; i <= scon_stk_ptr; ++i )
                    275:                                        scset[scon_stk[i]] =
                    276:                                                mkbranch( scset[scon_stk[i]],
                    277:                                                                pat );
                    278:                                }
                    279:
                    280:                        else
                    281:                                {
                    282:                                for ( i = 1; i <= lastsc; ++i )
                    283:                                        if ( ! scxclu[i] )
                    284:                                                scset[i] =
                    285:                                                        mkbranch( scset[i],
                    286:                                                                pat );
                    287:                                }
                    288:                        }
                    289:
                    290:                |  EOF_OP
                    291:                        {
                    292:                        if ( scon_stk_ptr > 0 )
                    293:                                build_eof_action();
                    294:
                    295:                        else
                    296:                                {
                    297:                                /* This EOF applies to all start conditions
                    298:                                 * which don't already have EOF actions.
                    299:                                 */
                    300:                                for ( i = 1; i <= lastsc; ++i )
                    301:                                        if ( ! sceof[i] )
                    302:                                                scon_stk[++scon_stk_ptr] = i;
                    303:                                /* Nothing was pushed: every start condition already has an EOF action. */
                    304:                                if ( scon_stk_ptr == 0 )
                    305:                                        warn(
                    306:                        "all start conditions already have <<EOF>> rules" );
                    307:
                    308:                                else
                    309:                                        build_eof_action();
                    310:                                }
                    311:                        }
                    312:
                    313:                |  error
1.9       tedu      314:                        { synerr( _("unrecognized rule") ); }
1.1       deraadt   315:                ;
                    316:
                    317: scon_stk_ptr   :  /* empty marker: its value is the stack depth at this point in the parse */
                    318:                        { $$ = scon_stk_ptr; }
                    319:                ;
                    320:
                    321: scon           :  '<' scon_stk_ptr namelist2 '>'
                    322:                        { $$ = $2; }
                    323:                        /* '<*>': push every start condition 1..lastsc that is not already on the stack. */
                    324:                |  '<' '*' '>'
                    325:                        {
                    326:                        $$ = scon_stk_ptr;
                    327:
                    328:                        for ( i = 1; i <= lastsc; ++i )
                    329:                                {
                    330:                                int j;
                    331:
                    332:                                for ( j = 1; j <= scon_stk_ptr; ++j )
                    333:                                        if ( scon_stk[j] == i )
                    334:                                                break;
                    335:                                /* j ran past the top of the stack, so i was not found. */
                    336:                                if ( j > scon_stk_ptr )
                    337:                                        scon_stk[++scon_stk_ptr] = i;
                    338:                                }
                    339:                        }
                    340:
                    341:                |  /* empty: no '<...>' prefix */
                    342:                        { $$ = scon_stk_ptr; }
                    343:                ;
                    344:
                    345: namelist2      :  namelist2 ',' sconname
                    346:                        /* No action here: the sconname action does the stack push. */
                    347:                |  sconname
                    348:
                    349:                |  error
1.9       tedu      350:                        { synerr( _("bad start condition list") ); }
1.1       deraadt   351:                ;
                    352:
                    353: sconname       :  NAME
                    354:                        {
                    355:                        if ( (scnum = sclookup( nmstr )) == 0 )  /* a 0 result is reported as undeclared */
                    356:                                format_pinpoint_message(
                    357:                                        "undeclared start condition %s",
                    358:                                        nmstr );
                    359:                        else
                    360:                                {
                    361:                                for ( i = 1; i <= scon_stk_ptr; ++i )
                    362:                                        if ( scon_stk[i] == scnum )
                    363:                                                {
                    364:                                                format_warn(
                    365:                                                        "<%s> specified twice",
                    366:                                                        scname[scnum] );
                    367:                                                break;
                    368:                                                }
                    369:                                /* The loop ended without a match: push scnum. */
                    370:                                if ( i > scon_stk_ptr )
                    371:                                        scon_stk[++scon_stk_ptr] = scnum;
                    372:                                }
                    373:                        }
                    374:                ;
                    375:
                    376: rule           :  re2 re  /* re2 is "re '/'", so $1 is the part before '/' and $2 the part after it */
                    377:                        {
                    378:                        if ( transchar[lastst[$2]] != SYM_EPSILON )
                    379:                                /* Provide final transition \now/ so it
                    380:                                 * will be marked as a trailing context
                    381:                                 * state.
                    382:                                 */
                    383:                                $2 = link_machines( $2,
                    384:                                                mkstate( SYM_EPSILON ) );
                    385:
                    386:                        mark_beginning_as_normal( $2 );
                    387:                        current_state_type = STATE_NORMAL;
                    388:
                    389:                        if ( previous_continued_action )
                    390:                                {
                    391:                                /* We need to treat this as variable trailing
                    392:                                 * context so that the backup does not happen
                    393:                                 * in the action but before the action switch
                    394:                                 * statement.  If the backup happens in the
                    395:                                 * action, then the rules "falling into" this
                    396:                                 * one's action will *also* do the backup,
                    397:                                 * erroneously.
                    398:                                 */
                    399:                                if ( ! varlength || headcnt != 0 )
                    400:                                        warn(
                    401:                "trailing context made variable due to preceding '|' action" );
                    402:
                    403:                                /* Mark as variable. */
                    404:                                varlength = true;
                    405:                                headcnt = 0;
1.9       tedu      406:
1.1       deraadt   407:                                }
                    408:
                    409:                        if ( lex_compat || (varlength && headcnt == 0) )
                    410:                                { /* variable trailing context rule */
                    411:                                /* Mark the first part of the rule as the
                    412:                                 * accepting "head" part of a trailing
                    413:                                 * context rule.
                    414:                                 *
                    415:                                 * By the way, we didn't do this at the
                    416:                                 * beginning of this production because back
                    417:                                 * then current_state_type was set up for a
                    418:                                 * trail rule, and add_accept() can create
                    419:                                 * a new state ...
                    420:                                 */
                    421:                                add_accept( $1,
                    422:                                        num_rules | YY_TRAILING_HEAD_MASK );
                    423:                                variable_trail_rule = true;
                    424:                                }
                    425:
                    426:                        else
                    427:                                trailcnt = rulelen;
                    428:                        /* The rule's machine is the head linked to the trailing part. */
                    429:                        $$ = link_machines( $1, $2 );
                    430:                        }
                    431:
                    432:                |  re2 re '$'
1.9       tedu      433:                        { synerr( _("trailing context used twice") ); }
1.1       deraadt   434:
                    435:                |  re '$'
                    436:                        {
                    437:                        headcnt = 0;
                    438:                        trailcnt = 1;
                    439:                        rulelen = 1;
                    440:                        varlength = false;
                    441:
                    442:                        current_state_type = STATE_TRAILING_CONTEXT;
                    443:
                    444:                        if ( trlcontxt )
                    445:                                {
1.9       tedu      446:                                synerr( _("trailing context used twice") );
1.1       deraadt   447:                                $$ = mkstate( SYM_EPSILON );
                    448:                                }
                    449:                        /* NOTE(review): the $$ assigned just above is overwritten by the unconditional link_machines() assignment at the end of this action. */
                    450:                        else if ( previous_continued_action )
                    451:                                {
                    452:                                /* See the comment in the rule for "re2 re"
                    453:                                 * above.
                    454:                                 */
                    455:                                warn(
                    456:                "trailing context made variable due to preceding '|' action" );
                    457:
                    458:                                varlength = true;
                    459:                                }
                    460:
                    461:                        if ( lex_compat || varlength )
                    462:                                {
                    463:                                /* Again, see the comment in the rule for
                    464:                                 * "re2 re" above.
                    465:                                 */
                    466:                                add_accept( $1,
                    467:                                        num_rules | YY_TRAILING_HEAD_MASK );
                    468:                                variable_trail_rule = true;
                    469:                                }
                    470:
                    471:                        trlcontxt = true;
                    472:                        /* Append an epsilon state followed by a state on newline to the head. */
                    473:                        eps = mkstate( SYM_EPSILON );
                    474:                        $$ = link_machines( $1,
                    475:                                link_machines( eps, mkstate( '\n' ) ) );
                    476:                        }
                    477:
                    478:                |  re
                    479:                        {
                    480:                        $$ = $1;
                    481:
                    482:                        if ( trlcontxt )
                    483:                                {
                    484:                                if ( lex_compat || (varlength && headcnt == 0) )
                    485:                                        /* Both head and trail are
                    486:                                         * variable-length.
                    487:                                         */
                    488:                                        variable_trail_rule = true;
                    489:                                else
                    490:                                        trailcnt = rulelen;
                    491:                                }
                    492:                        }
                    493:                ;
                    494:
                    495:
                    496: re             :  re '|' series
                    497:                        {
                    498:                        varlength = true;  /* any alternation marks the rule as variable-length */
                    499:                        $$ = mkor( $1, $3 );
                    500:                        }
                    501:
                    502:                |  series
                    503:                        { $$ = $1; }
                    504:                ;
                    505:
                    506:
                    507: re2            :  re '/'
                    508:                        {
                    509:                        /* This rule is written separately so the
                    510:                         * reduction will occur before the trailing
                    511:                         * series is parsed.
                    512:                         */
                    513:
                    514:                        if ( trlcontxt )
1.9       tedu      515:                                synerr( _("trailing context used twice") );
1.1       deraadt   516:                        else
                    517:                                trlcontxt = true;
                    518:
                    519:                        if ( varlength )
                    520:                                /* We hope the trailing context is
                    521:                                 * fixed-length.
                    522:                                 */
                    523:                                varlength = false;
                    524:                        else
                    525:                                headcnt = rulelen;
                    526:                        /* rulelen restarts at 0 for the part that follows '/'. */
                    527:                        rulelen = 0;
                    528:
                    529:                        current_state_type = STATE_TRAILING_CONTEXT;
                    530:                        $$ = $1;
                    531:                        }
                    532:                ;
                    533:
                    534: series         :  series singleton
                    535:                        {
                    536:                        /* This is where concatenation of adjacent patterns
                    537:                         * gets done.
                    538:                         */
                    539:                        $$ = link_machines( $1, $2 );
                    540:                        }
                    541:
                    542:                |  singleton
                    543:                        { $$ = $1; }
1.9       tedu      544:
                    545:                |  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
                    546:                        {
                                                /* series{m,n}, where the repeat applies to the
                                                 * whole series.  Requires 0 <= m <= n, and
                                                 * n > 0 when m is 0.  With m == 0 the result
                                                 * is mkopt() applied to a 1..n repetition.  On
                                                 * bad values a syntax error is reported and
                                                 * the series is passed through unchanged.
                                                 */
                    547:                        varlength = true;
                    548:
                    549:                        if ( $3 > $5 || $3 < 0 )
                    550:                                {
                    551:                                synerr( _("bad iteration values") );
                    552:                                $$ = $1;
                    553:                                }
                    554:                        else
                    555:                                {
                    556:                                if ( $3 == 0 )
                    557:                                        {
                    558:                                        if ( $5 <= 0 )
                    559:                                                {
                    560:                                                synerr(
                    561:                                                _("bad iteration values") );
                    562:                                                $$ = $1;
                    563:                                                }
                    564:                                        else
                    565:                                                $$ = mkopt(
                    566:                                                        mkrep( $1, 1, $5 ) );
                    567:                                        }
                    568:                                else
                    569:                                        $$ = mkrep( $1, $3, $5 );
                    570:                                }
                    571:                        }
                    572:
                    573:                |  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
                    574:                        {
                                                /* series{m,}: m must be positive; the upper
                                                 * bound passed to mkrep() is INFINITE_REPEAT.
                                                 */
                    575:                        varlength = true;
                    576:
                    577:                        if ( $3 <= 0 )
                    578:                                {
                    579:                                synerr( _("iteration value must be positive") );
                    580:                                $$ = $1;
                    581:                                }
                    582:
                    583:                        else
                    584:                                $$ = mkrep( $1, $3, INFINITE_REPEAT );
                    585:                        }
                    586:
                    587:                |  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
                    588:                        {
                    589:                        /* The series could be something like "(foo)",
                    590:                         * in which case we have no idea what its length
                    591:                         * is, so we punt here.
                                                 *
                                                 * series{m}: m must be positive; the series is
                                                 * linked to the result of copysingl() called
                                                 * with a count of m - 1.
                    592:                         */
                    593:                        varlength = true;
                    594:
                    595:                        if ( $3 <= 0 )
                    596:                                {
                    597:                                  synerr( _("iteration value must be positive")
                    598:                                          );
                    599:                                $$ = $1;
                    600:                                }
                    601:
                    602:                        else
                    603:                                $$ = link_machines( $1,
                    604:                                                copysingl( $1, $3 - 1 ) );
                    605:                        }
                    606:
1.1       deraadt   607:                ;
                    608:
                    609: singleton      :  singleton '*'
                    610:                        {
                    611:                        varlength = true;
                    612:
                    613:                        $$ = mkclos( $1 );
                    614:                        }
                    615:
                    616:                |  singleton '+'
                    617:                        {
                    618:                        varlength = true;
                    619:                        $$ = mkposcl( $1 );
                    620:                        }
                    621:
                    622:                |  singleton '?'
                    623:                        {
                    624:                        varlength = true;
                    625:                        $$ = mkopt( $1 );
                    626:                        }
                    627:
1.9       tedu      628:                |  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
1.1       deraadt   629:                        {
                                                /* singleton{m,n}, where the repeat binds to the
                                                 * preceding singleton only.  Requires
                                                 * 0 <= m <= n, and n > 0 when m is 0.  With
                                                 * m == 0 the result is mkopt() applied to a
                                                 * 1..n repetition.  On bad values a syntax
                                                 * error is reported and the singleton is
                                                 * passed through unchanged.
                                                 */
                    630:                        varlength = true;
                    631:
                    632:                        if ( $3 > $5 || $3 < 0 )
                    633:                                {
1.9       tedu      634:                                synerr( _("bad iteration values") );
1.1       deraadt   635:                                $$ = $1;
                    636:                                }
                    637:                        else
                    638:                                {
                    639:                                if ( $3 == 0 )
                    640:                                        {
                    641:                                        if ( $5 <= 0 )
                    642:                                                {
                    643:                                                synerr(
1.9       tedu      644:                                                _("bad iteration values") );
1.1       deraadt   645:                                                $$ = $1;
                    646:                                                }
                    647:                                        else
                    648:                                                $$ = mkopt(
                    649:                                                        mkrep( $1, 1, $5 ) );
                    650:                                        }
                    651:                                else
                    652:                                        $$ = mkrep( $1, $3, $5 );
                    653:                                }
                    654:                        }
                    655:
1.9       tedu      656:                |  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
1.1       deraadt   657:                        {
                                                /* singleton{m,}: m must be positive; the upper
                                                 * bound passed to mkrep() is INFINITE_REPEAT.
                                                 */
                    658:                        varlength = true;
                    659:
                    660:                        if ( $3 <= 0 )
                    661:                                {
1.9       tedu      662:                                synerr( _("iteration value must be positive") );
1.1       deraadt   663:                                $$ = $1;
                    664:                                }
                    665:
                    666:                        else
1.9       tedu      667:                                $$ = mkrep( $1, $3, INFINITE_REPEAT );
1.1       deraadt   668:                        }
                    669:
1.9       tedu      670:                |  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
1.1       deraadt   671:                        {
                    672:                        /* The singleton could be something like "(foo)",
                    673:                         * in which case we have no idea what its length
                    674:                         * is, so we punt here.
                                                 *
                                                 * singleton{m}: m must be positive; the
                                                 * singleton is linked to the result of
                                                 * copysingl() called with a count of m - 1.
                    675:                         */
                    676:                        varlength = true;
                    677:
                    678:                        if ( $3 <= 0 )
                    679:                                {
1.9       tedu      680:                                synerr( _("iteration value must be positive") );
1.1       deraadt   681:                                $$ = $1;
                    682:                                }
                    683:
                    684:                        else
                    685:                                $$ = link_machines( $1,
                    686:                                                copysingl( $1, $3 - 1 ) );
                    687:                        }
                    688:
                    689:                |  '.'
                    690:                        {
                                                /* The two classes used for '.' are built only
                                                 * once, on first use (guarded by madeany), and
                                                 * reused afterwards.
                                                 */
                    691:                        if ( ! madeany )
                    692:                                {
                    693:                                /* Create the '.' character class. */
1.9       tedu      694:                     ccldot = cclinit();
                    695:                     ccladd( ccldot, '\n' );
                    696:                     cclnegate( ccldot );
                    697:
                    698:                     if ( useecs )
                    699:                         mkeccl( ccltbl + cclmap[ccldot],
                    700:                             ccllen[ccldot], nextecm,
                    701:                             ecgroup, csize, csize );
                    702:
                    703:                                /* Create the (?s:'.') character class. */
                    704:                     cclany = cclinit();
                    705:                     cclnegate( cclany );
                    706:
                    707:                     if ( useecs )
                    708:                         mkeccl( ccltbl + cclmap[cclany],
                    709:                             ccllen[cclany], nextecm,
                    710:                             ecgroup, csize, csize );
1.1       deraadt   711:
                    712:                                madeany = true;
                    713:                                }
                    714:
                    715:                        ++rulelen;
                    716:
                                                /* sf_dot_all() selects which of the two classes
                                                 * this '.' refers to; a class is handed to
                                                 * mkstate() as its negated ccl number.
                                                 */
1.9       tedu      717:             if (sf_dot_all())
                    718:                 $$ = mkstate( -cclany );
                    719:             else
                    720:                 $$ = mkstate( -ccldot );
1.1       deraadt   721:                        }
                    722:
                    723:                |  fullccl
                    724:                        {
1.9       tedu      725:                                /* Sort characters for fast searching.
1.1       deraadt   726:                                 */
1.9       tedu      727:                                qsort( ccltbl + cclmap[$1], ccllen[$1], sizeof (*ccltbl), cclcmp );
1.1       deraadt   728:
                    729:                        if ( useecs )
                    730:                                mkeccl( ccltbl + cclmap[$1], ccllen[$1],
                    731:                                        nextecm, ecgroup, csize, csize );
                    732:
                    733:                        ++rulelen;
                    734:
                                                /* Remember that the current rule has a class
                                                 * flagged in ccl_has_nl[].
                                                 */
1.9       tedu      735:                        if (ccl_has_nl[$1])
                    736:                                rule_has_nl[num_rules] = true;
                    737:
1.1       deraadt   738:                        $$ = mkstate( -$1 );
                    739:                        }
                    740:
                    741:                |  PREVCCL
                    742:                        {
                                                /* Same as the fullccl case, minus the sorting
                                                 * and mkeccl() steps.
                                                 */
                    743:                        ++rulelen;
                    744:
1.9       tedu      745:                        if (ccl_has_nl[$1])
                    746:                                rule_has_nl[num_rules] = true;
                    747:
1.1       deraadt   748:                        $$ = mkstate( -$1 );
                    749:                        }
                    750:
                    751:                |  '"' string '"'
                    752:                        { $$ = $2; }
                    753:
                    754:                |  '(' re ')'
                    755:                        { $$ = $2; }
                    756:
                    757:                |  CHAR
                    758:                        {
                    759:                        ++rulelen;
                    760:
1.9       tedu      761:                        if ($1 == nlch)
                    762:                                rule_has_nl[num_rules] = true;
1.1       deraadt   763:
1.9       tedu      764:             if (sf_case_ins() && has_case($1))
                    765:                 /* create an alternation, as in (a|A) */
                    766:                 $$ = mkor (mkstate($1), mkstate(reverse_case($1)));
                    767:             else
                    768:                 $$ = mkstate( $1 );
1.1       deraadt   769:                        }
                    770:                ;
1.9       tedu      771: fullccl:
                         /* Set difference and union of bracketed character classes.  The rule
                          * is left-recursive, so a chain of operators is folded left to right.
                          * The bare braceccl alternative has no action, so the default yacc
                          * action (pass the first component through) applies.
                          */
                    772:         fullccl CCL_OP_DIFF  braceccl  { $$ = ccl_set_diff  ($1, $3); }
                    773:     |   fullccl CCL_OP_UNION braceccl  { $$ = ccl_set_union ($1, $3); }
                    774:     |   braceccl
                    775:     ;
1.1       deraadt   776:
1.9       tedu      777: braceccl:
                         /* A bracketed character class.  The '^' form negates the class with
                          * cclnegate() before returning it.
                          */
                    778:
                    779:             '[' ccl ']' { $$ = $2; }
1.1       deraadt   780:
                    781:                |  '[' '^' ccl ']'
                    782:                        {
                    783:                        cclnegate( $3 );
                    784:                        $$ = $3;
                    785:                        }
                    786:                ;
                    787:
                    788: ccl            :  ccl CHAR '-' CHAR
                    789:                        {
                                                /* Character range inside a class.  Every
                                                 * character from the first endpoint through
                                                 * the second (inclusive) is added; a first
                                                 * endpoint greater than the second is a syntax
                                                 * error.  In a case-insensitive scanner the
                                                 * range is first checked for ambiguity (warning
                                                 * only) and, when both endpoints have case, the
                                                 * case-reversed range is added as well.
                                                 */
1.9       tedu      790:
                    791:                        if (sf_case_ins())
                    792:                          {
                    793:
                    794:                            /* If one end of the range has case and the other
                    795:                             * does not, or the cases are different, then we're not
                    796:                             * sure what range the user is trying to express.
                    797:                             * Examples: [@-z] or [S-t]
                    798:                             */
                    799:                            if (has_case ($2) != has_case ($4)
                    800:                                     || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
                    801:                                     || (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
                    802:                              format_warn3 (
                    803:                              _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
                    804:                                            $2, $4);
                    805:
                    806:                            /* If the range spans uppercase characters but not
                    807:                             * lowercase (or vice-versa), then should we automatically
                    808:                             * include lowercase characters in the range?
                    809:                             * Example: [@-_] spans [a-z] but not [A-Z]
                    810:                             */
                    811:                            else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
                    812:                              format_warn3 (
                    813:                              _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
                    814:                                            $2, $4);
                    815:                          }
1.1       deraadt   816:
                    817:                        if ( $2 > $4 )
1.9       tedu      818:                                synerr( _("negative range in character class") );
1.1       deraadt   819:
                    820:                        else
                    821:                                {
                    822:                                for ( i = $2; i <= $4; ++i )
                    823:                                        ccladd( $1, i );
                    824:
                    825:                                /* Keep track if this ccl is staying in
                    826:                                 * alphabetical order.
                    827:                                 */
                    828:                                cclsorted = cclsorted && ($2 > lastchar);
                    829:                                lastchar = $4;
1.9       tedu      830:
                    831:                 /* Do it again for upper/lowercase */
                    832:                 if (sf_case_ins() && has_case($2) && has_case($4)){
                    833:                     $2 = reverse_case ($2);
                    834:                     $4 = reverse_case ($4);
                    835:
                    836:                     for ( i = $2; i <= $4; ++i )
                    837:                         ccladd( $1, i );
                    838:
                    839:                     cclsorted = cclsorted && ($2 > lastchar);
                    840:                     lastchar = $4;
                    841:                 }
                    842:
1.1       deraadt   843:                                }
                    844:
                    845:                        $$ = $1;
                    846:                        }
                    847:
                    848:                |  ccl CHAR
                    849:                        {
                                                /* Single character: added to the class, and in
                                                 * a case-insensitive scanner its case-reversed
                                                 * counterpart is added too.  cclsorted and
                                                 * lastchar are updated after each addition.
                                                 */
                    850:                        ccladd( $1, $2 );
                    851:                        cclsorted = cclsorted && ($2 > lastchar);
                    852:                        lastchar = $2;
1.9       tedu      853:
                    854:             /* Do it again for upper/lowercase */
                    855:             if (sf_case_ins() && has_case($2)){
                    856:                 $2 = reverse_case ($2);
                    857:                 ccladd ($1, $2);
                    858:
                    859:                 cclsorted = cclsorted && ($2 > lastchar);
                    860:                 lastchar = $2;
                    861:             }
                    862:
1.1       deraadt   863:                        $$ = $1;
                    864:                        }
                    865:
                    866:                |  ccl ccl_expr
                    867:                        {
                    868:                        /* Too hard to properly maintain cclsorted. */
                    869:                        cclsorted = false;
                    870:                        $$ = $1;
                    871:                        }
                    872:
                    873:                |
                    874:                        {
                                                /* Empty production: start a new class with
                                                 * cclinit(), remember it in currccl, and reset
                                                 * the sortedness tracking.
                                                 */
                    875:                        cclsorted = true;
                    876:                        lastchar = 0;
                    877:                        currccl = $$ = cclinit();
                    878:                        }
                    879:                ;
                    880:
1.9       tedu      881: ccl_expr:
                         /* Character class expressions such as [:alpha:] and their negated
                          * [:^alpha:] forms.  CCL_EXPR and CCL_NEG_EXPR are macros defined
                          * outside this part of the file; presumably they add to the class
                          * being built the characters for which the named <ctype.h> predicate
                          * is true (CCL_EXPR) or false (CCL_NEG_EXPR) - TODO confirm against
                          * the macro definitions.
                          *
                          * In a case-insensitive scanner (sf_case_ins()), the lower and upper
                          * expressions each apply both islower and isupper, while their
                          * negated forms only emit a warning and apply nothing.
                          */
                    882:            CCE_ALNUM   { CCL_EXPR(isalnum); }
                    883:                |  CCE_ALPHA    { CCL_EXPR(isalpha); }
1.10    ! millert   884:                |  CCE_BLANK    { CCL_EXPR(isblank); }
1.9       tedu      885:                |  CCE_CNTRL    { CCL_EXPR(iscntrl); }
                    886:                |  CCE_DIGIT    { CCL_EXPR(isdigit); }
                    887:                |  CCE_GRAPH    { CCL_EXPR(isgraph); }
                    888:                |  CCE_LOWER    {
                    889:                           CCL_EXPR(islower);
                    890:                           if (sf_case_ins())
                    891:                               CCL_EXPR(isupper);
                    892:                         }
                    893:                |  CCE_PRINT    { CCL_EXPR(isprint); }
                    894:                |  CCE_PUNCT    { CCL_EXPR(ispunct); }
                    895:                |  CCE_SPACE    { CCL_EXPR(isspace); }
                    896:                |  CCE_XDIGIT   { CCL_EXPR(isxdigit); }
1.1       deraadt   897:                |  CCE_UPPER    {
1.9       tedu      898:                     CCL_EXPR(isupper);
                    899:                     if (sf_case_ins())
                    900:                         CCL_EXPR(islower);
                    901:                                }
                    902:
                    903:         |  CCE_NEG_ALNUM       { CCL_NEG_EXPR(isalnum); }
                    904:                |  CCE_NEG_ALPHA        { CCL_NEG_EXPR(isalpha); }
1.10    ! millert   905:                |  CCE_NEG_BLANK        { CCL_NEG_EXPR(isblank); }
1.9       tedu      906:                |  CCE_NEG_CNTRL        { CCL_NEG_EXPR(iscntrl); }
                    907:                |  CCE_NEG_DIGIT        { CCL_NEG_EXPR(isdigit); }
                    908:                |  CCE_NEG_GRAPH        { CCL_NEG_EXPR(isgraph); }
                    909:                |  CCE_NEG_PRINT        { CCL_NEG_EXPR(isprint); }
                    910:                |  CCE_NEG_PUNCT        { CCL_NEG_EXPR(ispunct); }
                    911:                |  CCE_NEG_SPACE        { CCL_NEG_EXPR(isspace); }
                    912:                |  CCE_NEG_XDIGIT       { CCL_NEG_EXPR(isxdigit); }
                    913:                |  CCE_NEG_LOWER        {
                    914:                                if ( sf_case_ins() )
                    915:                                        warn(_("[:^lower:] is ambiguous in case insensitive scanner"));
1.1       deraadt   916:                                else
1.9       tedu      917:                                        CCL_NEG_EXPR(islower);
                    918:                                }
                    919:                |  CCE_NEG_UPPER        {
                    920:                                if ( sf_case_ins() )
                    921:                                        warn(_("[:^upper:] ambiguous in case insensitive scanner"));
                    922:                                else
                    923:                                        CCL_NEG_EXPR(isupper);
1.1       deraadt   924:                                }
                    925:                ;
                    926:
                    927: string         :  string CHAR
                    928:                        {
                                                /* Extend the machine for a quoted string by one
                                                 * character.  In a case-insensitive scanner a
                                                 * character that has case becomes an
                                                 * alternation of itself and its case-reversed
                                                 * counterpart.
                                                 */
1.9       tedu      929:                        if ( $2 == nlch )
                    930:                                rule_has_nl[num_rules] = true;
1.1       deraadt   931:
                    932:                        ++rulelen;
                    933:
1.9       tedu      934:             if (sf_case_ins() && has_case($2))
                    935:                 $$ = mkor (mkstate($2), mkstate(reverse_case($2)));
                    936:             else
                    937:                 $$ = mkstate ($2);
                    938:
                    939:                        $$ = link_machines( $1, $$);
1.1       deraadt   940:                        }
                    941:
                    942:                |
                    943:                        { $$ = mkstate( SYM_EPSILON ); }
                    944:                ;
                    945:
                    946: %%
                    947:
                    948:
                    949: /* build_eof_action - build the "<<EOF>>" action for the active start
                    950:  *                    conditions
                          *
                          * For each start condition in scon_stk[1..scon_stk_ptr]: if sceof[]
                          * is already set for it, a "multiple <<EOF>> rules" message is
                          * reported; otherwise sceof[] is set and a "case YY_STATE_EOF(name):"
                          * label is appended with add_action().  Afterwards num_rules is
                          * decremented and num_eof_rules is incremented.
                    951:  */
                    952:
                    953: void build_eof_action()
                    954:        {
1.9       tedu      955:        int i;
1.1       deraadt   956:        char action_text[MAXLINE];
                    957:
                    958:        for ( i = 1; i <= scon_stk_ptr; ++i )
                    959:                {
                    960:                if ( sceof[scon_stk[i]] )
                    961:                        format_pinpoint_message(
                    962:                                "multiple <<EOF>> rules for start condition %s",
                    963:                                scname[scon_stk[i]] );
                    964:
                    965:                else
                    966:                        {
                    967:                        sceof[scon_stk[i]] = true;
1.9       tedu      968:
                    969:                        if (previous_continued_action /* && previous action was regular */)
                    970:                                add_action("YY_RULE_SETUP\n");
                    971:
                    972:                        snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
1.1       deraadt   973:                                scname[scon_stk[i]] );
                    974:                        add_action( action_text );
                    975:                        }
                    976:                }
                    977:
                    978:        line_directive_out( (FILE *) 0, 1 );
                    979:
                    980:        /* This isn't a normal rule after all - don't count it as
                    981:         * such, so we don't have any holes in the rule numbering
                    982:         * (which make generating "rule can never match" warnings
                    983:         * more difficult).
                    984:         */
                    985:        --num_rules;
                    986:        ++num_eof_rules;
                    987:        }
                    988:
                    989:
                    990: /* format_synerr - write out formatted syntax error
                          *
                          * msg is used as the snprintf() format string and arg as its single
                          * argument.  The text is built in a MAXLINE-sized buffer, with the
                          * write bounded by sizeof(errmsg), and then passed to synerr().
                          */
                    991:
                    992: void format_synerr( msg, arg )
1.9       tedu      993: const char *msg, arg[];
1.1       deraadt   994:        {
                    995:        char errmsg[MAXLINE];
                    996:
1.9       tedu      997:        (void) snprintf( errmsg, sizeof(errmsg), msg, arg );
1.1       deraadt   998:        synerr( errmsg );
                    999:        }
                   1000:
                   1001:
                   1002: /* synerr - report a syntax error
                          *
                          * Sets the syntaxerror flag and reports str through
                          * pinpoint_message().
                          */
                   1003:
                   1004: void synerr( str )
1.9       tedu     1005: const char *str;
1.1       deraadt  1006:        {
                   1007:        syntaxerror = true;
                   1008:        pinpoint_message( str );
                   1009:        }
                   1010:
                   1011:
                   1012: /* format_warn - write out formatted warning
                          *
                          * msg is used as the snprintf() format string and arg as its single
                          * argument.  The text is built in a MAXLINE-sized buffer, with the
                          * write bounded by sizeof(warn_msg), and then passed to warn().
                          */
                   1013:
                   1014: void format_warn( msg, arg )
1.9       tedu     1015: const char *msg, arg[];
1.1       deraadt  1016:        {
                   1017:        char warn_msg[MAXLINE];
                   1018:
1.9       tedu     1019:        snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1.1       deraadt  1020:        warn( warn_msg );
                   1021:        }
                   1022:
                   1023:
                   1024: /* warn - report a warning, unless -w was given
                          *
                          * Forwards str to line_warning() with the current line number
                          * (linenum).
                          */
                   1025:
                   1026: void warn( str )
1.9       tedu     1027: const char *str;
1.1       deraadt  1028:        {
                   1029:        line_warning( str, linenum );
                   1030:        }
                   1031:
                   1032: /* format_pinpoint_message - write out a message formatted with one string,
                   1033:  *                          pinpointing its location
                          *
                          * msg is used as the snprintf() format string and arg as its single
                          * argument.  The text is built in a MAXLINE-sized buffer, with the
                          * write bounded by sizeof(errmsg), and then passed to
                          * pinpoint_message().
                   1034:  */
                   1035:
                   1036: void format_pinpoint_message( msg, arg )
1.9       tedu     1037: const char *msg, arg[];
1.1       deraadt  1038:        {
                   1039:        char errmsg[MAXLINE];
                   1040:
1.9       tedu     1041:        snprintf( errmsg, sizeof(errmsg), msg, arg );
1.1       deraadt  1042:        pinpoint_message( errmsg );
                   1043:        }
                   1044:
                   1045:
                   1046: /* pinpoint_message - write out a message, pinpointing its location
                          *
                          * Forwards str to line_pinpoint() with the current line number
                          * (linenum).
                          */
                   1047:
                   1048: void pinpoint_message( str )
1.9       tedu     1049: const char *str;
1.1       deraadt  1050:        {
                   1051:        line_pinpoint( str, linenum );
                   1052:        }
                   1053:
                   1054:
                   1055: /* line_warning - report a warning at a given line, unless -w was given
                          *
                          * When the nowarn flag is clear, str is prefixed with "warning, " in
                          * a MAXLINE-sized buffer (the write is bounded by sizeof(warning))
                          * and passed to line_pinpoint() together with line.  When nowarn is
                          * set, nothing is done.
                          */
                   1056:
                   1057: void line_warning( str, line )
1.9       tedu     1058: const char *str;
1.1       deraadt  1059: int line;
                   1060:        {
                   1061:        char warning[MAXLINE];
                   1062:
                   1063:        if ( ! nowarn )
                   1064:                {
1.9       tedu     1065:                snprintf( warning, sizeof(warning), "warning, %s", str );
1.1       deraadt  1066:                line_pinpoint( warning, line );
                   1067:                }
                   1068:        }
                   1069:
                   1070:
                   1071: /* line_pinpoint - write out a message, pinpointing it at the given line
                          *
                          * Prints "<infilename>:<line>: <str>" followed by a newline to
                          * stderr.
                          */
                   1072:
                   1073: void line_pinpoint( str, line )
1.9       tedu     1074: const char *str;
1.1       deraadt  1075: int line;
                   1076:        {
1.9       tedu     1077:        fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1.1       deraadt  1078:        }
                   1079:
                   1080:
                   1081: /* yyerror - eat up an error message from the parser;
                   1082:  *          currently, messages are ignored
                          *
                          * The body is intentionally empty; msg is not used.
                   1083:  */
                   1084:
                   1085: void yyerror( msg )
1.9       tedu     1086: const char *msg;
1.1       deraadt  1087:        {
                   1088:        }