Annotation of src/usr.bin/lex/parse.y, Revision 1.9
1.9 ! tedu 1: /* $OpenBSD: parse.y,v 1.8 2003/06/04 17:34:44 millert Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /* parse.y - parser for flex input */
4:
5: %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
1.9 ! tedu 6: %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
! 7: %token OPT_TABLES
1.1 deraadt 8:
9: %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
10: %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
11:
1.9 ! tedu 12: %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
! 13: %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
! 14:
! 15: %left CCL_OP_DIFF CCL_OP_UNION
! 16:
! 17: /*
! 18: * POSIX and AT&T lex place the
! 19: * precedence of the repeat operator, {}, below that of concatenation.
! 20: * Thus, ab{3} is ababab. Most other POSIX utilities use an Extended
! 21: * Regular Expression (ERE) precedence that has the repeat operator
! 22: * higher than concatenation. This causes ab{3} to yield abbb.
1.8 millert 23: *
1.9 ! tedu 24: * In order to support the POSIX and AT&T precedence and the flex
! 25: * precedence we define two token sets for the begin and end tokens of
! 26: * the repeat operator, '{' and '}'. The lexical scanner chooses
! 27: * which tokens to return based on whether posix_compat or lex_compat
! 28: * are specified. Specifying either posix_compat or lex_compat will
! 29: * cause flex to parse scanner files as per the AT&T and
! 30: * POSIX-mandated behavior.
1.1 deraadt 31: */
32:
1.9 ! tedu 33: %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
1.1 deraadt 34:
35:
1.9 ! tedu 36: %{
! 37: /* Copyright (c) 1990 The Regents of the University of California. */
! 38: /* All rights reserved. */
1.1 deraadt 39:
1.9 ! tedu 40: /* This code is derived from software contributed to Berkeley by */
! 41: /* Vern Paxson. */
1.1 deraadt 42:
1.9 ! tedu 43: /* The United States Government has rights in this work pursuant */
! 44: /* to contract no. DE-AC03-76SF00098 between the United States */
! 45: /* Department of Energy and the University of California. */
! 46:
! 47: /* This file is part of flex. */
! 48:
! 49: /* Redistribution and use in source and binary forms, with or without */
! 50: /* modification, are permitted provided that the following conditions */
! 51: /* are met: */
! 52:
! 53: /* 1. Redistributions of source code must retain the above copyright */
! 54: /* notice, this list of conditions and the following disclaimer. */
! 55: /* 2. Redistributions in binary form must reproduce the above copyright */
! 56: /* notice, this list of conditions and the following disclaimer in the */
! 57: /* documentation and/or other materials provided with the distribution. */
! 58:
! 59: /* Neither the name of the University nor the names of its contributors */
! 60: /* may be used to endorse or promote products derived from this software */
! 61: /* without specific prior written permission. */
! 62:
! 63: /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
! 64: /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
! 65: /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
! 66: /* PURPOSE. */
1.1 deraadt 67:
1.9 ! tedu 68: #include "flexdef.h"
! 69: #include "tables.h"
1.1 deraadt 70:
1.9 ! tedu 71: int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen; /* scratch values and per-rule state shared by the grammar actions below */
1.1 deraadt 72: int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
73:
74: int *scon_stk; /* stack of start condition numbers; allocated in the sect1end action */
75: int scon_stk_ptr; /* index of the top entry of scon_stk; entries occupy 1..scon_stk_ptr, 0 means empty */
76:
77: static int madeany = false; /* whether we've made the '.' character classes (ccldot and cclany) */
1.9 ! tedu 78: static int ccldot, cclany; /* ccl numbers for '.' (everything but '\n') and for (?s:'.') (nothing excluded); assigned when madeany becomes true */
1.1 deraadt 79: int previous_continued_action; /* whether the previous rule's action was '|' */
80: /* format_warn3 - format a warning that takes two arguments into a MAXLINE-byte buffer with snprintf() and pass the result to warn() */
1.9 ! tedu 81: #define format_warn3(fmt, a1, a2) \
! 82: do{ \
! 83: char fw3_msg[MAXLINE];\
! 84: snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\
! 85: warn( fw3_msg );\
! 86: }while(0)
! 87:
1.1 deraadt 88: /* Expand a POSIX character class expression: ccladd() to currccl every c in [0, csize) that is ASCII and for which func(c) is true. */
89: #define CCL_EXPR(func) \
1.9 ! tedu 90: do{ \
1.1 deraadt 91: int c; \
92: for ( c = 0; c < csize; ++c ) \
93: if ( isascii(c) && func(c) ) \
94: ccladd( currccl, c ); \
1.9 ! tedu 95: }while(0)
! 96:
! 97: /* negated class: ccladd() to currccl every c in [0, csize) for which func(c) is false; unlike CCL_EXPR there is no isascii() test */
! 98: #define CCL_NEG_EXPR(func) \
! 99: do{ \
! 100: int c; \
! 101: for ( c = 0; c < csize; ++c ) \
! 102: if ( !func(c) ) \
! 103: ccladd( currccl, c ); \
! 104: }while(0)
1.1 deraadt 105:
106: /* While POSIX defines isblank(), it's not ANSI C, so test for space and tab by hand (the argument may be evaluated twice). */
107: #define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
108:
109: /* On some over-ambitious machines, such as DEC Alpha's, the default
110: * token type is "long" instead of "int"; this leads to problems with
111: * declaring yylval in flexdef.h. But so far, all the yacc's I've seen
112: * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
113: * following should ensure that the default token type is "int".
114: */
115: #define YYSTYPE int /* so every semantic value used by the actions below is a plain int */
116:
117: %}
118:
119: %%
120: goal : initlex sect1 sect1end sect2 initforrule /* first rule of the grammar, hence the start symbol (no %start is given) */
121: { /* add default rule */
122: int def_rule;
123:
124: pat = cclinit();
125: cclnegate( pat ); /* negating a freshly created ccl: the same construction this file uses for the (?s:'.') class */
126:
127: def_rule = mkstate( -pat );
128:
129: /* Remember the number of the default rule so we
130: * don't generate "can't match" warnings for it.
131: */
132: default_rule = num_rules;
133:
1.9 ! tedu 134: finish_rule( def_rule, false, 0, 0, 0);
1.1 deraadt 135:
136: for ( i = 1; i <= lastsc; ++i ) /* branch the default rule into the scset entry of every start condition */
137: scset[i] = mkbranch( scset[i], def_rule );
138:
139: if ( spprdflt ) /* default action text: a fatal error when spprdflt is set, ECHO otherwise */
140: add_action(
141: "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
142: else
143: add_action( "ECHO" );
144:
145: add_action( ";\n\tYY_BREAK\n" );
146: }
147: ;
148:
149: initlex : /* empty production; as the first symbol of goal its action runs before section 1 is parsed */
150: { /* initialize for processing rules */
151:
152: /* Create default DFA start condition. */
153: scinstal( "INITIAL", false ); /* second argument is the slot that carries xcluflg in namelist1 */
154: }
155: ;
156:
157: sect1 : sect1 startconddecl namelist1 /* a start condition declaration and its list of names */
158: | sect1 options /* an option directive */
159: | /* empty */
160: | error
1.9 ! tedu 161: { synerr( _("unknown error processing section 1") ); }
1.1 deraadt 162: ;
163:
164: sect1end : SECTEND /* end of section 1: validate options and set up the start condition stack */
165: {
166: check_options();
167: scon_stk = allocate_integer_array( lastsc + 1 ); /* lastsc + 1 slots because entries are stored from index 1 */
168: scon_stk_ptr = 0; /* the stack starts out empty */
169: }
170: ;
171:
172: startconddecl : SCDECL
173: { xcluflg = false; } /* xcluflg is handed to scinstal() for each name in the namelist1 that follows */
174:
175: | XSCDECL
176: { xcluflg = true; } /* same flag, set for the XSCDECL form of the declaration */
177: ;
178:
179: namelist1 : namelist1 NAME
180: { scinstal( nmstr, xcluflg ); } /* nmstr is presumably the text of the NAME just scanned; it is set outside this file, TODO confirm */
181:
182: | NAME
183: { scinstal( nmstr, xcluflg ); }
184:
185: | error
1.9 ! tedu 186: { synerr( _("bad start condition list") ); }
1.1 deraadt 187: ;
188:
189: options : OPTION_OP optionlist /* OPTION_OP followed by zero or more option settings */
190: ;
191:
192: optionlist : optionlist option /* left-recursive list of option settings */
193: | /* empty */
194: ;
195:
196: option : OPT_OUTFILE '=' NAME /* every alternative stores a copy of nmstr in the matching global */
197: {
198: outfilename = copy_string( nmstr );
199: did_outfilename = 1; /* record that an output file name was given */
200: }
1.9 ! tedu 201: | OPT_EXTRA_TYPE '=' NAME
! 202: { extra_type = copy_string( nmstr ); }
1.1 deraadt 203: | OPT_PREFIX '=' NAME
204: { prefix = copy_string( nmstr ); }
205: | OPT_YYCLASS '=' NAME
206: { yyclass = copy_string( nmstr ); }
1.9 ! tedu 207: | OPT_HEADER '=' NAME
! 208: { headerfilename = copy_string( nmstr ); }
! 209: | OPT_TABLES '=' NAME
! 210: { tablesext = true; tablesfilename = copy_string( nmstr ); } /* also sets the tablesext flag */
1.1 deraadt 211: ;
212:
213: sect2 : sect2 scon initforrule flexrule '\n'
214: { scon_stk_ptr = $2; } /* restore the stack depth that scon returned, dropping what this rule pushed */
215: | sect2 scon '{' sect2 '}'
216: { scon_stk_ptr = $2; } /* same restore after a braced group of rules */
217: | /* empty */
218: ;
219:
220: initforrule : /* empty production used only for its action */
221: {
222: /* Initialize for a parse of one rule. */
223: trlcontxt = variable_trail_rule = varlength = false;
224: trailcnt = headcnt = rulelen = 0;
225: current_state_type = STATE_NORMAL;
226: previous_continued_action = continued_action; /* snapshot taken before the new rule is parsed */
227: in_rule = true;
228:
229: new_rule();
230: }
231: ;
232:
233: flexrule : '^' rule /* rule anchored with '^': its pattern is branched into scbol[] */
234: {
235: pat = $2;
236: finish_rule( pat, variable_trail_rule,
1.9 ! tedu 237: headcnt, trailcnt , previous_continued_action);
1.1 deraadt 238:
239: if ( scon_stk_ptr > 0 ) /* start conditions are on the stack: add the pattern to those only */
240: {
241: for ( i = 1; i <= scon_stk_ptr; ++i )
242: scbol[scon_stk[i]] =
243: mkbranch( scbol[scon_stk[i]],
244: pat );
245: }
246:
247: else
248: {
249: /* Add to all non-exclusive start conditions,
250: * including the default (0) start condition.
251: */
252:
253: for ( i = 1; i <= lastsc; ++i )
254: if ( ! scxclu[i] )
255: scbol[i] = mkbranch( scbol[i],
256: pat );
257: }
258:
259: if ( ! bol_needed ) /* set the flag and, at report level above 1, mention the performance cost */
260: {
261: bol_needed = true;
262:
263: if ( performance_report > 1 )
264: pinpoint_message(
265: "'^' operator results in sub-optimal performance" );
266: }
267: }
268:
269: | rule /* unanchored rule: same selection of start conditions, but the pattern goes into scset[] */
270: {
271: pat = $1;
272: finish_rule( pat, variable_trail_rule,
1.9 ! tedu 273: headcnt, trailcnt , previous_continued_action);
1.1 deraadt 274:
275: if ( scon_stk_ptr > 0 )
276: {
277: for ( i = 1; i <= scon_stk_ptr; ++i )
278: scset[scon_stk[i]] =
279: mkbranch( scset[scon_stk[i]],
280: pat );
281: }
282:
283: else
284: {
285: for ( i = 1; i <= lastsc; ++i )
286: if ( ! scxclu[i] )
287: scset[i] =
288: mkbranch( scset[i],
289: pat );
290: }
291: }
292:
293: | EOF_OP /* an <<EOF>> rule */
294: {
295: if ( scon_stk_ptr > 0 )
296: build_eof_action();
297:
298: else
299: {
300: /* This EOF applies to all start conditions
301: * which don't already have EOF actions.
302: */
303: for ( i = 1; i <= lastsc; ++i ) /* push every start condition whose sceof entry is still clear */
304: if ( ! sceof[i] )
305: scon_stk[++scon_stk_ptr] = i;
306:
307: if ( scon_stk_ptr == 0 ) /* nothing was pushed */
308: warn(
309: "all start conditions already have <<EOF>> rules" );
310:
311: else
312: build_eof_action();
313: }
314: }
315:
316: | error
1.9 ! tedu 317: { synerr( _("unrecognized rule") ); }
1.1 deraadt 318: ;
319:
320: scon_stk_ptr : /* empty marker: yields the stack depth at the point where it is reduced */
321: { $$ = scon_stk_ptr; }
322: ;
323:
324: scon : '<' scon_stk_ptr namelist2 '>' /* explicit list of start conditions */
325: { $$ = $2; } /* value is the depth captured by the marker before namelist2 pushed anything */
326:
327: | '<' '*' '>' /* all start conditions */
328: {
329: $$ = scon_stk_ptr; /* remember the depth before pushing */
330:
331: for ( i = 1; i <= lastsc; ++i )
332: {
333: int j;
334:
335: for ( j = 1; j <= scon_stk_ptr; ++j ) /* is i already on the stack? */
336: if ( scon_stk[j] == i )
337: break;
338:
339: if ( j > scon_stk_ptr ) /* search ran to completion: not present, so push it */
340: scon_stk[++scon_stk_ptr] = i;
341: }
342: }
343:
344: | /* empty: no start condition prefix, stack left unchanged */
345: { $$ = scon_stk_ptr; }
346: ;
347:
348: namelist2 : namelist2 ',' sconname /* comma-separated start condition names; sconname does the work */
349:
350: | sconname
351:
352: | error
1.9 ! tedu 353: { synerr( _("bad start condition list") ); }
1.1 deraadt 354: ;
355:
356: sconname : NAME /* look the name up and push its number on scon_stk unless already there */
357: {
358: if ( (scnum = sclookup( nmstr )) == 0 ) /* a result of 0 is treated as "not declared" */
359: format_pinpoint_message(
360: "undeclared start condition %s",
361: nmstr );
362: else
363: {
364: for ( i = 1; i <= scon_stk_ptr; ++i ) /* linear search for a duplicate */
365: if ( scon_stk[i] == scnum )
366: {
367: format_warn(
368: "<%s> specified twice",
369: scname[scnum] );
370: break;
371: }
372:
373: if ( i > scon_stk_ptr ) /* loop finished without a match: push the new entry */
374: scon_stk[++scon_stk_ptr] = scnum;
375: }
376: }
377: ;
378:
379: rule : re2 re /* head (re2, which ends in '/') followed by its trailing context (re) */
380: {
381: if ( transchar[lastst[$2]] != SYM_EPSILON )
382: /* Provide final transition \now/ so it
383: * will be marked as a trailing context
384: * state.
385: */
386: $2 = link_machines( $2,
387: mkstate( SYM_EPSILON ) );
388:
389: mark_beginning_as_normal( $2 );
390: current_state_type = STATE_NORMAL;
391:
392: if ( previous_continued_action )
393: {
394: /* We need to treat this as variable trailing
395: * context so that the backup does not happen
396: * in the action but before the action switch
397: * statement. If the backup happens in the
398: * action, then the rules "falling into" this
399: * one's action will *also* do the backup,
400: * erroneously.
401: */
402: if ( ! varlength || headcnt != 0 ) /* warn only when the rule was not already going to be treated as variable */
403: warn(
404: "trailing context made variable due to preceding '|' action" );
405:
406: /* Mark as variable. */
407: varlength = true;
408: headcnt = 0;
1.9 ! tedu 409:
1.1 deraadt 410: }
411:
412: if ( lex_compat || (varlength && headcnt == 0) )
413: { /* variable trailing context rule */
414: /* Mark the first part of the rule as the
415: * accepting "head" part of a trailing
416: * context rule.
417: *
418: * By the way, we didn't do this at the
419: * beginning of this production because back
420: * then current_state_type was set up for a
421: * trail rule, and add_accept() can create
422: * a new state ...
423: */
424: add_accept( $1,
425: num_rules | YY_TRAILING_HEAD_MASK );
426: variable_trail_rule = true;
427: }
428:
429: else
430: trailcnt = rulelen; /* rulelen was reset to 0 in re2, so it now counts only the trailing part */
431:
432: $$ = link_machines( $1, $2 );
433: }
434:
435: | re2 re '$' /* both '/' and '$' in one rule: rejected */
1.9 ! tedu 436: { synerr( _("trailing context used twice") ); }
1.1 deraadt 437:
438: | re '$' /* '$' is built as a trailing context consisting of one '\n' */
439: {
440: headcnt = 0;
441: trailcnt = 1;
442: rulelen = 1;
443: varlength = false;
444:
445: current_state_type = STATE_TRAILING_CONTEXT;
446:
447: if ( trlcontxt )
448: {
1.9 ! tedu 449: synerr( _("trailing context used twice") );
1.1 deraadt 450: $$ = mkstate( SYM_EPSILON ); /* NOTE(review): this value is overwritten by the link_machines() assignment at the end of this action */
451: }
452:
453: else if ( previous_continued_action )
454: {
455: /* See the comment in the rule for "re2 re"
456: * above.
457: */
458: warn(
459: "trailing context made variable due to preceding '|' action" );
460:
461: varlength = true;
462: }
463:
464: if ( lex_compat || varlength )
465: {
466: /* Again, see the comment in the rule for
467: * "re2 re" above.
468: */
469: add_accept( $1,
470: num_rules | YY_TRAILING_HEAD_MASK );
471: variable_trail_rule = true;
472: }
473:
474: trlcontxt = true;
475:
476: eps = mkstate( SYM_EPSILON );
477: $$ = link_machines( $1,
478: link_machines( eps, mkstate( '\n' ) ) ); /* pattern, then an epsilon state, then a '\n' state */
479: }
480:
481: | re /* no '/' or '$' at this level */
482: {
483: $$ = $1;
484:
485: if ( trlcontxt )
486: {
487: if ( lex_compat || (varlength && headcnt == 0) )
488: /* Both head and trail are
489: * variable-length.
490: */
491: variable_trail_rule = true;
492: else
493: trailcnt = rulelen;
494: }
495: }
496: ;
497:
498:
499: re : re '|' series /* alternation of two patterns */
500: {
501: varlength = true; /* any alternation marks the rule as variable-length */
502: $$ = mkor( $1, $3 );
503: }
504:
505: | series
506: { $$ = $1; }
507: ;
508:
509:
510: re2 : re '/' /* head of a trailing context rule, up to and including the '/' */
511: {
512: /* This rule is written separately so the
513: * reduction will occur before the trailing
514: * series is parsed.
515: */
516:
517: if ( trlcontxt )
1.9 ! tedu 518: synerr( _("trailing context used twice") );
1.1 deraadt 519: else
520: trlcontxt = true;
521:
522: if ( varlength )
523: /* We hope the trailing context is
524: * fixed-length.
525: */
526: varlength = false;
527: else
528: headcnt = rulelen; /* head is fixed-length: remember its length */
529:
530: rulelen = 0; /* restart the count for the trailing part */
531:
532: current_state_type = STATE_TRAILING_CONTEXT;
533: $$ = $1;
534: }
535: ;
536:
537: series : series singleton
538: {
539: /* This is where concatenation of adjacent patterns
540: * gets done.
541: */
542: $$ = link_machines( $1, $2 );
543: }
544:
545: | singleton
546: { $$ = $1; }
1.9 ! tedu 547:
! 548: | series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX /* {m,n} applied to the whole series (POSIX/AT&T token set) */
! 549: {
! 550: varlength = true;
! 551:
! 552: if ( $3 > $5 || $3 < 0 ) /* lower bound above upper bound, or negative */
! 553: {
! 554: synerr( _("bad iteration values") );
! 555: $$ = $1;
! 556: }
! 557: else
! 558: {
! 559: if ( $3 == 0 )
! 560: {
! 561: if ( $5 <= 0 ) /* {0,0} is rejected */
! 562: {
! 563: synerr(
! 564: _("bad iteration values") );
! 565: $$ = $1;
! 566: }
! 567: else
! 568: $$ = mkopt(
! 569: mkrep( $1, 1, $5 ) ); /* {0,n} is built as an optional {1,n} */
! 570: }
! 571: else
! 572: $$ = mkrep( $1, $3, $5 );
! 573: }
! 574: }
! 575:
! 576: | series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX /* {m,} with no upper bound */
! 577: {
! 578: varlength = true;
! 579:
! 580: if ( $3 <= 0 )
! 581: {
! 582: synerr( _("iteration value must be positive") );
! 583: $$ = $1;
! 584: }
! 585:
! 586: else
! 587: $$ = mkrep( $1, $3, INFINITE_REPEAT );
! 588: }
! 589:
! 590: | series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX /* {m}: exact count */
! 591: {
! 592: /* The series could be something like "(foo)",
! 593: * in which case we have no idea what its length
! 594: * is, so we punt here.
! 595: */
! 596: varlength = true;
! 597:
! 598: if ( $3 <= 0 )
! 599: {
! 600: synerr( _("iteration value must be positive")
! 601: );
! 602: $$ = $1;
! 603: }
! 604:
! 605: else
! 606: $$ = link_machines( $1,
! 607: copysingl( $1, $3 - 1 ) ); /* the original followed by what copysingl() returns for m - 1; presumably m - 1 further copies, TODO confirm */
! 608: }
! 609:
1.1 deraadt 610: ;
611:
612: singleton : singleton '*' /* every postfix operator below marks the rule as variable-length */
613: {
614: varlength = true;
615:
616: $$ = mkclos( $1 );
617: }
618:
619: | singleton '+'
620: {
621: varlength = true;
622: $$ = mkposcl( $1 );
623: }
624:
625: | singleton '?'
626: {
627: varlength = true;
628: $$ = mkopt( $1 );
629: }
630:
1.9 ! tedu 631: | singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX /* {m,n} applied to one singleton (flex token set) */
1.1 deraadt 632: {
633: varlength = true;
634:
635: if ( $3 > $5 || $3 < 0 ) /* lower bound above upper bound, or negative */
636: {
1.9 ! tedu 637: synerr( _("bad iteration values") );
1.1 deraadt 638: $$ = $1;
639: }
640: else
641: {
642: if ( $3 == 0 )
643: {
644: if ( $5 <= 0 ) /* {0,0} is rejected */
645: {
646: synerr(
1.9 ! tedu 647: _("bad iteration values") );
1.1 deraadt 648: $$ = $1;
649: }
650: else
651: $$ = mkopt(
652: mkrep( $1, 1, $5 ) ); /* {0,n} is built as an optional {1,n} */
653: }
654: else
655: $$ = mkrep( $1, $3, $5 );
656: }
657: }
658:
1.9 ! tedu 659: | singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX /* {m,} with no upper bound */
1.1 deraadt 660: {
661: varlength = true;
662:
663: if ( $3 <= 0 )
664: {
1.9 ! tedu 665: synerr( _("iteration value must be positive") );
1.1 deraadt 666: $$ = $1;
667: }
668:
669: else
1.9 ! tedu 670: $$ = mkrep( $1, $3, INFINITE_REPEAT );
1.1 deraadt 671: }
672:
1.9 ! tedu 673: | singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX /* {m}: exact count */
1.1 deraadt 674: {
675: /* The singleton could be something like "(foo)",
676: * in which case we have no idea what its length
677: * is, so we punt here.
678: */
679: varlength = true;
680:
681: if ( $3 <= 0 )
682: {
1.9 ! tedu 683: synerr( _("iteration value must be positive") );
1.1 deraadt 684: $$ = $1;
685: }
686:
687: else
688: $$ = link_machines( $1,
689: copysingl( $1, $3 - 1 ) );
690: }
691:
692: | '.'
693: {
694: if ( ! madeany ) /* build both classes once, on the first '.' encountered */
695: {
696: /* Create the '.' character class. */
1.9 ! tedu 697: ccldot = cclinit();
! 698: ccladd( ccldot, '\n' );
! 699: cclnegate( ccldot );
! 700:
! 701: if ( useecs )
! 702: mkeccl( ccltbl + cclmap[ccldot],
! 703: ccllen[ccldot], nextecm,
! 704: ecgroup, csize, csize );
! 705:
! 706: /* Create the (?s:'.') character class. */
! 707: cclany = cclinit();
! 708: cclnegate( cclany );
! 709:
! 710: if ( useecs )
! 711: mkeccl( ccltbl + cclmap[cclany],
! 712: ccllen[cclany], nextecm,
! 713: ecgroup, csize, csize );
1.1 deraadt 714:
715: madeany = true;
716: }
717:
718: ++rulelen; /* a '.' contributes one character to the rule length */
719:
1.9 ! tedu 720: if (sf_dot_all()) /* pick the class according to the dot-all flag */
! 721: $$ = mkstate( -cclany );
! 722: else
! 723: $$ = mkstate( -ccldot );
1.1 deraadt 724: }
725:
726: | fullccl
727: {
1.9 ! tedu 728: /* Sort characters for fast searching.
1.1 deraadt 729: */
1.9 ! tedu 730: qsort( ccltbl + cclmap[$1], ccllen[$1], sizeof (*ccltbl), cclcmp );
1.1 deraadt 731:
732: if ( useecs )
733: mkeccl( ccltbl + cclmap[$1], ccllen[$1],
734: nextecm, ecgroup, csize, csize );
735:
736: ++rulelen;
737:
1.9 ! tedu 738: if (ccl_has_nl[$1]) /* propagate the "contains newline" flag from the class to the current rule */
! 739: rule_has_nl[num_rules] = true;
! 740:
1.1 deraadt 741: $$ = mkstate( -$1 ); /* a ccl is passed to mkstate() as its negated number */
742: }
743:
744: | PREVCCL /* a ccl number delivered as the token value; unlike fullccl it is not sorted or passed to mkeccl() here */
745: {
746: ++rulelen;
747:
1.9 ! tedu 748: if (ccl_has_nl[$1])
! 749: rule_has_nl[num_rules] = true;
! 750:
1.1 deraadt 751: $$ = mkstate( -$1 );
752: }
753:
754: | '"' string '"'
755: { $$ = $2; }
756:
757: | '(' re ')'
758: { $$ = $2; }
759:
760: | CHAR
761: {
762: ++rulelen;
763:
1.9 ! tedu 764: if ($1 == nlch)
! 765: rule_has_nl[num_rules] = true;
1.1 deraadt 766:
1.9 ! tedu 767: if (sf_case_ins() && has_case($1))
! 768: /* create an alternation, as in (a|A) */
! 769: $$ = mkor (mkstate($1), mkstate(reverse_case($1)));
! 770: else
! 771: $$ = mkstate( $1 );
1.1 deraadt 772: }
773: ;
1.9 ! tedu 774: fullccl: /* one or more bracketed classes combined left to right (CCL_OP_DIFF and CCL_OP_UNION are declared %left) */
! 775: fullccl CCL_OP_DIFF braceccl { $$ = ccl_set_diff ($1, $3); }
! 776: | fullccl CCL_OP_UNION braceccl { $$ = ccl_set_union ($1, $3); }
! 777: | braceccl
! 778: ;
1.1 deraadt 779:
1.9 ! tedu 780: braceccl: /* a single bracketed character class; the value is its ccl number */
! 781:
! 782: '[' ccl ']' { $$ = $2; }
1.1 deraadt 783:
784: | '[' '^' ccl ']' /* leading '^': negate the class in place */
785: {
786: cclnegate( $3 );
787: $$ = $3;
788: }
789: ;
790:
791: ccl : ccl CHAR '-' CHAR /* a range inside a bracketed class */
792: {
1.9 ! tedu 793:
! 794: if (sf_case_ins())
! 795: {
! 796:
! 797: /* If one end of the range has case and the other
! 798: * does not, or the cases are different, then we're not
! 799: * sure what range the user is trying to express.
! 800: * Examples: [@-z] or [S-t]
! 801: */
! 802: if (has_case ($2) != has_case ($4)
! 803: || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
! 804: || (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
! 805: format_warn3 (
! 806: _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
! 807: $2, $4);
! 808:
! 809: /* If the range spans uppercase characters but not
! 810: * lowercase (or vice-versa), then should we automatically
! 811: * include lowercase characters in the range?
! 812: * Example: [@-_] spans [a-z] but not [A-Z]
! 813: */
! 814: else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
! 815: format_warn3 (
! 816: _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
! 817: $2, $4);
! 818: }
1.1 deraadt 819:
820: if ( $2 > $4 ) /* reversed endpoints */
1.9 ! tedu 821: synerr( _("negative range in character class") );
1.1 deraadt 822:
823: else
824: {
825: for ( i = $2; i <= $4; ++i ) /* add every character of the range, endpoints included */
826: ccladd( $1, i );
827:
828: /* Keep track if this ccl is staying in
829: * alphabetical order.
830: */
831: cclsorted = cclsorted && ($2 > lastchar);
832: lastchar = $4;
1.9 ! tedu 833:
! 834: /* Do it again for upper/lowercase */
! 835: if (sf_case_ins() && has_case($2) && has_case($4)){
! 836: $2 = reverse_case ($2); /* the endpoints are overwritten with their opposite-case counterparts */
! 837: $4 = reverse_case ($4);
! 838:
! 839: for ( i = $2; i <= $4; ++i )
! 840: ccladd( $1, i );
! 841:
! 842: cclsorted = cclsorted && ($2 > lastchar);
! 843: lastchar = $4;
! 844: }
! 845:
1.1 deraadt 846: }
847:
848: $$ = $1;
849: }
850:
851: | ccl CHAR /* a single character inside a bracketed class */
852: {
853: ccladd( $1, $2 );
854: cclsorted = cclsorted && ($2 > lastchar);
855: lastchar = $2;
1.9 ! tedu 856:
! 857: /* Do it again for upper/lowercase */
! 858: if (sf_case_ins() && has_case($2)){
! 859: $2 = reverse_case ($2);
! 860: ccladd ($1, $2);
! 861:
! 862: cclsorted = cclsorted && ($2 > lastchar);
! 863: lastchar = $2;
! 864: }
! 865:
1.1 deraadt 866: $$ = $1;
867: }
868:
869: | ccl ccl_expr /* a named class expression; its members are added by the ccl_expr action */
870: {
871: /* Too hard to properly maintain cclsorted. */
872: cclsorted = false;
873: $$ = $1;
874: }
875:
876: | /* empty: start a new class */
877: {
878: cclsorted = true;
879: lastchar = 0;
880: currccl = $$ = cclinit(); /* currccl is the class that CCL_EXPR and CCL_NEG_EXPR add to */
881: }
882: ;
883:
1.9 ! tedu 884: ccl_expr: /* each alternative adds the members of one named class to currccl via CCL_EXPR or CCL_NEG_EXPR */
! 885: CCE_ALNUM { CCL_EXPR(isalnum); }
! 886: | CCE_ALPHA { CCL_EXPR(isalpha); }
! 887: | CCE_BLANK { CCL_EXPR(IS_BLANK); }
! 888: | CCE_CNTRL { CCL_EXPR(iscntrl); }
! 889: | CCE_DIGIT { CCL_EXPR(isdigit); }
! 890: | CCE_GRAPH { CCL_EXPR(isgraph); }
! 891: | CCE_LOWER {
! 892: CCL_EXPR(islower);
! 893: if (sf_case_ins()) /* case-insensitive: add the upper-case letters as well */
! 894: CCL_EXPR(isupper);
! 895: }
! 896: | CCE_PRINT { CCL_EXPR(isprint); }
! 897: | CCE_PUNCT { CCL_EXPR(ispunct); }
! 898: | CCE_SPACE { CCL_EXPR(isspace); }
! 899: | CCE_XDIGIT { CCL_EXPR(isxdigit); }
1.1 deraadt 900: | CCE_UPPER {
1.9 ! tedu 901: CCL_EXPR(isupper);
! 902: if (sf_case_ins()) /* case-insensitive: add the lower-case letters as well */
! 903: CCL_EXPR(islower);
! 904: }
! 905:
! 906: | CCE_NEG_ALNUM { CCL_NEG_EXPR(isalnum); }
! 907: | CCE_NEG_ALPHA { CCL_NEG_EXPR(isalpha); }
! 908: | CCE_NEG_BLANK { CCL_NEG_EXPR(IS_BLANK); }
! 909: | CCE_NEG_CNTRL { CCL_NEG_EXPR(iscntrl); }
! 910: | CCE_NEG_DIGIT { CCL_NEG_EXPR(isdigit); }
! 911: | CCE_NEG_GRAPH { CCL_NEG_EXPR(isgraph); }
! 912: | CCE_NEG_PRINT { CCL_NEG_EXPR(isprint); }
! 913: | CCE_NEG_PUNCT { CCL_NEG_EXPR(ispunct); }
! 914: | CCE_NEG_SPACE { CCL_NEG_EXPR(isspace); }
! 915: | CCE_NEG_XDIGIT { CCL_NEG_EXPR(isxdigit); }
! 916: | CCE_NEG_LOWER {
! 917: if ( sf_case_ins() ) /* case-insensitive: only warn, nothing is added to the class */
! 918: warn(_("[:^lower:] is ambiguous in case insensitive scanner"));
1.1 deraadt 919: else
1.9 ! tedu 920: CCL_NEG_EXPR(islower);
! 921: }
! 922: | CCE_NEG_UPPER {
! 923: if ( sf_case_ins() ) /* case-insensitive: only warn, nothing is added to the class */
! 924: warn(_("[:^upper:] ambiguous in case insensitive scanner")); /* NOTE(review): wording lacks the "is" used in the [:^lower:] message */
! 925: else
! 926: CCL_NEG_EXPR(isupper);
1.1 deraadt 927: }
928: ;
929:
930: string : string CHAR /* contents of a quoted string, built up one character at a time */
931: {
1.9 ! tedu 932: if ( $2 == nlch )
! 933: rule_has_nl[num_rules] = true;
1.1 deraadt 934:
935: ++rulelen;
936:
1.9 ! tedu 937: if (sf_case_ins() && has_case($2)) /* same both-cases alternation as the CHAR alternative of singleton */
! 938: $$ = mkor (mkstate($2), mkstate(reverse_case($2)));
! 939: else
! 940: $$ = mkstate ($2);
! 941:
! 942: $$ = link_machines( $1, $$); /* append the new character's machine to the string built so far */
1.1 deraadt 943: }
944:
945: | /* empty string: a single epsilon state */
946: { $$ = mkstate( SYM_EPSILON ); }
947: ;
948:
949: %%
950:
951:
952: /* build_eof_action - build the "<<EOF>>" action for the active start
953: * conditions, i.e. the entries scon_stk[1..scon_stk_ptr]; a condition whose
954: * sceof flag is already set is reported and gets no second case label */
955:
956: void build_eof_action()
957: {
1.9 ! tedu 958: int i; /* local index; hides the file-scope i */
1.1 deraadt 959: char action_text[MAXLINE];
960:
961: for ( i = 1; i <= scon_stk_ptr; ++i )
962: {
963: if ( sceof[scon_stk[i]] )
964: format_pinpoint_message(
965: "multiple <<EOF>> rules for start condition %s",
966: scname[scon_stk[i]] );
967:
968: else
969: {
970: sceof[scon_stk[i]] = true;
1.9 ! tedu 971:
! 972: if (previous_continued_action /* && previous action was regular */)
! 973: add_action("YY_RULE_SETUP\n");
! 974:
! 975: snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
1.1 deraadt 976: scname[scon_stk[i]] );
977: add_action( action_text );
978: }
979: }
980:
981: line_directive_out( (FILE *) 0, 1 );
982:
983: /* This isn't a normal rule after all - don't count it as
984: * such, so we don't have any holes in the rule numbering
985: * (which make generating "rule can never match" warnings
986: * more difficult).
987: */
988: --num_rules;
989: ++num_eof_rules;
990: }
991:
992:
993: /* format_synerr - write out formatted syntax error: msg is used as the snprintf() format with arg as its one argument, the result (at most MAXLINE bytes) goes to synerr() */
994:
995: void format_synerr( msg, arg )
1.9 ! tedu 996: const char *msg, arg[];
1.1 deraadt 997: {
998: char errmsg[MAXLINE];
999:
1.9 ! tedu 1000: (void) snprintf( errmsg, sizeof(errmsg), msg, arg );
1.1 deraadt 1001: synerr( errmsg );
1002: }
1003:
1004:
1005: /* synerr - report a syntax error: sets the syntaxerror flag and prints str through pinpoint_message() */
1006:
1007: void synerr( str )
1.9 ! tedu 1008: const char *str;
1.1 deraadt 1009: {
1010: syntaxerror = true;
1011: pinpoint_message( str );
1012: }
1013:
1014:
1015: /* format_warn - write out formatted warning: msg is used as the snprintf() format with arg as its one argument, the result (at most MAXLINE bytes) goes to warn() */
1016:
1017: void format_warn( msg, arg )
1.9 ! tedu 1018: const char *msg, arg[];
1.1 deraadt 1019: {
1020: char warn_msg[MAXLINE];
1021:
1.9 ! tedu 1022: snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1.1 deraadt 1023: warn( warn_msg );
1024: }
1025:
1026:
1027: /* warn - report a warning at the current line (linenum), unless -w was given; the nowarn test itself is made in line_warning() */
1028:
1029: void warn( str )
1.9 ! tedu 1030: const char *str;
1.1 deraadt 1031: {
1032: line_warning( str, linenum );
1033: }
1034:
1035: /* format_pinpoint_message - write out a message formatted with one string,
1036: * pinpointing its location; msg is the snprintf() format, arg its argument,
1037: * and the formatted text is limited to MAXLINE bytes */
1038:
1039: void format_pinpoint_message( msg, arg )
1.9 ! tedu 1040: const char *msg, arg[];
1.1 deraadt 1041: {
1042: char errmsg[MAXLINE];
1043:
1.9 ! tedu 1044: snprintf( errmsg, sizeof(errmsg), msg, arg );
1.1 deraadt 1045: pinpoint_message( errmsg );
1046: }
1047:
1048:
1049: /* pinpoint_message - write out a message, pinpointing its location as the current line (linenum) */
1050:
1051: void pinpoint_message( str )
1.9 ! tedu 1052: const char *str;
1.1 deraadt 1053: {
1054: line_pinpoint( str, linenum );
1055: }
1056:
1057:
1058: /* line_warning - report a warning at a given line, unless -w was given (nowarn set); the text is prefixed with "warning, " and limited to MAXLINE bytes */
1059:
1060: void line_warning( str, line )
1.9 ! tedu 1061: const char *str;
1.1 deraadt 1062: int line;
1063: {
1064: char warning[MAXLINE];
1065:
1066: if ( ! nowarn )
1067: {
1.9 ! tedu 1068: snprintf( warning, sizeof(warning), "warning, %s", str );
1.1 deraadt 1069: line_pinpoint( warning, line );
1070: }
1071: }
1072:
1073:
1074: /* line_pinpoint - write out a message, pinpointing it at the given line; output goes to stderr as infilename, line number and message separated by colons */
1075:
1076: void line_pinpoint( str, line )
1.9 ! tedu 1077: const char *str;
1.1 deraadt 1078: int line;
1079: {
1.9 ! tedu 1080: fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1.1 deraadt 1081: }
1082:
1083:
1084: /* yyerror - eat up an error message from the parser;
1085: * currently, messages are ignored (the grammar's "error" productions call synerr() with their own text instead)
1086: */
1087:
1088: void yyerror( msg )
1.9 ! tedu 1089: const char *msg;
1.1 deraadt 1090: {
1091: }
1091: }