Annotation of src/usr.bin/lex/parse.y, Revision 1.10
1.10 ! millert 1: /* $OpenBSD: parse.y,v 1.9 2015/11/19 19:43:40 tedu Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /* parse.y - parser for flex input */
4:
5: %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
1.9 tedu 6: %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
7: %token OPT_TABLES
1.1 deraadt 8:
9: %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
10: %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
11:
1.9 tedu 12: %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
13: %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
14:
15: %left CCL_OP_DIFF CCL_OP_UNION
16:
17: /*
18: *POSIX and AT&T lex place the
19: * precedence of the repeat operator, {}, below that of concatenation.
20: * Thus, ab{3} is ababab. Most other POSIX utilities use an Extended
21: * Regular Expression (ERE) precedence that has the repeat operator
22: * higher than concatenation. This causes ab{3} to yield abbb.
1.8 millert 23: *
1.9 tedu 24: * In order to support the POSIX and AT&T precedence and the flex
25: * precedence we define two token sets for the begin and end tokens of
26: * the repeat operator, '{' and '}'. The lexical scanner chooses
27: * which tokens to return based on whether posix_compat or lex_compat
28: * are specified. Specifying either posix_compat or lex_compat will
29: * cause flex to parse scanner files as per the AT&T and
30: * POSIX-mandated behavior.
1.1 deraadt 31: */
32:
1.9 tedu 33: %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
1.1 deraadt 34:
35:
1.9 tedu 36: %{
37: /* Copyright (c) 1990 The Regents of the University of California. */
38: /* All rights reserved. */
1.1 deraadt 39:
1.9 tedu 40: /* This code is derived from software contributed to Berkeley by */
41: /* Vern Paxson. */
1.1 deraadt 42:
1.9 tedu 43: /* The United States Government has rights in this work pursuant */
44: /* to contract no. DE-AC03-76SF00098 between the United States */
45: /* Department of Energy and the University of California. */
46:
47: /* This file is part of flex. */
48:
49: /* Redistribution and use in source and binary forms, with or without */
50: /* modification, are permitted provided that the following conditions */
51: /* are met: */
52:
53: /* 1. Redistributions of source code must retain the above copyright */
54: /* notice, this list of conditions and the following disclaimer. */
55: /* 2. Redistributions in binary form must reproduce the above copyright */
56: /* notice, this list of conditions and the following disclaimer in the */
57: /* documentation and/or other materials provided with the distribution. */
58:
59: /* Neither the name of the University nor the names of its contributors */
60: /* may be used to endorse or promote products derived from this software */
61: /* without specific prior written permission. */
62:
63: /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
64: /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
65: /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
66: /* PURPOSE. */
1.1 deraadt 67:
1.9 tedu 68: #include "flexdef.h"
69: #include "tables.h"
1.1 deraadt 70:
/* Scratch state shared by the grammar actions below.  headcnt, trailcnt and
 * rulelen are reset to 0, and trlcontxt, varlength and variable_trail_rule
 * to false, by the initforrule action before each rule is parsed.  `i' is
 * used as a loop index by several actions.
 */
1.9 tedu 71: int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
1.1 deraadt 72: int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
73:
/* Stack of start-condition numbers that apply to the rule being parsed.
 * It is allocated with lastsc + 1 entries when section 1 ends; entries are
 * stored at indices 1..scon_stk_ptr, so scon_stk_ptr == 0 means "empty".
 */
74: int *scon_stk;
75: int scon_stk_ptr;
76:
77: static int madeany = false; /* whether we've made the '.' character class */
/* Character classes built the first time '.' is parsed: ccldot is the
 * negation of { '\n' }, cclany is the negation of the empty class.
 */
1.9 tedu 78: static int ccldot, cclany;
1.1 deraadt 79: int previous_continued_action; /* whether the previous rule's action was '|' */
80:
/* Format a warning whose format string takes two arguments into a local
 * MAXLINE-sized buffer and pass the result to warn().
 */
#define format_warn3(fmt, a1, a2) \
	do { \
		char fw3_msg[MAXLINE]; \
		snprintf(fw3_msg, sizeof(fw3_msg), (fmt), (a1), (a2)); \
		warn(fw3_msg); \
	} while (0)
87:
/* Expand a POSIX character class expression: add to the current class
 * (currccl) every character code below csize that is ASCII and for which
 * the <ctype.h>-style predicate `func' holds.
 */
#define CCL_EXPR(func) \
	do { \
		int ch; \
		for (ch = 0; ch < csize; ++ch) { \
			if (isascii(ch) && func(ch)) \
				ccladd(currccl, ch); \
		} \
	} while (0)
96:
/* Negated class expression: add to the current class (currccl) every
 * character code below csize for which the predicate `func' does NOT hold.
 * Unlike CCL_EXPR there is no isascii() filter here.
 */
#define CCL_NEG_EXPR(func) \
	do { \
		int ch; \
		for (ch = 0; ch < csize; ++ch) { \
			if (!func(ch)) \
				ccladd(currccl, ch); \
		} \
	} while (0)
1.1 deraadt 105:
106: /* On some over-ambitious machines, such as DEC Alpha's, the default
107: * token type is "long" instead of "int"; this leads to problems with
108: * declaring yylval in flexdef.h. But so far, all the yacc's I've seen
109: * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
110: * following should ensure that the default token type is "int".
111: */
112: #define YYSTYPE int
113:
114: %}
115:
116: %%
/* goal - the whole input; once every user rule has been parsed, the
 * default rule is appended.
 */
goal:
	initlex sect1 sect1end sect2 initforrule
	{
		/* The default rule is built from a negated empty
		 * character class.
		 */
		int def_rule;

		pat = cclinit();
		cclnegate(pat);

		def_rule = mkstate(-pat);

		/* Remember the number of the default rule so we
		 * don't generate "can't match" warnings for it.
		 */
		default_rule = num_rules;

		finish_rule(def_rule, false, 0, 0, 0);

		/* Branch every start condition to the default rule. */
		for (i = 1; i <= lastsc; ++i)
			scset[i] = mkbranch(scset[i], def_rule);

		add_action(spprdflt
		    ? "YY_FATAL_ERROR( \"flex scanner jammed\" )"
		    : "ECHO");

		add_action(";\n\tYY_BREAK\n");
	}
	;
145:
/* initlex - empty production whose action runs before section 1 is read. */
initlex:
	/* empty */
	{
		/* Create default DFA start condition. */
		scinstal("INITIAL", false);
	}
	;
153:
/* sect1 - section 1: any mix of start condition declarations and
 * option lines, possibly empty.
 */
sect1:
	sect1 startconddecl namelist1
	| sect1 options
	| /* empty */
	| error
	{
		synerr(_("unknown error processing section 1"));
	}
	;
160:
/* sect1end - end of section 1: check the options and set up the (empty)
 * start condition stack, sized for lastsc + 1 entries.
 */
sect1end:
	SECTEND
	{
		check_options();
		scon_stk = allocate_integer_array(lastsc + 1);
		scon_stk_ptr = 0;
	}
	;
168:
/* startconddecl - records in xcluflg which kind of declaration was seen;
 * the flag is handed to scinstal() for each name in the list that follows.
 */
startconddecl:
	SCDECL
	{
		xcluflg = false;
	}
	| XSCDECL
	{
		xcluflg = true;
	}
	;
175:
/* namelist1 - one or more start condition names; each is installed with
 * the current xcluflg setting.
 */
namelist1:
	namelist1 NAME
	{
		scinstal(nmstr, xcluflg);
	}
	| NAME
	{
		scinstal(nmstr, xcluflg);
	}
	| error
	{
		synerr(_("bad start condition list"));
	}
	;
185:
/* options - an option line: the option keyword followed by its list. */
options:
	OPTION_OP optionlist
	;
188:
/* optionlist - zero or more options. */
optionlist:
	optionlist option
	| /* empty */
	;
192:
/* option - a single "keyword = NAME" option; the name text (nmstr) is
 * copied into the matching global.
 */
option:
	OPT_OUTFILE '=' NAME
	{
		outfilename = copy_string(nmstr);
		did_outfilename = 1;
	}
	| OPT_EXTRA_TYPE '=' NAME
	{
		extra_type = copy_string(nmstr);
	}
	| OPT_PREFIX '=' NAME
	{
		prefix = copy_string(nmstr);
	}
	| OPT_YYCLASS '=' NAME
	{
		yyclass = copy_string(nmstr);
	}
	| OPT_HEADER '=' NAME
	{
		headerfilename = copy_string(nmstr);
	}
	| OPT_TABLES '=' NAME
	{
		tablesext = true;
		tablesfilename = copy_string(nmstr);
	}
	;
209:
/* sect2 - section 2: a sequence of rules, or of brace-enclosed groups of
 * rules that share a start condition prefix.  After each one the start
 * condition stack pointer is restored to the value scon yielded.
 */
sect2:
	sect2 scon initforrule flexrule '\n'
	{
		scon_stk_ptr = $2;
	}
	| sect2 scon '{' sect2 '}'
	{
		scon_stk_ptr = $2;
	}
	| /* empty */
	;
216:
/* initforrule - empty production that resets the per-rule state and
 * starts a new rule.
 */
initforrule:
	/* empty */
	{
		/* Initialize for a parse of one rule. */
		varlength = false;
		variable_trail_rule = false;
		trlcontxt = false;

		rulelen = 0;
		headcnt = 0;
		trailcnt = 0;

		current_state_type = STATE_NORMAL;
		previous_continued_action = continued_action;
		in_rule = true;

		new_rule();
	}
	;
229:
/* flexrule - one complete rule.  A pattern is finished and branched into
 * the machines of the start conditions it applies to: those on the start
 * condition stack if any, otherwise every non-exclusive one.  Rules
 * introduced by '^' go into scbol[], all others into scset[].
 */
flexrule:
	'^' rule
	{
		pat = $2;
		finish_rule(pat, variable_trail_rule,
		    headcnt, trailcnt, previous_continued_action);

		if (scon_stk_ptr > 0) {
			for (i = 1; i <= scon_stk_ptr; ++i)
				scbol[scon_stk[i]] =
				    mkbranch(scbol[scon_stk[i]], pat);
		} else {
			/* Add to all non-exclusive start conditions,
			 * including the default (0) start condition.
			 */
			for (i = 1; i <= lastsc; ++i) {
				if (!scxclu[i])
					scbol[i] = mkbranch(scbol[i], pat);
			}
		}

		if (!bol_needed) {
			bol_needed = true;

			if (performance_report > 1)
				pinpoint_message(
				    "'^' operator results in sub-optimal performance");
		}
	}
	| rule
	{
		pat = $1;
		finish_rule(pat, variable_trail_rule,
		    headcnt, trailcnt, previous_continued_action);

		if (scon_stk_ptr > 0) {
			for (i = 1; i <= scon_stk_ptr; ++i)
				scset[scon_stk[i]] =
				    mkbranch(scset[scon_stk[i]], pat);
		} else {
			for (i = 1; i <= lastsc; ++i) {
				if (!scxclu[i])
					scset[i] = mkbranch(scset[i], pat);
			}
		}
	}
	| EOF_OP
	{
		if (scon_stk_ptr > 0) {
			build_eof_action();
		} else {
			/* This EOF applies to all start conditions
			 * which don't already have EOF actions.
			 */
			for (i = 1; i <= lastsc; ++i) {
				if (!sceof[i])
					scon_stk[++scon_stk_ptr] = i;
			}

			if (scon_stk_ptr == 0)
				warn(
				    "all start conditions already have <<EOF>> rules");
			else
				build_eof_action();
		}
	}
	| error
	{
		synerr(_("unrecognized rule"));
	}
	;
316:
/* scon_stk_ptr - empty production that captures the current value of the
 * start condition stack pointer as its semantic value.
 */
scon_stk_ptr:
	/* empty */
	{
		$$ = scon_stk_ptr;
	}
	;
320:
/* scon - optional start condition prefix of a rule.  In every case the
 * semantic value is the stack pointer as it was before the prefix pushed
 * anything, so that sect2 can restore it afterwards.
 */
scon:
	'<' scon_stk_ptr namelist2 '>'
	{
		$$ = $2;
	}
	| '<' '*' '>'
	{
		$$ = scon_stk_ptr;

		/* Push every start condition that is not already
		 * on the stack.
		 */
		for (i = 1; i <= lastsc; ++i) {
			int j;

			for (j = 1; j <= scon_stk_ptr; ++j) {
				if (scon_stk[j] == i)
					break;
			}

			if (j > scon_stk_ptr)
				scon_stk[++scon_stk_ptr] = i;
		}
	}
	| /* empty */
	{
		$$ = scon_stk_ptr;
	}
	;
344:
/* namelist2 - comma-separated list of start condition names inside <>. */
namelist2:
	namelist2 ',' sconname
	| sconname
	| error
	{
		synerr(_("bad start condition list"));
	}
	;
352:
/* sconname - one start condition name in a <> prefix.  The name is looked
 * up and pushed on the start condition stack unless it is undeclared
 * (reported) or already on the stack (warned about).
 */
sconname:
	NAME
	{
		scnum = sclookup(nmstr);

		if (scnum == 0) {
			format_pinpoint_message(
			    "undeclared start condition %s", nmstr);
		} else {
			for (i = 1; i <= scon_stk_ptr; ++i) {
				if (scon_stk[i] == scnum) {
					format_warn("<%s> specified twice",
					    scname[scnum]);
					break;
				}
			}

			/* The loop ran to completion: not a duplicate. */
			if (i > scon_stk_ptr)
				scon_stk[++scon_stk_ptr] = scnum;
		}
	}
	;
375:
/* rule - a pattern, with or without trailing context ("head/trail" via
 * re2, or a trailing '$').
 */
rule:
	re2 re
	{
		if (transchar[lastst[$2]] != SYM_EPSILON) {
			/* Provide final transition \now/ so it
			 * will be marked as a trailing context
			 * state.
			 */
			$2 = link_machines($2, mkstate(SYM_EPSILON));
		}

		mark_beginning_as_normal($2);
		current_state_type = STATE_NORMAL;

		if (previous_continued_action) {
			/* We need to treat this as variable trailing
			 * context so that the backup does not happen
			 * in the action but before the action switch
			 * statement.  If the backup happens in the
			 * action, then the rules "falling into" this
			 * one's action will *also* do the backup,
			 * erroneously.
			 */
			if (!varlength || headcnt != 0)
				warn(
				    "trailing context made variable due to preceding '|' action");

			/* Mark as variable. */
			varlength = true;
			headcnt = 0;
		}

		if (lex_compat || (varlength && headcnt == 0)) {
			/* Variable trailing context rule.
			 *
			 * Mark the first part of the rule as the
			 * accepting "head" part of a trailing
			 * context rule.
			 *
			 * By the way, we didn't do this at the
			 * beginning of this production because back
			 * then current_state_type was set up for a
			 * trail rule, and add_accept() can create
			 * a new state ...
			 */
			add_accept($1, num_rules | YY_TRAILING_HEAD_MASK);
			variable_trail_rule = true;
		} else {
			trailcnt = rulelen;
		}

		$$ = link_machines($1, $2);
	}
	| re2 re '$'
	{
		synerr(_("trailing context used twice"));
	}
	| re '$'
	{
		headcnt = 0;
		trailcnt = 1;
		rulelen = 1;
		varlength = false;

		current_state_type = STATE_TRAILING_CONTEXT;

		if (trlcontxt) {
			synerr(_("trailing context used twice"));
			$$ = mkstate(SYM_EPSILON);
		} else if (previous_continued_action) {
			/* See the comment in the rule for "re2 re"
			 * above.
			 */
			warn(
			    "trailing context made variable due to preceding '|' action");

			varlength = true;
		}

		if (lex_compat || varlength) {
			/* Again, see the comment in the rule for
			 * "re2 re" above.
			 */
			add_accept($1, num_rules | YY_TRAILING_HEAD_MASK);
			variable_trail_rule = true;
		}

		trlcontxt = true;

		/* The trailing context is an epsilon state followed by
		 * a newline state.
		 */
		eps = mkstate(SYM_EPSILON);
		$$ = link_machines($1,
		    link_machines(eps, mkstate('\n')));
	}
	| re
	{
		$$ = $1;

		if (trlcontxt) {
			if (lex_compat || (varlength && headcnt == 0)) {
				/* Both head and trail are
				 * variable-length.
				 */
				variable_trail_rule = true;
			} else {
				trailcnt = rulelen;
			}
		}
	}
	;
494:
495:
/* re - alternation of series; any use of '|' marks the rule as
 * variable-length.
 */
re:
	re '|' series
	{
		varlength = true;
		$$ = mkor($1, $3);
	}
	| series
	{
		$$ = $1;
	}
	;
505:
506:
/* re2 - the head of a "head/trail" trailing context pattern. */
re2:
	re '/'
	{
		/* This rule is written separately so the
		 * reduction will occur before the trailing
		 * series is parsed.
		 */
		if (trlcontxt)
			synerr(_("trailing context used twice"));
		else
			trlcontxt = true;

		if (varlength) {
			/* We hope the trailing context is
			 * fixed-length.
			 */
			varlength = false;
		} else {
			headcnt = rulelen;
		}

		/* Start counting the length of the trailing part. */
		rulelen = 0;

		current_state_type = STATE_TRAILING_CONTEXT;
		$$ = $1;
	}
	;
533:
/* series - concatenation of singletons.  The POSIX-style repeat operator
 * is handled here, so that it applies to the whole preceding series
 * rather than to the last singleton only.
 */
series:
	series singleton
	{
		/* This is where concatenation of adjacent patterns
		 * gets done.
		 */
		$$ = link_machines($1, $2);
	}
	| singleton
	{
		$$ = $1;
	}
	| series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
	{
		varlength = true;

		if ($3 > $5 || $3 < 0) {
			synerr(_("bad iteration values"));
			$$ = $1;
		} else if ($3 != 0) {
			$$ = mkrep($1, $3, $5);
		} else if ($5 <= 0) {
			synerr(_("bad iteration values"));
			$$ = $1;
		} else {
			/* A lower bound of zero: make 1..upper optional. */
			$$ = mkopt(mkrep($1, 1, $5));
		}
	}
	| series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
	{
		varlength = true;

		if ($3 <= 0) {
			synerr(_("iteration value must be positive"));
			$$ = $1;
		} else {
			$$ = mkrep($1, $3, INFINITE_REPEAT);
		}
	}
	| series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
	{
		/* The series could be something like "(foo)",
		 * in which case we have no idea what its length
		 * is, so we punt here.
		 */
		varlength = true;

		if ($3 <= 0) {
			synerr(_("iteration value must be positive")
			    );
			$$ = $1;
		} else {
			$$ = link_machines($1, copysingl($1, $3 - 1));
		}
	}
	;
608:
/* singleton - a single pattern element, optionally followed by a closure
 * or flex-style repeat operator.
 */
singleton:
	singleton '*'
	{
		varlength = true;
		$$ = mkclos($1);
	}
	| singleton '+'
	{
		varlength = true;
		$$ = mkposcl($1);
	}
	| singleton '?'
	{
		varlength = true;
		$$ = mkopt($1);
	}
	| singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
	{
		varlength = true;

		if ($3 > $5 || $3 < 0) {
			synerr(_("bad iteration values"));
			$$ = $1;
		} else if ($3 != 0) {
			$$ = mkrep($1, $3, $5);
		} else if ($5 <= 0) {
			synerr(_("bad iteration values"));
			$$ = $1;
		} else {
			/* A lower bound of zero: make 1..upper optional. */
			$$ = mkopt(mkrep($1, 1, $5));
		}
	}
	| singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
	{
		varlength = true;

		if ($3 <= 0) {
			synerr(_("iteration value must be positive"));
			$$ = $1;
		} else {
			$$ = mkrep($1, $3, INFINITE_REPEAT);
		}
	}
	| singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
	{
		/* The singleton could be something like "(foo)",
		 * in which case we have no idea what its length
		 * is, so we punt here.
		 */
		varlength = true;

		if ($3 <= 0) {
			synerr(_("iteration value must be positive"));
			$$ = $1;
		} else {
			$$ = link_machines($1, copysingl($1, $3 - 1));
		}
	}
	| '.'
	{
		/* Both classes are built once, on first use. */
		if (!madeany) {
			/* Create the '.' character class. */
			ccldot = cclinit();
			ccladd(ccldot, '\n');
			cclnegate(ccldot);

			if (useecs)
				mkeccl(ccltbl + cclmap[ccldot],
				    ccllen[ccldot], nextecm,
				    ecgroup, csize, csize);

			/* Create the (?s:'.') character class. */
			cclany = cclinit();
			cclnegate(cclany);

			if (useecs)
				mkeccl(ccltbl + cclmap[cclany],
				    ccllen[cclany], nextecm,
				    ecgroup, csize, csize);

			madeany = true;
		}

		++rulelen;

		$$ = mkstate(sf_dot_all() ? -cclany : -ccldot);
	}
	| fullccl
	{
		/* Sort characters for fast searching.
		 */
		qsort(ccltbl + cclmap[$1], ccllen[$1],
		    sizeof (*ccltbl), cclcmp);

		if (useecs)
			mkeccl(ccltbl + cclmap[$1], ccllen[$1],
			    nextecm, ecgroup, csize, csize);

		++rulelen;

		if (ccl_has_nl[$1])
			rule_has_nl[num_rules] = true;

		$$ = mkstate(-$1);
	}
	| PREVCCL
	{
		++rulelen;

		if (ccl_has_nl[$1])
			rule_has_nl[num_rules] = true;

		$$ = mkstate(-$1);
	}
	| '"' string '"'
	{
		$$ = $2;
	}
	| '(' re ')'
	{
		$$ = $2;
	}
	| CHAR
	{
		++rulelen;

		if ($1 == nlch)
			rule_has_nl[num_rules] = true;

		if (sf_case_ins() && has_case($1)) {
			/* create an alternation, as in (a|A) */
			$$ = mkor(mkstate($1), mkstate(reverse_case($1)));
		} else {
			$$ = mkstate($1);
		}
	}
	;
/* fullccl - bracketed character classes combined with the set difference
 * and set union operators (declared left-associative above).
 */
fullccl:
	fullccl CCL_OP_DIFF braceccl
	{
		$$ = ccl_set_diff($1, $3);
	}
	| fullccl CCL_OP_UNION braceccl
	{
		$$ = ccl_set_union($1, $3);
	}
	| braceccl
	;
1.1 deraadt 776:
/* braceccl - a bracketed character class, negated when it opens with '^'. */
braceccl:
	'[' ccl ']'
	{
		$$ = $2;
	}
	| '[' '^' ccl ']'
	{
		cclnegate($3);
		$$ = $3;
	}
	;
787:
/* ccl - the contents of a bracketed character class: ranges, single
 * characters and [:class:] expressions.  The empty alternative creates
 * the class and makes it the current one (currccl).
 */
ccl:
	ccl CHAR '-' CHAR
	{
		if (sf_case_ins()) {
			/* If one end of the range has case and the other
			 * does not, or the cases are different, then we're not
			 * sure what range the user is trying to express.
			 * Examples: [@-z] or [S-t]
			 */
			if (has_case($2) != has_case($4)
			    || (has_case($2) && (b_islower($2) != b_islower($4)))
			    || (has_case($2) && (b_isupper($2) != b_isupper($4))))
				format_warn3(
				    _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
				    $2, $4);

			/* If the range spans uppercase characters but not
			 * lowercase (or vice-versa), then should we automatically
			 * include lowercase characters in the range?
			 * Example: [@-_] spans [a-z] but not [A-Z]
			 */
			else if (!has_case($2) && !has_case($4) && !range_covers_case($2, $4))
				format_warn3(
				    _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
				    $2, $4);
		}

		if ($2 > $4) {
			synerr(_("negative range in character class"));
		} else {
			for (i = $2; i <= $4; ++i)
				ccladd($1, i);

			/* Keep track if this ccl is staying in
			 * alphabetical order.
			 */
			cclsorted = cclsorted && ($2 > lastchar);
			lastchar = $4;

			/* Do it again for upper/lowercase */
			if (sf_case_ins() && has_case($2) && has_case($4)) {
				$2 = reverse_case($2);
				$4 = reverse_case($4);

				for (i = $2; i <= $4; ++i)
					ccladd($1, i);

				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $4;
			}
		}

		$$ = $1;
	}
	| ccl CHAR
	{
		ccladd($1, $2);
		cclsorted = cclsorted && ($2 > lastchar);
		lastchar = $2;

		/* Do it again for upper/lowercase */
		if (sf_case_ins() && has_case($2)) {
			$2 = reverse_case($2);
			ccladd($1, $2);

			cclsorted = cclsorted && ($2 > lastchar);
			lastchar = $2;
		}

		$$ = $1;
	}
	| ccl ccl_expr
	{
		/* Too hard to properly maintain cclsorted. */
		cclsorted = false;
		$$ = $1;
	}
	| /* empty */
	{
		cclsorted = true;
		lastchar = 0;
		currccl = $$ = cclinit();
	}
	;
880:
/* ccl_expr - a [:class:] or negated [:^class:] expression inside a
 * bracketed character class; the matching characters are added to the
 * current class through CCL_EXPR / CCL_NEG_EXPR.
 */
ccl_expr:
	CCE_ALNUM
	{
		CCL_EXPR(isalnum);
	}
	| CCE_ALPHA
	{
		CCL_EXPR(isalpha);
	}
	| CCE_BLANK
	{
		CCL_EXPR(isblank);
	}
	| CCE_CNTRL
	{
		CCL_EXPR(iscntrl);
	}
	| CCE_DIGIT
	{
		CCL_EXPR(isdigit);
	}
	| CCE_GRAPH
	{
		CCL_EXPR(isgraph);
	}
	| CCE_LOWER
	{
		/* A case-insensitive scanner gets both cases. */
		CCL_EXPR(islower);
		if (sf_case_ins())
			CCL_EXPR(isupper);
	}
	| CCE_PRINT
	{
		CCL_EXPR(isprint);
	}
	| CCE_PUNCT
	{
		CCL_EXPR(ispunct);
	}
	| CCE_SPACE
	{
		CCL_EXPR(isspace);
	}
	| CCE_XDIGIT
	{
		CCL_EXPR(isxdigit);
	}
	| CCE_UPPER
	{
		/* A case-insensitive scanner gets both cases. */
		CCL_EXPR(isupper);
		if (sf_case_ins())
			CCL_EXPR(islower);
	}
	| CCE_NEG_ALNUM
	{
		CCL_NEG_EXPR(isalnum);
	}
	| CCE_NEG_ALPHA
	{
		CCL_NEG_EXPR(isalpha);
	}
	| CCE_NEG_BLANK
	{
		CCL_NEG_EXPR(isblank);
	}
	| CCE_NEG_CNTRL
	{
		CCL_NEG_EXPR(iscntrl);
	}
	| CCE_NEG_DIGIT
	{
		CCL_NEG_EXPR(isdigit);
	}
	| CCE_NEG_GRAPH
	{
		CCL_NEG_EXPR(isgraph);
	}
	| CCE_NEG_PRINT
	{
		CCL_NEG_EXPR(isprint);
	}
	| CCE_NEG_PUNCT
	{
		CCL_NEG_EXPR(ispunct);
	}
	| CCE_NEG_SPACE
	{
		CCL_NEG_EXPR(isspace);
	}
	| CCE_NEG_XDIGIT
	{
		CCL_NEG_EXPR(isxdigit);
	}
	| CCE_NEG_LOWER
	{
		/* Only a warning is issued in a case-insensitive
		 * scanner; nothing is added to the class.
		 */
		if (sf_case_ins())
			warn(_("[:^lower:] is ambiguous in case insensitive scanner"));
		else
			CCL_NEG_EXPR(islower);
	}
	| CCE_NEG_UPPER
	{
		if (sf_case_ins())
			warn(_("[:^upper:] ambiguous in case insensitive scanner"));
		else
			CCL_NEG_EXPR(isupper);
	}
	;
926:
/* string - the characters of a quoted string, linked one state at a time
 * onto an initial epsilon state.
 */
string:
	string CHAR
	{
		if ($2 == nlch)
			rule_has_nl[num_rules] = true;

		++rulelen;

		if (sf_case_ins() && has_case($2)) {
			/* Either case of the character matches. */
			$$ = mkor(mkstate($2), mkstate(reverse_case($2)));
		} else {
			$$ = mkstate($2);
		}

		$$ = link_machines($1, $$);
	}
	| /* empty */
	{
		$$ = mkstate(SYM_EPSILON);
	}
	;
945:
946: %%
947:
948:
949: /* build_eof_action - build the "<<EOF>>" action for the active start
950: * conditions
951: */
952:
953: void build_eof_action()
954: {
1.9 tedu 955: int i;
1.1 deraadt 956: char action_text[MAXLINE];
957:
958: for ( i = 1; i <= scon_stk_ptr; ++i )
959: {
960: if ( sceof[scon_stk[i]] )
961: format_pinpoint_message(
962: "multiple <<EOF>> rules for start condition %s",
963: scname[scon_stk[i]] );
964:
965: else
966: {
967: sceof[scon_stk[i]] = true;
1.9 tedu 968:
969: if (previous_continued_action /* && previous action was regular */)
970: add_action("YY_RULE_SETUP\n");
971:
972: snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
1.1 deraadt 973: scname[scon_stk[i]] );
974: add_action( action_text );
975: }
976: }
977:
978: line_directive_out( (FILE *) 0, 1 );
979:
980: /* This isn't a normal rule after all - don't count it as
981: * such, so we don't have any holes in the rule numbering
982: * (which make generating "rule can never match" warnings
983: * more difficult.
984: */
985: --num_rules;
986: ++num_eof_rules;
987: }
988:
989:
990: /* format_synerr - write out formatted syntax error */
991:
992: void format_synerr( msg, arg )
1.9 tedu 993: const char *msg, arg[];
1.1 deraadt 994: {
995: char errmsg[MAXLINE];
996:
1.9 tedu 997: (void) snprintf( errmsg, sizeof(errmsg), msg, arg );
1.1 deraadt 998: synerr( errmsg );
999: }
1000:
1001:
1002: /* synerr - report a syntax error */
1003:
1004: void synerr( str )
1.9 tedu 1005: const char *str;
1.1 deraadt 1006: {
1007: syntaxerror = true;
1008: pinpoint_message( str );
1009: }
1010:
1011:
1012: /* format_warn - write out formatted warning */
1013:
1014: void format_warn( msg, arg )
1.9 tedu 1015: const char *msg, arg[];
1.1 deraadt 1016: {
1017: char warn_msg[MAXLINE];
1018:
1.9 tedu 1019: snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1.1 deraadt 1020: warn( warn_msg );
1021: }
1022:
1023:
1024: /* warn - report a warning, unless -w was given */
1025:
1026: void warn( str )
1.9 tedu 1027: const char *str;
1.1 deraadt 1028: {
1029: line_warning( str, linenum );
1030: }
1031:
1032: /* format_pinpoint_message - write out a message formatted with one string,
1033: * pinpointing its location
1034: */
1035:
1036: void format_pinpoint_message( msg, arg )
1.9 tedu 1037: const char *msg, arg[];
1.1 deraadt 1038: {
1039: char errmsg[MAXLINE];
1040:
1.9 tedu 1041: snprintf( errmsg, sizeof(errmsg), msg, arg );
1.1 deraadt 1042: pinpoint_message( errmsg );
1043: }
1044:
1045:
1046: /* pinpoint_message - write out a message, pinpointing its location */
1047:
1048: void pinpoint_message( str )
1.9 tedu 1049: const char *str;
1.1 deraadt 1050: {
1051: line_pinpoint( str, linenum );
1052: }
1053:
1054:
1055: /* line_warning - report a warning at a given line, unless -w was given */
1056:
1057: void line_warning( str, line )
1.9 tedu 1058: const char *str;
1.1 deraadt 1059: int line;
1060: {
1061: char warning[MAXLINE];
1062:
1063: if ( ! nowarn )
1064: {
1.9 tedu 1065: snprintf( warning, sizeof(warning), "warning, %s", str );
1.1 deraadt 1066: line_pinpoint( warning, line );
1067: }
1068: }
1069:
1070:
1071: /* line_pinpoint - write out a message, pinpointing it at the given line */
1072:
1073: void line_pinpoint( str, line )
1.9 tedu 1074: const char *str;
1.1 deraadt 1075: int line;
1076: {
1.9 tedu 1077: fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1.1 deraadt 1078: }
1079:
1080:
/* yyerror - eat up an error message from the parser;
 *	     currently, messages are ignored
 */

void yyerror( const char *msg )
	{
	(void) msg;	/* intentionally unused */
	}
1088: }