Annotation of src/usr.bin/lex/parse.y, Revision 1.1.1.1
1.1 deraadt 1: /* parse.y - parser for flex input */
2:
3: %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
4: %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS
5:
6: %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
7: %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
8:
9: %{
10: /*-
11: * Copyright (c) 1990 The Regents of the University of California.
12: * All rights reserved.
13: *
14: * This code is derived from software contributed to Berkeley by
15: * Vern Paxson.
16: *
17: * The United States Government has rights in this work pursuant
18: * to contract no. DE-AC03-76SF00098 between the United States
19: * Department of Energy and the University of California.
20: *
21: * Redistribution and use in source and binary forms are permitted provided
22: * that: (1) source distributions retain this entire copyright notice and
23: * comment, and (2) distributions including binaries display the following
24: * acknowledgement: ``This product includes software developed by the
25: * University of California, Berkeley and its contributors'' in the
26: * documentation or other materials provided with the distribution and in
27: * all advertising materials mentioning features or use of this software.
28: * Neither the name of the University nor the names of its contributors may
29: * be used to endorse or promote products derived from this software without
30: * specific prior written permission.
31: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
32: * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
33: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
34: */
35:
36: /* $Header: /a/cvsroot/src/usr.bin/lex/parse.y,v 1.7 1995/05/05 05:35:39 jtc Exp $ */
37:
38:
39: /* Some versions of bison are broken in that they use alloca() but don't
40: * declare it properly. The following is the patented (just kidding!)
41: * #ifdef chud to fix the problem, courtesy of Francois Pinard.
42: */
43: #ifdef YYBISON
44: /* AIX requires this to be the first thing in the file. What a piece. */
45: # ifdef _AIX
46: #pragma alloca
47: # endif
48: #endif
49:
50: #include "flexdef.h"
51:
52: /* The remainder of the alloca() cruft has to come after including flexdef.h,
53: * so HAVE_ALLOCA_H is (possibly) defined.
54: */
55: #ifdef YYBISON
56: # ifdef __GNUC__
57: # ifndef alloca
58: # define alloca __builtin_alloca
59: # endif
60: # else
61: # if HAVE_ALLOCA_H
62: # include <alloca.h>
63: # else
64: # ifdef __hpux
65: void *alloca ();
66: # else
67: # ifdef __TURBOC__
68: # include <malloc.h>
69: # else
70: char *alloca ();
71: # endif
72: # endif
73: # endif
74: # endif
75: #endif
76:
77: /* Bletch, ^^^^ that was ugly! */
78:
79:
80: int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, rulelen;
81: int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
82:
83: int *scon_stk; /* start condition numbers in effect for the current rule; entries 1..scon_stk_ptr are used */
84: int scon_stk_ptr; /* index of the last used scon_stk entry; 0 when none are in effect */
85:
86: static int madeany = false; /* whether we've made the '.' character class */
87: int previous_continued_action; /* whether the previous rule's action was '|' */
88:
89: /* Expand a POSIX character class expression. */
90: #define CCL_EXPR(func) \
91: { \
92: int c; \
93: for ( c = 0; c < csize; ++c ) \
94: if ( isascii(c) && func(c) ) \
95: ccladd( currccl, c ); \
96: }
97:
98: /* While POSIX defines isblank(), it's not ANSI C. */
99: #define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
100:
101: /* On some over-ambitious machines, such as DEC Alpha's, the default
102: * token type is "long" instead of "int"; this leads to problems with
103: * declaring yylval in flexdef.h. But so far, all the yacc's I've seen
104: * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
105: * following should ensure that the default token type is "int".
106: */
107: #define YYSTYPE int
108:
109: %}
110:
111: %%
112: goal : initlex sect1 sect1end sect2 initforrule
113: { /* Reached once sections 1 and 2 are parsed: add the default rule. */
114: int def_rule;
115: /* Pattern: a new character class with nothing added to it, negated. */
116: pat = cclinit();
117: cclnegate( pat );
118:
119: def_rule = mkstate( -pat );
120:
121: /* Remember the number of the default rule so we
122: * don't generate "can't match" warnings for it.
123: */
124: default_rule = num_rules;
125:
126: finish_rule( def_rule, false, 0, 0 );
127: /* Offer the default rule in every start condition (1..lastsc). */
128: for ( i = 1; i <= lastsc; ++i )
129: scset[i] = mkbranch( scset[i], def_rule );
130: /* Its action is ECHO, or a fatal error when spprdflt is set. */
131: if ( spprdflt )
132: add_action(
133: "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
134: else
135: add_action( "ECHO" );
136:
137: add_action( ";\n\tYY_BREAK\n" );
138: }
139: ;
140: /* initlex - empty rule, reduced before section 1; installs the INITIAL start condition. */
141: initlex :
142: { /* initialize for processing rules */
143:
144: /* Create default DFA start condition. */
145: scinstal( "INITIAL", false );
146: }
147: ;
148: /* sect1 - section 1: any mix of start condition declarations and option lists. */
149: sect1 : sect1 startconddecl namelist1
150: | sect1 options
151: |
152: | error
153: { synerr( "unknown error processing section 1" ); }
154: ;
155: /* sect1end - end of section 1: run check_options() and set up an empty scon_stk. */
156: sect1end : SECTEND
157: {
158: check_options();
159: scon_stk = allocate_integer_array( lastsc + 1 ); /* + 1: entries are used from index 1 */
160: scon_stk_ptr = 0;
161: }
162: ;
163: /* startconddecl - sets xcluflg for the names that follow: false for SCDECL, true for XSCDECL. */
164: startconddecl : SCDECL
165: { xcluflg = false; }
166:
167: | XSCDECL
168: { xcluflg = true; }
169: ;
170: /* namelist1 - names being declared; each (text in nmstr) is installed with the current xcluflg. */
171: namelist1 : namelist1 NAME
172: { scinstal( nmstr, xcluflg ); }
173:
174: | NAME
175: { scinstal( nmstr, xcluflg ); }
176:
177: | error
178: { synerr( "bad start condition list" ); }
179: ;
180: /* options - an OPTION_OP token followed by a (possibly empty) list of options. */
181: options : OPTION_OP optionlist
182: ;
183: /* optionlist - zero or more option entries. */
184: optionlist : optionlist option
185: |
186: ;
187: /* option - the NAME-valued options; each stores a copy of the NAME text (nmstr) in its global. */
188: option : OPT_OUTFILE '=' NAME
189: {
190: outfilename = copy_string( nmstr );
191: did_outfilename = 1;
192: }
193: | OPT_PREFIX '=' NAME
194: { prefix = copy_string( nmstr ); }
195: | OPT_YYCLASS '=' NAME
196: { yyclass = copy_string( nmstr ); }
197: ;
198: /* sect2 - the rules. $2 (from scon) is the stack depth before this rule's start conditions were pushed; restoring it pops them. */
199: sect2 : sect2 scon initforrule flexrule '\n'
200: { scon_stk_ptr = $2; }
201: | sect2 scon '{' sect2 '}'
202: { scon_stk_ptr = $2; }
203: |
204: ;
205: /* initforrule - empty rule reduced before each flexrule (and before the default rule in goal). */
206: initforrule :
207: {
208: /* Initialize for a parse of one rule. */
209: trlcontxt = variable_trail_rule = varlength = false;
210: trailcnt = headcnt = rulelen = 0;
211: current_state_type = STATE_NORMAL;
212: previous_continued_action = continued_action; /* saved before this rule's own action is seen */
213: in_rule = true;
214:
215: new_rule();
216: }
217: ;
218: /* flexrule - one complete rule: finish its pattern and attach it to the start conditions it applies to. */
219: flexrule : '^' rule
220: {
221: pat = $2;
222: finish_rule( pat, variable_trail_rule,
223: headcnt, trailcnt );
224: /* '^' rules are added to the scbol lists; the unanchored alternative below uses scset. */
225: if ( scon_stk_ptr > 0 )
226: {
227: for ( i = 1; i <= scon_stk_ptr; ++i )
228: scbol[scon_stk[i]] =
229: mkbranch( scbol[scon_stk[i]],
230: pat );
231: }
232:
233: else
234: {
235: /* Add to all non-exclusive start conditions,
236: * including the default (0) start condition.
237: */
238:
239: for ( i = 1; i <= lastsc; ++i )
240: if ( ! scxclu[i] )
241: scbol[i] = mkbranch( scbol[i],
242: pat );
243: }
244: /* Record, the first time only, that a '^' rule exists. */
245: if ( ! bol_needed )
246: {
247: bol_needed = true;
248:
249: if ( performance_report > 1 )
250: pinpoint_message(
251: "'^' operator results in sub-optimal performance" );
252: }
253: }
254:
255: | rule
256: {
257: pat = $1;
258: finish_rule( pat, variable_trail_rule,
259: headcnt, trailcnt );
260:
261: if ( scon_stk_ptr > 0 )
262: {
263: for ( i = 1; i <= scon_stk_ptr; ++i )
264: scset[scon_stk[i]] =
265: mkbranch( scset[scon_stk[i]],
266: pat );
267: }
268:
269: else
270: {
271: for ( i = 1; i <= lastsc; ++i )
272: if ( ! scxclu[i] )
273: scset[i] =
274: mkbranch( scset[i],
275: pat );
276: }
277: }
278:
279: | EOF_OP
280: {
281: if ( scon_stk_ptr > 0 )
282: build_eof_action();
283:
284: else
285: {
286: /* This EOF applies to all start conditions
287: * which don't already have EOF actions.
288: */
289: for ( i = 1; i <= lastsc; ++i )
290: if ( ! sceof[i] )
291: scon_stk[++scon_stk_ptr] = i;
292:
293: if ( scon_stk_ptr == 0 )
294: warn(
295: "all start conditions already have <<EOF>> rules" );
296:
297: else
298: build_eof_action();
299: }
300: }
301:
302: | error
303: { synerr( "unrecognized rule" ); }
304: ;
305: /* scon_stk_ptr - empty marker rule; its value is the stack depth at the moment it is reduced. */
306: scon_stk_ptr :
307: { $$ = scon_stk_ptr; }
308: ;
309: /* scon - optional start condition prefix; pushes onto scon_stk and yields the depth from before the push. */
310: scon : '<' scon_stk_ptr namelist2 '>'
311: { $$ = $2; }
312:
313: | '<' '*' '>'
314: {
315: $$ = scon_stk_ptr;
316: /* Push every start condition that is not already on the stack. */
317: for ( i = 1; i <= lastsc; ++i )
318: {
319: int j;
320:
321: for ( j = 1; j <= scon_stk_ptr; ++j )
322: if ( scon_stk[j] == i )
323: break;
324:
325: if ( j > scon_stk_ptr )
326: scon_stk[++scon_stk_ptr] = i;
327: }
328: }
329:
330: |
331: { $$ = scon_stk_ptr; }
332: ;
333: /* namelist2 - comma-separated start condition names inside '<' ... '>'. */
334: namelist2 : namelist2 ',' sconname
335:
336: | sconname
337:
338: | error
339: { synerr( "bad start condition list" ); }
340: ;
341: /* sconname - look up one name (nmstr) and push its number on scon_stk unless undeclared or already there. */
342: sconname : NAME
343: {
344: if ( (scnum = sclookup( nmstr )) == 0 )
345: format_pinpoint_message(
346: "undeclared start condition %s",
347: nmstr );
348: else
349: {
350: for ( i = 1; i <= scon_stk_ptr; ++i )
351: if ( scon_stk[i] == scnum )
352: {
353: format_warn(
354: "<%s> specified twice",
355: scname[scnum] );
356: break;
357: }
358: /* The loop ran to completion, so scnum is not on the stack yet. */
359: if ( i > scon_stk_ptr )
360: scon_stk[++scon_stk_ptr] = scnum;
361: }
362: }
363: ;
364: /* rule - a pattern with its optional trailing context: "re2 re" (head '/' trail), "re '$'", or a plain re. */
365: rule : re2 re
366: {
367: if ( transchar[lastst[$2]] != SYM_EPSILON )
368: /* Provide final transition \now/ so it
369: * will be marked as a trailing context
370: * state.
371: */
372: $2 = link_machines( $2,
373: mkstate( SYM_EPSILON ) );
374:
375: mark_beginning_as_normal( $2 );
376: current_state_type = STATE_NORMAL;
377:
378: if ( previous_continued_action )
379: {
380: /* We need to treat this as variable trailing
381: * context so that the backup does not happen
382: * in the action but before the action switch
383: * statement. If the backup happens in the
384: * action, then the rules "falling into" this
385: * one's action will *also* do the backup,
386: * erroneously.
387: */
388: if ( ! varlength || headcnt != 0 )
389: warn(
390: "trailing context made variable due to preceding '|' action" );
391:
392: /* Mark as variable. */
393: varlength = true;
394: headcnt = 0;
395: }
396:
397: if ( lex_compat || (varlength && headcnt == 0) )
398: { /* variable trailing context rule */
399: /* Mark the first part of the rule as the
400: * accepting "head" part of a trailing
401: * context rule.
402: *
403: * By the way, we didn't do this at the
404: * beginning of this production because back
405: * then current_state_type was set up for a
406: * trail rule, and add_accept() can create
407: * a new state ...
408: */
409: add_accept( $1,
410: num_rules | YY_TRAILING_HEAD_MASK );
411: variable_trail_rule = true;
412: }
413:
414: else
415: trailcnt = rulelen;
416:
417: $$ = link_machines( $1, $2 );
418: }
419:
420: | re2 re '$'
421: { synerr( "trailing context used twice" ); }
422:
423: | re '$'
424: {
425: headcnt = 0;
426: trailcnt = 1;
427: rulelen = 1;
428: varlength = false;
429:
430: current_state_type = STATE_TRAILING_CONTEXT;
431:
432: if ( trlcontxt )
433: {
434: synerr( "trailing context used twice" );
435: $$ = mkstate( SYM_EPSILON );
436: }
437: /* NOTE(review): the $$ assigned in the error branch above is overwritten by the link_machines() result below. */
438: else if ( previous_continued_action )
439: {
440: /* See the comment in the rule for "re2 re"
441: * above.
442: */
443: warn(
444: "trailing context made variable due to preceding '|' action" );
445:
446: varlength = true;
447: }
448:
449: if ( lex_compat || varlength )
450: {
451: /* Again, see the comment in the rule for
452: * "re2 re" above.
453: */
454: add_accept( $1,
455: num_rules | YY_TRAILING_HEAD_MASK );
456: variable_trail_rule = true;
457: }
458:
459: trlcontxt = true;
460: /* '$' is built as trailing context consisting of a single '\n' state. */
461: eps = mkstate( SYM_EPSILON );
462: $$ = link_machines( $1,
463: link_machines( eps, mkstate( '\n' ) ) );
464: }
465:
466: | re
467: {
468: $$ = $1;
469:
470: if ( trlcontxt )
471: {
472: if ( lex_compat || (varlength && headcnt == 0) )
473: /* Both head and trail are
474: * variable-length.
475: */
476: variable_trail_rule = true;
477: else
478: trailcnt = rulelen;
479: }
480: }
481: ;
482:
483: /* re - alternation of series; any '|' marks the pattern as variable-length. */
484: re : re '|' series
485: {
486: varlength = true;
487: $$ = mkor( $1, $3 );
488: }
489:
490: | series
491: { $$ = $1; }
492: ;
493:
494: /* re2 - the head of a "head/trail" pattern, reduced as soon as the '/' is seen. */
495: re2 : re '/'
496: {
497: /* This rule is written separately so the
498: * reduction will occur before the trailing
499: * series is parsed.
500: */
501:
502: if ( trlcontxt )
503: synerr( "trailing context used twice" );
504: else
505: trlcontxt = true;
506:
507: if ( varlength )
508: /* We hope the trailing context is
509: * fixed-length.
510: */
511: varlength = false;
512: else
513: headcnt = rulelen;
514: /* Start counting afresh so rulelen measures only the trailing part. */
515: rulelen = 0;
516:
517: current_state_type = STATE_TRAILING_CONTEXT;
518: $$ = $1;
519: }
520: ;
521: /* series - one or more singletons in sequence, joined with link_machines(). */
522: series : series singleton
523: {
524: /* This is where concatenation of adjacent patterns
525: * gets done.
526: */
527: $$ = link_machines( $1, $2 );
528: }
529:
530: | singleton
531: { $$ = $1; }
532: ;
533: /* singleton - one pattern element, optionally followed by a repetition operator (which sets varlength). */
534: singleton : singleton '*'
535: {
536: varlength = true;
537:
538: $$ = mkclos( $1 );
539: }
540:
541: | singleton '+'
542: {
543: varlength = true;
544: $$ = mkposcl( $1 );
545: }
546:
547: | singleton '?'
548: {
549: varlength = true;
550: $$ = mkopt( $1 );
551: }
552:
553: | singleton '{' NUMBER ',' NUMBER '}'
554: {
555: varlength = true;
556:
557: if ( $3 > $5 || $3 < 0 )
558: {
559: synerr( "bad iteration values" );
560: $$ = $1;
561: }
562: else
563: {
564: if ( $3 == 0 )
565: {
566: if ( $5 <= 0 )
567: {
568: synerr(
569: "bad iteration values" );
570: $$ = $1;
571: }
572: else
573: $$ = mkopt(
574: mkrep( $1, 1, $5 ) );
575: }
576: else
577: $$ = mkrep( $1, $3, $5 );
578: }
579: }
580:
581: | singleton '{' NUMBER ',' '}'
582: {
583: varlength = true;
584:
585: if ( $3 <= 0 )
586: {
587: synerr( "iteration value must be positive" );
588: $$ = $1;
589: }
590:
591: else
592: $$ = mkrep( $1, $3, INFINITY );
593: }
594:
595: | singleton '{' NUMBER '}'
596: {
597: /* The singleton could be something like "(foo)",
598: * in which case we have no idea what its length
599: * is, so we punt here.
600: */
601: varlength = true;
602:
603: if ( $3 <= 0 )
604: {
605: synerr( "iteration value must be positive" );
606: $$ = $1;
607: }
608:
609: else
610: $$ = link_machines( $1,
611: copysingl( $1, $3 - 1 ) );
612: }
613:
614: | '.'
615: {
616: if ( ! madeany )
617: {
618: /* Create the '.' character class. */
619: anyccl = cclinit();
620: ccladd( anyccl, '\n' );
621: cclnegate( anyccl );
622:
623: if ( useecs )
624: mkeccl( ccltbl + cclmap[anyccl],
625: ccllen[anyccl], nextecm,
626: ecgroup, csize, csize );
627:
628: madeany = true;
629: }
630:
631: ++rulelen;
632:
633: $$ = mkstate( -anyccl );
634: }
635:
636: | fullccl
637: {
638: if ( ! cclsorted )
639: /* Sort characters for fast searching. We
640: * use a shell sort since this list could
641: * be large.
642: */
643: cshell( ccltbl + cclmap[$1], ccllen[$1], true );
644:
645: if ( useecs )
646: mkeccl( ccltbl + cclmap[$1], ccllen[$1],
647: nextecm, ecgroup, csize, csize );
648:
649: ++rulelen;
650:
651: $$ = mkstate( -$1 );
652: }
653:
654: | PREVCCL
655: {
656: ++rulelen;
657:
658: $$ = mkstate( -$1 );
659: }
660:
661: | '"' string '"'
662: { $$ = $2; }
663:
664: | '(' re ')'
665: { $$ = $2; }
666:
667: | CHAR
668: {
669: ++rulelen;
670:
671: if ( caseins && $1 >= 'A' && $1 <= 'Z' )
672: $1 = clower( $1 );
673:
674: $$ = mkstate( $1 );
675: }
676: ;
677: /* fullccl - a bracketed character class; a leading '^' negates it via cclnegate(). */
678: fullccl : '[' ccl ']'
679: { $$ = $2; }
680:
681: | '[' '^' ccl ']'
682: {
683: cclnegate( $3 );
684: $$ = $3;
685: }
686: ;
687: /* ccl - contents of a character class; the empty alternative creates the class and each item adds to it. */
688: ccl : ccl CHAR '-' CHAR
689: {
690: if ( caseins )
691: {
692: if ( $2 >= 'A' && $2 <= 'Z' )
693: $2 = clower( $2 );
694: if ( $4 >= 'A' && $4 <= 'Z' )
695: $4 = clower( $4 );
696: }
697:
698: if ( $2 > $4 )
699: synerr( "negative range in character class" );
700:
701: else
702: {
703: for ( i = $2; i <= $4; ++i )
704: ccladd( $1, i );
705:
706: /* Keep track if this ccl is staying in
707: * alphabetical order.
708: */
709: cclsorted = cclsorted && ($2 > lastchar);
710: lastchar = $4;
711: }
712:
713: $$ = $1;
714: }
715:
716: | ccl CHAR
717: {
718: if ( caseins && $2 >= 'A' && $2 <= 'Z' )
719: $2 = clower( $2 );
720:
721: ccladd( $1, $2 );
722: cclsorted = cclsorted && ($2 > lastchar);
723: lastchar = $2;
724: $$ = $1;
725: }
726:
727: | ccl ccl_expr
728: {
729: /* Too hard to properly maintain cclsorted. */
730: cclsorted = false;
731: $$ = $1;
732: }
733:
734: |
735: {
736: cclsorted = true;
737: lastchar = 0;
738: currccl = $$ = cclinit(); /* currccl is the class that CCL_EXPR adds to */
739: }
740: ;
741: /* ccl_expr - a character class expression token; CCL_EXPR adds each matching ASCII character below csize to currccl. */
742: ccl_expr: CCE_ALNUM { CCL_EXPR(isalnum) }
743: | CCE_ALPHA { CCL_EXPR(isalpha) }
744: | CCE_BLANK { CCL_EXPR(IS_BLANK) }
745: | CCE_CNTRL { CCL_EXPR(iscntrl) }
746: | CCE_DIGIT { CCL_EXPR(isdigit) }
747: | CCE_GRAPH { CCL_EXPR(isgraph) }
748: | CCE_LOWER { CCL_EXPR(islower) }
749: | CCE_PRINT { CCL_EXPR(isprint) }
750: | CCE_PUNCT { CCL_EXPR(ispunct) }
751: | CCE_SPACE { CCL_EXPR(isspace) }
752: | CCE_UPPER { /* with caseins set, the lower-case letters are added instead */
753: if ( caseins )
754: CCL_EXPR(islower)
755: else
756: CCL_EXPR(isupper)
757: }
758: | CCE_XDIGIT { CCL_EXPR(isxdigit) }
759: ;
760: /* string - the CHARs of a quoted string: starts as an epsilon state and links on one state per CHAR. */
761: string : string CHAR
762: {
763: if ( caseins && $2 >= 'A' && $2 <= 'Z' )
764: $2 = clower( $2 );
765:
766: ++rulelen;
767:
768: $$ = link_machines( $1, mkstate( $2 ) );
769: }
770:
771: |
772: { $$ = mkstate( SYM_EPSILON ); }
773: ;
774:
775: %%
776:
777:
778: /* build_eof_action - build the "<<EOF>>" action for the active start
779: * conditions: one "case YY_STATE_EOF(name):" label per scon_stk entry that has
780: * no EOF rule yet; entries that already have one are reported instead. */
781:
782: void build_eof_action()
783: {
784: register int i;
785: char action_text[MAXLINE];
786:
787: for ( i = 1; i <= scon_stk_ptr; ++i )
788: {
789: if ( sceof[scon_stk[i]] )
790: format_pinpoint_message(
791: "multiple <<EOF>> rules for start condition %s",
792: scname[scon_stk[i]] );
793:
794: else
795: { /* Bounded write: an over-long name is truncated, not overflowed. */
796: sceof[scon_stk[i]] = true;
797: (void) snprintf( action_text, sizeof( action_text ),
798: "case YY_STATE_EOF(%s):\n", scname[scon_stk[i]] );
799: add_action( action_text );
800: }
801: }
802:
803: line_directive_out( (FILE *) 0, 1 );
804:
805: /* This isn't a normal rule after all - don't count it as
806: * such, so we don't have any holes in the rule numbering
807: * (which make generating "rule can never match" warnings
808: * more difficult).
809: */
810: --num_rules;
811: ++num_eof_rules;
812: }
813:
814:
815: /* format_synerr - write out formatted syntax error */
816:
817: void format_synerr( msg, arg )
818: char msg[], arg[];
819: {
820: char errmsg[MAXLINE];
821:
822: (void) sprintf( errmsg, msg, arg );
823: synerr( errmsg );
824: }
825:
826:
827: /* synerr - report a syntax error: set the global syntaxerror flag, then
828: * print str through pinpoint_message(). */
829: void synerr( str )
830: char str[];
831: {
832: syntaxerror = true;
833: pinpoint_message( str );
834: }
835:
836:
837: /* format_warn - write out formatted warning. msg is a printf-style format
838: * consuming the one string arg; the result is truncated to MAXLINE. */
839: void format_warn( msg, arg )
840: char msg[], arg[];
841: {
842: char warn_msg[MAXLINE];
843:
844: (void) snprintf( warn_msg, sizeof( warn_msg ), msg, arg );
845: warn( warn_msg );
846: }
847:
848:
849: /* warn - report a warning at the current line (linenum); line_warning()
850: * does the reporting and is what honors the nowarn flag. */
851: void warn( str )
852: char str[];
853: {
854: line_warning( str, linenum );
855: }
856:
857: /* format_pinpoint_message - write out a message formatted with one string,
858: * pinpointing its location. The formatted text is truncated to MAXLINE,
859: * since arg may be a name taken from the input (e.g. nmstr). */
860:
861: void format_pinpoint_message( msg, arg )
862: char msg[], arg[];
863: {
864: char errmsg[MAXLINE];
865:
866: (void) snprintf( errmsg, sizeof( errmsg ), msg, arg );
867: pinpoint_message( errmsg );
868: }
869:
870:
871: /* pinpoint_message - write out a message, pinpointing its location as the
872: * current line (linenum); the output itself is done by line_pinpoint(). */
873: void pinpoint_message( str )
874: char str[];
875: {
876: line_pinpoint( str, linenum );
877: }
878:
879:
880: /* line_warning - report str, prefixed with "warning, ", at the given line
881: * unless nowarn is set (-w); the prefixed text is truncated to MAXLINE. */
882: void line_warning( str, line )
883: char str[];
884: int line;
885: {
886: char warning[MAXLINE];
887:
888: if ( ! nowarn )
889: {
890: (void) snprintf( warning, sizeof( warning ), "warning, %s", str );
891: line_pinpoint( warning, line );
892: }
893: }
894:
895:
896: /* line_pinpoint - write out a message, pinpointing it at the given line.
897: * Output goes to stderr as: "<infilename>", line <line>: <str> */
898: void line_pinpoint( str, line )
899: char str[];
900: int line;
901: {
902: fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str );
903: }
904:
905:
906: /* yyerror - eat up an error message from the parser;
907: * currently, messages are ignored (the body is intentionally empty)
908: */
909:
910: void yyerror( msg )
911: char msg[];
912: {
913: }