Annotation of src/usr.bin/lex/scan.l, Revision 1.8
1.8 ! millert 1: /* $OpenBSD: scan.l,v 1.7 2002/05/31 22:49:29 deraadt Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /* scan.l - scanner for flex input */
4:
5: %{
6: /*-
7: * Copyright (c) 1990 The Regents of the University of California.
8: * All rights reserved.
9: *
10: * This code is derived from software contributed to Berkeley by
11: * Vern Paxson.
12: *
13: * The United States Government has rights in this work pursuant
14: * to contract no. DE-AC03-76SF00098 between the United States
15: * Department of Energy and the University of California.
16: *
1.4 deraadt 17: * Redistribution and use in source and binary forms, with or without
1.8 ! millert 18: * modification, are permitted provided that the following conditions
! 19: * are met:
! 20: *
! 21: * 1. Redistributions of source code must retain the above copyright
! 22: * notice, this list of conditions and the following disclaimer.
! 23: * 2. Redistributions in binary form must reproduce the above copyright
! 24: * notice, this list of conditions and the following disclaimer in the
! 25: * documentation and/or other materials provided with the distribution.
! 26: *
! 27: * Neither the name of the University nor the names of its contributors
! 28: * may be used to endorse or promote products derived from this software
! 29: * without specific prior written permission.
! 30: *
! 31: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
! 32: * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
! 33: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! 34: * PURPOSE.
1.1 deraadt 35: */
36:
1.8 ! millert 37: /* $Header: /cvs/src/usr.bin/lex/scan.l,v 1.7 2002/05/31 22:49:29 deraadt Exp $ */
1.1 deraadt 38:
39: #include "flexdef.h"
40: #include "parse.h"
41:
/* Append the text of the current match to the action being collected. */
#define ACTION_ECHO add_action( yytext )

/* Call action_define( def, 1 ) when should_define is nonzero.
 * Wrapped in do/while so it behaves as a single statement.
 */
#define ACTION_IFDEF(def, should_define) \
	do \
		{ \
		if ( should_define ) \
			action_define( def, 1 ); \
		} \
	while ( 0 )

#define MARK_END_OF_PROLOG mark_prolog();

/* Name and return type of the generated scanning routine. */
#define YY_DECL \
	int flexscan()

/* Return the first matched character (as an unsigned value) as a CHAR
 * token.  Single statement; use as "RETURNCHAR;".
 */
#define RETURNCHAR \
	do \
		{ \
		yylval = (unsigned char) yytext[0]; \
		return CHAR; \
		} \
	while ( 0 )

/* Copy the matched text into nmstr (truncating to its size) and return
 * a NAME token.  Single statement; use as "RETURNNAME;".
 */
#define RETURNNAME \
	do \
		{ \
		strlcpy( nmstr, yytext, sizeof nmstr ); \
		return NAME; \
		} \
	while ( 0 )

/* Push str[start..] back onto the input, last character first, so the
 * scanner reads it again in its original order.  Uses the variable `i'
 * that must be in scope at the point of use.  The length is converted
 * to int before subtracting so an empty string yields -1 and the loop
 * simply does not run.
 */
#define PUT_BACK_STRING(str, start) \
	do \
		{ \
		for ( i = (int) strlen( str ) - 1; i >= (start); --i ) \
			unput((str)[i]); \
		} \
	while ( 0 )

/* Set `reject' when all_upper( str ) is true. */
#define CHECK_REJECT(str) \
	do \
		{ \
		if ( all_upper( str ) ) \
			reject = true; \
		} \
	while ( 0 )

/* Set `yymore_used' when all_lower( str ) is true. */
#define CHECK_YYMORE(str) \
	do \
		{ \
		if ( all_lower( str ) ) \
			yymore_used = true; \
		} \
	while ( 0 )
%}
73: %}
74:
75: %option caseless nodefault outfile="scan.c" stack noyy_top_state
76: %option nostdinit
77:
78: %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
79: %x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
80: %x OPTION LINEDIR
81:
	/* Blank handling: WS is one or more blanks, OPTWS zero or more,
	 * NOT_WS a single character that is neither a blank nor a newline.
	 */
82: WS [[:blank:]]+
83: OPTWS [[:blank:]]*
84: NOT_WS [^[:blank:]\n]
85:
	/* Line ending: a newline optionally preceded by a carriage return. */
86: NL \r?\n
87:
	/* NAME: a letter or underscore followed by letters, digits,
	 * underscores or hyphens.  NOT_NAME: a run of characters that are
	 * not letters, underscores, asterisks or newlines.
	 */
88: NAME ([[:alpha:]_][[:alnum:]_-]*)
89: NOT_NAME [^[:alpha:]_*\n]+
90:
	/* Start-condition names use the same syntax as NAME. */
91: SCNAME {NAME}
92:
	/* Escape sequence: a backslash followed by any non-newline
	 * character, one to three octal digits, or 'x' and one or two hex
	 * digits.
	 */
93: ESCSEQ (\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))
94:
	/* Character-class pieces.  FIRST_CCL_CHAR may be ']' (only
	 * backslash and newline are excluded); CCL_CHAR may not.  CCL_EXPR
	 * is a "[:word:]" class expression.
	 */
95: FIRST_CCL_CHAR ([^\\\n]|{ESCSEQ})
96: CCL_CHAR ([^\\\n\]]|{ESCSEQ})
97: CCL_EXPR ("[:"[[:alpha:]]+":]")
98:
	/* Letters accepted after '%' that the <INITIAL> rules ignore. */
99: LEXOPT [aceknopr]
100:
101: %%
	/* Declarations placed ahead of the first rule; the rule actions
	 * below use them.  bracelevel tracks brace nesting while action or
	 * prolog code is collected, didadef records whether a definition
	 * body was seen in <PICKUPDEF>, and indented_code marks a code
	 * block that was introduced by indentation rather than by "%{".
	 */
102: static int bracelevel, didadef, indented_code;
103: static int doing_rule_action = false;
	/* Sense of the option currently being read in <OPTION>. */
104: static int option_sense;
105:
106: int doing_codeblock = false;
	/* Scratch index used by PUT_BACK_STRING and the <PICKUPDEF> trim loop. */
107: int i;
	/* nmdef holds the text of a name definition; myesc() is applied to
	 * escape sequences in the {ESCSEQ} rule.
	 */
108: Char nmdef[MAXLINE], myesc();
109:
110:
	/* <INITIAL>: rules for section 1 of the input (everything up to
	 * the first "%%" line).
	 */
111: <INITIAL>{
	/* A line starting with blanks is indented code. */
112: ^{WS} indented_code = true; BEGIN(CODEBLOCK);
	/* A comment at the start of a line is echoed through <COMMENT>. */
113: ^"/*" ACTION_ECHO; yy_push_state( COMMENT );
114: ^#{OPTWS}line{WS} yy_push_state( LINEDIR );
115: ^"%s"{NAME}? return SCDECL;
116: ^"%x"{NAME}? return XSCDECL;
	/* "%{" opens a non-indented code block. */
117: ^"%{".*{NL} {
118: ++linenum;
119: line_directive_out( (FILE *) 0, 1 );
120: indented_code = false;
121: BEGIN(CODEBLOCK);
122: }
123:
124: {WS} /* discard */
125:
	/* "%%" ends section 1: switch to the section 2 prolog and report
	 * SECTEND.
	 */
126: ^"%%".* {
127: sectnum = 2;
128: bracelevel = 0;
129: mark_defs1();
130: line_directive_out( (FILE *) 0, 1 );
131: BEGIN(SECT2PROLOG);
132: return SECTEND;
133: }
134:
135: ^"%pointer".*{NL} yytext_is_array = false; ++linenum;
136: ^"%array".*{NL} yytext_is_array = true; ++linenum;
137:
138: ^"%option" BEGIN(OPTION); return OPTION_OP;
139:
	/* '%' followed by one of the LEXOPT letters is accepted and
	 * ignored; only the line count is updated.
	 */
140: ^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL} ++linenum; /* ignore */
141: ^"%"{LEXOPT}{WS}.*{NL} ++linenum; /* ignore */
142:
143: ^"%"[^sxaceknopr{}].* synerr( _( "unrecognized '%' directive" ) );
144:
	/* A name at the start of a line begins a definition: remember the
	 * name in nmstr and collect its body in <PICKUPDEF>.
	 */
145: ^{NAME} {
1.7 deraadt 146: strlcpy( nmstr, yytext, sizeof nmstr );
1.1 deraadt 147: didadef = false;
148: BEGIN(PICKUPDEF);
149: }
150:
	/* A name that is not at the start of a line is returned as NAME. */
151: {SCNAME} RETURNNAME;
152: ^{OPTWS}{NL} ++linenum; /* allows blank lines in section 1 */
153: {OPTWS}{NL} ACTION_ECHO; ++linenum; /* maybe end of comment line */
154: }
155:
156:
	/* <COMMENT>: echo the body of a comment with ACTION_ECHO, counting
	 * lines, until the closing delimiter pops back to the state that
	 * pushed <COMMENT>.
	 */
157: <COMMENT>{
158: "*/" ACTION_ECHO; yy_pop_state();
	/* A lone '*' that does not start the closing delimiter. */
159: "*" ACTION_ECHO;
160: [^*\n]+ ACTION_ECHO;
161: [^*\n]*{NL} ++linenum; ACTION_ECHO;
162: }
163:
	/* <LINEDIR>: the remainder of a "#line" directive.  A number sets
	 * linenum, a quoted string replaces infilename, and the newline
	 * pops back to the previous state.
	 */
164: <LINEDIR>{
165: \n yy_pop_state();
166: [[:digit:]]+ linenum = myctoi( yytext );
167:
168: \"[^"\n]*\" {
	/* Drop the old name, copy the text after the opening quote, then
	 * overwrite the last character (the closing quote) with NUL.
	 */
169: flex_free( (void *) infilename );
170: infilename = copy_string( yytext + 1 );
171: infilename[strlen( infilename ) - 1] = '\0';
172: }
173: . /* ignore spurious characters */
174: }
175:
	/* <CODEBLOCK>: code in section 1, echoed with ACTION_ECHO.  A
	 * "%}" line returns to <INITIAL>; an indented code block ends at
	 * the first newline.
	 */
176: <CODEBLOCK>{
177: ^"%}".*{NL} ++linenum; BEGIN(INITIAL);
178:
179: {NAME}|{NOT_NAME}|. ACTION_ECHO;
180:
181: {NL} {
182: ++linenum;
183: ACTION_ECHO;
	/* Indented code lasts for a single line only. */
184: if ( indented_code )
185: BEGIN(INITIAL);
186: }
187: }
188:
189:
	/* <PICKUPDEF>: collect the body of a name definition whose name
	 * was stored in nmstr by <INITIAL>.
	 */
190: <PICKUPDEF>{
191: {WS} /* separates name and definition */
192:
	/* The definition text: from the first non-blank character to the
	 * end of the line.  It is copied into nmdef (truncated to the
	 * buffer size), trailing spaces and tabs are removed, and the
	 * pair is passed to ndinstal().
	 */
193: {NOT_WS}.* {
1.7 deraadt 194: strlcpy( (char *) nmdef, yytext, sizeof nmdef);
1.1 deraadt 195:
196: /* Skip trailing whitespace. */
197: for ( i = strlen( (char *) nmdef ) - 1;
198: i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t');
199: --i )
200: ;
201:
202: nmdef[i + 1] = '\0';
203:
204: ndinstal( nmstr, nmdef );
205: didadef = true;
206: }
207:
	/* End of line: a name with no definition text is an error. */
208: {NL} {
209: if ( ! didadef )
210: synerr( _( "incomplete name definition" ) );
211: BEGIN(INITIAL);
212: ++linenum;
213: }
214: }
215:
216:
	/* <OPTION>: the words following "%option".  Whitespace resets
	 * option_sense to true and each "no" prefix inverts it, so every
	 * option word below is handled with option_sense reflecting the
	 * prefixes that preceded it.  A newline returns to <INITIAL>.
	 */
217: <OPTION>{
218: {NL} ++linenum; BEGIN(INITIAL);
219: {WS} option_sense = true;
220:
221: "=" return '=';
222:
223: no option_sense = ! option_sense;
224:
225: 7bit csize = option_sense ? 128 : 256;
226: 8bit csize = option_sense ? 256 : 128;
227:
228: align long_align = option_sense;
229: always-interactive {
230: action_define( "YY_ALWAYS_INTERACTIVE", option_sense );
231: }
232: array yytext_is_array = option_sense;
233: backup backing_up_report = option_sense;
234: batch interactive = ! option_sense;
235: "c++" C_plus_plus = option_sense;
236: caseful|case-sensitive caseins = ! option_sense;
237: caseless|case-insensitive caseins = option_sense;
238: debug ddebug = option_sense;
239: default spprdflt = ! option_sense;
240: ecs useecs = option_sense;
	/* NOTE(review): "fast" and "full" do not consult option_sense, so
	 * a "no" prefix has no effect on them.
	 */
241: fast {
242: useecs = usemecs = false;
243: use_read = fullspd = true;
244: }
245: full {
246: useecs = usemecs = false;
247: use_read = fulltbl = true;
248: }
249: input ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
250: interactive interactive = option_sense;
251: lex-compat lex_compat = option_sense;
252: main {
253: action_define( "YY_MAIN", option_sense );
254: do_yywrap = ! option_sense;
255: }
256: meta-ecs usemecs = option_sense;
257: never-interactive {
258: action_define( "YY_NEVER_INTERACTIVE", option_sense );
259: }
260: perf-report performance_report += option_sense ? 1 : -1;
261: pointer yytext_is_array = ! option_sense;
262: read use_read = option_sense;
263: reject reject_really_used = option_sense;
264: stack action_define( "YY_STACK_USED", option_sense );
265: stdinit do_stdinit = option_sense;
266: stdout use_stdout = option_sense;
267: unput ACTION_IFDEF("YY_NO_UNPUT", ! option_sense);
268: verbose printstats = option_sense;
269: warn nowarn = ! option_sense;
270: yylineno do_yylineno = option_sense;
271: yymore yymore_really_used = option_sense;
272: yywrap do_yywrap = option_sense;
273:
274: yy_push_state ACTION_IFDEF("YY_NO_PUSH_STATE", ! option_sense);
275: yy_pop_state ACTION_IFDEF("YY_NO_POP_STATE", ! option_sense);
276: yy_top_state ACTION_IFDEF("YY_NO_TOP_STATE", ! option_sense);
277:
278: yy_scan_buffer ACTION_IFDEF("YY_NO_SCAN_BUFFER", ! option_sense);
279: yy_scan_bytes ACTION_IFDEF("YY_NO_SCAN_BYTES", ! option_sense);
280: yy_scan_string ACTION_IFDEF("YY_NO_SCAN_STRING", ! option_sense);
281:
	/* Options that take a value are returned as tokens. */
282: outfile return OPT_OUTFILE;
283: prefix return OPT_PREFIX;
284: yyclass return OPT_YYCLASS;
285:
	/* Quoted value: copy the text after the opening quote into nmstr
	 * and overwrite the last copied character with NUL.
	 */
286: \"[^"\n]*\" {
1.7 deraadt 287: strlcpy( nmstr, yytext + 1, sizeof nmstr);
1.6 deraadt 288: nmstr[strlen( nmstr ) - 1] = '\0';
1.1 deraadt 289: return NAME;
290: }
291:
	/* Anything else: a word that does not begin with "no", or any
	 * single character.  Report it and skip the rest of the line.
	 */
292: (([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|. {
293: format_synerr( _( "unrecognized %%option: %s" ),
294: yytext );
295: BEGIN(RECOVER);
296: }
297: }
298:
	/* <RECOVER>: after a bad %option, discard the rest of the line. */
299: <RECOVER>.*{NL} ++linenum; BEGIN(INITIAL);
300:
301:
	/* <SECT2PROLOG>: code between "%%" and the first rule.  Indented
	 * lines and lines inside %{ ... %} are echoed with ACTION_ECHO;
	 * the first non-indented line outside %{ ... %} ends the prolog.
	 */
302: <SECT2PROLOG>{
303: ^"%{".* ++bracelevel; yyless( 2 ); /* eat only %{ */
304: ^"%}".* --bracelevel; yyless( 2 ); /* eat only %} */
305:
306: ^{WS}.* ACTION_ECHO; /* indented code in prolog */
307:
308: ^{NOT_WS}.* { /* non-indented code */
309: if ( bracelevel <= 0 )
310: { /* not in %{ ... %} */
	/* Return the whole line to the input, flag it as being at the
	 * beginning of a line, and rescan it in <SECT2>.
	 */
311: yyless( 0 ); /* put it all back */
312: yy_set_bol( 1 );
313: mark_prolog();
314: BEGIN(SECT2);
315: }
316: else
317: ACTION_ECHO;
318: }
319:
320: .* ACTION_ECHO;
321: {NL} ++linenum; ACTION_ECHO;
322:
	/* Input ended before any rule was seen. */
323: <<EOF>> {
324: mark_prolog();
325: sectnum = 0;
326: yyterminate(); /* to stop the parser */
327: }
328: }
329:
330: <SECT2>{
331: ^{OPTWS}{NL} ++linenum; /* allow blank lines in section 2 */
332:
	/* "%{" at the start of a line opens a code block inside
	 * section 2, collected in <PERCENT_BRACE_ACTION>.
	 */
333: ^{OPTWS}"%{" {
334: indented_code = false;
335: doing_codeblock = true;
336: bracelevel = 1;
337: BEGIN(PERCENT_BRACE_ACTION);
338: }
339:
	/* Pattern operators that need their own scanner state. */
340: ^{OPTWS}"<" BEGIN(SC); return '<';
341: ^{OPTWS}"^" return '^';
342: \" BEGIN(QUOTE); return '"';
343: "{"/[[:digit:]] BEGIN(NUM); return '{';
344: "$"/([[:blank:]]|{NL}) return '$';
345:
	/* Blanks followed by "%{" start a %{ ... %} action.  If a rule
	 * was in progress, '\n' is returned to mark the end of its
	 * pattern.
	 */
346: {WS}"%{" {
347: bracelevel = 1;
348: BEGIN(PERCENT_BRACE_ACTION);
349:
350: if ( in_rule )
351: {
352: doing_rule_action = true;
353: in_rule = false;
354: return '\n';
355: }
356: }
	/* Blanks followed by '|': sets continued_action and ends the
	 * pattern.
	 */
357: {WS}"|".*{NL} continued_action = true; ++linenum; return '\n';
358:
	/* An indented comment is handed to <ACTION> to be echoed. */
359: ^{WS}"/*" {
360: yyless( yyleng - 2 ); /* put back '/', '*' */
361: bracelevel = 0;
362: continued_action = false;
363: BEGIN(ACTION);
364: }
365:
366: ^{WS} /* allow indented rules */
367:
	/* Blanks after a pattern introduce its action. */
368: {WS} {
369: /* This rule is separate from the one below because
370: * otherwise we get variable trailing context, so
371: * we can't build the scanner using -{f,F}.
372: */
373: bracelevel = 0;
374: continued_action = false;
375: BEGIN(ACTION);
376:
377: if ( in_rule )
378: {
379: doing_rule_action = true;
380: in_rule = false;
381: return '\n';
382: }
383: }
384:
	/* End of line with no action text: the newline is pushed back
	 * for <ACTION> to process.
	 */
385: {OPTWS}{NL} {
386: bracelevel = 0;
387: continued_action = false;
388: BEGIN(ACTION);
389: unput( '\n' ); /* so <ACTION> sees it */
390:
391: if ( in_rule )
392: {
393: doing_rule_action = true;
394: in_rule = false;
395: return '\n';
396: }
397: }
398:
399: ^{OPTWS}"<<EOF>>" |
400: "<<EOF>>" return EOF_OP;
401:
	/* A second "%%" ends the rules; the rest of the input is
	 * section 3.
	 */
402: ^"%%".* {
403: sectnum = 3;
404: BEGIN(SECT3);
405: yyterminate(); /* to stop the parser */
406: }
407:
	/* A character class, matched up to but not including the closing
	 * ']'.  Its text (truncated to the size of nmstr) is the key used
	 * to detect a class that has already been seen.
	 */
408: "["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})* {
409: int cclval;
410:
1.7 deraadt 411: strlcpy( nmstr, yytext, sizeof nmstr);
1.1 deraadt 412:
413: /* Check to see if we've already encountered this
414: * ccl.
415: */
416: if ( (cclval = ccllookup( (Char *) nmstr )) != 0 )
417: {
	/* Consume the character after the class text; it must be ']'. */
418: if ( input() != ']' )
419: synerr( _( "bad character class" ) );
420:
421: yylval = cclval;
422: ++cclreuse;
423: return PREVCCL;
424: }
425: else
426: {
427: /* We fudge a bit. We know that this ccl will
428: * soon be numbered as lastccl + 1 by cclinit.
429: */
430: cclinstal( (Char *) nmstr, lastccl + 1 );
431:
432: /* Push back everything but the leading bracket
433: * so the ccl can be rescanned.
434: */
435: yyless( 1 );
436:
437: BEGIN(FIRSTCCL);
438: return '[';
439: }
440: }
441:
"{"{NAME}"}"	{
			/* Expand a "{name}" reference to a definition.
			 *
			 * The name is looked up with ndlookup().  When found,
			 * its text is pushed back onto the input, last
			 * character first, so that it is rescanned as part of
			 * the current pattern.  The text is wrapped in
			 * parentheses unless lex_compat is set or the
			 * definition starts with '^' or ends with '$'.
			 */
			register Char *nmdefptr;
			Char *ndlookup();

			strlcpy( nmstr, yytext + 1, sizeof nmstr );

			/* Chop the trailing brace.  When the reference is too
			 * long for nmstr, strlcpy() has already truncated and
			 * terminated the copy and yyleng - 2 lies outside the
			 * buffer, so the store is made only when the index is
			 * in range.
			 */
			if ( (size_t) (yyleng - 2) < sizeof nmstr )
				nmstr[yyleng - 2] = '\0';

			if ( (nmdefptr = ndlookup( nmstr )) == 0 )
				format_synerr(
					_( "undefined definition {%s}" ),
						nmstr );

			else
				{ /* push back name surrounded by ()'s */
				int len = strlen( (char *) nmdefptr );

				if ( lex_compat || nmdefptr[0] == '^' ||
				     (len > 0 && nmdefptr[len - 1] == '$') )
					{ /* don't use ()'s after all */
					PUT_BACK_STRING((char *) nmdefptr, 0);

					/* A leading '^' must be seen as the
					 * beginning-of-line operator when the
					 * text is rescanned.
					 */
					if ( nmdefptr[0] == '^' )
						BEGIN(CARETISBOL);
					}

				else
					{
					/* Pushed back in reverse, so ')' goes
					 * first and '(' last.
					 */
					unput(')');
					PUT_BACK_STRING((char *) nmdefptr, 0);
					unput('(');
					}
				}
			}
475:
	/* Operator characters are returned as themselves; any other
	 * character is returned as a CHAR token.
	 */
476: [/|*+?.(){}] return (unsigned char) yytext[0];
477: . RETURNCHAR;
478: }
479:
480:
	/* <SC>: the start-condition list between '<' and '>' that
	 * precedes a pattern.
	 */
481: <SC>{
482: [,*] return (unsigned char) yytext[0];
483: ">" BEGIN(SECT2); return '>';
	/* '>' immediately followed by '^': the caret is handled in
	 * <CARETISBOL>.
	 */
484: ">"/^ BEGIN(CARETISBOL); return '>';
485: {SCNAME} RETURNNAME;
486: . {
487: format_synerr( _( "bad <start condition>: %s" ),
488: yytext );
489: }
490: }
491:
	/* <CARETISBOL>: return the pending '^' and resume <SECT2>. */
492: <CARETISBOL>"^" BEGIN(SECT2); return '^';
493:
494:
	/* <QUOTE>: inside a double-quoted string in a pattern.  Each
	 * character is returned as CHAR until the closing quote.
	 */
495: <QUOTE>{
496: [^"\n] RETURNCHAR;
497: \" BEGIN(SECT2); return '"';
498:
	/* A newline before the closing quote: report it and return '"'
	 * as if the string had been closed.
	 */
499: {NL} {
500: synerr( _( "missing quote" ) );
501: BEGIN(SECT2);
502: ++linenum;
503: return '"';
504: }
505: }
506:
507:
	/* <FIRSTCCL>: the first position inside a character class.  A
	 * '^' followed by '-' or ']' stays in this state; every other
	 * match moves on to <CCL>.
	 */
508: <FIRSTCCL>{
509: "^"/[^-\]\n] BEGIN(CCL); return '^';
510: "^"/("-"|"]") return '^';
511: . BEGIN(CCL); RETURNCHAR;
512: }
513:
	/* <CCL>: the remainder of a character class.  '-' is returned as
	 * an operator only when the next character is neither ']' nor a
	 * newline; ']' ends the class.
	 */
514: <CCL>{
515: -/[^\]\n] return '-';
516: [^\]\n] RETURNCHAR;
517: "]" BEGIN(SECT2); return ']';
	/* Anything else: report it and return ']' as if the class had
	 * been closed.
	 */
518: .|{NL} {
519: synerr( _( "bad character class" ) );
520: BEGIN(SECT2);
521: return ']';
522: }
523: }
524:
	/* "[:name:]" class expressions inside a character class.  Each
	 * recognized name returns its own CCE_ token and continues in
	 * <CCL>.
	 */
525: <FIRSTCCL,CCL>{
526: "[:alnum:]" BEGIN(CCL); return CCE_ALNUM;
527: "[:alpha:]" BEGIN(CCL); return CCE_ALPHA;
528: "[:blank:]" BEGIN(CCL); return CCE_BLANK;
529: "[:cntrl:]" BEGIN(CCL); return CCE_CNTRL;
530: "[:digit:]" BEGIN(CCL); return CCE_DIGIT;
531: "[:graph:]" BEGIN(CCL); return CCE_GRAPH;
532: "[:lower:]" BEGIN(CCL); return CCE_LOWER;
533: "[:print:]" BEGIN(CCL); return CCE_PRINT;
534: "[:punct:]" BEGIN(CCL); return CCE_PUNCT;
535: "[:space:]" BEGIN(CCL); return CCE_SPACE;
536: "[:upper:]" BEGIN(CCL); return CCE_UPPER;
537: "[:xdigit:]" BEGIN(CCL); return CCE_XDIGIT;
	/* Any other name: report it and return CCE_ALNUM in its place. */
538: {CCL_EXPR} {
539: format_synerr(
540: _( "bad character class expression: %s" ),
541: yytext );
542: BEGIN(CCL); return CCE_ALNUM;
543: }
544: }
545:
	/* <NUM>: inside a "{...}" that begins with a digit.  Numbers and
	 * commas are returned until the closing '}'.
	 */
546: <NUM>{
547: [[:digit:]]+ {
548: yylval = myctoi( yytext );
549: return NUMBER;
550: }
551:
552: "," return ',';
553: "}" BEGIN(SECT2); return '}';
554:
	/* On a bad character or a newline: report it and return '}' as
	 * if the braces had been closed.
	 */
555: . {
556: synerr( _( "bad character inside {}'s" ) );
557: BEGIN(SECT2);
558: return '}';
559: }
560:
561: {NL} {
562: synerr( _( "missing }" ) );
563: BEGIN(SECT2);
564: ++linenum;
565: return '}';
566: }
567: }
568:
569:
	/* <PERCENT_BRACE_ACTION>: code collected after "%{" in section 2.
	 * The nested scopes add <ACTION> and <CODEBLOCK> to the start
	 * conditions of the comment and reject/yymore rules.
	 */
570: <PERCENT_BRACE_ACTION>{
	/* "%}" zeroes bracelevel so that the next newline ends the block. */
571: {OPTWS}"%}".* bracelevel = 0;
572:
573: <ACTION>"/*" ACTION_ECHO; yy_push_state( COMMENT );
574:
575: <CODEBLOCK,ACTION>{
	/* The scanner is built with %option caseless, so these patterns
	 * match in any letter case; CHECK_REJECT and CHECK_YYMORE then
	 * test the case of the matched text with all_upper() and
	 * all_lower().
	 */
576: "reject" {
577: ACTION_ECHO;
578: CHECK_REJECT(yytext);
579: }
580: "yymore" {
581: ACTION_ECHO;
582: CHECK_YYMORE(yytext);
583: }
584: }
585:
586: {NAME}|{NOT_NAME}|. ACTION_ECHO;
587: {NL} {
588: ++linenum;
589: ACTION_ECHO;
	/* The block ends at this newline once "%}" has been seen, or
	 * after a single line for an indented code block.
	 */
590: if ( bracelevel == 0 ||
591: (doing_codeblock && indented_code) )
592: {
593: if ( doing_rule_action )
594: add_action( "\tYY_BREAK\n" );
595:
596: doing_rule_action = doing_codeblock = false;
597: BEGIN(SECT2);
598: }
599: }
600: }
601:
602:
603: /* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
	/* <ACTION>: the text of a rule's action.  Everything is echoed
	 * with ACTION_ECHO while bracelevel tracks '{' and '}'; a newline
	 * seen at brace level 0 ends the action.
	 */
604: <ACTION>{
605: "{" ACTION_ECHO; ++bracelevel;
606: "}" ACTION_ECHO; --bracelevel;
607: [^[:alpha:]_{}"'/\n]+ ACTION_ECHO;
608: {NAME} ACTION_ECHO;
609: "'"([^'\\\n]|\\.)*"'" ACTION_ECHO; /* character constant */
	/* String literals are echoed in <ACTION_STRING>, which has no
	 * brace rules, so braces inside a string are not counted.
	 */
610: \" ACTION_ECHO; BEGIN(ACTION_STRING);
611: {NL} {
612: ++linenum;
613: ACTION_ECHO;
614: if ( bracelevel == 0 )
615: {
616: if ( doing_rule_action )
617: add_action( "\tYY_BREAK\n" );
618:
619: doing_rule_action = false;
620: BEGIN(SECT2);
621: }
622: }
623: . ACTION_ECHO;
624: }
625:
	/* <ACTION_STRING>: inside a string literal in an action.  All
	 * text is echoed; an unescaped '"' returns to <ACTION>.
	 */
626: <ACTION_STRING>{
627: [^"\\\n]+ ACTION_ECHO;
	/* A backslash and the character after it, so an escaped quote
	 * does not end the string.
	 */
628: \\. ACTION_ECHO;
629: {NL} ++linenum; ACTION_ECHO;
630: \" ACTION_ECHO; BEGIN(ACTION);
631: . ACTION_ECHO;
632: }
633:
	/* End of input inside a comment, an action or a string literal
	 * in an action is an error.
	 */
634: <COMMENT,ACTION,ACTION_STRING><<EOF>> {
635: synerr( _( "EOF encountered inside an action" ) );
636: yyterminate();
637: }
638:
639:
	/* An escape sequence in a pattern, a quoted string or a character
	 * class: myesc() supplies the value, which is returned as CHAR.
	 */
640: <SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ} {
641: yylval = myesc( (Char *) yytext );
642:
	/* An escape in the first position of a class moves on to <CCL>. */
643: if ( YY_START == FIRSTCCL )
644: BEGIN(CCL);
645:
646: return CHAR;
647: }
648:
649:
	/* <SECT3>: everything after the second "%%" is copied through
	 * with ECHO, one line at a time.
	 */
650: <SECT3>{
651: .*(\n?) ECHO;
652: <<EOF>> sectnum = 0; yyterminate();
653: }
654:
	/* Fallback for every start condition: a character that no other
	 * rule matched is reported as an error.
	 */
655: <*>.|\n format_synerr( _( "bad character: %s" ), yytext );
656:
657: %%
658:
659:
660: int yywrap()
661: {
662: if ( --num_input_files > 0 )
663: {
664: set_input_file( *++input_files );
665: return 0;
666: }
667:
668: else
669: return 1;
670: }
671:
672:
673: /* set_input_file - open the given file (if NULL, stdin) for scanning */
674:
675: void set_input_file( file )
676: char *file;
677: {
678: if ( file && strcmp( file, "-" ) )
679: {
680: infilename = copy_string( file );
681: yyin = fopen( infilename, "r" );
682:
683: if ( yyin == NULL )
684: lerrsf( _( "can't open %s" ), file );
685: }
686:
687: else
688: {
689: yyin = stdin;
690: infilename = copy_string( "<stdin>" );
691: }
692:
693: linenum = 1;
694: }
695:
696:
697: /* Wrapper routines for accessing the scanner's malloc routines. */
698:
/* flex_alloc - allocate size bytes with malloc()
 *
 * Returns whatever malloc() returns, including NULL on failure.
 */
void *flex_alloc( size_t size )
	{
	return malloc( size );
	}
704:
/* flex_realloc - resize the block at ptr to size bytes with realloc()
 *
 * Returns whatever realloc() returns, including NULL on failure.
 */
void *flex_realloc( void *ptr, size_t size )
	{
	return realloc( ptr, size );
	}
711:
/* flex_free - release a block obtained from flex_alloc()/flex_realloc()
 *
 * A null ptr is accepted: free() does nothing for a null pointer, so
 * no separate check is made here.
 */
void flex_free( void *ptr )
	{
	free( ptr );
	}