Annotation of src/usr.bin/lex/scan.l, Revision 1.5
1.5 ! deraadt 1: /* $OpenBSD: scan.l,v 1.4 2001/06/17 07:30:42 deraadt Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /* scan.l - scanner for flex input */
4:
5: %{
6: /*-
7: * Copyright (c) 1990 The Regents of the University of California.
8: * All rights reserved.
9: *
10: * This code is derived from software contributed to Berkeley by
11: * Vern Paxson.
12: *
13: * The United States Government has rights in this work pursuant
14: * to contract no. DE-AC03-76SF00098 between the United States
15: * Department of Energy and the University of California.
16: *
1.4 deraadt 17: * Redistribution and use in source and binary forms, with or without
18: * modification, are permitted provided that: (1) source distributions
19: * retain this entire copyright notice and comment, and (2) distributions
20: * including binaries display the following acknowledgement: ``This product
21: * includes software developed by the University of California, Berkeley
22: * and its contributors'' in the documentation or other materials provided
23: * with the distribution and in all advertising materials mentioning
24: * features or use of this software. Neither the name of the University nor
25: * the names of its contributors may be used to endorse or promote products
26: * derived from this software without specific prior written permission.
1.1 deraadt 27: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
28: * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
29: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
30: */
31:
1.5 ! deraadt 32: /* $Header: /cvs/src/usr.bin/lex/scan.l,v 1.4 2001/06/17 07:30:42 deraadt Exp $ */
1.1 deraadt 33:
34: #include "flexdef.h"
35: #include "parse.h"
36:
/* Hand the text of the current token to add_action(). */
#define ACTION_ECHO add_action( yytext )

/* Call action_define( def, 1 ) when should_define is nonzero.  Wrapped in
 * do/while so that the expansion is a single statement and stays correct
 * under an unbraced if/else.
 */
#define ACTION_IFDEF(def, should_define) \
	do \
		{ \
		if ( should_define ) \
			action_define( def, 1 ); \
		} \
	while ( 0 )

/* Note that this expansion carries its own trailing semicolon. */
#define MARK_END_OF_PROLOG mark_prolog();

/* Name and return type of the generated scanner function. */
#define YY_DECL \
	int flexscan()

/* Store the first character of the token (as an unsigned value) in yylval
 * and return CHAR.
 */
#define RETURNCHAR \
	do \
		{ \
		yylval = (unsigned char) yytext[0]; \
		return CHAR; \
		} \
	while ( 0 )

/* Copy the token into nmstr, truncating to the size of nmstr, and return
 * NAME.
 */
#define RETURNNAME \
	do \
		{ \
		strlcpy( nmstr, yytext, sizeof nmstr ); \
		return NAME; \
		} \
	while ( 0 )

/* unput() the characters of str from the last one down to index start, so
 * that they are rescanned in their original order.  Uses the scanner's
 * local variable i.  The length is converted to int before subtracting so
 * that an empty string gives -1 without relying on unsigned wrap-around.
 */
#define PUT_BACK_STRING(str, start) \
	do \
		{ \
		for ( i = (int) strlen( (str) ) - 1; i >= (start); --i ) \
			unput((str)[i]); \
		} \
	while ( 0 )

/* Set reject when all_upper( str ) is true. */
#define CHECK_REJECT(str) \
	do \
		{ \
		if ( all_upper( str ) ) \
			reject = true; \
		} \
	while ( 0 )

/* Set yymore_used when all_lower( str ) is true. */
#define CHECK_YYMORE(str) \
	do \
		{ \
		if ( all_lower( str ) ) \
			yymore_used = true; \
		} \
	while ( 0 )
68: %}
69:
70: %option caseless nodefault outfile="scan.c" stack noyy_top_state
71: %option nostdinit
72:
/* Exclusive start conditions used by the rules below. */
73: %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
74: %x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
75: %x OPTION LINEDIR
76:
/* Blanks: one or more, zero or more, and any single character that is
 * neither a blank nor a newline.
 */
77: WS [[:blank:]]+
78: OPTWS [[:blank:]]*
79: NOT_WS [^[:blank:]\n]
80:
/* A newline, optionally preceded by a carriage return. */
81: NL \r?\n
82:
/* NAME is a letter or underscore followed by letters, digits, '_' or '-'.
 * NOT_NAME is a run of characters none of which is a letter, '_', '*' or
 * a newline.
 */
83: NAME ([[:alpha:]_][[:alnum:]_-]*)
84: NOT_NAME [^[:alpha:]_*\n]+
85:
86: SCNAME {NAME}
87:
/* A backslash followed by any non-newline character, by one to three
 * octal digits, or by x and one or two hexadecimal digits.
 */
88: ESCSEQ (\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))
89:
/* Pieces of a bracketed character class: its first character, any later
 * character (which may not be an unescaped ']'), and a [:name:] expression.
 */
90: FIRST_CCL_CHAR ([^\\\n]|{ESCSEQ})
91: CCL_CHAR ([^\\\n\]]|{ESCSEQ})
92: CCL_EXPR ("[:"[[:alpha:]]+":]")
93:
/* Letters of the single-letter % directives that INITIAL accepts and
 * ignores.
 */
94: LEXOPT [aceknopr]
95:
96: %%
	/* The indented lines before the first rule are code that flex
	 * copies into the body of the scanner function (flexscan(), see
	 * YY_DECL), so these are declarations local to that function.
	 */

	/* bracelevel counts open braces and %{ blocks while code is being
	 * copied; didadef records that PICKUPDEF stored a definition;
	 * indented_code marks a code block introduced by indentation
	 * rather than by %{.
	 */
97: static int bracelevel, didadef, indented_code;
	/* Set when the code being copied is the action of a rule. */
98: static int doing_rule_action = false;
	/* Polarity of the %option currently being read. */
99: static int option_sense;
100:
	/* Set while a %{ block that starts a section 2 line is copied. */
101: int doing_codeblock = false;
	/* Scratch index for PUT_BACK_STRING and the PICKUPDEF trim loop. */
102: int i;
	/* nmdef receives the text of a name definition. */
103: Char nmdef[MAXLINE], myesc();
104:
105:
106: <INITIAL>{
	/* Section 1 of the input: code blocks, comments, % directives and
	 * name definitions.
	 */
107: ^{WS} indented_code = true; BEGIN(CODEBLOCK);
108: ^"/*" ACTION_ECHO; yy_push_state( COMMENT );
109: ^#{OPTWS}line{WS} yy_push_state( LINEDIR );
110: ^"%s"{NAME}? return SCDECL;
111: ^"%x"{NAME}? return XSCDECL;
112: ^"%{".*{NL} {
	/* Start of a %{ block; the rest of the opening line is dropped. */
113: ++linenum;
114: line_directive_out( (FILE *) 0, 1 );
115: indented_code = false;
116: BEGIN(CODEBLOCK);
117: }
118:
119: {WS} /* discard */
120:
121: ^"%%".* {
	/* End of section 1: switch to the section 2 prolog. */
122: sectnum = 2;
123: bracelevel = 0;
124: mark_defs1();
125: line_directive_out( (FILE *) 0, 1 );
126: BEGIN(SECT2PROLOG);
127: return SECTEND;
128: }
129:
130: ^"%pointer".*{NL} yytext_is_array = false; ++linenum;
131: ^"%array".*{NL} yytext_is_array = true; ++linenum;
132:
133: ^"%option" BEGIN(OPTION); return OPTION_OP;
134:
135: ^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL} ++linenum; /* ignore */
136: ^"%"{LEXOPT}{WS}.*{NL} ++linenum; /* ignore */
137:
138: ^"%"[^sxaceknopr{}].* synerr( _( "unrecognized '%' directive" ) );
139:
140: ^{NAME} {
	/* A name at the start of a line begins a definition; remember the
	 * name (truncated to the size of nmstr) and collect the body in
	 * PICKUPDEF.
	 */
1.5 ! deraadt 141: strlcpy( nmstr, yytext, sizeof nmstr );
1.1 deraadt 142: didadef = false;
143: BEGIN(PICKUPDEF);
144: }
145:
146: {SCNAME} RETURNNAME;
147: ^{OPTWS}{NL} ++linenum; /* allows blank lines in section 1 */
148: {OPTWS}{NL} ACTION_ECHO; ++linenum; /* maybe end of comment line */
149: }
150:
151:
152: <COMMENT>{
	/* Body of a C comment: every piece is passed to ACTION_ECHO.  The
	 * state is entered with yy_push_state(), so the closing delimiter
	 * pops back to whichever state saw the opening one.
	 */
153: "*/" ACTION_ECHO; yy_pop_state();
154: "*" ACTION_ECHO;
155: [^*\n]+ ACTION_ECHO;
156: [^*\n]*{NL} ++linenum; ACTION_ECHO;
157: }
158:
159: <LINEDIR>{
	/* Remainder of a #line directive: a number sets linenum, a quoted
	 * string replaces infilename, and the newline pops back to the
	 * state that pushed LINEDIR.
	 */
160: \n yy_pop_state();
161: [[:digit:]]+ linenum = myctoi( yytext );
162:
163: \"[^"\n]*\" {
	/* Copy from just after the opening quote, then overwrite the
	 * closing quote with the terminator.
	 */
164: flex_free( (void *) infilename );
165: infilename = copy_string( yytext + 1 );
166: infilename[strlen( infilename ) - 1] = '\0';
167: }
168: . /* ignore spurious characters */
169: }
170:
171: <CODEBLOCK>{
	/* Code in section 1, passed on through ACTION_ECHO.  A line that
	 * starts with %} returns to INITIAL; so does the first newline
	 * when the block was introduced by indentation (indented_code).
	 */
172: ^"%}".*{NL} ++linenum; BEGIN(INITIAL);
173:
174: {NAME}|{NOT_NAME}|. ACTION_ECHO;
175:
176: {NL} {
177: ++linenum;
178: ACTION_ECHO;
179: if ( indented_code )
180: BEGIN(INITIAL);
181: }
182: }
183:
184:
185: <PICKUPDEF>{
	/* Entered from INITIAL after a name at the start of a line has
	 * been saved in nmstr; the rest of the line is its definition.
	 */
186: {WS} /* separates name and definition */
187:
188: {NOT_WS}.* {
	/* The copy is truncated to the size of nmdef. */
1.5 ! deraadt 189: strlcpy( (char *) nmdef, yytext, sizeof nmdef);
1.1 deraadt 190:
191: /* Skip trailing whitespace. */
192: for ( i = strlen( (char *) nmdef ) - 1;
193: i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t');
194: --i )
195: ;
196:
	/* i now indexes the last character to keep, or is -1. */
197: nmdef[i + 1] = '\0';
198:
199: ndinstal( nmstr, nmdef );
200: didadef = true;
201: }
202:
203: {NL} {
	/* End of line: it is an error if no definition text was seen. */
204: if ( ! didadef )
205: synerr( _( "incomplete name definition" ) );
206: BEGIN(INITIAL);
207: ++linenum;
208: }
209: }
210:
211:
212: <OPTION>{
213: {NL} ++linenum; BEGIN(INITIAL);
	/* Whitespace separates options, so it resets the polarity to true;
	 * each "no" prefix then inverts it.
	 */
214: {WS} option_sense = true;
215:
216: "=" return '=';
217:
218: no option_sense = ! option_sense;
219:
220: 7bit csize = option_sense ? 128 : 256;
221: 8bit csize = option_sense ? 256 : 128;
222:
223: align long_align = option_sense;
224: always-interactive {
225: action_define( "YY_ALWAYS_INTERACTIVE", option_sense );
226: }
227: array yytext_is_array = option_sense;
228: backup backing_up_report = option_sense;
229: batch interactive = ! option_sense;
230: "c++" C_plus_plus = option_sense;
231: caseful|case-sensitive caseins = ! option_sense;
232: caseless|case-insensitive caseins = option_sense;
233: debug ddebug = option_sense;
234: default spprdflt = ! option_sense;
235: ecs useecs = option_sense;
	/* fast and full set their flags without consulting option_sense. */
236: fast {
237: useecs = usemecs = false;
238: use_read = fullspd = true;
239: }
240: full {
241: useecs = usemecs = false;
242: use_read = fulltbl = true;
243: }
244: input ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
245: interactive interactive = option_sense;
246: lex-compat lex_compat = option_sense;
247: main {
248: action_define( "YY_MAIN", option_sense );
249: do_yywrap = ! option_sense;
250: }
251: meta-ecs usemecs = option_sense;
252: never-interactive {
253: action_define( "YY_NEVER_INTERACTIVE", option_sense );
254: }
255: perf-report performance_report += option_sense ? 1 : -1;
256: pointer yytext_is_array = ! option_sense;
257: read use_read = option_sense;
258: reject reject_really_used = option_sense;
259: stack action_define( "YY_STACK_USED", option_sense );
260: stdinit do_stdinit = option_sense;
261: stdout use_stdout = option_sense;
262: unput ACTION_IFDEF("YY_NO_UNPUT", ! option_sense);
263: verbose printstats = option_sense;
264: warn nowarn = ! option_sense;
265: yylineno do_yylineno = option_sense;
266: yymore yymore_really_used = option_sense;
267: yywrap do_yywrap = option_sense;
268:
	/* For the options below, the YY_NO_... symbol is defined only when
	 * the option is negated.
	 */
269: yy_push_state ACTION_IFDEF("YY_NO_PUSH_STATE", ! option_sense);
270: yy_pop_state ACTION_IFDEF("YY_NO_POP_STATE", ! option_sense);
271: yy_top_state ACTION_IFDEF("YY_NO_TOP_STATE", ! option_sense);
272:
273: yy_scan_buffer ACTION_IFDEF("YY_NO_SCAN_BUFFER", ! option_sense);
274: yy_scan_bytes ACTION_IFDEF("YY_NO_SCAN_BYTES", ! option_sense);
275: yy_scan_string ACTION_IFDEF("YY_NO_SCAN_STRING", ! option_sense);
276:
	/* These options take a value, so they are returned as tokens. */
277: outfile return OPT_OUTFILE;
278: prefix return OPT_PREFIX;
279: yyclass return OPT_YYCLASS;
280:
\"[^"\n]*\"	{
		size_t len;

		/* Quoted option value: copy the text that follows the
		 * opening quote, truncating to the size of nmstr.
		 */
		strlcpy( nmstr, yytext + 1, sizeof nmstr );

		/* Drop the closing quote, so that a value written as
		 * "scan.c" is stored as scan.c and not with the quote
		 * still attached.  The pattern allows no quote inside
		 * the value, so if the copy was truncated the last
		 * character is not a quote and is left alone.
		 */
		len = strlen( nmstr );
		if ( len > 0 && nmstr[len - 1] == '"' )
			nmstr[len - 1] = '\0';

		return NAME;
		}
285:
286: (([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|. {
	/* Any other word that does not begin with "no", or any other
	 * single character: report it and skip the rest of the line.
	 */
287: format_synerr( _( "unrecognized %%option: %s" ),
288: yytext );
289: BEGIN(RECOVER);
290: }
291: }
292:
292: <RECOVER>.*{NL} ++linenum; BEGIN(INITIAL); /* skip the rest of the offending line */
294:
295:
296: <SECT2PROLOG>{
	/* Code between the section separator and the first rule.  The
	 * first non-indented line outside a %{ block ends the prolog and
	 * is rescanned as a rule in SECT2.
	 */
297: ^"%{".* ++bracelevel; yyless( 2 ); /* eat only %{ */
298: ^"%}".* --bracelevel; yyless( 2 ); /* eat only %} */
299:
300: ^{WS}.* ACTION_ECHO; /* indented code in prolog */
301:
302: ^{NOT_WS}.* { /* non-indented code */
303: if ( bracelevel <= 0 )
304: { /* not in %{ ... %} */
305: yyless( 0 ); /* put it all back */
	/* The text put back starts a line, so restore that status. */
306: yy_set_bol( 1 );
307: mark_prolog();
308: BEGIN(SECT2);
309: }
310: else
311: ACTION_ECHO;
312: }
313:
314: .* ACTION_ECHO;
315: {NL} ++linenum; ACTION_ECHO;
316:
317: <<EOF>> {
318: mark_prolog();
319: sectnum = 0;
320: yyterminate(); /* to stop the parser */
321: }
322: }
323:
324: <SECT2>{
325: ^{OPTWS}{NL} ++linenum; /* allow blank lines in section 2 */
326:
327: ^{OPTWS}"%{" {
	/* A %{ block that starts a line is a code block, not the action
	 * of a rule.
	 */
328: indented_code = false;
329: doing_codeblock = true;
330: bracelevel = 1;
331: BEGIN(PERCENT_BRACE_ACTION);
332: }
333:
334: ^{OPTWS}"<" BEGIN(SC); return '<';
335: ^{OPTWS}"^" return '^';
336: \" BEGIN(QUOTE); return '"';
337: "{"/[[:digit:]] BEGIN(NUM); return '{';
338: "$"/([[:blank:]]|{NL}) return '$';
339:
340: {WS}"%{" {
	/* A %{ after whitespace starts an action; if a pattern precedes
	 * it, the parser is sent the newline token that ends the pattern.
	 */
341: bracelevel = 1;
342: BEGIN(PERCENT_BRACE_ACTION);
343:
344: if ( in_rule )
345: {
346: doing_rule_action = true;
347: in_rule = false;
348: return '\n';
349: }
350: }
351: {WS}"|".*{NL} continued_action = true; ++linenum; return '\n';
352:
353: ^{WS}"/*" {
	/* An indented comment: only the whitespace is consumed here, and
	 * the comment itself is handled in ACTION.
	 */
354: yyless( yyleng - 2 ); /* put back '/', '*' */
355: bracelevel = 0;
356: continued_action = false;
357: BEGIN(ACTION);
358: }
359:
360: ^{WS} /* allow indented rules */
361:
362: {WS} {
363: /* This rule is separate from the one below because
364: * otherwise we get variable trailing context, so
365: * we can't build the scanner using -{f,F}.
366: */
367: bracelevel = 0;
368: continued_action = false;
369: BEGIN(ACTION);
370:
371: if ( in_rule )
372: {
373: doing_rule_action = true;
374: in_rule = false;
375: return '\n';
376: }
377: }
378:
379: {OPTWS}{NL} {
	/* End of line directly after the pattern: enter ACTION anyway so
	 * that its newline rule closes the (empty) action.
	 */
380: bracelevel = 0;
381: continued_action = false;
382: BEGIN(ACTION);
383: unput( '\n' ); /* so <ACTION> sees it */
384:
385: if ( in_rule )
386: {
387: doing_rule_action = true;
388: in_rule = false;
389: return '\n';
390: }
391: }
392:
393: ^{OPTWS}"<<EOF>>" |
394: "<<EOF>>" return EOF_OP;
395:
396: ^"%%".* {
	/* Second section separator: what follows is section 3. */
397: sectnum = 3;
398: BEGIN(SECT3);
399: yyterminate(); /* to stop the parser */
400: }
401:
402: "["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})* {
	/* The match is the class text up to, but not including, the
	 * closing bracket.
	 */
403: int cclval;
404:
	/* The copy is truncated to the size of nmstr. */
1.5 ! deraadt 405: strlcpy( nmstr, yytext, sizeof nmstr);
1.1 deraadt 406:
407: /* Check to see if we've already encountered this
408: * ccl.
409: */
410: if ( (cclval = ccllookup( (Char *) nmstr )) != 0 )
411: {
	/* Known class: consume the closing bracket and reuse it. */
412: if ( input() != ']' )
413: synerr( _( "bad character class" ) );
414:
415: yylval = cclval;
416: ++cclreuse;
417: return PREVCCL;
418: }
419: else
420: {
421: /* We fudge a bit. We know that this ccl will
422: * soon be numbered as lastccl + 1 by cclinit.
423: */
424: cclinstal( (Char *) nmstr, lastccl + 1 );
425:
426: /* Push back everything but the leading bracket
427: * so the ccl can be rescanned.
428: */
429: yyless( 1 );
430:
431: BEGIN(FIRSTCCL);
432: return '[';
433: }
434: }
435:
"{"{NAME}"}"	{
		register Char *nmdefptr;
		Char *ndlookup();

		/* The token is a name wrapped in braces, so the name alone
		 * is yyleng - 2 characters and has to fit in nmstr together
		 * with its terminating NUL.  Reject anything longer rather
		 * than storing the terminator beyond the end of nmstr.
		 */
		if ( yyleng - 2 >= (int) sizeof nmstr )
			synerr( _( "definition name too long" ) );

		else
			{
			strlcpy( nmstr, yytext + 1, sizeof nmstr );
			nmstr[yyleng - 2] = '\0';  /* chop trailing brace */

			if ( (nmdefptr = ndlookup( nmstr )) == 0 )
				format_synerr(
					_( "undefined definition {%s}" ),
						nmstr );

			else
				{ /* push back name surrounded by ()'s */
				int len = strlen( (char *) nmdefptr );

				if ( lex_compat || nmdefptr[0] == '^' ||
				     (len > 0 && nmdefptr[len - 1] == '$') )
					{ /* don't use ()'s after all */
					PUT_BACK_STRING((char *) nmdefptr, 0);

					/* A leading caret in the definition
					 * is scanned in CARETISBOL.
					 */
					if ( nmdefptr[0] == '^' )
						BEGIN(CARETISBOL);
					}

				else
					{
					/* Pushed back in reverse, so the
					 * scanner reads an opening
					 * parenthesis, the definition, and
					 * a closing parenthesis.
					 */
					unput(')');
					PUT_BACK_STRING((char *) nmdefptr, 0);
					unput('(');
					}
				}
			}
		}
469:
470: [/|*+?.(){}] return (unsigned char) yytext[0]; /* operators are returned as themselves */
471: . RETURNCHAR;
472: }
473:
474:
475: <SC>{
	/* Inside the angle brackets of a start condition list. */
476: [,*] return (unsigned char) yytext[0];
477: ">" BEGIN(SECT2); return '>';
	/* A caret right after the list is scanned in CARETISBOL. */
478: ">"/^ BEGIN(CARETISBOL); return '>';
479: {SCNAME} RETURNNAME;
480: . {
481: format_synerr( _( "bad <start condition>: %s" ),
482: yytext );
483: }
484: }
485:
486: <CARETISBOL>"^" BEGIN(SECT2); return '^'; /* return the caret as '^' and resume SECT2 */
487:
488:
489: <QUOTE>{
	/* Inside a quoted string in a pattern: each character is returned
	 * as a CHAR until the closing quote.
	 */
490: [^"\n] RETURNCHAR;
491: \" BEGIN(SECT2); return '"';
492:
493: {NL} {
	/* Unterminated string: report it and return the closing quote
	 * on the input's behalf.
	 */
494: synerr( _( "missing quote" ) );
495: BEGIN(SECT2);
496: ++linenum;
497: return '"';
498: }
499: }
500:
501:
502: <FIRSTCCL>{
	/* First character of a character class.  After a caret the state
	 * stays FIRSTCCL only when the next character is '-' or ']';
	 * otherwise the scan continues in CCL.
	 */
503: "^"/[^-\]\n] BEGIN(CCL); return '^';
504: "^"/("-"|"]") return '^';
505: . BEGIN(CCL); RETURNCHAR;
506: }
507:
508: <CCL>{
	/* Remaining characters of a character class.  A '-' is returned
	 * as the range operator only when something other than ']' or a
	 * newline follows it.
	 */
509: -/[^\]\n] return '-';
510: [^\]\n] RETURNCHAR;
511: "]" BEGIN(SECT2); return ']';
512: .|{NL} {
513: synerr( _( "bad character class" ) );
514: BEGIN(SECT2);
515: return ']';
516: }
517: }
518:
519: <FIRSTCCL,CCL>{
	/* Named character class expressions, each returned as its own
	 * token; the scan continues in CCL afterwards.
	 */
520: "[:alnum:]" BEGIN(CCL); return CCE_ALNUM;
521: "[:alpha:]" BEGIN(CCL); return CCE_ALPHA;
522: "[:blank:]" BEGIN(CCL); return CCE_BLANK;
523: "[:cntrl:]" BEGIN(CCL); return CCE_CNTRL;
524: "[:digit:]" BEGIN(CCL); return CCE_DIGIT;
525: "[:graph:]" BEGIN(CCL); return CCE_GRAPH;
526: "[:lower:]" BEGIN(CCL); return CCE_LOWER;
527: "[:print:]" BEGIN(CCL); return CCE_PRINT;
528: "[:punct:]" BEGIN(CCL); return CCE_PUNCT;
529: "[:space:]" BEGIN(CCL); return CCE_SPACE;
530: "[:upper:]" BEGIN(CCL); return CCE_UPPER;
531: "[:xdigit:]" BEGIN(CCL); return CCE_XDIGIT;
532: {CCL_EXPR} {
	/* Unknown expression name: report it, then return CCE_ALNUM so
	 * that the parser still receives a token.
	 */
533: format_synerr(
534: _( "bad character class expression: %s" ),
535: yytext );
536: BEGIN(CCL); return CCE_ALNUM;
537: }
538: }
539:
540: <NUM>{
	/* Inside a repetition count, entered from SECT2 when a digit
	 * follows an opening brace.
	 */
541: [[:digit:]]+ {
542: yylval = myctoi( yytext );
543: return NUMBER;
544: }
545:
546: "," return ',';
547: "}" BEGIN(SECT2); return '}';
548:
549: . {
	/* On either error below the closing brace is returned on the
	 * input's behalf.
	 */
550: synerr( _( "bad character inside {}'s" ) );
551: BEGIN(SECT2);
552: return '}';
553: }
554:
555: {NL} {
556: synerr( _( "missing }" ) );
557: BEGIN(SECT2);
558: ++linenum;
559: return '}';
560: }
561: }
562:
563:
564: <PERCENT_BRACE_ACTION>{
	/* Code inside a %{ block in section 2.  Some rules in this group
	 * name further start conditions and so apply in those as well.
	 */
565: {OPTWS}"%}".* bracelevel = 0;
566:
567: <ACTION>"/*" ACTION_ECHO; yy_push_state( COMMENT );
568:
569: <CODEBLOCK,ACTION>{
	/* The scanner is caseless, so these two patterns match any
	 * capitalization; the CHECK_ macros then inspect the case of
	 * the matched text.
	 */
570: "reject" {
571: ACTION_ECHO;
572: CHECK_REJECT(yytext);
573: }
574: "yymore" {
575: ACTION_ECHO;
576: CHECK_YYMORE(yytext);
577: }
578: }
579:
580: {NAME}|{NOT_NAME}|. ACTION_ECHO;
581: {NL} {
582: ++linenum;
583: ACTION_ECHO;
	/* The block ends at this newline once the closing %} has reset
	 * bracelevel, or when it is an indented code block.
	 */
584: if ( bracelevel == 0 ||
585: (doing_codeblock && indented_code) )
586: {
587: if ( doing_rule_action )
588: add_action( "\tYY_BREAK\n" );
589:
590: doing_rule_action = doing_codeblock = false;
591: BEGIN(SECT2);
592: }
593: }
594: }
595:
596:
597: /* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
598: <ACTION>{
	/* Text of a rule's action.  bracelevel follows the braces seen
	 * outside strings and character constants; a newline at level 0
	 * ends the action.
	 */
599: "{" ACTION_ECHO; ++bracelevel;
600: "}" ACTION_ECHO; --bracelevel;
601: [^[:alpha:]_{}"'/\n]+ ACTION_ECHO;
602: {NAME} ACTION_ECHO;
603: "'"([^'\\\n]|\\.)*"'" ACTION_ECHO; /* character constant */
604: \" ACTION_ECHO; BEGIN(ACTION_STRING);
605: {NL} {
606: ++linenum;
607: ACTION_ECHO;
608: if ( bracelevel == 0 )
609: {
610: if ( doing_rule_action )
611: add_action( "\tYY_BREAK\n" );
612:
613: doing_rule_action = false;
614: BEGIN(SECT2);
615: }
616: }
617: . ACTION_ECHO;
618: }
619:
620: <ACTION_STRING>{
	/* Inside a string literal within an action: everything is passed
	 * to ACTION_ECHO, and a backslash escapes the character after it,
	 * so an escaped quote does not end the string.
	 */
621: [^"\\\n]+ ACTION_ECHO;
622: \\. ACTION_ECHO;
623: {NL} ++linenum; ACTION_ECHO;
624: \" ACTION_ECHO; BEGIN(ACTION);
625: . ACTION_ECHO;
626: }
627:
627: <COMMENT,ACTION,ACTION_STRING><<EOF>> {
	/* Input ended while an action, comment or string was open. */
628: synerr( _( "EOF encountered inside an action" ) );
629: yyterminate();
630: }
632:
633:
633: <SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ} {
	/* An escape sequence is returned as a CHAR whose value is the
	 * result of myesc().
	 */
634: yylval = myesc( (Char *) yytext );
635:
	/* It also counts as the first character of a character class. */
636: if ( YY_START == FIRSTCCL )
637: BEGIN(CCL);
638:
639: return CHAR;
640: }
642:
643:
643: <SECT3>{
	/* Section 3 is passed through with ECHO, a line at a time. */
644: .*(\n?) ECHO;
645: <<EOF>> sectnum = 0; yyterminate();
646: }
648:
648: <*>.|\n format_synerr( _( "bad character: %s" ), yytext ); /* any state: a character that no other rule matched */
650:
651: %%
652:
653:
654: int yywrap()
655: {
656: if ( --num_input_files > 0 )
657: {
658: set_input_file( *++input_files );
659: return 0;
660: }
661:
662: else
663: return 1;
664: }
665:
666:
667: /* set_input_file - open the given file (if NULL, stdin) for scanning */
668:
669: void set_input_file( file )
670: char *file;
671: {
672: if ( file && strcmp( file, "-" ) )
673: {
674: infilename = copy_string( file );
675: yyin = fopen( infilename, "r" );
676:
677: if ( yyin == NULL )
678: lerrsf( _( "can't open %s" ), file );
679: }
680:
681: else
682: {
683: yyin = stdin;
684: infilename = copy_string( "<stdin>" );
685: }
686:
687: linenum = 1;
688: }
689:
690:
691: /* Wrapper routines for accessing the scanner's malloc routines. */
692:
/* flex_alloc - obtain size bytes from malloc()
 *
 * The result of malloc(), which may be a null pointer, is returned
 * unchanged.
 */
void *flex_alloc( size )
size_t size;
	{
	void *block = malloc( size );

	return block;
	}
698:
/* flex_realloc - resize the block at ptr to size bytes with realloc()
 *
 * The result of realloc(), which may be a null pointer, is returned
 * unchanged.
 */
void *flex_realloc( ptr, size )
void *ptr;
size_t size;
	{
	void *resized = realloc( ptr, size );

	return resized;
	}
705:
/* flex_free - release a block through free()
 *
 * A null ptr is accepted and ignored; free() is not called for it.
 */
void flex_free( ptr )
void *ptr;
	{
	if ( ! ptr )
		return;

	free( ptr );
	}