Annotation of src/usr.bin/lex/scan.l, Revision 1.9
1.9 ! ray 1: /* $OpenBSD: scan.l,v 1.8 2003/06/04 17:34:44 millert Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /* scan.l - scanner for flex input */
4:
5: %{
6: /*-
7: * Copyright (c) 1990 The Regents of the University of California.
8: * All rights reserved.
9: *
10: * This code is derived from software contributed to Berkeley by
11: * Vern Paxson.
12: *
13: * The United States Government has rights in this work pursuant
14: * to contract no. DE-AC03-76SF00098 between the United States
15: * Department of Energy and the University of California.
16: *
1.4 deraadt 17: * Redistribution and use in source and binary forms, with or without
1.8 millert 18: * modification, are permitted provided that the following conditions
19: * are met:
20: *
21: * 1. Redistributions of source code must retain the above copyright
22: * notice, this list of conditions and the following disclaimer.
23: * 2. Redistributions in binary form must reproduce the above copyright
24: * notice, this list of conditions and the following disclaimer in the
25: * documentation and/or other materials provided with the distribution.
26: *
27: * Neither the name of the University nor the names of its contributors
28: * may be used to endorse or promote products derived from this software
29: * without specific prior written permission.
30: *
31: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
32: * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
33: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
34: * PURPOSE.
1.1 deraadt 35: */
36:
1.9 ! ray 37: /* $Header: /home/ray/openbsd/src/usr.bin/lex/scan.l,v 1.8 2003/06/04 17:34:44 millert Exp $ */
1.1 deraadt 38:
39: #include "flexdef.h"
40: #include "parse.h"
41:
/* Helper macros used by the rule actions below.
 *
 * Every multi-statement macro is wrapped in do { ... } while ( 0 ) so that
 * "MACRO( x );" is exactly one statement and stays correct under an
 * unbraced if/else.
 */

/* Pass the text of the current match to add_action().
 * NOTE(review): add_action() is defined elsewhere; presumably it appends
 * to the action text being collected -- confirm.
 */
#define ACTION_ECHO add_action( yytext )

/* Call action_define( def, 1 ) only when should_define is non-zero. */
#define ACTION_IFDEF(def, should_define) \
	do \
		{ \
		if ( (should_define) ) \
			action_define( (def), 1 ); \
		} \
	while ( 0 )

/* Kept verbatim, including the trailing semicolon inside the expansion. */
#define MARK_END_OF_PROLOG mark_prolog();

/* Name and signature given to the generated scanner function. */
#define YY_DECL \
	int flexscan()

/* Return the first matched character as a CHAR token; the cast keeps
 * bytes with the high bit set from becoming negative values.
 */
#define RETURNCHAR \
	do \
		{ \
		yylval = (unsigned char) yytext[0]; \
		return CHAR; \
		} \
	while ( 0 )

/* Copy the match into nmstr (bounded by sizeof nmstr, so nmstr is assumed
 * to be an array declared elsewhere) and return a NAME token.
 */
#define RETURNNAME \
	do \
		{ \
		strlcpy( nmstr, yytext, sizeof nmstr ); \
		return NAME; \
		} \
	while ( 0 )

/* unput() the characters of str from its last one down to index start,
 * so they are rescanned in their original order.  Uses the int variable
 * `i' that must be in scope at the point of use.  The cast makes the
 * empty-string case a plain -1 instead of a size_t wraparound.
 */
#define PUT_BACK_STRING(str, start) \
	do \
		{ \
		for ( i = (int) strlen( (str) ) - 1; i >= (start); --i ) \
			unput( (str)[i] ); \
		} \
	while ( 0 )

/* Set the `reject' flag when all_upper( str ) is true. */
#define CHECK_REJECT(str) \
	do \
		{ \
		if ( all_upper( str ) ) \
			reject = true; \
		} \
	while ( 0 )

/* Set the `yymore_used' flag when all_lower( str ) is true. */
#define CHECK_YYMORE(str) \
	do \
		{ \
		if ( all_lower( str ) ) \
			yymore_used = true; \
		} \
	while ( 0 )
73: %}
74:
/* Options for building this scanner itself: case-insensitive patterns,
 * no default rule, output written to scan.c, start-condition stack
 * enabled, yy_top_state() and stdin/stdout initialization turned off.
 */
75: %option caseless nodefault outfile="scan.c" stack noyy_top_state
76: %option nostdinit
77:
/* Start conditions declared with %x; each one has a rule group below. */
78: %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
79: %x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
80: %x OPTION LINEDIR
81:
/* WS: one or more blanks.  OPTWS: zero or more blanks.
 * NOT_WS: one character that is neither a blank nor a newline.
 */
82: WS [[:blank:]]+
83: OPTWS [[:blank:]]*
84: NOT_WS [^[:blank:]\n]
85:
/* NL: a newline, optionally preceded by a carriage return. */
86: NL \r?\n
87:
/* NAME: a letter or underscore followed by letters, digits, '_' or '-'.
 * NOT_NAME: a run of characters none of which is a letter, '_', '*'
 * or newline.
 */
88: NAME ([[:alpha:]_][[:alnum:]_-]*)
89: NOT_NAME [^[:alpha:]_*\n]+
90:
/* SCNAME: start condition names use the same syntax as NAME. */
91: SCNAME {NAME}
92:
/* ESCSEQ: a backslash followed by any non-newline character, by one to
 * three octal digits, or by 'x' and one or two hex digits.
 */
93: ESCSEQ (\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))
94:
/* FIRST_CCL_CHAR: first member of a character class (may be ']').
 * CCL_CHAR: a later member (an unescaped ']' is excluded).
 * CCL_EXPR: a bracketed class expression of the form [:letters:].
 */
95: FIRST_CCL_CHAR ([^\\\n]|{ESCSEQ})
96: CCL_CHAR ([^\\\n\]]|{ESCSEQ})
97: CCL_EXPR ("[:"[[:alpha:]]+":]")
98:
/* LEXOPT: letters accepted after '%' for directives that are ignored. */
99: LEXOPT [aceknopr]
100:
101: %%
102: static int bracelevel, didadef, indented_code; /* brace nesting depth; "definition text was seen" flag; "code block was entered via leading blanks" flag */
103: static int doing_rule_action = false; /* set when an action follows a rule; makes the action's end append YY_BREAK */
104: static int option_sense; /* polarity applied to the next %option keyword */
105:
106: int doing_codeblock = false; /* set by a %{ at the start of a section 2 line */
107: int i; /* scratch index used by PUT_BACK_STRING and the PICKUPDEF trim loop */
108: Char nmdef[MAXLINE], myesc(); /* buffer for the text of a name definition; declaration of myesc() */
109:
110:
111: <INITIAL>{
	/* Rules for the INITIAL start condition (the part of the input
	 * before the first "%%" line).  A line starting with "%%" sets
	 * sectnum to 2 and switches to SECT2PROLOG.
	 */
112: ^{WS} indented_code = true; BEGIN(CODEBLOCK);
113: ^"/*" ACTION_ECHO; yy_push_state( COMMENT );
114: ^#{OPTWS}line{WS} yy_push_state( LINEDIR );
115: ^"%s"{NAME}? return SCDECL;
116: ^"%x"{NAME}? return XSCDECL;
117: ^"%{".*{NL} {
	/* Start of a %{ ... %} code block; not an indented one. */
118: ++linenum;
119: line_directive_out( (FILE *) 0, 1 );
120: indented_code = false;
121: BEGIN(CODEBLOCK);
122: }
123:
124: {WS} /* discard */
125:
126: ^"%%".* {
	/* End of this section: reset the brace depth, then hand
	 * SECTEND to the caller with the scanner in SECT2PROLOG.
	 */
127: sectnum = 2;
128: bracelevel = 0;
129: mark_defs1();
130: line_directive_out( (FILE *) 0, 1 );
131: BEGIN(SECT2PROLOG);
132: return SECTEND;
133: }
134:
135: ^"%pointer".*{NL} yytext_is_array = false; ++linenum;
136: ^"%array".*{NL} yytext_is_array = true; ++linenum;
137:
138: ^"%option" BEGIN(OPTION); return OPTION_OP;
139:
140: ^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL} ++linenum; /* ignore */
141: ^"%"{LEXOPT}{WS}.*{NL} ++linenum; /* ignore */
142:
143: ^"%"[^sxaceknopr{}].* synerr( _( "unrecognized '%' directive" ) );
144:
145: ^{NAME} {
	/* A name at the start of a line begins a name definition:
	 * remember the name (bounded by sizeof nmstr) and read the
	 * definition text in PICKUPDEF.
	 */
1.7 deraadt 146: strlcpy( nmstr, yytext, sizeof nmstr );
1.1 deraadt 147: didadef = false;
148: BEGIN(PICKUPDEF);
149: }
150:
151: {SCNAME} RETURNNAME;
152: ^{OPTWS}{NL} ++linenum; /* allows blank lines in section 1 */
153: {OPTWS}{NL} ACTION_ECHO; ++linenum; /* maybe end of comment line */
154: }
155:
156:
157: <COMMENT>{
	/* Inside a C comment: every piece of text is passed to
	 * ACTION_ECHO.  The closing marker calls yy_pop_state(); a
	 * newline also bumps linenum.
	 */
158: "*/" ACTION_ECHO; yy_pop_state();
159: "*" ACTION_ECHO;
160: [^*\n]+ ACTION_ECHO;
161: [^*\n]*{NL} ++linenum; ACTION_ECHO;
162: }
163:
164: <LINEDIR>{
	/* Remainder of a "#line" directive: a number sets linenum, a
	 * quoted string replaces infilename, and the newline calls
	 * yy_pop_state().
	 */
165: \n yy_pop_state();
166: [[:digit:]]+ linenum = myctoi( yytext );
167:
168: \"[^"\n]*\" {
	/* Release the old name, copy the match without its opening
	 * quote, then overwrite the closing quote with a NUL.
	 * NOTE(review): assumes copy_string() returns a writable
	 * copy -- confirm.
	 */
169: flex_free( (void *) infilename );
170: infilename = copy_string( yytext + 1 );
171: infilename[strlen( infilename ) - 1] = '\0';
172: }
173: . /* ignore spurious characters */
174: }
175:
176: <CODEBLOCK>{
	/* Code block text is passed to ACTION_ECHO.  A line starting
	 * with "%}" returns to INITIAL.  When the block was entered via
	 * leading blanks (indented_code set), the first newline ends it.
	 */
177: ^"%}".*{NL} ++linenum; BEGIN(INITIAL);
178:
179: {NAME}|{NOT_NAME}|. ACTION_ECHO;
180:
181: {NL} {
182: ++linenum;
183: ACTION_ECHO;
184: if ( indented_code )
185: BEGIN(INITIAL);
186: }
187: }
188:
189:
190: <PICKUPDEF>{
	/* Reads the text that follows a name at the start of a line and
	 * passes name and text to ndinstal().
	 */
191: {WS} /* separates name and definition */
192:
193: {NOT_WS}.* {
	/* Copy the rest of the line, bounded by sizeof nmdef. */
1.7 deraadt 194: strlcpy( (char *) nmdef, yytext, sizeof nmdef);
1.1 deraadt 195:
196: /* Skip trailing whitespace. */
197: for ( i = strlen( (char *) nmdef ) - 1;
198: i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t');
199: --i )
200: ;
201:
	/* i now indexes the last non-blank character (or is -1). */
202: nmdef[i + 1] = '\0';
203:
204: ndinstal( nmstr, nmdef );
205: didadef = true;
206: }
207:
208: {NL} {
	/* End of line: a name with no definition text is an error. */
209: if ( ! didadef )
210: synerr( _( "incomplete name definition" ) );
211: BEGIN(INITIAL);
212: ++linenum;
213: }
214: }
215:
216:
217: <OPTION>{
	/* Body of a %option line.  Whitespace resets option_sense to
	 * true and each "no" prefix flips it; the keyword rules then
	 * store option_sense (or its negation) in the matching setting.
	 * A newline ends the directive and returns to INITIAL.
	 */
218: {NL} ++linenum; BEGIN(INITIAL);
219: {WS} option_sense = true;
220:
221: "=" return '=';
222:
223: no option_sense = ! option_sense;
224:
225: 7bit csize = option_sense ? 128 : 256;
226: 8bit csize = option_sense ? 256 : 128;
227:
228: align long_align = option_sense;
229: always-interactive {
230: action_define( "YY_ALWAYS_INTERACTIVE", option_sense );
231: }
232: array yytext_is_array = option_sense;
233: backup backing_up_report = option_sense;
234: batch interactive = ! option_sense;
235: "c++" C_plus_plus = option_sense;
236: caseful|case-sensitive caseins = ! option_sense;
237: caseless|case-insensitive caseins = option_sense;
238: debug ddebug = option_sense;
239: default spprdflt = ! option_sense;
240: ecs useecs = option_sense;
241: fast {
	/* NOTE(review): this action and the one for "full" do not
	 * read option_sense, so a "no" prefix has no effect on
	 * them -- confirm that is intended.
	 */
242: useecs = usemecs = false;
243: use_read = fullspd = true;
244: }
245: full {
246: useecs = usemecs = false;
247: use_read = fulltbl = true;
248: }
249: input ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
250: interactive interactive = option_sense;
251: lex-compat lex_compat = option_sense;
252: main {
	/* Besides defining YY_MAIN, this sets do_yywrap to the
	 * opposite of option_sense.
	 */
253: action_define( "YY_MAIN", option_sense );
254: do_yywrap = ! option_sense;
255: }
256: meta-ecs usemecs = option_sense;
257: never-interactive {
258: action_define( "YY_NEVER_INTERACTIVE", option_sense );
259: }
260: perf-report performance_report += option_sense ? 1 : -1;
261: pointer yytext_is_array = ! option_sense;
262: read use_read = option_sense;
263: reject reject_really_used = option_sense;
264: stack action_define( "YY_STACK_USED", option_sense );
265: stdinit do_stdinit = option_sense;
266: stdout use_stdout = option_sense;
267: unput ACTION_IFDEF("YY_NO_UNPUT", ! option_sense);
268: verbose printstats = option_sense;
269: warn nowarn = ! option_sense;
270: yylineno do_yylineno = option_sense;
271: yymore yymore_really_used = option_sense;
272: yywrap do_yywrap = option_sense;
273:
274: yy_push_state ACTION_IFDEF("YY_NO_PUSH_STATE", ! option_sense);
275: yy_pop_state ACTION_IFDEF("YY_NO_POP_STATE", ! option_sense);
276: yy_top_state ACTION_IFDEF("YY_NO_TOP_STATE", ! option_sense);
277:
278: yy_scan_buffer ACTION_IFDEF("YY_NO_SCAN_BUFFER", ! option_sense);
279: yy_scan_bytes ACTION_IFDEF("YY_NO_SCAN_BYTES", ! option_sense);
280: yy_scan_string ACTION_IFDEF("YY_NO_SCAN_STRING", ! option_sense);
281:
282: outfile return OPT_OUTFILE;
283: prefix return OPT_PREFIX;
284: yyclass return OPT_YYCLASS;
285:
286: \"[^"\n]*\" {
	/* Quoted argument: copy it without the opening quote
	 * (bounded by sizeof nmstr), then drop the closing quote
	 * only if it is still the last character after the copy.
	 */
1.7 deraadt 287: strlcpy( nmstr, yytext + 1, sizeof nmstr);
1.9 ! ray 288: if (nmstr[strlen(nmstr) - 1] == '"')
! 289: nmstr[strlen(nmstr) - 1] = '\0';
1.1 deraadt 290: return NAME;
291: }
292:
293: (([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|. {
	/* Any other word, or any single character: report it and
	 * let RECOVER consume the rest of the line.
	 */
294: format_synerr( _( "unrecognized %%option: %s" ),
295: yytext );
296: BEGIN(RECOVER);
297: }
298: }
299:
300: <RECOVER>.*{NL} ++linenum; BEGIN(INITIAL); /* discard the rest of the line, count it, and go back to INITIAL */
301:
302:
303: <SECT2PROLOG>{
	/* Text between the "%%" line and the first rule.  bracelevel
	 * counts %{ ... %} nesting.  The first non-indented line seen
	 * while bracelevel is not positive is pushed back, the prolog
	 * is marked finished and scanning continues in SECT2.
	 */
304: ^"%{".* ++bracelevel; yyless( 2 ); /* eat only %{ */
305: ^"%}".* --bracelevel; yyless( 2 ); /* eat only %} */
306:
307: ^{WS}.* ACTION_ECHO; /* indented code in prolog */
308:
309: ^{NOT_WS}.* { /* non-indented code */
310: if ( bracelevel <= 0 )
311: { /* not in %{ ... %} */
312: yyless( 0 ); /* put it all back */
313: yy_set_bol( 1 );
314: mark_prolog();
315: BEGIN(SECT2);
316: }
317: else
318: ACTION_ECHO;
319: }
320:
321: .* ACTION_ECHO;
322: {NL} ++linenum; ACTION_ECHO;
323:
324: <<EOF>> {
	/* Input ended before any rule: finish the prolog, clear
	 * sectnum and stop.
	 */
325: mark_prolog();
326: sectnum = 0;
327: yyterminate(); /* to stop the parser */
328: }
329: }
330:
331: <SECT2>{
	/* Rules for the rules section of the input: pattern tokens are
	 * returned one at a time, and the whitespace or newline after
	 * a pattern switches to one of the action-collecting start
	 * conditions.
	 */
332: ^{OPTWS}{NL} ++linenum; /* allow blank lines in section 2 */
333:
334: ^{OPTWS}"%{" {
	/* A %{ at the start of a line opens a code block that is
	 * collected in PERCENT_BRACE_ACTION.
	 */
335: indented_code = false;
336: doing_codeblock = true;
337: bracelevel = 1;
338: BEGIN(PERCENT_BRACE_ACTION);
339: }
340:
341: ^{OPTWS}"<" BEGIN(SC); return '<';
342: ^{OPTWS}"^" return '^';
343: \" BEGIN(QUOTE); return '"';
344: "{"/[[:digit:]] BEGIN(NUM); return '{';
345: "$"/([[:blank:]]|{NL}) return '$';
346:
347: {WS}"%{" {
	/* A %{ after a pattern starts that rule's action.  When a
	 * rule is in progress (in_rule), '\n' is returned to mark
	 * the end of the pattern.
	 */
348: bracelevel = 1;
349: BEGIN(PERCENT_BRACE_ACTION);
350:
351: if ( in_rule )
352: {
353: doing_rule_action = true;
354: in_rule = false;
355: return '\n';
356: }
357: }
358: {WS}"|".*{NL} continued_action = true; ++linenum; return '\n';
359:
360: ^{WS}"/*" {
361: yyless( yyleng - 2 ); /* put back '/', '*' */
362: bracelevel = 0;
363: continued_action = false;
364: BEGIN(ACTION);
365: }
366:
367: ^{WS} /* allow indented rules */
368:
369: {WS} {
370: /* This rule is separate from the one below because
371: * otherwise we get variable trailing context, so
372: * we can't build the scanner using -{f,F}.
373: */
374: bracelevel = 0;
375: continued_action = false;
376: BEGIN(ACTION);
377:
378: if ( in_rule )
379: {
380: doing_rule_action = true;
381: in_rule = false;
382: return '\n';
383: }
384: }
385:
386: {OPTWS}{NL} {
	/* Pattern followed directly by end of line: the newline
	 * is pushed back and handled in ACTION.
	 */
387: bracelevel = 0;
388: continued_action = false;
389: BEGIN(ACTION);
390: unput( '\n' ); /* so <ACTION> sees it */
391:
392: if ( in_rule )
393: {
394: doing_rule_action = true;
395: in_rule = false;
396: return '\n';
397: }
398: }
399:
400: ^{OPTWS}"<<EOF>>" |
401: "<<EOF>>" return EOF_OP;
402:
403: ^"%%".* {
	/* Second "%%" line: the rest of the input is handled by
	 * the SECT3 rules.
	 */
404: sectnum = 3;
405: BEGIN(SECT3);
406: yyterminate(); /* to stop the parser */
407: }
408:
409: "["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})* {
	/* The match is a whole character class except for its
	 * closing bracket; its text is the key for ccllookup() and
	 * cclinstal().
	 * NOTE(review): strlcpy() cuts the key at sizeof nmstr, so
	 * two long classes that agree up to that length share one
	 * key -- confirm whether that can matter.
	 */
410: int cclval;
411:
1.7 deraadt 412: strlcpy( nmstr, yytext, sizeof nmstr);
1.1 deraadt 413:
414: /* Check to see if we've already encountered this
415: * ccl.
416: */
417: if ( (cclval = ccllookup( (Char *) nmstr )) != 0 )
418: {
	/* Seen before: the next input character must be
	 * the closing bracket.
	 */
419: if ( input() != ']' )
420: synerr( _( "bad character class" ) );
421:
422: yylval = cclval;
423: ++cclreuse;
424: return PREVCCL;
425: }
426: else
427: {
428: /* We fudge a bit. We know that this ccl will
429: * soon be numbered as lastccl + 1 by cclinit.
430: */
431: cclinstal( (Char *) nmstr, lastccl + 1 );
432:
433: /* Push back everything but the leading bracket
434: * so the ccl can be rescanned.
435: */
436: yyless( 1 );
437:
438: BEGIN(FIRSTCCL);
439: return '[';
440: }
441: }
442:
"{"{NAME}"}"	{
			/* A reference to a named definition.  The name is
			 * looked up with ndlookup() and the text of the
			 * definition is pushed back onto the input so that
			 * it is scanned in place of the reference.
			 */
			register Char *nmdefptr;
			Char *ndlookup();
			size_t nmlen;

			strlcpy( nmstr, yytext + 1, sizeof nmstr );

			/* Chop the trailing brace.  strlcpy() truncates to
			 * sizeof nmstr, so the brace is only there when the
			 * whole name fit; indexing with yyleng - 2 would
			 * write outside nmstr for an over-long name.
			 */
			nmlen = strlen( nmstr );
			if ( nmlen > 0 && nmstr[nmlen - 1] == '}' )
				nmstr[nmlen - 1] = '\0';

			if ( (nmdefptr = ndlookup( nmstr )) == 0 )
				format_synerr(
					_( "undefined definition {%s}" ),
						nmstr );

			else
				{ /* push back name surrounded by ()'s */
				int len = strlen( (char *) nmdefptr );

				if ( lex_compat || nmdefptr[0] == '^' ||
				     (len > 0 && nmdefptr[len - 1] == '$') )
					{ /* don't use ()'s after all */
					PUT_BACK_STRING((char *) nmdefptr, 0);

					if ( nmdefptr[0] == '^' )
						BEGIN(CARETISBOL);
					}

				else
					{
					/* Pushed back in reverse so the
					 * input reads "(" text ")".
					 */
					unput(')');
					PUT_BACK_STRING((char *) nmdefptr, 0);
					unput('(');
					}
				}
			}
476:
477: [/|*+?.(){}] return (unsigned char) yytext[0]; /* operator characters are returned as themselves */
478: . RETURNCHAR; /* any other character is a CHAR token */
479: }
480:
481:
482: <SC>{
	/* Start condition list that follows '<' at the start of a rule:
	 * names, ',' and '*' are returned until '>' ends the list.  A
	 * '>' directly followed by '^' continues in CARETISBOL instead
	 * of SECT2.
	 */
483: [,*] return (unsigned char) yytext[0];
484: ">" BEGIN(SECT2); return '>';
485: ">"/^ BEGIN(CARETISBOL); return '>';
486: {SCNAME} RETURNNAME;
487: . {
488: format_synerr( _( "bad <start condition>: %s" ),
489: yytext );
490: }
491: }
492:
493: <CARETISBOL>"^" BEGIN(SECT2); return '^'; /* return the '^' as the operator token and resume SECT2 */
494:
495:
496: <QUOTE>{
	/* Inside a double-quoted string in a pattern: each character
	 * is a CHAR token until the closing quote returns to SECT2.
	 */
497: [^"\n] RETURNCHAR;
498: \" BEGIN(SECT2); return '"';
499:
500: {NL} {
	/* Line ended inside the string: report it, then return
	 * the closing quote token anyway.
	 */
501: synerr( _( "missing quote" ) );
502: BEGIN(SECT2);
503: ++linenum;
504: return '"';
505: }
506: }
507:
508:
509: <FIRSTCCL>{
	/* First position inside a character class.  A leading '^' is
	 * returned as '^'; when it is followed by '-' or ']' the state
	 * stays FIRSTCCL, otherwise scanning continues in CCL.
	 */
510: "^"/[^-\]\n] BEGIN(CCL); return '^';
511: "^"/("-"|"]") return '^';
512: . BEGIN(CCL); RETURNCHAR;
513: }
514:
515: <CCL>{
	/* Remaining positions inside a character class.  A '-' that is
	 * followed by something other than ']' or newline is returned
	 * as '-'; ']' ends the class and returns to SECT2.
	 */
516: -/[^\]\n] return '-';
517: [^\]\n] RETURNCHAR;
518: "]" BEGIN(SECT2); return ']';
519: .|{NL} {
	/* Anything else: report it and return ']' anyway. */
520: synerr( _( "bad character class" ) );
521: BEGIN(SECT2);
522: return ']';
523: }
524: }
525:
526: <FIRSTCCL,CCL>{
	/* Named class expressions inside a character class.  Each
	 * known name returns its own CCE_ token and continues in CCL.
	 */
527: "[:alnum:]" BEGIN(CCL); return CCE_ALNUM;
528: "[:alpha:]" BEGIN(CCL); return CCE_ALPHA;
529: "[:blank:]" BEGIN(CCL); return CCE_BLANK;
530: "[:cntrl:]" BEGIN(CCL); return CCE_CNTRL;
531: "[:digit:]" BEGIN(CCL); return CCE_DIGIT;
532: "[:graph:]" BEGIN(CCL); return CCE_GRAPH;
533: "[:lower:]" BEGIN(CCL); return CCE_LOWER;
534: "[:print:]" BEGIN(CCL); return CCE_PRINT;
535: "[:punct:]" BEGIN(CCL); return CCE_PUNCT;
536: "[:space:]" BEGIN(CCL); return CCE_SPACE;
537: "[:upper:]" BEGIN(CCL); return CCE_UPPER;
538: "[:xdigit:]" BEGIN(CCL); return CCE_XDIGIT;
539: {CCL_EXPR} {
	/* Unknown name: report it and return CCE_ALNUM in its
	 * place.
	 */
540: format_synerr(
541: _( "bad character class expression: %s" ),
542: yytext );
543: BEGIN(CCL); return CCE_ALNUM;
544: }
545: }
546:
547: <NUM>{
	/* Inside a brace that is followed by a digit: numbers and ','
	 * are returned until the closing brace returns to SECT2.
	 */
548: [[:digit:]]+ {
549: yylval = myctoi( yytext );
550: return NUMBER;
551: }
552:
553: "," return ',';
554: "}" BEGIN(SECT2); return '}';
555:
556: . {
	/* Unexpected character: report it and return '}' anyway. */
557: synerr( _( "bad character inside {}'s" ) );
558: BEGIN(SECT2);
559: return '}';
560: }
561:
562: {NL} {
	/* Line ended first: report it, count the line and return
	 * '}' anyway.
	 */
563: synerr( _( "missing }" ) );
564: BEGIN(SECT2);
565: ++linenum;
566: return '}';
567: }
568: }
569:
570:
571: <PERCENT_BRACE_ACTION>{
	/* Collects the text of a %{ ... %} block in the rules section.
	 * The nested scopes below add rules for further start
	 * conditions: comment handling for ACTION, and the keyword
	 * checks for CODEBLOCK and ACTION.
	 */
572: {OPTWS}"%}".* bracelevel = 0;
573:
574: <ACTION>"/*" ACTION_ECHO; yy_push_state( COMMENT );
575:
576: <CODEBLOCK,ACTION>{
	/* The patterns match in any letter case; the CHECK_
	 * macros set their flag depending on all_upper() or
	 * all_lower() of the matched text.
	 */
577: "reject" {
578: ACTION_ECHO;
579: CHECK_REJECT(yytext);
580: }
581: "yymore" {
582: ACTION_ECHO;
583: CHECK_YYMORE(yytext);
584: }
585: }
586:
587: {NAME}|{NOT_NAME}|. ACTION_ECHO;
588: {NL} {
	/* The block ends at a newline once bracelevel is zero,
	 * or right away for an indented code block.  A rule's
	 * action gets "\tYY_BREAK\n" appended.
	 */
589: ++linenum;
590: ACTION_ECHO;
591: if ( bracelevel == 0 ||
592: (doing_codeblock && indented_code) )
593: {
594: if ( doing_rule_action )
595: add_action( "\tYY_BREAK\n" );
596:
597: doing_rule_action = doing_codeblock = false;
598: BEGIN(SECT2);
599: }
600: }
601: }
602:
603:
604: /* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
605: <ACTION>{
	/* Text of a rule's action.  bracelevel follows the braces; a
	 * newline seen while bracelevel is zero ends the action and
	 * returns to SECT2.
	 */
606: "{" ACTION_ECHO; ++bracelevel;
607: "}" ACTION_ECHO; --bracelevel;
608: [^[:alpha:]_{}"'/\n]+ ACTION_ECHO;
609: {NAME} ACTION_ECHO;
610: "'"([^'\\\n]|\\.)*"'" ACTION_ECHO; /* character constant */
611: \" ACTION_ECHO; BEGIN(ACTION_STRING);
612: {NL} {
613: ++linenum;
614: ACTION_ECHO;
615: if ( bracelevel == 0 )
616: {
	/* A rule's action gets "\tYY_BREAK\n" appended. */
617: if ( doing_rule_action )
618: add_action( "\tYY_BREAK\n" );
619:
620: doing_rule_action = false;
621: BEGIN(SECT2);
622: }
623: }
624: . ACTION_ECHO;
625: }
626:
627: <ACTION_STRING>{
	/* Inside a string literal within an action: everything is
	 * passed to ACTION_ECHO, a backslash pair is taken as one
	 * unit, and an unescaped quote returns to ACTION.
	 */
628: [^"\\\n]+ ACTION_ECHO;
629: \\. ACTION_ECHO;
630: {NL} ++linenum; ACTION_ECHO;
631: \" ACTION_ECHO; BEGIN(ACTION);
632: . ACTION_ECHO;
633: }
634:
635: <COMMENT,ACTION,ACTION_STRING><<EOF>> {
	/* End of input inside a comment, an action or a string in
	 * an action: report it and stop scanning.
	 */
636: synerr( _( "EOF encountered inside an action" ) );
637: yyterminate();
638: }
639:
640:
641: <SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ} {
	/* Escape sequence in a pattern, a quoted string or a
	 * character class: the value computed by myesc() is returned
	 * as a CHAR token.  Seen at the first position of a class,
	 * it also moves the scanner on to CCL.
	 */
642: yylval = myesc( (Char *) yytext );
643:
644: if ( YY_START == FIRSTCCL )
645: BEGIN(CCL);
646:
647: return CHAR;
648: }
649:
650:
651: <SECT3>{
	/* After the second "%%" line: each line is passed to ECHO
	 * unchanged; end of input clears sectnum and stops scanning.
	 */
652: .*(\n?) ECHO;
653: <<EOF>> sectnum = 0; yyterminate();
654: }
655:
656: <*>.|\n format_synerr( _( "bad character: %s" ), yytext ); /* fallback for every start condition: report a character no other rule matched */
657:
658: %%
659:
660:
661: int yywrap()
662: {
663: if ( --num_input_files > 0 )
664: {
665: set_input_file( *++input_files );
666: return 0;
667: }
668:
669: else
670: return 1;
671: }
672:
673:
674: /* set_input_file - open the given file (if NULL, stdin) for scanning */
675:
676: void set_input_file( file )
677: char *file;
678: {
679: if ( file && strcmp( file, "-" ) )
680: {
681: infilename = copy_string( file );
682: yyin = fopen( infilename, "r" );
683:
684: if ( yyin == NULL )
685: lerrsf( _( "can't open %s" ), file );
686: }
687:
688: else
689: {
690: yyin = stdin;
691: infilename = copy_string( "<stdin>" );
692: }
693:
694: linenum = 1;
695: }
696:
697:
698: /* Wrapper routines for accessing the scanner's malloc routines. */
699:
/* flex_alloc - allocate size bytes with malloc()
 *
 * Returns the pointer from malloc(), which is null on failure.
 */
void *
flex_alloc(size_t size)
{
	return malloc(size);
}
705:
/* flex_realloc - resize the block at ptr to size bytes with realloc()
 *
 * Returns the pointer from realloc(), which is null on failure.
 */
void *
flex_realloc(void *ptr, size_t size)
{
	return realloc(ptr, size);
}
712:
/* flex_free - release a block obtained from flex_alloc() or flex_realloc()
 *
 * A null ptr is accepted and ignored, because free() itself does nothing
 * when given a null pointer.
 */
void
flex_free(void *ptr)
{
	free(ptr);
}