Annotation of src/usr.bin/lex/scan.l, Revision 1.3
1.3 ! millert 1: /* $OpenBSD: scan.l,v 1.2 1996/06/26 05:35:42 deraadt Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /* scan.l - scanner for flex input */
4:
5: %{
6: /*-
7: * Copyright (c) 1990 The Regents of the University of California.
8: * All rights reserved.
9: *
10: * This code is derived from software contributed to Berkeley by
11: * Vern Paxson.
12: *
13: * The United States Government has rights in this work pursuant
14: * to contract no. DE-AC03-76SF00098 between the United States
15: * Department of Energy and the University of California.
16: *
17: * Redistribution and use in source and binary forms are permitted provided
18: * that: (1) source distributions retain this entire copyright notice and
19: * comment, and (2) distributions including binaries display the following
20: * acknowledgement: ``This product includes software developed by the
21: * University of California, Berkeley and its contributors'' in the
22: * documentation or other materials provided with the distribution and in
23: * all advertising materials mentioning features or use of this software.
24: * Neither the name of the University nor the names of its contributors may
25: * be used to endorse or promote products derived from this software without
26: * specific prior written permission.
27: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
28: * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
29: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
30: */
31:
1.3 ! millert 32: /* $Header: /home/daffy/u0/vern/flex/RCS/scan.l,v 2.56 95/04/24 12:17:19 vern Exp $ */
1.1 deraadt 33:
34: #include "flexdef.h"
35: #include "parse.h"
36:
/* Helper macros used by the rule actions below.
 *
 * Every statement-like macro is wrapped in do { ... } while ( 0 ) so that
 * "MACRO(...);" is exactly one statement and stays correct if it is ever
 * used as the body of an unbraced if/else.
 */

/* Append the text of the current token to the action text being built. */
#define ACTION_ECHO add_action( yytext )

/* Call action_define( def, 1 ) only when should_define is nonzero. */
#define ACTION_IFDEF(def, should_define) \
	do \
		{ \
		if ( should_define ) \
			action_define( def, 1 ); \
		} \
	while ( 0 )

#define MARK_END_OF_PROLOG mark_prolog();

/* The generated scanner function is named flexscan() rather than yylex(). */
#define YY_DECL \
	int flexscan()

/* Return the first byte of the token as a CHAR token.  The cast keeps
 * bytes with the high bit set from becoming negative values.
 */
#define RETURNCHAR \
	do \
		{ \
		yylval = (unsigned char) yytext[0]; \
		return CHAR; \
		} \
	while ( 0 )

/* Copy the token text into nmstr and return a NAME token.
 * NOTE(review): the copy is unbounded; nmstr's capacity is declared
 * outside this file - confirm it can hold the longest possible name.
 */
#define RETURNNAME \
	do \
		{ \
		strcpy( nmstr, yytext ); \
		return NAME; \
		} \
	while ( 0 )

/* Push str[start..] back onto the input, last character first, so that
 * it is rescanned in its original order.  Uses the enclosing scope's
 * int variable i as the index.
 */
#define PUT_BACK_STRING(str, start) \
	do \
		{ \
		for ( i = (int) strlen( (str) ) - 1; i >= (start); --i ) \
			unput((str)[i]); \
		} \
	while ( 0 )

/* Set the reject flag when all_upper( str ) holds. */
#define CHECK_REJECT(str) \
	do \
		{ \
		if ( all_upper( str ) ) \
			reject = true; \
		} \
	while ( 0 )

/* Set the yymore_used flag when all_lower( str ) holds. */
#define CHECK_YYMORE(str) \
	do \
		{ \
		if ( all_lower( str ) ) \
			yymore_used = true; \
		} \
	while ( 0 )
68: %}
69:
/* Options used when generating this scanner itself.  "caseless" makes
 * every pattern below case-insensitive, and "stack" enables the
 * yy_push_state()/yy_pop_state() calls used by the rules.
 */
%option caseless nodefault outfile="scan.c" stack noyy_top_state
%option nostdinit

/* Exclusive start conditions; each one has a rule group below. */
%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
%x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
%x OPTION LINEDIR

/* Horizontal whitespace: required, optional, and "anything but". */
WS		[[:blank:]]+
OPTWS		[[:blank:]]*
NOT_WS		[^[:blank:]\n]

/* End of line, with an optional carriage return before the newline. */
NL		\r?\n

/* NAME is an identifier that may also contain '-' after its first
 * character.  NOT_NAME is a run of characters that cannot start a NAME,
 * stopping at '*' and newline.
 */
NAME		([[:alpha:]_][[:alnum:]_-]*)
NOT_NAME	[^[:alpha:]_*\n]+

/* Start-condition names use the same syntax as NAME. */
SCNAME		{NAME}

/* Backslash escape: backslash plus any non-newline character, 1-3 octal
 * digits, or 'x' followed by 1-2 hex digits.
 */
ESCSEQ		(\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))

/* Character-class members.  The first member may be ']'; later ones may
 * not.  CCL_EXPR is a bracketed class name such as [:alpha:].
 */
FIRST_CCL_CHAR	([^\\\n]|{ESCSEQ})
CCL_CHAR	([^\\\n\]]|{ESCSEQ})
CCL_EXPR	("[:"[[:alpha:]]+":]")

/* Letters accepted after '%' as old-style lex directives; the INITIAL
 * rules that use this pattern ignore the directive.
 */
LEXOPT		[aceknopr]
95:
96: %%
	/* Scanner state kept across calls to flexscan():
	 *   bracelevel        - brace nesting depth inside code and actions
	 *   didadef           - set once PICKUPDEF has stored a definition
	 *   indented_code     - set when a code block began as an indented line
	 *   doing_rule_action - set while the text being echoed is a rule's action
	 *   option_sense      - polarity of the %option word being scanned
	 */
	static int bracelevel, didadef, indented_code;
	static int doing_rule_action = false;
	static int option_sense;

	/* Automatic variables, re-initialized on every call to flexscan(). */
	int doing_codeblock = false;
	int i;	/* scratch index, also used by PUT_BACK_STRING */
	Char nmdef[MAXLINE], myesc();	/* nmdef: text of the definition being read */
104:
105:
<INITIAL>{
	/* Section 1 of the input: code blocks, comments, '%' directives
	 * and name definitions.  Rule order is significant.
	 */
	^{WS}		indented_code = true; BEGIN(CODEBLOCK);
	^"/*"		ACTION_ECHO; yy_push_state( COMMENT );
	^#{OPTWS}line{WS}	yy_push_state( LINEDIR );
	^"%s"{NAME}?	return SCDECL;
	^"%x"{NAME}?	return XSCDECL;
	^"%{".*{NL}	{
			/* Start of a %{ ... %} block: count the line, emit a
			 * line directive, and echo the block via CODEBLOCK.
			 */
			++linenum;
			line_directive_out( (FILE *) 0, 1 );
			indented_code = false;
			BEGIN(CODEBLOCK);
			}

	{WS}		/* discard */

	^"%%".*		{
			/* End of section 1: continue with the section 2
			 * prolog and tell the parser the section ended.
			 */
			sectnum = 2;
			bracelevel = 0;
			mark_defs1();
			line_directive_out( (FILE *) 0, 1 );
			BEGIN(SECT2PROLOG);
			return SECTEND;
			}

	^"%pointer".*{NL}	yytext_is_array = false; ++linenum;
	^"%array".*{NL}		yytext_is_array = true; ++linenum;

	^"%option"	BEGIN(OPTION); return OPTION_OP;

	^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL}	++linenum; /* ignore */
	^"%"{LEXOPT}{WS}.*{NL}	++linenum;	/* ignore */

	^"%"[^sxaceknopr{}].*	synerr( _( "unrecognized '%' directive" ) );

	^{NAME}		{
			/* A name at the start of a line begins a definition;
			 * PICKUPDEF reads the rest of the line.
			 */
			strcpy( nmstr, yytext );
			didadef = false;
			BEGIN(PICKUPDEF);
			}

	{SCNAME}	RETURNNAME;
	^{OPTWS}{NL}	++linenum; /* allows blank lines in section 1 */
	{OPTWS}{NL}	ACTION_ECHO; ++linenum; /* maybe end of comment line */
}
150:
151:
<COMMENT>{
	/* Entered with yy_push_state(); echoes the comment text and pops
	 * back to the pushing state at the closing delimiter.
	 */
	"*/"		ACTION_ECHO; yy_pop_state();
	"*"		ACTION_ECHO;
	[^*\n]+		ACTION_ECHO;
	[^*\n]*{NL}	++linenum; ACTION_ECHO;
}
158:
<LINEDIR>{
	/* Remainder of a "#line" directive, pushed from INITIAL.  A number
	 * replaces linenum, a quoted string replaces infilename, and the
	 * newline pops back to the pushing state.
	 */
	\n		yy_pop_state();
	[[:digit:]]+	linenum = myctoi( yytext );

	\"[^"\n]*\"	{
			flex_free( (void *) infilename );
			/* Copy from just past the opening quote, then
			 * overwrite the closing quote with the terminator.
			 */
			infilename = copy_string( yytext + 1 );
			infilename[strlen( infilename ) - 1] = '\0';
			}
	.		/* ignore spurious characters */
}
170:
<CODEBLOCK>{
	/* Section 1 code, either a %{ ... %} block or one indented line;
	 * the text is echoed with ACTION_ECHO.
	 */
	^"%}".*{NL}	++linenum; BEGIN(INITIAL);

	{NAME}|{NOT_NAME}|.	ACTION_ECHO;

	{NL}		{
			++linenum;
			ACTION_ECHO;
			/* Indented code ends at the end of its line. */
			if ( indented_code )
				BEGIN(INITIAL);
			}
}
183:
184:
<PICKUPDEF>{
	/* Reads the definition text that follows a name in section 1. */
	{WS}		/* separates name and definition */

	{NOT_WS}.*	{
			if ( yyleng >= MAXLINE )
				{
				/* nmdef holds MAXLINE bytes including the
				 * terminator; report the problem rather
				 * than write past the end of the buffer.
				 */
				synerr( _( "name definition too long" ) );

				/* One error is enough; suppress the
				 * "incomplete name definition" message.
				 */
				didadef = true;
				}

			else
				{
				strcpy( (char *) nmdef, yytext );

				/* Skip trailing whitespace.  A trailing
				 * carriage return is dropped as well: NL
				 * accepts "\r\n", but ".*" has already
				 * consumed the '\r' as part of this match.
				 */
				for ( i = strlen( (char *) nmdef ) - 1;
				      i >= 0 && (nmdef[i] == ' ' ||
						 nmdef[i] == '\t' ||
						 nmdef[i] == '\r');
				      --i )
					;

				nmdef[i + 1] = '\0';

				ndinstal( nmstr, nmdef );
				didadef = true;
				}
			}

	{NL}		{
			if ( ! didadef )
				synerr( _( "incomplete name definition" ) );
			BEGIN(INITIAL);
			++linenum;
			}
}
210:
211:
<OPTION>{
	/* Words following %option.  Whitespace resets option_sense to true
	 * before each word and every leading "no" flips it, so "nofoo"
	 * reaches foo's rule with option_sense false.
	 */
	{NL}		++linenum; BEGIN(INITIAL);
	{WS}		option_sense = true;

	"="		return '=';

	no		option_sense = ! option_sense;

	7bit		csize = option_sense ? 128 : 256;
	8bit		csize = option_sense ? 256 : 128;

	align		long_align = option_sense;
	always-interactive	{
			action_define( "YY_ALWAYS_INTERACTIVE", option_sense );
			}
	array		yytext_is_array = option_sense;
	backup		backing_up_report = option_sense;
	batch		interactive = ! option_sense;
	"c++"		C_plus_plus = option_sense;
	caseful|case-sensitive		caseins = ! option_sense;
	caseless|case-insensitive	caseins = option_sense;
	debug		ddebug = option_sense;
	default		spprdflt = ! option_sense;
	ecs		useecs = option_sense;
	fast		{
			/* NOTE(review): option_sense is not consulted, so
			 * "nofast" has the same effect as "fast".
			 */
			useecs = usemecs = false;
			use_read = fullspd = true;
			}
	full		{
			/* NOTE(review): option_sense is not consulted, so
			 * "nofull" has the same effect as "full".
			 */
			useecs = usemecs = false;
			use_read = fulltbl = true;
			}
	input		ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
	interactive	interactive = option_sense;
	lex-compat	lex_compat = option_sense;
	main		{
			action_define( "YY_MAIN", option_sense );
			do_yywrap = ! option_sense;
			}
	meta-ecs	usemecs = option_sense;
	never-interactive	{
			action_define( "YY_NEVER_INTERACTIVE", option_sense );
			}
	perf-report	performance_report += option_sense ? 1 : -1;
	pointer		yytext_is_array = ! option_sense;
	read		use_read = option_sense;
	reject		reject_really_used = option_sense;
	stack		action_define( "YY_STACK_USED", option_sense );
	stdinit		do_stdinit = option_sense;
	stdout		use_stdout = option_sense;
	unput		ACTION_IFDEF("YY_NO_UNPUT", ! option_sense);
	verbose		printstats = option_sense;
	warn		nowarn = ! option_sense;
	yylineno	do_yylineno = option_sense;
	yymore		yymore_really_used = option_sense;
	yywrap		do_yywrap = option_sense;

	yy_push_state	ACTION_IFDEF("YY_NO_PUSH_STATE", ! option_sense);
	yy_pop_state	ACTION_IFDEF("YY_NO_POP_STATE", ! option_sense);
	yy_top_state	ACTION_IFDEF("YY_NO_TOP_STATE", ! option_sense);

	yy_scan_buffer	ACTION_IFDEF("YY_NO_SCAN_BUFFER", ! option_sense);
	yy_scan_bytes	ACTION_IFDEF("YY_NO_SCAN_BYTES", ! option_sense);
	yy_scan_string	ACTION_IFDEF("YY_NO_SCAN_STRING", ! option_sense);

	outfile		return OPT_OUTFILE;
	prefix		return OPT_PREFIX;
	yyclass		return OPT_YYCLASS;

	\"[^"\n]*\"	{
			/* Quoted value: copy the text after the opening
			 * quote, then overwrite the closing quote.
			 */
			strcpy( nmstr, yytext + 1 );
			nmstr[strlen( nmstr ) - 1] = '\0';
			return NAME;
			}

	(([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|.	{
			/* Any other word (not beginning with "no") or
			 * stray character: report it, then let RECOVER
			 * skip the rest of the line.
			 */
			format_synerr( _( "unrecognized %%option: %s" ),
				yytext );
			BEGIN(RECOVER);
			}
}
293:
<RECOVER>.*{NL}	++linenum; BEGIN(INITIAL);	/* skip the rest of the line */
295:
296:
<SECT2PROLOG>{
	/* Code at the start of section 2, before the first rule.  The
	 * prolog ends at the first non-indented line seen while bracelevel
	 * is not positive, i.e. outside any %{ ... %}.
	 */
	^"%{".*	++bracelevel; yyless( 2 );	/* eat only %{ */
	^"%}".*	--bracelevel; yyless( 2 );	/* eat only %} */

	^{WS}.*	ACTION_ECHO;	/* indented code in prolog */

	^{NOT_WS}.*	{	/* non-indented code */
			if ( bracelevel <= 0 )
				{	/* not in %{ ... %} */
				yyless( 0 );	/* put it all back */
				/* The text will be rescanned, so restore the
				 * beginning-of-line state for '^' rules.
				 */
				yy_set_bol( 1 );
				mark_prolog();
				BEGIN(SECT2);
				}
			else
				ACTION_ECHO;
			}

	.*		ACTION_ECHO;
	{NL}	++linenum; ACTION_ECHO;

	<<EOF>>		{
			mark_prolog();
			sectnum = 0;
			yyterminate(); /* to stop the parser */
			}
}
324:
<SECT2>{
	/* Section 2 of the input: tokenizes rule patterns for the parser
	 * and switches to ACTION or PERCENT_BRACE_ACTION when an action or
	 * code block starts.
	 */
	^{OPTWS}{NL}	++linenum;	/* allow blank lines in section 2 */

	^{OPTWS}"%{"	{
			indented_code = false;
			doing_codeblock = true;
			bracelevel = 1;
			BEGIN(PERCENT_BRACE_ACTION);
			}

	^{OPTWS}"<"	BEGIN(SC); return '<';
	^{OPTWS}"^"	return '^';
	\"		BEGIN(QUOTE); return '"';
	"{"/[[:digit:]]	BEGIN(NUM); return '{';
	"$"/([[:blank:]]|{NL})	return '$';

	{WS}"%{"		{
			bracelevel = 1;
			BEGIN(PERCENT_BRACE_ACTION);

			/* If a rule was in progress, what follows is its
			 * action; '\n' is returned to the parser.
			 */
			if ( in_rule )
				{
				doing_rule_action = true;
				in_rule = false;
				return '\n';
				}
			}
	{WS}"|".*{NL}	continued_action = true; ++linenum; return '\n';

	^{WS}"/*"	{
			yyless( yyleng - 2 );	/* put back '/', '*' */
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);
			}

	^{WS}		/* allow indented rules */

	{WS}		{
			/* This rule is separate from the one below because
			 * otherwise we get variable trailing context, so
			 * we can't build the scanner using -{f,F}.
			 */
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);

			if ( in_rule )
				{
				doing_rule_action = true;
				in_rule = false;
				return '\n';
				}
			}

	{OPTWS}{NL}	{
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);
			unput( '\n' );	/* so <ACTION> sees it */

			if ( in_rule )
				{
				doing_rule_action = true;
				in_rule = false;
				return '\n';
				}
			}

	^{OPTWS}"<<EOF>>"	|
	"<<EOF>>"	return EOF_OP;

	^"%%".*		{
			sectnum = 3;
			BEGIN(SECT3);
			yyterminate(); /* to stop the parser */
			}

	"["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})*	{
			int cclval;

			/* The match covers '[' and the class members but
			 * not the closing ']'.
			 */
			strcpy( nmstr, yytext );

			/* Check to see if we've already encountered this
			 * ccl.
			 */
			if ( (cclval = ccllookup( (Char *) nmstr )) != 0 )
				{
				/* Consume the closing bracket here. */
				if ( input() != ']' )
					synerr( _( "bad character class" ) );

				yylval = cclval;
				++cclreuse;
				return PREVCCL;
				}
			else
				{
				/* We fudge a bit.  We know that this ccl will
				 * soon be numbered as lastccl + 1 by cclinit.
				 */
				cclinstal( (Char *) nmstr, lastccl + 1 );

				/* Push back everything but the leading bracket
				 * so the ccl can be rescanned.
				 */
				yyless( 1 );

				BEGIN(FIRSTCCL);
				return '[';
				}
			}

	"{"{NAME}"}"	{
			register Char *nmdefptr;
			Char *ndlookup();

			strcpy( nmstr, yytext + 1 );
			nmstr[yyleng - 2] = '\0';	/* chop trailing brace */

			if ( (nmdefptr = ndlookup( nmstr )) == 0 )
				format_synerr(
					_( "undefined definition {%s}" ),
						nmstr );

			else
				{ /* push back name surrounded by ()'s */
				int len = strlen( (char *) nmdefptr );

				if ( lex_compat || nmdefptr[0] == '^' ||
				     (len > 0 && nmdefptr[len - 1] == '$') )
					{ /* don't use ()'s after all */
					PUT_BACK_STRING((char *) nmdefptr, 0);

					if ( nmdefptr[0] == '^' )
						BEGIN(CARETISBOL);
					}

				else
					{
					/* Pushed back in reverse, so the
					 * rescanned text reads "(" def ")".
					 */
					unput(')');
					PUT_BACK_STRING((char *) nmdefptr, 0);
					unput('(');
					}
				}
			}

	[/|*+?.(){}]	return (unsigned char) yytext[0];
	.		RETURNCHAR;
}
474:
475:
<SC>{
	/* Inside a <start condition> list, entered from SECT2 at '<'. */
	[,*]		return (unsigned char) yytext[0];
	">"		BEGIN(SECT2); return '>';
	">"/^		BEGIN(CARETISBOL); return '>';
	{SCNAME}	RETURNNAME;
	.		{
			format_synerr( _( "bad <start condition>: %s" ),
				yytext );
			}
}
486:
<CARETISBOL>"^"		BEGIN(SECT2); return '^';	/* '^' here is returned as the '^' operator token */
488:
489:
<QUOTE>{
	/* Inside a double-quoted string in a pattern: each character is
	 * returned as a CHAR token until the closing quote.
	 */
	[^"\n]		RETURNCHAR;
	\"		BEGIN(SECT2); return '"';

	{NL}		{
			/* Unterminated string: report it and return a
			 * closing quote to the parser anyway.
			 */
			synerr( _( "missing quote" ) );
			BEGIN(SECT2);
			++linenum;
			return '"';
			}
}
501:
502:
<FIRSTCCL>{
	/* First position of a character class.  After a leading '^' the
	 * scanner stays in this state when '-' or ']' follows, so that
	 * character is also scanned by these first-position rules.
	 */
	"^"/[^-\]\n]	BEGIN(CCL); return '^';
	"^"/("-"|"]")	return '^';
	.		BEGIN(CCL); RETURNCHAR;
}
508:
<CCL>{
	/* Remaining members of a character class.  '-' is returned as
	 * the '-' token only when something other than ']' or newline
	 * follows it.
	 */
	-/[^\]\n]	return '-';
	[^\]\n]		RETURNCHAR;
	"]"		BEGIN(SECT2); return ']';
	.|{NL}		{
			/* Anything else: report it and return a closing
			 * bracket to the parser anyway.
			 */
			synerr( _( "bad character class" ) );
			BEGIN(SECT2);
			return ']';
			}
}
519:
<FIRSTCCL,CCL>{
	/* Bracketed class names inside a character class; each one
	 * returns its own CCE_* token and continues in CCL.
	 */
	"[:alnum:]"	BEGIN(CCL); return CCE_ALNUM;
	"[:alpha:]"	BEGIN(CCL); return CCE_ALPHA;
	"[:blank:]"	BEGIN(CCL); return CCE_BLANK;
	"[:cntrl:]"	BEGIN(CCL); return CCE_CNTRL;
	"[:digit:]"	BEGIN(CCL); return CCE_DIGIT;
	"[:graph:]"	BEGIN(CCL); return CCE_GRAPH;
	"[:lower:]"	BEGIN(CCL); return CCE_LOWER;
	"[:print:]"	BEGIN(CCL); return CCE_PRINT;
	"[:punct:]"	BEGIN(CCL); return CCE_PUNCT;
	"[:space:]"	BEGIN(CCL); return CCE_SPACE;
	"[:upper:]"	BEGIN(CCL); return CCE_UPPER;
	"[:xdigit:]"	BEGIN(CCL); return CCE_XDIGIT;
	{CCL_EXPR}	{
			/* Unknown class name: report it and return
			 * CCE_ALNUM as a stand-in token.
			 */
			format_synerr(
				_( "bad character class expression: %s" ),
					yytext );
			BEGIN(CCL); return CCE_ALNUM;
			}
}
540:
<NUM>{
	/* Inside a brace-delimited repeat count, entered from SECT2 when
	 * an opening brace is followed by a digit.
	 */
	[[:digit:]]+	{
			yylval = myctoi( yytext );
			return NUMBER;
			}

	","		return ',';
	"}"		BEGIN(SECT2); return '}';

	.		{
			/* Report the error and return a closing brace to
			 * the parser anyway.
			 */
			synerr( _( "bad character inside {}'s" ) );
			BEGIN(SECT2);
			return '}';
			}

	{NL}		{
			synerr( _( "missing }" ) );
			BEGIN(SECT2);
			++linenum;
			return '}';
			}
}
563:
564:
<PERCENT_BRACE_ACTION>{
	/* Text of a %{ ... %} block in section 2.  The nested scopes
	 * below extend some rules to ACTION and CODEBLOCK as well.
	 */
	{OPTWS}"%}".*		bracelevel = 0;

	<ACTION>"/*"		ACTION_ECHO; yy_push_state( COMMENT );

	<CODEBLOCK,ACTION>{
		/* Because of %option caseless these patterns match any
		 * capitalization; the CHECK_* macros apply all_upper() and
		 * all_lower() to decide whether to set the flag.
		 */
		"reject"	{
			ACTION_ECHO;
			CHECK_REJECT(yytext);
			}
		"yymore"	{
			ACTION_ECHO;
			CHECK_YYMORE(yytext);
			}
	}

	{NAME}|{NOT_NAME}|.	ACTION_ECHO;
	{NL}		{
			++linenum;
			ACTION_ECHO;
			/* The block ends at this newline once %} has zeroed
			 * bracelevel, or when it is a one-line indented
			 * code block.
			 */
			if ( bracelevel == 0 ||
			     (doing_codeblock && indented_code) )
				{
				if ( doing_rule_action )
					add_action( "\tYY_BREAK\n" );

				doing_rule_action = doing_codeblock = false;
				BEGIN(SECT2);
				}
			}
}
596:
597:
598: /* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
<ACTION>{
	/* Text of a rule's action.  Braces are counted so that the action
	 * ends at the first newline seen with bracelevel back at zero.
	 */
	"{"		ACTION_ECHO; ++bracelevel;
	"}"		ACTION_ECHO; --bracelevel;
	[^[:alpha:]_{}"'/\n]+	ACTION_ECHO;
	{NAME}		ACTION_ECHO;
	"'"([^'\\\n]|\\.)*"'"	ACTION_ECHO; /* character constant */
	\"		ACTION_ECHO; BEGIN(ACTION_STRING);
	{NL}		{
			++linenum;
			ACTION_ECHO;
			if ( bracelevel == 0 )
				{
				/* End of the action: terminate a rule's
				 * action with YY_BREAK.
				 */
				if ( doing_rule_action )
					add_action( "\tYY_BREAK\n" );

				doing_rule_action = false;
				BEGIN(SECT2);
				}
			}
	.		ACTION_ECHO;
}
620:
<ACTION_STRING>{
	/* Inside a string literal within an action: everything is echoed
	 * and braces are not counted.  An escaped character is consumed as
	 * a pair, so an escaped quote does not end the string.
	 */
	[^"\\\n]+	ACTION_ECHO;
	\\.		ACTION_ECHO;
	{NL}		++linenum; ACTION_ECHO;
	\"		ACTION_ECHO; BEGIN(ACTION);
	.		ACTION_ECHO;
}
628:
<COMMENT,ACTION,ACTION_STRING><<EOF>>	{
			/* Input ended inside a comment, an action or a string
			 * literal: report it and stop scanning.
			 */
			synerr( _( "EOF encountered inside an action" ) );
			yyterminate();
			}
633:
634:
<SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ}	{
			/* Backslash escape in a pattern: myesc() supplies the
			 * value returned with the CHAR token.
			 */
			yylval = myesc( (Char *) yytext );

			/* An escape in first position moves the class on
			 * to its later members.
			 */
			if ( YY_START == FIRSTCCL )
				BEGIN(CCL);

			return CHAR;
			}
643:
644:
<SECT3>{
	/* Section 3 of the input: copied through with ECHO until EOF. */
	.*(\n?)		ECHO;
	<<EOF>>		sectnum = 0; yyterminate();
}
649:
<*>.|\n			format_synerr( _( "bad character: %s" ), yytext );	/* last rule, in every start condition */
651:
652: %%
653:
654:
655: int yywrap()
656: {
657: if ( --num_input_files > 0 )
658: {
659: set_input_file( *++input_files );
660: return 0;
661: }
662:
663: else
664: return 1;
665: }
666:
667:
668: /* set_input_file - open the given file (if NULL, stdin) for scanning */
669:
670: void set_input_file( file )
671: char *file;
672: {
673: if ( file && strcmp( file, "-" ) )
674: {
675: infilename = copy_string( file );
676: yyin = fopen( infilename, "r" );
677:
678: if ( yyin == NULL )
679: lerrsf( _( "can't open %s" ), file );
680: }
681:
682: else
683: {
684: yyin = stdin;
685: infilename = copy_string( "<stdin>" );
686: }
687:
688: linenum = 1;
689: }
690:
691:
692: /* Wrapper routines for accessing the scanner's malloc routines. */
693:
/* flex_alloc - allocate size bytes with malloc(); returns malloc()'s result */
void *flex_alloc( size )
size_t size;
	{
	void *block = malloc( size );

	return block;
	}
699:
/* flex_realloc - resize ptr to size bytes with realloc(); returns realloc()'s result */
void *flex_realloc( ptr, size )
void *ptr;
size_t size;
	{
	void *resized = realloc( ptr, size );

	return resized;
	}
706:
/* flex_free - release ptr with free(); a null pointer is ignored */
void flex_free( ptr )
void *ptr;
	{
	if ( ptr == NULL )
		return;

	free( ptr );
	}