Annotation of src/usr.bin/lex/scan.l, Revision 1.11
1.11 ! tedu 1: /* $OpenBSD: scan.l,v 1.10 2015/11/19 19:43:40 tedu Exp $ */
1.2 deraadt 2:
1.10 tedu 3: /* scan.l - scanner for flex input -*-C-*- */
1.1 deraadt 4:
5: %{
1.10 tedu 6: /* Copyright (c) 1990 The Regents of the University of California. */
7: /* All rights reserved. */
1.1 deraadt 8:
1.10 tedu 9: /* This code is derived from software contributed to Berkeley by */
10: /* Vern Paxson. */
11:
12: /* The United States Government has rights in this work pursuant */
13: /* to contract no. DE-AC03-76SF00098 between the United States */
14: /* Department of Energy and the University of California. */
15:
16: /* This file is part of flex. */
17:
18: /* Redistribution and use in source and binary forms, with or without */
19: /* modification, are permitted provided that the following conditions */
20: /* are met: */
21:
22: /* 1. Redistributions of source code must retain the above copyright */
23: /* notice, this list of conditions and the following disclaimer. */
24: /* 2. Redistributions in binary form must reproduce the above copyright */
25: /* notice, this list of conditions and the following disclaimer in the */
26: /* documentation and/or other materials provided with the distribution. */
27:
28: /* Neither the name of the University nor the names of its contributors */
29: /* may be used to endorse or promote products derived from this software */
30: /* without specific prior written permission. */
31:
32: /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
33: /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
34: /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
35: /* PURPOSE. */
1.1 deraadt 36:
37: #include "flexdef.h"
38: #include "parse.h"
1.10 tedu 39: extern bool tablesverify, tablesext;
40: extern int trlcontxt; /* Set in parse.y for each rule. */
41: extern const char *escaped_qstart, *escaped_qend;
1.1 deraadt 42:
/* Pass the text of the current match to add_action(). */
43: #define ACTION_ECHO add_action( yytext )
/* Call action_define( def, 1 ) when should_define is nonzero; otherwise do nothing. */
44: #define ACTION_IFDEF(def, should_define) \
45: { \
46: if ( should_define ) \
47: action_define( def, 1 ); \
48: }
49:
/* Pass the escaped forms of the m4 quote delimiters (escaped_qstart / escaped_qend) to add_action(). */
1.10 tedu 50: #define ACTION_ECHO_QSTART add_action (escaped_qstart)
51: #define ACTION_ECHO_QEND add_action (escaped_qend)
52:
/* Define (should_define nonzero) or undefine (zero) the m4 macro named def in m4defs_buf. */
53: #define ACTION_M4_IFDEF(def, should_define) \
54: do{ \
55: if ( should_define ) \
56: buf_m4_define( &m4defs_buf, def, NULL);\
57: else \
58: buf_m4_undefine( &m4defs_buf, def);\
59: } while(0)
60:
/* Calls mark_prolog(); note that the expansion already ends with a semicolon. */
1.1 deraadt 61: #define MARK_END_OF_PROLOG mark_prolog();
62:
/* The generated scanner function is declared as flexscan(). */
63: #define YY_DECL \
64: int flexscan()
65:
/* Store the first byte of the match (as unsigned char) in yylval and return CHAR.
 * Expands to two statements, so it must not be used as the unbraced body of an if/else.
 */
66: #define RETURNCHAR \
67: yylval = (unsigned char) yytext[0]; \
68: return CHAR;
69:
/* Copy the match into nmstr and return NAME.  A match of MAXLINE or more
 * characters is reported through synerr() and FLEX_EXIT(EXIT_FAILURE) is invoked.
 */
70: #define RETURNNAME \
1.10 tedu 71: if(yyleng < MAXLINE) \
72: { \
73: strlcpy( nmstr, yytext, sizeof nmstr ); \
74: } \
75: else \
76: { \
77: synerr(_("Input line too long\n")); \
78: FLEX_EXIT(EXIT_FAILURE); \
79: } \
1.1 deraadt 80: return NAME;
81:
/* unput() the characters of str from the last one down to index start, so the
 * scanner reads them again in their original order.  Relies on a variable
 * named i being in scope at the point of use.
 */
82: #define PUT_BACK_STRING(str, start) \
83: for ( i = strlen( str ) - 1; i >= start; --i ) \
84: unput((str)[i])
85:
/* Set reject when all_upper( str ) is true -- presumably so that only the
 * upper-case spelling counts, since the scanner itself is caseless.
 */
86: #define CHECK_REJECT(str) \
87: if ( all_upper( str ) ) \
88: reject = true;
89:
/* Set yymore_used when all_lower( str ) is true. */
90: #define CHECK_YYMORE(str) \
91: if ( all_lower( str ) ) \
92: yymore_used = true;
1.10 tedu 93:
/* Scanner start-up hook: turn on posix_compat when POSIXLY_CORRECT is set in the environment. */
94: #define YY_USER_INIT \
95: if ( getenv("POSIXLY_CORRECT") ) \
96: posix_compat = true;
97:
1.1 deraadt 98: %}
99:
1.10 tedu 100: %option caseless nodefault stack noyy_top_state
1.1 deraadt 101: %option nostdinit
102:
	/* Exclusive start conditions used by the rules section below. */
103: %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
104: %x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
1.10 tedu 105: %x OPTION LINEDIR CODEBLOCK_MATCH_BRACE
106: %x GROUP_WITH_PARAMS
107: %x GROUP_MINUS_PARAMS
108: %x EXTENDED_COMMENT
109: %x COMMENT_DISCARD
1.1 deraadt 110:
	/* Blank runs: WS needs at least one blank, OPTWS may be empty, NOT_WS is one character that is neither blank nor part of a line ending. */
111: WS [[:blank:]]+
112: OPTWS [[:blank:]]*
1.10 tedu 113: NOT_WS [^[:blank:]\r\n]
1.1 deraadt 114:
	/* A line ending, with an optional carriage return before the newline. */
115: NL \r?\n
116:
	/* NAME starts with a letter or underscore and continues with letters, digits, underscores or dashes. */
117: NAME ([[:alpha:]_][[:alnum:]_-]*)
118: NOT_NAME [^[:alpha:]_*\n]+
119:
120: SCNAME {NAME}
121:
	/* A backslash followed by any non-newline character, by 1-3 octal digits, or by x and 1-2 hex digits. */
122: ESCSEQ (\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))
123:
	/* Characters allowed inside a bracketed class: the first position may be ']', later positions may not. */
124: FIRST_CCL_CHAR ([^\\\n]|{ESCSEQ})
125: CCL_CHAR ([^\\\n\]]|{ESCSEQ})
	/* A class expression such as [:alpha:], or its negated form [:^alpha:]. */
1.10 tedu 126: CCL_EXPR ("[:"^?[[:alpha:]]+":]")
1.1 deraadt 127:
128: LEXOPT [aceknopr]
129:
	/* The m4 quote delimiters as they appear literally in the input. */
1.10 tedu 130: M4QSTART "[["
131: M4QEND "]]"
132:
1.1 deraadt 133: %%
134: static int bracelevel, didadef, indented_code;
	/* Of the three above: bracelevel is raised and lowered by the brace and %{ / %} rules and
	 * tested for 0 at the end of a line of action text; didadef records that PICKUPDEF installed
	 * a definition value; indented_code is set when a code block was started by leading
	 * whitespace rather than by %{.
	 */
	/* When set, the newline rules that end an action append "\tYY_BREAK\n" before returning to SECT2. */
135: static int doing_rule_action = false;
	/* Sense applied to the %option keyword being scanned: whitespace resets it to true, "no" inverts it. */
136: static int option_sense;
137:
138: int doing_codeblock = false;
	/* i is the scratch index used by PUT_BACK_STRING and by the trailing-blank trim in PICKUPDEF;
	 * brace_depth and brace_start_line describe the %top block currently being collected.
	 */
1.10 tedu 139: int i, brace_depth=0, brace_start_line=0;
	/* Receives the text of a definition value before it is trimmed and passed to ndinstal(). */
140: Char nmdef[MAXLINE];
1.1 deraadt 141:
142:
143: <INITIAL>{
	/* Section 1 of the input: code blocks, comments, #line and % directives,
	 * and name definitions, up to the first %% line.
	 */
144: ^{WS} indented_code = true; BEGIN(CODEBLOCK);
145: ^"/*" ACTION_ECHO; yy_push_state( COMMENT );
146: ^#{OPTWS}line{WS} yy_push_state( LINEDIR );
147: ^"%s"{NAME}? return SCDECL;
148: ^"%x"{NAME}? return XSCDECL;
149: ^"%{".*{NL} {
150: ++linenum;
151: line_directive_out( (FILE *) 0, 1 );
152: indented_code = false;
153: BEGIN(CODEBLOCK);
154: }
1.10 tedu 155: ^"%top"[[:blank:]]*"{"[[:blank:]]*{NL} {
	/* Remember the line of the opening brace; the <<EOF>> rule of
	 * CODEBLOCK_MATCH_BRACE restores it before reporting an unmatched '{'.
	 */
156: brace_start_line = linenum;
157: ++linenum;
158: buf_linedir( &top_buf, infilename?infilename:"<stdin>", linenum);
159: brace_depth = 1;
160: yy_push_state(CODEBLOCK_MATCH_BRACE);
161: }
162:
163: ^"%top".* synerr( _("malformed '%top' directive") );
1.1 deraadt 164:
165: {WS} /* discard */
166:
167: ^"%%".* {
	/* End of section 1: switch to the section 2 prolog and tell the parser. */
168: sectnum = 2;
169: bracelevel = 0;
170: mark_defs1();
171: line_directive_out( (FILE *) 0, 1 );
172: BEGIN(SECT2PROLOG);
173: return SECTEND;
174: }
175:
176: ^"%pointer".*{NL} yytext_is_array = false; ++linenum;
177: ^"%array".*{NL} yytext_is_array = true; ++linenum;
178:
179: ^"%option" BEGIN(OPTION); return OPTION_OP;
180:
181: ^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL} ++linenum; /* ignore */
182: ^"%"{LEXOPT}{WS}.*{NL} ++linenum; /* ignore */
183:
1.10 tedu 184: /* xgettext: no-c-format */
1.1 deraadt 185: ^"%"[^sxaceknopr{}].* synerr( _( "unrecognized '%' directive" ) );
186:
187: ^{NAME} {
	/* A name at the start of a line begins a definition: save the name in
	 * nmstr and let PICKUPDEF collect the value.
	 */
1.10 tedu 188: if(yyleng < MAXLINE)
189: {
1.7 deraadt 190: strlcpy( nmstr, yytext, sizeof nmstr );
1.10 tedu 191: }
192: else
193: {
194: synerr( _("Definition name too long\n"));
195: FLEX_EXIT(EXIT_FAILURE);
196: }
197:
1.1 deraadt 198: didadef = false;
199: BEGIN(PICKUPDEF);
200: }
201:
202: {SCNAME} RETURNNAME;
203: ^{OPTWS}{NL} ++linenum; /* allows blank lines in section 1 */
204: {OPTWS}{NL} ACTION_ECHO; ++linenum; /* maybe end of comment line */
205: }
206:
207:
208: <COMMENT>{
	/* Inside a C comment that is kept: every piece is handed to add_action(),
	 * with the m4 quote delimiters replaced by their escaped forms.  The
	 * closing delimiter pops back to whichever state pushed COMMENT.
	 */
209: "*/" ACTION_ECHO; yy_pop_state();
210: "*" ACTION_ECHO;
1.10 tedu 211: {M4QSTART} ACTION_ECHO_QSTART;
212: {M4QEND} ACTION_ECHO_QEND;
213: [^*\n] ACTION_ECHO;
214: {NL} ++linenum; ACTION_ECHO;
215: }
216:
217: <COMMENT_DISCARD>{
218: /* This is the same as COMMENT, but is discarded rather than output. */
	/* Pushed from SECT2 when sf_skip_ws() reports true; only the line count is maintained. */
219: "*/" yy_pop_state();
220: "*" ;
221: [^*\n] ;
222: {NL} ++linenum;
223: }
224:
225: <EXTENDED_COMMENT>{
	/* Body of a "(?#" pattern comment: everything up to the next ")" is skipped,
	 * newlines are counted, and the ")" pops back to the pushing state.
	 */
226: ")" yy_pop_state();
227: [^\n\)]+ ;
228: {NL} ++linenum;
1.1 deraadt 229: }
230:
231: <LINEDIR>{
	/* Remainder of a #line directive: a number replaces linenum, a quoted
	 * string replaces infilename, and the newline pops the state.
	 */
232: \n yy_pop_state();
233: [[:digit:]]+ linenum = myctoi( yytext );
234:
235: \"[^"\n]*\" {
	/* Release the old name, copy the text after the opening quote, then
	 * overwrite the copied closing quote with the string terminator.
	 */
1.11 ! tedu 236: free( (void *) infilename );
1.1 deraadt 237: infilename = copy_string( yytext + 1 );
238: infilename[strlen( infilename ) - 1] = '\0';
239: }
240: . /* ignore spurious characters */
241: }
242:
243: <CODEBLOCK>{
	/* Code in section 1 that is passed to add_action(): either a %{ ... %}
	 * block, ended by a line starting with %}, or a single indented line.
	 */
244: ^"%}".*{NL} ++linenum; BEGIN(INITIAL);
245:
1.10 tedu 246: {M4QSTART} ACTION_ECHO_QSTART;
247: {M4QEND} ACTION_ECHO_QEND;
248: . ACTION_ECHO;
1.1 deraadt 249:
250: {NL} {
251: ++linenum;
252: ACTION_ECHO;
	/* An indented code line ends at its newline; a %{ block keeps going. */
253: if ( indented_code )
254: BEGIN(INITIAL);
255: }
256: }
257:
1.10 tedu 258: <CODEBLOCK_MATCH_BRACE>{
	/* Body of a %top { ... } block: text is appended to top_buf while
	 * brace_depth tracks nesting.  The brace that brings the depth back to
	 * zero is not appended and pops the state.
	 */
259: "}" {
260: if( --brace_depth == 0){
261: /* TODO: Matched. */
262: yy_pop_state();
263: }else
264: buf_strnappend(&top_buf, yytext, yyleng);
265: }
266:
267: "{" {
268: brace_depth++;
269: buf_strnappend(&top_buf, yytext, yyleng);
270: }
271:
272: {NL} {
273: ++linenum;
274: buf_strnappend(&top_buf, yytext, yyleng);
275: }
276:
277: {M4QSTART} buf_strnappend(&top_buf, escaped_qstart, strlen(escaped_qstart));
278: {M4QEND} buf_strnappend(&top_buf, escaped_qend, strlen(escaped_qend));
279:
280: [^{}\r\n] {
281: buf_strnappend(&top_buf, yytext, yyleng);
282: }
283:
284: <<EOF>> {
	/* Report the error at the line where the unmatched block was opened. */
285: linenum = brace_start_line;
286: synerr(_("Unmatched '{'"));
287: yyterminate();
288: }
289: }
290:
1.1 deraadt 291:
292: <PICKUPDEF>{
	/* Entered after a definition name (saved in nmstr) was seen at the start
	 * of a line: collects the rest of the line as the definition value.
	 */
293: {WS} /* separates name and definition */
294:
1.10 tedu 295: {NOT_WS}[^\r\n]* {
296: if(yyleng < MAXLINE)
297: {
298: strlcpy( (char *) nmdef, yytext, sizeof nmdef );
299: }
300: else
301: {
302: format_synerr( _("Definition value for {%s} too long\n"), nmstr);
303: FLEX_EXIT(EXIT_FAILURE);
304: }
1.1 deraadt 305: /* Skip trailing whitespace. */
306: for ( i = strlen( (char *) nmdef ) - 1;
307: i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t');
308: --i )
309: ;
310:
	/* i now indexes the last non-blank character (or is -1), so cut just after it. */
311: nmdef[i + 1] = '\0';
312:
313: ndinstal( nmstr, nmdef );
314: didadef = true;
315: }
316:
317: {NL} {
	/* Reaching the end of the line without a value is an error. */
318: if ( ! didadef )
319: synerr( _( "incomplete name definition" ) );
320: BEGIN(INITIAL);
321: ++linenum;
322: }
323: }
324:
325:
326: <OPTION>{
	/* Keywords on a %option line.  Whitespace before a keyword resets
	 * option_sense to true and each "no" inverts it; most keywords then store
	 * option_sense (or its negation) in the matching flag or m4 definition.
	 * Keywords that take a value return a token and leave the rest to the
	 * parser.  The newline returns to INITIAL.
	 */
327: {NL} ++linenum; BEGIN(INITIAL);
328: {WS} option_sense = true;
329:
330: "=" return '=';
331:
332: no option_sense = ! option_sense;
333:
334: 7bit csize = option_sense ? 128 : 256;
335: 8bit csize = option_sense ? 256 : 128;
336:
337: align long_align = option_sense;
338: always-interactive {
1.10 tedu 339: ACTION_M4_IFDEF( "M4""_YY_ALWAYS_INTERACTIVE", option_sense );
340: interactive = option_sense;
1.1 deraadt 341: }
342: array yytext_is_array = option_sense;
1.10 tedu 343: ansi-definitions ansi_func_defs = option_sense;
344: ansi-prototypes ansi_func_protos = option_sense;
1.1 deraadt 345: backup backing_up_report = option_sense;
346: batch interactive = ! option_sense;
1.10 tedu 347: bison-bridge bison_bridge_lval = option_sense;
	/* Turning bison-locations on also turns bison_bridge_lval on. */
348: bison-locations { if((bison_bridge_lloc = option_sense))
349: bison_bridge_lval = true;
350: }
1.1 deraadt 351: "c++" C_plus_plus = option_sense;
1.10 tedu 352: caseful|case-sensitive sf_set_case_ins(!option_sense);
353: caseless|case-insensitive sf_set_case_ins(option_sense);
1.1 deraadt 354: debug ddebug = option_sense;
355: default spprdflt = ! option_sense;
356: ecs useecs = option_sense;
	/* Note that "fast" and "full" assign fixed values and do not consult option_sense. */
357: fast {
358: useecs = usemecs = false;
359: use_read = fullspd = true;
360: }
361: full {
362: useecs = usemecs = false;
363: use_read = fulltbl = true;
364: }
365: input ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
366: interactive interactive = option_sense;
367: lex-compat lex_compat = option_sense;
1.10 tedu 368: posix-compat posix_compat = option_sense;
1.1 deraadt 369: main {
1.10 tedu 370: ACTION_M4_IFDEF( "M4""_YY_MAIN", option_sense);
371: /* Override yywrap */
372: if( option_sense == true )
373: do_yywrap = false;
1.1 deraadt 374: }
375: meta-ecs usemecs = option_sense;
376: never-interactive {
1.10 tedu 377: ACTION_M4_IFDEF( "M4""_YY_NEVER_INTERACTIVE", option_sense );
378: interactive = !option_sense;
1.1 deraadt 379: }
380: perf-report performance_report += option_sense ? 1 : -1;
381: pointer yytext_is_array = ! option_sense;
382: read use_read = option_sense;
1.10 tedu 383: reentrant reentrant = option_sense;
1.1 deraadt 384: reject reject_really_used = option_sense;
1.10 tedu 385: stack ACTION_M4_IFDEF( "M4""_YY_STACK_USED", option_sense );
1.1 deraadt 386: stdinit do_stdinit = option_sense;
387: stdout use_stdout = option_sense;
1.10 tedu 388: unistd ACTION_IFDEF("YY_NO_UNISTD_H", ! option_sense);
389: unput ACTION_M4_IFDEF("M4""_YY_NO_UNPUT", ! option_sense);
1.1 deraadt 390: verbose printstats = option_sense;
391: warn nowarn = ! option_sense;
1.10 tedu 392: yylineno do_yylineno = option_sense; ACTION_M4_IFDEF("M4""_YY_USE_LINENO", option_sense);
1.1 deraadt 393: yymore yymore_really_used = option_sense;
1.10 tedu 394: yywrap do_yywrap = option_sense;
1.1 deraadt 395:
	/* The keywords below name generated functions; a negated keyword defines the matching M4_YY_NO_* macro. */
1.10 tedu 396: yy_push_state ACTION_M4_IFDEF("M4""_YY_NO_PUSH_STATE", ! option_sense);
397: yy_pop_state ACTION_M4_IFDEF("M4""_YY_NO_POP_STATE", ! option_sense);
398: yy_top_state ACTION_M4_IFDEF("M4""_YY_NO_TOP_STATE", ! option_sense);
399:
400: yy_scan_buffer ACTION_M4_IFDEF("M4""_YY_NO_SCAN_BUFFER", ! option_sense);
401: yy_scan_bytes ACTION_M4_IFDEF("M4""_YY_NO_SCAN_BYTES", ! option_sense);
402: yy_scan_string ACTION_M4_IFDEF("M4""_YY_NO_SCAN_STRING", ! option_sense);
403:
404: yyalloc ACTION_M4_IFDEF("M4""_YY_NO_FLEX_ALLOC", ! option_sense);
405: yyrealloc ACTION_M4_IFDEF("M4""_YY_NO_FLEX_REALLOC", ! option_sense);
406: yyfree ACTION_M4_IFDEF("M4""_YY_NO_FLEX_FREE", ! option_sense);
407:
408: yyget_debug ACTION_M4_IFDEF("M4""_YY_NO_GET_DEBUG", ! option_sense);
409: yyset_debug ACTION_M4_IFDEF("M4""_YY_NO_SET_DEBUG", ! option_sense);
410: yyget_extra ACTION_M4_IFDEF("M4""_YY_NO_GET_EXTRA", ! option_sense);
411: yyset_extra ACTION_M4_IFDEF("M4""_YY_NO_SET_EXTRA", ! option_sense);
412: yyget_leng ACTION_M4_IFDEF("M4""_YY_NO_GET_LENG", ! option_sense);
413: yyget_text ACTION_M4_IFDEF("M4""_YY_NO_GET_TEXT", ! option_sense);
414: yyget_lineno ACTION_M4_IFDEF("M4""_YY_NO_GET_LINENO", ! option_sense);
415: yyset_lineno ACTION_M4_IFDEF("M4""_YY_NO_SET_LINENO", ! option_sense);
416: yyget_in ACTION_M4_IFDEF("M4""_YY_NO_GET_IN", ! option_sense);
417: yyset_in ACTION_M4_IFDEF("M4""_YY_NO_SET_IN", ! option_sense);
418: yyget_out ACTION_M4_IFDEF("M4""_YY_NO_GET_OUT", ! option_sense);
419: yyset_out ACTION_M4_IFDEF("M4""_YY_NO_SET_OUT", ! option_sense);
420: yyget_lval ACTION_M4_IFDEF("M4""_YY_NO_GET_LVAL", ! option_sense);
421: yyset_lval ACTION_M4_IFDEF("M4""_YY_NO_SET_LVAL", ! option_sense);
422: yyget_lloc ACTION_M4_IFDEF("M4""_YY_NO_GET_LLOC", ! option_sense);
423: yyset_lloc ACTION_M4_IFDEF("M4""_YY_NO_SET_LLOC", ! option_sense);
1.1 deraadt 424:
1.10 tedu 425: extra-type return OPT_EXTRA_TYPE;
1.1 deraadt 426: outfile return OPT_OUTFILE;
427: prefix return OPT_PREFIX;
428: yyclass return OPT_YYCLASS;
1.10 tedu 429: header(-file)? return OPT_HEADER;
430: tables-file return OPT_TABLES;
431: tables-verify {
432: tablesverify = option_sense;
433: if(!tablesext && option_sense)
434: tablesext = true;
435: }
436:
1.1 deraadt 437:
438: \"[^"\n]*\" {
	/* A quoted option value: copy what follows the opening quote into nmstr,
	 * drop the closing quote, and return it as a NAME.
	 */
1.10 tedu 439: if(yyleng-1 < MAXLINE)
440: {
441: strlcpy( nmstr, yytext + 1, sizeof nmstr );
442: }
443: else
444: {
445: synerr( _("Option line too long\n"));
446: FLEX_EXIT(EXIT_FAILURE);
447: }
448: if (nmstr[strlen( nmstr ) - 1] == '"')
449: nmstr[strlen( nmstr ) - 1] = '\0';
1.1 deraadt 450: return NAME;
451: }
452:
453: (([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|. {
	/* Anything not recognized above: report it and skip the rest of the line in RECOVER. */
454: format_synerr( _( "unrecognized %%option: %s" ),
455: yytext );
456: BEGIN(RECOVER);
457: }
458: }
459:
460: <RECOVER>.*{NL} ++linenum; BEGIN(INITIAL);
	/* The rule above discards the rest of the line after an unrecognized %option and resumes in INITIAL. */
461:
462:
463: <SECT2PROLOG>{
	/* Text between the first %% and the first rule.  Indented lines and the
	 * contents of %{ ... %} are passed to add_action(); the first unindented
	 * line outside such a block is pushed back and rescanned in SECT2.
	 */
464: ^"%{".* ++bracelevel; yyless( 2 ); /* eat only %{ */
465: ^"%}".* --bracelevel; yyless( 2 ); /* eat only %} */
466:
467: ^{WS}.* ACTION_ECHO; /* indented code in prolog */
468:
469: ^{NOT_WS}.* { /* non-indented code */
470: if ( bracelevel <= 0 )
471: { /* not in %{ ... %} */
472: yyless( 0 ); /* put it all back */
	/* The pushed-back text is at the start of a line, so restore the beginning-of-line state for the ^ rules of SECT2. */
473: yy_set_bol( 1 );
474: mark_prolog();
475: BEGIN(SECT2);
476: }
477: else
478: ACTION_ECHO;
479: }
480:
1.10 tedu 481: . ACTION_ECHO;
1.1 deraadt 482: {NL} ++linenum; ACTION_ECHO;
483:
484: <<EOF>> {
485: mark_prolog();
486: sectnum = 0;
487: yyterminate(); /* to stop the parser */
488: }
489: }
490:
491: <SECT2>{
	/* Section 2: the rules.  Pattern text is broken into tokens for the parser;
	 * whitespace or a newline after a pattern switches to ACTION (or
	 * PERCENT_BRACE_ACTION) to collect the rule's action.  Several rules first
	 * ask sf_skip_ws() whether whitespace is currently being ignored because
	 * the scanner is inside a (?x: ) group.
	 */
492: ^{OPTWS}{NL} ++linenum; /* allow blank lines in section 2 */
493:
494: ^{OPTWS}"%{" {
495: indented_code = false;
496: doing_codeblock = true;
497: bracelevel = 1;
498: BEGIN(PERCENT_BRACE_ACTION);
499: }
500:
1.10 tedu 501: ^{OPTWS}"<" {
502: /* Allow "<" to appear in (?x) patterns. */
503: if (!sf_skip_ws())
504: BEGIN(SC);
505: return '<';
506: }
1.1 deraadt 507: ^{OPTWS}"^" return '^';
508: \" BEGIN(QUOTE); return '"';
1.10 tedu 509: "{"/[[:digit:]] {
	/* A brace followed by a digit starts a repeat count; the token returned depends on the compatibility mode. */
510: BEGIN(NUM);
511: if ( lex_compat || posix_compat )
512: return BEGIN_REPEAT_POSIX;
513: else
514: return BEGIN_REPEAT_FLEX;
515: }
1.1 deraadt 516: "$"/([[:blank:]]|{NL}) return '$';
517:
518: {WS}"%{" {
519: bracelevel = 1;
520: BEGIN(PERCENT_BRACE_ACTION);
521:
522: if ( in_rule )
523: {
524: doing_rule_action = true;
525: in_rule = false;
526: return '\n';
527: }
528: }
1.10 tedu 529: {WS}"|".*{NL} {
530: if (sf_skip_ws()){
531: /* We're in the middle of a (?x: ) pattern. */
532: /* Push back everything starting at the "|" */
533: size_t amt;
534: amt = strchr (yytext, '|') - yytext;
535: yyless(amt);
536: }
537: else {
538: continued_action = true;
539: ++linenum;
540: return '\n';
541: }
542: }
1.1 deraadt 543:
544: ^{WS}"/*" {
1.10 tedu 545:
546: if (sf_skip_ws()){
547: /* We're in the middle of a (?x: ) pattern. */
548: yy_push_state(COMMENT_DISCARD);
549: }
550: else{
551: yyless( yyleng - 2 ); /* put back '/', '*' */
552: bracelevel = 0;
553: continued_action = false;
554: BEGIN(ACTION);
555: }
1.1 deraadt 556: }
557:
1.10 tedu 558: ^{WS} /* allow indented rules */ ;
1.1 deraadt 559:
560: {WS} {
1.10 tedu 561: if (sf_skip_ws()){
562: /* We're in the middle of a (?x: ) pattern. */
563: }
564: else{
565: /* This rule is separate from the one below because
566: * otherwise we get variable trailing context, so
567: * we can't build the scanner using -{f,F}.
568: */
569: bracelevel = 0;
570: continued_action = false;
571: BEGIN(ACTION);
572:
573: if ( in_rule )
574: {
575: doing_rule_action = true;
576: in_rule = false;
577: return '\n';
578: }
579: }
1.1 deraadt 580: }
581:
582: {OPTWS}{NL} {
1.10 tedu 583: if (sf_skip_ws()){
584: /* We're in the middle of a (?x: ) pattern. */
585: ++linenum;
586: }
587: else{
588: bracelevel = 0;
589: continued_action = false;
590: BEGIN(ACTION);
591: unput( '\n' ); /* so <ACTION> sees it */
592:
593: if ( in_rule )
594: {
595: doing_rule_action = true;
596: in_rule = false;
597: return '\n';
598: }
599: }
1.1 deraadt 600: }
601:
602: ^{OPTWS}"<<EOF>>" |
603: "<<EOF>>" return EOF_OP;
604:
605: ^"%%".* {
606: sectnum = 3;
607: BEGIN(SECT3);
1.10 tedu 608: outn("/* Begin user sect3 */");
1.1 deraadt 609: yyterminate(); /* to stop the parser */
610: }
611:
612: "["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})* {
	/* The match covers a character class up to, but not including, its closing bracket. */
613: int cclval;
614:
1.10 tedu 615: if(yyleng < MAXLINE)
616: {
617: strlcpy( nmstr, yytext, sizeof nmstr );
618: }
619: else
620: {
621: synerr( _("Input line too long\n"));
622: FLEX_EXIT(EXIT_FAILURE);
623: }
1.1 deraadt 624:
625: /* Check to see if we've already encountered this
626: * ccl.
627: */
1.10 tedu 628: if (0 /* <--- This "0" effectively disables the reuse of a
629: * character class (purely based on its source text).
630: * The reason it was disabled is so yacc/bison can parse
631: * ccl operations, such as ccl difference and union.
632: */
633: && (cclval = ccllookup( (Char *) nmstr )) != 0 )
1.1 deraadt 634: {
635: if ( input() != ']' )
636: synerr( _( "bad character class" ) );
637:
638: yylval = cclval;
639: ++cclreuse;
640: return PREVCCL;
641: }
642: else
643: {
644: /* We fudge a bit. We know that this ccl will
645: * soon be numbered as lastccl + 1 by cclinit.
646: */
647: cclinstal( (Char *) nmstr, lastccl + 1 );
648:
649: /* Push back everything but the leading bracket
650: * so the ccl can be rescanned.
651: */
652: yyless( 1 );
653:
654: BEGIN(FIRSTCCL);
655: return '[';
656: }
657: }
1.10 tedu 658: "{-}" return CCL_OP_DIFF;
659: "{+}" return CCL_OP_UNION;
660:
1.1 deraadt 661:
1.10 tedu 662: /* Check for :space: at the end of the rule so we don't
663: * wrap the expanded regex in '(' ')' -- breaking trailing
664: * context.
665: */
666: "{"{NAME}"}"[[:space:]]? {
667: Char *nmdefptr;
668: int end_is_ws, end_ch;
1.1 deraadt 669:
	/* If the last matched character is not the closing brace, it is the optional whitespace character. */
1.10 tedu 670: end_ch = yytext[yyleng-1];
671: end_is_ws = end_ch != '}' ? 1 : 0;
672:
673: if(yyleng-1 < MAXLINE)
674: {
1.7 deraadt 675: strlcpy( nmstr, yytext + 1, sizeof nmstr );
1.10 tedu 676: }
677: else
678: {
679: synerr( _("Input line too long\n"));
680: FLEX_EXIT(EXIT_FAILURE);
681: }
682: nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */
1.1 deraadt 683:
684: if ( (nmdefptr = ndlookup( nmstr )) == 0 )
685: format_synerr(
686: _( "undefined definition {%s}" ),
687: nmstr );
688:
689: else
690: { /* push back name surrounded by ()'s */
691: int len = strlen( (char *) nmdefptr );
	/* Characters are pushed back last-first, so the trailing whitespace goes first. */
1.10 tedu 692: if (end_is_ws)
693: unput(end_ch);
1.1 deraadt 694:
695: if ( lex_compat || nmdefptr[0] == '^' ||
1.10 tedu 696: (len > 0 && nmdefptr[len - 1] == '$')
697: || (end_is_ws && trlcontxt && !sf_skip_ws()))
1.1 deraadt 698: { /* don't use ()'s after all */
699: PUT_BACK_STRING((char *) nmdefptr, 0);
700:
701: if ( nmdefptr[0] == '^' )
702: BEGIN(CARETISBOL);
703: }
704:
705: else
706: {
707: unput(')');
708: PUT_BACK_STRING((char *) nmdefptr, 0);
709: unput('(');
710: }
711: }
712: }
713:
1.10 tedu 714: "/*" {
715: if (sf_skip_ws())
716: yy_push_state(COMMENT_DISCARD);
717: else{
718: /* Push back the "*" and return "/" as usual. */
719: yyless(1);
720: return '/';
721: }
722: }
723:
724: "(?#" {
725: if (lex_compat || posix_compat){
726: /* Push back the "?#" and treat it like a normal parens. */
727: yyless(1);
728: sf_push();
729: return '(';
730: }
731: else
732: yy_push_state(EXTENDED_COMMENT);
733: }
734: "(?" {
735: sf_push();
736: if (lex_compat || posix_compat)
737: /* Push back the "?" and treat it like a normal parens. */
738: yyless(1);
739: else
740: BEGIN(GROUP_WITH_PARAMS);
741: return '(';
742: }
743: "(" sf_push(); return '(';
744: ")" sf_pop(); return ')';
745:
1.1 deraadt 746: [/|*+?.(){}] return (unsigned char) yytext[0];
747: . RETURNCHAR;
748: }
749:
750:
751: <SC>{
	/* Inside a <start,condition> list at the front of a rule: names are
	 * returned as NAME, ',' and '*' as themselves, and '>' ends the list.
	 */
1.10 tedu 752: {OPTWS}{NL}{OPTWS} ++linenum; /* Allow blank lines & continuations */
1.1 deraadt 753: [,*] return (unsigned char) yytext[0];
754: ">" BEGIN(SECT2); return '>';
	/* A '>' directly followed by '^' goes to CARETISBOL so that the caret is returned as '^'. */
755: ">"/^ BEGIN(CARETISBOL); return '>';
756: {SCNAME} RETURNNAME;
757: . {
758: format_synerr( _( "bad <start condition>: %s" ),
759: yytext );
760: }
761: }
762:
763: <CARETISBOL>"^" BEGIN(SECT2); return '^';
	/* The rule above returns the caret as the '^' token and resumes in SECT2; the state is entered
	 * after a start-condition list and when an expanded definition begins with '^'.
	 */
764:
765:
766: <QUOTE>{
	/* Inside a double-quoted string in a pattern: each character is returned
	 * as a CHAR until the closing quote.
	 */
767: [^"\n] RETURNCHAR;
768: \" BEGIN(SECT2); return '"';
769:
770: {NL} {
	/* Recover from an unterminated string by reporting it and returning the closing quote token anyway. */
771: synerr( _( "missing quote" ) );
772: BEGIN(SECT2);
773: ++linenum;
774: return '"';
775: }
776: }
777:
1.10 tedu 778: <GROUP_WITH_PARAMS>{
	/* Flags after "(?": i, s and x turn the matching scanner flag on, "-"
	 * switches to the flags that are turned off, and ":" returns to SECT2.
	 */
779: ":" BEGIN(SECT2);
780: "-" BEGIN(GROUP_MINUS_PARAMS);
781: i sf_set_case_ins(1);
782: s sf_set_dot_all(1);
783: x sf_set_skip_ws(1);
784: }
785: <GROUP_MINUS_PARAMS>{
	/* Flags after the "-": the same letters, each turning its flag off. */
786: ":" BEGIN(SECT2);
787: i sf_set_case_ins(0);
788: s sf_set_dot_all(0);
789: x sf_set_skip_ws(0);
790: }
1.1 deraadt 791:
792: <FIRSTCCL>{
	/* First position after the '[' of a character class.  A caret here is
	 * returned as '^'; when '-' or ']' follows it the state stays FIRSTCCL,
	 * otherwise scanning continues in CCL.
	 */
793: "^"/[^-\]\n] BEGIN(CCL); return '^';
794: "^"/("-"|"]") return '^';
795: . BEGIN(CCL); RETURNCHAR;
796: }
797:
798: <CCL>{
	/* Remaining positions of a character class.  A '-' counts as a range
	 * operator only when something other than ']' or a newline follows it.
	 */
799: -/[^\]\n] return '-';
800: [^\]\n] RETURNCHAR;
801: "]" BEGIN(SECT2); return ']';
802: .|{NL} {
803: synerr( _( "bad character class" ) );
804: BEGIN(SECT2);
805: return ']';
806: }
807: }
808:
809: <FIRSTCCL,CCL>{
	/* Named class expressions inside a character class; each returns its own
	 * token and continues in CCL.  The [:^name:] forms return the negated token.
	 */
810: "[:alnum:]" BEGIN(CCL); return CCE_ALNUM;
811: "[:alpha:]" BEGIN(CCL); return CCE_ALPHA;
812: "[:blank:]" BEGIN(CCL); return CCE_BLANK;
813: "[:cntrl:]" BEGIN(CCL); return CCE_CNTRL;
814: "[:digit:]" BEGIN(CCL); return CCE_DIGIT;
815: "[:graph:]" BEGIN(CCL); return CCE_GRAPH;
816: "[:lower:]" BEGIN(CCL); return CCE_LOWER;
817: "[:print:]" BEGIN(CCL); return CCE_PRINT;
818: "[:punct:]" BEGIN(CCL); return CCE_PUNCT;
819: "[:space:]" BEGIN(CCL); return CCE_SPACE;
820: "[:upper:]" BEGIN(CCL); return CCE_UPPER;
821: "[:xdigit:]" BEGIN(CCL); return CCE_XDIGIT;
1.10 tedu 822:
823: "[:^alnum:]" BEGIN(CCL); return CCE_NEG_ALNUM;
824: "[:^alpha:]" BEGIN(CCL); return CCE_NEG_ALPHA;
825: "[:^blank:]" BEGIN(CCL); return CCE_NEG_BLANK;
826: "[:^cntrl:]" BEGIN(CCL); return CCE_NEG_CNTRL;
827: "[:^digit:]" BEGIN(CCL); return CCE_NEG_DIGIT;
828: "[:^graph:]" BEGIN(CCL); return CCE_NEG_GRAPH;
829: "[:^lower:]" BEGIN(CCL); return CCE_NEG_LOWER;
830: "[:^print:]" BEGIN(CCL); return CCE_NEG_PRINT;
831: "[:^punct:]" BEGIN(CCL); return CCE_NEG_PUNCT;
832: "[:^space:]" BEGIN(CCL); return CCE_NEG_SPACE;
833: "[:^upper:]" BEGIN(CCL); return CCE_NEG_UPPER;
834: "[:^xdigit:]" BEGIN(CCL); return CCE_NEG_XDIGIT;
1.1 deraadt 835: {CCL_EXPR} {
	/* Well-formed but unknown expression name: report it and return CCE_ALNUM as a stand-in token. */
836: format_synerr(
837: _( "bad character class expression: %s" ),
838: yytext );
839: BEGIN(CCL); return CCE_ALNUM;
840: }
841: }
842:
843: <NUM>{
	/* Inside a repeat count such as {2,5}: digits are returned as NUMBER with
	 * their value in yylval, and the closing brace returns to SECT2.
	 */
844: [[:digit:]]+ {
845: yylval = myctoi( yytext );
846: return NUMBER;
847: }
848:
849: "," return ',';
1.10 tedu 850: "}" {
851: BEGIN(SECT2);
852: if ( lex_compat || posix_compat )
853: return END_REPEAT_POSIX;
854: else
855: return END_REPEAT_FLEX;
856: }
1.1 deraadt 857:
858: . {
	/* Error recovery: report the problem and return '}' as if the count had been closed. */
859: synerr( _( "bad character inside {}'s" ) );
860: BEGIN(SECT2);
861: return '}';
862: }
863:
864: {NL} {
865: synerr( _( "missing }" ) );
866: BEGIN(SECT2);
867: ++linenum;
868: return '}';
869: }
870: }
871:
872:
873: <PERCENT_BRACE_ACTION>{
	/* Text of a %{ ... %} block in section 2.  A %} only sets bracelevel to 0;
	 * the block actually ends at the following newline.  The nested rules
	 * marked <ACTION> and <CODEBLOCK,ACTION> apply to those states as well.
	 */
874: {OPTWS}"%}".* bracelevel = 0;
875:
876: <ACTION>"/*" ACTION_ECHO; yy_push_state( COMMENT );
877:
878: <CODEBLOCK,ACTION>{
879: "reject" {
880: ACTION_ECHO;
881: CHECK_REJECT(yytext);
882: }
883: "yymore" {
884: ACTION_ECHO;
885: CHECK_YYMORE(yytext);
886: }
887: }
888:
1.10 tedu 889: {M4QSTART} ACTION_ECHO_QSTART;
890: {M4QEND} ACTION_ECHO_QEND;
891: . ACTION_ECHO;
1.1 deraadt 892: {NL} {
893: ++linenum;
894: ACTION_ECHO;
	/* The block is finished once %} was seen, or immediately for an indented code block. */
895: if ( bracelevel == 0 ||
896: (doing_codeblock && indented_code) )
897: {
898: if ( doing_rule_action )
899: add_action( "\tYY_BREAK\n" );
900:
901: doing_rule_action = doing_codeblock = false;
902: BEGIN(SECT2);
903: }
904: }
905: }
906:
907:
908: /* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
909: <ACTION>{
	/* Text of a rule's action.  Everything is passed to add_action() while
	 * bracelevel follows the braces; string and character literals are matched
	 * separately from the brace rules.  The action ends at a newline seen with
	 * bracelevel 0.
	 */
910: "{" ACTION_ECHO; ++bracelevel;
911: "}" ACTION_ECHO; --bracelevel;
1.10 tedu 912: {M4QSTART} ACTION_ECHO_QSTART;
913: {M4QEND} ACTION_ECHO_QEND;
914: [^[:alpha:]_{}"'/\n\[\]]+ ACTION_ECHO;
915: [\[\]] ACTION_ECHO;
1.1 deraadt 916: {NAME} ACTION_ECHO;
917: "'"([^'\\\n]|\\.)*"'" ACTION_ECHO; /* character constant */
918: \" ACTION_ECHO; BEGIN(ACTION_STRING);
919: {NL} {
920: ++linenum;
921: ACTION_ECHO;
922: if ( bracelevel == 0 )
923: {
924: if ( doing_rule_action )
925: add_action( "\tYY_BREAK\n" );
926:
927: doing_rule_action = false;
928: BEGIN(SECT2);
929: }
930: }
931: . ACTION_ECHO;
932: }
933:
934: <ACTION_STRING>{
	/* Inside a string literal within an action: the text is passed to
	 * add_action() unchanged.  The closing quote, or a newline, returns to ACTION.
	 */
935: [^"\\\n]+ ACTION_ECHO;
936: \\. ACTION_ECHO;
1.10 tedu 937: {NL} ++linenum; ACTION_ECHO; BEGIN(ACTION);
1.1 deraadt 938: \" ACTION_ECHO; BEGIN(ACTION);
939: . ACTION_ECHO;
940: }
941:
1.10 tedu 942: <COMMENT,COMMENT_DISCARD,ACTION,ACTION_STRING><<EOF>> {
	/* End of input while still inside a comment, an action or a string in an action. */
1.1 deraadt 943: synerr( _( "EOF encountered inside an action" ) );
944: yyterminate();
945: }
946:
1.10 tedu 947: <EXTENDED_COMMENT,GROUP_WITH_PARAMS,GROUP_MINUS_PARAMS><<EOF>> {
	/* End of input while still inside a (?# comment or a (? flag list. */
948: synerr( _( "EOF encountered inside pattern" ) );
949: yyterminate();
950: }
1.1 deraadt 951:
952: <SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ} {
	/* A backslash escape in a pattern, string or character class: myesc()
	 * supplies the value and it is returned as a CHAR.
	 */
953: yylval = myesc( (Char *) yytext );
954:
	/* An escape in the first position of a class uses up that position. */
955: if ( YY_START == FIRSTCCL )
956: BEGIN(CCL);
957:
958: return CHAR;
959: }
960:
961:
962: <SECT3>{
	/* Section 3 (user code) is copied to yyout; the m4 quote delimiters are
	 * written in their escaped forms instead.
	 */
1.10 tedu 963: {M4QSTART} fwrite (escaped_qstart, 1, strlen(escaped_qstart), yyout);
964: {M4QEND} fwrite (escaped_qend, 1, strlen(escaped_qend), yyout);
965: [^\[\]\n]*(\n?) ECHO;
966: (.|\n) ECHO;
1.1 deraadt 967: <<EOF>> sectnum = 0; yyterminate();
968: }
969:
970: <*>.|\n format_synerr( _( "bad character: %s" ), yytext );
	/* The rule above is the fallback for every start condition: a character matched by no other rule
	 * is reported as an error.  With %option nodefault there is no built-in default rule to take it.
	 */
971:
972: %%
973:
974:
975: int yywrap()
976: {
977: if ( --num_input_files > 0 )
978: {
979: set_input_file( *++input_files );
980: return 0;
981: }
982:
983: else
984: return 1;
985: }
986:
987:
988: /* set_input_file - open the given file (if NULL, stdin) for scanning */
989:
990: void set_input_file( file )
991: char *file;
992: {
993: if ( file && strcmp( file, "-" ) )
994: {
995: infilename = copy_string( file );
996: yyin = fopen( infilename, "r" );
997:
998: if ( yyin == NULL )
999: lerrsf( _( "can't open %s" ), file );
1000: }
1001:
1002: else
1003: {
1004: yyin = stdin;
1005: infilename = copy_string( "<stdin>" );
1006: }
1007:
1008: linenum = 1;
1009: }
1010: