Annotation of src/usr.bin/awk/awklex.l, Revision 1.2
1.2 ! millert 1: %Start A str sc reg comment
1.1 tholo 2:
3: %{
4: /****************************************************************
5: Copyright (C) AT&T and Lucent Technologies 1996
6: All Rights Reserved
7:
8: Permission to use, copy, modify, and distribute this software and
9: its documentation for any purpose and without fee is hereby
10: granted, provided that the above copyright notice appear in all
11: copies and that both that the copyright notice and this
12: permission notice and warranty disclaimer appear in supporting
13: documentation, and that the names of AT&T or Lucent Technologies
14: or any of their entities not be used in advertising or publicity
15: pertaining to distribution of the software without specific,
16: written prior permission.
17:
18: AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
19: SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
20: FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
21: ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
22: DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
23: DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
24: OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
25: USE OR PERFORMANCE OF THIS SOFTWARE.
26: ****************************************************************/
27:
28: /* some of this depends on behavior of lex that
29: may not be preserved in other implementations of lex.
30: */
31:
32: #ifndef FLEX_SCANNER
33: #undef input /* defeat lex */
34: #undef unput
35: #endif /* !FLEX_SCANNER */
36:
37: #include <stdlib.h>
38: #include <string.h>
39: #include "awk.h"
40: #include "awkgram.h"
41:
42: extern YYSTYPE yylval; /* semantic value filled in by the rules below before a token is returned */
43: extern int infunc; /* tested by the func/return rules to diagnose nested functions and stray returns */
44:
45: int lineno = 1; /* source line counter; the newline rules below increment it */
46: int bracecnt = 0; /* '{' seen minus '}' seen; going negative reports "extra }" */
47: int brackcnt = 0; /* same bookkeeping for '[' and ']' */
48: int parencnt = 0; /* same bookkeeping for '(' and ')' */
49:
#define	DEBUG

/*
 * RET(x): return token x from yylex().  With DEBUG defined and dbg
 * nonzero, the token name and the matched text are printed first.
 * The body is wrapped in do { } while (0) so that "RET(x);" is exactly
 * one statement (safe in an unbraced if/else), and x is evaluated once
 * even though it is used for both the trace and the return.
 */
#ifdef	DEBUG
#	define	RET(x)	do { \
		int ret_tok_ = (x); \
		if (dbg) \
			printf("lex %s [%s]\n", tokname(ret_tok_), yytext); \
		return (ret_tok_); \
	} while (0)
#else
#	define	RET(x)	return (x)
#endif

/*
 * CADD: append the first character of yytext to the growing string gs.
 * A zero return from cadd() means the buffer could not take the
 * character; report it as a syntax error and fall back to state A.
 * Wrapped in do { } while (0) so a following "else" cannot bind to the
 * macro's internal "if".
 */
#define	CADD	do { \
		if (cadd(gs, yytext[0]) == 0) { \
			ERROR "string/reg expr %.30s... too long", gs->cbuf SYNTAX; \
			BEGIN A; \
		} \
	} while (0)
61:
62: char *s; /* scratch pointer; the closing-quote rule stores the finished string text here */
63: Gstring *gs = 0; /* initialized in main() */
64: int cflag;
65:
66: #ifdef FLEX_SCANNER
	/* flex build: route all scanner input through my_input(), defined after the rules */
67: static int my_input( YY_CHAR *buf, int max_size );
68:
69: #undef YY_INPUT
70: #define YY_INPUT(buf,result,max_size) result = my_input(buf, max_size);
71:
	/* have the generated scanner call init_input_source() as its user-init hook */
72: #undef YY_USER_INIT
73: #define YY_USER_INIT init_input_source();
74:
	/*
	 * FIRST is used by the switch at the top of the rules to find out which
	 * start state the scanner is in.  Both forms read scanner-internal
	 * variables (yy_start for flex; yybgin and yysvec otherwise), so they
	 * depend on how the generator lays out its tables.
	 */
75: #define FIRST ((yy_start - 1) / 2)
76: #else /* FLEX_SCANNER */
77: #define FIRST (yybgin - yysvec - 1)
78: #endif /* FLEX_SCANNER */
79: %}
80:
	/*
	 * Named patterns used by the rules:
	 *   A  first character of a name (letter or underscore)
	 *   B  any later character of a name (letter, digit or underscore)
	 *   D  decimal digit    O  octal digit    H  hexadecimal digit
	 *   WS blank or tab
	 * Note that "A" is also the name of the main start state declared
	 * on the %Start line; {A} is the pattern, <A> is the state.
	 */
81: A [a-zA-Z_]
82: B [a-zA-Z0-9_]
83: D [0-9]
84: O [0-7]
85: H [0-9a-fA-F]
86: WS [ \t]
87:
88: %%
89: switch (FIRST) { /* witchcraft */
	/*
	 * This code sits ahead of the first rule, so lex places it at the
	 * top of yylex() where it runs before any pattern is matched.
	 * FIRST yields the scanner's current start state.
	 */
90: case 0:
	/* still in the unnamed initial state: switch to the main state A */
91: BEGIN A;
92: break;
93: case sc:
	/*
	 * The "}" rule returned ';' and left the scanner in state sc;
	 * go back to A and hand the parser the '}' itself.
	 */
94: BEGIN A;
95: RET('}');
96: }
97:
98: <A>\n { lineno++; RET(NL); } /* a newline is a token of its own */
99: <A>#.* { ; } /* strip comments */
100: <A>{WS}+ { ; } /* blanks and tabs produce no token */
101: <A>; { RET(';'); }
102:
103: <A>"\\"\n { lineno++; } /* backslash-newline: count the line, return nothing */
104: <A>BEGIN { RET(XBEGIN); }
105: <A>END { RET(XEND); }
106: <A>func(tion)? { if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); } /* matches "func" and "function" */
107: <A>return { if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
108: <A>"&&" { RET(AND); }
109: <A>"||" { RET(BOR); }
110: <A>"!" { RET(NOT); }
111: <A>"!=" { yylval.i = NE; RET(NE); }
112: <A>"~" { yylval.i = MATCH; RET(MATCHOP); } /* both match operators return MATCHOP; yylval.i tells them apart */
113: <A>"!~" { yylval.i = NOTMATCH; RET(MATCHOP); }
114: <A>"<" { yylval.i = LT; RET(LT); }
115: <A>"<=" { yylval.i = LE; RET(LE); }
116: <A>"==" { yylval.i = EQ; RET(EQ); }
117: <A>">=" { yylval.i = GE; RET(GE); }
118: <A>">" { yylval.i = GT; RET(GT); }
119: <A>">>" { yylval.i = APPEND; RET(APPEND); }
120: <A>"++" { yylval.i = INCR; RET(INCR); }
121: <A>"--" { yylval.i = DECR; RET(DECR); }
122: <A>"+=" { yylval.i = ADDEQ; RET(ASGNOP); } /* every assignment form returns ASGNOP; yylval.i holds the specific operator */
123: <A>"-=" { yylval.i = SUBEQ; RET(ASGNOP); }
124: <A>"*=" { yylval.i = MULTEQ; RET(ASGNOP); }
125: <A>"/=" { yylval.i = DIVEQ; RET(ASGNOP); }
126: <A>"%=" { yylval.i = MODEQ; RET(ASGNOP); }
127: <A>"^=" { yylval.i = POWEQ; RET(ASGNOP); }
128: <A>"**=" { yylval.i = POWEQ; RET(ASGNOP); } /* "**=" is treated exactly like "^=" */
129: <A>"=" { yylval.i = ASSIGN; RET(ASGNOP); }
130: <A>"**" { RET(POWER); } /* "**" is treated exactly like "^" */
131: <A>"^" { RET(POWER); }
132:
133: <A>"$"{D}+ { yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); } /* $digits: the number after '$' is passed to fieldadr() */
134: <A>"$NF" { unputstr("(NF)"); return(INDIRECT); } /* pushes "(NF)" back so $NF is rescanned as $ applied to (NF) */
135: <A>"$"{A}{B}* {
136: int c;
137: char *yytext_copy = strdup(yytext);
138: c = input(); unput(c); /* look for '(' or '[' */
139: if (c == '(' || c == '[' ||
140: infunc && isarg(yytext_copy+1) >= 0) {
141: unputstr(yytext_copy+1);
142: free(yytext_copy);
143: return(INDIRECT);
144: } else {
145: yylval.cp =
146: setsymtab(yytext_copy+1,"",0.0,STR|NUM,symtab);
147: free(yytext_copy);
148: RET(IVAR);
149: }
150: }
151: <A>"$" { RET(INDIRECT); } /* a '$' not matched by any longer "$..." rule above */
152: <A>NF { yylval.cp = setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); }
153:
154: <A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)? {
155: yylval.cp = setsymtab(yytext, tostring(yytext), atof(yytext), CON|NUM, symtab); /* numeric constant: symbol named by its own text, value from atof() */
156: /* should this also have STR set? */
157: RET(NUMBER); }
158:
159: <A>while { RET(WHILE); } /* keyword rules come before the general name rule, so a tie in match length goes to the keyword */
160: <A>for { RET(FOR); }
161: <A>do { RET(DO); }
162: <A>if { RET(IF); }
163: <A>else { RET(ELSE); }
164: <A>next { RET(NEXT); }
165: <A>nextfile { RET(NEXTFILE); }
166: <A>exit { RET(EXIT); }
167: <A>break { RET(BREAK); }
168: <A>continue { RET(CONTINUE); }
169: <A>print { yylval.i = PRINT; RET(PRINT); }
170: <A>printf { yylval.i = PRINTF; RET(PRINTF); }
171: <A>sprintf { yylval.i = SPRINTF; RET(SPRINTF); }
172: <A>split { yylval.i = SPLIT; RET(SPLIT); }
173: <A>substr { RET(SUBSTR); }
174: <A>sub { yylval.i = SUB; RET(SUB); }
175: <A>gsub { yylval.i = GSUB; RET(GSUB); }
176: <A>index { RET(INDEX); }
177: <A>match { RET(MATCHFCN); }
178: <A>in { RET(IN); }
179: <A>getline { RET(GETLINE); }
180: <A>close { RET(CLOSE); }
181: <A>delete { RET(DELETE); }
182: <A>length { yylval.i = FLENGTH; RET(BLTIN); } /* from here on every rule returns BLTIN; yylval.i selects the function */
183: <A>log { yylval.i = FLOG; RET(BLTIN); }
184: <A>int { yylval.i = FINT; RET(BLTIN); }
185: <A>exp { yylval.i = FEXP; RET(BLTIN); }
186: <A>sqrt { yylval.i = FSQRT; RET(BLTIN); }
187: <A>sin { yylval.i = FSIN; RET(BLTIN); }
188: <A>cos { yylval.i = FCOS; RET(BLTIN); }
189: <A>atan2 { yylval.i = FATAN; RET(BLTIN); }
190: <A>system { yylval.i = FSYSTEM; RET(BLTIN); }
191: <A>rand { yylval.i = FRAND; RET(BLTIN); }
192: <A>srand { yylval.i = FSRAND; RET(BLTIN); }
193: <A>toupper { yylval.i = FTOUPPER; RET(BLTIN); }
194: <A>tolower { yylval.i = FTOLOWER; RET(BLTIN); }
195: <A>fflush { yylval.i = FFLUSH; RET(BLTIN); }
196:
197: <A>{A}{B}* { int n, c;
198: char *yytext_copy = strdup(yytext);
199: c = input(); unput(c); /* look for '(' */
200: if (c != '(' && infunc && (n=isarg(yytext_copy)) >= 0) {
201: yylval.i = n;
202: free(yytext_copy);
203: RET(ARG);
204: } else {
205: yylval.cp = setsymtab(yytext_copy, "", 0.0, STR|NUM, symtab);
206: free(yytext_copy);
207: if (c == '(') {
208: RET(CALL);
209: } else {
210: RET(VAR);
211: }
212: }
213: }
1.2 ! millert 214: <A>\" { BEGIN str; caddreset(gs); } /* opening quote: empty gs and collect the string in state str */
1.1 tholo 215:
216: <A>"}" { if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); } /* returns ';' now; state sc makes the code at the top of the rules return the '}' next */
217: <A>"]" { if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
218: <A>")" { if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }
219:
220: <A>. { if (yytext[0] == '{') bracecnt++;
221: else if (yytext[0] == '[') brackcnt++;
222: else if (yytext[0] == '(') parencnt++;
223: RET(yylval.i = yytext[0]); /* everything else */ }
224:
225: <reg>\\. { cadd(gs, '\\'); cadd(gs, yytext[1]); } /* inside a regexp an escape is kept as-is, backslash included */
226: <reg>\n { ERROR "newline in regular expression %.10s...", gs->cbuf SYNTAX; lineno++; BEGIN A; }
227: <reg>"/" { BEGIN A; /* closing slash: regexp text is complete */
228: cadd(gs, 0); /* NUL-terminate the collected text */
229: yylval.s = tostring(gs->cbuf);
230: unput('/'); /* push a '/' back so it is scanned again in state A after REGEXPR is returned */
231: RET(REGEXPR); }
232: <reg>. { CADD; }
233:
1.2 ! millert 234: <str>\" { BEGIN A;
1.1 tholo 235: cadd(gs, 0); s = tostring(gs->cbuf);
236: cunadd(gs);
237: cadd(gs, ' '); cadd(gs, 0);
238: yylval.cp = setsymtab(gs->cbuf, s, 0.0, CON|STR, symtab);
239: RET(STRING); }
1.2 ! millert 240: <str>\n { ERROR "newline in string %.10s...", gs->cbuf SYNTAX; lineno++; BEGIN A; }
! 241: <str>"\\\"" { cadd(gs, '"'); }
! 242: <str>"\\"n { cadd(gs, '\n'); }
! 243: <str>"\\"t { cadd(gs, '\t'); }
! 244: <str>"\\"f { cadd(gs, '\f'); }
! 245: <str>"\\"r { cadd(gs, '\r'); }
! 246: <str>"\\"b { cadd(gs, '\b'); }
! 247: <str>"\\"v { cadd(gs, '\v'); } /* these ANSIisms may not be known by */
! 248: <str>"\\"a { cadd(gs, '\007'); } /* your compiler. hence 007 for bell */
! 249: <str>"\\\\" { cadd(gs, '\\'); }
! 250: <str>"\\"({O}{O}{O}|{O}{O}|{O}) { int n;
1.1 tholo 251: sscanf(yytext+1, "%o", &n); cadd(gs, n); }
1.2 ! millert 252: <str>"\\"x({H}+) { int n; /* ANSI permits any number! */
1.1 tholo 253: sscanf(yytext+2, "%x", &n); cadd(gs, n); }
1.2 ! millert 254: <str>"\\". { cadd(gs, yytext[1]); }
! 255: <str>. { CADD; }
1.1 tholo 256:
257: %%
258:
259: void startreg(void) /* start parsing a regular expression */
260: {
261: BEGIN reg;
262: caddreset(gs);
263: }
264:
265: #ifdef FLEX_SCANNER
266: static int my_input( YY_CHAR *buf, int max_size )
267: {
268: extern uschar *lexprog;
269:
270: if ( lexprog ) { /* awk '...' */
271: int num_chars = strlen( lexprog );
272: if ( num_chars > max_size )
273: {
274: num_chars = max_size;
275: strncpy( buf, lexprog, num_chars );
276: }
277: else
278: strcpy( buf, lexprog );
279: lexprog += num_chars;
280: return num_chars;
281:
282: } else { /* awk -f ... */
283: int c = pgetc();
284: if (c == EOF)
285: return 0;
286: buf[0] = c;
287: return 1;
288: }
289: }
290: #else /* FLEX_SCANNER */
291: /* input() and unput() are transcriptions of the standard lex
292: macros for input and output with additions for error message
293: printing. God help us all if someone changes how lex works.
294: */
295:
296: char ebuf[300];
297: char *ep = ebuf;
298:
299: int input(void) /* get next lexical input character */
300: {
301: int c;
302: extern char *lexprog;
303:
304: if (yysptr > yysbuf)
305: c = U(*--yysptr);
306: else if (lexprog != NULL) { /* awk '...' */
307: if ((c = *lexprog) != 0)
308: lexprog++;
309: } else /* awk -f ... */
310: c = pgetc();
311: if (c == '\n')
312: yylineno++;
313: else if (c == EOF)
314: c = 0;
315: if (ep >= ebuf + sizeof ebuf)
316: ep = ebuf;
317: return *ep++ = c;
318: }
319:
320: void unput(int c) /* put lexical character back on input */
321: {
322: yytchar = c;
323: if (yytchar == '\n')
324: yylineno--;
325: *yysptr++ = yytchar;
326: if (--ep < ebuf)
327: ep = ebuf + sizeof(ebuf) - 1;
328: }
329: #endif /* FLEX_SCANNER */
330:
/*
 * unputstr: push the whole string s back onto the input.
 * Characters are pushed last-to-first, so they are read back in their
 * original order.  An empty string pushes nothing.
 */
void unputstr(char *s)	/* put a string back on input */
{
	char *p = s + strlen(s);

	while (p > s) {
		p--;
		unput(*p);
	}
}
338:
/*
 * lex_input: ordinary function wrapper around the scanner's input(), so
 * the next input character can be fetched through a plain function name.
 */
int lex_input()
{
	int c = input();

	return c;
}
343:
344: /* growing-string code */
345:
346: const int CBUFLEN = 400;
347:
348: Gstring *newGstring()
349: {
350: Gstring *gs = (Gstring *) malloc(sizeof(Gstring));
351: char *cp = (char *) malloc(CBUFLEN);
352:
353: if (gs == 0 || cp == 0)
354: ERROR "Out of space for strings" FATAL;
355: gs->cbuf = cp;
356: gs->cmax = CBUFLEN;
357: gs->clen = 0;
358: return gs;
359: }
360:
361: char *cadd(Gstring *gs, int c) /* add one char to gs->cbuf, grow as needed */
362: {
363: if (gs->clen >= gs->cmax) { /* need to grow */
364: gs->cmax *= 4;
365: gs->cbuf = (char *) realloc((void *) gs->cbuf, gs->cmax);
366:
367: }
368: if (gs->cbuf != 0)
369: gs->cbuf[gs->clen++] = c;
370: return gs->cbuf;
371: }
372:
373: void caddreset(Gstring *gs)
374: {
375: gs->clen = 0;
376: }
377:
378: void cunadd(Gstring *gs)
379: {
380: if (gs->clen > 0)
381: gs->clen--;
382: }
383:
384: void delGstring(Gstring *gs)
385: {
386: free((void *) gs->cbuf);
387: free((void *) gs);
388: }
389:
390: #ifdef FLEX_SCANNER
391: void init_input_source(void)
392: {
393: extern int curpfile;
394: extern char *pfile[];
395:
396: if (yyin == NULL) {
397: if (pfile[curpfile] == 0)
398: return;
399: if (strcmp((char *) pfile[curpfile], "-") == 0)
400: yyin = stdin;
401: else if ((yyin = fopen((char *) pfile[curpfile], "r")) == NULL)
402: ERROR "can't open file %s", pfile[curpfile] FATAL;
403: }
404: }
405: #endif