Annotation of src/usr.bin/awk/awklex.l, Revision 1.3
1.2 millert 1: %Start A str sc reg comment
1.1 tholo 2:
3: %{
4: /****************************************************************
5: Copyright (C) AT&T and Lucent Technologies 1996
6: All Rights Reserved
7:
8: Permission to use, copy, modify, and distribute this software and
9: its documentation for any purpose and without fee is hereby
10: granted, provided that the above copyright notice appear in all
11: copies and that both that the copyright notice and this
12: permission notice and warranty disclaimer appear in supporting
13: documentation, and that the names of AT&T or Lucent Technologies
14: or any of their entities not be used in advertising or publicity
15: pertaining to distribution of the software without specific,
16: written prior permission.
17:
18: AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
19: SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
20: FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
21: ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
22: DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
23: DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
24: OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
25: USE OR PERFORMANCE OF THIS SOFTWARE.
26: ****************************************************************/
27:
28: /* some of this depends on behavior of lex that
29: may not be preserved in other implementations of lex.
30: */
31:
32: #undef input /* defeat lex */
33: #undef unput
34:
35: #include <stdlib.h>
36: #include <string.h>
37: #include "awk.h"
1.3 ! millert 38: #include "ytab.h"
1.1 tholo 39:
40: extern YYSTYPE yylval;
41: extern int infunc; /* tested by the func and return rules to diagnose nested functions and stray returns */
42:
43: int lineno = 1; /* source line counter, bumped by the newline rules */
44: int bracecnt = 0; /* nesting depth of braces; the closing-brace rule complains when it goes negative */
45: int brackcnt = 0; /* same bookkeeping for square brackets */
46: int parencnt = 0; /* same bookkeeping for parentheses */
47:
/*
 * DEBUG is defined unconditionally right here, so the tracing form of RET
 * is the one that gets compiled.  It prints the token name and the matched
 * text when dbg is nonzero, then returns the token.
 */
48: #define DEBUG
49: #ifdef DEBUG
50: # define RET(x) {if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); }
51: #else
52: # define RET(x) return(x)
53: #endif
54:
/*
 * CADD appends the first character of the current match to gs.  When cadd
 * returns 0 it reports the string or regular expression as too long and
 * puts the scanner back in state A.
 */
55: #define CADD if (cadd(gs, yytext[0]) == 0) { \
56: ERROR "string/reg expr %.30s... too long", gs->cbuf SYNTAX; \
57: BEGIN A; \
58: }
59:
60: char *s; /* scratch pointer; the string-closing rule stores the tostring copy of the text here */
61: Gstring *gs = 0; /* initialized in main() */
62: int cflag;
63: %}
64:
65: A [a-zA-Z_]
	/* Pattern macros used by the rules.  Note that A is also the name of a start state on the %Start line: {A} in a pattern is this macro (first character of a name), while <A> and BEGIN A refer to the start state.  B is a name continuation character, D a decimal digit, O an octal digit, H a hex digit, WS a blank or tab. */
66: B [a-zA-Z0-9_]
67: D [0-9]
68: O [0-7]
69: H [0-9a-fA-F]
70: WS [ \t]
71:
72: %%
1.3 ! millert 73: switch (yybgin-yysvec-1) { /* witchcraft: presumably recovers the current start state number from lex internals -- TODO confirm for the lex in use */
1.1 tholo 74: case 0: /* value 0: select start state A */
75: BEGIN A;
76: break;
77: case sc: /* left pending by the closing-brace rule, which returned a semicolon first */
78: BEGIN A;
79: RET('}'); /* now deliver the brace itself */
80: }
81:
82: <A>\n { lineno++; RET(NL); } /* a newline is a token of its own (NL) */
83: <A>#.* { ; } /* strip comments */
84: <A>{WS}+ { ; } /* blanks and tabs produce no token */
85: <A>; { RET(';'); }
86:
87: <A>"\\"\n { lineno++; } /* backslash followed by newline: count the line, return no token */
88: <A>BEGIN { RET(XBEGIN); }
89: <A>END { RET(XEND); }
90: <A>func(tion)? { if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); } /* matches func or function; FUNC is still returned after the error */
91: <A>return { if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
92: <A>"&&" { RET(AND); }
93: <A>"||" { RET(BOR); }
94: <A>"!" { RET(NOT); }
95: <A>"!=" { yylval.i = NE; RET(NE); } /* from here on yylval.i carries the specific operator code */
96: <A>"~" { yylval.i = MATCH; RET(MATCHOP); } /* both match operators share the MATCHOP token */
97: <A>"!~" { yylval.i = NOTMATCH; RET(MATCHOP); }
98: <A>"<" { yylval.i = LT; RET(LT); }
99: <A>"<=" { yylval.i = LE; RET(LE); }
100: <A>"==" { yylval.i = EQ; RET(EQ); }
101: <A>">=" { yylval.i = GE; RET(GE); }
102: <A>">" { yylval.i = GT; RET(GT); }
103: <A>">>" { yylval.i = APPEND; RET(APPEND); }
104: <A>"++" { yylval.i = INCR; RET(INCR); }
105: <A>"--" { yylval.i = DECR; RET(DECR); }
106: <A>"+=" { yylval.i = ADDEQ; RET(ASGNOP); } /* every assignment form returns ASGNOP; yylval.i says which one */
107: <A>"-=" { yylval.i = SUBEQ; RET(ASGNOP); }
108: <A>"*=" { yylval.i = MULTEQ; RET(ASGNOP); }
109: <A>"/=" { yylval.i = DIVEQ; RET(ASGNOP); }
110: <A>"%=" { yylval.i = MODEQ; RET(ASGNOP); }
111: <A>"^=" { yylval.i = POWEQ; RET(ASGNOP); }
112: <A>"**=" { yylval.i = POWEQ; RET(ASGNOP); } /* alternate spelling: same POWEQ code as the caret form */
113: <A>"=" { yylval.i = ASSIGN; RET(ASGNOP); }
114: <A>"**" { RET(POWER); } /* both spellings of exponentiation return POWER */
115: <A>"^" { RET(POWER); }
116:
117: <A>"$"{D}+ { yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); } /* dollar plus digits: the digits after the dollar sign are converted and passed to fieldadr */
118: <A>"$NF" { unputstr("(NF)"); return(INDIRECT); } /* push (NF) back for rescanning and return INDIRECT for the dollar sign; plain return, so no RET trace */
1.3 ! millert 119: <A>"$"{A}{B}* { int c, n;
! 120: c = input(); unput(c); /* peek at the next character without consuming it */
! 121: if (c == '(' || c == '[' || (infunc && (n=isarg(yytext+1)) >= 0)) { /* call, subscript, or (inside a function) a name isarg recognizes */
! 122: unputstr(yytext+1); /* push the name back so it is scanned as its own token */
! 123: return(INDIRECT);
1.1 tholo 124: } else {
1.3 ! millert 125: yylval.cp = setsymtab(yytext+1, "", 0.0, STR|NUM, symtab); /* otherwise enter the name (without the dollar sign) in symtab */
! 126: RET(IVAR);
1.1 tholo 127: }
128: }
129: <A>"$" { RET(INDIRECT); } /* a dollar sign not covered by the rules above */
130: <A>NF { yylval.cp = setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); }
131:
132: <A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)? { /* digits with an optional fraction, or a fraction starting with a dot, then an optional exponent */
133: yylval.cp = setsymtab(yytext, tostring(yytext), atof(yytext), CON|NUM, symtab); /* symbol is keyed by the literal text and holds both a copy of the text and its atof value */
134: /* should this also have STR set? */
135: RET(NUMBER); }
136:
137: <A>while { RET(WHILE); } /* reserved words, listed ahead of the generic identifier rule; NOTE(review): this relies on lex choosing the earlier rule for equal-length matches -- confirm for the lex in use */
138: <A>for { RET(FOR); }
139: <A>do { RET(DO); }
140: <A>if { RET(IF); }
141: <A>else { RET(ELSE); }
142: <A>next { RET(NEXT); }
143: <A>nextfile { RET(NEXTFILE); }
144: <A>exit { RET(EXIT); }
145: <A>break { RET(BREAK); }
146: <A>continue { RET(CONTINUE); }
147: <A>print { yylval.i = PRINT; RET(PRINT); } /* these also store their own token code in yylval.i */
148: <A>printf { yylval.i = PRINTF; RET(PRINTF); }
149: <A>sprintf { yylval.i = SPRINTF; RET(SPRINTF); }
150: <A>split { yylval.i = SPLIT; RET(SPLIT); }
151: <A>substr { RET(SUBSTR); }
152: <A>sub { yylval.i = SUB; RET(SUB); }
153: <A>gsub { yylval.i = GSUB; RET(GSUB); }
154: <A>index { RET(INDEX); }
155: <A>match { RET(MATCHFCN); }
156: <A>in { RET(IN); }
157: <A>getline { RET(GETLINE); }
158: <A>close { RET(CLOSE); }
159: <A>delete { RET(DELETE); }
160: <A>length { yylval.i = FLENGTH; RET(BLTIN); } /* from here on all rules return the shared BLTIN token; yylval.i selects the function */
161: <A>log { yylval.i = FLOG; RET(BLTIN); }
162: <A>int { yylval.i = FINT; RET(BLTIN); }
163: <A>exp { yylval.i = FEXP; RET(BLTIN); }
164: <A>sqrt { yylval.i = FSQRT; RET(BLTIN); }
165: <A>sin { yylval.i = FSIN; RET(BLTIN); }
166: <A>cos { yylval.i = FCOS; RET(BLTIN); }
167: <A>atan2 { yylval.i = FATAN; RET(BLTIN); }
168: <A>system { yylval.i = FSYSTEM; RET(BLTIN); }
169: <A>rand { yylval.i = FRAND; RET(BLTIN); }
170: <A>srand { yylval.i = FSRAND; RET(BLTIN); }
171: <A>toupper { yylval.i = FTOUPPER; RET(BLTIN); }
172: <A>tolower { yylval.i = FTOLOWER; RET(BLTIN); }
173: <A>fflush { yylval.i = FFLUSH; RET(BLTIN); }
174:
175: <A>{A}{B}* { int n, c; /* any other name: function argument, function call, or variable */
176: c = input(); unput(c); /* look for '(' */
1.3 ! millert 177: if (c != '(' && infunc && (n=isarg(yytext)) >= 0) { /* inside a function, not followed by a parenthesis, and isarg recognizes the name */
1.1 tholo 178: yylval.i = n; /* pass on the value isarg returned */
179: RET(ARG);
180: } else {
1.3 ! millert 181: yylval.cp = setsymtab(yytext, "", 0.0, STR|NUM, symtab); /* enter the name in symtab; used for both CALL and VAR */
1.1 tholo 182: if (c == '(') {
183: RET(CALL);
184: } else {
185: RET(VAR);
186: }
187: }
188: }
	/* The next rule starts a string constant: switch to state str and empty gs; no token is returned until the closing quote is seen. */
1.2 millert 189: <A>\" { BEGIN str; caddreset(gs); }
1.1 tholo 190:
191: <A>"}" { if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); } /* return a semicolon now and park in state sc; the switch at the top of the rules returns the brace on the next call */
192: <A>"]" { if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); } /* a negative count means an unmatched closer */
193: <A>")" { if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }
194:
195: <A>. { if (yytext[0] == '{') bracecnt++; /* openers only bump their counter */
196: else if (yytext[0] == '[') brackcnt++;
197: else if (yytext[0] == '(') parencnt++;
198: RET(yylval.i = yytext[0]); /* everything else */ }
199:
200: <reg>\\. { cadd(gs, '\\'); cadd(gs, yytext[1]); } /* keep an escaped character together with its backslash; cadd results are not checked here */
201: <reg>\n { ERROR "newline in regular expression %.10s...", gs->cbuf SYNTAX; lineno++; BEGIN A; } /* report the error and resume in state A */
202: <reg>"/" { BEGIN A; /* closing slash ends the expression */
203: cadd(gs, 0); /* terminate the collected text */
204: yylval.s = tostring(gs->cbuf); /* the token value is a tostring copy of the text */
205: unput('/'); /* push the slash back so it is scanned again in state A */
206: RET(REGEXPR); }
207: <reg>. { CADD; } /* any other character is collected, with the too-long check */
208:
	/* Closing quote of a string constant.  s receives a tostring copy of the collected text.  The terminator is then removed with cunadd, a blank is appended and the text is terminated again, so the symbol is entered under the text plus a trailing blank while its string value is s.  NOTE(review): presumably the blank keeps string constants apart from other symbols with the same spelling -- confirm. */
1.2 millert 209: <str>\" { BEGIN A;
1.1 tholo 210: cadd(gs, 0); s = tostring(gs->cbuf);
211: cunadd(gs);
212: cadd(gs, ' '); cadd(gs, 0);
213: yylval.cp = setsymtab(gs->cbuf, s, 0.0, CON|STR, symtab);
214: RET(STRING); }
1.2 millert 215: <str>\n { ERROR "newline in string %.10s...", gs->cbuf SYNTAX; lineno++; BEGIN A; } /* report the error and resume in state A */
	/* The remaining str rules translate backslash escapes into the character they denote and append it to gs.  Octal escapes take one to three octal digits, hex escapes take any number of hex digits, and both are converted with sscanf and passed to cadd as an int.  Any other escaped character stands for itself.  Only the final catch-all rule checks the cadd result (through CADD). */
216: <str>"\\\"" { cadd(gs, '"'); }
217: <str>"\\"n { cadd(gs, '\n'); }
218: <str>"\\"t { cadd(gs, '\t'); }
219: <str>"\\"f { cadd(gs, '\f'); }
220: <str>"\\"r { cadd(gs, '\r'); }
221: <str>"\\"b { cadd(gs, '\b'); }
222: <str>"\\"v { cadd(gs, '\v'); } /* these ANSIisms may not be known by */
223: <str>"\\"a { cadd(gs, '\007'); } /* your compiler. hence 007 for bell */
224: <str>"\\\\" { cadd(gs, '\\'); }
1.2 millert 225: <str>"\\"({O}{O}{O}|{O}{O}|{O}) { int n;
1.1 tholo 226: sscanf(yytext+1, "%o", &n); cadd(gs, n); }
1.2 millert 227: <str>"\\"x({H}+) { int n; /* ANSI permits any number! */
1.1 tholo 228: sscanf(yytext+2, "%x", &n); cadd(gs, n); }
1.2 millert 229: <str>"\\". { cadd(gs, yytext[1]); }
230: <str>. { CADD; }
1.1 tholo 231:
232: %%
233:
234: void startreg(void) /* start parsing a regular expression */
235: {
236: BEGIN reg;
237: caddreset(gs);
238: }
239:
240: /* input() and unput() are transcriptions of the standard lex
241: macros for input and output with additions for error message
242: printing. God help us all if someone changes how lex works.
243: */
244:
245: char ebuf[300];
246: char *ep = ebuf;
247:
248: int input(void) /* get next lexical input character */
249: {
250: int c;
251: extern char *lexprog;
252:
253: if (yysptr > yysbuf)
254: c = U(*--yysptr);
255: else if (lexprog != NULL) { /* awk '...' */
256: if ((c = *lexprog) != 0)
257: lexprog++;
258: } else /* awk -f ... */
259: c = pgetc();
260: if (c == '\n')
261: yylineno++;
262: else if (c == EOF)
263: c = 0;
264: if (ep >= ebuf + sizeof ebuf)
265: ep = ebuf;
266: return *ep++ = c;
267: }
268:
269: void unput(int c) /* put lexical character back on input */
270: {
271: yytchar = c;
272: if (yytchar == '\n')
273: yylineno--;
274: *yysptr++ = yytchar;
275: if (--ep < ebuf)
276: ep = ebuf + sizeof(ebuf) - 1;
277: }
1.3 ! millert 278:
1.1 tholo 279:
/*
 * unputstr - push a whole string back onto the scanner input.
 *
 * The characters are handed to unput() last to first, so they are read
 * back in their original order.  An empty string pushes nothing.
 *
 * The index stays in size_t, so the length is never narrowed to int.
 */
void unputstr(char *s)
{
	size_t n = strlen(s);

	while (n > 0)
		unput(s[--n]);
}
287:
288: /* growing-string code */
289:
290: const int CBUFLEN = 400;
291:
292: Gstring *newGstring()
293: {
294: Gstring *gs = (Gstring *) malloc(sizeof(Gstring));
295: char *cp = (char *) malloc(CBUFLEN);
296:
297: if (gs == 0 || cp == 0)
298: ERROR "Out of space for strings" FATAL;
299: gs->cbuf = cp;
300: gs->cmax = CBUFLEN;
301: gs->clen = 0;
302: return gs;
303: }
304:
305: char *cadd(Gstring *gs, int c) /* add one char to gs->cbuf, grow as needed */
306: {
307: if (gs->clen >= gs->cmax) { /* need to grow */
308: gs->cmax *= 4;
309: gs->cbuf = (char *) realloc((void *) gs->cbuf, gs->cmax);
310:
311: }
312: if (gs->cbuf != 0)
313: gs->cbuf[gs->clen++] = c;
314: return gs->cbuf;
315: }
316:
317: void caddreset(Gstring *gs)
318: {
319: gs->clen = 0;
320: }
321:
322: void cunadd(Gstring *gs)
323: {
324: if (gs->clen > 0)
325: gs->clen--;
326: }
327:
328: void delGstring(Gstring *gs)
329: {
330: free((void *) gs->cbuf);
331: free((void *) gs);
332: }