Annotation of src/usr.bin/awk/awkgram.y, Revision 1.3
1.1 tholo 1: /****************************************************************
2: Copyright (C) AT&T and Lucent Technologies 1996
3: All Rights Reserved
4:
5: Permission to use, copy, modify, and distribute this software and
6: its documentation for any purpose and without fee is hereby
7: granted, provided that the above copyright notice appear in all
8: copies and that both that the copyright notice and this
9: permission notice and warranty disclaimer appear in supporting
10: documentation, and that the names of AT&T or Lucent Technologies
11: or any of their entities not be used in advertising or publicity
12: pertaining to distribution of the software without specific,
13: written prior permission.
14:
15: AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
16: SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17: FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
18: ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
19: DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20: DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
21: OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
22: USE OR PERFORMANCE OF THIS SOFTWARE.
23: ****************************************************************/
24:
25: %{
/*
 * C prologue: headers, forward declarations and the parser-wide state
 * that the grammar actions below read and update.
 */
26: #include <stdio.h>
27: #include <string.h>
28: #include "awk.h"
29:
30: void checkdup(Node *list, Cell *item);
31: int yywrap(void) { return(1); }
32:
/* BEGIN and END actions are accumulated here by pa_stat and passed to the PROGRAM node. */
33: Node *beginloc = 0;
34: Node *endloc = 0;
35: int infunc = 0; /* nonzero inside a function body: incremented after the parameter list, decremented when the definition is reduced */
36: int inloop = 0; /* nesting depth of enclosing while/for/do bodies; break/continue are rejected when it is zero */
37: char *curfname = 0; /* name of the function being defined (set by setfname, cleared after defn) */
38: Node *arglist = 0; /* parameter list of the current function, as built by varlist */
39: %}
40:
/* Semantic value types carried by tokens and nonterminals. */
41: %union {
42: Node *p; /* parse-tree node */
43: Cell *cp; /* cell attached to VAR, IVAR, VARNF, CALL, NUMBER, STRING and FIELD tokens */
1.3 ! millert 44: int i; /* integer value of operator, keyword and punctuation tokens */
1.1 tholo 45: char *s; /* text of a regular expression (REGEXPR, reg_expr) */
46: }
47:
/*
 * Token declarations, tagged with the %union member that carries their
 * value, followed by the value types of the nonterminals.
 */
48: %token <i> FIRSTTOKEN /* must be first */
49: %token <p> PROGRAM PASTAT PASTAT2 XBEGIN XEND
50: %token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
51: %token <i> ARRAY
52: %token <i> MATCH NOTMATCH MATCHOP
53: %token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS
54: %token <i> AND BOR APPEND EQ GE GT LE LT NE IN
55: %token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
56: %token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
57: %token <i> ADD MINUS MULT DIVIDE MOD
58: %token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
59: %token <i> PRINT PRINTF SPRINTF
60: %token <p> ELSE INTEST CONDEXPR
61: %token <i> POSTINCR PREINCR POSTDECR PREDECR
62: %token <cp> VAR IVAR VARNF CALL NUMBER STRING FIELD
63: %token <s> REGEXPR
64:
/* Nonterminal value types. */
65: %type <p> pas pattern ppattern plist pplist patlist prarg term re
66: %type <p> pa_pat pa_stat pa_stats
67: %type <s> reg_expr
68: %type <p> simple_stmt opt_simple_stmt stmt stmtlist
69: %type <p> var varname funcname varlist
70: %type <p> for if else while
71: %type <i> do st
72: %type <i> pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
73: %type <i> subop print
74:
/*
 * Operator precedence and associativity.  As usual for yacc, lines appear
 * in order of increasing precedence: assignment binds loosest, INDIRECT
 * tightest.  CAT, UMINUS and '?' are also referenced through %prec in the
 * rules below.
 */
75: %right ASGNOP
76: %right '?'
77: %right ':'
78: %left BOR
79: %left AND
80: %left GETLINE
81: %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
82: %left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FIELD FUNC
83: %left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
84: %left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
85: %left REGEXPR VAR VARNF IVAR WHILE '('
86: %left CAT
87: %left '+' '-'
88: %left '*' '/' '%'
89: %left NOT UMINUS
90: %right POWER
91: %right DECR INCR
92: %left INDIRECT
93: %token LASTTOKEN /* must be last */
94:
95: %%
96:
/*
 * Start symbol.  When no error has been flagged (errorflag == 0) the whole
 * program is stored in `winner` as a PROGRAM node built from the BEGIN
 * list, the pattern-action list and the END list.  On a syntax error the
 * lookahead token is discarded, bracecheck() is called and "bailing out"
 * is reported.
 */
97: program:
98: pas { if (errorflag==0)
99: winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
100: | error { yyclearin; bracecheck(); ERROR "bailing out" SYNTAX; }
101: ;
102:
/*
 * The next five rules each accept a token followed by any number of
 * newlines, so that a construct may continue on the following line after
 * &&, ||, a comma, `do` or `else`.
 */
103: and:
104: AND | and NL
105: ;
106:
107: bor:
108: BOR | bor NL
109: ;
110:
111: comma:
112: ',' | comma NL
113: ;
114:
115: do:
116: DO | do NL
117: ;
118:
119: else:
120: ELSE | else NL
121: ;
122:
/*
 * for statements, in three forms: with a condition, without a condition,
 * and `for (name in array)`.
 *
 * The mid-rule action {inloop++;} runs before the body is parsed and is
 * itself counted as a symbol, which is why the body is $12, $10 and $8
 * respectively.  inloop is decremented again in the final action.
 * The condition is passed through notnull(); a missing condition is NIL.
 */
123: for:
124: FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
125: { --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
126: | FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
127: { --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
128: | FOR '(' varname IN varname rparen {inloop++;} stmt
129: { --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
130: ;
131:
/*
 * Name in a function definition.  Either token kind is accepted;
 * setfname() validates the cell and records its name in curfname.
 */
132: funcname:
133: VAR { setfname($1); }
134: | CALL { setfname($1); }
135: ;
136:
/* Head of an if statement; yields the condition wrapped by notnull(). */
137: if:
138: IF '(' pattern rparen { $$ = notnull($3); }
139: ;
140:
/* An opening brace followed by any number of newlines. */
141: lbrace:
142: '{' | lbrace NL
143: ;
144:
/* One or more newlines. */
145: nl:
146: NL | nl NL
147: ;
148:
/* Optional variants: each yields 0 when the optional part is absent. */
149: opt_nl:
150: /* empty */ { $$ = 0; }
151: | nl
152: ;
153:
154: opt_pst:
155: /* empty */ { $$ = 0; }
156: | pst
157: ;
158:
159:
160: opt_simple_stmt:
161: /* empty */ { $$ = 0; }
162: | simple_stmt
163: ;
164:
/*
 * Pattern-action list, optionally surrounded by statement terminators.
 * A program consisting only of terminators (or nothing) yields 0.
 */
165: pas:
166: opt_pst { $$ = 0; }
167: | opt_pst pa_stats opt_pst { $$ = $2; }
168: ;
169:
/* A pattern used as a selector; wrapped by notnull(). */
170: pa_pat:
171: pattern { $$ = notnull($1); }
172: ;
173:
/*
 * One top-level item:
 *   - pattern alone:            action defaults to PRINT of rectonode()
 *   - pattern { ... }:          PASTAT node
 *   - pat1, pat2 [{ ... }]:     range pattern, built by pa2stat()
 *   - { ... }:                  PASTAT with a NIL pattern
 *   - BEGIN / END blocks:       appended to beginloc / endloc; the item
 *                               itself yields 0
 *   - function definition:      infunc is raised by the mid-rule action
 *                               (which is $6, so the body is $8), then
 *                               lowered and curfname cleared before
 *                               defn() is called; the item itself yields 0
 */
174: pa_stat:
175: pa_pat { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
176: | pa_pat lbrace stmtlist '}' { $$ = stat2(PASTAT, $1, $3); }
177: | pa_pat ',' pa_pat { $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
178: | pa_pat ',' pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $3, $5); }
179: | lbrace stmtlist '}' { $$ = stat2(PASTAT, NIL, $2); }
180: | XBEGIN lbrace stmtlist '}'
181: { beginloc = linkum(beginloc, $3); $$ = 0; }
182: | XEND lbrace stmtlist '}'
183: { endloc = linkum(endloc, $3); $$ = 0; }
184: | FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
185: { infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
186: ;
187:
/* Sequence of items, chained with linkum(); terminators between items are optional. */
188: pa_stats:
189: pa_stat
190: | pa_stats opt_pst pa_stat { $$ = linkum($1, $3); }
191: ;
192:
/* Comma-separated list of patterns, chained with linkum(). */
193: patlist:
194: pattern
195: | patlist comma pattern { $$ = linkum($1, $3); }
196: ;
197:
/*
 * Restricted expression used for unparenthesised print arguments (via
 * pplist/prarg).  Compared with `pattern` it has no relational operators
 * and no `| getline`; simple_stmt uses GT, APPEND and '|' after the
 * argument list as output redirections.
 *
 * For MATCHOP with a non-regex right operand, a constant operand is
 * compiled immediately with makedfa(); otherwise the node keeps the
 * operand and carries (Node *)1 in place of NIL as its first argument
 * (presumably a run-time-compilation flag; confirm in the interpreter).
 */
198: ppattern:
199: var ASGNOP ppattern { $$ = op2($2, $1, $3); }
200: | ppattern '?' ppattern ':' ppattern %prec '?'
201: { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
202: | ppattern bor ppattern %prec BOR
203: { $$ = op2(BOR, notnull($1), notnull($3)); }
204: | ppattern and ppattern %prec AND
205: { $$ = op2(AND, notnull($1), notnull($3)); }
206: | ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
207: | ppattern MATCHOP ppattern
208: { if (constnode($3))
209: $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
210: else
211: $$ = op3($2, (Node *)1, $1, $3); }
212: | ppattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
213: | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
214: | ppattern term %prec CAT { $$ = op2(CAT, $1, $2); }
215: | re
216: | term
217: ;
218:
/*
 * General expression.  Same shape as ppattern, plus the six relational
 * operators and the `expr | getline [var]` forms.
 *
 * Operands of ?:, || and && are wrapped by notnull().  Juxtaposition of a
 * pattern and a term is concatenation (CAT).  For MATCHOP with a non-regex
 * right operand, a constant is compiled immediately with makedfa();
 * otherwise the operand is kept and (Node *)1 replaces NIL as the first
 * argument (presumably a run-time-compilation flag; confirm in the
 * interpreter).
 */
219: pattern:
220: var ASGNOP pattern { $$ = op2($2, $1, $3); }
221: | pattern '?' pattern ':' pattern %prec '?'
222: { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
223: | pattern bor pattern %prec BOR
224: { $$ = op2(BOR, notnull($1), notnull($3)); }
225: | pattern and pattern %prec AND
226: { $$ = op2(AND, notnull($1), notnull($3)); }
227: | pattern EQ pattern { $$ = op2($2, $1, $3); }
228: | pattern GE pattern { $$ = op2($2, $1, $3); }
229: | pattern GT pattern { $$ = op2($2, $1, $3); }
230: | pattern LE pattern { $$ = op2($2, $1, $3); }
231: | pattern LT pattern { $$ = op2($2, $1, $3); }
232: | pattern NE pattern { $$ = op2($2, $1, $3); }
233: | pattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
234: | pattern MATCHOP pattern
235: { if (constnode($3))
236: $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
237: else
238: $$ = op3($2, (Node *)1, $1, $3); }
239: | pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
240: | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
241: | pattern '|' GETLINE var { $$ = op3(GETLINE, $4, (Node*)$2, $1); }
242: | pattern '|' GETLINE { $$ = op3(GETLINE, (Node*)0, (Node*)$2, $1); }
243: | pattern term %prec CAT { $$ = op2(CAT, $1, $2); }
244: | re
245: | term
246: ;
247:
/* List of two or more patterns (used for parenthesised lists). */
248: plist:
249: pattern comma pattern { $$ = linkum($1, $3); }
250: | plist comma pattern { $$ = linkum($1, $3); }
251: ;
252:
/* List of one or more ppatterns (unparenthesised print arguments). */
253: pplist:
254: ppattern
255: | pplist comma ppattern { $$ = linkum($1, $3); }
256: ;
257:
/*
 * Arguments of print/printf: nothing (defaults to rectonode()), an
 * unparenthesised pplist, or a parenthesised plist.
 */
258: prarg:
259: /* empty */ { $$ = rectonode(); }
260: | pplist
261: | '(' plist ')' { $$ = $2; }
262: ;
263:
/* Either output keyword; the token value is used as the statement type by simple_stmt. */
264: print:
265: PRINT | PRINTF
266: ;
267:
/* One or more statement terminators (newline or semicolon) at program level. */
268: pst:
269: NL | ';' | pst NL | pst ';'
270: ;
271:
/* A closing brace followed by any number of newlines. */
272: rbrace:
273: '}' | rbrace NL
274: ;
275:
/*
 * A bare /regex/ used as an expression: built as a MATCH of rectonode()
 * against the compiled DFA.  `NOT re` negates it.
 */
276: re:
277: reg_expr
278: { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
279: | NOT re { $$ = op1(NOT, notnull($2)); }
280: ;
281:
/*
 * Regular-expression literal.  startreg() is called from a mid-rule action
 * right after the opening '/', before the REGEXPR token is requested
 * (presumably switching the lexer into regex mode; confirm in the lexer).
 * The mid-rule action is $2, so the regex text is $3.
 */
282: reg_expr:
283: '/' {startreg();} REGEXPR '/' { $$ = $3; }
284: ;
285:
/* A closing parenthesis followed by any number of newlines. */
286: rparen:
287: ')' | rparen NL
288: ;
289:
/*
 * Statements that may also appear in the init/increment slots of `for`:
 * print/printf with optional redirection ('|', APPEND or GT followed by a
 * term; the redirection token value is stored in the node as a Node *),
 * delete of an element or of a whole array, or an expression statement.
 * On a syntax error the lookahead is discarded and "illegal statement" is
 * reported.
 */
290: simple_stmt:
291: print prarg '|' term { $$ = stat3($1, $2, (Node *) $3, $4); }
292: | print prarg APPEND term { $$ = stat3($1, $2, (Node *) $3, $4); }
293: | print prarg GT term { $$ = stat3($1, $2, (Node *) $3, $4); }
294: | print prarg { $$ = stat3($1, $2, NIL, NIL); }
295: | DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
296: | DELETE varname { $$ = stat2(DELETE, makearr($2), 0); }
297: | pattern { $$ = exptostat($1); }
298: | error { yyclearin; ERROR "illegal statement" SYNTAX; }
299: ;
300:
/* Statement terminator inside a block: newline(s), or ';' with optional newlines. */
301: st:
302: nl
303: | ';' opt_nl
304: ;
305:
/*
 * A single statement.
 *
 *  - break/continue report a syntax error when inloop is zero, but the
 *    node is still built.
 *  - do ... while: both mid-rule actions count as symbols ($2 and $4), so
 *    the body is $3 and the condition is $7.  inloop is lowered before the
 *    WHILE keyword is consumed.
 *  - next/nextfile report a syntax error when infunc is nonzero, but the
 *    node is still built.
 *  - while: the mid-rule action is $2, so the body is $3.
 *  - an empty statement (';') yields 0.
 */
306: stmt:
307: BREAK st { if (!inloop) ERROR "break illegal outside of loops" SYNTAX;
308: $$ = stat1(BREAK, NIL); }
309: | CLOSE pattern st { $$ = stat1(CLOSE, $2); }
310: | CONTINUE st { if (!inloop) ERROR "continue illegal outside of loops" SYNTAX;
311: $$ = stat1(CONTINUE, NIL); }
312: | do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
313: { $$ = stat2(DO, $3, notnull($7)); }
314: | EXIT pattern st { $$ = stat1(EXIT, $2); }
315: | EXIT st { $$ = stat1(EXIT, NIL); }
316: | for
317: | if stmt else stmt { $$ = stat3(IF, $1, $2, $4); }
318: | if stmt { $$ = stat3(IF, $1, $2, NIL); }
319: | lbrace stmtlist rbrace { $$ = $2; }
320: | NEXT st { if (infunc)
321: ERROR "next is illegal inside a function" SYNTAX;
322: $$ = stat1(NEXT, NIL); }
323: | NEXTFILE st { if (infunc)
324: ERROR "nextfile is illegal inside a function" SYNTAX;
325: $$ = stat1(NEXTFILE, NIL); }
326: | RETURN pattern st { $$ = stat1(RETURN, $2); }
327: | RETURN st { $$ = stat1(RETURN, NIL); }
328: | simple_stmt st
329: | while {inloop++;} stmt { --inloop; $$ = stat2(WHILE, $1, $3); }
330: | ';' opt_nl { $$ = 0; }
331: ;
332:
/* One or more statements, chained with linkum(). */
333: stmtlist:
334: stmt
335: | stmtlist stmt { $$ = linkum($1, $2); }
336: ;
337:
/* Either substitution keyword; the token value becomes the node type in term. */
338: subop:
339: SUB | GSUB
340: ;
341:
/*
 * Arithmetic expressions, function calls and primaries.
 *
 *  - Unary '+' returns its operand unchanged; unary '-' and NOT take the
 *    UMINUS precedence.
 *  - A builtin written without arguments (with or without parentheses)
 *    receives rectonode() as its argument.
 *  - getline comes in four forms here (with/without a variable, with/
 *    without `< file`); the piped forms live in `pattern`.
 *  - index() with a regex second argument reports a syntax error but still
 *    builds a node.
 *  - match(), sub() and gsub(): a regex literal or constant operand is
 *    compiled immediately with makedfa(..., 1); otherwise the operand is
 *    kept and (Node *)1 replaces NIL as the first argument (presumably a
 *    run-time-compilation flag; confirm in the interpreter).
 *  - sub()/gsub() without a target operate on rectonode().
 *  - split(): the last argument is the token code STRING or REGEXPR, cast
 *    to Node *, recording which kind of separator was given.
 */
342: term:
343: term '+' term { $$ = op2(ADD, $1, $3); }
344: | term '-' term { $$ = op2(MINUS, $1, $3); }
345: | term '*' term { $$ = op2(MULT, $1, $3); }
346: | term '/' term { $$ = op2(DIVIDE, $1, $3); }
347: | term '%' term { $$ = op2(MOD, $1, $3); }
348: | term POWER term { $$ = op2(POWER, $1, $3); }
349: | '-' term %prec UMINUS { $$ = op1(UMINUS, $2); }
350: | '+' term %prec UMINUS { $$ = $2; }
351: | NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); }
352: | BLTIN '(' ')' { $$ = op2(BLTIN, (Node *) $1, rectonode()); }
353: | BLTIN '(' patlist ')' { $$ = op2(BLTIN, (Node *) $1, $3); }
354: | BLTIN { $$ = op2(BLTIN, (Node *) $1, rectonode()); }
355: | CALL '(' ')' { $$ = op2(CALL, valtonode($1,CVAR), NIL); }
356: | CALL '(' patlist ')' { $$ = op2(CALL, valtonode($1,CVAR), $3); }
357: | DECR var { $$ = op1(PREDECR, $2); }
358: | INCR var { $$ = op1(PREINCR, $2); }
359: | var DECR { $$ = op1(POSTDECR, $1); }
360: | var INCR { $$ = op1(POSTINCR, $1); }
361: | GETLINE var LT term { $$ = op3(GETLINE, $2, (Node *)$3, $4); }
362: | GETLINE LT term { $$ = op3(GETLINE, NIL, (Node *)$2, $3); }
363: | GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }
364: | GETLINE { $$ = op3(GETLINE, NIL, NIL, NIL); }
365: | INDEX '(' pattern comma pattern ')'
366: { $$ = op2(INDEX, $3, $5); }
367: | INDEX '(' pattern comma reg_expr ')'
368: { ERROR "index() doesn't permit regular expressions" SYNTAX;
369: $$ = op2(INDEX, $3, (Node*)$5); }
370: | '(' pattern ')' { $$ = $2; }
371: | MATCHFCN '(' pattern comma reg_expr ')'
372: { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
373: | MATCHFCN '(' pattern comma pattern ')'
374: { if (constnode($5))
375: $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
376: else
377: $$ = op3(MATCHFCN, (Node *)1, $3, $5); }
378: | NUMBER { $$ = valtonode($1, CCON); }
379: | SPLIT '(' pattern comma varname comma pattern ')' /* string */
380: { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
381: | SPLIT '(' pattern comma varname comma reg_expr ')' /* const /regexp/ */
382: { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
383: | SPLIT '(' pattern comma varname ')'
384: { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */
385: | SPRINTF '(' patlist ')' { $$ = op1($1, $3); }
386: | STRING { $$ = valtonode($1, CCON); }
387: | subop '(' reg_expr comma pattern ')'
388: { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
389: | subop '(' pattern comma pattern ')'
390: { if (constnode($3))
391: $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
392: else
393: $$ = op4($1, (Node *)1, $3, $5, rectonode()); }
394: | subop '(' reg_expr comma pattern comma var ')'
395: { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
396: | subop '(' pattern comma pattern comma var ')'
397: { if (constnode($3))
398: $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
399: else
400: $$ = op4($1, (Node *)1, $3, $5, $7); }
401: | SUBSTR '(' pattern comma pattern comma pattern ')'
402: { $$ = op3(SUBSTR, $3, $5, $7); }
403: | SUBSTR '(' pattern comma pattern ')'
404: { $$ = op3(SUBSTR, $3, $5, NIL); }
405: | var
406: ;
407:
/*
 * Assignable expressions: a plain name, an array element (the name is
 * converted with makearr()), a FIELD token, an IVAR token wrapped in
 * INDIRECT, or INDIRECT applied to an arbitrary term.
 */
408: var:
409: varname
410: | varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); }
411: | FIELD { $$ = valtonode($1, CFLD); }
412: | IVAR { $$ = op1(INDIRECT, valtonode($1, CVAR)); }
413: | INDIRECT term { $$ = op1(INDIRECT, $2); }
414: ;
415:
/*
 * Parameter list of a function definition.  The global arglist is updated
 * at every step so that it always mirrors the list parsed so far.  Each
 * additional name is checked against the earlier ones with checkdup()
 * before being appended.
 */
416: varlist:
417: /* nothing */ { arglist = $$ = 0; }
418: | VAR { arglist = $$ = valtonode($1,CVAR); }
419: | varlist comma VAR {
420: checkdup($1, $3);
421: arglist = $$ = linkum($1,valtonode($3,CVAR)); }
422: ;
423:
/*
 * A name: an ordinary variable cell, or an ARG / VARNF token whose integer
 * value is stored in the node as a Node *.
 */
424: varname:
425: VAR { $$ = valtonode($1, CVAR); }
426: | ARG { $$ = op1(ARG, (Node *) $1); }
427: | VARNF { $$ = op1(VARNF, (Node *) $1); }
428: ;
429:
430:
/* Head of a while statement; yields the condition wrapped by notnull(). */
431: while:
432: WHILE '(' pattern rparen { $$ = notnull($3); }
433: ;
434:
435: %%
436:
437: void setfname(Cell *p)
438: {
439: if (isarr(p))
440: ERROR "%s is an array, not a function", p->nval SYNTAX;
441: else if (isfunc(p))
442: ERROR "you can't define function %s more than once", p->nval SYNTAX;
443: curfname = p->nval;
444: }
445:
446: int constnode(Node *p)
447: {
448: return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
449: }
450:
451: char *strnode(Node *p)
452: {
453: return ((Cell *)(p->narg[0]))->sval;
454: }
455:
456: Node *notnull(Node *n)
457: {
458: switch (n->nobj) {
459: case LE: case LT: case EQ: case NE: case GT: case GE:
460: case BOR: case AND: case NOT:
461: return n;
462: default:
463: return op2(NE, n, nullnode);
464: }
465: }
466:
467: void checkdup(Node *vl, Cell *cp) /* check if name already in list */
468: {
469: char *s = cp->nval;
470: for ( ; vl; vl = vl->nnext) {
471: if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
472: ERROR "duplicate argument %s", s SYNTAX;
473: break;
474: }
475: }
476: }