Annotation of src/usr.bin/awk/awkgram.y, Revision 1.7
1.7 ! millert 1: /* $OpenBSD: awkgram.y,v 1.6 1999/12/08 23:09:45 millert Exp $ */
1.1 tholo 2: /****************************************************************
1.4 kstailey 3: Copyright (C) Lucent Technologies 1997
1.1 tholo 4: All Rights Reserved
5:
6: Permission to use, copy, modify, and distribute this software and
7: its documentation for any purpose and without fee is hereby
8: granted, provided that the above copyright notice appear in all
9: copies and that both that the copyright notice and this
10: permission notice and warranty disclaimer appear in supporting
1.4 kstailey 11: documentation, and that the name Lucent Technologies or any of
12: its entities not be used in advertising or publicity pertaining
13: to distribution of the software without specific, written prior
14: permission.
15:
16: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23: THIS SOFTWARE.
1.1 tholo 24: ****************************************************************/
25:
26: %{
/* C prologue: includes, prototypes and the global state that the grammar actions below read and write. */
27: #include <stdio.h>
28: #include <string.h>
29: #include "awk.h"
30:
/* checkdup is defined after the grammar; it is called from the varlist rule. */
31: void checkdup(Node *list, Cell *item);
/* yywrap unconditionally returns 1. */
32: int yywrap(void) { return(1); }
33:
/* beginloc / endloc collect the BEGIN and END actions (appended with linkum() in pa_stat)
   and are handed to stat3(PROGRAM, ...) by the program rule. */
34: Node *beginloc = 0;
35: Node *endloc = 0;
/* infunc and inloop are counters bumped by mid-rule actions; stmt uses them to reject
   next/nextfile inside functions and break/continue outside loops. */
36: int infunc = 0; /* = 1 if in arglist or body of func */
37: int inloop = 0; /* = 1 if in while, for, do */
/* curfname is set by setfname() and cleared when a function definition is reduced. */
38: char *curfname = 0; /* current function name */
/* arglist is assigned by every alternative of the varlist rule. */
39: Node *arglist = 0; /* list of args for current function */
40: %}
41:
/* Semantic value type; the member is selected by the <p>, <cp>, <i> or <s> tag
   on the %token / %type declarations that follow. */
42: %union {
/* <p>: parse-tree node */
43: Node *p;
/* <cp>: symbol/constant cell (VAR, IVAR, VARNF, CALL, NUMBER, STRING) */
44: Cell *cp;
/* <i>: plain integer value */
1.3 millert 45: int i;
/* <s>: character string (REGEXPR token and reg_expr) */
1.1 tholo 46: char *s;
47: }
48:
/* Token declarations, grouped by the %union member that carries their value.
   FIRSTTOKEN and LASTTOKEN bracket the list (see their own comments). */
49: %token <i> FIRSTTOKEN /* must be first */
50: %token <p> PROGRAM PASTAT PASTAT2 XBEGIN XEND
51: %token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
52: %token <i> ARRAY
53: %token <i> MATCH NOTMATCH MATCHOP
54: %token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS
55: %token <i> AND BOR APPEND EQ GE GT LE LT NE IN
56: %token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
57: %token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
58: %token <i> ADD MINUS MULT DIVIDE MOD
59: %token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
60: %token <i> PRINT PRINTF SPRINTF
61: %token <p> ELSE INTEST CONDEXPR
62: %token <i> POSTINCR PREINCR POSTDECR PREDECR
1.4 kstailey 63: %token <cp> VAR IVAR VARNF CALL NUMBER STRING
1.1 tholo 64: %token <s> REGEXPR
65:
/* Value types of the nonterminals defined in the rules section. */
66: %type <p> pas pattern ppattern plist pplist patlist prarg term re
67: %type <p> pa_pat pa_stat pa_stats
68: %type <s> reg_expr
69: %type <p> simple_stmt opt_simple_stmt stmt stmtlist
70: %type <p> var varname funcname varlist
71: %type <p> for if else while
72: %type <i> do st
73: %type <i> pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
74: %type <i> subop print
75:
/* Precedence and associativity.  In yacc, each later line binds more tightly than
   the lines before it, so ASGNOP is lowest and INDIRECT highest.  Tokens that appear
   only here (e.g. GETLINE, RETURN, SPLIT, SUBSTR, WHILE, CAT, NOT, UMINUS, POWER,
   DECR, INCR, INDIRECT) are declared by these lines, without a value tag. */
76: %right ASGNOP
77: %right '?'
78: %right ':'
79: %left BOR
80: %left AND
81: %left GETLINE
82: %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
1.4 kstailey 83: %left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
1.1 tholo 84: %left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
85: %left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
86: %left REGEXPR VAR VARNF IVAR WHILE '('
87: %left CAT
88: %left '+' '-'
89: %left '*' '/' '%'
90: %left NOT UMINUS
91: %right POWER
92: %right DECR INCR
93: %left INDIRECT
94: %token LASTTOKEN /* must be last */
95:
96: %%
97:
/*
 * program: first rule of the grammar, hence the start symbol.
 * When the input reduces to pas and errorflag is 0, the tree
 * stat3(PROGRAM, beginloc, $1, endloc) is stored in winner.
 * On a parse error the lookahead token is discarded (yyclearin),
 * bracecheck() is called and "bailing out" is reported via SYNTAX.
 */
98: program:
99: pas { if (errorflag==0)
100: winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
1.6 millert 101: | error { yyclearin; bracecheck(); SYNTAX("bailing out"); }
1.1 tholo 102: ;
103:
/*
 * and, bor, comma, do, else: each is the named token followed by any
 * number of NL tokens, so a line break is accepted after the token.
 * None has an explicit action; the value is yacc's default ($$ = $1).
 */
104: and:
105: AND | and NL
106: ;
107:
108: bor:
109: BOR | bor NL
110: ;
111:
112: comma:
113: ',' | comma NL
114: ;
115:
116: do:
117: DO | do NL
118: ;
119:
120: else:
121: ELSE | else NL
122: ;
123:
/*
 * for: the three forms of the for statement.
 *   1. for (init; cond; incr) body -> stat4(FOR, init, notnull(cond), incr, body)
 *   2. for (init; ; incr) body     -> stat4(FOR, init, NIL, incr, body)
 *   3. for (var in array) body     -> stat3(IN, var, makearr(array), body)
 * In every form a mid-rule action increments inloop before the body is
 * parsed and the final action decrements it again.  The mid-rule action
 * occupies a $n slot, which is why the body is $12, $10 and $8 respectively.
 */
124: for:
125: FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
126: { --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
127: | FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
128: { --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
129: | FOR '(' varname IN varname rparen {inloop++;} stmt
130: { --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
131: ;
132:
/*
 * funcname: the name in a function definition, lexed as either VAR or CALL.
 * setfname() validates the cell and records its name in curfname.
 * NOTE(review): neither action assigns $$; the rule appears to rely on yacc's
 * implicit $$ = $1 so that pa_stat can cast $2 back to (Cell *) -- confirm
 * against the parser generator in use.
 */
133: funcname:
134: VAR { setfname($1); }
135: | CALL { setfname($1); }
136: ;
137:
/*
 * if: only the "if (cond)" head.  The value is the condition passed through
 * notnull(); the stmt rule attaches the then/else branches.
 */
138: if:
139: IF '(' pattern rparen { $$ = notnull($3); }
140: ;
141:
/* lbrace: '{' followed by any number of NL tokens. */
142: lbrace:
143: '{' | lbrace NL
144: ;
145:
/* nl: one or more NL tokens. */
146: nl:
147: NL | nl NL
148: ;
149:
/* opt_nl: zero or more NL tokens; the empty alternative yields 0. */
150: opt_nl:
151: /* empty */ { $$ = 0; }
152: | nl
153: ;
154:
/* opt_pst: optional run of pattern-statement separators (see pst); empty yields 0. */
155: opt_pst:
156: /* empty */ { $$ = 0; }
157: | pst
158: ;
159:
160:
/* opt_simple_stmt: optional simple_stmt, used for the init and increment parts of for; empty yields 0. */
161: opt_simple_stmt:
162: /* empty */ { $$ = 0; }
163: | simple_stmt
164: ;
165:
/*
 * pas: the whole list of pattern-action statements, with optional separators
 * before and after.  Yields 0 when there are no statements, otherwise the
 * pa_stats list.
 */
166: pas:
167: opt_pst { $$ = 0; }
168: | opt_pst pa_stats opt_pst { $$ = $2; }
169: ;
170:
/* pa_pat: a pattern used as a selection condition; passed through notnull(). */
171: pa_pat:
172: pattern { $$ = notnull($1); }
173: ;
174:
/*
 * pa_stat: one top-level item of the program.
 *   pattern                      -> PASTAT whose action is stat2(PRINT, rectonode(), NIL)
 *   pattern { stmts }            -> PASTAT with the given action
 *   pat1, pat2                   -> pa2stat() range with the same default PRINT action
 *   pat1, pat2 { stmts }         -> pa2stat() range with the given action
 *   { stmts }                    -> PASTAT with a NIL pattern
 *   XBEGIN { stmts }             -> appended to beginloc with linkum(); yields 0
 *   XEND { stmts }               -> appended to endloc with linkum(); yields 0
 *   FUNC name ( params ) { ... } -> registered through defn(); yields 0
 * For the function form a mid-rule action increments infunc before the body
 * is parsed; the final action decrements it and clears curfname.  The
 * mid-rule action takes the $6 slot, so the body is $8.
 */
175: pa_stat:
176: pa_pat { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
177: | pa_pat lbrace stmtlist '}' { $$ = stat2(PASTAT, $1, $3); }
178: | pa_pat ',' pa_pat { $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
179: | pa_pat ',' pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $3, $5); }
180: | lbrace stmtlist '}' { $$ = stat2(PASTAT, NIL, $2); }
181: | XBEGIN lbrace stmtlist '}'
182: { beginloc = linkum(beginloc, $3); $$ = 0; }
183: | XEND lbrace stmtlist '}'
184: { endloc = linkum(endloc, $3); $$ = 0; }
185: | FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
186: { infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
187: ;
188:
/* pa_stats: one or more pa_stat items, optionally separated by pst, chained with linkum(). */
189: pa_stats:
190: pa_stat
191: | pa_stats opt_pst pa_stat { $$ = linkum($1, $3); }
192: ;
193:
/* patlist: one or more patterns separated by commas, chained with linkum(). */
194: patlist:
195: pattern
196: | patlist comma pattern { $$ = linkum($1, $3); }
197: ;
198:
/*
 * ppattern: the expression form reached from prarg through pplist, i.e. the
 * unparenthesized arguments of print/printf.  It mirrors pattern but has no
 * alternatives for EQ/GE/GT/LE/LT/NE and none for "| getline".
 * NOTE(review): presumably those are left out so that '>' and '|' after a
 * print statement are taken as redirections by simple_stmt -- confirm.
 *
 * MATCHOP handling: a literal /regex/, or an operand for which constnode()
 * holds, is compiled here with makedfa() and stored with NIL in the first
 * op3 slot; any other operand is stored uncompiled with (Node *)1 in that slot.
 */
199: ppattern:
200: var ASGNOP ppattern { $$ = op2($2, $1, $3); }
201: | ppattern '?' ppattern ':' ppattern %prec '?'
202: { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
203: | ppattern bor ppattern %prec BOR
204: { $$ = op2(BOR, notnull($1), notnull($3)); }
205: | ppattern and ppattern %prec AND
206: { $$ = op2(AND, notnull($1), notnull($3)); }
207: | ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
208: | ppattern MATCHOP ppattern
209: { if (constnode($3))
210: $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
211: else
212: $$ = op3($2, (Node *)1, $1, $3); }
213: | ppattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
214: | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
215: | ppattern term %prec CAT { $$ = op2(CAT, $1, $2); }
216: | re
217: | term
218: ;
219:
/*
 * pattern: the general expression form.
 *   - assignment: op2 with the ASGNOP token's value as the operator
 *   - ?: / || / &&: the condition operands are passed through notnull()
 *   - comparisons: op2 with the comparison token's value as the operator
 *   - MATCHOP: a literal /regex/, or an operand for which constnode() holds,
 *     is compiled here with makedfa() and stored with NIL in the first op3
 *     slot; any other operand is stored uncompiled with (Node *)1 there
 *   - IN: membership test on an array, with a single or parenthesized
 *     multi-part subscript
 *   - "cmd | getline [var]": when the global `safe` is set a SYNTAX error is
 *     reported and $$ is not assigned by the action; otherwise an
 *     op3(GETLINE, ...) node is built
 *   - juxtaposition of a pattern and a term: concatenation (CAT)
 */
220: pattern:
221: var ASGNOP pattern { $$ = op2($2, $1, $3); }
222: | pattern '?' pattern ':' pattern %prec '?'
223: { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
224: | pattern bor pattern %prec BOR
225: { $$ = op2(BOR, notnull($1), notnull($3)); }
226: | pattern and pattern %prec AND
227: { $$ = op2(AND, notnull($1), notnull($3)); }
228: | pattern EQ pattern { $$ = op2($2, $1, $3); }
229: | pattern GE pattern { $$ = op2($2, $1, $3); }
230: | pattern GT pattern { $$ = op2($2, $1, $3); }
231: | pattern LE pattern { $$ = op2($2, $1, $3); }
232: | pattern LT pattern { $$ = op2($2, $1, $3); }
233: | pattern NE pattern { $$ = op2($2, $1, $3); }
234: | pattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
235: | pattern MATCHOP pattern
236: { if (constnode($3))
237: $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
238: else
239: $$ = op3($2, (Node *)1, $1, $3); }
240: | pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
241: | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
1.4 kstailey 242: | pattern '|' GETLINE var {
1.6 millert 243: if (safe) SYNTAX("cmd | getline is unsafe");
1.5 millert 244: else $$ = op3(GETLINE, $4, itonp($2), $1); }
1.4 kstailey 245: | pattern '|' GETLINE {
1.6 millert 246: if (safe) SYNTAX("cmd | getline is unsafe");
1.5 millert 247: else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
1.1 tholo 248: | pattern term %prec CAT { $$ = op2(CAT, $1, $2); }
249: | re
250: | term
251: ;
252:
/* plist: two or more patterns separated by commas, chained with linkum();
   used only inside parentheses (prarg and the "(a,b) in arr" forms). */
253: plist:
254: pattern comma pattern { $$ = linkum($1, $3); }
255: | plist comma pattern { $$ = linkum($1, $3); }
256: ;
257:
/* pplist: one or more ppattern items separated by commas, chained with linkum(). */
258: pplist:
259: ppattern
260: | pplist comma ppattern { $$ = linkum($1, $3); }
261: ;
262:
/* prarg: argument list of print/printf.  With no arguments the value is
   rectonode(); otherwise a pplist, or a parenthesized plist. */
263: prarg:
264: /* empty */ { $$ = rectonode(); }
265: | pplist
266: | '(' plist ')' { $$ = $2; }
267: ;
268:
/* print: either output keyword; the token's value is used as the statement type by simple_stmt. */
269: print:
270: PRINT | PRINTF
271: ;
272:
/* pst: one or more NL or ';' tokens in any mix. */
273: pst:
274: NL | ';' | pst NL | pst ';'
275: ;
276:
/* rbrace: '}' followed by any number of NL tokens. */
277: rbrace:
278: '}' | rbrace NL
279: ;
280:
/*
 * re: a regular expression used on its own as an expression.  A bare /regex/
 * becomes op3(MATCH, NIL, rectonode(), dfa) -- a match against the node
 * returned by rectonode() -- and a leading NOT negates it.
 */
281: re:
282: reg_expr
283: { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
284: | NOT re { $$ = op1(NOT, notnull($2)); }
285: ;
286:
/*
 * reg_expr: a /regex/ literal.  startreg() is called in a mid-rule action
 * right after the opening '/'; the value is the REGEXPR token's string ($3,
 * because the mid-rule action occupies $2).
 * NOTE(review): startreg() presumably switches the lexer to regex scanning -- confirm.
 */
287: reg_expr:
288: '/' {startreg();} REGEXPR '/' { $$ = $3; }
289: ;
290:
/* rparen: ')' followed by any number of NL tokens. */
291: rparen:
292: ')' | rparen NL
293: ;
294:
/*
 * simple_stmt: statements that may appear in the init/increment slots of for.
 *   - print/printf redirected with '|', APPEND or GT: when the global `safe`
 *     is set a SYNTAX error is reported and $$ is not assigned by the action;
 *     otherwise stat3(kind, args, itonp(redirection token), target)
 *   - print/printf without redirection: stat3(kind, args, NIL, NIL)
 *   - delete arr[subs] / delete arr: stat2(DELETE, makearr(arr), subs-or-0)
 *   - any pattern used as a statement: wrapped by exptostat()
 *   - error recovery: discard the lookahead and report "illegal statement"
 */
295: simple_stmt:
1.4 kstailey 296: print prarg '|' term {
1.6 millert 297: if (safe) SYNTAX("print | is unsafe");
1.5 millert 298: else $$ = stat3($1, $2, itonp($3), $4); }
1.4 kstailey 299: | print prarg APPEND term {
1.6 millert 300: if (safe) SYNTAX("print >> is unsafe");
1.5 millert 301: else $$ = stat3($1, $2, itonp($3), $4); }
1.4 kstailey 302: | print prarg GT term {
1.6 millert 303: if (safe) SYNTAX("print > is unsafe");
1.5 millert 304: else $$ = stat3($1, $2, itonp($3), $4); }
1.1 tholo 305: | print prarg { $$ = stat3($1, $2, NIL, NIL); }
306: | DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
307: | DELETE varname { $$ = stat2(DELETE, makearr($2), 0); }
308: | pattern { $$ = exptostat($1); }
1.6 millert 309: | error { yyclearin; SYNTAX("illegal statement"); }
1.1 tholo 310: ;
311:
/* st: statement terminator -- one or more NL tokens, or ';' followed by optional NL tokens. */
312: st:
313: nl
314: | ';' opt_nl
315: ;
316:
317: stmt:
1.6 millert 318: BREAK st { if (!inloop) SYNTAX("break illegal outside of loops");
1.1 tholo 319: $$ = stat1(BREAK, NIL); }
1.6 millert 320: | CONTINUE st { if (!inloop) SYNTAX("continue illegal outside of loops");
1.1 tholo 321: $$ = stat1(CONTINUE, NIL); }
322: | do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
323: { $$ = stat2(DO, $3, notnull($7)); }
324: | EXIT pattern st { $$ = stat1(EXIT, $2); }
325: | EXIT st { $$ = stat1(EXIT, NIL); }
326: | for
327: | if stmt else stmt { $$ = stat3(IF, $1, $2, $4); }
328: | if stmt { $$ = stat3(IF, $1, $2, NIL); }
329: | lbrace stmtlist rbrace { $$ = $2; }
330: | NEXT st { if (infunc)
1.6 millert 331: SYNTAX("next is illegal inside a function");
1.1 tholo 332: $$ = stat1(NEXT, NIL); }
333: | NEXTFILE st { if (infunc)
1.6 millert 334: SYNTAX("nextfile is illegal inside a function");
1.1 tholo 335: $$ = stat1(NEXTFILE, NIL); }
336: | RETURN pattern st { $$ = stat1(RETURN, $2); }
337: | RETURN st { $$ = stat1(RETURN, NIL); }
338: | simple_stmt st
339: | while {inloop++;} stmt { --inloop; $$ = stat2(WHILE, $1, $3); }
340: | ';' opt_nl { $$ = 0; }
341: ;
342:
/* stmtlist: one or more statements chained with linkum(). */
343: stmtlist:
344: stmt
345: | stmtlist stmt { $$ = linkum($1, $2); }
346: ;
347:
/* subop: either substitution keyword; the token's value is used as the op4 operator in term. */
348: subop:
349: SUB | GSUB
350: ;
351:
/*
 * term: arithmetic, function calls, constants and variables -- the operands
 * that pattern and ppattern combine.
 */
352: term:
/* NOTE(review): '/' followed by ASGNOP always builds DIVEQ and ignores the
   ASGNOP token's own value; presumably this catches "/=" when it is lexed as
   two tokens -- confirm. */
1.5 millert 353: term '/' ASGNOP term { $$ = op2(DIVEQ, $1, $4); }
354: | term '+' term { $$ = op2(ADD, $1, $3); }
1.1 tholo 355: | term '-' term { $$ = op2(MINUS, $1, $3); }
356: | term '*' term { $$ = op2(MULT, $1, $3); }
357: | term '/' term { $$ = op2(DIVIDE, $1, $3); }
358: | term '%' term { $$ = op2(MOD, $1, $3); }
359: | term POWER term { $$ = op2(POWER, $1, $3); }
/* Unary operators share UMINUS precedence; unary plus yields its operand unchanged. */
360: | '-' term %prec UMINUS { $$ = op1(UMINUS, $2); }
361: | '+' term %prec UMINUS { $$ = $2; }
362: | NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); }
/* Builtins: with no argument list, or an empty one, the argument is rectonode(). */
1.5 millert 363: | BLTIN '(' ')' { $$ = op2(BLTIN, itonp($1), rectonode()); }
364: | BLTIN '(' patlist ')' { $$ = op2(BLTIN, itonp($1), $3); }
365: | BLTIN { $$ = op2(BLTIN, itonp($1), rectonode()); }
/* User function calls: the argument list is NIL when empty. */
1.4 kstailey 366: | CALL '(' ')' { $$ = op2(CALL, celltonode($1,CVAR), NIL); }
367: | CALL '(' patlist ')' { $$ = op2(CALL, celltonode($1,CVAR), $3); }
1.7 ! millert 368: | CLOSE term { $$ = op1(CLOSE, $2); }
1.1 tholo 369: | DECR var { $$ = op1(PREDECR, $2); }
370: | INCR var { $$ = op1(PREINCR, $2); }
371: | var DECR { $$ = op1(POSTDECR, $1); }
372: | var INCR { $$ = op1(POSTINCR, $1); }
/* getline forms without a pipe: optional target variable, optional "< file" source. */
1.5 millert 373: | GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); }
374: | GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); }
1.1 tholo 375: | GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }
376: | GETLINE { $$ = op3(GETLINE, NIL, NIL, NIL); }
377: | INDEX '(' pattern comma pattern ')'
378: { $$ = op2(INDEX, $3, $5); }
/* A /regex/ as the second argument of index() is rejected with a SYNTAX error;
   a node is still built, with the char * value of $5 cast to Node *. */
379: | INDEX '(' pattern comma reg_expr ')'
1.6 millert 380: { SYNTAX("index() doesn't permit regular expressions");
1.1 tholo 381: $$ = op2(INDEX, $3, (Node*)$5); }
382: | '(' pattern ')' { $$ = $2; }
/* match(): makedfa() is called with 1 as its second argument here (the MATCHOP
   rules pass 0).  A constant second argument is compiled at parse time;
   otherwise (Node *)1 goes in the first op3 slot with the uncompiled operand. */
383: | MATCHFCN '(' pattern comma reg_expr ')'
384: { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
385: | MATCHFCN '(' pattern comma pattern ')'
386: { if (constnode($5))
387: $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
388: else
389: $$ = op3(MATCHFCN, (Node *)1, $3, $5); }
1.4 kstailey 390: | NUMBER { $$ = celltonode($1, CCON); }
/* split(): the fourth op4 operand is a token code cast to Node * that records
   whether the separator operand is a string expression (STRING) or a compiled
   regex (REGEXPR); with no separator given, the operand is NIL tagged STRING. */
1.1 tholo 391: | SPLIT '(' pattern comma varname comma pattern ')' /* string */
392: { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
393: | SPLIT '(' pattern comma varname comma reg_expr ')' /* const /regexp/ */
394: { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
395: | SPLIT '(' pattern comma varname ')'
396: { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */
397: | SPRINTF '(' patlist ')' { $$ = op1($1, $3); }
1.4 kstailey 398: | STRING { $$ = celltonode($1, CCON); }
/* sub()/gsub(): the two-argument forms pass rectonode() as the target, the
   three-argument forms pass the given var.  The regex operand follows the same
   compile-now (NIL) / uncompiled ((Node *)1) convention as match(). */
1.1 tholo 399: | subop '(' reg_expr comma pattern ')'
400: { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
401: | subop '(' pattern comma pattern ')'
402: { if (constnode($3))
403: $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
404: else
405: $$ = op4($1, (Node *)1, $3, $5, rectonode()); }
406: | subop '(' reg_expr comma pattern comma var ')'
407: { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
408: | subop '(' pattern comma pattern comma var ')'
409: { if (constnode($3))
410: $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
411: else
412: $$ = op4($1, (Node *)1, $3, $5, $7); }
/* substr(): the length operand is NIL in the two-argument form. */
413: | SUBSTR '(' pattern comma pattern comma pattern ')'
414: { $$ = op3(SUBSTR, $3, $5, $7); }
415: | SUBSTR '(' pattern comma pattern ')'
416: { $$ = op3(SUBSTR, $3, $5, NIL); }
417: | var
418: ;
419:
/*
 * var: anything that can be assigned to or incremented.
 *   varname            -> passed through unchanged
 *   varname [ subs ]   -> op2(ARRAY, makearr(name), subs)
 *   IVAR               -> op1(INDIRECT, ...) around the cell's node
 *   INDIRECT term      -> op1(INDIRECT, term)
 */
420: var:
421: varname
422: | varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); }
1.4 kstailey 423: | IVAR { $$ = op1(INDIRECT, celltonode($1, CVAR)); }
1.1 tholo 424: | INDIRECT term { $$ = op1(INDIRECT, $2); }
425: ;
426:
/*
 * varlist: the parameter list of a function definition (possibly empty).
 * Every alternative stores the list built so far in the global arglist as
 * well as in $$.  Before a further name is appended, checkdup() reports it
 * if the list already contains it.
 */
427: varlist:
428: /* nothing */ { arglist = $$ = 0; }
1.4 kstailey 429: | VAR { arglist = $$ = celltonode($1,CVAR); }
1.1 tholo 430: | varlist comma VAR {
431: checkdup($1, $3);
1.4 kstailey 432: arglist = $$ = linkum($1,celltonode($3,CVAR)); }
1.1 tholo 433: ;
434:
/*
 * varname: a plain variable reference.
 *   VAR   -> celltonode($1, CVAR)
 *   ARG   -> op1(ARG, itonp($1)); the token's integer value is carried in the node
 *   VARNF -> op1(VARNF, ...) with the cell pointer cast to Node *
 */
435: varname:
1.4 kstailey 436: VAR { $$ = celltonode($1, CVAR); }
1.5 millert 437: | ARG { $$ = op1(ARG, itonp($1)); }
1.1 tholo 438: | VARNF { $$ = op1(VARNF, (Node *) $1); }
439: ;
440:
441:
/*
 * while: only the "while (cond)" head.  The value is the condition passed
 * through notnull(); the stmt rule attaches the body.
 */
442: while:
443: WHILE '(' pattern rparen { $$ = notnull($3); }
444: ;
445:
446: %%
447:
448: void setfname(Cell *p)
449: {
450: if (isarr(p))
1.6 millert 451: SYNTAX("%s is an array, not a function", p->nval);
1.4 kstailey 452: else if (isfcn(p))
1.6 millert 453: SYNTAX("you can't define function %s more than once", p->nval);
1.1 tholo 454: curfname = p->nval;
455: }
456:
457: int constnode(Node *p)
458: {
459: return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
460: }
461:
462: char *strnode(Node *p)
463: {
464: return ((Cell *)(p->narg[0]))->sval;
465: }
466:
467: Node *notnull(Node *n)
468: {
469: switch (n->nobj) {
470: case LE: case LT: case EQ: case NE: case GT: case GE:
471: case BOR: case AND: case NOT:
472: return n;
473: default:
474: return op2(NE, n, nullnode);
475: }
476: }
477:
478: void checkdup(Node *vl, Cell *cp) /* check if name already in list */
479: {
480: char *s = cp->nval;
481: for ( ; vl; vl = vl->nnext) {
482: if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
1.6 millert 483: SYNTAX("duplicate argument %s", s);
1.1 tholo 484: break;
485: }
486: }
487: }