Annotation of src/usr.bin/awk/awkgram.y, Revision 1.14
1.14 ! millert 1: /* $OpenBSD: awkgram.y,v 1.13 2020/06/10 21:05:02 millert Exp $ */
1.1 tholo 2: /****************************************************************
1.4 kstailey 3: Copyright (C) Lucent Technologies 1997
1.1 tholo 4: All Rights Reserved
5:
6: Permission to use, copy, modify, and distribute this software and
7: its documentation for any purpose and without fee is hereby
8: granted, provided that the above copyright notice appear in all
9: copies and that both that the copyright notice and this
10: permission notice and warranty disclaimer appear in supporting
1.4 kstailey 11: documentation, and that the name Lucent Technologies or any of
12: its entities not be used in advertising or publicity pertaining
13: to distribution of the software without specific, written prior
14: permission.
15:
16: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23: THIS SOFTWARE.
1.1 tholo 24: ****************************************************************/
25:
/*
 * C prologue copied into the generated parser: headers, forward
 * declarations and the parser-wide state shared by the grammar actions.
 */
26: %{
27: #include <stdio.h>
28: #include <string.h>
29: #include "awk.h"
30:
/* checkdup() is defined after the grammar; varlist uses it on each new parameter. */
31: void checkdup(Node *list, Cell *item);
/* yywrap() unconditionally returns 1. */
32: int yywrap(void) { return(1); }
33:
/* beginloc/endloc collect the XBEGIN and XEND action lists (see pa_stat). */
34: Node *beginloc = 0;
35: Node *endloc = 0;
1.12 millert 36: bool infunc = false; /* = true if in arglist or body of func */
37: int inloop = 0; /* >= 1 if in while, for, do; can't be bool, since loops can nest */
1.1 tholo 38: char *curfname = 0; /* current function name */
39: Node *arglist = 0; /* list of args for current function */
40: %}
41:
/* Semantic value carried by tokens and nonterminals; the <tag> names below select a member. */
42: %union {
43: Node *p; /* parse-tree node (most nonterminals) */
44: Cell *cp; /* symbol/constant cell (VAR, NUMBER, STRING, ...) */
1.3 millert 45: int i; /* integer code (operators, keywords, punctuation) */
1.1 tholo 46: char *s; /* text of a regular expression (REGEXPR, reg_expr) */
47: }
48:
/*
 * Token declarations, each tagged with the %union member it carries.
 * Several names declared here (for example PASTAT, INTEST, CONDEXPR and
 * POSTINCR) are used in the actions below only as node-type codes passed
 * to the stat and op constructors, not as symbols in any rule.
 */
49: %token <i> FIRSTTOKEN /* must be first */
50: %token <p> PROGRAM PASTAT PASTAT2 XBEGIN XEND
51: %token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
52: %token <i> ARRAY
53: %token <i> MATCH NOTMATCH MATCHOP
1.13 millert 54: %token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
1.1 tholo 55: %token <i> AND BOR APPEND EQ GE GT LE LT NE IN
1.11 millert 56: %token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
1.14 ! millert 57: %token <i> GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
1.1 tholo 58: %token <i> ADD MINUS MULT DIVIDE MOD
59: %token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
60: %token <i> PRINT PRINTF SPRINTF
61: %token <p> ELSE INTEST CONDEXPR
62: %token <i> POSTINCR PREINCR POSTDECR PREDECR
1.4 kstailey 63: %token <cp> VAR IVAR VARNF CALL NUMBER STRING
1.1 tholo 64: %token <s> REGEXPR
65:
/* Value types of the nonterminals defined in the rules section. */
66: %type <p> pas pattern ppattern plist pplist patlist prarg term re
67: %type <p> pa_pat pa_stat pa_stats
68: %type <s> reg_expr
69: %type <p> simple_stmt opt_simple_stmt stmt stmtlist
70: %type <p> var varname funcname varlist
71: %type <p> for if else while
72: %type <i> do st
73: %type <i> pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
74: %type <i> subop print
1.11 millert 75: %type <cp> string
1.1 tholo 76:
/*
 * Operator precedence and associativity.  Per yacc rules, each successive
 * line binds more tightly than the one before it, so ASGNOP is the loosest
 * and INDIRECT the tightest.  Names that first appear here (GETLINE, CAT,
 * NOT, POWER, INCR, ...) are declared as tokens by these lines.
 */
77: %right ASGNOP
78: %right '?'
79: %right ':'
80: %left BOR
81: %left AND
82: %left GETLINE
83: %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
1.11 millert 84: %left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
1.1 tholo 85: %left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
86: %left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
87: %left REGEXPR VAR VARNF IVAR WHILE '('
88: %left CAT
89: %left '+' '-'
90: %left '*' '/' '%'
1.10 millert 91: %left NOT UMINUS UPLUS
1.1 tholo 92: %right POWER
93: %right DECR INCR
94: %left INDIRECT
95: %token LASTTOKEN /* must be last */
96:
97: %%
98:
/*
 * First rule of the grammar (yacc's default start symbol).
 * If no error has been flagged, store in `winner` a PROGRAM node built
 * from the BEGIN list, the pattern-action list and the END list.
 * On a parse error: discard the lookahead, call bracecheck() and report
 * "bailing out".
 */
99: program:
100: pas { if (errorflag==0)
101: winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
1.6 millert 102: | error { yyclearin; bracecheck(); SYNTAX("bailing out"); }
1.1 tholo 103: ;
104:
/* AND token followed by any number of NL tokens. */
105: and:
106: AND | and NL
107: ;
108:
/* BOR token followed by any number of NL tokens. */
109: bor:
110: BOR | bor NL
111: ;
112:
/* ',' followed by any number of NL tokens. */
113: comma:
114: ',' | comma NL
115: ;
116:
/* DO keyword followed by any number of NL tokens. */
117: do:
118: DO | do NL
119: ;
120:
/* ELSE keyword followed by any number of NL tokens. */
121: else:
122: ELSE | else NL
123: ;
124:
/*
 * The three forms of the for statement:
 *   for (init; cond; step) stmt   -> FOR node with notnull(cond)
 *   for (init; ; step) stmt       -> FOR node with NIL as the condition
 *   for (name in array) stmt      -> IN node
 * The mid-rule action {inloop++;} runs before the body is parsed and the
 * final action undoes it.  The mid-rule action occupies a $n slot, which
 * is why the body is $12, $10 and $8 respectively.
 */
125: for:
126: FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
127: { --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
128: | FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
129: { --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
130: | FOR '(' varname IN varname rparen {inloop++;} stmt
131: { --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
132: ;
133:
/*
 * Name in a function definition: either a VAR or a CALL token.  The cell
 * is handed to setfname(), which validates it and records curfname.
 * No explicit $$ is assigned here; pa_stat casts this rule's value back
 * to Cell *, presumably relying on yacc's default $$ = $1 -- confirm.
 */
134: funcname:
135: VAR { setfname($1); }
136: | CALL { setfname($1); }
137: ;
138:
/* Head of an if statement; the value is the condition passed through notnull(). */
139: if:
140: IF '(' pattern rparen { $$ = notnull($3); }
141: ;
142:
/* '{' followed by any number of NL tokens. */
143: lbrace:
144: '{' | lbrace NL
145: ;
146:
/* One or more NL tokens. */
147: nl:
148: NL | nl NL
149: ;
150:
/* Zero or more NL tokens; the empty alternative yields 0. */
151: opt_nl:
152: /* empty */ { $$ = 0; }
153: | nl
154: ;
155:
/* Optional run of statement terminators (see pst); the empty alternative yields 0. */
156: opt_pst:
157: /* empty */ { $$ = 0; }
158: | pst
159: ;
160:
161:
/* Optional simple statement, used for the init and step parts of `for`; empty yields 0. */
162: opt_simple_stmt:
163: /* empty */ { $$ = 0; }
164: | simple_stmt
165: ;
166:
/*
 * Whole program body: optional terminators around a list of
 * pattern-action statements.  A program with no statements yields 0.
 */
167: pas:
168: opt_pst { $$ = 0; }
169: | opt_pst pa_stats opt_pst { $$ = $2; }
170: ;
171:
/* Pattern part of a pattern-action statement, passed through notnull(). */
172: pa_pat:
173: pattern { $$ = notnull($1); }
174: ;
175:
/*
 * One top-level item of the program:
 *   pattern                  -> PASTAT whose action is a PRINT of rectonode()
 *   pattern { stmts }        -> PASTAT with the given action
 *   pat1, pat2 [{ stmts }]   -> range pattern, built by pa2stat()
 *   { stmts }                -> PASTAT with NIL as the pattern
 *   XBEGIN / XEND { stmts }  -> appended to beginloc / endloc; value is 0
 *   FUNC name(params) {...}  -> registered through defn(); value is 0
 * For FUNC, infunc is set to true by the mid-rule action after the
 * parameter list and cleared, together with curfname, once the body has
 * been parsed.  The mid-rule action takes slot $6, so the body is $8.
 */
176: pa_stat:
177: pa_pat { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
178: | pa_pat lbrace stmtlist '}' { $$ = stat2(PASTAT, $1, $3); }
1.9 millert 179: | pa_pat ',' opt_nl pa_pat { $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
180: | pa_pat ',' opt_nl pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $4, $6); }
1.1 tholo 181: | lbrace stmtlist '}' { $$ = stat2(PASTAT, NIL, $2); }
182: | XBEGIN lbrace stmtlist '}'
183: { beginloc = linkum(beginloc, $3); $$ = 0; }
184: | XEND lbrace stmtlist '}'
185: { endloc = linkum(endloc, $3); $$ = 0; }
1.12 millert 186: | FUNC funcname '(' varlist rparen {infunc = true;} lbrace stmtlist '}'
187: { infunc = false; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
1.1 tholo 188: ;
189:
/* Sequence of pa_stat items separated by optional terminators, chained with linkum(). */
190: pa_stats:
191: pa_stat
192: | pa_stats opt_pst pa_stat { $$ = linkum($1, $3); }
193: ;
194:
/* One or more comma-separated patterns, chained with linkum(). */
195: patlist:
196: pattern
197: | patlist comma pattern { $$ = linkum($1, $3); }
198: ;
199:
/*
 * Restricted expression form reached through pplist/prarg, i.e. the
 * unparenthesized arguments of print.  Compared with `pattern` below it
 * has no EQ/GE/GT/LE/LT/NE alternatives and no `| GETLINE` alternatives
 * (presumably so that '>' and '|' after print arguments are taken by
 * simple_stmt -- confirm).
 * For MATCHOP with a non-regex right operand: a constant node is compiled
 * by makedfa() during parsing and NIL is passed as the second argument;
 * otherwise the expression node is kept and (Node *)1 is passed instead.
 */
200: ppattern:
201: var ASGNOP ppattern { $$ = op2($2, $1, $3); }
202: | ppattern '?' ppattern ':' ppattern %prec '?'
203: { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
204: | ppattern bor ppattern %prec BOR
205: { $$ = op2(BOR, notnull($1), notnull($3)); }
206: | ppattern and ppattern %prec AND
207: { $$ = op2(AND, notnull($1), notnull($3)); }
208: | ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
209: | ppattern MATCHOP ppattern
210: { if (constnode($3))
211: $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
212: else
213: $$ = op3($2, (Node *)1, $1, $3); }
214: | ppattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
215: | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
216: | ppattern term %prec CAT { $$ = op2(CAT, $1, $2); }
217: | re
218: | term
219: ;
220:
/*
 * General expression.  Covers assignment, ?:, the logical operators
 * (operands passed through notnull()), relational operators, regex match,
 * array membership, `cmd | getline [var]`, and concatenation expressed as
 * juxtaposition with the precedence of CAT.
 * For MATCHOP with a non-regex right operand: a constant node is compiled
 * by makedfa() during parsing and NIL is passed as the second argument;
 * otherwise the expression node is kept and (Node *)1 is passed instead.
 * In the two `| GETLINE` alternatives, when `safe` is set only the syntax
 * error is reported and $$ is not assigned by the action.
 */
221: pattern:
222: var ASGNOP pattern { $$ = op2($2, $1, $3); }
223: | pattern '?' pattern ':' pattern %prec '?'
224: { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
225: | pattern bor pattern %prec BOR
226: { $$ = op2(BOR, notnull($1), notnull($3)); }
227: | pattern and pattern %prec AND
228: { $$ = op2(AND, notnull($1), notnull($3)); }
229: | pattern EQ pattern { $$ = op2($2, $1, $3); }
230: | pattern GE pattern { $$ = op2($2, $1, $3); }
231: | pattern GT pattern { $$ = op2($2, $1, $3); }
232: | pattern LE pattern { $$ = op2($2, $1, $3); }
233: | pattern LT pattern { $$ = op2($2, $1, $3); }
234: | pattern NE pattern { $$ = op2($2, $1, $3); }
235: | pattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
236: | pattern MATCHOP pattern
237: { if (constnode($3))
238: $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
239: else
240: $$ = op3($2, (Node *)1, $1, $3); }
241: | pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
242: | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
1.11 millert 243: | pattern '|' GETLINE var {
1.6 millert 244: if (safe) SYNTAX("cmd | getline is unsafe");
1.5 millert 245: else $$ = op3(GETLINE, $4, itonp($2), $1); }
1.11 millert 246: | pattern '|' GETLINE {
1.6 millert 247: if (safe) SYNTAX("cmd | getline is unsafe");
1.5 millert 248: else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
1.1 tholo 249: | pattern term %prec CAT { $$ = op2(CAT, $1, $2); }
250: | re
251: | term
252: ;
253:
/* Two or more comma-separated patterns (used inside parentheses), chained with linkum(). */
254: plist:
255: pattern comma pattern { $$ = linkum($1, $3); }
256: | plist comma pattern { $$ = linkum($1, $3); }
257: ;
258:
/* One or more comma-separated ppattern expressions, chained with linkum(). */
259: pplist:
260: ppattern
261: | pplist comma ppattern { $$ = linkum($1, $3); }
262: ;
263:
/*
 * Argument list of print/printf: nothing (rectonode() is used instead),
 * an unparenthesized pplist, or a parenthesized plist.
 */
264: prarg:
265: /* empty */ { $$ = rectonode(); }
266: | pplist
267: | '(' plist ')' { $$ = $2; }
268: ;
269:
/* Either output keyword; its token value is later used by simple_stmt as the node type. */
270: print:
271: PRINT | PRINTF
272: ;
273:
/* One or more statement terminators: any mix of NL and ';'. */
274: pst:
275: NL | ';' | pst NL | pst ';'
276: ;
277:
/* '}' followed by any number of NL tokens. */
278: rbrace:
279: '}' | rbrace NL
280: ;
281:
/*
 * A bare regular expression used as an expression: a MATCH node whose
 * subject is rectonode() and whose matcher comes from makedfa().
 * `NOT re` negates the result after passing it through notnull().
 */
282: re:
283: reg_expr
284: { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
285: | NOT re { $$ = op1(NOT, notnull($2)); }
286: ;
287:
/*
 * Regular-expression literal.  The mid-rule action calls startreg() right
 * after the opening '/' is recognised (presumably switching the lexer to
 * regex scanning -- confirm).  Because that action takes slot $2, the
 * REGEXPR text is $3.
 */
288: reg_expr:
289: '/' {startreg();} REGEXPR '/' { $$ = $3; }
290: ;
291:
/* ')' followed by any number of NL tokens. */
292: rparen:
293: ')' | rparen NL
294: ;
295:
/*
 * Statements that may also appear in the init/step parts of `for`:
 * print/printf with optional redirection ('|', APPEND, GT), delete, and
 * an expression used as a statement.
 * For the redirected print forms, when `safe` is set only the syntax
 * error is reported and $$ is not assigned by the action.
 * On a parse error the lookahead is discarded and "illegal statement" is
 * reported.
 */
296: simple_stmt:
1.11 millert 297: print prarg '|' term {
1.6 millert 298: if (safe) SYNTAX("print | is unsafe");
1.5 millert 299: else $$ = stat3($1, $2, itonp($3), $4); }
1.4 kstailey 300: | print prarg APPEND term {
1.6 millert 301: if (safe) SYNTAX("print >> is unsafe");
1.5 millert 302: else $$ = stat3($1, $2, itonp($3), $4); }
1.4 kstailey 303: | print prarg GT term {
1.6 millert 304: if (safe) SYNTAX("print > is unsafe");
1.5 millert 305: else $$ = stat3($1, $2, itonp($3), $4); }
1.1 tholo 306: | print prarg { $$ = stat3($1, $2, NIL, NIL); }
307: | DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
308: | DELETE varname { $$ = stat2(DELETE, makearr($2), 0); }
309: | pattern { $$ = exptostat($1); }
1.6 millert 310: | error { yyclearin; SYNTAX("illegal statement"); }
1.1 tholo 311: ;
312:
/* Statement terminator: one or more newlines, or ';' followed by optional newlines. */
313: st:
314: nl
315: | ';' opt_nl
316: ;
317:
318: stmt:
1.6 millert 319: BREAK st { if (!inloop) SYNTAX("break illegal outside of loops");
1.1 tholo 320: $$ = stat1(BREAK, NIL); }
1.6 millert 321: | CONTINUE st { if (!inloop) SYNTAX("continue illegal outside of loops");
1.1 tholo 322: $$ = stat1(CONTINUE, NIL); }
323: | do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
324: { $$ = stat2(DO, $3, notnull($7)); }
325: | EXIT pattern st { $$ = stat1(EXIT, $2); }
326: | EXIT st { $$ = stat1(EXIT, NIL); }
327: | for
328: | if stmt else stmt { $$ = stat3(IF, $1, $2, $4); }
329: | if stmt { $$ = stat3(IF, $1, $2, NIL); }
330: | lbrace stmtlist rbrace { $$ = $2; }
331: | NEXT st { if (infunc)
1.6 millert 332: SYNTAX("next is illegal inside a function");
1.1 tholo 333: $$ = stat1(NEXT, NIL); }
334: | NEXTFILE st { if (infunc)
1.6 millert 335: SYNTAX("nextfile is illegal inside a function");
1.1 tholo 336: $$ = stat1(NEXTFILE, NIL); }
337: | RETURN pattern st { $$ = stat1(RETURN, $2); }
338: | RETURN st { $$ = stat1(RETURN, NIL); }
339: | simple_stmt st
340: | while {inloop++;} stmt { --inloop; $$ = stat2(WHILE, $1, $3); }
341: | ';' opt_nl { $$ = 0; }
342: ;
343:
/* One or more statements, chained with linkum(). */
344: stmtlist:
345: stmt
346: | stmtlist stmt { $$ = linkum($1, $2); }
347: ;
348:
/* Either substitution keyword; its token value is used by `term` as the node type. */
349: subop:
350: SUB | GSUB
351: ;
352:
/* One or more adjacent STRING tokens; each further token is merged in with catstr(). */
1.11 millert 353: string:
354: STRING
355: | string STRING { $$ = catstr($1, $2); }
356: ;
357:
/*
 * Primary and arithmetic expressions.
 * - The first alternative builds a DIVEQ node from a '/' token followed
 *   by an ASGNOP token.
 * - Unary '-', '+' and NOT all take the precedence of UMINUS.
 * - BLTIN with no argument list, or an empty one, operates on rectonode().
 * - GENSUB, subop (SUB/GSUB) and MATCHFCN accept either a regex literal
 *   or an expression as the pattern operand.  A regex literal or constant
 *   node is compiled by makedfa() during parsing and NIL is passed as the
 *   first operand; any other expression is kept as a node and (Node *)1
 *   is passed instead.
 * - When the optional target of GENSUB or subop is omitted, rectonode()
 *   is used in its place.
 * - INDEX with a regex literal reports a syntax error but still builds
 *   a node.
 * - SPLIT passes a token code (STRING or REGEXPR) cast to Node * as its
 *   last operand to mark the separator kind.
 */
1.1 tholo 358: term:
1.5 millert 359: term '/' ASGNOP term { $$ = op2(DIVEQ, $1, $4); }
360: | term '+' term { $$ = op2(ADD, $1, $3); }
1.1 tholo 361: | term '-' term { $$ = op2(MINUS, $1, $3); }
362: | term '*' term { $$ = op2(MULT, $1, $3); }
363: | term '/' term { $$ = op2(DIVIDE, $1, $3); }
364: | term '%' term { $$ = op2(MOD, $1, $3); }
365: | term POWER term { $$ = op2(POWER, $1, $3); }
366: | '-' term %prec UMINUS { $$ = op1(UMINUS, $2); }
1.10 millert 367: | '+' term %prec UMINUS { $$ = op1(UPLUS, $2); }
1.1 tholo 368: | NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); }
1.5 millert 369: | BLTIN '(' ')' { $$ = op2(BLTIN, itonp($1), rectonode()); }
370: | BLTIN '(' patlist ')' { $$ = op2(BLTIN, itonp($1), $3); }
371: | BLTIN { $$ = op2(BLTIN, itonp($1), rectonode()); }
1.4 kstailey 372: | CALL '(' ')' { $$ = op2(CALL, celltonode($1,CVAR), NIL); }
373: | CALL '(' patlist ')' { $$ = op2(CALL, celltonode($1,CVAR), $3); }
1.7 millert 374: | CLOSE term { $$ = op1(CLOSE, $2); }
1.1 tholo 375: | DECR var { $$ = op1(PREDECR, $2); }
376: | INCR var { $$ = op1(PREINCR, $2); }
377: | var DECR { $$ = op1(POSTDECR, $1); }
378: | var INCR { $$ = op1(POSTINCR, $1); }
1.14 ! millert 379: | GENSUB '(' reg_expr comma pattern comma pattern ')'
! 380: { $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); }
! 381: | GENSUB '(' pattern comma pattern comma pattern ')'
! 382: { if (constnode($3))
! 383: $$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
! 384: else
! 385: $$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
! 386: }
! 387: | GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
! 388: { $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); }
! 389: | GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
! 390: { if (constnode($3))
! 391: $$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
! 392: else
! 393: $$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
! 394: }
1.5 millert 395: | GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); }
396: | GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); }
1.1 tholo 397: | GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }
398: | GETLINE { $$ = op3(GETLINE, NIL, NIL, NIL); }
399: | INDEX '(' pattern comma pattern ')'
400: { $$ = op2(INDEX, $3, $5); }
401: | INDEX '(' pattern comma reg_expr ')'
1.6 millert 402: { SYNTAX("index() doesn't permit regular expressions");
1.1 tholo 403: $$ = op2(INDEX, $3, (Node*)$5); }
404: | '(' pattern ')' { $$ = $2; }
405: | MATCHFCN '(' pattern comma reg_expr ')'
406: { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
407: | MATCHFCN '(' pattern comma pattern ')'
408: { if (constnode($5))
409: $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
410: else
411: $$ = op3(MATCHFCN, (Node *)1, $3, $5); }
1.4 kstailey 412: | NUMBER { $$ = celltonode($1, CCON); }
1.1 tholo 413: | SPLIT '(' pattern comma varname comma pattern ')' /* string */
414: { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
415: | SPLIT '(' pattern comma varname comma reg_expr ')' /* const /regexp/ */
416: { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
417: | SPLIT '(' pattern comma varname ')'
418: { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */
419: | SPRINTF '(' patlist ')' { $$ = op1($1, $3); }
1.11 millert 420: | string { $$ = celltonode($1, CCON); }
1.1 tholo 421: | subop '(' reg_expr comma pattern ')'
422: { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
423: | subop '(' pattern comma pattern ')'
424: { if (constnode($3))
425: $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
426: else
427: $$ = op4($1, (Node *)1, $3, $5, rectonode()); }
428: | subop '(' reg_expr comma pattern comma var ')'
429: { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
430: | subop '(' pattern comma pattern comma var ')'
431: { if (constnode($3))
432: $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
433: else
434: $$ = op4($1, (Node *)1, $3, $5, $7); }
435: | SUBSTR '(' pattern comma pattern comma pattern ')'
436: { $$ = op3(SUBSTR, $3, $5, $7); }
437: | SUBSTR '(' pattern comma pattern ')'
438: { $$ = op3(SUBSTR, $3, $5, NIL); }
439: | var
440: ;
441:
/*
 * Assignable reference: a plain name, an array element (ARRAY node over
 * makearr(name) and the subscript list), or an INDIRECT node built from
 * an IVAR token or from `INDIRECT term`.
 */
442: var:
443: varname
444: | varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); }
1.4 kstailey 445: | IVAR { $$ = op1(INDIRECT, celltonode($1, CVAR)); }
1.1 tholo 446: | INDIRECT term { $$ = op1(INDIRECT, $2); }
1.11 millert 447: ;
1.1 tholo 448:
/*
 * Formal parameter list of a function definition (possibly empty).
 * Every alternative also stores the list built so far in the global
 * `arglist`.  Before a further parameter is appended, checkdup() is
 * called to report a duplicate name.
 */
449: varlist:
450: /* nothing */ { arglist = $$ = 0; }
1.4 kstailey 451: | VAR { arglist = $$ = celltonode($1,CVAR); }
1.1 tholo 452: | varlist comma VAR {
453: checkdup($1, $3);
1.4 kstailey 454: arglist = $$ = linkum($1,celltonode($3,CVAR)); }
1.1 tholo 455: ;
456:
/*
 * A simple name: VAR becomes a CVAR node, ARG an ARG node carrying the
 * token's integer value, and VARNF a VARNF node wrapping the token's cell.
 */
457: varname:
1.4 kstailey 458: VAR { $$ = celltonode($1, CVAR); }
1.5 millert 459: | ARG { $$ = op1(ARG, itonp($1)); }
1.1 tholo 460: | VARNF { $$ = op1(VARNF, (Node *) $1); }
461: ;
462:
463:
/* Head of a while statement; the value is the condition passed through notnull(). */
464: while:
465: WHILE '(' pattern rparen { $$ = notnull($3); }
466: ;
467:
468: %%
469:
470: void setfname(Cell *p)
471: {
472: if (isarr(p))
1.6 millert 473: SYNTAX("%s is an array, not a function", p->nval);
1.4 kstailey 474: else if (isfcn(p))
1.6 millert 475: SYNTAX("you can't define function %s more than once", p->nval);
1.1 tholo 476: curfname = p->nval;
477: }
478:
479: int constnode(Node *p)
480: {
481: return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
482: }
483:
484: char *strnode(Node *p)
485: {
486: return ((Cell *)(p->narg[0]))->sval;
487: }
488:
489: Node *notnull(Node *n)
490: {
491: switch (n->nobj) {
492: case LE: case LT: case EQ: case NE: case GT: case GE:
493: case BOR: case AND: case NOT:
494: return n;
495: default:
496: return op2(NE, n, nullnode);
497: }
498: }
499:
500: void checkdup(Node *vl, Cell *cp) /* check if name already in list */
501: {
502: char *s = cp->nval;
503: for ( ; vl; vl = vl->nnext) {
504: if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
1.6 millert 505: SYNTAX("duplicate argument %s", s);
1.1 tholo 506: break;
507: }
508: }
509: }