Annotation of src/usr.bin/awk/awkgram.y, Revision 1.11
1.11 ! millert 1: /* $OpenBSD: awkgram.y,v 1.10 2020/06/10 21:00:01 millert Exp $ */
1.1 tholo 2: /****************************************************************
1.4 kstailey 3: Copyright (C) Lucent Technologies 1997
1.1 tholo 4: All Rights Reserved
5:
6: Permission to use, copy, modify, and distribute this software and
7: its documentation for any purpose and without fee is hereby
8: granted, provided that the above copyright notice appear in all
9: copies and that both that the copyright notice and this
10: permission notice and warranty disclaimer appear in supporting
1.4 kstailey 11: documentation, and that the name Lucent Technologies or any of
12: its entities not be used in advertising or publicity pertaining
13: to distribution of the software without specific, written prior
14: permission.
15:
16: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23: THIS SOFTWARE.
1.1 tholo 24: ****************************************************************/
25:
26: %{
/*
 * C prologue copied into the generated parser: headers, forward
 * declarations, and the parser-wide state shared by the grammar actions
 * and the helper functions defined after the second %%.
 */
27: #include <stdio.h>
28: #include <string.h>
29: #include "awk.h"
30:
31: void checkdup(Node *list, Cell *item);
32: int yywrap(void) { return(1); } /* always returns 1 */
33:
34: Node *beginloc = 0; /* BEGIN actions, chained with linkum() in pa_stat */
35: Node *endloc = 0; /* END actions, chained with linkum() in pa_stat */
36: int infunc = 0; /* nonzero while parsing a function body (incremented after the parameter list) */
37: int inloop = 0; /* nonzero while parsing the body of a while, for, or do (one count per nesting level) */
38: char *curfname = 0; /* current function name */
39: Node *arglist = 0; /* list of args for current function */
40: %}
41:
/*
 * Semantic value carried by every token and nonterminal.  The <p>, <cp>,
 * <i> and <s> tags in the %token/%type declarations select the member.
 */
42: %union {
43: Node *p; /* parse-tree node; used by most nonterminals */
44: Cell *cp; /* symbol-table cell (VAR, IVAR, VARNF, CALL, NUMBER, STRING) */
1.3 millert 45: int i; /* integer token value (operators, keywords, punctuation) */
1.1 tholo 46: char *s; /* text of a regular expression (REGEXPR, reg_expr) */
47: }
48:
/*
 * Token declarations.  The tag in angle brackets names the %union member
 * that holds the token's semantic value.  Many of these names are never
 * returned as input tokens by the rules in this file; they are used only
 * as node type codes passed to op1()/op2()/stat2()/... in the actions
 * (for example PASTAT, INTEST, CONDEXPR, PREINCR).
 * NOTE(review): FIRSTTOKEN presumably marks the start of the token range
 * for code outside this file -- confirm before reordering.
 */
49: %token <i> FIRSTTOKEN /* must be first */
50: %token <p> PROGRAM PASTAT PASTAT2 XBEGIN XEND
51: %token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
52: %token <i> ARRAY
53: %token <i> MATCH NOTMATCH MATCHOP
1.8 millert 54: %token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE
1.1 tholo 55: %token <i> AND BOR APPEND EQ GE GT LE LT NE IN
1.11 ! millert 56: %token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
1.1 tholo 57: %token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
58: %token <i> ADD MINUS MULT DIVIDE MOD
59: %token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
60: %token <i> PRINT PRINTF SPRINTF
61: %token <p> ELSE INTEST CONDEXPR
62: %token <i> POSTINCR PREINCR POSTDECR PREDECR
1.4 kstailey 63: %token <cp> VAR IVAR VARNF CALL NUMBER STRING
1.1 tholo 64: %token <s> REGEXPR
65:
/*
 * Semantic value types of the nonterminals.  Most carry a parse-tree
 * node (<p>); the newline-absorbing wrappers and operator selectors carry
 * the integer token value (<i>); reg_expr carries the regexp text (<s>);
 * string carries a Cell (<cp>).
 */
66: %type <p> pas pattern ppattern plist pplist patlist prarg term re
67: %type <p> pa_pat pa_stat pa_stats
68: %type <s> reg_expr
69: %type <p> simple_stmt opt_simple_stmt stmt stmtlist
70: %type <p> var varname funcname varlist
71: %type <p> for if else while
72: %type <i> do st
73: %type <i> pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
74: %type <i> subop print
1.11 ! millert 75: %type <cp> string
1.1 tholo 76:
/*
 * Operator precedence and associativity.  In yacc, each successive line
 * binds more tightly than the one before it, so ASGNOP is the loosest
 * operator here and INDIRECT the tightest.  CAT is not an input token in
 * these rules; it exists so that the juxtaposition rules ("pattern term",
 * "ppattern term") can borrow a precedence through %prec CAT.  Likewise
 * UMINUS supplies the precedence for the unary '-', '+' and NOT rules.
 * The order of these lines is significant; do not reorder.
 */
77: %right ASGNOP
78: %right '?'
79: %right ':'
80: %left BOR
81: %left AND
82: %left GETLINE
83: %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
1.11 ! millert 84: %left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
1.1 tholo 85: %left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
86: %left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
87: %left REGEXPR VAR VARNF IVAR WHILE '('
88: %left CAT
89: %left '+' '-'
90: %left '*' '/' '%'
1.10 millert 91: %left NOT UMINUS UPLUS
1.1 tholo 92: %right POWER
93: %right DECR INCR
94: %left INDIRECT
95: %token LASTTOKEN /* must be last */
96:
97: %%
98:
/*
 * program: first rule of the grammar and therefore yacc's start symbol.
 * When no error has been flagged, stores in winner a PROGRAM node built
 * from the collected BEGIN actions, the pattern-action list ($1) and the
 * collected END actions.  On a parse error it discards the lookahead
 * token, calls bracecheck(), and reports "bailing out".
 */
99: program:
100: pas { if (errorflag==0)
101: winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
1.6 millert 102: | error { yyclearin; bracecheck(); SYNTAX("bailing out"); }
1.1 tholo 103: ;
104:
/*
 * Newline-absorbing wrappers.  Each of the rules below matches its token
 * followed by zero or more NL tokens, so a line break is accepted after
 * &&, ||, a comma, "do" and "else".  None has an explicit action, so the
 * value is yacc's default ($$ = $1).
 */
105: and:
106: AND | and NL
107: ;
108:
109: bor:
110: BOR | bor NL
111: ;
112:
113: comma:
114: ',' | comma NL
115: ;
116:
117: do:
118: DO | do NL
119: ;
120:
121: else:
122: ELSE | else NL
123: ;
124:
/*
 * for: the three forms of the for statement.
 *   1. for (init; cond; incr) stmt  -- cond is passed through notnull()
 *   2. for (init; ; incr) stmt      -- condition slot is NIL
 *   3. for (var in array) stmt      -- built as an IN node
 * The mid-rule action {inloop++;} runs before the body is parsed and
 * counts as a symbol of its own, which is why the body is $12, $10 and
 * $8 respectively.  The final action undoes the increment.
 */
125: for:
126: FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
127: { --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
128: | FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
129: { --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
130: | FOR '(' varname IN varname rparen {inloop++;} stmt
131: { --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
132: ;
133:
/*
 * funcname: the name in a function definition; either token form is
 * handed to setfname().  Neither action assigns $$, so the value is
 * yacc's default $$ = $1: the token's Cell pointer travels in a
 * nonterminal declared <p>, and pa_stat casts it back with (Cell *)$2.
 */
134: funcname:
135: VAR { setfname($1); }
136: | CALL { setfname($1); }
137: ;
138:
/*
 * if: the "if (cond)" header only; the value is the condition after
 * notnull().  The statement bodies are attached in the stmt rule.
 */
139: if:
140: IF '(' pattern rparen { $$ = notnull($3); }
141: ;
142:
/* lbrace: '{' followed by any number of NL tokens. */
143: lbrace:
144: '{' | lbrace NL
145: ;
146:
/* nl: one or more NL tokens. */
147: nl:
148: NL | nl NL
149: ;
150:
/* opt_nl: zero or more NL tokens; the empty case yields 0. */
151: opt_nl:
152: /* empty */ { $$ = 0; }
153: | nl
154: ;
155:
/* opt_pst: optional run of statement terminators (see pst); empty yields 0. */
156: opt_pst:
157: /* empty */ { $$ = 0; }
158: | pst
159: ;
160:
161:
/* opt_simple_stmt: optional simple statement, used for the init and
 * increment slots of a for loop; empty yields 0. */
162: opt_simple_stmt:
163: /* empty */ { $$ = 0; }
164: | simple_stmt
165: ;
166:
/*
 * pas: the whole list of pattern-action statements, optionally surrounded
 * by terminators.  A program consisting only of terminators (or nothing)
 * yields 0.
 */
167: pas:
168: opt_pst { $$ = 0; }
169: | opt_pst pa_stats opt_pst { $$ = $2; }
170: ;
171:
/* pa_pat: a pattern used as the selector of a pattern-action statement;
 * it is passed through notnull() before use. */
172: pa_pat:
173: pattern { $$ = notnull($1); }
174: ;
175:
/*
 * pa_stat: one top-level item of the program.
 *   pattern                      -> PASTAT with a synthesized PRINT of rectonode()
 *   pattern { ... }              -> PASTAT with the given action
 *   pat1, pat2 [ { ... } ]       -> range pattern, built by pa2stat()
 *   { ... }                      -> PASTAT with a NIL pattern
 *   BEGIN/END { ... }            -> appended to beginloc/endloc; yields 0
 *   function name(args) { ... }  -> registered through defn(); yields 0
 * In the function form the mid-rule action {infunc++;} occupies $6, so
 * the body is $8.
 */
176: pa_stat:
177: pa_pat { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
178: | pa_pat lbrace stmtlist '}' { $$ = stat2(PASTAT, $1, $3); }
1.9 millert 179: | pa_pat ',' opt_nl pa_pat { $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
180: | pa_pat ',' opt_nl pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $4, $6); }
1.1 tholo 181: | lbrace stmtlist '}' { $$ = stat2(PASTAT, NIL, $2); }
182: | XBEGIN lbrace stmtlist '}'
183: { beginloc = linkum(beginloc, $3); $$ = 0; }
184: | XEND lbrace stmtlist '}'
185: { endloc = linkum(endloc, $3); $$ = 0; }
186: | FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
187: { infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
188: ;
189:
/* pa_stats: one or more pa_stat items, optionally separated by
 * terminators, chained together with linkum(). */
190: pa_stats:
191: pa_stat
192: | pa_stats opt_pst pa_stat { $$ = linkum($1, $3); }
193: ;
194:
/* patlist: one or more comma-separated patterns chained with linkum();
 * used for subscripts and for function/builtin argument lists. */
195: patlist:
196: pattern
197: | patlist comma pattern { $$ = linkum($1, $3); }
198: ;
199:
/*
 * ppattern: the expression form used for unparenthesized print arguments
 * (reached through pplist and prarg).  It mirrors pattern but omits the
 * relational operators (EQ, GE, GT, LE, LT, NE) and the "| getline"
 * forms; in simple_stmt, GT, APPEND and '|' after the print arguments
 * introduce output redirection instead.
 * For MATCHOP with a right operand that constnode() accepts, the regexp
 * is compiled immediately with makedfa() and the node's first slot is
 * NIL; otherwise the first slot is (Node *)1 and the operand is kept as
 * an expression.
 */
200: ppattern:
201: var ASGNOP ppattern { $$ = op2($2, $1, $3); }
202: | ppattern '?' ppattern ':' ppattern %prec '?'
203: { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
204: | ppattern bor ppattern %prec BOR
205: { $$ = op2(BOR, notnull($1), notnull($3)); }
206: | ppattern and ppattern %prec AND
207: { $$ = op2(AND, notnull($1), notnull($3)); }
208: | ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
209: | ppattern MATCHOP ppattern
210: { if (constnode($3))
211: $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
212: else
213: $$ = op3($2, (Node *)1, $1, $3); }
214: | ppattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
215: | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
216: | ppattern term %prec CAT { $$ = op2(CAT, $1, $2); }
217: | re
218: | term
219: ;
220:
/*
 * pattern: the general expression nonterminal (assignment, ?:, ||, &&,
 * comparisons, regexp match, "in", "cmd | getline", concatenation).
 * Operands of ||, && and the ?: condition are passed through notnull().
 * For MATCHOP with a right operand that constnode() accepts, the regexp
 * is compiled immediately with makedfa() and the node's first slot is
 * NIL; otherwise the first slot is (Node *)1 and the operand is kept as
 * an expression.
 * In the "| getline" alternatives, when safe is set only SYNTAX() is
 * called and $$ is not assigned by the action.
 * Concatenation is plain juxtaposition ("pattern term") and takes its
 * precedence from the CAT pseudo-token.
 */
221: pattern:
222: var ASGNOP pattern { $$ = op2($2, $1, $3); }
223: | pattern '?' pattern ':' pattern %prec '?'
224: { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
225: | pattern bor pattern %prec BOR
226: { $$ = op2(BOR, notnull($1), notnull($3)); }
227: | pattern and pattern %prec AND
228: { $$ = op2(AND, notnull($1), notnull($3)); }
229: | pattern EQ pattern { $$ = op2($2, $1, $3); }
230: | pattern GE pattern { $$ = op2($2, $1, $3); }
231: | pattern GT pattern { $$ = op2($2, $1, $3); }
232: | pattern LE pattern { $$ = op2($2, $1, $3); }
233: | pattern LT pattern { $$ = op2($2, $1, $3); }
234: | pattern NE pattern { $$ = op2($2, $1, $3); }
235: | pattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
236: | pattern MATCHOP pattern
237: { if (constnode($3))
238: $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
239: else
240: $$ = op3($2, (Node *)1, $1, $3); }
241: | pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
242: | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
1.11 ! millert 243: | pattern '|' GETLINE var {
1.6 millert 244: if (safe) SYNTAX("cmd | getline is unsafe");
1.5 millert 245: else $$ = op3(GETLINE, $4, itonp($2), $1); }
1.11 ! millert 246: | pattern '|' GETLINE {
1.6 millert 247: if (safe) SYNTAX("cmd | getline is unsafe");
1.5 millert 248: else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
1.1 tholo 249: | pattern term %prec CAT { $$ = op2(CAT, $1, $2); }
250: | re
251: | term
252: ;
253:
/* plist: two or more comma-separated patterns chained with linkum();
 * used inside parentheses ("(i, j) in arr", "print (a, b)"). */
254: plist:
255: pattern comma pattern { $$ = linkum($1, $3); }
256: | plist comma pattern { $$ = linkum($1, $3); }
257: ;
258:
/* pplist: one or more comma-separated ppatterns chained with linkum(). */
259: pplist:
260: ppattern
261: | pplist comma ppattern { $$ = linkum($1, $3); }
262: ;
263:
/* prarg: the argument list of print/printf.  With no arguments the value
 * is rectonode(); otherwise an unparenthesized pplist or a parenthesized
 * plist. */
264: prarg:
265: /* empty */ { $$ = rectonode(); }
266: | pplist
267: | '(' plist ')' { $$ = $2; }
268: ;
269:
/* print: either output keyword; the token value (default $$ = $1) is
 * used as the node type in simple_stmt. */
270: print:
271: PRINT | PRINTF
272: ;
273:
/* pst: one or more statement terminators, newlines and semicolons in
 * any mix. */
274: pst:
275: NL | ';' | pst NL | pst ';'
276: ;
277:
/* rbrace: '}' followed by any number of NL tokens. */
278: rbrace:
279: '}' | rbrace NL
280: ;
281:
/*
 * re: a bare /regexp/ used as an expression.  It becomes a MATCH node
 * whose subject is rectonode() and whose pattern is the regexp compiled
 * by makedfa().  "NOT re" negates the result after notnull().
 */
282: re:
283: reg_expr
284: { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
285: | NOT re { $$ = op1(NOT, notnull($2)); }
286: ;
287:
/*
 * reg_expr: a regexp literal.  The mid-rule action calls startreg()
 * right after the opening '/' and before REGEXPR is read; because that
 * action counts as $2, the regexp text is $3.
 */
288: reg_expr:
289: '/' {startreg();} REGEXPR '/' { $$ = $3; }
290: ;
291:
/* rparen: ')' followed by any number of NL tokens, so the body of an
 * if/for/while or function may start on the next line. */
292: rparen:
293: ')' | rparen NL
294: ;
295:
/*
 * simple_stmt: statements that need no terminator handling of their own:
 * print/printf (optionally redirected with '|', APPEND or GT), delete,
 * and a bare expression (wrapped by exptostat()).
 * For the redirected print forms the node type is the print token value
 * ($1) and the redirection operator is stored via itonp($3).  When safe
 * is set, redirected output is rejected: only SYNTAX() is called and $$
 * is not assigned by the action.
 * The error alternative discards the lookahead token and reports
 * "illegal statement".
 */
296: simple_stmt:
1.11 ! millert 297: print prarg '|' term {
1.6 millert 298: if (safe) SYNTAX("print | is unsafe");
1.5 millert 299: else $$ = stat3($1, $2, itonp($3), $4); }
1.4 kstailey 300: | print prarg APPEND term {
1.6 millert 301: if (safe) SYNTAX("print >> is unsafe");
1.5 millert 302: else $$ = stat3($1, $2, itonp($3), $4); }
1.4 kstailey 303: | print prarg GT term {
1.6 millert 304: if (safe) SYNTAX("print > is unsafe");
1.5 millert 305: else $$ = stat3($1, $2, itonp($3), $4); }
1.1 tholo 306: | print prarg { $$ = stat3($1, $2, NIL, NIL); }
307: | DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
308: | DELETE varname { $$ = stat2(DELETE, makearr($2), 0); }
309: | pattern { $$ = exptostat($1); }
1.6 millert 310: | error { yyclearin; SYNTAX("illegal statement"); }
1.1 tholo 311: ;
312:
/* st: a statement terminator: one or more newlines, or a semicolon
 * optionally followed by newlines. */
313: st:
314: nl
315: | ';' opt_nl
316: ;
317:
/*
 * stmt: a complete statement including its terminator.
 * - break/continue report a syntax error when inloop is zero, but the
 *   node is still built.
 * - next/nextfile report a syntax error when infunc is nonzero, but the
 *   node is still built.
 * - do ... while: the mid-rule actions bracket the body with
 *   inloop++ / --inloop; they occupy $2 and $4, so the body is $3 and
 *   the condition is $7.
 * - while: {inloop++;} is $2, so the body is $3.
 * - "simple_stmt st" has no action; the value is the default $$ = $1.
 * - An empty statement (';') yields 0.
 */
318: stmt:
1.6 millert 319: BREAK st { if (!inloop) SYNTAX("break illegal outside of loops");
1.1 tholo 320: $$ = stat1(BREAK, NIL); }
1.6 millert 321: | CONTINUE st { if (!inloop) SYNTAX("continue illegal outside of loops");
1.1 tholo 322: $$ = stat1(CONTINUE, NIL); }
323: | do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
324: { $$ = stat2(DO, $3, notnull($7)); }
325: | EXIT pattern st { $$ = stat1(EXIT, $2); }
326: | EXIT st { $$ = stat1(EXIT, NIL); }
327: | for
328: | if stmt else stmt { $$ = stat3(IF, $1, $2, $4); }
329: | if stmt { $$ = stat3(IF, $1, $2, NIL); }
330: | lbrace stmtlist rbrace { $$ = $2; }
331: | NEXT st { if (infunc)
1.6 millert 332: SYNTAX("next is illegal inside a function");
1.1 tholo 333: $$ = stat1(NEXT, NIL); }
334: | NEXTFILE st { if (infunc)
1.6 millert 335: SYNTAX("nextfile is illegal inside a function");
1.1 tholo 336: $$ = stat1(NEXTFILE, NIL); }
337: | RETURN pattern st { $$ = stat1(RETURN, $2); }
338: | RETURN st { $$ = stat1(RETURN, NIL); }
339: | simple_stmt st
340: | while {inloop++;} stmt { --inloop; $$ = stat2(WHILE, $1, $3); }
341: | ';' opt_nl { $$ = 0; }
342: ;
343:
/* stmtlist: one or more statements chained with linkum(). */
344: stmtlist:
345: stmt
346: | stmtlist stmt { $$ = linkum($1, $2); }
347: ;
348:
/* subop: selects sub or gsub; the token value is used as the node type
 * in the term rules. */
349: subop:
350: SUB | GSUB
351: ;
352:
/* string: one or more adjacent STRING tokens, merged pairwise with
 * catstr() into a single Cell. */
1.11 ! millert 353: string:
! 354: STRING
! 355: | string STRING { $$ = catstr($1, $2); }
! 356: ;
! 357:
1.1 tholo 358: term:
1.5 millert 359: term '/' ASGNOP term { $$ = op2(DIVEQ, $1, $4); }
360: | term '+' term { $$ = op2(ADD, $1, $3); }
1.1 tholo 361: | term '-' term { $$ = op2(MINUS, $1, $3); }
362: | term '*' term { $$ = op2(MULT, $1, $3); }
363: | term '/' term { $$ = op2(DIVIDE, $1, $3); }
364: | term '%' term { $$ = op2(MOD, $1, $3); }
365: | term POWER term { $$ = op2(POWER, $1, $3); }
366: | '-' term %prec UMINUS { $$ = op1(UMINUS, $2); }
1.10 millert 367: | '+' term %prec UMINUS { $$ = op1(UPLUS, $2); }
1.1 tholo 368: | NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); }
1.5 millert 369: | BLTIN '(' ')' { $$ = op2(BLTIN, itonp($1), rectonode()); }
370: | BLTIN '(' patlist ')' { $$ = op2(BLTIN, itonp($1), $3); }
371: | BLTIN { $$ = op2(BLTIN, itonp($1), rectonode()); }
1.4 kstailey 372: | CALL '(' ')' { $$ = op2(CALL, celltonode($1,CVAR), NIL); }
373: | CALL '(' patlist ')' { $$ = op2(CALL, celltonode($1,CVAR), $3); }
1.7 millert 374: | CLOSE term { $$ = op1(CLOSE, $2); }
1.1 tholo 375: | DECR var { $$ = op1(PREDECR, $2); }
376: | INCR var { $$ = op1(PREINCR, $2); }
377: | var DECR { $$ = op1(POSTDECR, $1); }
378: | var INCR { $$ = op1(POSTINCR, $1); }
1.5 millert 379: | GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); }
380: | GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); }
1.1 tholo 381: | GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }
382: | GETLINE { $$ = op3(GETLINE, NIL, NIL, NIL); }
383: | INDEX '(' pattern comma pattern ')'
384: { $$ = op2(INDEX, $3, $5); }
385: | INDEX '(' pattern comma reg_expr ')'
1.6 millert 386: { SYNTAX("index() doesn't permit regular expressions");
1.1 tholo 387: $$ = op2(INDEX, $3, (Node*)$5); }
388: | '(' pattern ')' { $$ = $2; }
389: | MATCHFCN '(' pattern comma reg_expr ')'
390: { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
391: | MATCHFCN '(' pattern comma pattern ')'
392: { if (constnode($5))
393: $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
394: else
395: $$ = op3(MATCHFCN, (Node *)1, $3, $5); }
1.4 kstailey 396: | NUMBER { $$ = celltonode($1, CCON); }
1.1 tholo 397: | SPLIT '(' pattern comma varname comma pattern ')' /* string */
398: { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
399: | SPLIT '(' pattern comma varname comma reg_expr ')' /* const /regexp/ */
400: { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
401: | SPLIT '(' pattern comma varname ')'
402: { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */
403: | SPRINTF '(' patlist ')' { $$ = op1($1, $3); }
1.11 ! millert 404: | string { $$ = celltonode($1, CCON); }
1.1 tholo 405: | subop '(' reg_expr comma pattern ')'
406: { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
407: | subop '(' pattern comma pattern ')'
408: { if (constnode($3))
409: $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
410: else
411: $$ = op4($1, (Node *)1, $3, $5, rectonode()); }
412: | subop '(' reg_expr comma pattern comma var ')'
413: { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
414: | subop '(' pattern comma pattern comma var ')'
415: { if (constnode($3))
416: $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
417: else
418: $$ = op4($1, (Node *)1, $3, $5, $7); }
419: | SUBSTR '(' pattern comma pattern comma pattern ')'
420: { $$ = op3(SUBSTR, $3, $5, $7); }
421: | SUBSTR '(' pattern comma pattern ')'
422: { $$ = op3(SUBSTR, $3, $5, NIL); }
423: | var
424: ;
425:
/*
 * var: anything that can be assigned to or incremented: a plain
 * variable name, a subscripted array element (the name goes through
 * makearr()), an IVAR token wrapped in an INDIRECT node, or INDIRECT
 * applied to an arbitrary term.
 */
426: var:
427: varname
428: | varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); }
1.4 kstailey 429: | IVAR { $$ = op1(INDIRECT, celltonode($1, CVAR)); }
1.1 tholo 430: | INDIRECT term { $$ = op1(INDIRECT, $2); }
1.11 ! millert 431: ;
1.1 tholo 432:
/*
 * varlist: the parameter list of a function definition (possibly
 * empty).  Every alternative also stores the list built so far in the
 * global arglist.  Before a further name is appended, checkdup()
 * reports a syntax error if it is already in the list.
 */
433: varlist:
434: /* nothing */ { arglist = $$ = 0; }
1.4 kstailey 435: | VAR { arglist = $$ = celltonode($1,CVAR); }
1.1 tholo 436: | varlist comma VAR {
437: checkdup($1, $3);
1.4 kstailey 438: arglist = $$ = linkum($1,celltonode($3,CVAR)); }
1.1 tholo 439: ;
440:
/*
 * varname: a name token turned into a node.  VAR becomes a CVAR cell
 * node, ARG an ARG node carrying the token's integer value, and VARNF a
 * VARNF node carrying the token's Cell pointer cast to Node *.
 */
441: varname:
1.4 kstailey 442: VAR { $$ = celltonode($1, CVAR); }
1.5 millert 443: | ARG { $$ = op1(ARG, itonp($1)); }
1.1 tholo 444: | VARNF { $$ = op1(VARNF, (Node *) $1); }
445: ;
446:
447:
/* while: the "while (cond)" header only; the value is the condition
 * after notnull().  The loop body is attached in the stmt rule. */
448: while:
449: WHILE '(' pattern rparen { $$ = notnull($3); }
450: ;
451:
452: %%
453:
/*
 * setfname: note the name of the function whose definition is starting.
 * Reports a syntax error if p is already an array (isarr) or already a
 * function (isfcn).  curfname is set to p->nval in every case, including
 * after an error has been reported.
 */
454: void setfname(Cell *p)
455: {
456: if (isarr(p))
1.6 millert 457: SYNTAX("%s is an array, not a function", p->nval);
1.4 kstailey 458: else if (isfcn(p))
1.6 millert 459: SYNTAX("you can't define function %s more than once", p->nval);
1.1 tholo 460: curfname = p->nval;
461: }
462:
/*
 * constnode: nonzero when p satisfies isvalue() and the Cell stored in
 * p->narg[0] has csub == CCON.  The grammar uses it to decide whether a
 * regexp operand can be compiled at parse time.
 */
463: int constnode(Node *p)
464: {
465: return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
466: }
467:
/*
 * strnode: return the sval field of the Cell stored in p->narg[0].
 * No checking is done here; every call in this file is guarded by a
 * preceding constnode(p) test.
 */
468: char *strnode(Node *p)
469: {
470: return ((Cell *)(p->narg[0]))->sval;
471: }
472:
/*
 * notnull: prepare an expression for use as a condition.  A node whose
 * type is a comparison (LE, LT, EQ, NE, GT, GE) or a logical operator
 * (BOR, AND, NOT) is returned unchanged; any other node is wrapped as
 * op2(NE, n, nullnode).  n is dereferenced unconditionally, so it must
 * not be a null pointer.
 */
473: Node *notnull(Node *n)
474: {
475: switch (n->nobj) {
476: case LE: case LT: case EQ: case NE: case GT: case GE:
477: case BOR: case AND: case NOT:
478: return n;
479: default:
480: return op2(NE, n, nullnode);
481: }
482: }
483:
/*
 * checkdup: walk the list vl along nnext and compare cp->nval with the
 * nval of the Cell in each node's narg[0].  On the first match a
 * "duplicate argument" syntax error is reported and the walk stops.
 * An empty (null) list is accepted and reports nothing.
 */
484: void checkdup(Node *vl, Cell *cp) /* check if name already in list */
485: {
486: char *s = cp->nval;
487: for ( ; vl; vl = vl->nnext) {
488: if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
1.6 millert 489: SYNTAX("duplicate argument %s", s);
1.1 tholo 490: break;
491: }
492: }
493: }
493: }