Annotation of src/usr.bin/awk/awkgram.y, Revision 1.1
1.1 ! tholo 1: /****************************************************************
! 2: Copyright (C) AT&T and Lucent Technologies 1996
! 3: All Rights Reserved
! 4:
! 5: Permission to use, copy, modify, and distribute this software and
! 6: its documentation for any purpose and without fee is hereby
! 7: granted, provided that the above copyright notice appear in all
! 8: copies and that both that the copyright notice and this
! 9: permission notice and warranty disclaimer appear in supporting
! 10: documentation, and that the names of AT&T or Lucent Technologies
! 11: or any of their entities not be used in advertising or publicity
! 12: pertaining to distribution of the software without specific,
! 13: written prior permission.
! 14:
! 15: AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
! 16: SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
! 17: FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
! 18: ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
! 19: DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
! 20: DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
! 21: OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
! 22: USE OR PERFORMANCE OF THIS SOFTWARE.
! 23: ****************************************************************/
! 24:
! 25: %{
! 26: #include <stdio.h>
! 27: #include <string.h>
! 28: #include "awk.h"
! 29:
! 30: void checkdup(Node *list, Cell *item); /* reports a name that is already in the list; defined after the rules */
! 31: int yywrap(void) { return(1); } /* always returns 1 */
! 32:
! 33: Node *beginloc = 0; /* XBEGIN statement lists, chained together with linkum() */
! 34: Node *endloc = 0; /* XEND statement lists, chained together with linkum() */
! 35: int infunc = 0; /* nonzero inside a function body; in this file the FUNC rule raises it after the parameter list and lowers it at the closing brace */
! 36: int inloop = 0; /* nonzero while parsing the body of a while, for or do loop; used to reject break and continue elsewhere */
! 37: char *curfname = 0; /* current function name: set by setfname(), reset to 0 when the FUNC rule completes */
! 38: Node *arglist = 0; /* list of args for current function, updated by the varlist actions */
! 39: %}
! 40:
! 41: %union { /* semantic value types carried by grammar symbols */
! 42: Node *p; /* parse tree node */
! 43: Cell *cp; /* Cell pointer: value of VAR, NUMBER, STRING and the other cp tokens */
! 44: int i; /* integer: value of operator, keyword and punctuation tokens */
! 45: char *s; /* string: value of REGEXPR and of reg_expr */
! 46: }
! 47:
! 48: %token <i> FIRSTTOKEN /* must be first */
! 49: %token <p> PROGRAM PASTAT PASTAT2 XBEGIN XEND /* node kinds for the program and for pattern-action statements */
! 50: %token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
! 51: %token <i> ARRAY
! 52: %token <i> MATCH NOTMATCH MATCHOP
! 53: %token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS /* not referenced by any rule in this file; NOTE(review): presumably used by the regular expression code, confirm */
! 54: %token <i> AND BOR APPEND EQ GE GT LE LT NE IN
! 55: %token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
! 56: %token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
! 57: %token <i> ADD MINUS MULT DIVIDE MOD /* node operators built by the term rules */
! 58: %token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ /* only ASGNOP appears in the rules; its value is passed to op2() as the operator */
! 59: %token <i> PRINT PRINTF SPRINTF
! 60: %token <p> ELSE INTEST CONDEXPR
! 61: %token <i> POSTINCR PREINCR POSTDECR PREDECR /* node operators built from INCR and DECR in the term rules */
! 62: %token <cp> VAR IVAR VARNF CALL NUMBER STRING FIELD /* tokens whose value is a Cell pointer */
! 63: %token <s> REGEXPR /* regular expression text */
! 64:
! 65: %type <p> pas pattern ppattern plist pplist patlist prarg term re /* nonterminals that yield parse tree nodes */
! 66: %type <p> pa_pat pa_stat pa_stats
! 67: %type <s> reg_expr /* yields the REGEXPR string */
! 68: %type <p> simple_stmt opt_simple_stmt stmt stmtlist
! 69: %type <p> var varname funcname varlist
! 70: %type <p> for if else while
! 71: %type <i> do st
! 72: %type <i> pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor /* separator and punctuation wrappers; they carry an integer value */
! 73: %type <i> subop print /* value is the SUB or GSUB, PRINT or PRINTF token value */
! 74:
! 75: %right ASGNOP /* precedence table: lowest binding first, each later line binds tighter */
! 76: %right '?'
! 77: %right ':'
! 78: %left BOR
! 79: %left AND
! 80: %left GETLINE /* GETLINE is declared only here, so it carries no union type tag */
! 81: %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
! 82: %left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FIELD FUNC
! 83: %left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
! 84: %left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
! 85: %left REGEXPR VAR VARNF IVAR WHILE '('
! 86: %left CAT /* used through %prec by the juxtaposition rules */
! 87: %left '+' '-'
! 88: %left '*' '/' '%'
! 89: %left NOT UMINUS /* UMINUS is used through %prec by the unary rules of term */
! 90: %right POWER
! 91: %right DECR INCR
! 92: %left INDIRECT
! 93: %token LASTTOKEN /* must be last */
! 94:
! 95: %%
! 96:
! 97: program: /* first rule, hence the start symbol: a whole awk program */
! 98: pas { if (errorflag==0) /* the tree is built only when errorflag is still 0 */
! 99: winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); } /* PROGRAM node: XBEGIN list, pattern-action list, XEND list */
! 100: | error { yyclearin; bracecheck(); ERROR "bailing out" SYNTAX; } /* top-level error rule */
! 101: ;
! 102:
! 103: and: /* AND followed by any number of NL tokens */
! 104: AND | and NL
! 105: ;
! 106:
! 107: bor: /* BOR followed by any number of NL tokens */
! 108: BOR | bor NL
! 109: ;
! 110:
! 111: comma: /* comma followed by any number of NL tokens */
! 112: ',' | comma NL
! 113: ;
! 114:
! 115: do: /* DO followed by any number of NL tokens */
! 116: DO | do NL
! 117: ;
! 118:
! 119: else: /* ELSE followed by any number of NL tokens */
! 120: ELSE | else NL
! 121: ;
! 122:
! 123: for: /* the three for forms; each mid-rule action raises inloop before the body is parsed */
! 124: FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
! 125: { --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); } /* $3 init, $6 condition, $9 step, $12 body; $11 is the mid-rule action */
! 126: | FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
! 127: { --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); } /* no condition: NIL is passed in its place */
! 128: | FOR '(' varname IN varname rparen {inloop++;} stmt
! 129: { --inloop; $$ = stat3(IN, $3, makearr($5), $8); } /* for (name in array): $3 loop variable, $5 array, $8 body */
! 130: ;
! 131:
! 132: funcname: /* name in a function definition; setfname() checks it and records it in curfname */
! 133: VAR { setfname($1); }
! 134: | CALL { setfname($1); }
! 135: ;
! 136:
! 137: if: /* if header; the value is the condition after notnull() */
! 138: IF '(' pattern rparen { $$ = notnull($3); }
! 139: ;
! 140:
! 141: lbrace: /* opening brace followed by any number of NL tokens */
! 142: '{' | lbrace NL
! 143: ;
! 144:
! 145: nl: /* one or more NL tokens */
! 146: NL | nl NL
! 147: ;
! 148:
! 149: opt_nl: /* zero or more NL tokens */
! 150: /* empty */ { $$ = 0; }
! 151: | nl
! 152: ;
! 153:
! 154: opt_pst: /* optional run of NL and semicolon separators */
! 155: /* empty */ { $$ = 0; }
! 156: | pst
! 157: ;
! 158:
! 159:
! 160: opt_simple_stmt: /* a simple_stmt or nothing; the empty case yields 0 */
! 161: /* empty */ { $$ = 0; }
! 162: | simple_stmt
! 163: ;
! 164:
! 165: pas: /* body of the program: pattern-action statements with optional separators around them */
! 166: opt_pst { $$ = 0; } /* separators only: no statements */
! 167: | opt_pst pa_stats opt_pst { $$ = $2; }
! 168: ;
! 169:
! 170: pa_pat: /* pattern part of a pattern-action statement, passed through notnull() */
! 171: pattern { $$ = notnull($1); }
! 172: ;
! 173:
! 174: pa_stat: /* one pattern-action statement, XBEGIN or XEND block, or function definition */
! 175: pa_pat { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); } /* pattern without action: a PRINT of rectonode() is supplied */
! 176: | pa_pat lbrace stmtlist '}' { $$ = stat2(PASTAT, $1, $3); }
! 177: | pa_pat ',' pa_pat { $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); } /* pattern pair without action: same supplied PRINT */
! 178: | pa_pat ',' pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $3, $5); }
! 179: | lbrace stmtlist '}' { $$ = stat2(PASTAT, NIL, $2); } /* action without pattern: NIL pattern */
! 180: | XBEGIN lbrace stmtlist '}'
! 181: { beginloc = linkum(beginloc, $3); $$ = 0; } /* body is appended to beginloc; the rule itself yields 0 */
! 182: | XEND lbrace stmtlist '}'
! 183: { endloc = linkum(endloc, $3); $$ = 0; } /* body is appended to endloc; the rule itself yields 0 */
! 184: | FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
! 185: { infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; } /* $2 name, $4 parameters, $8 body; $6 is the mid-rule action */
! 186: ;
! 187:
! 188: pa_stats: /* list of pa_stat items joined with linkum() */
! 189: pa_stat
! 190: | pa_stats opt_pst pa_stat { $$ = linkum($1, $3); }
! 191: ;
! 192:
! 193: patlist: /* comma-separated list of patterns joined with linkum() */
! 194: pattern
! 195: | patlist comma pattern { $$ = linkum($1, $3); }
! 196: ;
! 197:
! 198: ppattern: /* restricted pattern reached from prarg via pplist; it has no comparison or pipe-getline alternatives (simple_stmt uses GT, APPEND and the pipe token after print arguments) */
! 199: var ASGNOP ppattern { $$ = op2($2, $1, $3); } /* $2 is the value of the assignment operator token */
! 200: | ppattern '?' ppattern ':' ppattern %prec '?'
! 201: { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
! 202: | ppattern bor ppattern %prec BOR
! 203: { $$ = op2(BOR, notnull($1), notnull($3)); }
! 204: | ppattern and ppattern %prec AND
! 205: { $$ = op2(AND, notnull($1), notnull($3)); }
! 206: | ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); } /* literal regular expression: its text goes to makedfa() at parse time */
! 207: | ppattern MATCHOP ppattern
! 208: { if (constnode($3)) /* constant right operand: its string goes to makedfa() at parse time */
! 209: $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
! 210: else
! 211: $$ = op3($2, (Node *)1, $1, $3); } /* otherwise the expression node is kept; NOTE(review): (Node *)1 presumably marks a pattern evaluated at run time, confirm in the interpreter */
! 212: | ppattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
! 213: | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); } /* parenthesized list of two or more subscripts */
! 214: | ppattern term %prec CAT { $$ = op2(CAT, $1, $2); } /* juxtaposition builds a CAT node */
! 215: | re
! 216: | term
! 217: ;
! 218:
! 219: pattern: /* full expression grammar: everything in ppattern plus comparisons and pipe-getline */
! 220: var ASGNOP pattern { $$ = op2($2, $1, $3); } /* $2 is the value of the assignment operator token */
! 221: | pattern '?' pattern ':' pattern %prec '?'
! 222: { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
! 223: | pattern bor pattern %prec BOR
! 224: { $$ = op2(BOR, notnull($1), notnull($3)); }
! 225: | pattern and pattern %prec AND
! 226: { $$ = op2(AND, notnull($1), notnull($3)); }
! 227: | pattern EQ pattern { $$ = op2($2, $1, $3); } /* comparisons: the operator token value becomes the node operator */
! 228: | pattern GE pattern { $$ = op2($2, $1, $3); }
! 229: | pattern GT pattern { $$ = op2($2, $1, $3); }
! 230: | pattern LE pattern { $$ = op2($2, $1, $3); }
! 231: | pattern LT pattern { $$ = op2($2, $1, $3); }
! 232: | pattern NE pattern { $$ = op2($2, $1, $3); }
! 233: | pattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); } /* literal regular expression: its text goes to makedfa() at parse time */
! 234: | pattern MATCHOP pattern
! 235: { if (constnode($3)) /* constant right operand: its string goes to makedfa() at parse time */
! 236: $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
! 237: else
! 238: $$ = op3($2, (Node *)1, $1, $3); } /* otherwise the expression node is kept; NOTE(review): (Node *)1 presumably marks a pattern evaluated at run time, confirm in the interpreter */
! 239: | pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
! 240: | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); } /* parenthesized list of two or more subscripts */
! 241: | pattern '|' GETLINE var { $$ = op3(GETLINE, $4, (Node*)$2, $1); } /* $4 target variable, $2 value of the pipe token, $1 expression left of the pipe */
! 242: | pattern '|' GETLINE { $$ = op3(GETLINE, (Node*)0, (Node*)$2, $1); } /* same, with no target variable */
! 243: | pattern term %prec CAT { $$ = op2(CAT, $1, $2); } /* juxtaposition builds a CAT node */
! 244: | re
! 245: | term
! 246: ;
! 247:
! 248: plist: /* list of two or more patterns separated by comma */
! 249: pattern comma pattern { $$ = linkum($1, $3); }
! 250: | plist comma pattern { $$ = linkum($1, $3); }
! 251: ;
! 252:
! 253: pplist: /* list of one or more ppatterns separated by comma */
! 254: ppattern
! 255: | pplist comma ppattern { $$ = linkum($1, $3); }
! 256: ;
! 257:
! 258: prarg: /* arguments of print or printf */
! 259: /* empty */ { $$ = rectonode(); } /* no arguments: rectonode() is used */
! 260: | pplist
! 261: | '(' plist ')' { $$ = $2; } /* parenthesized form admits full patterns */
! 262: ;
! 263:
! 264: print: /* either output keyword; the value is the token value */
! 265: PRINT | PRINTF
! 266: ;
! 267:
! 268: pst: /* one or more NL or semicolon separators, in any mix */
! 269: NL | ';' | pst NL | pst ';'
! 270: ;
! 271:
! 272: rbrace: /* closing brace followed by any number of NL tokens */
! 273: '}' | rbrace NL
! 274: ;
! 275:
! 276: re: /* regular expression standing alone as an expression */
! 277: reg_expr
! 278: { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); } /* MATCH node applied to rectonode() */
! 279: | NOT re { $$ = op1(NOT, notnull($2)); }
! 280: ;
! 281:
! 282: reg_expr: /* slash-delimited regular expression; startreg() is called once the opening slash has been seen */
! 283: '/' {startreg();} REGEXPR '/' { $$ = $3; } /* $3 is the REGEXPR text; $2 is the mid-rule action */
! 284: ;
! 285:
! 286: rparen: /* closing parenthesis followed by any number of NL tokens */
! 287: ')' | rparen NL
! 288: ;
! 289:
! 290: simple_stmt: /* print forms, delete forms, or an expression used as a statement */
! 291: print prarg '|' term { $$ = stat3($1, $2, (Node *) $3, $4); } /* $3 is the value of the redirection token, $4 the destination */
! 292: | print prarg APPEND term { $$ = stat3($1, $2, (Node *) $3, $4); }
! 293: | print prarg GT term { $$ = stat3($1, $2, (Node *) $3, $4); }
! 294: | print prarg { $$ = stat3($1, $2, NIL, NIL); } /* no redirection */
! 295: | DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); } /* delete with a subscript list */
! 296: | DELETE varname { $$ = stat2(DELETE, makearr($2), 0); } /* delete without subscripts: 0 in place of the list */
! 297: | pattern { $$ = exptostat($1); }
! 298: | error { yyclearin; ERROR "illegal statement" SYNTAX; } /* statement-level error rule */
! 299: ;
! 300:
! 301: st: /* statement terminator: NL tokens, or a semicolon optionally followed by NL tokens */
! 302: nl
! 303: | ';' opt_nl
! 304: ;
! 305:
! 306: stmt: /* one statement; loop forms raise inloop around their bodies */
! 307: BREAK st { if (!inloop) ERROR "break illegal outside of loops" SYNTAX;
! 308: $$ = stat1(BREAK, NIL); } /* the node is built even after the error */
! 309: | CLOSE pattern st { $$ = stat1(CLOSE, $2); }
! 310: | CONTINUE st { if (!inloop) ERROR "continue illegal outside of loops" SYNTAX;
! 311: $$ = stat1(CONTINUE, NIL); } /* the node is built even after the error */
! 312: | do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
! 313: { $$ = stat2(DO, $3, notnull($7)); } /* $3 body, $7 condition; $2 and $4 are the mid-rule actions */
! 314: | EXIT pattern st { $$ = stat1(EXIT, $2); }
! 315: | EXIT st { $$ = stat1(EXIT, NIL); }
! 316: | for
! 317: | if stmt else stmt { $$ = stat3(IF, $1, $2, $4); } /* $1 condition, $2 then-branch, $4 else-branch */
! 318: | if stmt { $$ = stat3(IF, $1, $2, NIL); }
! 319: | lbrace stmtlist rbrace { $$ = $2; } /* braced group */
! 320: | NEXT st { if (infunc)
! 321: ERROR "next is illegal inside a function" SYNTAX;
! 322: $$ = stat1(NEXT, NIL); }
! 323: | NEXTFILE st { if (infunc)
! 324: ERROR "nextfile is illegal inside a function" SYNTAX;
! 325: $$ = stat1(NEXTFILE, NIL); }
! 326: | RETURN pattern st { $$ = stat1(RETURN, $2); }
! 327: | RETURN st { $$ = stat1(RETURN, NIL); }
! 328: | simple_stmt st /* no action: the default makes the value $1 */
! 329: | while {inloop++;} stmt { --inloop; $$ = stat2(WHILE, $1, $3); } /* $1 condition from the while rule, $3 body */
! 330: | ';' opt_nl { $$ = 0; } /* empty statement */
! 331: ;
! 332:
! 333: stmtlist: /* one or more statements joined with linkum() */
! 334: stmt
! 335: | stmtlist stmt { $$ = linkum($1, $2); }
! 336: ;
! 337:
! 338: subop: /* either substitution keyword; the value is the token value */
! 339: SUB | GSUB
! 340: ;
! 341:
! 342: term: /* arithmetic, function calls, getline forms, string functions, constants and variables */
! 343: term '+' term { $$ = op2(ADD, $1, $3); }
! 344: | term '-' term { $$ = op2(MINUS, $1, $3); }
! 345: | term '*' term { $$ = op2(MULT, $1, $3); }
! 346: | term '/' term { $$ = op2(DIVIDE, $1, $3); }
! 347: | term '%' term { $$ = op2(MOD, $1, $3); }
! 348: | term POWER term { $$ = op2(POWER, $1, $3); }
! 349: | '-' term %prec UMINUS { $$ = op1(UMINUS, $2); }
! 350: | '+' term %prec UMINUS { $$ = $2; } /* unary plus: the operand is passed through unchanged */
! 351: | NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); }
! 352: | BLTIN '(' ')' { $$ = op2(BLTIN, (Node *) $1, rectonode()); } /* empty argument list: rectonode() is used */
! 353: | BLTIN '(' patlist ')' { $$ = op2(BLTIN, (Node *) $1, $3); } /* $1 is the BLTIN token value cast to a Node pointer */
! 354: | BLTIN { $$ = op2(BLTIN, (Node *) $1, rectonode()); } /* no parentheses: built like the empty call */
! 355: | CALL '(' ')' { $$ = op2(CALL, valtonode($1,CVAR), NIL); }
! 356: | CALL '(' patlist ')' { $$ = op2(CALL, valtonode($1,CVAR), $3); }
! 357: | DECR var { $$ = op1(PREDECR, $2); }
! 358: | INCR var { $$ = op1(PREINCR, $2); }
! 359: | var DECR { $$ = op1(POSTDECR, $1); }
! 360: | var INCR { $$ = op1(POSTINCR, $1); }
! 361: | GETLINE var LT term { $$ = op3(GETLINE, $2, (Node *)$3, $4); } /* $2 target variable, $3 value of the LT token, $4 source */
! 362: | GETLINE LT term { $$ = op3(GETLINE, NIL, (Node *)$2, $3); }
! 363: | GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }
! 364: | GETLINE { $$ = op3(GETLINE, NIL, NIL, NIL); }
! 365: | INDEX '(' pattern comma pattern ')'
! 366: { $$ = op2(INDEX, $3, $5); }
! 367: | INDEX '(' pattern comma reg_expr ')'
! 368: { ERROR "index() doesn't permit regular expressions" SYNTAX;
! 369: $$ = op2(INDEX, $3, (Node*)$5); } /* a node is still built after the error; $5 is the string from reg_expr cast to a Node pointer */
! 370: | '(' pattern ')' { $$ = $2; }
! 371: | MATCHFCN '(' pattern comma reg_expr ')'
! 372: { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
! 373: | MATCHFCN '(' pattern comma pattern ')'
! 374: { if (constnode($5)) /* constant second argument: its string goes to makedfa() at parse time */
! 375: $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
! 376: else
! 377: $$ = op3(MATCHFCN, (Node *)1, $3, $5); }
! 378: | NUMBER { $$ = valtonode($1, CCON); }
! 379: | SPLIT '(' pattern comma varname comma pattern ')' /* string */
! 380: { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
! 381: | SPLIT '(' pattern comma varname comma reg_expr ')' /* const /regexp/ */
! 382: { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
! 383: | SPLIT '(' pattern comma varname ')'
! 384: { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */
! 385: | SPRINTF '(' patlist ')' { $$ = op1($1, $3); }
! 386: | STRING { $$ = valtonode($1, CCON); }
! 387: | subop '(' reg_expr comma pattern ')'
! 388: { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); } /* two-argument form: rectonode() is the target */
! 389: | subop '(' pattern comma pattern ')'
! 390: { if (constnode($3)) /* constant first argument: its string goes to makedfa() at parse time */
! 391: $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
! 392: else
! 393: $$ = op4($1, (Node *)1, $3, $5, rectonode()); }
! 394: | subop '(' reg_expr comma pattern comma var ')'
! 395: { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); } /* three-argument form: $7 is the target */
! 396: | subop '(' pattern comma pattern comma var ')'
! 397: { if (constnode($3))
! 398: $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
! 399: else
! 400: $$ = op4($1, (Node *)1, $3, $5, $7); }
! 401: | SUBSTR '(' pattern comma pattern comma pattern ')'
! 402: { $$ = op3(SUBSTR, $3, $5, $7); }
! 403: | SUBSTR '(' pattern comma pattern ')'
! 404: { $$ = op3(SUBSTR, $3, $5, NIL); } /* two-argument form: NIL for the third argument */
! 405: | var
! 406: ;
! 407:
! 408: var: /* the forms that this grammar accepts as an assignment or increment target */
! 409: varname
! 410: | varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); } /* subscripted array reference */
! 411: | FIELD { $$ = valtonode($1, CFLD); }
! 412: | IVAR { $$ = op1(INDIRECT, valtonode($1, CVAR)); } /* INDIRECT applied to a variable */
! 413: | INDIRECT term { $$ = op1(INDIRECT, $2); } /* INDIRECT applied to an arbitrary term */
! 414: ;
! 415:
! 416: varlist: /* parameter list of a function definition; every action also stores the list so far in arglist */
! 417: /* nothing */ { arglist = $$ = 0; }
! 418: | VAR { arglist = $$ = valtonode($1,CVAR); }
! 419: | varlist comma VAR {
! 420: checkdup($1, $3); /* report a parameter name that is already in the list */
! 421: arglist = $$ = linkum($1,valtonode($3,CVAR)); }
! 422: ;
! 423:
! 424: varname: /* a plain name: VAR, ARG or VARNF token turned into a node */
! 425: VAR { $$ = valtonode($1, CVAR); }
! 426: | ARG { $$ = op1(ARG, (Node *) $1); } /* the integer token value is cast to a Node pointer */
! 427: | VARNF { $$ = op1(VARNF, (Node *) $1); }
! 428: ;
! 429:
! 430:
! 431: while: /* while header; the value is the condition after notnull() */
! 432: WHILE '(' pattern rparen { $$ = notnull($3); }
! 433: ;
! 434:
! 435: %%
! 436:
! 437: void setfname(Cell *p)
! 438: {
! 439: if (isarr(p))
! 440: ERROR "%s is an array, not a function", p->nval SYNTAX;
! 441: else if (isfunc(p))
! 442: ERROR "you can't define function %s more than once", p->nval SYNTAX;
! 443: curfname = p->nval;
! 444: }
! 445:
! 446: int constnode(Node *p)
! 447: {
! 448: return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
! 449: }
! 450:
! 451: char *strnode(Node *p)
! 452: {
! 453: return ((Cell *)(p->narg[0]))->sval;
! 454: }
! 455:
! 456: Node *notnull(Node *n)
! 457: {
! 458: switch (n->nobj) {
! 459: case LE: case LT: case EQ: case NE: case GT: case GE:
! 460: case BOR: case AND: case NOT:
! 461: return n;
! 462: default:
! 463: return op2(NE, n, nullnode);
! 464: }
! 465: }
! 466:
! 467: void checkdup(Node *vl, Cell *cp) /* check if name already in list */
! 468: {
! 469: char *s = cp->nval;
! 470: for ( ; vl; vl = vl->nnext) {
! 471: if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
! 472: ERROR "duplicate argument %s", s SYNTAX;
! 473: break;
! 474: }
! 475: }
! 476: }