/* $OpenBSD: awkgram.y,v 1.14 2020/06/13 01:21:01 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

/*
 * yacc grammar for awk programs.
 *
 * The `program' rule is the start symbol: when no error has been flagged
 * it stores stat3(PROGRAM, beginloc, <pattern-action list>, endloc) in
 * `winner'.  Semantic values use the %union below: <p> is a Node *,
 * <cp> a Cell *, <i> an int and <s> a char *.
 */

%{
#include <stdio.h>
#include <string.h>
#include "awk.h"

void checkdup(Node *list, Cell *item);
int yywrap(void) { return(1); }

Node	*beginloc = 0;
Node	*endloc = 0;
bool	infunc	= false;	/* = true if in arglist or body of func */
int	inloop	= 0;	/* >= 1 if in while, for, do; can't be bool, since loops can nest */
char	*curfname = 0;	/* current function name */
Node	*arglist = 0;	/* list of args for current function */
%}

%union {
	Node	*p;
	Cell	*cp;
	int	i;
	char	*s;
}

%token	<i>	FIRSTTOKEN	/* must be first */
%token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
%token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
%token	<i>	ARRAY
%token	<i>	MATCH NOTMATCH MATCHOP
%token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
%token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
%token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%token	<i>	GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token	<i>	ADD MINUS MULT DIVIDE MOD
%token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token	<i>	PRINT PRINTF SPRINTF
%token	<p>	ELSE INTEST CONDEXPR
%token	<i>	POSTINCR PREINCR POSTDECR PREDECR
%token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
%token	<s>	REGEXPR

%type	<p>	pas pattern ppattern plist pplist patlist prarg term re
%type	<p>	pa_pat pa_stat pa_stats
%type	<s>	reg_expr
%type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
%type	<p>	var varname funcname varlist
%type	<p>	for if else while
%type	<i>	do st
%type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
%type	<i>	subop print
%type	<cp>	string

/* Operator precedence, lowest first. */
%right	ASGNOP
%right	'?'
%right	':'
%left	BOR
%left	AND
%left	GETLINE
%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
%left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
%left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
%left	REGEXPR VAR VARNF IVAR WHILE '('
%left	CAT
%left	'+' '-'
%left	'*' '/' '%'
%left	NOT UMINUS UPLUS
%right	POWER
%right	DECR INCR
%left	INDIRECT
%token	LASTTOKEN	/* must be last */

%%

program:
	  pas	{ if (errorflag==0)
			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
	| error	{ yyclearin; bracecheck(); SYNTAX("bailing out"); }
	;

and:
	  AND | and NL
	;

bor:
	  BOR | bor NL
	;

comma:
	  ',' | comma NL
	;

do:
	  DO | do NL
	;

else:
	  ELSE | else NL
	;

/* inloop is bumped by a mid-rule action before the body is parsed. */
for:
	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
		{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
	| FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
		{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
	| FOR '(' varname IN varname rparen {inloop++;} stmt
		{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
	;

funcname:
	  VAR	{ setfname($1); }
	| CALL	{ setfname($1); }
	;

if:
	  IF '(' pattern rparen		{ $$ = notnull($3); }
	;

lbrace:
	  '{' | lbrace NL
	;

nl:
	  NL | nl NL
	;

opt_nl:
	  /* empty */	{ $$ = 0; }
	| nl
	;

opt_pst:
	  /* empty */	{ $$ = 0; }
	| pst
	;

opt_simple_stmt:
	  /* empty */			{ $$ = 0; }
	| simple_stmt
	;

pas:
	  opt_pst			{ $$ = 0; }
	| opt_pst pa_stats opt_pst	{ $$ = $2; }
	;

pa_pat:
	  pattern	{ $$ = notnull($1); }
	;

pa_stat:
	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
	| pa_pat ',' opt_nl pa_pat		{ $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat ',' opt_nl pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $4, $6); }
	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }
	| XBEGIN lbrace stmtlist '}'
		{ beginloc = linkum(beginloc, $3); $$ = 0; }
	| XEND lbrace stmtlist '}'
		{ endloc = linkum(endloc, $3); $$ = 0; }
	| FUNC funcname '(' varlist rparen {infunc = true;} lbrace stmtlist '}'
		{ infunc = false; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
	;

pa_stats:
	  pa_stat
	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
	;

patlist:
	  pattern
	| patlist comma pattern		{ $$ = linkum($1, $3); }
	;

/*
 * ppattern is the expression form used by pplist/prarg; it has the same
 * shape as pattern but without the comparison operators and `| getline'.
 */
ppattern:
	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
	| ppattern '?' ppattern ':' ppattern %prec '?'
		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| ppattern bor ppattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| ppattern and ppattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| ppattern MATCHOP ppattern
		{ if (constnode($3))
			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
		  else
			$$ = op3($2, (Node *)1, $1, $3); }
	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;

pattern:
	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
	| pattern '?' pattern ':' pattern %prec '?'
		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| pattern bor pattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| pattern and pattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }
	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| pattern MATCHOP pattern
		{ if (constnode($3))
			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
		  else
			$$ = op3($2, (Node *)1, $1, $3); }
	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
	| pattern '|' GETLINE var	{
			if (safe) SYNTAX("cmd | getline is unsafe");
			else $$ = op3(GETLINE, $4, itonp($2), $1); }
	| pattern '|' GETLINE		{
			if (safe) SYNTAX("cmd | getline is unsafe");
			else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;

plist:
	  pattern comma pattern		{ $$ = linkum($1, $3); }
	| plist comma pattern		{ $$ = linkum($1, $3); }
	;

pplist:
	  ppattern
	| pplist comma ppattern		{ $$ = linkum($1, $3); }
	;

prarg:
	  /* empty */			{ $$ = rectonode(); }
	| pplist
	| '(' plist ')'			{ $$ = $2; }
	;

print:
	  PRINT | PRINTF
	;

pst:
	  NL | ';' | pst NL | pst ';'
	;

rbrace:
	  '}' | rbrace NL
	;

re:
	   reg_expr
		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
	| NOT re	{ $$ = op1(NOT, notnull($2)); }
	;

reg_expr:
	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
	;

rparen:
	  ')' | rparen NL
	;

simple_stmt:
	  print prarg '|' term		{
			if (safe) SYNTAX("print | is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg APPEND term	{
			if (safe) SYNTAX("print >> is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg GT term		{
			if (safe) SYNTAX("print > is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
	| DELETE varname		 { $$ = stat2(DELETE, makearr($2), 0); }
	| pattern			{ $$ = exptostat($1); }
	| error				{ yyclearin; SYNTAX("illegal statement"); }
	;

st:
	  nl
	| ';' opt_nl
	;

stmt:
	  BREAK st		{ if (!inloop) SYNTAX("break illegal outside of loops");
				  $$ = stat1(BREAK, NIL); }
	| CONTINUE st		{ if (!inloop) SYNTAX("continue illegal outside of loops");
				  $$ = stat1(CONTINUE, NIL); }
	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
		{ $$ = stat2(DO, $3, notnull($7)); }
	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
	| EXIT st		{ $$ = stat1(EXIT, NIL); }
	| for
	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
	| lbrace stmtlist rbrace { $$ = $2; }
	| NEXT st	{ if (infunc)
				SYNTAX("next is illegal inside a function");
			  $$ = stat1(NEXT, NIL); }
	| NEXTFILE st	{ if (infunc)
				SYNTAX("nextfile is illegal inside a function");
			  $$ = stat1(NEXTFILE, NIL); }
	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
	| RETURN st		{ $$ = stat1(RETURN, NIL); }
	| simple_stmt st
	| while {inloop++;} stmt	{ --inloop; $$ = stat2(WHILE, $1, $3); }
	| ';' opt_nl		{ $$ = 0; }
	;

stmtlist:
	  stmt
	| stmtlist stmt		{ $$ = linkum($1, $2); }
	;

subop:
	  SUB | GSUB
	;

string:
	  STRING
	| string STRING		{ $$ = catstr($1, $2); }
	;

term:
	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }
	| term '+' term			{ $$ = op2(ADD, $1, $3); }
	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
	| term '*' term			{ $$ = op2(MULT, $1, $3); }
	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
	| term '%' term			{ $$ = op2(MOD, $1, $3); }
	| term POWER term		{ $$ = op2(POWER, $1, $3); }
	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
	| '+' term %prec UMINUS		{ $$ = op1(UPLUS, $2); }
	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
	| CLOSE term			{ $$ = op1(CLOSE, $2); }
	| DECR var			{ $$ = op1(PREDECR, $2); }
	| INCR var			{ $$ = op1(PREINCR, $2); }
	| var DECR			{ $$ = op1(POSTDECR, $1); }
	| var INCR			{ $$ = op1(POSTINCR, $1); }
	| GENSUB '(' reg_expr comma pattern comma pattern ')'
		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); }
	| GENSUB '(' pattern comma pattern comma pattern ')'
		{ if (constnode($3))
			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
		  else
			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
		}
	| GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); }
	| GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
		{ if (constnode($3))
			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
		  else
			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
		}
	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
	| INDEX '(' pattern comma pattern ')'
		{ $$ = op2(INDEX, $3, $5); }
	| INDEX '(' pattern comma reg_expr ')'
		{ SYNTAX("index() doesn't permit regular expressions");
		  $$ = op2(INDEX, $3, (Node*)$5); }
	| '(' pattern ')'		{ $$ = $2; }
	| MATCHFCN '(' pattern comma reg_expr ')'
		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
	| MATCHFCN '(' pattern comma pattern ')'
		{ if (constnode($5))
			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
		  else
			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
	| NUMBER			{ $$ = celltonode($1, CCON); }
	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
	| SPLIT '(' pattern comma varname ')'
		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
	| string			{ $$ = celltonode($1, CCON); }
	| subop '(' reg_expr comma pattern ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
	| subop '(' pattern comma pattern ')'
		{ if (constnode($3))
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
		  else
			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
	| subop '(' reg_expr comma pattern comma var ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
	| subop '(' pattern comma pattern comma var ')'
		{ if (constnode($3))
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
		  else
			$$ = op4($1, (Node *)1, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, NIL); }
	| var
	;

var:
	  varname
	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
	| IVAR				{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
	| INDIRECT term			{ $$ = op1(INDIRECT, $2); }
	;

/* Each alternative also records the list built so far in `arglist'. */
varlist:
	  /* nothing */		{ arglist = $$ = 0; }
	| VAR			{ arglist = $$ = celltonode($1,CVAR); }
	| varlist comma VAR	{
			checkdup($1, $3);
			arglist = $$ = linkum($1,celltonode($3,CVAR)); }
	;

varname:
	  VAR			{ $$ = celltonode($1, CVAR); }
	| ARG			{ $$ = op1(ARG, itonp($1)); }
	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
	;

while:
	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
	;

%%

/*
 * Record p->nval as the name of the function being defined (curfname).
 * Reports a syntax error if p is already an array or already a function;
 * curfname is set in either case.
 */
void setfname(Cell *p)
{
	if (isarr(p))
		SYNTAX("%s is an array, not a function", p->nval);
	else if (isfcn(p))
		SYNTAX("you can't define function %s more than once", p->nval);
	curfname = p->nval;
}

/*
 * Return nonzero when p satisfies isvalue() and the Cell stored in
 * p->narg[0] has csub == CCON.
 */
int constnode(Node *p)
{
	return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
}

/* Return the sval of the Cell stored in p->narg[0]. */
char *strnode(Node *p)
{
	return ((Cell *)(p->narg[0]))->sval;
}

/*
 * Return n unchanged when its nobj is one of the comparison or logical
 * operators listed below; otherwise wrap it as op2(NE, n, nullnode).
 * n must not be NULL: n->nobj is read unconditionally.
 */
Node *notnull(Node *n)
{
	switch (n->nobj) {
	case LE: case LT: case EQ: case NE: case GT: case GE:
	case BOR: case AND: case NOT:
		return n;
	default:
		return op2(NE, n, nullnode);
	}
}

/*
 * Check if cp's name already appears in the list vl (walked through
 * nnext); report a syntax error on the first match and stop.
 */
void checkdup(Node *vl, Cell *cp)	/* check if name already in list */
{
	char *s = cp->nval;
	for ( ; vl; vl = vl->nnext) {
		if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
			SYNTAX("duplicate argument %s", s);
			break;
		}
	}
}