src/usr.bin/awk/awklex.l - annotate

Return to awklex.l CVS log
Up to [local] / src / usr.bin / awk
Annotation of src/usr.bin/awk/awklex.l, Revision 1.3

1.2       millert     1: %Start A str sc reg comment
1.1       tholo       2:
                      3: %{
                      4: /****************************************************************
                      5: Copyright (C) AT&T and Lucent Technologies 1996
                      6: All Rights Reserved
                      7:
                      8: Permission to use, copy, modify, and distribute this software and
                      9: its documentation for any purpose and without fee is hereby
                     10: granted, provided that the above copyright notice appear in all
                     11: copies and that both that the copyright notice and this
                     12: permission notice and warranty disclaimer appear in supporting
                     13: documentation, and that the names of AT&T or Lucent Technologies
                     14: or any of their entities not be used in advertising or publicity
                     15: pertaining to distribution of the software without specific,
                     16: written prior permission.
                     17:
                     18: AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
                     19: SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
                     20: FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
                     21: ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
                     22: DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
                     23: DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
                     24: OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
                     25: USE OR PERFORMANCE OF THIS SOFTWARE.
                     26: ****************************************************************/
                     27:
                     28: /* some of this depends on behavior of lex that
                     29:    may not be preserved in other implementations of lex.
                     30: */
                     31:
                     32: #undef input   /* defeat lex */
                     33: #undef unput
                     34:
                     35: #include <stdlib.h>
                     36: #include <string.h>
                     37: #include "awk.h"
1.3     ! millert    38: #include "ytab.h"
1.1       tholo      39:
                     40: extern YYSTYPE yylval;
                     41: extern int     infunc;
                     42:
                     43: int    lineno  = 1;
                     44: int    bracecnt = 0;
                     45: int    brackcnt  = 0;
                     46: int    parencnt = 0;
                     47:
                     48: #define DEBUG  /* defined unconditionally, so the tracing RET() below is the one that gets compiled */
                     49: #ifdef DEBUG
                     50: #      define  RET(x)  {if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); }  /* when dbg is nonzero, print the token name and matched text, then return the token */
                     51: #else
                     52: #      define  RET(x)  return(x)  /* plain return; not selected while DEBUG is defined above */
                     53: #endif
                     54: /* CADD: append yytext[0] to gs; if cadd() returns 0, report the overlong string/reg expr and switch back to start condition A */
                     55: #define        CADD    if (cadd(gs, yytext[0]) == 0) { \
                     56:                        ERROR "string/reg expr %.30s... too long", gs->cbuf SYNTAX; \
                     57:                        BEGIN A; \
                     58:                }
                     59:
                     60: char   *s;
                     61: Gstring        *gs = 0;        /* initialized in main() */
                     62: int    cflag;
                     63: %}
                     64:
                     65: A      [a-zA-Z_]
                     66: B      [a-zA-Z0-9_]
                     67: D      [0-9]
                     68: O      [0-7]
                     69: H      [0-9a-fA-F]
                     70: WS     [ \t]
                     71:
                     72: %%
1.3     ! millert    73:        switch (yybgin-yysvec-1) {      /* witchcraft: the value is compared with start-condition numbers below, so it is presumably lex's encoding of the current start condition -- confirm against the lex skeleton */
1.1       tholo      74:        case 0:        /* presumably "no start condition selected yet": enter A, the condition all ordinary rules are written for */
                     75:                BEGIN A;
                     76:                break;
                     77:        case sc:       /* sc is set by the "}" rule after it has returned ';'; now go back to A and deliver the '}' itself */
                     78:                BEGIN A;
                     79:                RET('}');
                     80:        }
                     81:
                     82: <A>\n          { lineno++; RET(NL); }
                     83: <A>#.*         { ; }   /* strip comments */
                     84: <A>{WS}+       { ; }
                     85: <A>;           { RET(';'); }
                     86:
                     87: <A>"\\"\n      { lineno++; }
                     88: <A>BEGIN       { RET(XBEGIN); }
                     89: <A>END         { RET(XEND); }
                     90: <A>func(tion)? { if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }
                     91: <A>return      { if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
                     92: <A>"&&"                { RET(AND); }
                     93: <A>"||"                { RET(BOR); }
                     94: <A>"!"         { RET(NOT); }
                     95: <A>"!="                { yylval.i = NE; RET(NE); }
                     96: <A>"~"         { yylval.i = MATCH; RET(MATCHOP); }
                     97: <A>"!~"                { yylval.i = NOTMATCH; RET(MATCHOP); }
                     98: <A>"<"         { yylval.i = LT; RET(LT); }
                     99: <A>"<="                { yylval.i = LE; RET(LE); }
                    100: <A>"=="                { yylval.i = EQ; RET(EQ); }
                    101: <A>">="                { yylval.i = GE; RET(GE); }
                    102: <A>">"         { yylval.i = GT; RET(GT); }
                    103: <A>">>"                { yylval.i = APPEND; RET(APPEND); }
                    104: <A>"++"                { yylval.i = INCR; RET(INCR); }
                    105: <A>"--"                { yylval.i = DECR; RET(DECR); }
                    106: <A>"+="                { yylval.i = ADDEQ; RET(ASGNOP); }
                    107: <A>"-="                { yylval.i = SUBEQ; RET(ASGNOP); }
                    108: <A>"*="                { yylval.i = MULTEQ; RET(ASGNOP); }
                    109: <A>"/="                { yylval.i = DIVEQ; RET(ASGNOP); }
                    110: <A>"%="                { yylval.i = MODEQ; RET(ASGNOP); }
                    111: <A>"^="                { yylval.i = POWEQ; RET(ASGNOP); }
                    112: <A>"**="       { yylval.i = POWEQ; RET(ASGNOP); }
                    113: <A>"="         { yylval.i = ASSIGN; RET(ASGNOP); }
                    114: <A>"**"                { RET(POWER); }
                    115: <A>"^"         { RET(POWER); }
                    116:
                    117: <A>"$"{D}+     { yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }
                    118: <A>"$NF"       { unputstr("(NF)"); return(INDIRECT); }
1.3     ! millert   119: <A>"$"{A}{B}*  { int c, n;  /* '$' immediately followed by an identifier */
        !           120:                  c = input(); unput(c);  /* peek at the next character without consuming it */
        !           121:                  if (c == '(' || c == '[' || (infunc && (n=isarg(yytext+1)) >= 0)) {  /* name is followed by '(' or '[', or we are in a function and isarg() gives a non-negative result for it */
        !           122:                        unputstr(yytext+1);  /* push the name back without the '$' so it is scanned again on its own */
        !           123:                        return(INDIRECT);  /* only the '$' is reported here; plain return, so no RET() trace */
1.1       tholo     124:                  } else {
1.3     ! millert   125:                        yylval.cp = setsymtab(yytext+1, "", 0.0, STR|NUM, symtab);  /* otherwise enter the bare name (no '$') in symtab */
        !           126:                        RET(IVAR);
1.1       tholo     127:                  }
                    128:                }
                    129: <A>"$"         { RET(INDIRECT); }
                    130: <A>NF          { yylval.cp = setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); }
                    131:
                    132: <A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?        {
                    133:                  yylval.cp = setsymtab(yytext, tostring(yytext), atof(yytext), CON|NUM, symtab);
                    134:                /* should this also have STR set? */
                    135:                  RET(NUMBER); }
                    136:
                    137: <A>while       { RET(WHILE); }
                    138: <A>for         { RET(FOR); }
                    139: <A>do          { RET(DO); }
                    140: <A>if          { RET(IF); }
                    141: <A>else                { RET(ELSE); }
                    142: <A>next                { RET(NEXT); }
                    143: <A>nextfile    { RET(NEXTFILE); }
                    144: <A>exit                { RET(EXIT); }
                    145: <A>break       { RET(BREAK); }
                    146: <A>continue    { RET(CONTINUE); }
                    147: <A>print       { yylval.i = PRINT; RET(PRINT); }
                    148: <A>printf      { yylval.i = PRINTF; RET(PRINTF); }
                    149: <A>sprintf     { yylval.i = SPRINTF; RET(SPRINTF); }
                    150: <A>split       { yylval.i = SPLIT; RET(SPLIT); }
                    151: <A>substr      { RET(SUBSTR); }
                    152: <A>sub         { yylval.i = SUB; RET(SUB); }
                    153: <A>gsub                { yylval.i = GSUB; RET(GSUB); }
                    154: <A>index       { RET(INDEX); }
                    155: <A>match       { RET(MATCHFCN); }
                    156: <A>in          { RET(IN); }
                    157: <A>getline     { RET(GETLINE); }
                    158: <A>close       { RET(CLOSE); }
                    159: <A>delete      { RET(DELETE); }
                    160: <A>length      { yylval.i = FLENGTH; RET(BLTIN); }
                    161: <A>log         { yylval.i = FLOG; RET(BLTIN); }
                    162: <A>int         { yylval.i = FINT; RET(BLTIN); }
                    163: <A>exp         { yylval.i = FEXP; RET(BLTIN); }
                    164: <A>sqrt                { yylval.i = FSQRT; RET(BLTIN); }
                    165: <A>sin         { yylval.i = FSIN; RET(BLTIN); }
                    166: <A>cos         { yylval.i = FCOS; RET(BLTIN); }
                    167: <A>atan2       { yylval.i = FATAN; RET(BLTIN); }
                    168: <A>system      { yylval.i = FSYSTEM; RET(BLTIN); }
                    169: <A>rand                { yylval.i = FRAND; RET(BLTIN); }
                    170: <A>srand       { yylval.i = FSRAND; RET(BLTIN); }
                    171: <A>toupper     { yylval.i = FTOUPPER; RET(BLTIN); }
                    172: <A>tolower     { yylval.i = FTOLOWER; RET(BLTIN); }
                    173: <A>fflush      { yylval.i = FFLUSH; RET(BLTIN); }
                    174:
                    175: <A>{A}{B}*     { int n, c;  /* generic identifier; the keyword and builtin rules are listed before this one */
                    176:                  c = input(); unput(c);        /* peek: look for '(' without consuming it */
1.3     ! millert   177:                  if (c != '(' && infunc && (n=isarg(yytext)) >= 0) {  /* inside a function, not followed by '(', and isarg() gives a non-negative result */
1.1       tholo     178:                        yylval.i = n;  /* ARG carries isarg()'s result */
                    179:                        RET(ARG);
                    180:                  } else {
1.3     ! millert   181:                        yylval.cp = setsymtab(yytext, "", 0.0, STR|NUM, symtab);  /* everything else is entered in symtab */
1.1       tholo     182:                        if (c == '(') {  /* name directly followed by '(' */
                    183:                                RET(CALL);
                    184:                        } else {
                    185:                                RET(VAR);
                    186:                        }
                    187:                  }
                    188:                }
1.2       millert   189: <A>\"          { BEGIN str; caddreset(gs); }
1.1       tholo     190:
                    191: <A>"}"         { if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); }  /* returns ';' now; condition sc makes the switch at the top of the rules return the '}' on the next call */
                    192: <A>"]"         { if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
                    193: <A>")"         { if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }
                    194:
                    195: <A>.           { if (yytext[0] == '{') bracecnt++;
                    196:                  else if (yytext[0] == '[') brackcnt++;
                    197:                  else if (yytext[0] == '(') parencnt++;
                    198:                  RET(yylval.i = yytext[0]); /* everything else */ }
                    199:
                    200: <reg>\\.       { cadd(gs, '\\'); cadd(gs, yytext[1]); }  /* backslash escape: both characters are stored unchanged */
                    201: <reg>\n                { ERROR "newline in regular expression %.10s...", gs->cbuf SYNTAX; lineno++; BEGIN A; }
                    202: <reg>"/"       { BEGIN A;  /* unescaped slash ends the regular expression */
                    203:                  cadd(gs, 0);  /* NUL-terminate the collected text */
                    204:                  yylval.s = tostring(gs->cbuf);
                    205:                  unput('/');  /* push the '/' back so it is scanned again in condition A */
                    206:                  RET(REGEXPR); }
                    207: <reg>.         { CADD; }  /* any other character is appended; CADD reports an error if cadd() returns 0 */
                    208:
1.2       millert   209: <str>\"                { BEGIN A;  /* unescaped quote ends the string */
1.1       tholo     210:                  cadd(gs, 0); s = tostring(gs->cbuf);  /* terminate the text; s is tostring()'s result for it (presumably a private copy -- confirm) */
                    211:                  cunadd(gs);  /* drop the NUL that was just appended */
                    212:                  cadd(gs, ' '); cadd(gs, 0);  /* gs now holds the text followed by one blank */
                    213:                  yylval.cp = setsymtab(gs->cbuf, s, 0.0, CON|STR, symtab);  /* symbol name is the blank-suffixed text, string value is s */
                    214:                  RET(STRING); }
1.2       millert   215: <str>\n                { ERROR "newline in string %.10s...", gs->cbuf SYNTAX; lineno++; BEGIN A; }
                    216: <str>"\\\""    { cadd(gs, '"'); }
                    217: <str>"\\"n     { cadd(gs, '\n'); }
                    218: <str>"\\"t     { cadd(gs, '\t'); }
                    219: <str>"\\"f     { cadd(gs, '\f'); }
                    220: <str>"\\"r     { cadd(gs, '\r'); }
                    221: <str>"\\"b     { cadd(gs, '\b'); }
                    222: <str>"\\"v     { cadd(gs, '\v'); }     /* these ANSIisms may not be known by */
                    223: <str>"\\"a     { cadd(gs, '\007'); }   /* your compiler. hence 007 for bell */
                    224: <str>"\\\\"    { cadd(gs, '\\'); }
                    225: <str>"\\"({O}{O}{O}|{O}{O}|{O}) { int n;
1.1       tholo     226:                  sscanf(yytext+1, "%o", &n); cadd(gs, n); }
1.2       millert   227: <str>"\\"x({H}+) { int n;      /* ANSI permits any number! */
1.1       tholo     228:                  sscanf(yytext+2, "%x", &n); cadd(gs, n); }
1.2       millert   229: <str>"\\".     { cadd(gs, yytext[1]); }
                    230: <str>.         { CADD; }
1.1       tholo     231:
                    232: %%
                    233:
                    234: void startreg(void)    /* start parsing a regular expression */
                    235: {
                    236:        BEGIN reg;
                    237:        caddreset(gs);
                    238: }
                    239:
                    240: /* input() and unput() are transcriptions of the standard lex
                    241:    macros for input and output with additions for error message
                    242:    printing.  God help us all if someone changes how lex works.
                    243: */
                    244:
                    245: char   ebuf[300];
                    246: char   *ep = ebuf;
                    247:
                    248: int input(void)        /* get next lexical input character */
                    249: {
                    250:        int c;
                    251:        extern char *lexprog;
                    252:
                    253:        if (yysptr > yysbuf)
                    254:                c = U(*--yysptr);
                    255:        else if (lexprog != NULL) {     /* awk '...' */
                    256:                if ((c = *lexprog) != 0)
                    257:                        lexprog++;
                    258:        } else                          /* awk -f ... */
                    259:                c = pgetc();
                    260:        if (c == '\n')
                    261:                yylineno++;
                    262:        else if (c == EOF)
                    263:                c = 0;
                    264:        if (ep >= ebuf + sizeof ebuf)
                    265:                ep = ebuf;
                    266:        return *ep++ = c;
                    267: }
                    268:
                    269: void unput(int c)      /* put lexical character back on input */
                    270: {
                    271:        yytchar = c;
                    272:        if (yytchar == '\n')
                    273:                yylineno--;
                    274:        *yysptr++ = yytchar;
                    275:        if (--ep < ebuf)
                    276:                ep = ebuf + sizeof(ebuf) - 1;
                    277: }
1.3     ! millert   278:
1.1       tholo     279:
                    /*
                     * unputstr: put the string s back on the input.
                     *
                     * The characters are handed to unput() last one first, so that
                     * they are read back in their original order.
                     */
                    void unputstr(char *s)
                    {
                            int pos = strlen(s) - 1;

                            while (pos >= 0)
                                    unput(s[pos--]);
                    }
                    287:
                    288: /* growing-string code */
                    289:
                    290: const int CBUFLEN = 400;
                    291:
                    292: Gstring *newGstring()
                    293: {
                    294:        Gstring *gs = (Gstring *) malloc(sizeof(Gstring));
                    295:        char *cp = (char *) malloc(CBUFLEN);
                    296:
                    297:        if (gs == 0 || cp == 0)
                    298:                ERROR "Out of space for strings" FATAL;
                    299:        gs->cbuf = cp;
                    300:        gs->cmax = CBUFLEN;
                    301:        gs->clen = 0;
                    302:        return gs;
                    303: }
                    304:
                    305: char *cadd(Gstring *gs, int c) /* add one char to gs->cbuf, grow as needed */
                    306: {
                    307:        if (gs->clen >= gs->cmax) {     /* need to grow */
                    308:                gs->cmax *= 4;
                    309:                gs->cbuf = (char *) realloc((void *) gs->cbuf, gs->cmax);
                    310:
                    311:        }
                    312:        if (gs->cbuf != 0)
                    313:                gs->cbuf[gs->clen++] = c;
                    314:        return gs->cbuf;
                    315: }
                    316:
                    317: void caddreset(Gstring *gs)
                    318: {
                    319:        gs->clen = 0;
                    320: }
                    321:
                    322: void cunadd(Gstring *gs)
                    323: {
                    324:        if (gs->clen > 0)
                    325:                gs->clen--;
                    326: }
                    327:
                    328: void delGstring(Gstring *gs)
                    329: {
                    330:        free((void *) gs->cbuf);
                    331:        free((void *) gs);
                    332: }