src/usr.bin/awk/awklex.l - annotate

Return to awklex.l CVS log
Up to [local] / src / usr.bin / awk
Annotation of src/usr.bin/awk/awklex.l, Revision 1.2

1.2     ! millert     1: %Start A str sc reg comment
1.1       tholo       2:
                      3: %{
                      4: /****************************************************************
                      5: Copyright (C) AT&T and Lucent Technologies 1996
                      6: All Rights Reserved
                      7:
                      8: Permission to use, copy, modify, and distribute this software and
                      9: its documentation for any purpose and without fee is hereby
                     10: granted, provided that the above copyright notice appear in all
                     11: copies and that both that the copyright notice and this
                     12: permission notice and warranty disclaimer appear in supporting
                     13: documentation, and that the names of AT&T or Lucent Technologies
                     14: or any of their entities not be used in advertising or publicity
                     15: pertaining to distribution of the software without specific,
                     16: written prior permission.
                     17:
                     18: AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
                     19: SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
                     20: FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
                     21: ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
                     22: DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
                     23: DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
                     24: OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
                     25: USE OR PERFORMANCE OF THIS SOFTWARE.
                     26: ****************************************************************/
                     27:
                     28: /* some of this depends on behavior of lex that
                     29:    may not be preserved in other implementations of lex.
                     30: */
                     31:
                     32: #ifndef FLEX_SCANNER
                     33: #undef input   /* defeat lex */
                     34: #undef unput
                     35: #endif /* !FLEX_SCANNER */
                     36:
                     37: #include <stdlib.h>
                     38: #include <string.h>
                     39: #include "awk.h"
                     40: #include "awkgram.h"
                     41:
                     42: extern YYSTYPE yylval;
                     43: extern int     infunc;
                     44:
                     45: int    lineno  = 1;
                     46: int    bracecnt = 0;
                     47: int    brackcnt  = 0;
                     48: int    parencnt = 0;
                     49:
                     50: #define DEBUG
                     51: #ifdef DEBUG
                     52: #      define  RET(x)  {if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); }
                     53: #else
                     54: #      define  RET(x)  return(x)
                     55: #endif
                     56:
                     57: #define        CADD    if (cadd(gs, yytext[0]) == 0) { \
                     58:                        ERROR "string/reg expr %.30s... too long", gs->cbuf SYNTAX; \
                     59:                        BEGIN A; \
                     60:                }
                     61:
                     62: char   *s;
                     63: Gstring        *gs = 0;        /* initialized in main() */
                     64: int    cflag;
                     65:
                     66: #ifdef FLEX_SCANNER
                     67: static int     my_input( YY_CHAR *buf, int max_size );
                     68:
                     69: #undef YY_INPUT
                     70: #define YY_INPUT(buf,result,max_size) result = my_input(buf, max_size);
                     71:
                     72: #undef YY_USER_INIT
                     73: #define YY_USER_INIT init_input_source();
                     74:
                     75: #define        FIRST   ((yy_start - 1) / 2)
                     76: #else /* FLEX_SCANNER */
                     77: #define        FIRST   (yybgin - yysvec - 1)
                     78: #endif /* FLEX_SCANNER */
                     79: %}
                     80:
                     81: A      [a-zA-Z_]
                     82: B      [a-zA-Z0-9_]
                     83: D      [0-9]
                     84: O      [0-7]
                     85: H      [0-9a-fA-F]
                     86: WS     [ \t]
                     87:
                     88: %%
                     89:        switch (FIRST) {        /* witchcraft */
                                /*
                                 * This indented code sits at the top of the rules section and is
                                 * run before pattern matching.  FIRST is defined in the prologue
                                 * from the scanner's internal start-state variables; presumably
                                 * it yields the number of the current start condition.
                                 */
                     90:        case 0:
                                        /* no start condition selected yet: scan in condition A */
                     91:                BEGIN A;
                     92:                break;
                     93:        case sc:
                                        /*
                                         * The "}" rule switched to sc and returned ';' first;
                                         * on this next call go back to A and deliver the '}' itself.
                                         */
                     94:                BEGIN A;
                     95:                RET('}');
                     96:        }
                     97:
                     98: <A>\n          { lineno++; RET(NL); }
                     99: <A>#.*         { ; }   /* strip comments */
                    100: <A>{WS}+       { ; }
                    101: <A>;           { RET(';'); }
                    102:
                    103: <A>"\\"\n      { lineno++; }
                    104: <A>BEGIN       { RET(XBEGIN); }
                    105: <A>END         { RET(XEND); }
                    106: <A>func(tion)? { if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }
                    107: <A>return      { if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
                    108: <A>"&&"                { RET(AND); }
                    109: <A>"||"                { RET(BOR); }
                    110: <A>"!"         { RET(NOT); }
                    111: <A>"!="                { yylval.i = NE; RET(NE); }
                    112: <A>"~"         { yylval.i = MATCH; RET(MATCHOP); }
                    113: <A>"!~"                { yylval.i = NOTMATCH; RET(MATCHOP); }
                    114: <A>"<"         { yylval.i = LT; RET(LT); }
                    115: <A>"<="                { yylval.i = LE; RET(LE); }
                    116: <A>"=="                { yylval.i = EQ; RET(EQ); }
                    117: <A>">="                { yylval.i = GE; RET(GE); }
                    118: <A>">"         { yylval.i = GT; RET(GT); }
                    119: <A>">>"                { yylval.i = APPEND; RET(APPEND); }
                    120: <A>"++"                { yylval.i = INCR; RET(INCR); }
                    121: <A>"--"                { yylval.i = DECR; RET(DECR); }
                    122: <A>"+="                { yylval.i = ADDEQ; RET(ASGNOP); }
                    123: <A>"-="                { yylval.i = SUBEQ; RET(ASGNOP); }
                    124: <A>"*="                { yylval.i = MULTEQ; RET(ASGNOP); }
                    125: <A>"/="                { yylval.i = DIVEQ; RET(ASGNOP); }
                    126: <A>"%="                { yylval.i = MODEQ; RET(ASGNOP); }
                    127: <A>"^="                { yylval.i = POWEQ; RET(ASGNOP); }
                    128: <A>"**="       { yylval.i = POWEQ; RET(ASGNOP); }
                    129: <A>"="         { yylval.i = ASSIGN; RET(ASGNOP); }
                    130: <A>"**"                { RET(POWER); }
                    131: <A>"^"         { RET(POWER); }
                    132:
                    133: <A>"$"{D}+     { yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }
                    134: <A>"$NF"       { unputstr("(NF)"); return(INDIRECT); }
                    135: <A>"$"{A}{B}*  {
                    136:                  int c;
                    137:                  char *yytext_copy = strdup(yytext);
                    138:                  c = input(); unput(c);        /* look for '(' or '[' */
                    139:                  if (c == '(' || c == '[' ||
                    140:                      infunc && isarg(yytext_copy+1) >= 0) {
                    141:                          unputstr(yytext_copy+1);
                    142:                          free(yytext_copy);
                    143:                          return(INDIRECT);
                    144:                  } else {
                    145:                          yylval.cp =
                    146:                                setsymtab(yytext_copy+1,"",0.0,STR|NUM,symtab);
                    147:                          free(yytext_copy);
                    148:                          RET(IVAR);
                    149:                  }
                    150:                }
                    151: <A>"$"         { RET(INDIRECT); }
                    152: <A>NF          { yylval.cp = setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); }
                    153:
                    154: <A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?        {
                    155:                  yylval.cp = setsymtab(yytext, tostring(yytext), atof(yytext), CON|NUM, symtab);
                    156:                /* should this also have STR set? */
                    157:                  RET(NUMBER); }
                    158:
                    159: <A>while       { RET(WHILE); }
                    160: <A>for         { RET(FOR); }
                    161: <A>do          { RET(DO); }
                    162: <A>if          { RET(IF); }
                    163: <A>else                { RET(ELSE); }
                    164: <A>next                { RET(NEXT); }
                    165: <A>nextfile    { RET(NEXTFILE); }
                    166: <A>exit                { RET(EXIT); }
                    167: <A>break       { RET(BREAK); }
                    168: <A>continue    { RET(CONTINUE); }
                    169: <A>print       { yylval.i = PRINT; RET(PRINT); }
                    170: <A>printf      { yylval.i = PRINTF; RET(PRINTF); }
                    171: <A>sprintf     { yylval.i = SPRINTF; RET(SPRINTF); }
                    172: <A>split       { yylval.i = SPLIT; RET(SPLIT); }
                    173: <A>substr      { RET(SUBSTR); }
                    174: <A>sub         { yylval.i = SUB; RET(SUB); }
                    175: <A>gsub                { yylval.i = GSUB; RET(GSUB); }
                    176: <A>index       { RET(INDEX); }
                    177: <A>match       { RET(MATCHFCN); }
                    178: <A>in          { RET(IN); }
                    179: <A>getline     { RET(GETLINE); }
                    180: <A>close       { RET(CLOSE); }
                    181: <A>delete      { RET(DELETE); }
                    182: <A>length      { yylval.i = FLENGTH; RET(BLTIN); }
                    183: <A>log         { yylval.i = FLOG; RET(BLTIN); }
                    184: <A>int         { yylval.i = FINT; RET(BLTIN); }
                    185: <A>exp         { yylval.i = FEXP; RET(BLTIN); }
                    186: <A>sqrt                { yylval.i = FSQRT; RET(BLTIN); }
                    187: <A>sin         { yylval.i = FSIN; RET(BLTIN); }
                    188: <A>cos         { yylval.i = FCOS; RET(BLTIN); }
                    189: <A>atan2       { yylval.i = FATAN; RET(BLTIN); }
                    190: <A>system      { yylval.i = FSYSTEM; RET(BLTIN); }
                    191: <A>rand                { yylval.i = FRAND; RET(BLTIN); }
                    192: <A>srand       { yylval.i = FSRAND; RET(BLTIN); }
                    193: <A>toupper     { yylval.i = FTOUPPER; RET(BLTIN); }
                    194: <A>tolower     { yylval.i = FTOLOWER; RET(BLTIN); }
                    195: <A>fflush      { yylval.i = FFLUSH; RET(BLTIN); }
                    196:
                    197: <A>{A}{B}*     { int n, c;
                    198:                  char *yytext_copy = strdup(yytext);
                    199:                  c = input(); unput(c);        /* look for '(' */
                    200:                  if (c != '(' && infunc && (n=isarg(yytext_copy)) >= 0) {
                    201:                        yylval.i = n;
                    202:                        free(yytext_copy);
                    203:                        RET(ARG);
                    204:                  } else {
                    205:                        yylval.cp = setsymtab(yytext_copy, "", 0.0, STR|NUM, symtab);
                    206:                        free(yytext_copy);
                    207:                        if (c == '(') {
                    208:                                RET(CALL);
                    209:                        } else {
                    210:                                RET(VAR);
                    211:                        }
                    212:                  }
                    213:                }
1.2     ! millert   214: <A>\"          { BEGIN str; caddreset(gs); }
1.1       tholo     215:
                    216: <A>"}"         { if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); }
                    217: <A>"]"         { if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
                    218: <A>")"         { if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }
                    219:
                    220: <A>.           { if (yytext[0] == '{') bracecnt++;
                    221:                  else if (yytext[0] == '[') brackcnt++;
                    222:                  else if (yytext[0] == '(') parencnt++;
                    223:                  RET(yylval.i = yytext[0]); /* everything else */ }
                    224:
                    225: <reg>\\.       { cadd(gs, '\\'); cadd(gs, yytext[1]); }
                    226: <reg>\n                { ERROR "newline in regular expression %.10s...", gs->cbuf SYNTAX; lineno++; BEGIN A; }
                    227: <reg>"/"       { BEGIN A;
                    228:                  cadd(gs, 0);
                    229:                  yylval.s = tostring(gs->cbuf);
                    230:                  unput('/');
                    231:                  RET(REGEXPR); }
                    232: <reg>.         { CADD; }
                    233:
1.2     ! millert   234: <str>\"                { BEGIN A;
1.1       tholo     235:                  cadd(gs, 0); s = tostring(gs->cbuf);
                    236:                  cunadd(gs);
                    237:                  cadd(gs, ' '); cadd(gs, 0);
                    238:                  yylval.cp = setsymtab(gs->cbuf, s, 0.0, CON|STR, symtab);
                    239:                  RET(STRING); }
1.2     ! millert   240: <str>\n                { ERROR "newline in string %.10s...", gs->cbuf SYNTAX; lineno++; BEGIN A; }
        !           241: <str>"\\\""    { cadd(gs, '"'); }
        !           242: <str>"\\"n     { cadd(gs, '\n'); }
        !           243: <str>"\\"t     { cadd(gs, '\t'); }
        !           244: <str>"\\"f     { cadd(gs, '\f'); }
        !           245: <str>"\\"r     { cadd(gs, '\r'); }
        !           246: <str>"\\"b     { cadd(gs, '\b'); }
        !           247: <str>"\\"v     { cadd(gs, '\v'); }     /* these ANSIisms may not be known by */
        !           248: <str>"\\"a     { cadd(gs, '\007'); }   /* your compiler. hence 007 for bell */
        !           249: <str>"\\\\"    { cadd(gs, '\\'); }
        !           250: <str>"\\"({O}{O}{O}|{O}{O}|{O}) { int n;
1.1       tholo     251:                  sscanf(yytext+1, "%o", &n); cadd(gs, n); }
1.2     ! millert   252: <str>"\\"x({H}+) { int n;      /* ANSI permits any number! */
1.1       tholo     253:                  sscanf(yytext+2, "%x", &n); cadd(gs, n); }
1.2     ! millert   254: <str>"\\".     { cadd(gs, yytext[1]); }
        !           255: <str>.         { CADD; }
1.1       tholo     256:
                    257: %%
                    258:
                    259: void startreg(void)    /* start parsing a regular expression */
                    260: {
                    261:        BEGIN reg;
                    262:        caddreset(gs);
                    263: }
                    264:
                    265: #ifdef FLEX_SCANNER
                    266: static int my_input( YY_CHAR *buf, int max_size )
                    267: {
                    268:        extern uschar *lexprog;
                    269:
                    270:        if ( lexprog ) {                /* awk '...' */
                    271:                int num_chars = strlen( lexprog );
                    272:                if ( num_chars > max_size )
                    273:                        {
                    274:                        num_chars = max_size;
                    275:                        strncpy( buf, lexprog, num_chars );
                    276:                        }
                    277:                else
                    278:                        strcpy( buf, lexprog );
                    279:                lexprog += num_chars;
                    280:                return num_chars;
                    281:
                    282:        } else {                        /* awk -f ... */
                    283:                int c = pgetc();
                    284:                if (c == EOF)
                    285:                        return 0;
                    286:                buf[0] = c;
                    287:                return 1;
                    288:        }
                    289: }
                    290: #else /* FLEX_SCANNER */
                    291: /* input() and unput() are transcriptions of the standard lex
                    292:    macros for input and output with additions for error message
                    293:    printing.  God help us all if someone changes how lex works.
                    294: */
                    295:
                     296: char   ebuf[300];      /* record of characters returned by input(); used circularly */
                     297: char   *ep = ebuf;     /* next slot of ebuf to be written */
                     298:
                          /*
                           * input - return the next character of the awk program, or 0 at end
                           * of input.  Characters come, in order of preference, from the
                           * pushback stack filled by unput(), from the program string lexprog,
                           * or from pgetc().  Every character returned is also stored in ebuf.
                           */
                     299: int input(void)        /* get next lexical input character */
                     300: {
                     301:        int c;
                     302:        extern char *lexprog;
                     303:
                                 /* characters pushed back by unput() are consumed first */
                     304:        if (yysptr > yysbuf)
                     305:                c = U(*--yysptr);
                     306:        else if (lexprog != NULL) {     /* awk '...' */
                                         /* at the terminating NUL, c is 0 and lexprog is not advanced */
                     307:                if ((c = *lexprog) != 0)
                     308:                        lexprog++;
                     309:        } else                          /* awk -f ... */
                     310:                c = pgetc();
                     311:        if (c == '\n')
                     312:                yylineno++;
                     313:        else if (c == EOF)
                     314:                c = 0;          /* EOF is reported to the caller as 0 */
                                 /* wrap to the start of ebuf once the end has been reached */
                     315:        if (ep >= ebuf + sizeof ebuf)
                     316:                ep = ebuf;
                                 /* NOTE(review): the value returned is the one stored in the char slot */
                     317:        return *ep++ = c;
                     318: }
                    319:
                    320: void unput(int c)      /* put lexical character back on input */
                    321: {
                    322:        yytchar = c;
                    323:        if (yytchar == '\n')
                    324:                yylineno--;
                    325:        *yysptr++ = yytchar;
                    326:        if (--ep < ebuf)
                    327:                ep = ebuf + sizeof(ebuf) - 1;
                    328: }
                    329: #endif /* FLEX_SCANNER */
                    330:
                    331: void unputstr(char *s) /* put a string back on input */
                    332: {
                    333:        int i;
                    334:
                    335:        for (i = strlen(s)-1; i >= 0; i--)
                    336:                unput(s[i]);
                    337: }
                    338:
                    339: int lex_input()
                    340: {
                    341:        return input();
                    342: }
                    343:
                    344: /* growing-string code */
                    345:
                    346: const int CBUFLEN = 400;
                    347:
                    348: Gstring *newGstring()
                    349: {
                    350:        Gstring *gs = (Gstring *) malloc(sizeof(Gstring));
                    351:        char *cp = (char *) malloc(CBUFLEN);
                    352:
                    353:        if (gs == 0 || cp == 0)
                    354:                ERROR "Out of space for strings" FATAL;
                    355:        gs->cbuf = cp;
                    356:        gs->cmax = CBUFLEN;
                    357:        gs->clen = 0;
                    358:        return gs;
                    359: }
                    360:
                    361: char *cadd(Gstring *gs, int c) /* add one char to gs->cbuf, grow as needed */
                    362: {
                    363:        if (gs->clen >= gs->cmax) {     /* need to grow */
                    364:                gs->cmax *= 4;
                    365:                gs->cbuf = (char *) realloc((void *) gs->cbuf, gs->cmax);
                    366:
                    367:        }
                    368:        if (gs->cbuf != 0)
                    369:                gs->cbuf[gs->clen++] = c;
                    370:        return gs->cbuf;
                    371: }
                    372:
                    373: void caddreset(Gstring *gs)
                    374: {
                    375:        gs->clen = 0;
                    376: }
                    377:
                    378: void cunadd(Gstring *gs)
                    379: {
                    380:        if (gs->clen > 0)
                    381:                gs->clen--;
                    382: }
                    383:
                    384: void delGstring(Gstring *gs)
                    385: {
                    386:        free((void *) gs->cbuf);
                    387:        free((void *) gs);
                    388: }
                    389:
                    390: #ifdef FLEX_SCANNER
                    391: void init_input_source(void)
                    392: {
                    393:        extern int curpfile;
                    394:        extern char *pfile[];
                    395:
                    396:        if (yyin == NULL) {
                    397:                if (pfile[curpfile] == 0)
                    398:                        return;
                    399:                if (strcmp((char *) pfile[curpfile], "-") == 0)
                    400:                        yyin = stdin;
                    401:                else if ((yyin = fopen((char *) pfile[curpfile], "r")) == NULL)
                    402:                        ERROR "can't open file %s", pfile[curpfile] FATAL;
                    403:        }
                    404: }
                    405: #endif