[BACK]Return to lib.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / awk

Annotation of src/usr.bin/awk/lib.c, Revision 1.57

1.57    ! jsg         1: /*     $OpenBSD: lib.c,v 1.56 2024/05/04 22:59:21 millert Exp $        */
1.1       tholo       2: /****************************************************************
1.4       kstailey    3: Copyright (C) Lucent Technologies 1997
1.1       tholo       4: All Rights Reserved
                      5:
                      6: Permission to use, copy, modify, and distribute this software and
                      7: its documentation for any purpose and without fee is hereby
                      8: granted, provided that the above copyright notice appear in all
                      9: copies and that both that the copyright notice and this
                     10: permission notice and warranty disclaimer appear in supporting
1.4       kstailey   11: documentation, and that the name Lucent Technologies or any of
                     12: its entities not be used in advertising or publicity pertaining
                     13: to distribution of the software without specific, written prior
                     14: permission.
                     15:
                     16: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
                     17: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
                     18: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
                     19: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     20: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
                     21: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
                     22: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
                     23: THIS SOFTWARE.
1.1       tholo      24: ****************************************************************/
                     25:
                     26: #define DEBUG
                     27: #include <stdio.h>
                     28: #include <string.h>
1.53      millert    29: #include <strings.h>
1.1       tholo      30: #include <ctype.h>
                     31: #include <errno.h>
                     32: #include <stdlib.h>
1.7       millert    33: #include <stdarg.h>
1.33      millert    34: #include <limits.h>
1.44      millert    35: #include <math.h>
1.1       tholo      36: #include "awk.h"
                     37:
/* Defined elsewhere; used by fldbld() to step over one character when FS is "". */
extern int u8_nextlen(const char *s);

/* Shared empty string; cleared fields point here instead of at heap storage. */
char	EMPTY[] = { '\0' };
FILE	*infile	= NULL;		/* current input stream; NULL means "open the next file" */
bool	innew;		/* true = infile has not been read by readrec */
char	*file	= EMPTY;	/* name of the current input argument (set in getrec) */
char	*record;		/* buffer holding $0; allocated in recinit() */
int	recsize	= RECSIZE;	/* size passed to adjbuf()/fnematch() when growing record buffers */
char	*fields;		/* storage for the NUL-separated field strings built by fldbld() */
int	fieldssize = RECSIZE;	/* longest record length that fields[] is known to accommodate */

Cell	**fldtab;	/* pointers to Cells */
static size_t	len_inputFS = 0;	/* allocated size of inputFS, in bytes */
static char	*inputFS = NULL; /* FS at time of input, for field splitting */

#define	MAXFLD	2
int	nfields	= MAXFLD;	/* last allocated slot for $i */

bool	donefld;	/* true = implies rec broken into fields */
bool	donerec;	/* true = record is valid (no flds have changed) */

int	lastfld	= 0;	/* last used field */
int	argno	= 1;	/* current input argument number */
extern Awkfloat *ARGC;

/* Templates copied into freshly allocated cells: dollar0 for $0, dollar1 for $1.. */
static Cell dollar0 = { OCELL, CFLD, NULL, EMPTY, 0.0, REC|STR|DONTFREE, NULL, NULL };
static Cell dollar1 = { OCELL, CFLD, NULL, EMPTY, 0.0, FLD|STR|DONTFREE, NULL, NULL };
1.4       kstailey   65:
1.1       tholo      66: void recinit(unsigned int n)
                     67: {
1.42      millert    68:        if ( (record = (char *) malloc(n)) == NULL
                     69:          || (fields = (char *) malloc(n+1)) == NULL
                     70:          || (fldtab = (Cell **) calloc(nfields+2, sizeof(*fldtab))) == NULL
                     71:          || (fldtab[0] = (Cell *) malloc(sizeof(**fldtab))) == NULL)
1.7       millert    72:                FATAL("out of space for $0 and fields");
1.22      millert    73:        *record = '\0';
1.4       kstailey   74:        *fldtab[0] = dollar0;
                     75:        fldtab[0]->sval = record;
                     76:        fldtab[0]->nval = tostring("0");
                     77:        makefields(1, nfields);
                     78: }
                     79:
                     80: void makefields(int n1, int n2)                /* create $n1..$n2 inclusive */
                     81: {
                     82:        char temp[50];
1.1       tholo      83:        int i;
                     84:
1.4       kstailey   85:        for (i = n1; i <= n2; i++) {
1.42      millert    86:                fldtab[i] = (Cell *) malloc(sizeof(**fldtab));
1.4       kstailey   87:                if (fldtab[i] == NULL)
1.7       millert    88:                        FATAL("out of space in makefields %d", i);
1.4       kstailey   89:                *fldtab[i] = dollar1;
1.31      millert    90:                snprintf(temp, sizeof(temp), "%d", i);
1.4       kstailey   91:                fldtab[i]->nval = tostring(temp);
                     92:        }
1.1       tholo      93: }
                     94:
                     95: void initgetrec(void)
                     96: {
                     97:        int i;
                     98:        char *p;
                     99:
                    100:        for (i = 1; i < *ARGC; i++) {
1.20      millert   101:                p = getargv(i); /* find 1st real filename */
                    102:                if (p == NULL || *p == '\0') {  /* deleted or zapped */
                    103:                        argno++;
                    104:                        continue;
                    105:                }
                    106:                if (!isclvar(p)) {
                    107:                        setsval(lookup("FILENAME", symtab), p);
1.1       tholo     108:                        return;
                    109:                }
                    110:                setclvar(p);    /* a commandline assignment before filename */
                    111:                argno++;
                    112:        }
                    113:        infile = stdin;         /* no filenames, so use stdin */
1.34      millert   114:        innew = true;
1.1       tholo     115: }
                    116:
1.29      millert   117: /*
                    118:  * POSIX specifies that fields are supposed to be evaluated as if they were
                    119:  * split using the value of FS at the time that the record's value ($0) was
                    120:  * read.
                    121:  *
                    122:  * Since field-splitting is done lazily, we save the current value of FS
                    123:  * whenever a new record is read in (implicitly or via getline), or when
                    124:  * a new value is assigned to $0.
                    125:  */
                    126: void savefs(void)
                    127: {
1.37      millert   128:        size_t len = strlen(getsval(fsloc));
                    129:        if (len >= len_inputFS) {
                    130:                len_inputFS = len + 1;
1.42      millert   131:                inputFS = (char *) realloc(inputFS, len_inputFS);
1.37      millert   132:                if (inputFS == NULL)
                    133:                        FATAL("field separator %.10s... is too long", *FS);
1.33      millert   134:        }
1.37      millert   135:        if (strlcpy(inputFS, *FS, len_inputFS) >= len_inputFS)
1.29      millert   136:                FATAL("field separator %.10s... is too long", *FS);
                    137: }
                    138:
1.32      millert   139: static bool firsttime = true;
1.15      millert   140:
1.32      millert   141: int getrec(char **pbuf, int *pbufsize, bool isrecord)  /* get next input record */
1.4       kstailey  142: {                      /* note: cares whether buf == record */
1.1       tholo     143:        int c;
1.4       kstailey  144:        char *buf = *pbuf;
1.18      millert   145:        uschar saveb0;
                    146:        int bufsize = *pbufsize, savebufsize = bufsize;
1.1       tholo     147:
                    148:        if (firsttime) {
1.32      millert   149:                firsttime = false;
1.1       tholo     150:                initgetrec();
                    151:        }
1.39      millert   152:        DPRINTF("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
                    153:                *RS, *FS, *ARGC, *FILENAME);
1.18      millert   154:        saveb0 = buf[0];
1.1       tholo     155:        buf[0] = 0;
                    156:        while (argno < *ARGC || infile == stdin) {
1.39      millert   157:                DPRINTF("argno=%d, file=|%s|\n", argno, file);
1.1       tholo     158:                if (infile == NULL) {   /* have to open a new file */
                    159:                        file = getargv(argno);
1.20      millert   160:                        if (file == NULL || *file == '\0') {    /* deleted or zapped */
1.1       tholo     161:                                argno++;
                    162:                                continue;
                    163:                        }
                    164:                        if (isclvar(file)) {    /* a var=value arg */
                    165:                                setclvar(file);
                    166:                                argno++;
                    167:                                continue;
                    168:                        }
                    169:                        *FILENAME = file;
1.39      millert   170:                        DPRINTF("opening file %s\n", file);
1.1       tholo     171:                        if (*file == '-' && *(file+1) == '\0')
                    172:                                infile = stdin;
1.4       kstailey  173:                        else if ((infile = fopen(file, "r")) == NULL)
1.7       millert   174:                                FATAL("can't open file %s", file);
1.45      millert   175:                        innew = true;
1.1       tholo     176:                        setfval(fnrloc, 0.0);
                    177:                }
1.34      millert   178:                c = readrec(&buf, &bufsize, infile, innew);
                    179:                if (innew)
                    180:                        innew = false;
1.1       tholo     181:                if (c != 0 || buf[0] != '\0') { /* normal record */
1.4       kstailey  182:                        if (isrecord) {
1.42      millert   183:                                double result;
                    184:
1.4       kstailey  185:                                if (freeable(fldtab[0]))
                    186:                                        xfree(fldtab[0]->sval);
                    187:                                fldtab[0]->sval = buf;  /* buf == record */
                    188:                                fldtab[0]->tval = REC | STR | DONTFREE;
1.42      millert   189:                                if (is_number(fldtab[0]->sval, & result)) {
                    190:                                        fldtab[0]->fval = result;
1.4       kstailey  191:                                        fldtab[0]->tval |= NUM;
1.1       tholo     192:                                }
1.50      millert   193:                                donefld = false;
                    194:                                donerec = true;
                    195:                                savefs();
1.1       tholo     196:                        }
                    197:                        setfval(nrloc, nrloc->fval+1);
                    198:                        setfval(fnrloc, fnrloc->fval+1);
1.4       kstailey  199:                        *pbuf = buf;
                    200:                        *pbufsize = bufsize;
1.1       tholo     201:                        return 1;
                    202:                }
                    203:                /* EOF arrived on this file; set up next */
                    204:                if (infile != stdin)
                    205:                        fclose(infile);
                    206:                infile = NULL;
                    207:                argno++;
                    208:        }
1.18      millert   209:        buf[0] = saveb0;
1.4       kstailey  210:        *pbuf = buf;
1.18      millert   211:        *pbufsize = savebufsize;
1.1       tholo     212:        return 0;       /* true end of file */
                    213: }
                    214:
                    215: void nextfile(void)
                    216: {
1.18      millert   217:        if (infile != NULL && infile != stdin)
1.1       tholo     218:                fclose(infile);
                    219:        infile = NULL;
                    220:        argno++;
                    221: }
                    222:
1.51      millert   223: extern int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag);
                    224:
1.34      millert   225: int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag)       /* read one record into buf */
1.1       tholo     226: {
1.51      millert   227:        int sep, c, isrec; // POTENTIAL BUG? isrec is a macro in awk.h
                    228:        char *rr = *pbuf, *buf = *pbuf;
1.4       kstailey  229:        int bufsize = *pbufsize;
1.27      millert   230:        char *rs = getsval(rsloc);
1.1       tholo     231:
1.51      millert   232:        if (CSV) {
                    233:                c = readcsvrec(pbuf, pbufsize, inf, newflag);
                    234:                isrec = (c == EOF && rr == buf) ? false : true;
                    235:        } else if (*rs && rs[1]) {
1.32      millert   236:                bool found;
1.30      millert   237:
1.52      millert   238:                memset(buf, 0, bufsize);
1.30      millert   239:                fa *pfa = makedfa(rs, 1);
1.34      millert   240:                if (newflag)
                    241:                        found = fnematch(pfa, inf, &buf, &bufsize, recsize);
                    242:                else {
                    243:                        int tempstat = pfa->initstat;
                    244:                        pfa->initstat = 2;
                    245:                        found = fnematch(pfa, inf, &buf, &bufsize, recsize);
                    246:                        pfa->initstat = tempstat;
                    247:                }
1.30      millert   248:                if (found)
1.31      millert   249:                        setptr(patbeg, '\0');
1.54      millert   250:                isrec = (found == 0 && *buf == '\0') ? false : true;
1.30      millert   251:        } else {
                    252:                if ((sep = *rs) == 0) {
                    253:                        sep = '\n';
                    254:                        while ((c=getc(inf)) == '\n' && c != EOF)       /* skip leading \n's */
                    255:                                ;
                    256:                        if (c != EOF)
                    257:                                ungetc(c, inf);
                    258:                }
                    259:                for (rr = buf; ; ) {
                    260:                        for (; (c=getc(inf)) != sep && c != EOF; ) {
                    261:                                if (rr-buf+1 > bufsize)
                    262:                                        if (!adjbuf(&buf, &bufsize, 1+rr-buf,
                    263:                                            recsize, &rr, "readrec 1"))
                    264:                                                FATAL("input record `%.30s...' too long", buf);
                    265:                                *rr++ = c;
                    266:                        }
                    267:                        if (*rs == sep || c == EOF)
                    268:                                break;
                    269:                        if ((c = getc(inf)) == '\n' || c == EOF)        /* 2 in a row */
                    270:                                break;
                    271:                        if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr,
                    272:                            "readrec 2"))
                    273:                                FATAL("input record `%.30s...' too long", buf);
                    274:                        *rr++ = '\n';
1.4       kstailey  275:                        *rr++ = c;
                    276:                }
1.30      millert   277:                if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
1.7       millert   278:                        FATAL("input record `%.30s...' too long", buf);
1.30      millert   279:                *rr = 0;
1.54      millert   280:                isrec = (c == EOF && rr == buf) ? false : true;
1.1       tholo     281:        }
1.4       kstailey  282:        *pbuf = buf;
                    283:        *pbufsize = bufsize;
1.39      millert   284:        DPRINTF("readrec saw <%s>, returns %d\n", buf, isrec);
1.30      millert   285:        return isrec;
1.1       tholo     286: }
                    287:
1.51      millert   288:
                    289: /*******************
                    290:  * loose ends here:
                    291:  *   \r\n should become \n
                    292:  *   what about bare \r?  Excel uses that for embedded newlines
                    293:  *   can't have "" in unquoted fields, according to RFC 4180
                    294: */
                    295:
                    296: int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* csv can have \n's */
                    297: {                      /* so read a complete record that might be multiple lines */
                    298:        int sep, c;
                    299:        char *rr = *pbuf, *buf = *pbuf;
                    300:        int bufsize = *pbufsize;
                    301:        bool in_quote = false;
                    302:
                    303:        sep = '\n'; /* the only separator; have to skip over \n embedded in "..." */
                    304:        rr = buf;
                    305:        while ((c = getc(inf)) != EOF) {
                    306:                if (c == sep) {
                    307:                        if (! in_quote)
                    308:                                break;
                    309:                        if (rr > buf && rr[-1] == '\r') // remove \r if was \r\n
                    310:                                rr--;
                    311:                }
                    312:
                    313:                if (rr-buf+1 > bufsize)
                    314:                        if (!adjbuf(&buf, &bufsize, 1+rr-buf,
                    315:                            recsize, &rr, "readcsvrec 1"))
                    316:                                FATAL("input record `%.30s...' too long", buf);
                    317:                *rr++ = c;
                    318:                if (c == '"')
                    319:                        in_quote = ! in_quote;
                    320:        }
                    321:        if (c == '\n' && rr > buf && rr[-1] == '\r')    // remove \r if was \r\n
                    322:                rr--;
                    323:
                    324:        if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readcsvrec 4"))
                    325:                FATAL("input record `%.30s...' too long", buf);
                    326:        *rr = 0;
                    327:        *pbuf = buf;
                    328:        *pbufsize = bufsize;
                    329:        DPRINTF("readcsvrec saw <%s>, returns %d\n", buf, c);
                    330:        return c;
                    331: }
                    332:
1.1       tholo     333: char *getargv(int n)   /* get ARGV[n] */
                    334: {
1.57    ! jsg       335:        Array *ap;
1.1       tholo     336:        Cell *x;
1.4       kstailey  337:        char *s, temp[50];
1.57    ! jsg       338:        extern Cell *ARGVcell;
1.1       tholo     339:
1.57    ! jsg       340:        ap = (Array *)ARGVcell->sval;
1.31      millert   341:        snprintf(temp, sizeof(temp), "%d", n);
1.57    ! jsg       342:        if (lookup(temp, ap) == NULL)
1.20      millert   343:                return NULL;
1.57    ! jsg       344:        x = setsymtab(temp, "", 0.0, STR, ap);
1.1       tholo     345:        s = getsval(x);
1.39      millert   346:        DPRINTF("getargv(%d) returns |%s|\n", n, s);
1.1       tholo     347:        return s;
                    348: }
                    349:
                    350: void setclvar(char *s) /* set var=value from s */
                    351: {
1.48      millert   352:        char *e, *p;
1.1       tholo     353:        Cell *q;
1.42      millert   354:        double result;
1.1       tholo     355:
1.51      millert   356: /* commit f3d9187d4e0f02294fb1b0e31152070506314e67 broke T.argv test */
                    357: /* I don't understand why it was changed. */
                    358:
1.1       tholo     359:        for (p=s; *p != '='; p++)
                    360:                ;
1.48      millert   361:        e = p;
1.1       tholo     362:        *p++ = 0;
                    363:        p = qstring(p, '\0');
                    364:        q = setsymtab(s, p, 0.0, STR, symtab);
                    365:        setsval(q, p);
1.42      millert   366:        if (is_number(q->sval, & result)) {
                    367:                q->fval = result;
1.1       tholo     368:                q->tval |= NUM;
                    369:        }
1.39      millert   370:        DPRINTF("command line set %s to |%s|\n", s, p);
1.49      millert   371:        free(p);
1.48      millert   372:        *e = '=';
1.1       tholo     373: }
                    374:
                    375:
/*
 * fldbld: split the current record ($0) into fields $1..$NF.
 *
 * Does nothing if the fields are already up to date (donefld).  The
 * separator used is inputFS, the copy of FS saved by savefs(), not the
 * current value of FS.  Splitting strategy, in order of testing:
 *   - not CSV and inputFS longer than one char: handed to refldbld();
 *   - not CSV and inputFS is a single space: runs of blank, tab and
 *     newline separate fields, leading/trailing ones are ignored;
 *   - CSV mode: comma separated, with "..." quoting and "" as a quote;
 *   - inputFS empty: one character (as measured by u8_nextlen) per field;
 *   - otherwise: split on the single separator character.
 * Afterwards stale fields from the previous record are cleared, numeric
 * looking fields are flagged NUM, and NF is set.
 */
void fldbld(void)	/* create fields from current record */
{
	/* this relies on having fields[] the same length as $0 */
	/* the fields are all stored in this one array with \0's */
	/* possibly with a final trailing \0 not associated with any field */
	char *r, *fr, sep;	/* r: read cursor in $0; fr: write cursor in fields[] */
	Cell *p;
	int i, j, n;

	if (donefld)
		return;
	if (!isstr(fldtab[0]))
		getsval(fldtab[0]);	/* make sure $0 has a string value */
	r = fldtab[0]->sval;
	n = strlen(r);
	if (n > fieldssize) {
		/* record is longer than anything seen so far: get a bigger area */
		xfree(fields);
		if ((fields = (char *) malloc(n+2)) == NULL) /* possibly 2 final \0s */
			FATAL("out of space for fields in fldbld %d", n);
		fieldssize = n;
	}
	fr = fields;
	i = 0;	/* number of fields accumulated here */
	if (inputFS == NULL)	/* make sure we have a copy of FS */
		savefs();
	if (!CSV && strlen(inputFS) > 1) {	/* it's a regular expression */
		i = refldbld(r, inputFS);
	} else if (!CSV && (sep = *inputFS) == ' ') {	/* default whitespace */
		for (i = 0; ; ) {
			/* skip the separator run in front of the next field */
			while (*r == ' ' || *r == '\t' || *r == '\n')
				r++;
			if (*r == 0)
				break;
			i++;
			if (i > nfields)
				growfldtab(i);
			if (freeable(fldtab[i]))
				xfree(fldtab[i]->sval);
			/* the field's text lives in fields[], so it must not be freed */
			fldtab[i]->sval = fr;
			fldtab[i]->tval = FLD | STR | DONTFREE;
			/* copy up to the next blank, tab, newline or end of record */
			do
				*fr++ = *r++;
			while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
			*fr++ = 0;
		}
		*fr = 0;
	} else if (CSV) {	/* CSV processing.  no error handling */
		if (*r != 0) {
			for (;;) {
				i++;
				if (i > nfields)
					growfldtab(i);
				if (freeable(fldtab[i]))
					xfree(fldtab[i]->sval);
				fldtab[i]->sval = fr;
				fldtab[i]->tval = FLD | STR | DONTFREE;
				if (*r == '"' ) { /* start of "..." */
					for (r++ ; *r != '\0'; ) {
						if (*r == '"' && r[1] != '\0' && r[1] == '"') {
							r += 2; /* doubled quote */
							*fr++ = '"';
						} else if (*r == '"' && (r[1] == '\0' || r[1] == ',')) {
							r++; /* skip over closing quote */
							break;
						} else {
							/* ordinary character, or a quote not followed by , or end */
							*fr++ = *r++;
						}
					}
					*fr++ = 0;
				} else {	/* unquoted field */
					while (*r != ',' && *r != '\0')
						*fr++ = *r++;
					*fr++ = 0;
				}
				/* step over the comma; stop if that was the end of the record */
				if (*r++ == 0)
					break;

			}
		}
		*fr = 0;
	} else if ((sep = *inputFS) == 0) {	/* new: FS="" => 1 char/field */
		for (i = 0; *r != '\0'; ) {
			char buf[10];	/* holds one character of u8_nextlen(r) bytes plus NUL */
			i++;
			if (i > nfields)
				growfldtab(i);
			if (freeable(fldtab[i]))
				xfree(fldtab[i]->sval);
			n = u8_nextlen(r);	/* note: n no longer holds strlen($0) */
			for (j = 0; j < n; j++)
				buf[j] = *r++;
			buf[j] = '\0';
			/* value comes from tostring(), not fields[]; DONTFREE is not set */
			fldtab[i]->sval = tostring(buf);
			fldtab[i]->tval = FLD | STR;
		}
		*fr = 0;
	} else if (*r != 0) {	/* if 0, it's a null field */
		/* subtle case: if length(FS) == 1 && length(RS) > 0
		 * \n is NOT a field separator (cf awk book 61,84).
		 * this variable is tested in the inner while loop.
		 */
		int rtest = '\n';  /* normal case */
		if (strlen(*RS) > 0)
			rtest = '\0';
		for (;;) {
			i++;
			if (i > nfields)
				growfldtab(i);
			if (freeable(fldtab[i]))
				xfree(fldtab[i]->sval);
			fldtab[i]->sval = fr;
			fldtab[i]->tval = FLD | STR | DONTFREE;
			/* stop at sep, at rtest (newline only when RS is empty), or at end */
			while (*r != sep && *r != rtest && *r != '\0')
				*fr++ = *r++;
			*fr++ = 0;
			/* step over the separator; stop if that was the end of the record */
			if (*r++ == 0)
				break;
		}
		*fr = 0;
	}
	if (i > nfields)
		FATAL("record `%.30s...' has too many fields; can't happen", r);
	cleanfld(i+1, lastfld);	/* clean out junk from previous record */
	lastfld = i;
	donefld = true;
	/* flag every field whose text looks like a number */
	for (j = 1; j <= lastfld; j++) {
		double result;

		p = fldtab[j];
		if(is_number(p->sval, & result)) {
			p->fval = result;
			p->tval |= NUM;
		}
	}
	setfval(nfloc, (Awkfloat) lastfld);
	/* NOTE(review): presumably the setfval on NF above clears donerec -- confirm */
	donerec = true;	/* restore */
	if (dbg) {
		for (j = 0; j <= lastfld; j++) {
			p = fldtab[j];
			printf("field %d (%s): |%s|\n", j, p->nval, p->sval);
		}
	}
}
                    519:
1.4       kstailey  520: void cleanfld(int n1, int n2)  /* clean out fields n1 .. n2 inclusive */
                    521: {                              /* nvals remain intact */
                    522:        Cell *p;
                    523:        int i;
1.1       tholo     524:
1.4       kstailey  525:        for (i = n1; i <= n2; i++) {
                    526:                p = fldtab[i];
                    527:                if (freeable(p))
1.1       tholo     528:                        xfree(p->sval);
1.33      millert   529:                p->sval = EMPTY,
1.1       tholo     530:                p->tval = FLD | STR | DONTFREE;
                    531:        }
                    532: }
                    533:
1.4       kstailey  534: void newfld(int n)     /* add field n after end of existing lastfld */
1.1       tholo     535: {
1.4       kstailey  536:        if (n > nfields)
                    537:                growfldtab(n);
                    538:        cleanfld(lastfld+1, n);
                    539:        lastfld = n;
1.1       tholo     540:        setfval(nfloc, (Awkfloat) n);
1.26      millert   541: }
                    542:
                    543: void setlastfld(int n) /* set lastfld cleaning fldtab cells if necessary */
                    544: {
1.27      millert   545:        if (n < 0)
                    546:                FATAL("cannot set NF to a negative value");
1.26      millert   547:        if (n > nfields)
                    548:                growfldtab(n);
                    549:
                    550:        if (lastfld < n)
                    551:            cleanfld(lastfld+1, n);
                    552:        else
                    553:            cleanfld(n+1, lastfld);
                    554:
                    555:        lastfld = n;
1.1       tholo     556: }
                    557:
1.4       kstailey  558: Cell *fieldadr(int n)  /* get nth field */
                    559: {
                    560:        if (n < 0)
1.15      millert   561:                FATAL("trying to access out of range field %d", n);
1.4       kstailey  562:        if (n > nfields)        /* fields after NF are empty */
                    563:                growfldtab(n);  /* but does not increase NF */
                    564:        return(fldtab[n]);
                    565: }
                    566:
                    567: void growfldtab(int n) /* make new fields up to at least $n */
                    568: {
                    569:        int nf = 2 * nfields;
1.15      millert   570:        size_t s;
1.4       kstailey  571:
                    572:        if (n > nf)
                    573:                nf = n;
1.15      millert   574:        s = (nf+1) * (sizeof (struct Cell *));  /* freebsd: how much do we need? */
1.34      millert   575:        if (s / sizeof(struct Cell *) - 1 == (size_t)nf) /* didn't overflow */
1.42      millert   576:                fldtab = (Cell **) realloc(fldtab, s);
1.15      millert   577:        else                                    /* overflow sizeof int */
                    578:                xfree(fldtab);  /* make it null */
1.4       kstailey  579:        if (fldtab == NULL)
1.7       millert   580:                FATAL("out of space creating %d fields", nf);
1.4       kstailey  581:        makefields(nfields+1, nf);
                    582:        nfields = nf;
                    583: }
                    584:
1.11      millert   585: int refldbld(const char *rec, const char *fs)  /* build fields from reg expr in FS */
1.1       tholo     586: {
1.4       kstailey  587:        /* this relies on having fields[] the same length as $0 */
                    588:        /* the fields are all stored in this one array with \0's */
1.1       tholo     589:        char *fr;
1.4       kstailey  590:        int i, tempstat, n;
1.1       tholo     591:        fa *pfa;
                    592:
1.4       kstailey  593:        n = strlen(rec);
                    594:        if (n > fieldssize) {
                    595:                xfree(fields);
1.42      millert   596:                if ((fields = (char *) malloc(n+1)) == NULL)
1.7       millert   597:                        FATAL("out of space for fields in refldbld %d", n);
1.4       kstailey  598:                fieldssize = n;
                    599:        }
1.1       tholo     600:        fr = fields;
                    601:        *fr = '\0';
                    602:        if (*rec == '\0')
                    603:                return 0;
                    604:        pfa = makedfa(fs, 1);
1.39      millert   605:        DPRINTF("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs);
1.1       tholo     606:        tempstat = pfa->initstat;
1.4       kstailey  607:        for (i = 1; ; i++) {
1.37      millert   608:                const size_t fss_rem = fields + fieldssize + 1 - fr;
1.4       kstailey  609:                if (i > nfields)
                    610:                        growfldtab(i);
                    611:                if (freeable(fldtab[i]))
                    612:                        xfree(fldtab[i]->sval);
                    613:                fldtab[i]->tval = FLD | STR | DONTFREE;
                    614:                fldtab[i]->sval = fr;
1.39      millert   615:                DPRINTF("refldbld: i=%d\n", i);
1.1       tholo     616:                if (nematch(pfa, rec)) {
1.37      millert   617:                        const size_t reclen = patbeg - rec;
1.4       kstailey  618:                        pfa->initstat = 2;      /* horrible coupling to b.c */
1.39      millert   619:                        DPRINTF("match %s (%d chars)\n", patbeg, patlen);
1.37      millert   620:                        if (reclen >= fss_rem)
                    621:                                FATAL("out of space for fields in refldbld");
                    622:                        memcpy(fr, rec, reclen);
                    623:                        fr += reclen;
                    624:                        *fr++ = '\0';
1.1       tholo     625:                        rec = patbeg + patlen;
                    626:                } else {
1.39      millert   627:                        DPRINTF("no match %s\n", rec);
1.37      millert   628:                        if (strlcpy(fr, rec, fss_rem) >= fss_rem)
                    629:                                FATAL("out of space for fields in refldbld");
1.1       tholo     630:                        pfa->initstat = tempstat;
                    631:                        break;
                    632:                }
                    633:        }
1.29      millert   634:        return i;
1.1       tholo     635: }
                    636:
                    637: void recbld(void)      /* create $0 from $1..$NF if necessary */
                    638: {
                    639:        int i;
                    640:        char *r, *p;
1.27      millert   641:        char *sep = getsval(ofsloc);
1.1       tholo     642:
1.32      millert   643:        if (donerec)
1.1       tholo     644:                return;
1.5       millert   645:        r = record;
1.1       tholo     646:        for (i = 1; i <= *NF; i++) {
1.4       kstailey  647:                p = getsval(fldtab[i]);
1.5       millert   648:                if (!adjbuf(&record, &recsize, 1+strlen(p)+r-record, recsize, &r, "recbld 1"))
1.7       millert   649:                        FATAL("created $0 `%.30s...' too long", record);
1.4       kstailey  650:                while ((*r = *p++) != 0)
1.1       tholo     651:                        r++;
1.4       kstailey  652:                if (i < *NF) {
1.27      millert   653:                        if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
1.7       millert   654:                                FATAL("created $0 `%.30s...' too long", record);
1.27      millert   655:                        for (p = sep; (*r = *p++) != 0; )
1.1       tholo     656:                                r++;
1.4       kstailey  657:                }
1.1       tholo     658:        }
1.5       millert   659:        if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
1.7       millert   660:                FATAL("built giant record `%.30s...'", record);
1.1       tholo     661:        *r = '\0';
1.39      millert   662:        DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
1.4       kstailey  663:
                    664:        if (freeable(fldtab[0]))
                    665:                xfree(fldtab[0]->sval);
                    666:        fldtab[0]->tval = REC | STR | DONTFREE;
                    667:        fldtab[0]->sval = record;
                    668:
1.39      millert   669:        DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
                    670:        DPRINTF("recbld = |%s|\n", record);
1.32      millert   671:        donerec = true;
1.1       tholo     672: }
                    673:
                    674: int    errorflag       = 0;
                    675:
/* yyerror: hand the message s to SYNTAX() as a literal string */
void yyerror(const char *s)
{
	SYNTAX("%s", s);
}
                    680:
1.11      millert   681: void SYNTAX(const char *fmt, ...)
1.7       millert   682: {
1.1       tholo     683:        extern char *cmdname, *curfname;
                    684:        static int been_here = 0;
1.7       millert   685:        va_list varg;
1.1       tholo     686:
                    687:        if (been_here++ > 2)
                    688:                return;
1.7       millert   689:        fprintf(stderr, "%s: ", cmdname);
                    690:        va_start(varg, fmt);
                    691:        vfprintf(stderr, fmt, varg);
                    692:        va_end(varg);
1.1       tholo     693:        fprintf(stderr, " at source line %d", lineno);
                    694:        if (curfname != NULL)
                    695:                fprintf(stderr, " in function %s", curfname);
1.32      millert   696:        if (compile_time == COMPILING && cursource() != NULL)
1.6       millert   697:                fprintf(stderr, " source file %s", cursource());
1.1       tholo     698:        fprintf(stderr, "\n");
                    699:        errorflag = 2;
                    700:        eprint();
                    701: }
                    702:
                    703: extern int bracecnt, brackcnt, parencnt;
                    704:
                    705: void bracecheck(void)
                    706: {
                    707:        int c;
                    708:        static int beenhere = 0;
                    709:
                    710:        if (beenhere++)
                    711:                return;
1.3       millert   712:        while ((c = input()) != EOF && c != '\0')
1.1       tholo     713:                bclass(c);
                    714:        bcheck2(bracecnt, '{', '}');
                    715:        bcheck2(brackcnt, '[', ']');
                    716:        bcheck2(parencnt, '(', ')');
                    717: }
                    718:
                    719: void bcheck2(int n, int c1, int c2)
                    720: {
                    721:        if (n == 1)
                    722:                fprintf(stderr, "\tmissing %c\n", c2);
                    723:        else if (n > 1)
                    724:                fprintf(stderr, "\t%d missing %c's\n", n, c2);
                    725:        else if (n == -1)
                    726:                fprintf(stderr, "\textra %c\n", c2);
                    727:        else if (n < -1)
                    728:                fprintf(stderr, "\t%d extra %c's\n", -n, c2);
                    729: }
                    730:
1.35      millert   731: void FATAL(const char *fmt, ...)
1.7       millert   732: {
                    733:        extern char *cmdname;
                    734:        va_list varg;
                    735:
                    736:        fflush(stdout);
                    737:        fprintf(stderr, "%s: ", cmdname);
                    738:        va_start(varg, fmt);
                    739:        vfprintf(stderr, fmt, varg);
                    740:        va_end(varg);
                    741:        error();
                    742:        if (dbg > 1)            /* core dump if serious debugging on */
                    743:                abort();
                    744:        exit(2);
                    745: }
                    746:
/*
 * WARNING: print a printf-style message on stderr, prefixed with
 * cmdname and followed by the context that error() adds.  stdout is
 * flushed first; execution continues afterwards.
 */
void WARNING(const char *fmt, ...)
{
	extern char *cmdname;
	va_list ap;

	fflush(stdout);
	fprintf(stderr, "%s: ", cmdname);

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	error();
}
                    759:
                    760: void error()
                    761: {
                    762:        extern Node *curnode;
                    763:
1.1       tholo     764:        fprintf(stderr, "\n");
1.32      millert   765:        if (compile_time != ERROR_PRINTING) {
                    766:                if (NR && *NR > 0) {
                    767:                        fprintf(stderr, " input record number %d", (int) (*FNR));
                    768:                        if (strcmp(*FILENAME, "-") != 0)
                    769:                                fprintf(stderr, ", file %s", *FILENAME);
                    770:                        fprintf(stderr, "\n");
                    771:                }
                    772:                if (curnode)
                    773:                        fprintf(stderr, " source line number %d", curnode->lineno);
                    774:                else if (lineno)
                    775:                        fprintf(stderr, " source line number %d", lineno);
1.41      millert   776:                if (compile_time == COMPILING && cursource() != NULL)
                    777:                        fprintf(stderr, " source file %s", cursource());
                    778:                fprintf(stderr, "\n");
                    779:                eprint();
1.32      millert   780:        }
1.1       tholo     781: }
                    782:
                    783: void eprint(void)      /* try to print context around error */
                    784: {
                    785:        char *p, *q;
                    786:        int c;
                    787:        static int been_here = 0;
                    788:        extern char ebuf[], *ep;
                    789:
1.32      millert   790:        if (compile_time != COMPILING || been_here++ > 0 || ebuf == ep)
1.1       tholo     791:                return;
                    792:        p = ep - 1;
                    793:        if (p > ebuf && *p == '\n')
                    794:                p--;
                    795:        for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--)
                    796:                ;
                    797:        while (*p == '\n')
                    798:                p++;
                    799:        fprintf(stderr, " context is\n\t");
                    800:        for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
                    801:                ;
                    802:        for ( ; p < q; p++)
                    803:                if (*p)
                    804:                        putc(*p, stderr);
                    805:        fprintf(stderr, " >>> ");
                    806:        for ( ; p < ep; p++)
                    807:                if (*p)
                    808:                        putc(*p, stderr);
                    809:        fprintf(stderr, " <<< ");
                    810:        if (*ep)
                    811:                while ((c = input()) != '\n' && c != '\0' && c != EOF) {
                    812:                        putc(c, stderr);
                    813:                        bclass(c);
                    814:                }
                    815:        putc('\n', stderr);
                    816:        ep = ebuf;
                    817: }
                    818:
                    819: void bclass(int c)
                    820: {
                    821:        switch (c) {
                    822:        case '{': bracecnt++; break;
                    823:        case '}': bracecnt--; break;
                    824:        case '[': brackcnt++; break;
                    825:        case ']': brackcnt--; break;
                    826:        case '(': parencnt++; break;
                    827:        case ')': parencnt--; break;
                    828:        }
                    829: }
                    830:
/*
 * errcheck: check errno after a math routine named s produced x.
 * On EDOM or ERANGE errno is cleared, a warning is issued and 1 is
 * returned in place of x; otherwise x is returned unchanged.
 */
double errcheck(double x, const char *s)
{
	switch (errno) {
	case EDOM:
		errno = 0;
		WARNING("%s argument out of domain", s);
		x = 1;
		break;
	case ERANGE:
		errno = 0;
		WARNING("%s result out of range", s);
		x = 1;
		break;
	default:
		break;
	}
	return x;
}
                    845:
/*
 * isclvar: is s of the form var=something ?
 * Returns 1 when s starts with a letter or underscore, continues with
 * letters, digits or underscores, and the first other character is
 * '='; returns 0 otherwise.
 */
int isclvar(const char *s)
{
	const char *start = s;

	if (*s != '_' && !isalpha((unsigned char)*s))
		return 0;

	/* skip over the identifier characters */
	while (*s != '\0' && (*s == '_' || isalnum((unsigned char)*s)))
		s++;

	return s > start && *s == '=';
}
                    857:
1.4       kstailey  858: /* strtod is supposed to be a proper test of what's a valid number */
1.8       millert   859: /* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */
                    860: /* wrong: violates 4.10.1.4 of ansi C standard */
1.42      millert   861:
1.38      millert   862: /* well, not quite. As of C99, hex floating point is allowed. so this is
1.42      millert   863:  * a bit of a mess. We work around the mess by checking for a hexadecimal
                    864:  * value and disallowing it. Similarly, we now follow gawk and allow only
                    865:  * +nan, -nan, +inf, and -inf for NaN and infinity values.
1.38      millert   866:  */
1.1       tholo     867:
1.42      millert   868: /*
                    869:  * This routine now has a more complicated interface, the main point
                    870:  * being to avoid the double conversion of a string to double, and
                    871:  * also to convey out, if requested, the information that the numeric
                    872:  * value was a leading string or is all of the string. The latter bit
                    873:  * is used in getfval().
                    874:  */
                    875:
                    876: bool is_valid_number(const char *s, bool trailing_stuff_ok,
                    877:                        bool *no_trailing, double *result)
1.1       tholo     878: {
1.4       kstailey  879:        double r;
                    880:        char *ep;
1.42      millert   881:        bool retval = false;
1.44      millert   882:        bool is_nan = false;
                    883:        bool is_inf = false;
1.42      millert   884:
                    885:        if (no_trailing)
                    886:                *no_trailing = false;
                    887:
1.43      millert   888:        while (isspace((uschar)*s))
1.42      millert   889:                s++;
                    890:
1.51      millert   891:        /* no hex floating point, sorry */
1.44      millert   892:        if (s[0] == '0' && tolower((uschar)s[1]) == 'x')
1.42      millert   893:                return false;
                    894:
1.51      millert   895:        /* allow +nan, -nan, +inf, -inf, any other letter, no */
1.42      millert   896:        if (s[0] == '+' || s[0] == '-') {
1.44      millert   897:                is_nan = (strncasecmp(s+1, "nan", 3) == 0);
                    898:                is_inf = (strncasecmp(s+1, "inf", 3) == 0);
                    899:                if ((is_nan || is_inf)
                    900:                    && (isspace((uschar)s[4]) || s[4] == '\0'))
                    901:                        goto convert;
                    902:                else if (! isdigit((uschar)s[1]) && s[1] != '.')
1.42      millert   903:                        return false;
1.44      millert   904:        }
                    905:        else if (! isdigit((uschar)s[0]) && s[0] != '.')
1.42      millert   906:                return false;
                    907:
1.44      millert   908: convert:
1.4       kstailey  909:        errno = 0;
                    910:        r = strtod(s, &ep);
1.43      millert   911:        if (ep == s || errno == ERANGE)
1.42      millert   912:                return false;
                    913:
1.44      millert   914:        if (isnan(r) && s[0] == '-' && signbit(r) == 0)
                    915:                r = -r;
                    916:
1.42      millert   917:        if (result != NULL)
                    918:                *result = r;
                    919:
1.47      millert   920:        /*
                    921:         * check for trailing stuff
                    922:         */
                    923:        while (isspace((uschar)*ep))
                    924:                ep++;
1.42      millert   925:
1.44      millert   926:        if (no_trailing != NULL)
1.42      millert   927:                *no_trailing = (*ep == '\0');
1.47      millert   928:
1.51      millert   929:        /* return true if found the end, or trailing stuff is allowed */
1.47      millert   930:        retval = *ep == '\0' || trailing_stuff_ok;
1.42      millert   931:
                    932:        return retval;
1.1       tholo     933: }