Annotation of src/usr.bin/awk/lib.c, Revision 1.57
1.57 ! jsg 1: /* $OpenBSD: lib.c,v 1.56 2024/05/04 22:59:21 millert Exp $ */
1.1 tholo 2: /****************************************************************
1.4 kstailey 3: Copyright (C) Lucent Technologies 1997
1.1 tholo 4: All Rights Reserved
5:
6: Permission to use, copy, modify, and distribute this software and
7: its documentation for any purpose and without fee is hereby
8: granted, provided that the above copyright notice appear in all
9: copies and that both that the copyright notice and this
10: permission notice and warranty disclaimer appear in supporting
1.4 kstailey 11: documentation, and that the name Lucent Technologies or any of
12: its entities not be used in advertising or publicity pertaining
13: to distribution of the software without specific, written prior
14: permission.
15:
16: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23: THIS SOFTWARE.
1.1 tholo 24: ****************************************************************/
25:
26: #define DEBUG
27: #include <stdio.h>
28: #include <string.h>
1.53 millert 29: #include <strings.h>
1.1 tholo 30: #include <ctype.h>
31: #include <errno.h>
32: #include <stdlib.h>
1.7 millert 33: #include <stdarg.h>
1.33 millert 34: #include <limits.h>
1.44 millert 35: #include <math.h>
1.1 tholo 36: #include "awk.h"
37:
/* Defined outside this file; used by fldbld() to get the byte length of the next character when FS is "". */
1.51 millert 38: extern int u8_nextlen(const char *s);
39:
/* Shared empty string; used as the initial/cleared value of several char * globals and of field svals. */
1.33 millert 40: char EMPTY[] = { '\0' };
/* Current input stream; NULL means getrec() must open the next file argument. */
1.1 tholo 41: FILE *infile = NULL;
1.34 millert 42: bool innew; /* true = infile has not been read by readrec */
/* Name of the current input file argument (set by getrec()). */
1.33 millert 43: char *file = EMPTY;
/* Buffer holding $0; allocated in recinit() and grown via adjbuf() in recbld(). */
1.4 kstailey 44: char *record;
/* Size passed to adjbuf() for the record buffer. */
1.1 tholo 45: int recsize = RECSIZE;
/* One array holding all split-out field strings, NUL-separated (see fldbld()). */
46: char *fields;
/* Record length that fields[] is known to be able to hold; compared against strlen($0) before splitting. */
1.4 kstailey 47: int fieldssize = RECSIZE;
48:
49: Cell **fldtab; /* pointers to Cells */
/* Allocated size of inputFS, maintained by savefs(). */
1.33 millert 50: static size_t len_inputFS = 0;
51: static char *inputFS = NULL; /* FS at time of input, for field splitting */
1.1 tholo 52:
/* Initial number of field slots created by recinit(); growfldtab() enlarges the table on demand. */
1.18 millert 53: #define MAXFLD 2
1.4 kstailey 54: int nfields = MAXFLD; /* last allocated slot for $i */
1.1 tholo 55:
1.32 millert 56: bool donefld; /* true = implies rec broken into fields */
57: bool donerec; /* true = record is valid (no flds have changed) */
1.1 tholo 58:
1.4 kstailey 59: int lastfld = 0; /* last used field */
1.1 tholo 60: int argno = 1; /* current input argument number */
61: extern Awkfloat *ARGC;
62:
/* Templates copied into freshly allocated cells: dollar0 for $0 (REC), dollar1 for $1..$n (FLD). */
1.34 millert 63: static Cell dollar0 = { OCELL, CFLD, NULL, EMPTY, 0.0, REC|STR|DONTFREE, NULL, NULL };
64: static Cell dollar1 = { OCELL, CFLD, NULL, EMPTY, 0.0, FLD|STR|DONTFREE, NULL, NULL };
1.4 kstailey 65:
1.1 tholo 66: void recinit(unsigned int n)
67: {
1.42 millert 68: if ( (record = (char *) malloc(n)) == NULL
69: || (fields = (char *) malloc(n+1)) == NULL
70: || (fldtab = (Cell **) calloc(nfields+2, sizeof(*fldtab))) == NULL
71: || (fldtab[0] = (Cell *) malloc(sizeof(**fldtab))) == NULL)
1.7 millert 72: FATAL("out of space for $0 and fields");
1.22 millert 73: *record = '\0';
1.4 kstailey 74: *fldtab[0] = dollar0;
75: fldtab[0]->sval = record;
76: fldtab[0]->nval = tostring("0");
77: makefields(1, nfields);
78: }
79:
80: void makefields(int n1, int n2) /* create $n1..$n2 inclusive */
81: {
82: char temp[50];
1.1 tholo 83: int i;
84:
1.4 kstailey 85: for (i = n1; i <= n2; i++) {
1.42 millert 86: fldtab[i] = (Cell *) malloc(sizeof(**fldtab));
1.4 kstailey 87: if (fldtab[i] == NULL)
1.7 millert 88: FATAL("out of space in makefields %d", i);
1.4 kstailey 89: *fldtab[i] = dollar1;
1.31 millert 90: snprintf(temp, sizeof(temp), "%d", i);
1.4 kstailey 91: fldtab[i]->nval = tostring(temp);
92: }
1.1 tholo 93: }
94:
95: void initgetrec(void)
96: {
97: int i;
98: char *p;
99:
100: for (i = 1; i < *ARGC; i++) {
1.20 millert 101: p = getargv(i); /* find 1st real filename */
102: if (p == NULL || *p == '\0') { /* deleted or zapped */
103: argno++;
104: continue;
105: }
106: if (!isclvar(p)) {
107: setsval(lookup("FILENAME", symtab), p);
1.1 tholo 108: return;
109: }
110: setclvar(p); /* a commandline assignment before filename */
111: argno++;
112: }
113: infile = stdin; /* no filenames, so use stdin */
1.34 millert 114: innew = true;
1.1 tholo 115: }
116:
1.29 millert 117: /*
118: * POSIX specifies that fields are supposed to be evaluated as if they were
119: * split using the value of FS at the time that the record's value ($0) was
120: * read.
121: *
122: * Since field-splitting is done lazily, we save the current value of FS
123: * whenever a new record is read in (implicitly or via getline), or when
124: * a new value is assigned to $0.
125: */
126: void savefs(void)
127: {
1.37 millert 128: size_t len = strlen(getsval(fsloc));
129: if (len >= len_inputFS) {
130: len_inputFS = len + 1;
1.42 millert 131: inputFS = (char *) realloc(inputFS, len_inputFS);
1.37 millert 132: if (inputFS == NULL)
133: FATAL("field separator %.10s... is too long", *FS);
1.33 millert 134: }
1.37 millert 135: if (strlcpy(inputFS, *FS, len_inputFS) >= len_inputFS)
1.29 millert 136: FATAL("field separator %.10s... is too long", *FS);
137: }
138:
/* true until getrec() has run initgetrec() once */
1.32 millert 139: static bool firsttime = true;
1.15 millert 140:
/*
 * getrec - read the next input record into *pbuf, moving on to the next
 * file argument whenever the current input is exhausted.
 *
 * pbuf/pbufsize: in/out buffer pointer and size; readrec() may replace them.
 * isrecord: when true the record is installed as $0 (fldtab[0]), the field
 * split is invalidated and the current FS is saved with savefs().
 *
 * Returns 1 when a record was read (NR and FNR are incremented) and
 * 0 at true end of input, in which case the first byte of the buffer and
 * the reported buffer size are restored to their values on entry.
 */
1.32 millert 141: int getrec(char **pbuf, int *pbufsize, bool isrecord) /* get next input record */
1.4 kstailey 142: { /* note: cares whether buf == record */
1.1 tholo 143: int c;
1.4 kstailey 144: char *buf = *pbuf;
1.18 millert 145: uschar saveb0;
146: int bufsize = *pbufsize, savebufsize = bufsize;
1.1 tholo 147:
148: if (firsttime) {
1.32 millert 149: firsttime = false;
1.1 tholo 150: initgetrec();
151: }
1.39 millert 152: DPRINTF("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
153: *RS, *FS, *ARGC, *FILENAME);
/* remember the first byte so it can be put back if no record is found */
1.18 millert 154: saveb0 = buf[0];
1.1 tholo 155: buf[0] = 0;
/* keep going while there are arguments left or we are reading stdin */
1.1 tholo 156: while (argno < *ARGC || infile == stdin) {
1.39 millert 157: DPRINTF("argno=%d, file=|%s|\n", argno, file);
1.1 tholo 158: if (infile == NULL) { /* have to open a new file */
159: file = getargv(argno);
1.20 millert 160: if (file == NULL || *file == '\0') { /* deleted or zapped */
1.1 tholo 161: argno++;
162: continue;
163: }
164: if (isclvar(file)) { /* a var=value arg */
165: setclvar(file);
166: argno++;
167: continue;
168: }
169: *FILENAME = file;
1.39 millert 170: DPRINTF("opening file %s\n", file);
/* a lone "-" names standard input */
1.1 tholo 171: if (*file == '-' && *(file+1) == '\0')
172: infile = stdin;
1.4 kstailey 173: else if ((infile = fopen(file, "r")) == NULL)
1.7 millert 174: FATAL("can't open file %s", file);
1.45 millert 175: innew = true;
/* FNR restarts for every newly opened input */
1.1 tholo 176: setfval(fnrloc, 0.0);
177: }
1.34 millert 178: c = readrec(&buf, &bufsize, infile, innew);
179: if (innew)
180: innew = false;
1.1 tholo 181: if (c != 0 || buf[0] != '\0') { /* normal record */
1.4 kstailey 182: if (isrecord) {
1.42 millert 183: double result;
184:
/* release the old $0 string if the cell owns it, then point $0 at buf */
1.4 kstailey 185: if (freeable(fldtab[0]))
186: xfree(fldtab[0]->sval);
187: fldtab[0]->sval = buf; /* buf == record */
188: fldtab[0]->tval = REC | STR | DONTFREE;
/* a record that looks like a number also gets a numeric value */
1.42 millert 189: if (is_number(fldtab[0]->sval, & result)) {
190: fldtab[0]->fval = result;
1.4 kstailey 191: fldtab[0]->tval |= NUM;
1.1 tholo 192: }
/* fields must be re-split lazily; $0 itself is up to date */
1.50 millert 193: donefld = false;
194: donerec = true;
195: savefs();
1.1 tholo 196: }
197: setfval(nrloc, nrloc->fval+1);
198: setfval(fnrloc, fnrloc->fval+1);
1.4 kstailey 199: *pbuf = buf;
200: *pbufsize = bufsize;
1.1 tholo 201: return 1;
202: }
203: /* EOF arrived on this file; set up next */
204: if (infile != stdin)
205: fclose(infile);
206: infile = NULL;
207: argno++;
208: }
/* nothing was read: put back the byte cleared on entry */
1.18 millert 209: buf[0] = saveb0;
1.4 kstailey 210: *pbuf = buf;
1.18 millert 211: *pbufsize = savebufsize;
1.1 tholo 212: return 0; /* true end of file */
213: }
214:
215: void nextfile(void)
216: {
1.18 millert 217: if (infile != NULL && infile != stdin)
1.1 tholo 218: fclose(infile);
219: infile = NULL;
220: argno++;
221: }
222:
1.51 millert 223: extern int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag);
224:
1.34 millert 225: int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* read one record into buf */
1.1 tholo 226: {
1.51 millert 227: int sep, c, isrec; // POTENTIAL BUG? isrec is a macro in awk.h
228: char *rr = *pbuf, *buf = *pbuf;
1.4 kstailey 229: int bufsize = *pbufsize;
1.27 millert 230: char *rs = getsval(rsloc);
1.1 tholo 231:
1.51 millert 232: if (CSV) {
233: c = readcsvrec(pbuf, pbufsize, inf, newflag);
234: isrec = (c == EOF && rr == buf) ? false : true;
235: } else if (*rs && rs[1]) {
1.32 millert 236: bool found;
1.30 millert 237:
1.52 millert 238: memset(buf, 0, bufsize);
1.30 millert 239: fa *pfa = makedfa(rs, 1);
1.34 millert 240: if (newflag)
241: found = fnematch(pfa, inf, &buf, &bufsize, recsize);
242: else {
243: int tempstat = pfa->initstat;
244: pfa->initstat = 2;
245: found = fnematch(pfa, inf, &buf, &bufsize, recsize);
246: pfa->initstat = tempstat;
247: }
1.30 millert 248: if (found)
1.31 millert 249: setptr(patbeg, '\0');
1.54 millert 250: isrec = (found == 0 && *buf == '\0') ? false : true;
1.30 millert 251: } else {
252: if ((sep = *rs) == 0) {
253: sep = '\n';
254: while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
255: ;
256: if (c != EOF)
257: ungetc(c, inf);
258: }
259: for (rr = buf; ; ) {
260: for (; (c=getc(inf)) != sep && c != EOF; ) {
261: if (rr-buf+1 > bufsize)
262: if (!adjbuf(&buf, &bufsize, 1+rr-buf,
263: recsize, &rr, "readrec 1"))
264: FATAL("input record `%.30s...' too long", buf);
265: *rr++ = c;
266: }
267: if (*rs == sep || c == EOF)
268: break;
269: if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
270: break;
271: if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr,
272: "readrec 2"))
273: FATAL("input record `%.30s...' too long", buf);
274: *rr++ = '\n';
1.4 kstailey 275: *rr++ = c;
276: }
1.30 millert 277: if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
1.7 millert 278: FATAL("input record `%.30s...' too long", buf);
1.30 millert 279: *rr = 0;
1.54 millert 280: isrec = (c == EOF && rr == buf) ? false : true;
1.1 tholo 281: }
1.4 kstailey 282: *pbuf = buf;
283: *pbufsize = bufsize;
1.39 millert 284: DPRINTF("readrec saw <%s>, returns %d\n", buf, isrec);
1.30 millert 285: return isrec;
1.1 tholo 286: }
287:
1.51 millert 288:
289: /*******************
290: * loose ends here:
291: * \r\n should become \n
292: * what about bare \r? Excel uses that for embedded newlines
293: * can't have "" in unquoted fields, according to RFC 4180
294: */
295:
296: int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* csv can have \n's */
297: { /* so read a complete record that might be multiple lines */
298: int sep, c;
299: char *rr = *pbuf, *buf = *pbuf;
300: int bufsize = *pbufsize;
301: bool in_quote = false;
302:
303: sep = '\n'; /* the only separator; have to skip over \n embedded in "..." */
304: rr = buf;
305: while ((c = getc(inf)) != EOF) {
306: if (c == sep) {
307: if (! in_quote)
308: break;
309: if (rr > buf && rr[-1] == '\r') // remove \r if was \r\n
310: rr--;
311: }
312:
313: if (rr-buf+1 > bufsize)
314: if (!adjbuf(&buf, &bufsize, 1+rr-buf,
315: recsize, &rr, "readcsvrec 1"))
316: FATAL("input record `%.30s...' too long", buf);
317: *rr++ = c;
318: if (c == '"')
319: in_quote = ! in_quote;
320: }
321: if (c == '\n' && rr > buf && rr[-1] == '\r') // remove \r if was \r\n
322: rr--;
323:
324: if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readcsvrec 4"))
325: FATAL("input record `%.30s...' too long", buf);
326: *rr = 0;
327: *pbuf = buf;
328: *pbufsize = bufsize;
329: DPRINTF("readcsvrec saw <%s>, returns %d\n", buf, c);
330: return c;
331: }
332:
1.1 tholo 333: char *getargv(int n) /* get ARGV[n] */
334: {
1.57 ! jsg 335: Array *ap;
1.1 tholo 336: Cell *x;
1.4 kstailey 337: char *s, temp[50];
1.57 ! jsg 338: extern Cell *ARGVcell;
1.1 tholo 339:
1.57 ! jsg 340: ap = (Array *)ARGVcell->sval;
1.31 millert 341: snprintf(temp, sizeof(temp), "%d", n);
1.57 ! jsg 342: if (lookup(temp, ap) == NULL)
1.20 millert 343: return NULL;
1.57 ! jsg 344: x = setsymtab(temp, "", 0.0, STR, ap);
1.1 tholo 345: s = getsval(x);
1.39 millert 346: DPRINTF("getargv(%d) returns |%s|\n", n, s);
1.1 tholo 347: return s;
348: }
349:
350: void setclvar(char *s) /* set var=value from s */
351: {
1.48 millert 352: char *e, *p;
1.1 tholo 353: Cell *q;
1.42 millert 354: double result;
1.1 tholo 355:
1.51 millert 356: /* commit f3d9187d4e0f02294fb1b0e31152070506314e67 broke T.argv test */
357: /* I don't understand why it was changed. */
358:
1.1 tholo 359: for (p=s; *p != '='; p++)
360: ;
1.48 millert 361: e = p;
1.1 tholo 362: *p++ = 0;
363: p = qstring(p, '\0');
364: q = setsymtab(s, p, 0.0, STR, symtab);
365: setsval(q, p);
1.42 millert 366: if (is_number(q->sval, & result)) {
367: q->fval = result;
1.1 tholo 368: q->tval |= NUM;
369: }
1.39 millert 370: DPRINTF("command line set %s to |%s|\n", s, p);
1.49 millert 371: free(p);
1.48 millert 372: *e = '=';
1.1 tholo 373: }
374:
375:
/*
 * fldbld - split the current record ($0) into fields $1..$NF.
 *
 * Does nothing if the fields are already valid (donefld).  Otherwise the
 * record is split according to the FS value saved in inputFS:
 *   - not CSV and FS longer than one char: regular expression, via refldbld();
 *   - not CSV and FS == " ": runs of blank, tab and newline separate fields;
 *   - CSV mode: comma-separated fields with "..." quoting and "" escapes;
 *   - FS == "": every character (as measured by u8_nextlen()) is a field;
 *   - otherwise: split on the single FS character.
 * Afterwards stale fields from the previous record are cleared, numeric
 * looking fields are marked NUM, and NF is set.
 */
376: void fldbld(void) /* create fields from current record */
377: {
1.4 kstailey 378: /* this relies on having fields[] the same length as $0 */
379: /* the fields are all stored in this one array with \0's */
1.20 millert 380: /* possibly with a final trailing \0 not associated with any field */
1.1 tholo 381: char *r, *fr, sep;
382: Cell *p;
1.4 kstailey 383: int i, j, n;
1.1 tholo 384:
385: if (donefld)
386: return;
/* make sure $0 has a string value before reading sval */
1.4 kstailey 387: if (!isstr(fldtab[0]))
388: getsval(fldtab[0]);
389: r = fldtab[0]->sval;
390: n = strlen(r);
/* grow fields[] when the record is longer than anything seen so far */
391: if (n > fieldssize) {
392: xfree(fields);
1.42 millert 393: if ((fields = (char *) malloc(n+2)) == NULL) /* possibly 2 final \0s */
1.7 millert 394: FATAL("out of space for fields in fldbld %d", n);
1.4 kstailey 395: fieldssize = n;
396: }
1.1 tholo 397: fr = fields;
398: i = 0; /* number of fields accumulated here */
1.35 millert 399: if (inputFS == NULL) /* make sure we have a copy of FS */
400: savefs();
1.55 millert 401: if (!CSV && strlen(inputFS) > 1) { /* it's a regular expression */
1.2 millert 402: i = refldbld(r, inputFS);
1.51 millert 403: } else if (!CSV && (sep = *inputFS) == ' ') { /* default whitespace */
1.1 tholo 404: for (i = 0; ; ) {
/* skip leading separators; stop at end of record */
405: while (*r == ' ' || *r == '\t' || *r == '\n')
406: r++;
407: if (*r == 0)
408: break;
409: i++;
1.4 kstailey 410: if (i > nfields)
411: growfldtab(i);
412: if (freeable(fldtab[i]))
413: xfree(fldtab[i]->sval);
/* the field string lives inside fields[], so the cell must not free it */
414: fldtab[i]->sval = fr;
415: fldtab[i]->tval = FLD | STR | DONTFREE;
1.1 tholo 416: do
417: *fr++ = *r++;
418: while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
419: *fr++ = 0;
420: }
421: *fr = 0;
1.51 millert 422: } else if (CSV) { /* CSV processing. no error handling */
423: if (*r != 0) {
424: for (;;) {
425: i++;
426: if (i > nfields)
427: growfldtab(i);
428: if (freeable(fldtab[i]))
429: xfree(fldtab[i]->sval);
430: fldtab[i]->sval = fr;
431: fldtab[i]->tval = FLD | STR | DONTFREE;
432: if (*r == '"' ) { /* start of "..." */
433: for (r++ ; *r != '\0'; ) {
434: if (*r == '"' && r[1] != '\0' && r[1] == '"') {
435: r += 2; /* doubled quote */
436: *fr++ = '"';
437: } else if (*r == '"' && (r[1] == '\0' || r[1] == ',')) {
438: r++; /* skip over closing quote */
439: break;
440: } else {
441: *fr++ = *r++;
442: }
443: }
444: *fr++ = 0;
445: } else { /* unquoted field */
446: while (*r != ',' && *r != '\0')
447: *fr++ = *r++;
448: *fr++ = 0;
449: }
/* step over the comma; a NUL here means the record is finished */
450: if (*r++ == 0)
451: break;
452:
453: }
454: }
455: *fr = 0;
456: } else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
457: for (i = 0; *r != '\0'; ) {
458: char buf[10];
1.1 tholo 459: i++;
1.4 kstailey 460: if (i > nfields)
461: growfldtab(i);
462: if (freeable(fldtab[i]))
463: xfree(fldtab[i]->sval);
/* copy the bytes of one character; n is reused as that character's length */
1.51 millert 464: n = u8_nextlen(r);
465: for (j = 0; j < n; j++)
466: buf[j] = *r++;
467: buf[j] = '\0';
/* these fields get their own copy from tostring(), so DONTFREE is not set */
1.4 kstailey 468: fldtab[i]->sval = tostring(buf);
469: fldtab[i]->tval = FLD | STR;
1.1 tholo 470: }
471: *fr = 0;
472: } else if (*r != 0) { /* if 0, it's a null field */
1.51 millert 473: /* subtle case: if length(FS) == 1 && length(RS) > 0
1.15 millert 474: * \n is NOT a field separator (cf awk book 61,84).
475: * this variable is tested in the inner while loop.
476: */
477: int rtest = '\n'; /* normal case */
478: if (strlen(*RS) > 0)
479: rtest = '\0';
1.1 tholo 480: for (;;) {
481: i++;
1.4 kstailey 482: if (i > nfields)
483: growfldtab(i);
484: if (freeable(fldtab[i]))
485: xfree(fldtab[i]->sval);
486: fldtab[i]->sval = fr;
487: fldtab[i]->tval = FLD | STR | DONTFREE;
1.15 millert 488: while (*r != sep && *r != rtest && *r != '\0') /* \n is always a separator */
1.1 tholo 489: *fr++ = *r++;
490: *fr++ = 0;
491: if (*r++ == 0)
492: break;
493: }
494: *fr = 0;
495: }
1.4 kstailey 496: if (i > nfields)
1.7 millert 497: FATAL("record `%.30s...' has too many fields; can't happen", r);
1.4 kstailey 498: cleanfld(i+1, lastfld); /* clean out junk from previous record */
499: lastfld = i;
1.32 millert 500: donefld = true;
/* give every field that looks like a number a numeric value as well */
1.4 kstailey 501: for (j = 1; j <= lastfld; j++) {
1.42 millert 502: double result;
503:
1.4 kstailey 504: p = fldtab[j];
1.42 millert 505: if(is_number(p->sval, & result)) {
506: p->fval = result;
1.1 tholo 507: p->tval |= NUM;
508: }
509: }
1.4 kstailey 510: setfval(nfloc, (Awkfloat) lastfld);
1.32 millert 511: donerec = true; /* restore */
1.4 kstailey 512: if (dbg) {
513: for (j = 0; j <= lastfld; j++) {
514: p = fldtab[j];
515: printf("field %d (%s): |%s|\n", j, p->nval, p->sval);
516: }
517: }
1.1 tholo 518: }
519:
1.4 kstailey 520: void cleanfld(int n1, int n2) /* clean out fields n1 .. n2 inclusive */
521: { /* nvals remain intact */
522: Cell *p;
523: int i;
1.1 tholo 524:
1.4 kstailey 525: for (i = n1; i <= n2; i++) {
526: p = fldtab[i];
527: if (freeable(p))
1.1 tholo 528: xfree(p->sval);
1.33 millert 529: p->sval = EMPTY,
1.1 tholo 530: p->tval = FLD | STR | DONTFREE;
531: }
532: }
533:
1.4 kstailey 534: void newfld(int n) /* add field n after end of existing lastfld */
1.1 tholo 535: {
1.4 kstailey 536: if (n > nfields)
537: growfldtab(n);
538: cleanfld(lastfld+1, n);
539: lastfld = n;
1.1 tholo 540: setfval(nfloc, (Awkfloat) n);
1.26 millert 541: }
542:
543: void setlastfld(int n) /* set lastfld cleaning fldtab cells if necessary */
544: {
1.27 millert 545: if (n < 0)
546: FATAL("cannot set NF to a negative value");
1.26 millert 547: if (n > nfields)
548: growfldtab(n);
549:
550: if (lastfld < n)
551: cleanfld(lastfld+1, n);
552: else
553: cleanfld(n+1, lastfld);
554:
555: lastfld = n;
1.1 tholo 556: }
557:
1.4 kstailey 558: Cell *fieldadr(int n) /* get nth field */
559: {
560: if (n < 0)
1.15 millert 561: FATAL("trying to access out of range field %d", n);
1.4 kstailey 562: if (n > nfields) /* fields after NF are empty */
563: growfldtab(n); /* but does not increase NF */
564: return(fldtab[n]);
565: }
566:
567: void growfldtab(int n) /* make new fields up to at least $n */
568: {
569: int nf = 2 * nfields;
1.15 millert 570: size_t s;
1.4 kstailey 571:
572: if (n > nf)
573: nf = n;
1.15 millert 574: s = (nf+1) * (sizeof (struct Cell *)); /* freebsd: how much do we need? */
1.34 millert 575: if (s / sizeof(struct Cell *) - 1 == (size_t)nf) /* didn't overflow */
1.42 millert 576: fldtab = (Cell **) realloc(fldtab, s);
1.15 millert 577: else /* overflow sizeof int */
578: xfree(fldtab); /* make it null */
1.4 kstailey 579: if (fldtab == NULL)
1.7 millert 580: FATAL("out of space creating %d fields", nf);
1.4 kstailey 581: makefields(nfields+1, nf);
582: nfields = nf;
583: }
584:
1.11 millert 585: int refldbld(const char *rec, const char *fs) /* build fields from reg expr in FS */
1.1 tholo 586: {
1.4 kstailey 587: /* this relies on having fields[] the same length as $0 */
588: /* the fields are all stored in this one array with \0's */
1.1 tholo 589: char *fr;
1.4 kstailey 590: int i, tempstat, n;
1.1 tholo 591: fa *pfa;
592:
1.4 kstailey 593: n = strlen(rec);
594: if (n > fieldssize) {
595: xfree(fields);
1.42 millert 596: if ((fields = (char *) malloc(n+1)) == NULL)
1.7 millert 597: FATAL("out of space for fields in refldbld %d", n);
1.4 kstailey 598: fieldssize = n;
599: }
1.1 tholo 600: fr = fields;
601: *fr = '\0';
602: if (*rec == '\0')
603: return 0;
604: pfa = makedfa(fs, 1);
1.39 millert 605: DPRINTF("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs);
1.1 tholo 606: tempstat = pfa->initstat;
1.4 kstailey 607: for (i = 1; ; i++) {
1.37 millert 608: const size_t fss_rem = fields + fieldssize + 1 - fr;
1.4 kstailey 609: if (i > nfields)
610: growfldtab(i);
611: if (freeable(fldtab[i]))
612: xfree(fldtab[i]->sval);
613: fldtab[i]->tval = FLD | STR | DONTFREE;
614: fldtab[i]->sval = fr;
1.39 millert 615: DPRINTF("refldbld: i=%d\n", i);
1.1 tholo 616: if (nematch(pfa, rec)) {
1.37 millert 617: const size_t reclen = patbeg - rec;
1.4 kstailey 618: pfa->initstat = 2; /* horrible coupling to b.c */
1.39 millert 619: DPRINTF("match %s (%d chars)\n", patbeg, patlen);
1.37 millert 620: if (reclen >= fss_rem)
621: FATAL("out of space for fields in refldbld");
622: memcpy(fr, rec, reclen);
623: fr += reclen;
624: *fr++ = '\0';
1.1 tholo 625: rec = patbeg + patlen;
626: } else {
1.39 millert 627: DPRINTF("no match %s\n", rec);
1.37 millert 628: if (strlcpy(fr, rec, fss_rem) >= fss_rem)
629: FATAL("out of space for fields in refldbld");
1.1 tholo 630: pfa->initstat = tempstat;
631: break;
632: }
633: }
1.29 millert 634: return i;
1.1 tholo 635: }
636:
637: void recbld(void) /* create $0 from $1..$NF if necessary */
638: {
639: int i;
640: char *r, *p;
1.27 millert 641: char *sep = getsval(ofsloc);
1.1 tholo 642:
1.32 millert 643: if (donerec)
1.1 tholo 644: return;
1.5 millert 645: r = record;
1.1 tholo 646: for (i = 1; i <= *NF; i++) {
1.4 kstailey 647: p = getsval(fldtab[i]);
1.5 millert 648: if (!adjbuf(&record, &recsize, 1+strlen(p)+r-record, recsize, &r, "recbld 1"))
1.7 millert 649: FATAL("created $0 `%.30s...' too long", record);
1.4 kstailey 650: while ((*r = *p++) != 0)
1.1 tholo 651: r++;
1.4 kstailey 652: if (i < *NF) {
1.27 millert 653: if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
1.7 millert 654: FATAL("created $0 `%.30s...' too long", record);
1.27 millert 655: for (p = sep; (*r = *p++) != 0; )
1.1 tholo 656: r++;
1.4 kstailey 657: }
1.1 tholo 658: }
1.5 millert 659: if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
1.7 millert 660: FATAL("built giant record `%.30s...'", record);
1.1 tholo 661: *r = '\0';
1.39 millert 662: DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
1.4 kstailey 663:
664: if (freeable(fldtab[0]))
665: xfree(fldtab[0]->sval);
666: fldtab[0]->tval = REC | STR | DONTFREE;
667: fldtab[0]->sval = record;
668:
1.39 millert 669: DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
670: DPRINTF("recbld = |%s|\n", record);
1.32 millert 671: donerec = true;
1.1 tholo 672: }
673:
/* Set to 2 by SYNTAX() once a syntax error has been reported. */
int errorflag = 0;

/*
 * yyerror - report the message s as a syntax error.
 *
 * The message is passed through "%s" so that it is never interpreted
 * as a format string.
 */
void yyerror(const char *s)
{
	SYNTAX("%s", s);
}
680:
1.11 millert 681: void SYNTAX(const char *fmt, ...)
1.7 millert 682: {
1.1 tholo 683: extern char *cmdname, *curfname;
684: static int been_here = 0;
1.7 millert 685: va_list varg;
1.1 tholo 686:
687: if (been_here++ > 2)
688: return;
1.7 millert 689: fprintf(stderr, "%s: ", cmdname);
690: va_start(varg, fmt);
691: vfprintf(stderr, fmt, varg);
692: va_end(varg);
1.1 tholo 693: fprintf(stderr, " at source line %d", lineno);
694: if (curfname != NULL)
695: fprintf(stderr, " in function %s", curfname);
1.32 millert 696: if (compile_time == COMPILING && cursource() != NULL)
1.6 millert 697: fprintf(stderr, " source file %s", cursource());
1.1 tholo 698: fprintf(stderr, "\n");
699: errorflag = 2;
700: eprint();
701: }
702:
extern int bracecnt, brackcnt, parencnt;

/*
 * bracecheck - report unbalanced braces, brackets and parentheses.
 *
 * Runs at most once.  The rest of the program text is consumed with
 * input() and fed to bclass() so the counters cover the whole source,
 * then each counter is reported through bcheck2().
 */
void bracecheck(void)
{
	static int beenhere = 0;
	int ch;

	if (beenhere++ != 0)
		return;
	for (ch = input(); ch != EOF && ch != '\0'; ch = input())
		bclass(ch);
	bcheck2(bracecnt, '{', '}');
	bcheck2(brackcnt, '[', ']');
	bcheck2(parencnt, '(', ')');
}
718:
/*
 * bcheck2 - print a diagnostic for one bracket counter.
 *
 * n is the surplus of opening over closing characters: a positive n
 * means closing characters (c2) are missing, a negative n means there
 * are extra ones, zero prints nothing.  c1 is not used.
 */
void bcheck2(int n, int c1, int c2)
{
	if (n > 1)
		fprintf(stderr, "\t%d missing %c's\n", n, c2);
	else if (n == 1)
		fprintf(stderr, "\tmissing %c\n", c2);
	else if (n < -1)
		fprintf(stderr, "\t%d extra %c's\n", -n, c2);
	else if (n == -1)
		fprintf(stderr, "\textra %c\n", c2);
}
730:
1.35 millert 731: void FATAL(const char *fmt, ...)
1.7 millert 732: {
733: extern char *cmdname;
734: va_list varg;
735:
736: fflush(stdout);
737: fprintf(stderr, "%s: ", cmdname);
738: va_start(varg, fmt);
739: vfprintf(stderr, fmt, varg);
740: va_end(varg);
741: error();
742: if (dbg > 1) /* core dump if serious debugging on */
743: abort();
744: exit(2);
745: }
746:
/*
 * WARNING - print a printf-style warning message and continue.
 *
 * Same output as FATAL() -- command name prefix, the message, then the
 * location information from error() -- but the program keeps running.
 */
void WARNING(const char *fmt, ...)
{
	extern char *cmdname;
	va_list ap;

	fflush(stdout);
	fprintf(stderr, "%s: ", cmdname);

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	error();
}
759:
760: void error()
761: {
762: extern Node *curnode;
763:
1.1 tholo 764: fprintf(stderr, "\n");
1.32 millert 765: if (compile_time != ERROR_PRINTING) {
766: if (NR && *NR > 0) {
767: fprintf(stderr, " input record number %d", (int) (*FNR));
768: if (strcmp(*FILENAME, "-") != 0)
769: fprintf(stderr, ", file %s", *FILENAME);
770: fprintf(stderr, "\n");
771: }
772: if (curnode)
773: fprintf(stderr, " source line number %d", curnode->lineno);
774: else if (lineno)
775: fprintf(stderr, " source line number %d", lineno);
1.41 millert 776: if (compile_time == COMPILING && cursource() != NULL)
777: fprintf(stderr, " source file %s", cursource());
778: fprintf(stderr, "\n");
779: eprint();
1.32 millert 780: }
1.1 tholo 781: }
782:
783: void eprint(void) /* try to print context around error */
784: {
785: char *p, *q;
786: int c;
787: static int been_here = 0;
788: extern char ebuf[], *ep;
789:
1.32 millert 790: if (compile_time != COMPILING || been_here++ > 0 || ebuf == ep)
1.1 tholo 791: return;
792: p = ep - 1;
793: if (p > ebuf && *p == '\n')
794: p--;
795: for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--)
796: ;
797: while (*p == '\n')
798: p++;
799: fprintf(stderr, " context is\n\t");
800: for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
801: ;
802: for ( ; p < q; p++)
803: if (*p)
804: putc(*p, stderr);
805: fprintf(stderr, " >>> ");
806: for ( ; p < ep; p++)
807: if (*p)
808: putc(*p, stderr);
809: fprintf(stderr, " <<< ");
810: if (*ep)
811: while ((c = input()) != '\n' && c != '\0' && c != EOF) {
812: putc(c, stderr);
813: bclass(c);
814: }
815: putc('\n', stderr);
816: ep = ebuf;
817: }
818:
819: void bclass(int c)
820: {
821: switch (c) {
822: case '{': bracecnt++; break;
823: case '}': bracecnt--; break;
824: case '[': brackcnt++; break;
825: case ']': brackcnt--; break;
826: case '(': parencnt++; break;
827: case ')': parencnt--; break;
828: }
829: }
830:
/*
 * errcheck - check errno after a math library call.
 *
 * x is the result of the call and s the name used in the warning.  If
 * errno is EDOM or ERANGE it is cleared, a warning is issued and 1 is
 * returned in place of the result; otherwise x is returned unchanged.
 */
double errcheck(double x, const char *s)
{
	switch (errno) {
	case EDOM:
		errno = 0;
		WARNING("%s argument out of domain", s);
		return 1;
	case ERANGE:
		errno = 0;
		WARNING("%s result out of range", s);
		return 1;
	default:
		return x;
	}
}
845:
/*
 * isclvar - is s of the form var=something ?
 *
 * Returns non-zero when s starts with an identifier (a letter or '_'
 * followed by letters, digits or '_') that is immediately followed by
 * '='; returns 0 otherwise.
 */
int isclvar(const char *s)
{
	const char *p = s;

	/* first character must be able to start an identifier */
	if (*p != '_' && !isalpha((unsigned char)*p))
		return 0;
	/* skip the rest of the identifier */
	while (*p == '_' || isalnum((unsigned char)*p))
		p++;
	return *p == '=' && p > s;
}
857:
1.4 kstailey 858: /* strtod is supposed to be a proper test of what's a valid number */
1.8 millert 859: /* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */
860: /* wrong: violates 4.10.1.4 of ansi C standard */
1.42 millert 861:
1.38 millert 862: /* well, not quite. As of C99, hex floating point is allowed. so this is
1.42 millert 863: * a bit of a mess. We work around the mess by checking for a hexadecimal
864: * value and disallowing it. Similarly, we now follow gawk and allow only
865: * +nan, -nan, +inf, and -inf for NaN and infinity values.
1.38 millert 866: */
1.1 tholo 867:
1.42 millert 868: /*
869: * This routine now has a more complicated interface, the main point
870: * being to avoid the double conversion of a string to double, and
871: * also to convey out, if requested, the information that the numeric
872: * value was a leading string or is all of the string. The latter bit
873: * is used in getfval().
874: */
875:
/*
 * is_valid_number - decide whether s looks like a number, converting it
 * at the same time.
 *
 * s: string to examine; leading white space is skipped.
 * trailing_stuff_ok: when true, non-blank characters after the number do
 *	not make the string invalid.
 * no_trailing: if not NULL, set to true when nothing but white space
 *	follows the number, false otherwise (including on every failure).
 * result: if not NULL, receives the converted value whenever the
 *	conversion itself succeeded, even if the function then returns
 *	false because of trailing characters.
 *
 * Hexadecimal input is rejected whether or not it carries a sign, since
 * a C99 strtod() would otherwise accept strings such as "-0x10".  NaN
 * and infinity are accepted only in the forms +nan, -nan, +inf and -inf
 * (any letter case), followed by white space or the end of the string.
 * A conversion that reports ERANGE is treated as not a number.
 *
 * Returns true if s is a number, subject to trailing_stuff_ok.
 */
bool is_valid_number(const char *s, bool trailing_stuff_ok,
					bool *no_trailing, double *result)
{
	double r;
	char *ep;
	const char *digits;
	bool retval = false;
	bool is_nan = false;
	bool is_inf = false;

	if (no_trailing)
		*no_trailing = false;

	while (isspace((unsigned char)*s))
		s++;

	/* no hex floating point, sorry -- with or without a leading sign */
	digits = (s[0] == '+' || s[0] == '-') ? s + 1 : s;
	if (digits[0] == '0' && tolower((unsigned char)digits[1]) == 'x')
		return false;

	/* allow +nan, -nan, +inf, -inf, any other letter, no */
	if (s[0] == '+' || s[0] == '-') {
		is_nan = (strncasecmp(s+1, "nan", 3) == 0);
		is_inf = (strncasecmp(s+1, "inf", 3) == 0);
		if ((is_nan || is_inf)
		    && (isspace((unsigned char)s[4]) || s[4] == '\0'))
			goto convert;
		else if (! isdigit((unsigned char)s[1]) && s[1] != '.')
			return false;
	}
	else if (! isdigit((unsigned char)s[0]) && s[0] != '.')
		return false;

convert:
	errno = 0;
	r = strtod(s, &ep);
	if (ep == s || errno == ERANGE)
		return false;

	/* make sure "-nan" really yields a NaN with the sign bit set */
	if (isnan(r) && s[0] == '-' && signbit(r) == 0)
		r = -r;

	if (result != NULL)
		*result = r;

	/*
	 * check for trailing stuff
	 */
	while (isspace((unsigned char)*ep))
		ep++;

	if (no_trailing != NULL)
		*no_trailing = (*ep == '\0');

	/* return true if found the end, or trailing stuff is allowed */
	retval = *ep == '\0' || trailing_stuff_ok;

	return retval;
}