Annotation of src/usr.bin/awk/lib.c, Revision 1.54
1.54 ! millert 1: /* $OpenBSD: lib.c,v 1.53 2023/10/30 16:14:51 millert Exp $ */
1.1 tholo 2: /****************************************************************
1.4 kstailey 3: Copyright (C) Lucent Technologies 1997
1.1 tholo 4: All Rights Reserved
5:
6: Permission to use, copy, modify, and distribute this software and
7: its documentation for any purpose and without fee is hereby
8: granted, provided that the above copyright notice appear in all
9: copies and that both that the copyright notice and this
10: permission notice and warranty disclaimer appear in supporting
1.4 kstailey 11: documentation, and that the name Lucent Technologies or any of
12: its entities not be used in advertising or publicity pertaining
13: to distribution of the software without specific, written prior
14: permission.
15:
16: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23: THIS SOFTWARE.
1.1 tholo 24: ****************************************************************/
25:
26: #define DEBUG
27: #include <stdio.h>
28: #include <string.h>
1.53 millert 29: #include <strings.h>
1.1 tholo 30: #include <ctype.h>
31: #include <errno.h>
32: #include <stdlib.h>
1.7 millert 33: #include <stdarg.h>
1.33 millert 34: #include <limits.h>
1.44 millert 35: #include <math.h>
1.1 tholo 36: #include "awk.h"
37:
/* Defined outside this file; fldbld() uses its result as the byte length of the next character. */
1.51 millert 38: extern int u8_nextlen(const char *s);
39:
/* Shared empty string: initial value of `file` and of the sval of cleaned fields (see cleanfld). */
1.33 millert 40: char EMPTY[] = { '\0' };
/* Current input stream; NULL means getrec() must open the next file operand. */
1.1 tholo 41: FILE *infile = NULL;
1.34 millert 42: bool innew; /* true = infile has not been read by readrec */
/* Name of the current input file, as returned by getargv(). */
1.33 millert 43: char *file = EMPTY;
/* Buffer holding $0; allocated in recinit() and grown through adjbuf() in recbld(). */
1.4 kstailey 44: char *record;
1.1 tholo 45: int recsize = RECSIZE;
/* Buffer holding the NUL-separated field strings built by fldbld()/refldbld(). */
46: char *fields;
/* Record length that `fields` is sized for; fldbld()/refldbld() reallocate when a record is longer. */
1.4 kstailey 47: int fieldssize = RECSIZE;
48:
49: Cell **fldtab; /* pointers to Cells */
/* Allocated size of inputFS in bytes, maintained by savefs(). */
1.33 millert 50: static size_t len_inputFS = 0;
51: static char *inputFS = NULL; /* FS at time of input, for field splitting */
1.1 tholo 52:
/* Initial number of field slots; growfldtab() enlarges the table on demand. */
1.18 millert 53: #define MAXFLD 2
1.4 kstailey 54: int nfields = MAXFLD; /* last allocated slot for $i */
1.1 tholo 55:
1.32 millert 56: bool donefld; /* true = implies rec broken into fields */
57: bool donerec; /* true = record is valid (no flds have changed) */
1.1 tholo 58:
1.4 kstailey 59: int lastfld = 0; /* last used field */
1.1 tholo 60: int argno = 1; /* current input argument number */
61: extern Awkfloat *ARGC;
62:
/* Templates copied into fldtab[0] by recinit() and into fldtab[1..] by makefields(). */
1.34 millert 63: static Cell dollar0 = { OCELL, CFLD, NULL, EMPTY, 0.0, REC|STR|DONTFREE, NULL, NULL };
64: static Cell dollar1 = { OCELL, CFLD, NULL, EMPTY, 0.0, FLD|STR|DONTFREE, NULL, NULL };
1.4 kstailey 65:
1.1 tholo 66: void recinit(unsigned int n)
67: {
1.42 millert 68: if ( (record = (char *) malloc(n)) == NULL
69: || (fields = (char *) malloc(n+1)) == NULL
70: || (fldtab = (Cell **) calloc(nfields+2, sizeof(*fldtab))) == NULL
71: || (fldtab[0] = (Cell *) malloc(sizeof(**fldtab))) == NULL)
1.7 millert 72: FATAL("out of space for $0 and fields");
1.22 millert 73: *record = '\0';
1.4 kstailey 74: *fldtab[0] = dollar0;
75: fldtab[0]->sval = record;
76: fldtab[0]->nval = tostring("0");
77: makefields(1, nfields);
78: }
79:
80: void makefields(int n1, int n2) /* create $n1..$n2 inclusive */
81: {
82: char temp[50];
1.1 tholo 83: int i;
84:
1.4 kstailey 85: for (i = n1; i <= n2; i++) {
1.42 millert 86: fldtab[i] = (Cell *) malloc(sizeof(**fldtab));
1.4 kstailey 87: if (fldtab[i] == NULL)
1.7 millert 88: FATAL("out of space in makefields %d", i);
1.4 kstailey 89: *fldtab[i] = dollar1;
1.31 millert 90: snprintf(temp, sizeof(temp), "%d", i);
1.4 kstailey 91: fldtab[i]->nval = tostring(temp);
92: }
1.1 tholo 93: }
94:
95: void initgetrec(void)
96: {
97: int i;
98: char *p;
99:
100: for (i = 1; i < *ARGC; i++) {
1.20 millert 101: p = getargv(i); /* find 1st real filename */
102: if (p == NULL || *p == '\0') { /* deleted or zapped */
103: argno++;
104: continue;
105: }
106: if (!isclvar(p)) {
107: setsval(lookup("FILENAME", symtab), p);
1.1 tholo 108: return;
109: }
110: setclvar(p); /* a commandline assignment before filename */
111: argno++;
112: }
113: infile = stdin; /* no filenames, so use stdin */
1.34 millert 114: innew = true;
1.1 tholo 115: }
116:
1.29 millert 117: /*
118: * POSIX specifies that fields are supposed to be evaluated as if they were
119: * split using the value of FS at the time that the record's value ($0) was
120: * read.
121: *
122: * Since field-splitting is done lazily, we save the current value of FS
123: * whenever a new record is read in (implicitly or via getline), or when
124: * a new value is assigned to $0.
125: */
126: void savefs(void)
127: {
1.37 millert 128: size_t len = strlen(getsval(fsloc));
129: if (len >= len_inputFS) {
130: len_inputFS = len + 1;
1.42 millert 131: inputFS = (char *) realloc(inputFS, len_inputFS);
1.37 millert 132: if (inputFS == NULL)
133: FATAL("field separator %.10s... is too long", *FS);
1.33 millert 134: }
1.37 millert 135: if (strlcpy(inputFS, *FS, len_inputFS) >= len_inputFS)
1.29 millert 136: FATAL("field separator %.10s... is too long", *FS);
137: }
138:
1.32 millert 139: static bool firsttime = true;
1.15 millert 140:
1.32 millert 141: int getrec(char **pbuf, int *pbufsize, bool isrecord) /* get next input record */
1.4 kstailey 142: { /* note: cares whether buf == record */
1.1 tholo 143: int c;
1.4 kstailey 144: char *buf = *pbuf;
1.18 millert 145: uschar saveb0;
146: int bufsize = *pbufsize, savebufsize = bufsize;
1.1 tholo 147:
148: if (firsttime) {
1.32 millert 149: firsttime = false;
1.1 tholo 150: initgetrec();
151: }
1.39 millert 152: DPRINTF("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
153: *RS, *FS, *ARGC, *FILENAME);
1.18 millert 154: saveb0 = buf[0];
1.1 tholo 155: buf[0] = 0;
156: while (argno < *ARGC || infile == stdin) {
1.39 millert 157: DPRINTF("argno=%d, file=|%s|\n", argno, file);
1.1 tholo 158: if (infile == NULL) { /* have to open a new file */
159: file = getargv(argno);
1.20 millert 160: if (file == NULL || *file == '\0') { /* deleted or zapped */
1.1 tholo 161: argno++;
162: continue;
163: }
164: if (isclvar(file)) { /* a var=value arg */
165: setclvar(file);
166: argno++;
167: continue;
168: }
169: *FILENAME = file;
1.39 millert 170: DPRINTF("opening file %s\n", file);
1.1 tholo 171: if (*file == '-' && *(file+1) == '\0')
172: infile = stdin;
1.4 kstailey 173: else if ((infile = fopen(file, "r")) == NULL)
1.7 millert 174: FATAL("can't open file %s", file);
1.45 millert 175: innew = true;
1.1 tholo 176: setfval(fnrloc, 0.0);
177: }
1.34 millert 178: c = readrec(&buf, &bufsize, infile, innew);
179: if (innew)
180: innew = false;
1.1 tholo 181: if (c != 0 || buf[0] != '\0') { /* normal record */
1.4 kstailey 182: if (isrecord) {
1.42 millert 183: double result;
184:
1.4 kstailey 185: if (freeable(fldtab[0]))
186: xfree(fldtab[0]->sval);
187: fldtab[0]->sval = buf; /* buf == record */
188: fldtab[0]->tval = REC | STR | DONTFREE;
1.42 millert 189: if (is_number(fldtab[0]->sval, & result)) {
190: fldtab[0]->fval = result;
1.4 kstailey 191: fldtab[0]->tval |= NUM;
1.1 tholo 192: }
1.50 millert 193: donefld = false;
194: donerec = true;
195: savefs();
1.1 tholo 196: }
197: setfval(nrloc, nrloc->fval+1);
198: setfval(fnrloc, fnrloc->fval+1);
1.4 kstailey 199: *pbuf = buf;
200: *pbufsize = bufsize;
1.1 tholo 201: return 1;
202: }
203: /* EOF arrived on this file; set up next */
204: if (infile != stdin)
205: fclose(infile);
206: infile = NULL;
207: argno++;
208: }
1.18 millert 209: buf[0] = saveb0;
1.4 kstailey 210: *pbuf = buf;
1.18 millert 211: *pbufsize = savebufsize;
1.1 tholo 212: return 0; /* true end of file */
213: }
214:
215: void nextfile(void)
216: {
1.18 millert 217: if (infile != NULL && infile != stdin)
1.1 tholo 218: fclose(infile);
219: infile = NULL;
220: argno++;
221: }
222:
1.51 millert 223: extern int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag);
224:
1.34 millert 225: int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* read one record into buf */
1.1 tholo 226: {
1.51 millert 227: int sep, c, isrec; // POTENTIAL BUG? isrec is a macro in awk.h
228: char *rr = *pbuf, *buf = *pbuf;
1.4 kstailey 229: int bufsize = *pbufsize;
1.27 millert 230: char *rs = getsval(rsloc);
1.1 tholo 231:
1.51 millert 232: if (CSV) {
233: c = readcsvrec(pbuf, pbufsize, inf, newflag);
234: isrec = (c == EOF && rr == buf) ? false : true;
235: } else if (*rs && rs[1]) {
1.32 millert 236: bool found;
1.30 millert 237:
1.52 millert 238: memset(buf, 0, bufsize);
1.30 millert 239: fa *pfa = makedfa(rs, 1);
1.34 millert 240: if (newflag)
241: found = fnematch(pfa, inf, &buf, &bufsize, recsize);
242: else {
243: int tempstat = pfa->initstat;
244: pfa->initstat = 2;
245: found = fnematch(pfa, inf, &buf, &bufsize, recsize);
246: pfa->initstat = tempstat;
247: }
1.30 millert 248: if (found)
1.31 millert 249: setptr(patbeg, '\0');
1.54 ! millert 250: isrec = (found == 0 && *buf == '\0') ? false : true;
1.30 millert 251: } else {
252: if ((sep = *rs) == 0) {
253: sep = '\n';
254: while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
255: ;
256: if (c != EOF)
257: ungetc(c, inf);
258: }
259: for (rr = buf; ; ) {
260: for (; (c=getc(inf)) != sep && c != EOF; ) {
261: if (rr-buf+1 > bufsize)
262: if (!adjbuf(&buf, &bufsize, 1+rr-buf,
263: recsize, &rr, "readrec 1"))
264: FATAL("input record `%.30s...' too long", buf);
265: *rr++ = c;
266: }
267: if (*rs == sep || c == EOF)
268: break;
269: if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
270: break;
271: if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr,
272: "readrec 2"))
273: FATAL("input record `%.30s...' too long", buf);
274: *rr++ = '\n';
1.4 kstailey 275: *rr++ = c;
276: }
1.30 millert 277: if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
1.7 millert 278: FATAL("input record `%.30s...' too long", buf);
1.30 millert 279: *rr = 0;
1.54 ! millert 280: isrec = (c == EOF && rr == buf) ? false : true;
1.1 tholo 281: }
1.4 kstailey 282: *pbuf = buf;
283: *pbufsize = bufsize;
1.39 millert 284: DPRINTF("readrec saw <%s>, returns %d\n", buf, isrec);
1.30 millert 285: return isrec;
1.1 tholo 286: }
287:
1.51 millert 288:
289: /*******************
290: * loose ends here:
291: * \r\n should become \n
292: * what about bare \r? Excel uses that for embedded newlines
293: * can't have "" in unquoted fields, according to RFC 4180
294: */
295:
296: int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* csv can have \n's */
297: { /* so read a complete record that might be multiple lines */
298: int sep, c;
299: char *rr = *pbuf, *buf = *pbuf;
300: int bufsize = *pbufsize;
301: bool in_quote = false;
302:
303: sep = '\n'; /* the only separator; have to skip over \n embedded in "..." */
304: rr = buf;
305: while ((c = getc(inf)) != EOF) {
306: if (c == sep) {
307: if (! in_quote)
308: break;
309: if (rr > buf && rr[-1] == '\r') // remove \r if was \r\n
310: rr--;
311: }
312:
313: if (rr-buf+1 > bufsize)
314: if (!adjbuf(&buf, &bufsize, 1+rr-buf,
315: recsize, &rr, "readcsvrec 1"))
316: FATAL("input record `%.30s...' too long", buf);
317: *rr++ = c;
318: if (c == '"')
319: in_quote = ! in_quote;
320: }
321: if (c == '\n' && rr > buf && rr[-1] == '\r') // remove \r if was \r\n
322: rr--;
323:
324: if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readcsvrec 4"))
325: FATAL("input record `%.30s...' too long", buf);
326: *rr = 0;
327: *pbuf = buf;
328: *pbufsize = bufsize;
329: DPRINTF("readcsvrec saw <%s>, returns %d\n", buf, c);
330: return c;
331: }
332:
1.1 tholo 333: char *getargv(int n) /* get ARGV[n] */
334: {
335: Cell *x;
1.4 kstailey 336: char *s, temp[50];
1.1 tholo 337: extern Array *ARGVtab;
338:
1.31 millert 339: snprintf(temp, sizeof(temp), "%d", n);
1.20 millert 340: if (lookup(temp, ARGVtab) == NULL)
341: return NULL;
1.1 tholo 342: x = setsymtab(temp, "", 0.0, STR, ARGVtab);
343: s = getsval(x);
1.39 millert 344: DPRINTF("getargv(%d) returns |%s|\n", n, s);
1.1 tholo 345: return s;
346: }
347:
348: void setclvar(char *s) /* set var=value from s */
349: {
1.48 millert 350: char *e, *p;
1.1 tholo 351: Cell *q;
1.42 millert 352: double result;
1.1 tholo 353:
1.51 millert 354: /* commit f3d9187d4e0f02294fb1b0e31152070506314e67 broke T.argv test */
355: /* I don't understand why it was changed. */
356:
1.1 tholo 357: for (p=s; *p != '='; p++)
358: ;
1.48 millert 359: e = p;
1.1 tholo 360: *p++ = 0;
361: p = qstring(p, '\0');
362: q = setsymtab(s, p, 0.0, STR, symtab);
363: setsval(q, p);
1.42 millert 364: if (is_number(q->sval, & result)) {
365: q->fval = result;
1.1 tholo 366: q->tval |= NUM;
367: }
1.39 millert 368: DPRINTF("command line set %s to |%s|\n", s, p);
1.49 millert 369: free(p);
1.48 millert 370: *e = '=';
1.1 tholo 371: }
372:
373:
/*
 * fldbld: split the current record (fldtab[0]->sval) into fields.
 *
 * Does nothing if donefld is already set.  The field strings are written
 * back to back into the fields[] buffer, which is reallocated when the
 * record is longer than fieldssize.  The splitting rule is chosen from
 * inputFS, the saved copy of FS:
 *   - longer than one character: a regular expression, handled by refldbld()
 *   - a single space, outside CSV mode: runs of blank, tab and newline
 *   - CSV mode: comma separated, with "..." quoting and "" for a literal quote
 *   - empty: one character per field, byte length taken from u8_nextlen()
 *   - any other single character: that character separates fields
 * Afterwards leftover fields from the previous record are cleaned, lastfld
 * and NF are updated, fields that look numeric get NUM set, and donefld is
 * set.
 */
374: void fldbld(void) /* create fields from current record */
375: {
1.4 kstailey 376: /* this relies on having fields[] the same length as $0 */
377: /* the fields are all stored in this one array with \0's */
1.20 millert 378: /* possibly with a final trailing \0 not associated with any field */
1.1 tholo 379: char *r, *fr, sep;
380: Cell *p;
1.4 kstailey 381: int i, j, n;
1.1 tholo 382:
383: if (donefld)
384: return;
/* make sure $0 has a string value before reading sval */
1.4 kstailey 385: if (!isstr(fldtab[0]))
386: getsval(fldtab[0]);
387: r = fldtab[0]->sval;
388: n = strlen(r);
389: if (n > fieldssize) {
390: xfree(fields);
1.42 millert 391: if ((fields = (char *) malloc(n+2)) == NULL) /* possibly 2 final \0s */
1.7 millert 392: FATAL("out of space for fields in fldbld %d", n);
1.4 kstailey 393: fieldssize = n;
394: }
/* r walks the record, fr walks the output buffer */
1.1 tholo 395: fr = fields;
396: i = 0; /* number of fields accumulated here */
1.35 millert 397: if (inputFS == NULL) /* make sure we have a copy of FS */
398: savefs();
1.2 millert 399: if (strlen(inputFS) > 1) { /* it's a regular expression */
400: i = refldbld(r, inputFS);
1.51 millert 401: } else if (!CSV && (sep = *inputFS) == ' ') { /* default whitespace */
1.1 tholo 402: for (i = 0; ; ) {
403: while (*r == ' ' || *r == '\t' || *r == '\n')
404: r++;
405: if (*r == 0)
406: break;
407: i++;
1.4 kstailey 408: if (i > nfields)
409: growfldtab(i);
410: if (freeable(fldtab[i]))
411: xfree(fldtab[i]->sval);
412: fldtab[i]->sval = fr;
413: fldtab[i]->tval = FLD | STR | DONTFREE;
1.1 tholo 414: do
415: *fr++ = *r++;
416: while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
417: *fr++ = 0;
418: }
419: *fr = 0;
1.51 millert 420: } else if (CSV) { /* CSV processing. no error handling */
421: if (*r != 0) {
422: for (;;) {
423: i++;
424: if (i > nfields)
425: growfldtab(i);
426: if (freeable(fldtab[i]))
427: xfree(fldtab[i]->sval);
428: fldtab[i]->sval = fr;
429: fldtab[i]->tval = FLD | STR | DONTFREE;
430: if (*r == '"' ) { /* start of "..." */
431: for (r++ ; *r != '\0'; ) {
432: if (*r == '"' && r[1] != '\0' && r[1] == '"') {
433: r += 2; /* doubled quote */
434: *fr++ = '"';
435: } else if (*r == '"' && (r[1] == '\0' || r[1] == ',')) {
436: r++; /* skip over closing quote */
437: break;
438: } else {
439: *fr++ = *r++;
440: }
441: }
442: *fr++ = 0;
443: } else { /* unquoted field */
444: while (*r != ',' && *r != '\0')
445: *fr++ = *r++;
446: *fr++ = 0;
447: }
/* r is on the ',' or the final NUL: step over it, stop at end of record */
448: if (*r++ == 0)
449: break;
450:
451: }
452: }
453: *fr = 0;
454: } else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
455: for (i = 0; *r != '\0'; ) {
456: char buf[10];
1.1 tholo 457: i++;
1.4 kstailey 458: if (i > nfields)
459: growfldtab(i);
460: if (freeable(fldtab[i]))
461: xfree(fldtab[i]->sval);
/* n is reused here as the byte length of the next character */
1.51 millert 462: n = u8_nextlen(r);
463: for (j = 0; j < n; j++)
464: buf[j] = *r++;
465: buf[j] = '\0';
/* these fields own a separate copy from tostring(), hence no DONTFREE */
1.4 kstailey 466: fldtab[i]->sval = tostring(buf);
467: fldtab[i]->tval = FLD | STR;
1.1 tholo 468: }
469: *fr = 0;
470: } else if (*r != 0) { /* if 0, it's a null field */
1.51 millert 471: /* subtle case: if length(FS) == 1 && length(RS) > 0
1.15 millert 472: * \n is NOT a field separator (cf awk book 61,84).
473: * this variable is tested in the inner while loop.
474: */
475: int rtest = '\n'; /* normal case */
476: if (strlen(*RS) > 0)
477: rtest = '\0';
1.1 tholo 478: for (;;) {
479: i++;
1.4 kstailey 480: if (i > nfields)
481: growfldtab(i);
482: if (freeable(fldtab[i]))
483: xfree(fldtab[i]->sval);
484: fldtab[i]->sval = fr;
485: fldtab[i]->tval = FLD | STR | DONTFREE;
1.15 millert 486: while (*r != sep && *r != rtest && *r != '\0') /* \n separates fields only when RS is empty (rtest == '\n') */
1.1 tholo 487: *fr++ = *r++;
488: *fr++ = 0;
489: if (*r++ == 0)
490: break;
491: }
492: *fr = 0;
493: }
1.4 kstailey 494: if (i > nfields)
1.7 millert 495: FATAL("record `%.30s...' has too many fields; can't happen", r);
1.4 kstailey 496: cleanfld(i+1, lastfld); /* clean out junk from previous record */
497: lastfld = i;
1.32 millert 498: donefld = true;
/* flag every field that looks like a number as NUM in addition to STR */
1.4 kstailey 499: for (j = 1; j <= lastfld; j++) {
1.42 millert 500: double result;
501:
1.4 kstailey 502: p = fldtab[j];
1.42 millert 503: if(is_number(p->sval, & result)) {
504: p->fval = result;
1.1 tholo 505: p->tval |= NUM;
506: }
507: }
1.4 kstailey 508: setfval(nfloc, (Awkfloat) lastfld);
/* NOTE(review): presumably setfval() on NF clears donerec; confirm in tran.c */
1.32 millert 509: donerec = true; /* restore */
1.4 kstailey 510: if (dbg) {
511: for (j = 0; j <= lastfld; j++) {
512: p = fldtab[j];
513: printf("field %d (%s): |%s|\n", j, p->nval, p->sval);
514: }
515: }
1.1 tholo 516: }
517:
1.4 kstailey 518: void cleanfld(int n1, int n2) /* clean out fields n1 .. n2 inclusive */
519: { /* nvals remain intact */
520: Cell *p;
521: int i;
1.1 tholo 522:
1.4 kstailey 523: for (i = n1; i <= n2; i++) {
524: p = fldtab[i];
525: if (freeable(p))
1.1 tholo 526: xfree(p->sval);
1.33 millert 527: p->sval = EMPTY,
1.1 tholo 528: p->tval = FLD | STR | DONTFREE;
529: }
530: }
531:
1.4 kstailey 532: void newfld(int n) /* add field n after end of existing lastfld */
1.1 tholo 533: {
1.4 kstailey 534: if (n > nfields)
535: growfldtab(n);
536: cleanfld(lastfld+1, n);
537: lastfld = n;
1.1 tholo 538: setfval(nfloc, (Awkfloat) n);
1.26 millert 539: }
540:
541: void setlastfld(int n) /* set lastfld cleaning fldtab cells if necessary */
542: {
1.27 millert 543: if (n < 0)
544: FATAL("cannot set NF to a negative value");
1.26 millert 545: if (n > nfields)
546: growfldtab(n);
547:
548: if (lastfld < n)
549: cleanfld(lastfld+1, n);
550: else
551: cleanfld(n+1, lastfld);
552:
553: lastfld = n;
1.1 tholo 554: }
555:
1.4 kstailey 556: Cell *fieldadr(int n) /* get nth field */
557: {
558: if (n < 0)
1.15 millert 559: FATAL("trying to access out of range field %d", n);
1.4 kstailey 560: if (n > nfields) /* fields after NF are empty */
561: growfldtab(n); /* but does not increase NF */
562: return(fldtab[n]);
563: }
564:
565: void growfldtab(int n) /* make new fields up to at least $n */
566: {
567: int nf = 2 * nfields;
1.15 millert 568: size_t s;
1.4 kstailey 569:
570: if (n > nf)
571: nf = n;
1.15 millert 572: s = (nf+1) * (sizeof (struct Cell *)); /* freebsd: how much do we need? */
1.34 millert 573: if (s / sizeof(struct Cell *) - 1 == (size_t)nf) /* didn't overflow */
1.42 millert 574: fldtab = (Cell **) realloc(fldtab, s);
1.15 millert 575: else /* overflow sizeof int */
576: xfree(fldtab); /* make it null */
1.4 kstailey 577: if (fldtab == NULL)
1.7 millert 578: FATAL("out of space creating %d fields", nf);
1.4 kstailey 579: makefields(nfields+1, nf);
580: nfields = nf;
581: }
582:
1.11 millert 583: int refldbld(const char *rec, const char *fs) /* build fields from reg expr in FS */
1.1 tholo 584: {
1.4 kstailey 585: /* this relies on having fields[] the same length as $0 */
586: /* the fields are all stored in this one array with \0's */
1.1 tholo 587: char *fr;
1.4 kstailey 588: int i, tempstat, n;
1.1 tholo 589: fa *pfa;
590:
1.4 kstailey 591: n = strlen(rec);
592: if (n > fieldssize) {
593: xfree(fields);
1.42 millert 594: if ((fields = (char *) malloc(n+1)) == NULL)
1.7 millert 595: FATAL("out of space for fields in refldbld %d", n);
1.4 kstailey 596: fieldssize = n;
597: }
1.1 tholo 598: fr = fields;
599: *fr = '\0';
600: if (*rec == '\0')
601: return 0;
602: pfa = makedfa(fs, 1);
1.39 millert 603: DPRINTF("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs);
1.1 tholo 604: tempstat = pfa->initstat;
1.4 kstailey 605: for (i = 1; ; i++) {
1.37 millert 606: const size_t fss_rem = fields + fieldssize + 1 - fr;
1.4 kstailey 607: if (i > nfields)
608: growfldtab(i);
609: if (freeable(fldtab[i]))
610: xfree(fldtab[i]->sval);
611: fldtab[i]->tval = FLD | STR | DONTFREE;
612: fldtab[i]->sval = fr;
1.39 millert 613: DPRINTF("refldbld: i=%d\n", i);
1.1 tholo 614: if (nematch(pfa, rec)) {
1.37 millert 615: const size_t reclen = patbeg - rec;
1.4 kstailey 616: pfa->initstat = 2; /* horrible coupling to b.c */
1.39 millert 617: DPRINTF("match %s (%d chars)\n", patbeg, patlen);
1.37 millert 618: if (reclen >= fss_rem)
619: FATAL("out of space for fields in refldbld");
620: memcpy(fr, rec, reclen);
621: fr += reclen;
622: *fr++ = '\0';
1.1 tholo 623: rec = patbeg + patlen;
624: } else {
1.39 millert 625: DPRINTF("no match %s\n", rec);
1.37 millert 626: if (strlcpy(fr, rec, fss_rem) >= fss_rem)
627: FATAL("out of space for fields in refldbld");
1.1 tholo 628: pfa->initstat = tempstat;
629: break;
630: }
631: }
1.29 millert 632: return i;
1.1 tholo 633: }
634:
635: void recbld(void) /* create $0 from $1..$NF if necessary */
636: {
637: int i;
638: char *r, *p;
1.27 millert 639: char *sep = getsval(ofsloc);
1.1 tholo 640:
1.32 millert 641: if (donerec)
1.1 tholo 642: return;
1.5 millert 643: r = record;
1.1 tholo 644: for (i = 1; i <= *NF; i++) {
1.4 kstailey 645: p = getsval(fldtab[i]);
1.5 millert 646: if (!adjbuf(&record, &recsize, 1+strlen(p)+r-record, recsize, &r, "recbld 1"))
1.7 millert 647: FATAL("created $0 `%.30s...' too long", record);
1.4 kstailey 648: while ((*r = *p++) != 0)
1.1 tholo 649: r++;
1.4 kstailey 650: if (i < *NF) {
1.27 millert 651: if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
1.7 millert 652: FATAL("created $0 `%.30s...' too long", record);
1.27 millert 653: for (p = sep; (*r = *p++) != 0; )
1.1 tholo 654: r++;
1.4 kstailey 655: }
1.1 tholo 656: }
1.5 millert 657: if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
1.7 millert 658: FATAL("built giant record `%.30s...'", record);
1.1 tholo 659: *r = '\0';
1.39 millert 660: DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
1.4 kstailey 661:
662: if (freeable(fldtab[0]))
663: xfree(fldtab[0]->sval);
664: fldtab[0]->tval = REC | STR | DONTFREE;
665: fldtab[0]->sval = record;
666:
1.39 millert 667: DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
668: DPRINTF("recbld = |%s|\n", record);
1.32 millert 669: donerec = true;
1.1 tholo 670: }
671:
/* Set to 2 by SYNTAX() once a syntax error has been reported. */
int errorflag = 0;

/*
 * yyerror: parser error hook.  Forwards the message to SYNTAX() as data,
 * never as a format string.
 */
void yyerror(const char *s)
{
	SYNTAX("%s", s);
}
678:
1.11 millert 679: void SYNTAX(const char *fmt, ...)
1.7 millert 680: {
1.1 tholo 681: extern char *cmdname, *curfname;
682: static int been_here = 0;
1.7 millert 683: va_list varg;
1.1 tholo 684:
685: if (been_here++ > 2)
686: return;
1.7 millert 687: fprintf(stderr, "%s: ", cmdname);
688: va_start(varg, fmt);
689: vfprintf(stderr, fmt, varg);
690: va_end(varg);
1.1 tholo 691: fprintf(stderr, " at source line %d", lineno);
692: if (curfname != NULL)
693: fprintf(stderr, " in function %s", curfname);
1.32 millert 694: if (compile_time == COMPILING && cursource() != NULL)
1.6 millert 695: fprintf(stderr, " source file %s", cursource());
1.1 tholo 696: fprintf(stderr, "\n");
697: errorflag = 2;
698: eprint();
699: }
700:
extern int bracecnt, brackcnt, parencnt;

/*
 * bracecheck: consume the remaining program text, counting brackets with
 * bclass(), then report any unbalanced {}, [] or ().  Only the first call
 * does anything.
 */
void bracecheck(void)
{
	static int beenhere = 0;
	int ch;

	if (beenhere++)
		return;
	for (ch = input(); ch != EOF && ch != '\0'; ch = input())
		bclass(ch);
	bcheck2(bracecnt, '{', '}');
	bcheck2(brackcnt, '[', ']');
	bcheck2(parencnt, '(', ')');
}
716:
/*
 * bcheck2: report a bracket imbalance on stderr.
 * n is the net count of unclosed brackets: positive means closing
 * brackets c2 are missing, negative means there are extra ones, and zero
 * prints nothing.  c1 is not used.
 */
void bcheck2(int n, int c1, int c2)
{
	if (n > 0) {
		if (n == 1)
			fprintf(stderr, "\tmissing %c\n", c2);
		else
			fprintf(stderr, "\t%d missing %c's\n", n, c2);
	} else if (n < 0) {
		if (n == -1)
			fprintf(stderr, "\textra %c\n", c2);
		else
			fprintf(stderr, "\t%d extra %c's\n", -n, c2);
	}
}
728:
1.35 millert 729: void FATAL(const char *fmt, ...)
1.7 millert 730: {
731: extern char *cmdname;
732: va_list varg;
733:
734: fflush(stdout);
735: fprintf(stderr, "%s: ", cmdname);
736: va_start(varg, fmt);
737: vfprintf(stderr, fmt, varg);
738: va_end(varg);
739: error();
740: if (dbg > 1) /* core dump if serious debugging on */
741: abort();
742: exit(2);
743: }
744:
/*
 * WARNING: print a non-fatal diagnostic (printf-style arguments) on
 * stderr, followed by the context printed by error().  stdout is flushed
 * first so the message appears after pending output.
 */
void WARNING(const char *fmt, ...)
{
	extern char *cmdname;
	va_list ap;

	fflush(stdout);
	fprintf(stderr, "%s: ", cmdname);
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	error();
}
757:
758: void error()
759: {
760: extern Node *curnode;
761:
1.1 tholo 762: fprintf(stderr, "\n");
1.32 millert 763: if (compile_time != ERROR_PRINTING) {
764: if (NR && *NR > 0) {
765: fprintf(stderr, " input record number %d", (int) (*FNR));
766: if (strcmp(*FILENAME, "-") != 0)
767: fprintf(stderr, ", file %s", *FILENAME);
768: fprintf(stderr, "\n");
769: }
770: if (curnode)
771: fprintf(stderr, " source line number %d", curnode->lineno);
772: else if (lineno)
773: fprintf(stderr, " source line number %d", lineno);
1.41 millert 774: if (compile_time == COMPILING && cursource() != NULL)
775: fprintf(stderr, " source file %s", cursource());
776: fprintf(stderr, "\n");
777: eprint();
1.32 millert 778: }
1.1 tholo 779: }
780:
781: void eprint(void) /* try to print context around error */
782: {
783: char *p, *q;
784: int c;
785: static int been_here = 0;
786: extern char ebuf[], *ep;
787:
1.32 millert 788: if (compile_time != COMPILING || been_here++ > 0 || ebuf == ep)
1.1 tholo 789: return;
790: p = ep - 1;
791: if (p > ebuf && *p == '\n')
792: p--;
793: for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--)
794: ;
795: while (*p == '\n')
796: p++;
797: fprintf(stderr, " context is\n\t");
798: for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
799: ;
800: for ( ; p < q; p++)
801: if (*p)
802: putc(*p, stderr);
803: fprintf(stderr, " >>> ");
804: for ( ; p < ep; p++)
805: if (*p)
806: putc(*p, stderr);
807: fprintf(stderr, " <<< ");
808: if (*ep)
809: while ((c = input()) != '\n' && c != '\0' && c != EOF) {
810: putc(c, stderr);
811: bclass(c);
812: }
813: putc('\n', stderr);
814: ep = ebuf;
815: }
816:
817: void bclass(int c)
818: {
819: switch (c) {
820: case '{': bracecnt++; break;
821: case '}': bracecnt--; break;
822: case '[': brackcnt++; break;
823: case ']': brackcnt--; break;
824: case '(': parencnt++; break;
825: case ')': parencnt--; break;
826: }
827: }
828:
/*
 * errcheck: check errno after a math library call.
 * s names the function for the warning.  If errno is EDOM or ERANGE it is
 * cleared, a warning is issued and 1 is returned in place of the result;
 * otherwise x is returned unchanged.
 */
double errcheck(double x, const char *s)
{
	switch (errno) {
	case EDOM:
		errno = 0;
		WARNING("%s argument out of domain", s);
		return 1;
	case ERANGE:
		errno = 0;
		WARNING("%s result out of range", s);
		return 1;
	default:
		return x;
	}
}
843:
/*
 * isclvar: is s of the form var=something ?
 * Returns 1 when s starts with a letter or underscore, continues with
 * letters, digits or underscores, and is followed directly by '=';
 * returns 0 otherwise.
 */
int isclvar(const char *s)
{
	const char *p = s;

	if (*p != '_' && !isalpha((unsigned char) *p))
		return 0;
	while (*p == '_' || isalnum((unsigned char) *p))
		p++;
	return p > s && *p == '=';
}
855:
1.4 kstailey 856: /* strtod is supposed to be a proper test of what's a valid number */
1.8 millert 857: /* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */
858: /* wrong: violates 4.10.1.4 of ansi C standard */
1.42 millert 859:
1.38 millert 860: /* well, not quite. As of C99, hex floating point is allowed. so this is
1.42 millert 861: * a bit of a mess. We work around the mess by checking for a hexadecimal
862: * value and disallowing it. Similarly, we now follow gawk and allow only
863: * +nan, -nan, +inf, and -inf for NaN and infinity values.
1.38 millert 864: */
1.1 tholo 865:
1.42 millert 866: /*
867: * This routine now has a more complicated interface, the main point
868: * being to avoid the double conversion of a string to double, and
869: * also to convey out, if requested, the information that the numeric
870: * value was a leading string or is all of the string. The latter bit
871: * is used in getfval().
872: */
873:
/*
 * is_valid_number: decide whether s looks like a number and convert it.
 *
 *   s                  string to examine; leading white space is skipped
 *   trailing_stuff_ok  if true, a valid numeric prefix followed by other
 *                      text still counts as a number
 *   no_trailing        if not NULL, set to true when nothing but white
 *                      space follows the number, false otherwise
 *   result             if not NULL, receives the converted value
 *
 * Returns true if s is accepted as a number.  Hexadecimal input is never
 * accepted, and NaN and infinity only as +nan, -nan, +inf and -inf.
 *
 * The hexadecimal test skips an optional sign first.  Testing only the
 * first two characters let "+0x10" and "-0x1p3" through to strtod(),
 * which accepts signed hexadecimal, so they were treated as numbers while
 * "0x10" was not.
 */
bool is_valid_number(const char *s, bool trailing_stuff_ok,
					bool *no_trailing, double *result)
{
	double r;
	char *ep;
	const char *mag;	/* s past any leading sign */
	bool retval = false;
	bool is_nan = false;
	bool is_inf = false;

	if (no_trailing)
		*no_trailing = false;

	while (isspace((unsigned char)*s))
		s++;

	/* no hex floating point, sorry; with or without a sign */
	mag = s;
	if (*mag == '+' || *mag == '-')
		mag++;
	if (mag[0] == '0' && tolower((unsigned char)mag[1]) == 'x')
		return false;

	/* allow +nan, -nan, +inf, -inf, any other letter, no */
	if (s[0] == '+' || s[0] == '-') {
		is_nan = (strncasecmp(s+1, "nan", 3) == 0);
		is_inf = (strncasecmp(s+1, "inf", 3) == 0);
		/* s[4] is only read when s[1..3] matched, so it is in bounds */
		if ((is_nan || is_inf)
		    && (isspace((unsigned char)s[4]) || s[4] == '\0'))
			goto convert;
		else if (! isdigit((unsigned char)s[1]) && s[1] != '.')
			return false;
	}
	else if (! isdigit((unsigned char)s[0]) && s[0] != '.')
		return false;

convert:
	errno = 0;
	r = strtod(s, &ep);
	if (ep == s || errno == ERANGE)
		return false;

	/* make sure "-nan" really carries a negative sign */
	if (isnan(r) && s[0] == '-' && signbit(r) == 0)
		r = -r;

	if (result != NULL)
		*result = r;

	/*
	 * check for trailing stuff
	 */
	while (isspace((unsigned char)*ep))
		ep++;

	if (no_trailing != NULL)
		*no_trailing = (*ep == '\0');

	/* return true if found the end, or trailing stuff is allowed */
	retval = *ep == '\0' || trailing_stuff_ok;

	return retval;
}