Annotation of src/usr.bin/awk/lib.c, Revision 1.51
1.51 ! millert 1: /* $OpenBSD: lib.c,v 1.50 2023/09/10 14:59:00 millert Exp $ */
1.1 tholo 2: /****************************************************************
1.4 kstailey 3: Copyright (C) Lucent Technologies 1997
1.1 tholo 4: All Rights Reserved
5:
6: Permission to use, copy, modify, and distribute this software and
7: its documentation for any purpose and without fee is hereby
8: granted, provided that the above copyright notice appear in all
9: copies and that both that the copyright notice and this
10: permission notice and warranty disclaimer appear in supporting
1.4 kstailey 11: documentation, and that the name Lucent Technologies or any of
12: its entities not be used in advertising or publicity pertaining
13: to distribution of the software without specific, written prior
14: permission.
15:
16: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23: THIS SOFTWARE.
1.1 tholo 24: ****************************************************************/
25:
26: #define DEBUG
27: #include <stdio.h>
28: #include <string.h>
29: #include <ctype.h>
30: #include <errno.h>
31: #include <stdlib.h>
1.7 millert 32: #include <stdarg.h>
1.33 millert 33: #include <limits.h>
1.44 millert 34: #include <math.h>
1.1 tholo 35: #include "awk.h"
36:
/* Defined in another file; fldbld() uses it to step through $0 when FS is "". */
extern int u8_nextlen(const char *s);

char EMPTY[] = { '\0' };	/* shared empty string; used as the value of cleared fields */
FILE *infile = NULL;	/* current input stream; NULL makes getrec() open the next file */
bool innew; /* true = infile has not been read by readrec */
char *file = EMPTY;	/* name of the current input file, as returned by getargv() */
char *record;	/* buffer that holds $0; allocated in recinit() */
int recsize = RECSIZE;	/* growth quantum passed to adjbuf() for record buffers */
char *fields;	/* storage for the text of the split fields; see fldbld() */
int fieldssize = RECSIZE;	/* longest record the fields buffer is known to hold */

Cell **fldtab; /* pointers to Cells */
static size_t len_inputFS = 0;	/* allocated size of inputFS, in bytes */
static char *inputFS = NULL; /* FS at time of input, for field splitting */

#define MAXFLD 2
int nfields = MAXFLD; /* last allocated slot for $i */

bool donefld; /* true = implies rec broken into fields */
bool donerec; /* true = record is valid (no flds have changed) */

int lastfld = 0; /* last used field */
int argno = 1; /* current input argument number */
extern Awkfloat *ARGC;

/* Templates copied into new cells: dollar0 for $0, dollar1 for every other field. */
static Cell dollar0 = { OCELL, CFLD, NULL, EMPTY, 0.0, REC|STR|DONTFREE, NULL, NULL };
static Cell dollar1 = { OCELL, CFLD, NULL, EMPTY, 0.0, FLD|STR|DONTFREE, NULL, NULL };
1.4 kstailey 64:
1.1 tholo 65: void recinit(unsigned int n)
66: {
1.42 millert 67: if ( (record = (char *) malloc(n)) == NULL
68: || (fields = (char *) malloc(n+1)) == NULL
69: || (fldtab = (Cell **) calloc(nfields+2, sizeof(*fldtab))) == NULL
70: || (fldtab[0] = (Cell *) malloc(sizeof(**fldtab))) == NULL)
1.7 millert 71: FATAL("out of space for $0 and fields");
1.22 millert 72: *record = '\0';
1.4 kstailey 73: *fldtab[0] = dollar0;
74: fldtab[0]->sval = record;
75: fldtab[0]->nval = tostring("0");
76: makefields(1, nfields);
77: }
78:
79: void makefields(int n1, int n2) /* create $n1..$n2 inclusive */
80: {
81: char temp[50];
1.1 tholo 82: int i;
83:
1.4 kstailey 84: for (i = n1; i <= n2; i++) {
1.42 millert 85: fldtab[i] = (Cell *) malloc(sizeof(**fldtab));
1.4 kstailey 86: if (fldtab[i] == NULL)
1.7 millert 87: FATAL("out of space in makefields %d", i);
1.4 kstailey 88: *fldtab[i] = dollar1;
1.31 millert 89: snprintf(temp, sizeof(temp), "%d", i);
1.4 kstailey 90: fldtab[i]->nval = tostring(temp);
91: }
1.1 tholo 92: }
93:
94: void initgetrec(void)
95: {
96: int i;
97: char *p;
98:
99: for (i = 1; i < *ARGC; i++) {
1.20 millert 100: p = getargv(i); /* find 1st real filename */
101: if (p == NULL || *p == '\0') { /* deleted or zapped */
102: argno++;
103: continue;
104: }
105: if (!isclvar(p)) {
106: setsval(lookup("FILENAME", symtab), p);
1.1 tholo 107: return;
108: }
109: setclvar(p); /* a commandline assignment before filename */
110: argno++;
111: }
112: infile = stdin; /* no filenames, so use stdin */
1.34 millert 113: innew = true;
1.1 tholo 114: }
115:
1.29 millert 116: /*
117: * POSIX specifies that fields are supposed to be evaluated as if they were
118: * split using the value of FS at the time that the record's value ($0) was
119: * read.
120: *
121: * Since field-splitting is done lazily, we save the current value of FS
122: * whenever a new record is read in (implicitly or via getline), or when
123: * a new value is assigned to $0.
124: */
125: void savefs(void)
126: {
1.37 millert 127: size_t len = strlen(getsval(fsloc));
128: if (len >= len_inputFS) {
129: len_inputFS = len + 1;
1.42 millert 130: inputFS = (char *) realloc(inputFS, len_inputFS);
1.37 millert 131: if (inputFS == NULL)
132: FATAL("field separator %.10s... is too long", *FS);
1.33 millert 133: }
1.37 millert 134: if (strlcpy(inputFS, *FS, len_inputFS) >= len_inputFS)
1.29 millert 135: FATAL("field separator %.10s... is too long", *FS);
136: }
137:
static bool firsttime = true;	/* true until getrec() has called initgetrec() once */

/*
 * getrec: read the next input record into *pbuf, moving on to the next
 * command-line file (or var=value assignment) when the current input is
 * exhausted.
 *
 * pbuf, pbufsize: in/out buffer pointer and size; readrec() may grow them.
 * isrecord:       true when the buffer is $0 itself; $0's cell is then
 *                 pointed at the buffer, marked as needing a re-split,
 *                 and the current FS is saved.
 *
 * NR and FNR are incremented for every record read.
 * Returns 1 when a record was read, 0 at true end of input; in the
 * latter case the first byte of the buffer and *pbufsize are restored
 * to their values on entry.
 */
int getrec(char **pbuf, int *pbufsize, bool isrecord)	/* get next input record */
{			/* note: cares whether buf == record */
	int c;
	char *buf = *pbuf;
	uschar saveb0;
	int bufsize = *pbufsize, savebufsize = bufsize;

	if (firsttime) {
		firsttime = false;
		initgetrec();
	}
	DPRINTF("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
		*RS, *FS, *ARGC, *FILENAME);
	saveb0 = buf[0];	/* kept so the buffer can be restored if nothing is read */
	buf[0] = 0;
	while (argno < *ARGC || infile == stdin) {
		DPRINTF("argno=%d, file=|%s|\n", argno, file);
		if (infile == NULL) {	/* have to open a new file */
			file = getargv(argno);
			if (file == NULL || *file == '\0') {	/* deleted or zapped */
				argno++;
				continue;
			}
			if (isclvar(file)) {	/* a var=value arg */
				setclvar(file);
				argno++;
				continue;
			}
			*FILENAME = file;
			DPRINTF("opening file %s\n", file);
			if (*file == '-' && *(file+1) == '\0')
				infile = stdin;
			else if ((infile = fopen(file, "r")) == NULL)
				FATAL("can't open file %s", file);
			innew = true;
			setfval(fnrloc, 0.0);	/* FNR restarts for each file */
		}
		c = readrec(&buf, &bufsize, infile, innew);
		if (innew)
			innew = false;
		if (c != 0 || buf[0] != '\0') {	/* normal record */
			if (isrecord) {
				double result;

				if (freeable(fldtab[0]))
					xfree(fldtab[0]->sval);
				fldtab[0]->sval = buf;	/* buf == record */
				fldtab[0]->tval = REC | STR | DONTFREE;
				if (is_number(fldtab[0]->sval, & result)) {
					fldtab[0]->fval = result;
					fldtab[0]->tval |= NUM;
				}
				donefld = false;	/* fields must be re-split on demand */
				donerec = true;
				savefs();
			}
			setfval(nrloc, nrloc->fval+1);
			setfval(fnrloc, fnrloc->fval+1);
			*pbuf = buf;
			*pbufsize = bufsize;
			return 1;
		}
		/* EOF arrived on this file; set up next */
		if (infile != stdin)
			fclose(infile);
		infile = NULL;
		argno++;
	}
	buf[0] = saveb0;
	*pbuf = buf;
	*pbufsize = savebufsize;
	return 0;	/* true end of file */
}
213:
214: void nextfile(void)
215: {
1.18 millert 216: if (infile != NULL && infile != stdin)
1.1 tholo 217: fclose(infile);
218: infile = NULL;
219: argno++;
220: }
221:
1.51 ! millert 222: extern int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag);
! 223:
1.34 millert 224: int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* read one record into buf */
1.1 tholo 225: {
1.51 ! millert 226: int sep, c, isrec; // POTENTIAL BUG? isrec is a macro in awk.h
! 227: char *rr = *pbuf, *buf = *pbuf;
1.4 kstailey 228: int bufsize = *pbufsize;
1.27 millert 229: char *rs = getsval(rsloc);
1.1 tholo 230:
1.51 ! millert 231: if (CSV) {
! 232: c = readcsvrec(pbuf, pbufsize, inf, newflag);
! 233: isrec = (c == EOF && rr == buf) ? false : true;
! 234: } else if (*rs && rs[1]) {
1.32 millert 235: bool found;
1.30 millert 236:
237: fa *pfa = makedfa(rs, 1);
1.34 millert 238: if (newflag)
239: found = fnematch(pfa, inf, &buf, &bufsize, recsize);
240: else {
241: int tempstat = pfa->initstat;
242: pfa->initstat = 2;
243: found = fnematch(pfa, inf, &buf, &bufsize, recsize);
244: pfa->initstat = tempstat;
245: }
1.30 millert 246: if (found)
1.31 millert 247: setptr(patbeg, '\0');
1.46 millert 248: isrec = (found == 0 && *buf == '\0') ? 0 : 1;
1.30 millert 249: } else {
250: if ((sep = *rs) == 0) {
251: sep = '\n';
252: while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
253: ;
254: if (c != EOF)
255: ungetc(c, inf);
256: }
257: for (rr = buf; ; ) {
258: for (; (c=getc(inf)) != sep && c != EOF; ) {
259: if (rr-buf+1 > bufsize)
260: if (!adjbuf(&buf, &bufsize, 1+rr-buf,
261: recsize, &rr, "readrec 1"))
262: FATAL("input record `%.30s...' too long", buf);
263: *rr++ = c;
264: }
265: if (*rs == sep || c == EOF)
266: break;
267: if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
268: break;
269: if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr,
270: "readrec 2"))
271: FATAL("input record `%.30s...' too long", buf);
272: *rr++ = '\n';
1.4 kstailey 273: *rr++ = c;
274: }
1.30 millert 275: if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
1.7 millert 276: FATAL("input record `%.30s...' too long", buf);
1.30 millert 277: *rr = 0;
1.46 millert 278: isrec = (c == EOF && rr == buf) ? 0 : 1;
1.1 tholo 279: }
1.4 kstailey 280: *pbuf = buf;
281: *pbufsize = bufsize;
1.39 millert 282: DPRINTF("readrec saw <%s>, returns %d\n", buf, isrec);
1.30 millert 283: return isrec;
1.1 tholo 284: }
285:
1.51 ! millert 286:
! 287: /*******************
! 288: * loose ends here:
! 289: * \r\n should become \n
! 290: * what about bare \r? Excel uses that for embedded newlines
! 291: * can't have "" in unquoted fields, according to RFC 4180
! 292: */
! 293:
! 294: int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* csv can have \n's */
! 295: { /* so read a complete record that might be multiple lines */
! 296: int sep, c;
! 297: char *rr = *pbuf, *buf = *pbuf;
! 298: int bufsize = *pbufsize;
! 299: bool in_quote = false;
! 300:
! 301: sep = '\n'; /* the only separator; have to skip over \n embedded in "..." */
! 302: rr = buf;
! 303: while ((c = getc(inf)) != EOF) {
! 304: if (c == sep) {
! 305: if (! in_quote)
! 306: break;
! 307: if (rr > buf && rr[-1] == '\r') // remove \r if was \r\n
! 308: rr--;
! 309: }
! 310:
! 311: if (rr-buf+1 > bufsize)
! 312: if (!adjbuf(&buf, &bufsize, 1+rr-buf,
! 313: recsize, &rr, "readcsvrec 1"))
! 314: FATAL("input record `%.30s...' too long", buf);
! 315: *rr++ = c;
! 316: if (c == '"')
! 317: in_quote = ! in_quote;
! 318: }
! 319: if (c == '\n' && rr > buf && rr[-1] == '\r') // remove \r if was \r\n
! 320: rr--;
! 321:
! 322: if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readcsvrec 4"))
! 323: FATAL("input record `%.30s...' too long", buf);
! 324: *rr = 0;
! 325: *pbuf = buf;
! 326: *pbufsize = bufsize;
! 327: DPRINTF("readcsvrec saw <%s>, returns %d\n", buf, c);
! 328: return c;
! 329: }
! 330:
1.1 tholo 331: char *getargv(int n) /* get ARGV[n] */
332: {
333: Cell *x;
1.4 kstailey 334: char *s, temp[50];
1.1 tholo 335: extern Array *ARGVtab;
336:
1.31 millert 337: snprintf(temp, sizeof(temp), "%d", n);
1.20 millert 338: if (lookup(temp, ARGVtab) == NULL)
339: return NULL;
1.1 tholo 340: x = setsymtab(temp, "", 0.0, STR, ARGVtab);
341: s = getsval(x);
1.39 millert 342: DPRINTF("getargv(%d) returns |%s|\n", n, s);
1.1 tholo 343: return s;
344: }
345:
346: void setclvar(char *s) /* set var=value from s */
347: {
1.48 millert 348: char *e, *p;
1.1 tholo 349: Cell *q;
1.42 millert 350: double result;
1.1 tholo 351:
1.51 ! millert 352: /* commit f3d9187d4e0f02294fb1b0e31152070506314e67 broke T.argv test */
! 353: /* I don't understand why it was changed. */
! 354:
1.1 tholo 355: for (p=s; *p != '='; p++)
356: ;
1.48 millert 357: e = p;
1.1 tholo 358: *p++ = 0;
359: p = qstring(p, '\0');
360: q = setsymtab(s, p, 0.0, STR, symtab);
361: setsval(q, p);
1.42 millert 362: if (is_number(q->sval, & result)) {
363: q->fval = result;
1.1 tholo 364: q->tval |= NUM;
365: }
1.39 millert 366: DPRINTF("command line set %s to |%s|\n", s, p);
1.49 millert 367: free(p);
1.48 millert 368: *e = '=';
1.1 tholo 369: }
370:
371:
/*
 * fldbld: split the current record ($0) into fields $1..$NF.
 *
 * Does nothing if the fields are already built (donefld).  Otherwise the
 * saved separator inputFS selects the splitting mode:
 *   - longer than one character: regular expression, via refldbld();
 *   - a single blank (not in CSV mode): runs of blank, tab and newline;
 *   - CSV mode: comma-separated, with "..." quoting and "" for a quote;
 *   - empty: one field per character, sized by u8_nextlen();
 *   - any other single character: that character.
 * Field text is stored in the shared fields buffer, except in the
 * one-character-per-field mode, where each field is a separate string.
 * Afterwards stale fields from the previous record are cleared, numeric
 * looking fields are flagged NUM, and NF is set.
 */
void fldbld(void)	/* create fields from current record */
{
	/* this relies on having fields[] the same length as $0 */
	/* the fields are all stored in this one array with \0's */
	/* possibly with a final trailing \0 not associated with any field */
	char *r, *fr, sep;
	Cell *p;
	int i, j, n;

	if (donefld)
		return;
	if (!isstr(fldtab[0]))
		getsval(fldtab[0]);	/* make sure $0 has a string value */
	r = fldtab[0]->sval;
	n = strlen(r);
	if (n > fieldssize) {
		xfree(fields);
		if ((fields = (char *) malloc(n+2)) == NULL) /* possibly 2 final \0s */
			FATAL("out of space for fields in fldbld %d", n);
		fieldssize = n;
	}
	fr = fields;
	i = 0;			/* number of fields accumulated here */
	if (inputFS == NULL)	/* make sure we have a copy of FS */
		savefs();
	if (strlen(inputFS) > 1) {	/* it's a regular expression */
		i = refldbld(r, inputFS);
	} else if (!CSV && (sep = *inputFS) == ' ') {	/* default whitespace */
		for (i = 0; ; ) {
			while (*r == ' ' || *r == '\t' || *r == '\n')
				r++;
			if (*r == 0)
				break;
			i++;
			if (i > nfields)
				growfldtab(i);
			if (freeable(fldtab[i]))
				xfree(fldtab[i]->sval);
			fldtab[i]->sval = fr;
			fldtab[i]->tval = FLD | STR | DONTFREE;
			do
				*fr++ = *r++;
			while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
			*fr++ = 0;
		}
		*fr = 0;
	} else if (CSV) {	/* CSV processing.  no error handling */
		if (*r != 0) {
			for (;;) {
				i++;
				if (i > nfields)
					growfldtab(i);
				if (freeable(fldtab[i]))
					xfree(fldtab[i]->sval);
				fldtab[i]->sval = fr;
				fldtab[i]->tval = FLD | STR | DONTFREE;
				if (*r == '"' ) { /* start of "..." */
					for (r++ ; *r != '\0'; ) {
						if (*r == '"' && r[1] != '\0' && r[1] == '"') {
							r += 2; /* doubled quote */
							*fr++ = '"';
						} else if (*r == '"' && (r[1] == '\0' || r[1] == ',')) {
							r++; /* skip over closing quote */
							break;
						} else {
							*fr++ = *r++;
						}
					}
					*fr++ = 0;
				} else {	/* unquoted field */
					while (*r != ',' && *r != '\0')
						*fr++ = *r++;
					*fr++ = 0;
				}
				/* step over the comma; a NUL here ends the record */
				if (*r++ == 0)
					break;

			}
		}
		*fr = 0;
	} else if ((sep = *inputFS) == 0) {	/* new: FS="" => 1 char/field */
		for (i = 0; *r != '\0'; ) {
			char buf[10];
			i++;
			if (i > nfields)
				growfldtab(i);
			if (freeable(fldtab[i]))
				xfree(fldtab[i]->sval);
			n = u8_nextlen(r);	/* n is reused here as the byte count to copy */
			for (j = 0; j < n; j++)
				buf[j] = *r++;
			buf[j] = '\0';
			fldtab[i]->sval = tostring(buf);
			fldtab[i]->tval = FLD | STR;	/* no DONTFREE, unlike the other modes */
		}
		*fr = 0;
	} else if (*r != 0) {	/* if 0, it's a null field */
		/* subtle case: if length(FS) == 1 && length(RS > 0)
		 * \n is NOT a field separator (cf awk book 61,84).
		 * this variable is tested in the inner while loop.
		 */
		int rtest = '\n';	/* normal case */
		if (strlen(*RS) > 0)
			rtest = '\0';
		for (;;) {
			i++;
			if (i > nfields)
				growfldtab(i);
			if (freeable(fldtab[i]))
				xfree(fldtab[i]->sval);
			fldtab[i]->sval = fr;
			fldtab[i]->tval = FLD | STR | DONTFREE;
			while (*r != sep && *r != rtest && *r != '\0')	/* \n is always a separator */
				*fr++ = *r++;
			*fr++ = 0;
			if (*r++ == 0)
				break;
		}
		*fr = 0;
	}
	if (i > nfields)
		FATAL("record `%.30s...' has too many fields; can't happen", r);
	cleanfld(i+1, lastfld);	/* clean out junk from previous record */
	lastfld = i;
	donefld = true;
	for (j = 1; j <= lastfld; j++) {
		double result;

		p = fldtab[j];
		if(is_number(p->sval, & result)) {
			p->fval = result;
			p->tval |= NUM;
		}
	}
	setfval(nfloc, (Awkfloat) lastfld);
	donerec = true;	/* restore */
	if (dbg) {
		for (j = 0; j <= lastfld; j++) {
			p = fldtab[j];
			printf("field %d (%s): |%s|\n", j, p->nval, p->sval);
		}
	}
}
515:
1.4 kstailey 516: void cleanfld(int n1, int n2) /* clean out fields n1 .. n2 inclusive */
517: { /* nvals remain intact */
518: Cell *p;
519: int i;
1.1 tholo 520:
1.4 kstailey 521: for (i = n1; i <= n2; i++) {
522: p = fldtab[i];
523: if (freeable(p))
1.1 tholo 524: xfree(p->sval);
1.33 millert 525: p->sval = EMPTY,
1.1 tholo 526: p->tval = FLD | STR | DONTFREE;
527: }
528: }
529:
1.4 kstailey 530: void newfld(int n) /* add field n after end of existing lastfld */
1.1 tholo 531: {
1.4 kstailey 532: if (n > nfields)
533: growfldtab(n);
534: cleanfld(lastfld+1, n);
535: lastfld = n;
1.1 tholo 536: setfval(nfloc, (Awkfloat) n);
1.26 millert 537: }
538:
539: void setlastfld(int n) /* set lastfld cleaning fldtab cells if necessary */
540: {
1.27 millert 541: if (n < 0)
542: FATAL("cannot set NF to a negative value");
1.26 millert 543: if (n > nfields)
544: growfldtab(n);
545:
546: if (lastfld < n)
547: cleanfld(lastfld+1, n);
548: else
549: cleanfld(n+1, lastfld);
550:
551: lastfld = n;
1.1 tholo 552: }
553:
1.4 kstailey 554: Cell *fieldadr(int n) /* get nth field */
555: {
556: if (n < 0)
1.15 millert 557: FATAL("trying to access out of range field %d", n);
1.4 kstailey 558: if (n > nfields) /* fields after NF are empty */
559: growfldtab(n); /* but does not increase NF */
560: return(fldtab[n]);
561: }
562:
563: void growfldtab(int n) /* make new fields up to at least $n */
564: {
565: int nf = 2 * nfields;
1.15 millert 566: size_t s;
1.4 kstailey 567:
568: if (n > nf)
569: nf = n;
1.15 millert 570: s = (nf+1) * (sizeof (struct Cell *)); /* freebsd: how much do we need? */
1.34 millert 571: if (s / sizeof(struct Cell *) - 1 == (size_t)nf) /* didn't overflow */
1.42 millert 572: fldtab = (Cell **) realloc(fldtab, s);
1.15 millert 573: else /* overflow sizeof int */
574: xfree(fldtab); /* make it null */
1.4 kstailey 575: if (fldtab == NULL)
1.7 millert 576: FATAL("out of space creating %d fields", nf);
1.4 kstailey 577: makefields(nfields+1, nf);
578: nfields = nf;
579: }
580:
1.11 millert 581: int refldbld(const char *rec, const char *fs) /* build fields from reg expr in FS */
1.1 tholo 582: {
1.4 kstailey 583: /* this relies on having fields[] the same length as $0 */
584: /* the fields are all stored in this one array with \0's */
1.1 tholo 585: char *fr;
1.4 kstailey 586: int i, tempstat, n;
1.1 tholo 587: fa *pfa;
588:
1.4 kstailey 589: n = strlen(rec);
590: if (n > fieldssize) {
591: xfree(fields);
1.42 millert 592: if ((fields = (char *) malloc(n+1)) == NULL)
1.7 millert 593: FATAL("out of space for fields in refldbld %d", n);
1.4 kstailey 594: fieldssize = n;
595: }
1.1 tholo 596: fr = fields;
597: *fr = '\0';
598: if (*rec == '\0')
599: return 0;
600: pfa = makedfa(fs, 1);
1.39 millert 601: DPRINTF("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs);
1.1 tholo 602: tempstat = pfa->initstat;
1.4 kstailey 603: for (i = 1; ; i++) {
1.37 millert 604: const size_t fss_rem = fields + fieldssize + 1 - fr;
1.4 kstailey 605: if (i > nfields)
606: growfldtab(i);
607: if (freeable(fldtab[i]))
608: xfree(fldtab[i]->sval);
609: fldtab[i]->tval = FLD | STR | DONTFREE;
610: fldtab[i]->sval = fr;
1.39 millert 611: DPRINTF("refldbld: i=%d\n", i);
1.1 tholo 612: if (nematch(pfa, rec)) {
1.37 millert 613: const size_t reclen = patbeg - rec;
1.4 kstailey 614: pfa->initstat = 2; /* horrible coupling to b.c */
1.39 millert 615: DPRINTF("match %s (%d chars)\n", patbeg, patlen);
1.37 millert 616: if (reclen >= fss_rem)
617: FATAL("out of space for fields in refldbld");
618: memcpy(fr, rec, reclen);
619: fr += reclen;
620: *fr++ = '\0';
1.1 tholo 621: rec = patbeg + patlen;
622: } else {
1.39 millert 623: DPRINTF("no match %s\n", rec);
1.37 millert 624: if (strlcpy(fr, rec, fss_rem) >= fss_rem)
625: FATAL("out of space for fields in refldbld");
1.1 tholo 626: pfa->initstat = tempstat;
627: break;
628: }
629: }
1.29 millert 630: return i;
1.1 tholo 631: }
632:
633: void recbld(void) /* create $0 from $1..$NF if necessary */
634: {
635: int i;
636: char *r, *p;
1.27 millert 637: char *sep = getsval(ofsloc);
1.1 tholo 638:
1.32 millert 639: if (donerec)
1.1 tholo 640: return;
1.5 millert 641: r = record;
1.1 tholo 642: for (i = 1; i <= *NF; i++) {
1.4 kstailey 643: p = getsval(fldtab[i]);
1.5 millert 644: if (!adjbuf(&record, &recsize, 1+strlen(p)+r-record, recsize, &r, "recbld 1"))
1.7 millert 645: FATAL("created $0 `%.30s...' too long", record);
1.4 kstailey 646: while ((*r = *p++) != 0)
1.1 tholo 647: r++;
1.4 kstailey 648: if (i < *NF) {
1.27 millert 649: if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
1.7 millert 650: FATAL("created $0 `%.30s...' too long", record);
1.27 millert 651: for (p = sep; (*r = *p++) != 0; )
1.1 tholo 652: r++;
1.4 kstailey 653: }
1.1 tholo 654: }
1.5 millert 655: if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
1.7 millert 656: FATAL("built giant record `%.30s...'", record);
1.1 tholo 657: *r = '\0';
1.39 millert 658: DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
1.4 kstailey 659:
660: if (freeable(fldtab[0]))
661: xfree(fldtab[0]->sval);
662: fldtab[0]->tval = REC | STR | DONTFREE;
663: fldtab[0]->sval = record;
664:
1.39 millert 665: DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
666: DPRINTF("recbld = |%s|\n", record);
1.32 millert 667: donerec = true;
1.1 tholo 668: }
669:
int errorflag = 0;	/* set to 2 by SYNTAX() when a syntax error is reported */

/*
 * yyerror: report the message s as a syntax error.  The text is passed
 * as an argument to a fixed "%s" format, so it is never interpreted as
 * a format string itself.
 */
void yyerror(const char *s)
{
	SYNTAX("%s", s);
}
676:
1.11 millert 677: void SYNTAX(const char *fmt, ...)
1.7 millert 678: {
1.1 tholo 679: extern char *cmdname, *curfname;
680: static int been_here = 0;
1.7 millert 681: va_list varg;
1.1 tholo 682:
683: if (been_here++ > 2)
684: return;
1.7 millert 685: fprintf(stderr, "%s: ", cmdname);
686: va_start(varg, fmt);
687: vfprintf(stderr, fmt, varg);
688: va_end(varg);
1.1 tholo 689: fprintf(stderr, " at source line %d", lineno);
690: if (curfname != NULL)
691: fprintf(stderr, " in function %s", curfname);
1.32 millert 692: if (compile_time == COMPILING && cursource() != NULL)
1.6 millert 693: fprintf(stderr, " source file %s", cursource());
1.1 tholo 694: fprintf(stderr, "\n");
695: errorflag = 2;
696: eprint();
697: }
698:
extern int bracecnt, brackcnt, parencnt;

/*
 * bracecheck: on the first call only, read the remaining program text
 * through bclass() and then report any unbalanced braces, brackets or
 * parentheses.
 */
void bracecheck(void)
{
	static int beenhere = 0;
	int ch;

	if (beenhere++ != 0)
		return;
	for (ch = input(); ch != EOF && ch != '\0'; ch = input())
		bclass(ch);
	bcheck2(bracecnt, '{', '}');
	bcheck2(brackcnt, '[', ']');
	bcheck2(parencnt, '(', ')');
}
714:
/*
 * bcheck2: report an imbalance of n for one bracket kind on stderr.
 * A positive n means closers (c2) are missing, a negative n means there
 * are extra closers, and zero prints nothing.  c1 is not used.
 */
void bcheck2(int n, int c1, int c2)
{
	if (n == 0)
		return;
	if (n > 0) {
		if (n == 1)
			fprintf(stderr, "\tmissing %c\n", c2);
		else
			fprintf(stderr, "\t%d missing %c's\n", n, c2);
	} else {
		if (n == -1)
			fprintf(stderr, "\textra %c\n", c2);
		else
			fprintf(stderr, "\t%d extra %c's\n", -n, c2);
	}
}
726:
1.35 millert 727: void FATAL(const char *fmt, ...)
1.7 millert 728: {
729: extern char *cmdname;
730: va_list varg;
731:
732: fflush(stdout);
733: fprintf(stderr, "%s: ", cmdname);
734: va_start(varg, fmt);
735: vfprintf(stderr, fmt, varg);
736: va_end(varg);
737: error();
738: if (dbg > 1) /* core dump if serious debugging on */
739: abort();
740: exit(2);
741: }
742:
/*
 * WARNING: print a printf-style warning with context on stderr and
 * return to the caller.  stdout is flushed first.
 */
void WARNING(const char *fmt, ...)
{
	extern char *cmdname;
	va_list ap;

	fflush(stdout);
	fprintf(stderr, "%s: ", cmdname);

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	error();
}
755:
756: void error()
757: {
758: extern Node *curnode;
759:
1.1 tholo 760: fprintf(stderr, "\n");
1.32 millert 761: if (compile_time != ERROR_PRINTING) {
762: if (NR && *NR > 0) {
763: fprintf(stderr, " input record number %d", (int) (*FNR));
764: if (strcmp(*FILENAME, "-") != 0)
765: fprintf(stderr, ", file %s", *FILENAME);
766: fprintf(stderr, "\n");
767: }
768: if (curnode)
769: fprintf(stderr, " source line number %d", curnode->lineno);
770: else if (lineno)
771: fprintf(stderr, " source line number %d", lineno);
1.41 millert 772: if (compile_time == COMPILING && cursource() != NULL)
773: fprintf(stderr, " source file %s", cursource());
774: fprintf(stderr, "\n");
775: eprint();
1.32 millert 776: }
1.1 tholo 777: }
778:
/*
 * eprint: print the program text around a compile-time error on stderr.
 *
 * The output is the start of the current line in ebuf, then the last
 * token before ep between " >>> " and " <<< ", then the rest of the
 * line read through input().  Does nothing unless compiling, on any
 * call after the first, or when ebuf is empty (ebuf == ep).
 */
void eprint(void)	/* try to print context around error */
{
	char *p, *q;
	int c;
	static int been_here = 0;
	extern char ebuf[], *ep;

	if (compile_time != COMPILING || been_here++ > 0 || ebuf == ep)
		return;
	p = ep - 1;
	if (p > ebuf && *p == '\n')
		p--;
	/* back up to the newline or NUL before the current line */
	for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--)
		;
	while (*p == '\n')
		p++;
	fprintf(stderr, " context is\n\t");
	/* q backs up from ep to the whitespace before the last token */
	for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
		;
	for ( ; p < q; p++)
		if (*p)
			putc(*p, stderr);
	fprintf(stderr, " >>> ");
	for ( ; p < ep; p++)
		if (*p)
			putc(*p, stderr);
	fprintf(stderr, " <<< ");
	if (*ep)
		while ((c = input()) != '\n' && c != '\0' && c != EOF) {
			putc(c, stderr);
			bclass(c);	/* keep the bracket counters up to date */
		}
	putc('\n', stderr);
	ep = ebuf;	/* reset so ebuf counts as empty */
}
814:
815: void bclass(int c)
816: {
817: switch (c) {
818: case '{': bracecnt++; break;
819: case '}': bracecnt--; break;
820: case '[': brackcnt++; break;
821: case ']': brackcnt--; break;
822: case '(': parencnt++; break;
823: case ')': parencnt--; break;
824: }
825: }
826:
/*
 * errcheck: check errno after a math library call.
 *
 * x is the value the call produced and s names the function for the
 * message.  If errno is EDOM or ERANGE, it is cleared, a warning is
 * issued and 1 is returned; otherwise x is returned unchanged.
 */
double errcheck(double x, const char *s)
{
	const int err = errno;

	if (err != EDOM && err != ERANGE)
		return x;

	errno = 0;
	if (err == EDOM)
		WARNING("%s argument out of domain", s);
	else
		WARNING("%s result out of range", s);
	return 1;
}
841:
/*
 * isclvar: return nonzero if s has the form var=something, where var
 * starts with a letter or underscore and continues with letters, digits
 * or underscores; return 0 otherwise.
 */
int isclvar(const char *s)
{
	const char *p = s;

	if (*p != '_' && !isalpha((unsigned char) *p))
		return 0;
	while (*p == '_' || isalnum((unsigned char) *p))
		p++;
	return *p == '=' && p > s;
}
853:
/*
 * strtod alone is not a usable test of what awk accepts as a number:
 * since C99 it also accepts hexadecimal floating point and several
 * spellings of NaN and infinity.  Hexadecimal input is therefore
 * rejected before strtod is called and, following gawk, only +nan,
 * -nan, +inf and -inf are accepted for NaN and infinity.
 */

/*
 * is_valid_number: decide whether s looks like a number and convert it.
 *
 * s:                 text to examine; leading white space is skipped.
 * trailing_stuff_ok: if true, characters after the number do not make
 *                    the function return false.
 * no_trailing:       if not NULL, set to true when nothing but white
 *                    space follows the number, false otherwise.
 * result:            if not NULL, receives the converted value whenever
 *                    a number was parsed.
 *
 * Converting here saves the caller a second string-to-double
 * conversion.  Returns true if s is a number by the rules above and
 * either nothing follows it or trailing_stuff_ok is set.  Out-of-range
 * values (ERANGE) are rejected.
 */
bool is_valid_number(const char *s, bool trailing_stuff_ok,
					bool *no_trailing, double *result)
{
	double r;
	char *ep;
	const char *digits;
	bool retval = false;
	bool is_nan = false;
	bool is_inf = false;

	if (no_trailing)
		*no_trailing = false;

	while (isspace((unsigned char)*s))
		s++;

	/*
	 * no hex floating point, sorry.  Look past an optional sign too:
	 * "+0x10" and "-0x1A" would otherwise get through the sign branch
	 * below (a digit follows the sign) and be accepted by strtod,
	 * unlike the same text without a sign.
	 */
	digits = (s[0] == '+' || s[0] == '-') ? s + 1 : s;
	if (digits[0] == '0' && tolower((unsigned char)digits[1]) == 'x')
		return false;

	/* allow +nan, -nan, +inf, -inf, any other letter, no */
	if (s[0] == '+' || s[0] == '-') {
		is_nan = (strncasecmp(s+1, "nan", 3) == 0);
		is_inf = (strncasecmp(s+1, "inf", 3) == 0);
		if ((is_nan || is_inf)
		    && (isspace((unsigned char)s[4]) || s[4] == '\0'))
			goto convert;
		else if (! isdigit((unsigned char)s[1]) && s[1] != '.')
			return false;
	}
	else if (! isdigit((unsigned char)s[0]) && s[0] != '.')
		return false;

convert:
	errno = 0;
	r = strtod(s, &ep);
	if (ep == s || errno == ERANGE)
		return false;

	/* make sure "-nan" carries a sign bit even if strtod did not set one */
	if (isnan(r) && s[0] == '-' && signbit(r) == 0)
		r = -r;

	if (result != NULL)
		*result = r;

	/*
	 * check for trailing stuff
	 */
	while (isspace((unsigned char)*ep))
		ep++;

	if (no_trailing != NULL)
		*no_trailing = (*ep == '\0');

	/* return true if found the end, or trailing stuff is allowed */
	retval = *ep == '\0' || trailing_stuff_ok;

	return retval;
}