Annotation of src/usr.bin/awk/lib.c, Revision 1.3
1.1 tholo 1: /****************************************************************
2: Copyright (C) AT&T and Lucent Technologies 1996
3: All Rights Reserved
4:
5: Permission to use, copy, modify, and distribute this software and
6: its documentation for any purpose and without fee is hereby
7: granted, provided that the above copyright notice appear in all
8: copies and that both that the copyright notice and this
9: permission notice and warranty disclaimer appear in supporting
10: documentation, and that the names of AT&T or Lucent Technologies
11: or any of their entities not be used in advertising or publicity
12: pertaining to distribution of the software without specific,
13: written prior permission.
14:
15: AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
16: SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17: FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
18: ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
19: DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20: DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
21: OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
22: USE OR PERFORMANCE OF THIS SOFTWARE.
23: ****************************************************************/
24:
25: #define DEBUG
26: #include <stdio.h>
27: #include <string.h>
28: #include <ctype.h>
29: #include <errno.h>
30: #include <stdlib.h>
31: #include "awk.h"
32: #include "awkgram.h"
33:
34: FILE *infile = NULL;
35: char *file = "";
36: int recsize = RECSIZE;
37: char *recdata;
38: char *record;
39: char *fields;
40: Cell *fldtab;
1.2 millert 41: char inputFS[100]; /* BUG: unchecked */
1.1 tholo 42:
43: #define MAXFLD 200
44: int nfields = MAXFLD; /* can be set from commandline in main */
45:
46: int donefld; /* 1 = implies rec broken into fields */
47: int donerec; /* 1 = record is valid (no flds have changed) */
48:
49: int maxfld = 0; /* last used field */
50: int argno = 1; /* current input argument number */
51: extern Awkfloat *ARGC;
52:
53: void recinit(unsigned int n)
54: {
55: static Cell dollar0 = {
56: OCELL, CFLD, "$0", /*recdata*/0, 0.0, REC|STR|DONTFREE };
57: static Cell dollar1 = {
58: OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE };
59: int i;
60:
61: record = recdata = (char *) malloc(n);
62: fields = (char *) malloc(n);
63: fldtab = (Cell *) malloc(nfields * sizeof(Cell));
64: if (recdata == NULL || fields == NULL || fldtab == NULL)
65: ERROR "out of space for $0 and fields" FATAL;
66: fldtab[0] = dollar0;
67: fldtab[0].sval = recdata;
68: for (i = 1; i < nfields; i++)
69: fldtab[i] = dollar1;
70: }
71:
72: void initgetrec(void)
73: {
74: int i;
75: char *p;
76:
77: for (i = 1; i < *ARGC; i++) {
78: if (!isclvar(p = getargv(i))) { /* find 1st real filename */
79: setsval(lookup("FILENAME", symtab), getargv(i));
80: return;
81: }
82: setclvar(p); /* a commandline assignment before filename */
83: argno++;
84: }
85: infile = stdin; /* no filenames, so use stdin */
86: }
87:
88: int getrec(char *buf) /* get next input record from whatever source */
89: { /* note: tests whether buf == record */
90: int c;
91: static int firsttime = 1;
92:
93: if (firsttime) {
94: firsttime = 0;
95: initgetrec();
96: }
97: dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
98: *RS, *FS, *ARGC, *FILENAME) );
99: donefld = 0;
100: donerec = 1;
101: buf[0] = 0;
102: while (argno < *ARGC || infile == stdin) {
103: dprintf( ("argno=%d, file=|%s|\n", argno, file) );
104: if (infile == NULL) { /* have to open a new file */
105: file = getargv(argno);
106: if (*file == '\0') { /* it's been zapped */
107: argno++;
108: continue;
109: }
110: if (isclvar(file)) { /* a var=value arg */
111: setclvar(file);
112: argno++;
113: continue;
114: }
115: *FILENAME = file;
116: dprintf( ("opening file %s\n", file) );
117: if (*file == '-' && *(file+1) == '\0')
118: infile = stdin;
119: else if ((infile = fopen((char *)file, "r")) == NULL)
120: ERROR "can't open file %s", file FATAL;
121: setfval(fnrloc, 0.0);
122: }
123: c = readrec(buf, recsize, infile);
124: if (c != 0 || buf[0] != '\0') { /* normal record */
125: if (buf == record) {
126: if (!(recloc->tval & DONTFREE))
127: xfree(recloc->sval);
128: recloc->sval = record;
129: recloc->tval = REC | STR | DONTFREE;
130: if (isnumber(recloc->sval)) {
131: recloc->fval = atof(recloc->sval);
132: recloc->tval |= NUM;
133: }
134: }
135: setfval(nrloc, nrloc->fval+1);
136: setfval(fnrloc, fnrloc->fval+1);
137: return 1;
138: }
139: /* EOF arrived on this file; set up next */
140: if (infile != stdin)
141: fclose(infile);
142: infile = NULL;
143: argno++;
144: }
145: return 0; /* true end of file */
146: }
147:
148: void nextfile(void)
149: {
150: if (infile != stdin)
151: fclose(infile);
152: infile = NULL;
153: argno++;
154: }
155:
156: int readrec(char *buf, int bufsize, FILE *inf) /* read one record into buf */
157: {
158: int sep, c;
159: char *rr;
160: int nrr;
161:
1.2 millert 162: strcpy(inputFS, *FS); /* for subsequent field splitting */
1.1 tholo 163: if ((sep = **RS) == 0) {
164: sep = '\n';
165: while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
166: ;
167: if (c != EOF)
168: ungetc(c, inf);
169: }
170: for (rr = buf, nrr = bufsize; ; ) {
171: for (; (c=getc(inf)) != sep && c != EOF; *rr++ = c)
172: if (--nrr < 0)
173: ERROR "input record `%.30s...' too long; try -mr n", buf FATAL;
174: if (**RS == sep || c == EOF)
175: break;
176: if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
177: break;
178: *rr++ = '\n';
179: *rr++ = c;
180: }
181: if (rr > buf + bufsize)
182: ERROR "input record `%.30s...' too long; try -mr n", buf FATAL;
183: *rr = 0;
184: dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
185: return c == EOF && rr == buf ? 0 : 1;
186: }
187:
188: char *getargv(int n) /* get ARGV[n] */
189: {
190: Cell *x;
191: char *s, temp[10];
192: extern Array *ARGVtab;
193:
194: sprintf(temp, "%d", n);
195: x = setsymtab(temp, "", 0.0, STR, ARGVtab);
196: s = getsval(x);
197: dprintf( ("getargv(%d) returns |%s|\n", n, s) );
198: return s;
199: }
200:
201: void setclvar(char *s) /* set var=value from s */
202: {
203: char *p;
204: Cell *q;
205:
206: for (p=s; *p != '='; p++)
207: ;
208: *p++ = 0;
209: p = qstring(p, '\0');
210: q = setsymtab(s, p, 0.0, STR, symtab);
211: setsval(q, p);
212: if (isnumber(q->sval)) {
213: q->fval = atof(q->sval);
214: q->tval |= NUM;
215: }
216: dprintf( ("command line set %s to |%s|\n", s, p) );
217: }
218:
219:
220: void fldbld(void) /* create fields from current record */
221: {
222: char *r, *fr, sep;
223: Cell *p;
224: int i;
225:
226: if (donefld)
227: return;
228: if (!(recloc->tval & STR))
229: getsval(recloc);
230: r = recloc->sval;
231: fr = fields;
232: i = 0; /* number of fields accumulated here */
1.2 millert 233: if (strlen(inputFS) > 1) { /* it's a regular expression */
234: i = refldbld(r, inputFS);
235: } else if ((sep = *inputFS) == ' ') { /* default whitespace */
1.1 tholo 236: for (i = 0; ; ) {
237: while (*r == ' ' || *r == '\t' || *r == '\n')
238: r++;
239: if (*r == 0)
240: break;
241: i++;
242: if (i >= nfields)
243: break;
244: if (!(fldtab[i].tval & DONTFREE))
245: xfree(fldtab[i].sval);
246: fldtab[i].sval = fr;
247: fldtab[i].tval = FLD | STR | DONTFREE;
248: do
249: *fr++ = *r++;
250: while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
251: *fr++ = 0;
252: }
253: *fr = 0;
1.2 millert 254: } else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
1.1 tholo 255: for (i = 0; *r != 0; r++) {
256: char buf[2];
257: i++;
258: if (i >= nfields)
259: break;
260: if (!(fldtab[i].tval & DONTFREE))
261: xfree(fldtab[i].sval);
262: buf[0] = *r;
263: buf[1] = 0;
264: fldtab[i].sval = tostring(buf);
265: fldtab[i].tval = FLD | STR;
266: }
267: *fr = 0;
268: } else if (*r != 0) { /* if 0, it's a null field */
269: for (;;) {
270: i++;
271: if (i >= nfields)
272: break;
273: if (!(fldtab[i].tval & DONTFREE))
274: xfree(fldtab[i].sval);
275: fldtab[i].sval = fr;
276: fldtab[i].tval = FLD | STR | DONTFREE;
277: while (*r != sep && *r != '\n' && *r != '\0') /* \n is always a separator */
278: *fr++ = *r++;
279: *fr++ = 0;
280: if (*r++ == 0)
281: break;
282: }
283: *fr = 0;
284: }
285: if (i >= nfields)
286: ERROR "record `%.30s...' has too many fields; try -mf n", record FATAL;
287: /* clean out junk from previous record */
288: cleanfld(i, maxfld);
289: maxfld = i;
290: donefld = 1;
291: for (p = fldtab+1; p <= fldtab+maxfld; p++) {
292: if(isnumber(p->sval)) {
293: p->fval = atof(p->sval);
294: p->tval |= NUM;
295: }
296: }
297: setfval(nfloc, (Awkfloat) maxfld);
298: if (dbg)
299: for (p = fldtab; p <= fldtab+maxfld; p++)
300: printf("field %d: |%s|\n", p-fldtab, p->sval);
301: }
302:
303: void cleanfld(int n1, int n2) /* clean out fields n1..n2 inclusive */
304: {
305: static char *nullstat = "";
306: Cell *p, *q;
307:
308: for (p = &fldtab[n2], q = &fldtab[n1]; p > q; p--) {
309: if (!(p->tval & DONTFREE))
310: xfree(p->sval);
311: p->tval = FLD | STR | DONTFREE;
312: p->sval = nullstat;
313: }
314: }
315:
316: void newfld(int n) /* add field n (after end) */
317: {
318: if (n >= nfields)
319: ERROR "creating too many fields (%d); try -mf n", n FATAL;
320: cleanfld(maxfld, n);
321: maxfld = n;
322: setfval(nfloc, (Awkfloat) n);
323: }
324:
325: int refldbld(char *rec, char *fs) /* build fields from reg expr in FS */
326: {
327: char *fr;
328: int i, tempstat;
329: fa *pfa;
330:
331: fr = fields;
332: *fr = '\0';
333: if (*rec == '\0')
334: return 0;
335: pfa = makedfa(fs, 1);
336: dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
337: tempstat = pfa->initstat;
338: for (i = 1; i < nfields; i++) {
339: if (!(fldtab[i].tval & DONTFREE))
340: xfree(fldtab[i].sval);
341: fldtab[i].tval = FLD | STR | DONTFREE;
342: fldtab[i].sval = fr;
343: dprintf( ("refldbld: i=%d\n", i) );
344: if (nematch(pfa, rec)) {
345: pfa->initstat = 2; /* horrible coupling */
346: dprintf( ("match %s (%d chars)\n", patbeg, patlen) );
347: strncpy(fr, rec, patbeg-rec);
348: fr += patbeg - rec + 1;
349: *(fr-1) = '\0';
350: rec = patbeg + patlen;
351: } else {
352: dprintf( ("no match %s\n", rec) );
353: strcpy(fr, rec);
354: pfa->initstat = tempstat;
355: break;
356: }
357: }
358: return i;
359: }
360:
361: void recbld(void) /* create $0 from $1..$NF if necessary */
362: {
363: int i;
364: char *r, *p;
365: static char *rec = 0;
366:
367: if (donerec == 1)
368: return;
369: if (rec == 0) {
370: rec = (char *) malloc(recsize);
371: if (rec == 0)
372: ERROR "out of space building $0, record size %d", recsize FATAL;
373: }
374: r = rec;
375: for (i = 1; i <= *NF; i++) {
376: p = getsval(&fldtab[i]);
377: while (r < rec+recsize-1 && (*r = *p++))
378: r++;
379: if (i < *NF)
380: for (p = *OFS; r < rec+recsize-1 && (*r = *p++); )
381: r++;
382: }
383: if (r > rec + recsize - 1)
384: ERROR "built giant record `%.30s...'; try -mr n", record FATAL;
385: *r = '\0';
1.2 millert 386: dprintf( ("in recbld inputFS=%s, recloc=%p\n", inputFS, recloc) );
1.1 tholo 387: recloc->tval = REC | STR | DONTFREE;
388: recloc->sval = record = rec;
1.2 millert 389: dprintf( ("in recbld inputFS=%s, recloc=%p\n", inputFS, recloc) );
1.1 tholo 390: dprintf( ("recbld = |%s|\n", record) );
391: donerec = 1;
392: }
393:
394: Cell *fieldadr(int n)
395: {
396: if (n < 0 || n >= nfields)
397: ERROR "trying to access field %d; try -mf n", n FATAL;
398: return(&fldtab[n]);
399: }
400:
401: int errorflag = 0;
402: char errbuf[200];
403:
404: void yyerror(char *s)
405: {
406: extern char *cmdname, *curfname;
407: static int been_here = 0;
408:
409: if (been_here++ > 2)
410: return;
411: fprintf(stderr, "%s: %s", cmdname, s);
412: fprintf(stderr, " at source line %d", lineno);
413: if (curfname != NULL)
414: fprintf(stderr, " in function %s", curfname);
415: fprintf(stderr, "\n");
416: errorflag = 2;
417: eprint();
418: }
419:
420: void fpecatch(int n)
421: {
422: ERROR "floating point exception %d", n FATAL;
423: }
424:
extern int bracecnt, brackcnt, parencnt;

/*
 * bracecheck: consume the rest of the program text, keeping the bracket
 * counters up to date, then report any unbalanced {}, [] or ().
 * Only the first call does anything.
 */
void bracecheck(void)
{
	static int beenhere = 0;
	int ch;

	if (beenhere++)
		return;
	for (;;) {
		ch = input();
		if (ch == EOF || ch == '\0')
			break;
		bclass(ch);
	}
	bcheck2(bracecnt, '{', '}');
	bcheck2(brackcnt, '[', ']');
	bcheck2(parencnt, '(', ')');
}
440:
/*
 * bcheck2: report an unbalanced bracket count on stderr.
 *
 * n is the net count: positive means closers (c2) are missing, negative
 * means there are extra closers, zero prints nothing.  c1 is not used.
 */
void bcheck2(int n, int c1, int c2)
{
	if (n > 0) {
		if (n == 1)
			fprintf(stderr, "\tmissing %c\n", c2);
		else
			fprintf(stderr, "\t%d missing %c's\n", n, c2);
	} else if (n < 0) {
		if (n == -1)
			fprintf(stderr, "\textra %c\n", c2);
		else
			fprintf(stderr, "\t%d extra %c's\n", -n, c2);
	}
}
452:
453: void error(int f, char *s)
454: {
455: extern Node *curnode;
456: extern char *cmdname;
457:
458: fflush(stdout);
459: fprintf(stderr, "%s: ", cmdname);
460: fprintf(stderr, "%s", s);
461: fprintf(stderr, "\n");
462: if (compile_time != 2 && NR && *NR > 0) {
463: fprintf(stderr, " input record number %d", (int) (*FNR));
464: if (strcmp(*FILENAME, "-") != 0)
465: fprintf(stderr, ", file %s", *FILENAME);
466: fprintf(stderr, "\n");
467: }
468: if (compile_time != 2 && curnode)
469: fprintf(stderr, " source line number %d\n", curnode->lineno);
470: else if (compile_time != 2 && lineno)
471: fprintf(stderr, " source line number %d\n", lineno);
472: eprint();
473: if (f) {
474: if (dbg > 1) /* core dump if serious debugging on */
475: abort();
476: exit(2);
477: }
478: }
479:
480: void eprint(void) /* try to print context around error */
481: {
482: char *p, *q;
483: int c;
484: static int been_here = 0;
485: extern char ebuf[], *ep;
486:
487: if (compile_time == 2 || compile_time == 0 || been_here++ > 0)
488: return;
489: p = ep - 1;
490: if (p > ebuf && *p == '\n')
491: p--;
492: for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--)
493: ;
494: while (*p == '\n')
495: p++;
496: fprintf(stderr, " context is\n\t");
497: for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
498: ;
499: for ( ; p < q; p++)
500: if (*p)
501: putc(*p, stderr);
502: fprintf(stderr, " >>> ");
503: for ( ; p < ep; p++)
504: if (*p)
505: putc(*p, stderr);
506: fprintf(stderr, " <<< ");
507: if (*ep)
508: while ((c = input()) != '\n' && c != '\0' && c != EOF) {
509: putc(c, stderr);
510: bclass(c);
511: }
512: putc('\n', stderr);
513: ep = ebuf;
514: }
515:
516: void bclass(int c)
517: {
518: switch (c) {
519: case '{': bracecnt++; break;
520: case '}': bracecnt--; break;
521: case '[': brackcnt++; break;
522: case ']': brackcnt--; break;
523: case '(': parencnt++; break;
524: case ')': parencnt--; break;
525: }
526: }
527:
528: double errcheck(double x, char *s)
529: {
530: extern int errno;
531:
532: if (errno == EDOM) {
533: errno = 0;
534: ERROR "%s argument out of domain", s WARNING;
535: x = 1;
536: } else if (errno == ERANGE) {
537: errno = 0;
538: ERROR "%s result out of range", s WARNING;
539: x = 1;
540: }
541: return x;
542: }
543:
/*
 * isclvar: is s of form var=something ?
 *
 * Returns 1 when s starts with a letter or '_', continues with letters,
 * digits or '_', and is followed by a single '=' (not "==").  Returns 0
 * otherwise.
 *
 * Characters are converted to unsigned char before the <ctype.h> calls;
 * passing a negative char value to them is undefined.
 */
int isclvar(char *s)	/* is s of form var=something ? */
{
	if (!isalpha((unsigned char) *s) && *s != '_')
		return 0;
	for ( ; *s; s++)
		if (!(isalnum((unsigned char) *s) || *s == '_'))
			break;
	return *s == '=' && *(s+1) != '=';
}
555:
#define	MAXEXPON	38	/* maximum exponent for fp number. should be IEEE */

/*
 * isnumber: does s look like a decimal floating point number?
 *
 * Accepts optional surrounding blanks, tabs and newlines, an optional
 * sign, digits with an optional decimal point (at least one digit is
 * required), and an optional exponent of at most two digits whose
 * two-digit value is below MAXEXPON.  Returns 1 if so, else 0.
 *
 * Characters are converted to unsigned char before isdigit; passing a
 * negative char value to it is undefined.
 */
int isnumber(char *s)	/* should be done by a library function */
{
	int d1, d2;
	int point;
	char *es;

	d1 = d2 = point = 0;
	while (*s == ' ' || *s == '\t' || *s == '\n')
		s++;
	if (*s == '\0')
		return(0);	/* empty stuff isn't a number */
	if (*s == '+' || *s == '-')
		s++;
	if (!isdigit((unsigned char) *s) && *s != '.')
		return(0);
	if (isdigit((unsigned char) *s)) {
		do {
			d1++;
			s++;
		} while (isdigit((unsigned char) *s));
	}
	if (*s == '.') {
		point++;
		s++;
	}
	if (isdigit((unsigned char) *s)) {
		d2++;
		do {
			s++;
		} while (isdigit((unsigned char) *s));
	}
	if (!(d1 || (point && d2)))
		return(0);
	if (*s == 'e' || *s == 'E') {
		s++;
		if (*s == '+' || *s == '-')
			s++;
		if (!isdigit((unsigned char) *s))
			return(0);
		es = s;
		do {
			s++;
		} while (isdigit((unsigned char) *s));
		if (s - es > 2)
			return(0);
		else if (s - es == 2 && (int)(10 * (*es-'0') + *(es+1)-'0') >= MAXEXPON)
			return(0);
	}
	while (*s == ' ' || *s == '\t' || *s == '\n')
		s++;
	if (*s == '\0')
		return(1);
	else
		return(0);
}
613:
#if 0
/* THIS IS AN EXPERIMENT THAT'S NOT DONE. */
/* strtod ought to provide a better test of what's */
/* a valid number, but it doesn't work according to */
/* the standard on any machine near me! */
/* Kept disabled.  The return type is spelled out because */
/* implicit int is no longer valid C. */

#include <math.h>
int isnumber(char *s)
{
	double r;
	char *ep;
	errno = 0;
	r = strtod(s, &ep);
	if (r == HUGE_VAL || errno == ERANGE)
		return 0;
	/* only trailing white space may follow the number */
	while (*ep == ' ' || *ep == '\t' || *ep == '\n')
		ep++;
	if (*ep == '\0')
		return 1;
	else
		return 0;
}
#endif
636: #endif