src/usr.bin/awk/lib.c - diff

Return to lib.c CVS log

Up to [local] / src / usr.bin / awk

Diff for /src/usr.bin/awk/lib.c between version 1.50 and 1.51

version 1.50, 2023/09/10 14:59:00

version 1.51, 2023/09/17 14:49:44

Line 34

#include <math.h>

#include "awk.h"

extern int u8_nextlen(const char *s);

char EMPTY[] = { '\0' };

FILE *infile = NULL;

bool innew; /* true = infile has not been read by readrec */

Line 217

Line 219

argno++;

}

extern int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag);

int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* read one record into buf */

{

int sep, c, isrec;

int sep, c, isrec; // POTENTIAL BUG? isrec is a macro in awk.h

char *rr, *buf = *pbuf;

char *rr = *pbuf, *buf = *pbuf;

int bufsize = *pbufsize;

char *rs = getsval(rsloc);

if (*rs && rs[1]) {

if (CSV) {

c = readcsvrec(pbuf, pbufsize, inf, newflag);

isrec = (c == EOF && rr == buf) ? false : true;

} else if (*rs && rs[1]) {

bool found;

fa *pfa = makedfa(rs, 1);

Line 276

Line 283

return isrec;

}

/*******************

* loose ends here:

* \r\n should become \n

* what about a bare \r? Excel uses that for embedded newlines

* can't have "" in unquoted fields, according to RFC 4180

*/

/*
 * readcsvrec: read one CSV record from inf into the buffer *pbuf.
 *
 * A record ends at the first newline that is not inside a double-quoted
 * section, so a single record may span several physical lines.  A '\r'
 * stored immediately before a newline is dropped, both for newlines
 * embedded in quotes and for the terminating one.
 *
 * pbuf, pbufsize: in/out; on return they hold the (possibly replaced)
 *                 buffer pointer and its size.
 * inf:            stream to read from.
 * newflag:        not used by this function.
 *
 * Returns the last character obtained from getc(): '\n' when a record
 * terminator was seen, EOF when input ran out first.
 */
int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag)
{
	char *buf = *pbuf;
	char *rr = buf;			/* next free position in buf */
	int bufsize = *pbufsize;
	bool in_quote = false;		/* toggled on every '"' seen */
	int c;

	for (;;) {
		c = getc(inf);
		if (c == EOF)
			break;

		if (c == '\n') {
			/* newline is the only separator; inside "..." it is data */
			if (!in_quote)
				break;
			/* embedded \r\n: drop the \r, the \n is stored below */
			if (rr > buf && rr[-1] == '\r')
				rr--;
		}

		/* make room for one more byte before storing it */
		if (rr-buf+1 > bufsize &&
		    !adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readcsvrec 1"))
			FATAL("input record `%.30s...' too long", buf);

		*rr++ = c;
		if (c == '"')
			in_quote = !in_quote;
	}

	/* record ended with \r\n: strip the trailing \r as well */
	if (c == '\n' && rr > buf && rr[-1] == '\r')
		rr--;

	/* make sure there is room for the terminating NUL */
	if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readcsvrec 4"))
		FATAL("input record `%.30s...' too long", buf);
	*rr = 0;

	/* hand the possibly reallocated buffer back to the caller */
	*pbuf = buf;
	*pbufsize = bufsize;
	DPRINTF("readcsvrec saw <%s>, returns %d\n", buf, c);
	return c;
}

char *getargv(int n) /* get ARGV[n] */

{

Cell *x;

Line 297

Line 349

Cell *q;

double result;

/* commit f3d9187d4e0f02294fb1b0e31152070506314e67 broke T.argv test */

/* I don't understand why it was changed. */

for (p=s; *p != '='; p++)

;

e = p;

Line 341

Line 396

savefs();

if (strlen(inputFS) > 1) { /* it's a regular expression */

i = refldbld(r, inputFS);

} else if ((sep = *inputFS) == ' ') { /* default whitespace */

} else if (!CSV && (sep = *inputFS) == ' ') { /* default whitespace */

for (i = 0; ; ) {

while (*r == ' ' || *r == '\t' || *r == '\n')

r++;

Line 360

Line 415

*fr++ = 0;

}

*fr = 0;

} else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */

} else if (CSV) { /* CSV processing. no error handling */

for (i = 0; *r != '\0'; r += n) {

if (*r != 0) {

char buf[MB_LEN_MAX + 1];

for (;;) {

i++;

if (i > nfields)

growfldtab(i);

if (freeable(fldtab[i]))

xfree(fldtab[i]->sval);

fldtab[i]->sval = fr;

fldtab[i]->tval = FLD | STR | DONTFREE;

if (*r == '"' ) { /* start of "..." */

for (r++ ; *r != '\0'; ) {

if (*r == '"' && r[1] != '\0' && r[1] == '"') {

r += 2; /* doubled quote */

*fr++ = '"';

} else if (*r == '"' && (r[1] == '\0' || r[1] == ',')) {

r++; /* skip over closing quote */

break;

} else {

*fr++ = *r++;

}

*fr++ = 0;

} else { /* unquoted field */

while (*r != ',' && *r != '\0')

*fr++ = *r++;

*fr++ = 0;

}

if (*r++ == 0)

break;

}

*fr = 0;

} else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */

for (i = 0; *r != '\0'; ) {

char buf[10];

i++;

if (i > nfields)

growfldtab(i);

if (freeable(fldtab[i]))

xfree(fldtab[i]->sval);

n = mblen(r, MB_LEN_MAX);

n = u8_nextlen(r);

if (n < 0)

for (j = 0; j < n; j++)

n = 1;

buf[j] = *r++;

memcpy(buf, r, n);

buf[j] = '\0';

buf[n] = '\0';

fldtab[i]->sval = tostring(buf);

fldtab[i]->tval = FLD | STR;

}

*fr = 0;

} else if (*r != 0) { /* if 0, it's a null field */

/* subtlecase : if length(FS) == 1 && length(RS > 0)

/* subtle case: if length(FS) == 1 && length(RS > 0)

* \n is NOT a field separator (cf awk book 61,84).

* this variable is tested in the inner while loop.

Line 797

Line 884

while (isspace((uschar)*s))

s++;

// no hex floating point, sorry

/* no hex floating point, sorry */

if (s[0] == '0' && tolower((uschar)s[1]) == 'x')

return false;

// allow +nan, -nan, +inf, -inf, any other letter, no

/* allow +nan, -nan, +inf, -inf, any other letter, no */

if (s[0] == '+' || s[0] == '-') {

is_nan = (strncasecmp(s+1, "nan", 3) == 0);

is_inf = (strncasecmp(s+1, "inf", 3) == 0);

Line 835

Line 922

if (no_trailing != NULL)

*no_trailing = (*ep == '\0');

// return true if found the end, or trailing stuff is allowed

/* return true if found the end, or trailing stuff is allowed */

retval = *ep == '\0' || trailing_stuff_ok;

return retval;