src/usr.bin/awk/b.c - diff

Return to b.c CVS log

Up to [local] / src / usr.bin / awk

Diff for /src/usr.bin/awk/b.c between version 1.42 and 1.43

version 1.42, 2023/09/21 17:19:06

version 1.43, 2023/10/06 22:29:24

Line 81

fa *fatab[NFA];

int nfatab = 0; /* entries in fatab */

extern int u8_nextlen(const char *s);

/* utf-8 mechanism:

For most of Awk, utf-8 strings just "work", since they look like

Line 760

Line 762

return (0);

}

static int getrune(FILE *fp, char **pbuf, int *pbufsize, int quantum,

int *curpos, int *lastpos)

#define MAX_UTF_BYTES 4 // UTF-8 is up to 4 bytes long

// Read one rune at a time from the given FILE*. Return both

// the bytes and the actual rune.

// One character as read from an input stream: the raw bytes that were
// consumed together with the rune decoded from them.
struct runedata {

// Decoded character value; getrune() leaves this 0 to signal EOF.
int rune;

// Number of bytes currently stored in bytes[].
size_t len;

// Raw input bytes of the character. getrune() stores at most
// MAX_UTF_BYTES bytes here and NUL-terminates multibyte sequences.
char bytes[6];

};

struct runedata getrune(FILE *fp)

{

int c = 0;

struct runedata result;

char *buf = *pbuf;

int c, i, next;

static const int max_bytes = 4; // max multiple bytes in UTF-8 is 4

int i, rune;

uschar private_buf[max_bytes + 1];

for (i = 0; i <= max_bytes; i++) {

memset(&result, 0, sizeof(result));

if (++*curpos == *lastpos) {

if (*lastpos == *pbufsize)

c = getc(fp);

if (!adjbuf((char **) pbuf, pbufsize, *pbufsize+1, quantum, 0, "getrune"))

if (c == EOF)

FATAL("stream '%.30s...' too long", buf);

return result; // result.rune == 0 --> EOF

buf[(*lastpos)++] = (c = getc(fp)) != EOF ? c : 0;

else if (c < 128 || awk_mb_cur_max == 1) {

private_buf[i] = c;

result.bytes[0] = c;

}

result.len = 1;

if (c == 0 || c < 128 || (c >> 6) == 4) { // 10xxxxxx starts a new character

result.rune = c;

ungetc(c, fp);

private_buf[i] = 0;

return result;

}

// need to get bytes and fill things in

result.bytes[0] = c;

result.len = 1;

next = 1;

for (i = 1; i < MAX_UTF_BYTES; i++) {

c = getc(fp);

if (c == EOF)

break;

}

result.bytes[next++] = c;

result.len++;

}

u8_rune(& rune, private_buf);

// put back any extra input bytes

int actual_len = u8_nextlen(result.bytes);

while (result.len > actual_len) {

ungetc(result.bytes[--result.len], fp);

}

return rune;

result.bytes[result.len] = '\0';

(void) u8_rune(& result.rune, (uschar *) result.bytes);

return result;

}

Line 809

Line 837

{

char *buf = *pbuf;

int bufsize = *pbufsize;

int c, i, j, k, ns, s;

int i, j, k, ns, s;

int rune;

struct runedata r;

s = pfa->initstat;

patlen = 0;

Line 819

Line 847

* All indices relative to buf.

* i <= j <= k <= bufsize

* i: origin of active substring

* i: origin of active substring (first byte of first character)

* j: current character

* j: current character (last byte of current character)

* k: destination of next getc()

i = -1, k = 0;

do {

j = i++;

do {

if (++j == k) {

r = getrune(f);

if (k == bufsize)

if ((++j + r.len) >= k) {

if (k >= bufsize)

if (!adjbuf(&buf, &bufsize, bufsize+1, quantum, 0, "fnematch"))

FATAL("stream '%.30s...' too long", buf);

buf[k++] = (c = getc(f)) != EOF ? c : 0;

}

c = (uschar)buf[j];

memcpy(buf + k, r.bytes, r.len);

if (c < 128 || awk_mb_cur_max == 1)

j += r.len - 1; // incremented next time around the loop

rune = c;

k += r.len;

else {

j--;

k--;

ungetc(c, f);

rune = getrune(f, &buf, &bufsize, quantum, &j, &k);

}

if ((ns = get_gototab(pfa, s, rune)) != 0)

if ((ns = get_gototab(pfa, s, r.rune)) != 0)

s = ns;

else

s = cgoto(pfa, s, rune);

s = cgoto(pfa, s, r.rune);

if (pfa->out[s]) { /* final state */

patlen = j - i + 1;

if (c == 0) /* don't count $ */

if (r.rune == 0) /* don't count $ */

patlen--;

}

} while (buf[j] && s != 1);

s = 2;

if (r.len > 1)

i += r.len - 1; // i incremented around the loop

} while (buf[i] && !patlen);

/* adjbuf() may have relocated a resized buffer. Inform the world. */

Line 874

Line 898

* (except for EOF's nullbyte, if present) and null

* terminate the buffer.

do {

if (buf[--k] && ungetc(buf[k], f) == EOF)

int ii;

FATAL("unable to ungetc '%c'", buf[k]);

for (ii = r.len; ii > 0; ii--)

while (k > i + patlen);

if (buf[--k] && ungetc(buf[k], f) == EOF)

FATAL("unable to ungetc '%c'", buf[k]);

} while (k > i + patlen);

buf[k] = '\0';

return true;

}