=================================================================== RCS file: /cvsrepo/anoncvs/cvs/src/usr.bin/awk/b.c,v retrieving revision 1.47 retrieving revision 1.48 diff -c -r1.47 -r1.48 *** src/usr.bin/awk/b.c 2023/11/15 18:56:53 1.47 --- src/usr.bin/awk/b.c 2023/11/22 01:01:21 1.48 *************** *** 1,4 **** ! /* $OpenBSD: b.c,v 1.47 2023/11/15 18:56:53 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved --- 1,4 ---- ! /* $OpenBSD: b.c,v 1.48 2023/11/22 01:01:21 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved *************** *** 770,828 **** #define MAX_UTF_BYTES 4 // UTF-8 is up to 4 bytes long - // Read one rune at a time from the given FILE*. Return both - // the bytes and the actual rune. - - struct runedata { - int rune; - size_t len; - char bytes[6]; - }; - - struct runedata getrune(FILE *fp) - { - struct runedata result; - int c, i, next; - - memset(&result, 0, sizeof(result)); - - c = getc(fp); - if (c == EOF) - return result; // result.rune == 0 --> EOF - else if (c < 128 || awk_mb_cur_max == 1) { - result.bytes[0] = c; - result.len = 1; - result.rune = c; - - return result; - } - - // need to get bytes and fill things in - result.bytes[0] = c; - result.len = 1; - - next = 1; - for (i = 1; i < MAX_UTF_BYTES; i++) { - c = getc(fp); - if (c == EOF) - break; - result.bytes[next++] = c; - result.len++; - } - - // put back any extra input bytes - int actual_len = u8_nextlen(result.bytes); - while (result.len > actual_len) { - ungetc(result.bytes[--result.len], fp); - } - - result.bytes[result.len] = '\0'; - (void) u8_rune(& result.rune, (uschar *) result.bytes); - - return result; - } - - /* * NAME * fnematch --- 770,775 ---- *************** *** 840,899 **** bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum) { ! char *buf = *pbuf; int bufsize = *pbufsize; ! int i, j, k, ns, s; ! struct runedata r; s = pfa->initstat; patlen = 0; /* ! * All indices relative to buf. ! * i <= j <= k <= bufsize * ! * i: origin of active substring (first byte of first character) ! * j: current character (last byte of current character) ! * k: destination of next getc() */ ! i = -1, k = 0; ! do { ! j = i++; ! do { ! r = getrune(f); ! if (r.len == 0) { ! r.len = 1; // store NUL byte for EOF } ! j += r.len; ! if (j >= bufsize) { ! if (!adjbuf(&buf, &bufsize, j+1, quantum, 0, "fnematch")) ! FATAL("stream '%.30s...' too long", buf); } ! memcpy(buf + k, r.bytes, r.len); ! k += r.len; ! if ((ns = get_gototab(pfa, s, r.rune)) != 0) ! s = ns; ! else ! s = cgoto(pfa, s, r.rune); ! if (pfa->out[s]) { /* final state */ ! patlen = j - i + 1; ! if (r.rune == 0) /* don't count $ */ ! patlen--; ! } ! } while (buf[j] && s != 1); s = 2; ! if (r.len > 1) ! i += r.len - 1; // i incremented around the loop ! } while (buf[i] && !patlen); /* adjbuf() may have relocated a resized buffer. Inform the world. */ *pbuf = buf; *pbufsize = bufsize; if (patlen) { - patbeg = buf + i; /* * Under no circumstances is the last character fed to * the automaton part of the match. It is EOF's nullbyte, --- 787,862 ---- bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum) { ! char *i, *j, *k, *buf = *pbuf; int bufsize = *pbufsize; ! int c, n, ns, s; s = pfa->initstat; patlen = 0; /* ! * buf <= i <= j <= k <= buf+bufsize * ! * i: origin of active substring ! * j: current character ! * k: destination of the next getc */ ! ! i = j = k = buf; ! ! do { ! /* ! * Call u8_rune with at least MAX_UTF_BYTES ahead in ! * the buffer until EOF interferes. ! */ ! if (k - j < MAX_UTF_BYTES) { ! if (k + MAX_UTF_BYTES > buf + bufsize) { ! adjbuf(&buf, &bufsize, ! bufsize + MAX_UTF_BYTES, ! quantum, 0, "fnematch"); } ! for (n = MAX_UTF_BYTES ; n > 0; n--) { ! *k++ = (c = getc(f)) != EOF ? c : 0; ! if (c == EOF) { ! if (ferror(f)) ! FATAL("fnematch: getc error"); ! break; ! } } ! } ! j += u8_rune(&c, (uschar *)j); ! if ((ns = get_gototab(pfa, s, c)) != 0) ! s = ns; ! else ! s = cgoto(pfa, s, c); ! ! if (pfa->out[s]) { /* final state */ ! patbeg = i; ! patlen = j - i; ! if (c == 0) /* don't count $ */ ! patlen--; ! } ! ! if (c && s != 1) ! continue; /* origin i still viable, next j */ ! if (patlen) ! break; /* best match found */ ! ! /* no match at origin i, next i and start over */ ! i += u8_rune(&c, (uschar *)i); ! if (c == 0) ! break; /* no match */ ! j = i; s = 2; ! } while (1); /* adjbuf() may have relocated a resized buffer. Inform the world. */ *pbuf = buf; *pbufsize = bufsize; if (patlen) { /* * Under no circumstances is the last character fed to * the automaton part of the match. It is EOF's nullbyte, *************** *** 905,914 **** * (except for EOF's nullbyte, if present) and null * terminate the buffer. */ ! for (; r.len > 0; r.len--) ! if (buf[--k] && ungetc(buf[k], f) == EOF) ! FATAL("unable to ungetc '%c'", buf[k]); ! buf[k-patlen] = '\0'; return true; } else --- 868,878 ---- * (except for EOF's nullbyte, if present) and null * terminate the buffer. */ ! do ! if (*--k && ungetc(*k, f) == EOF) ! FATAL("unable to ungetc '%c'", *k); ! while (k > patbeg + patlen); ! *k = '\0'; return true; } else