Annotation of src/usr.bin/sed/process.c, Revision 1.1
1.1 ! deraadt 1: /*-
! 2: * Copyright (c) 1992 Diomidis Spinellis.
! 3: * Copyright (c) 1992, 1993
! 4: * The Regents of the University of California. All rights reserved.
! 5: *
! 6: * This code is derived from software contributed to Berkeley by
! 7: * Diomidis Spinellis of Imperial College, University of London.
! 8: *
! 9: * Redistribution and use in source and binary forms, with or without
! 10: * modification, are permitted provided that the following conditions
! 11: * are met:
! 12: * 1. Redistributions of source code must retain the above copyright
! 13: * notice, this list of conditions and the following disclaimer.
! 14: * 2. Redistributions in binary form must reproduce the above copyright
! 15: * notice, this list of conditions and the following disclaimer in the
! 16: * documentation and/or other materials provided with the distribution.
! 17: * 3. All advertising materials mentioning features or use of this software
! 18: * must display the following acknowledgement:
! 19: * This product includes software developed by the University of
! 20: * California, Berkeley and its contributors.
! 21: * 4. Neither the name of the University nor the names of its contributors
! 22: * may be used to endorse or promote products derived from this software
! 23: * without specific prior written permission.
! 24: *
! 25: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
! 26: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 27: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 28: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
! 29: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 30: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 31: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 32: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 33: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 34: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 35: * SUCH DAMAGE.
! 36: */
! 37:
! 38: #ifndef lint
! 39: /* from: static char sccsid[] = "@(#)process.c 8.1 (Berkeley) 6/6/93"; */
! 40: static char *rcsid = "$Id: process.c,v 1.17 1995/07/11 04:09:50 cgd Exp $";
! 41: #endif /* not lint */
! 42:
! 43: #include <sys/types.h>
! 44: #include <sys/stat.h>
! 45: #include <sys/ioctl.h>
! 46: #include <sys/uio.h>
! 47:
! 48: #include <ctype.h>
! 49: #include <errno.h>
! 50: #include <fcntl.h>
! 51: #include <limits.h>
! 52: #include <regex.h>
! 53: #include <stdio.h>
! 54: #include <stdlib.h>
! 55: #include <string.h>
! 56: #include <unistd.h>
! 57:
! 58: #include "defs.h"
! 59: #include "extern.h"
! 60:
! 61: static SPACE HS, PS, SS;
! 62: #define pd PS.deleted
! 63: #define ps PS.space
! 64: #define psl PS.len
! 65: #define hs HS.space
! 66: #define hsl HS.len
! 67:
! 68: static inline int applies __P((struct s_command *));
! 69: static void flush_appends __P((void));
! 70: static void lputs __P((char *));
! 71: static inline int regexec_e __P((regex_t *, const char *, int, int, size_t));
! 72: static void regsub __P((SPACE *, char *, char *));
! 73: static int substitute __P((struct s_command *));
! 74:
! 75: struct s_appends *appends; /* Array of pointers to strings to append. */
! 76: static int appendx; /* Index into appends array. */
! 77: int appendnum; /* Size of appends array. */
! 78:
! 79: static int lastaddr; /* Set by applies if last address of a range. */
! 80: static int sdone; /* If any substitutes since last line input. */
! 81: /* Iov structure for 'w' commands. */
! 82: static regex_t *defpreg;
! 83: size_t maxnsub;
! 84: regmatch_t *match;
! 85:
! 86: #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); }
! 87:
! 88: void
! 89: process()
! 90: {
! 91: struct s_command *cp;
! 92: SPACE tspace;
! 93: size_t len, oldpsl;
! 94: char *p;
! 95:
! 96: for (linenum = 0; mf_fgets(&PS, REPLACE);) {
! 97: pd = 0;
! 98: top:
! 99: cp = prog;
! 100: redirect:
! 101: while (cp != NULL) {
! 102: if (!applies(cp)) {
! 103: cp = cp->next;
! 104: continue;
! 105: }
! 106: switch (cp->code) {
! 107: case '{':
! 108: cp = cp->u.c;
! 109: goto redirect;
! 110: case 'a':
! 111: if (appendx >= appendnum)
! 112: appends = xrealloc(appends,
! 113: sizeof(struct s_appends) *
! 114: (appendnum *= 2));
! 115: appends[appendx].type = AP_STRING;
! 116: appends[appendx].s = cp->t;
! 117: appends[appendx].len = strlen(cp->t);
! 118: appendx++;
! 119: break;
! 120: case 'b':
! 121: cp = cp->u.c;
! 122: goto redirect;
! 123: case 'c':
! 124: pd = 1;
! 125: psl = 0;
! 126: if (cp->a2 == NULL || lastaddr)
! 127: (void)printf("%s", cp->t);
! 128: break;
! 129: case 'd':
! 130: pd = 1;
! 131: goto new;
! 132: case 'D':
! 133: if (pd)
! 134: goto new;
! 135: if ((p = memchr(ps, '\n', psl - 1)) == NULL) {
! 136: pd = 1;
! 137: goto new;
! 138: } else {
! 139: psl -= (p + 1) - ps;
! 140: memmove(ps, p + 1, psl);
! 141: goto top;
! 142: }
! 143: case 'g':
! 144: cspace(&PS, hs, hsl, REPLACE);
! 145: break;
! 146: case 'G':
! 147: cspace(&PS, hs, hsl, 0);
! 148: break;
! 149: case 'h':
! 150: cspace(&HS, ps, psl, REPLACE);
! 151: break;
! 152: case 'H':
! 153: cspace(&HS, ps, psl, 0);
! 154: break;
! 155: case 'i':
! 156: (void)printf("%s", cp->t);
! 157: break;
! 158: case 'l':
! 159: lputs(ps);
! 160: break;
! 161: case 'n':
! 162: if (!nflag && !pd)
! 163: OUT(ps)
! 164: flush_appends();
! 165: if (!mf_fgets(&PS, REPLACE))
! 166: exit(0);
! 167: pd = 0;
! 168: break;
! 169: case 'N':
! 170: flush_appends();
! 171: if (!mf_fgets(&PS, 0)) {
! 172: if (!nflag && !pd)
! 173: OUT(ps)
! 174: exit(0);
! 175: }
! 176: break;
! 177: case 'p':
! 178: if (pd)
! 179: break;
! 180: OUT(ps)
! 181: break;
! 182: case 'P':
! 183: if (pd)
! 184: break;
! 185: if ((p = memchr(ps, '\n', psl - 1)) != NULL) {
! 186: oldpsl = psl;
! 187: psl = (p + 1) - ps;
! 188: }
! 189: OUT(ps)
! 190: if (p != NULL)
! 191: psl = oldpsl;
! 192: break;
! 193: case 'q':
! 194: if (!nflag && !pd)
! 195: OUT(ps)
! 196: flush_appends();
! 197: exit(0);
! 198: case 'r':
! 199: if (appendx >= appendnum)
! 200: appends = xrealloc(appends,
! 201: sizeof(struct s_appends) *
! 202: (appendnum *= 2));
! 203: appends[appendx].type = AP_FILE;
! 204: appends[appendx].s = cp->t;
! 205: appends[appendx].len = strlen(cp->t);
! 206: appendx++;
! 207: break;
! 208: case 's':
! 209: sdone |= substitute(cp);
! 210: break;
! 211: case 't':
! 212: if (sdone) {
! 213: sdone = 0;
! 214: cp = cp->u.c;
! 215: goto redirect;
! 216: }
! 217: break;
! 218: case 'w':
! 219: if (pd)
! 220: break;
! 221: if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
! 222: O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
! 223: DEFFILEMODE)) == -1)
! 224: err(FATAL, "%s: %s\n",
! 225: cp->t, strerror(errno));
! 226: if (write(cp->u.fd, ps, psl) != psl)
! 227: err(FATAL, "%s: %s\n",
! 228: cp->t, strerror(errno));
! 229: break;
! 230: case 'x':
! 231: if (hs == NULL)
! 232: cspace(&HS, "\n", 1, REPLACE);
! 233: tspace = PS;
! 234: PS = HS;
! 235: HS = tspace;
! 236: break;
! 237: case 'y':
! 238: if (pd)
! 239: break;
! 240: for (p = ps, len = psl; --len; ++p)
! 241: *p = cp->u.y[*p];
! 242: break;
! 243: case ':':
! 244: case '}':
! 245: break;
! 246: case '=':
! 247: (void)printf("%lu\n", linenum);
! 248: }
! 249: cp = cp->next;
! 250: } /* for all cp */
! 251:
! 252: new: if (!nflag && !pd)
! 253: OUT(ps)
! 254: flush_appends();
! 255: } /* for all lines */
! 256: }
! 257:
! 258: /*
! 259: * TRUE if the address passed matches the current program state
! 260: * (lastline, linenumber, ps).
! 261: */
! 262: #define MATCH(a) \
! 263: (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \
! 264: (a)->type == AT_LINE ? linenum == (a)->u.l : lastline
! 265:
! 266: /*
! 267: * Return TRUE if the command applies to the current line. Sets the inrange
! 268: * flag to process ranges. Interprets the non-select (``!'') flag.
! 269: */
! 270: static inline int
! 271: applies(cp)
! 272: struct s_command *cp;
! 273: {
! 274: int r;
! 275:
! 276: lastaddr = 0;
! 277: if (cp->a1 == NULL && cp->a2 == NULL)
! 278: r = 1;
! 279: else if (cp->a2)
! 280: if (cp->inrange) {
! 281: if (MATCH(cp->a2)) {
! 282: cp->inrange = 0;
! 283: lastaddr = 1;
! 284: }
! 285: r = 1;
! 286: } else if (MATCH(cp->a1)) {
! 287: /*
! 288: * If the second address is a number less than or
! 289: * equal to the line number first selected, only
! 290: * one line shall be selected.
! 291: * -- POSIX 1003.2
! 292: */
! 293: if (cp->a2->type == AT_LINE &&
! 294: linenum >= cp->a2->u.l)
! 295: lastaddr = 1;
! 296: else
! 297: cp->inrange = 1;
! 298: r = 1;
! 299: } else
! 300: r = 0;
! 301: else
! 302: r = MATCH(cp->a1);
! 303: return (cp->nonsel ? ! r : r);
! 304: }
! 305:
! 306: /*
! 307: * substitute --
! 308: * Do substitutions in the pattern space. Currently, we build a
! 309: * copy of the new pattern space in the substitute space structure
! 310: * and then swap them.
! 311: */
! 312: static int
! 313: substitute(cp)
! 314: struct s_command *cp;
! 315: {
! 316: SPACE tspace;
! 317: regex_t *re;
! 318: size_t re_off, slen;
! 319: int n, lastempty;
! 320: char *s;
! 321:
! 322: s = ps;
! 323: re = cp->u.s->re;
! 324: if (re == NULL) {
! 325: if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
! 326: linenum = cp->u.s->linenum;
! 327: err(COMPILE, "\\%d not defined in the RE",
! 328: cp->u.s->maxbref);
! 329: }
! 330: }
! 331: if (!regexec_e(re, s, 0, 0, psl))
! 332: return (0);
! 333:
! 334: SS.len = 0; /* Clean substitute space. */
! 335: slen = psl;
! 336: n = cp->u.s->n;
! 337: lastempty = 1;
! 338:
! 339: switch (n) {
! 340: case 0: /* Global */
! 341: do {
! 342: if (lastempty || match[0].rm_so != match[0].rm_eo) {
! 343: /* Locate start of replaced string. */
! 344: re_off = match[0].rm_so;
! 345: /* Copy leading retained string. */
! 346: cspace(&SS, s, re_off, APPEND);
! 347: /* Add in regular expression. */
! 348: regsub(&SS, s, cp->u.s->new);
! 349: }
! 350:
! 351: /* Move past this match. */
! 352: if (match[0].rm_so != match[0].rm_eo) {
! 353: s += match[0].rm_eo;
! 354: slen -= match[0].rm_eo;
! 355: lastempty = 0;
! 356: } else {
! 357: if (match[0].rm_so == 0)
! 358: cspace(&SS, s, match[0].rm_so + 1,
! 359: APPEND);
! 360: else
! 361: cspace(&SS, s + match[0].rm_so, 1,
! 362: APPEND);
! 363: s += match[0].rm_so + 1;
! 364: slen -= match[0].rm_so + 1;
! 365: lastempty = 1;
! 366: }
! 367: } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
! 368: /* Copy trailing retained string. */
! 369: if (slen > 0)
! 370: cspace(&SS, s, slen, APPEND);
! 371: break;
! 372: default: /* Nth occurrence */
! 373: while (--n) {
! 374: s += match[0].rm_eo;
! 375: slen -= match[0].rm_eo;
! 376: if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
! 377: return (0);
! 378: }
! 379: /* FALLTHROUGH */
! 380: case 1: /* 1st occurrence */
! 381: /* Locate start of replaced string. */
! 382: re_off = match[0].rm_so + (s - ps);
! 383: /* Copy leading retained string. */
! 384: cspace(&SS, ps, re_off, APPEND);
! 385: /* Add in regular expression. */
! 386: regsub(&SS, s, cp->u.s->new);
! 387: /* Copy trailing retained string. */
! 388: s += match[0].rm_eo;
! 389: slen -= match[0].rm_eo;
! 390: cspace(&SS, s, slen, APPEND);
! 391: break;
! 392: }
! 393:
! 394: /*
! 395: * Swap the substitute space and the pattern space, and make sure
! 396: * that any leftover pointers into stdio memory get lost.
! 397: */
! 398: tspace = PS;
! 399: PS = SS;
! 400: SS = tspace;
! 401: SS.space = SS.back;
! 402:
! 403: /* Handle the 'p' flag. */
! 404: if (cp->u.s->p)
! 405: OUT(ps)
! 406:
! 407: /* Handle the 'w' flag. */
! 408: if (cp->u.s->wfile && !pd) {
! 409: if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
! 410: O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
! 411: err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
! 412: if (write(cp->u.s->wfd, ps, psl) != psl)
! 413: err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
! 414: }
! 415: return (1);
! 416: }
! 417:
! 418: /*
! 419: * Flush append requests. Always called before reading a line,
! 420: * therefore it also resets the substitution done (sdone) flag.
! 421: */
! 422: static void
! 423: flush_appends()
! 424: {
! 425: FILE *f;
! 426: int count, i;
! 427: char buf[8 * 1024];
! 428:
! 429: for (i = 0; i < appendx; i++)
! 430: switch (appends[i].type) {
! 431: case AP_STRING:
! 432: fwrite(appends[i].s, sizeof(char), appends[i].len,
! 433: stdout);
! 434: break;
! 435: case AP_FILE:
! 436: /*
! 437: * Read files probably shouldn't be cached. Since
! 438: * it's not an error to read a non-existent file,
! 439: * it's possible that another program is interacting
! 440: * with the sed script through the file system. It
! 441: * would be truly bizarre, but possible. It's probably
! 442: * not that big a performance win, anyhow.
! 443: */
! 444: if ((f = fopen(appends[i].s, "r")) == NULL)
! 445: break;
! 446: while (count = fread(buf, sizeof(char), sizeof(buf), f))
! 447: (void)fwrite(buf, sizeof(char), count, stdout);
! 448: (void)fclose(f);
! 449: break;
! 450: }
! 451: if (ferror(stdout))
! 452: err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
! 453: appendx = sdone = 0;
! 454: }
! 455:
! 456: static void
! 457: lputs(s)
! 458: register char *s;
! 459: {
! 460: register int count;
! 461: register char *escapes, *p;
! 462: struct winsize win;
! 463: static int termwidth = -1;
! 464:
! 465: if (termwidth == -1)
! 466: if (p = getenv("COLUMNS"))
! 467: termwidth = atoi(p);
! 468: else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
! 469: win.ws_col > 0)
! 470: termwidth = win.ws_col;
! 471: else
! 472: termwidth = 60;
! 473:
! 474: for (count = 0; *s; ++s) {
! 475: if (count >= termwidth) {
! 476: (void)printf("\\\n");
! 477: count = 0;
! 478: }
! 479: if (isascii(*s) && isprint(*s) && *s != '\\') {
! 480: (void)putchar(*s);
! 481: count++;
! 482: } else {
! 483: escapes = "\\\a\b\f\n\r\t\v";
! 484: (void)putchar('\\');
! 485: if (p = strchr(escapes, *s)) {
! 486: (void)putchar("\\abfnrtv"[p - escapes]);
! 487: count += 2;
! 488: } else {
! 489: (void)printf("%03o", *(u_char *)s);
! 490: count += 4;
! 491: }
! 492: }
! 493: }
! 494: (void)putchar('$');
! 495: (void)putchar('\n');
! 496: if (ferror(stdout))
! 497: err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
! 498: }
! 499:
! 500: static inline int
! 501: regexec_e(preg, string, eflags, nomatch, slen)
! 502: regex_t *preg;
! 503: const char *string;
! 504: int eflags, nomatch;
! 505: size_t slen;
! 506: {
! 507: int eval;
! 508:
! 509: if (preg == NULL) {
! 510: if (defpreg == NULL)
! 511: err(FATAL, "first RE may not be empty");
! 512: } else
! 513: defpreg = preg;
! 514:
! 515: /* Set anchors, discounting trailing newline (if any). */
! 516: if (slen > 0 && string[slen - 1] == '\n')
! 517: slen--;
! 518: match[0].rm_so = 0;
! 519: match[0].rm_eo = slen;
! 520:
! 521: eval = regexec(defpreg, string,
! 522: nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
! 523: switch(eval) {
! 524: case 0:
! 525: return (1);
! 526: case REG_NOMATCH:
! 527: return (0);
! 528: }
! 529: err(FATAL, "RE error: %s", strregerror(eval, defpreg));
! 530: /* NOTREACHED */
! 531: }
! 532:
! 533: /*
! 534: * regsub - perform substitutions after a regexp match
! 535: * Based on a routine by Henry Spencer
! 536: */
! 537: static void
! 538: regsub(sp, string, src)
! 539: SPACE *sp;
! 540: char *string, *src;
! 541: {
! 542: register int len, no;
! 543: register char c, *dst;
! 544:
! 545: #define NEEDSP(reqlen) \
! 546: if (sp->len >= sp->blen - (reqlen) - 1) { \
! 547: sp->blen += (reqlen) + 1024; \
! 548: sp->space = sp->back = xrealloc(sp->back, sp->blen); \
! 549: dst = sp->space + sp->len; \
! 550: }
! 551:
! 552: dst = sp->space + sp->len;
! 553: while ((c = *src++) != '\0') {
! 554: if (c == '&')
! 555: no = 0;
! 556: else if (c == '\\' && isdigit(*src))
! 557: no = *src++ - '0';
! 558: else
! 559: no = -1;
! 560: if (no < 0) { /* Ordinary character. */
! 561: if (c == '\\' && (*src == '\\' || *src == '&'))
! 562: c = *src++;
! 563: NEEDSP(1);
! 564: *dst++ = c;
! 565: ++sp->len;
! 566: } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
! 567: len = match[no].rm_eo - match[no].rm_so;
! 568: NEEDSP(len);
! 569: memmove(dst, string + match[no].rm_so, len);
! 570: dst += len;
! 571: sp->len += len;
! 572: }
! 573: }
! 574: NEEDSP(1);
! 575: *dst = '\0';
! 576: }
! 577:
! 578: /*
! 579: * aspace --
! 580: * Append the source space to the destination space, allocating new
! 581: * space as necessary.
! 582: */
! 583: void
! 584: cspace(sp, p, len, spflag)
! 585: SPACE *sp;
! 586: char *p;
! 587: size_t len;
! 588: enum e_spflag spflag;
! 589: {
! 590: size_t tlen;
! 591:
! 592: /* Make sure SPACE has enough memory and ramp up quickly. */
! 593: tlen = sp->len + len + 1;
! 594: if (tlen > sp->blen) {
! 595: sp->blen = tlen + 1024;
! 596: sp->space = sp->back = xrealloc(sp->back, sp->blen);
! 597: }
! 598:
! 599: if (spflag == REPLACE)
! 600: sp->len = 0;
! 601:
! 602: memmove(sp->space + sp->len, p, len);
! 603:
! 604: sp->space[sp->len += len] = '\0';
! 605: }
! 606:
! 607: /*
! 608: * Close all cached opened files and report any errors
! 609: */
! 610: void
! 611: cfclose(cp, end)
! 612: register struct s_command *cp, *end;
! 613: {
! 614:
! 615: for (; cp != end; cp = cp->next)
! 616: switch(cp->code) {
! 617: case 's':
! 618: if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
! 619: err(FATAL,
! 620: "%s: %s", cp->u.s->wfile, strerror(errno));
! 621: cp->u.s->wfd = -1;
! 622: break;
! 623: case 'w':
! 624: if (cp->u.fd != -1 && close(cp->u.fd))
! 625: err(FATAL, "%s: %s", cp->t, strerror(errno));
! 626: cp->u.fd = -1;
! 627: break;
! 628: case '{':
! 629: cfclose(cp->u.c, cp->next);
! 630: break;
! 631: }
! 632: }