Annotation of src/usr.bin/sed/process.c, Revision 1.14
1.14 ! millert 1: /* $OpenBSD: process.c,v 1.13 2006/10/09 00:23:57 tedu Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /*-
4: * Copyright (c) 1992 Diomidis Spinellis.
5: * Copyright (c) 1992, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * This code is derived from software contributed to Berkeley by
9: * Diomidis Spinellis of Imperial College, University of London.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
1.10 millert 19: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 20: * may be used to endorse or promote products derived from this software
21: * without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: */
35:
36: #ifndef lint
37: /* from: static char sccsid[] = "@(#)process.c 8.1 (Berkeley) 6/6/93"; */
1.14 ! millert 38: static const char rcsid[] = "$OpenBSD: process.c,v 1.13 2006/10/09 00:23:57 tedu Exp $";
1.1 deraadt 39: #endif /* not lint */
40:
41: #include <sys/types.h>
42: #include <sys/stat.h>
43: #include <sys/ioctl.h>
44: #include <sys/uio.h>
45:
46: #include <ctype.h>
47: #include <errno.h>
48: #include <fcntl.h>
49: #include <limits.h>
50: #include <regex.h>
51: #include <stdio.h>
52: #include <stdlib.h>
53: #include <string.h>
54: #include <unistd.h>
55:
56: #include "defs.h"
57: #include "extern.h"
58:
59: static SPACE HS, PS, SS;
60: #define pd PS.deleted
61: #define ps PS.space
62: #define psl PS.len
63: #define hs HS.space
64: #define hsl HS.len
65:
1.7 millert 66: static inline int applies(struct s_command *);
67: static void flush_appends(void);
68: static void lputs(char *);
69: static inline int regexec_e(regex_t *, const char *, int, int, size_t);
70: static void regsub(SPACE *, char *, char *);
71: static int substitute(struct s_command *);
1.1 deraadt 72:
73: struct s_appends *appends; /* Array of pointers to strings to append. */
74: static int appendx; /* Index into appends array. */
75: int appendnum; /* Size of appends array. */
76:
77: static int lastaddr; /* Set by applies if last address of a range. */
78: static int sdone; /* If any substitutes since last line input. */
79: /* Iov structure for 'w' commands. */
80: static regex_t *defpreg;
81: size_t maxnsub;
82: regmatch_t *match;
83:
1.13 tedu 84: #define OUT(s) do { fwrite(s, sizeof(u_char), psl, stdout); } while (0)
1.1 deraadt 85:
86: void
1.11 deraadt 87: process(void)
1.1 deraadt 88: {
89: struct s_command *cp;
90: SPACE tspace;
91: size_t len, oldpsl;
92: char *p;
93:
94: for (linenum = 0; mf_fgets(&PS, REPLACE);) {
95: pd = 0;
96: top:
97: cp = prog;
98: redirect:
99: while (cp != NULL) {
100: if (!applies(cp)) {
101: cp = cp->next;
102: continue;
103: }
104: switch (cp->code) {
105: case '{':
106: cp = cp->u.c;
107: goto redirect;
108: case 'a':
1.12 tedu 109: if (appendx >= appendnum) {
1.1 deraadt 110: appends = xrealloc(appends,
111: sizeof(struct s_appends) *
1.12 tedu 112: (appendnum * 2));
113: appendnum *= 2;
114: }
1.1 deraadt 115: appends[appendx].type = AP_STRING;
116: appends[appendx].s = cp->t;
117: appends[appendx].len = strlen(cp->t);
118: appendx++;
119: break;
120: case 'b':
121: cp = cp->u.c;
122: goto redirect;
123: case 'c':
124: pd = 1;
125: psl = 0;
126: if (cp->a2 == NULL || lastaddr)
127: (void)printf("%s", cp->t);
128: break;
129: case 'd':
130: pd = 1;
131: goto new;
132: case 'D':
133: if (pd)
134: goto new;
1.8 millert 135: if (psl == 0 ||
136: (p = memchr(ps, '\n', psl - 1)) == NULL) {
1.1 deraadt 137: pd = 1;
138: goto new;
139: } else {
140: psl -= (p + 1) - ps;
141: memmove(ps, p + 1, psl);
142: goto top;
143: }
144: case 'g':
145: cspace(&PS, hs, hsl, REPLACE);
146: break;
147: case 'G':
1.5 deraadt 148: if (hs == NULL)
149: cspace(&HS, "\n", 1, REPLACE);
1.1 deraadt 150: cspace(&PS, hs, hsl, 0);
151: break;
152: case 'h':
153: cspace(&HS, ps, psl, REPLACE);
154: break;
155: case 'H':
156: cspace(&HS, ps, psl, 0);
157: break;
158: case 'i':
159: (void)printf("%s", cp->t);
160: break;
161: case 'l':
162: lputs(ps);
163: break;
164: case 'n':
165: if (!nflag && !pd)
1.13 tedu 166: OUT(ps);
1.1 deraadt 167: flush_appends();
168: if (!mf_fgets(&PS, REPLACE))
169: exit(0);
170: pd = 0;
171: break;
172: case 'N':
173: flush_appends();
174: if (!mf_fgets(&PS, 0)) {
175: if (!nflag && !pd)
1.13 tedu 176: OUT(ps);
1.1 deraadt 177: exit(0);
178: }
179: break;
180: case 'p':
181: if (pd)
182: break;
1.13 tedu 183: OUT(ps);
1.1 deraadt 184: break;
185: case 'P':
186: if (pd)
187: break;
1.8 millert 188: if (psl != 0 &&
189: (p = memchr(ps, '\n', psl - 1)) != NULL) {
1.1 deraadt 190: oldpsl = psl;
191: psl = (p + 1) - ps;
192: }
1.13 tedu 193: OUT(ps);
1.1 deraadt 194: if (p != NULL)
195: psl = oldpsl;
196: break;
197: case 'q':
198: if (!nflag && !pd)
1.13 tedu 199: OUT(ps);
1.1 deraadt 200: flush_appends();
201: exit(0);
202: case 'r':
203: if (appendx >= appendnum)
204: appends = xrealloc(appends,
205: sizeof(struct s_appends) *
206: (appendnum *= 2));
207: appends[appendx].type = AP_FILE;
208: appends[appendx].s = cp->t;
209: appends[appendx].len = strlen(cp->t);
210: appendx++;
211: break;
212: case 's':
213: sdone |= substitute(cp);
214: break;
215: case 't':
216: if (sdone) {
217: sdone = 0;
218: cp = cp->u.c;
219: goto redirect;
220: }
221: break;
222: case 'w':
223: if (pd)
224: break;
225: if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
226: O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
227: DEFFILEMODE)) == -1)
1.9 jsyn 228: err(FATAL, "%s: %s",
1.1 deraadt 229: cp->t, strerror(errno));
230: if (write(cp->u.fd, ps, psl) != psl)
1.9 jsyn 231: err(FATAL, "%s: %s",
1.1 deraadt 232: cp->t, strerror(errno));
233: break;
234: case 'x':
235: if (hs == NULL)
236: cspace(&HS, "\n", 1, REPLACE);
237: tspace = PS;
238: PS = HS;
239: HS = tspace;
240: break;
241: case 'y':
1.8 millert 242: if (pd || psl == 0)
1.1 deraadt 243: break;
244: for (p = ps, len = psl; --len; ++p)
1.4 deraadt 245: *p = cp->u.y[(unsigned char)*p];
1.1 deraadt 246: break;
247: case ':':
248: case '}':
249: break;
250: case '=':
251: (void)printf("%lu\n", linenum);
252: }
253: cp = cp->next;
254: } /* for all cp */
255:
256: new: if (!nflag && !pd)
1.13 tedu 257: OUT(ps);
1.1 deraadt 258: flush_appends();
259: } /* for all lines */
260: }
261:
/*
 * TRUE if the address passed matches the current program state
 * (lastline, linenumber, ps).
 *
 * The argument is evaluated more than once, so it must be free of side
 * effects.  The whole expansion is parenthesized so that the macro acts
 * as a single operand wherever it is used.
 */
#define	MATCH(a)							\
	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline)
269:
270: /*
271: * Return TRUE if the command applies to the current line. Sets the inrange
272: * flag to process ranges. Interprets the non-select (``!'') flag.
273: */
274: static inline int
1.11 deraadt 275: applies(struct s_command *cp)
1.1 deraadt 276: {
277: int r;
278:
279: lastaddr = 0;
280: if (cp->a1 == NULL && cp->a2 == NULL)
281: r = 1;
282: else if (cp->a2)
283: if (cp->inrange) {
284: if (MATCH(cp->a2)) {
285: cp->inrange = 0;
286: lastaddr = 1;
287: }
288: r = 1;
289: } else if (MATCH(cp->a1)) {
290: /*
291: * If the second address is a number less than or
292: * equal to the line number first selected, only
293: * one line shall be selected.
294: * -- POSIX 1003.2
295: */
296: if (cp->a2->type == AT_LINE &&
297: linenum >= cp->a2->u.l)
298: lastaddr = 1;
299: else
300: cp->inrange = 1;
301: r = 1;
302: } else
303: r = 0;
304: else
305: r = MATCH(cp->a1);
1.13 tedu 306: return (cp->nonsel ? !r : r);
1.1 deraadt 307: }
308:
309: /*
310: * substitute --
311: * Do substitutions in the pattern space. Currently, we build a
312: * copy of the new pattern space in the substitute space structure
313: * and then swap them.
314: */
315: static int
1.11 deraadt 316: substitute(struct s_command *cp)
1.1 deraadt 317: {
318: SPACE tspace;
319: regex_t *re;
320: size_t re_off, slen;
321: int n, lastempty;
322: char *s;
323:
324: s = ps;
325: re = cp->u.s->re;
326: if (re == NULL) {
327: if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
328: linenum = cp->u.s->linenum;
329: err(COMPILE, "\\%d not defined in the RE",
330: cp->u.s->maxbref);
331: }
332: }
333: if (!regexec_e(re, s, 0, 0, psl))
334: return (0);
335:
336: SS.len = 0; /* Clean substitute space. */
337: slen = psl;
338: n = cp->u.s->n;
339: lastempty = 1;
340:
341: switch (n) {
342: case 0: /* Global */
343: do {
344: if (lastempty || match[0].rm_so != match[0].rm_eo) {
345: /* Locate start of replaced string. */
346: re_off = match[0].rm_so;
347: /* Copy leading retained string. */
348: cspace(&SS, s, re_off, APPEND);
349: /* Add in regular expression. */
350: regsub(&SS, s, cp->u.s->new);
351: }
352:
353: /* Move past this match. */
354: if (match[0].rm_so != match[0].rm_eo) {
355: s += match[0].rm_eo;
356: slen -= match[0].rm_eo;
357: lastempty = 0;
358: } else {
359: if (match[0].rm_so == 0)
360: cspace(&SS, s, match[0].rm_so + 1,
361: APPEND);
362: else
363: cspace(&SS, s + match[0].rm_so, 1,
364: APPEND);
365: s += match[0].rm_so + 1;
366: slen -= match[0].rm_so + 1;
367: lastempty = 1;
368: }
369: } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
370: /* Copy trailing retained string. */
371: if (slen > 0)
372: cspace(&SS, s, slen, APPEND);
373: break;
374: default: /* Nth occurrence */
375: while (--n) {
376: s += match[0].rm_eo;
377: slen -= match[0].rm_eo;
378: if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
379: return (0);
380: }
381: /* FALLTHROUGH */
382: case 1: /* 1st occurrence */
383: /* Locate start of replaced string. */
384: re_off = match[0].rm_so + (s - ps);
385: /* Copy leading retained string. */
386: cspace(&SS, ps, re_off, APPEND);
387: /* Add in regular expression. */
388: regsub(&SS, s, cp->u.s->new);
389: /* Copy trailing retained string. */
390: s += match[0].rm_eo;
391: slen -= match[0].rm_eo;
392: cspace(&SS, s, slen, APPEND);
393: break;
394: }
395:
396: /*
397: * Swap the substitute space and the pattern space, and make sure
398: * that any leftover pointers into stdio memory get lost.
399: */
400: tspace = PS;
401: PS = SS;
402: SS = tspace;
403: SS.space = SS.back;
404:
405: /* Handle the 'p' flag. */
406: if (cp->u.s->p)
1.13 tedu 407: OUT(ps);
1.1 deraadt 408:
409: /* Handle the 'w' flag. */
410: if (cp->u.s->wfile && !pd) {
411: if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
412: O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
1.9 jsyn 413: err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
1.1 deraadt 414: if (write(cp->u.s->wfd, ps, psl) != psl)
1.9 jsyn 415: err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
1.1 deraadt 416: }
417: return (1);
418: }
419:
420: /*
421: * Flush append requests. Always called before reading a line,
422: * therefore it also resets the substitution done (sdone) flag.
423: */
424: static void
1.11 deraadt 425: flush_appends(void)
1.1 deraadt 426: {
427: FILE *f;
428: int count, i;
429: char buf[8 * 1024];
430:
431: for (i = 0; i < appendx; i++)
432: switch (appends[i].type) {
433: case AP_STRING:
434: fwrite(appends[i].s, sizeof(char), appends[i].len,
435: stdout);
436: break;
437: case AP_FILE:
438: /*
439: * Read files probably shouldn't be cached. Since
440: * it's not an error to read a non-existent file,
441: * it's possible that another program is interacting
442: * with the sed script through the file system. It
443: * would be truly bizarre, but possible. It's probably
444: * not that big a performance win, anyhow.
445: */
446: if ((f = fopen(appends[i].s, "r")) == NULL)
447: break;
1.3 deraadt 448: while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
1.1 deraadt 449: (void)fwrite(buf, sizeof(char), count, stdout);
450: (void)fclose(f);
451: break;
452: }
453: if (ferror(stdout))
454: err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
455: appendx = sdone = 0;
456: }
457:
458: static void
1.11 deraadt 459: lputs(char *s)
1.1 deraadt 460: {
1.6 mpech 461: int count;
462: char *escapes, *p;
1.1 deraadt 463: struct winsize win;
464: static int termwidth = -1;
465:
1.13 tedu 466: if (termwidth == -1) {
1.3 deraadt 467: if ((p = getenv("COLUMNS")))
1.1 deraadt 468: termwidth = atoi(p);
469: else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
470: win.ws_col > 0)
471: termwidth = win.ws_col;
472: else
473: termwidth = 60;
1.13 tedu 474: }
1.1 deraadt 475:
476: for (count = 0; *s; ++s) {
477: if (count >= termwidth) {
478: (void)printf("\\\n");
479: count = 0;
480: }
481: if (isascii(*s) && isprint(*s) && *s != '\\') {
482: (void)putchar(*s);
483: count++;
1.14 ! millert 484: } else if (*s != '\n') {
! 485: escapes = "\\\a\b\f\r\t\v";
1.1 deraadt 486: (void)putchar('\\');
1.3 deraadt 487: if ((p = strchr(escapes, *s))) {
1.14 ! millert 488: (void)putchar("\\abfrtv"[p - escapes]);
1.1 deraadt 489: count += 2;
490: } else {
491: (void)printf("%03o", *(u_char *)s);
492: count += 4;
493: }
494: }
495: }
496: (void)putchar('$');
497: (void)putchar('\n');
498: if (ferror(stdout))
499: err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
500: }
501:
502: static inline int
1.11 deraadt 503: regexec_e(regex_t *preg, const char *string, int eflags,
504: int nomatch, size_t slen)
1.1 deraadt 505: {
506: int eval;
507:
508: if (preg == NULL) {
509: if (defpreg == NULL)
510: err(FATAL, "first RE may not be empty");
511: } else
512: defpreg = preg;
513:
514: /* Set anchors, discounting trailing newline (if any). */
515: if (slen > 0 && string[slen - 1] == '\n')
516: slen--;
517: match[0].rm_so = 0;
518: match[0].rm_eo = slen;
519:
520: eval = regexec(defpreg, string,
521: nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
1.13 tedu 522: switch (eval) {
1.1 deraadt 523: case 0:
524: return (1);
525: case REG_NOMATCH:
526: return (0);
527: }
528: err(FATAL, "RE error: %s", strregerror(eval, defpreg));
529: /* NOTREACHED */
530: }
531:
532: /*
533: * regsub - perform substitutions after a regexp match
534: * Based on a routine by Henry Spencer
535: */
536: static void
1.11 deraadt 537: regsub(SPACE *sp, char *string, char *src)
1.1 deraadt 538: {
1.6 mpech 539: int len, no;
540: char c, *dst;
1.1 deraadt 541:
542: #define NEEDSP(reqlen) \
1.12 tedu 543: if (sp->len + (reqlen) + 1 >= sp->blen) { \
544: size_t newlen = sp->blen + (reqlen) + 1024; \
545: sp->space = sp->back = xrealloc(sp->back, newlen); \
546: sp->blen = newlen; \
1.1 deraadt 547: dst = sp->space + sp->len; \
548: }
549:
550: dst = sp->space + sp->len;
551: while ((c = *src++) != '\0') {
552: if (c == '&')
553: no = 0;
554: else if (c == '\\' && isdigit(*src))
555: no = *src++ - '0';
556: else
557: no = -1;
558: if (no < 0) { /* Ordinary character. */
559: if (c == '\\' && (*src == '\\' || *src == '&'))
560: c = *src++;
561: NEEDSP(1);
562: *dst++ = c;
563: ++sp->len;
564: } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
565: len = match[no].rm_eo - match[no].rm_so;
566: NEEDSP(len);
567: memmove(dst, string + match[no].rm_so, len);
568: dst += len;
569: sp->len += len;
570: }
571: }
572: NEEDSP(1);
573: *dst = '\0';
574: }
575:
576: /*
577: * aspace --
578: * Append the source space to the destination space, allocating new
579: * space as necessary.
580: */
581: void
1.11 deraadt 582: cspace(SPACE *sp, char *p, size_t len, enum e_spflag spflag)
1.1 deraadt 583: {
584: size_t tlen;
585:
586: /* Make sure SPACE has enough memory and ramp up quickly. */
587: tlen = sp->len + len + 1;
588: if (tlen > sp->blen) {
1.12 tedu 589: size_t newlen = tlen + 1024;
590: sp->space = sp->back = xrealloc(sp->back, newlen);
591: sp->blen = newlen;
1.1 deraadt 592: }
593:
594: if (spflag == REPLACE)
595: sp->len = 0;
596:
597: memmove(sp->space + sp->len, p, len);
598:
599: sp->space[sp->len += len] = '\0';
600: }
601:
602: /*
603: * Close all cached opened files and report any errors
604: */
605: void
1.11 deraadt 606: cfclose(struct s_command *cp, struct s_command *end)
1.1 deraadt 607: {
608:
609: for (; cp != end; cp = cp->next)
1.13 tedu 610: switch (cp->code) {
1.1 deraadt 611: case 's':
612: if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
613: err(FATAL,
614: "%s: %s", cp->u.s->wfile, strerror(errno));
615: cp->u.s->wfd = -1;
616: break;
617: case 'w':
618: if (cp->u.fd != -1 && close(cp->u.fd))
619: err(FATAL, "%s: %s", cp->t, strerror(errno));
620: cp->u.fd = -1;
621: break;
622: case '{':
623: cfclose(cp->u.c, cp->next);
624: break;
625: }
626: }