Annotation of src/usr.bin/sed/process.c, Revision 1.9
1.9 ! jsyn 1: /* $OpenBSD: process.c,v 1.8 2002/04/15 21:47:45 millert Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /*-
4: * Copyright (c) 1992 Diomidis Spinellis.
5: * Copyright (c) 1992, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * This code is derived from software contributed to Berkeley by
9: * Diomidis Spinellis of Imperial College, University of London.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: * 3. All advertising materials mentioning features or use of this software
20: * must display the following acknowledgement:
21: * This product includes software developed by the University of
22: * California, Berkeley and its contributors.
23: * 4. Neither the name of the University nor the names of its contributors
24: * may be used to endorse or promote products derived from this software
25: * without specific prior written permission.
26: *
27: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37: * SUCH DAMAGE.
38: */
39:
#ifndef lint
/* from: static char sccsid[] = "@(#)process.c 8.1 (Berkeley) 6/6/93"; */
/* Revision identifier for this file; read-only and compiled out under lint. */
static const char rcsid[] = "$OpenBSD: process.c,v 1.8 2002/04/15 21:47:45 millert Exp $";
#endif /* not lint */
44:
45: #include <sys/types.h>
46: #include <sys/stat.h>
47: #include <sys/ioctl.h>
48: #include <sys/uio.h>
49:
50: #include <ctype.h>
51: #include <errno.h>
52: #include <fcntl.h>
53: #include <limits.h>
54: #include <regex.h>
55: #include <stdio.h>
56: #include <stdlib.h>
57: #include <string.h>
58: #include <unistd.h>
59:
60: #include "defs.h"
61: #include "extern.h"
62:
63: static SPACE HS, PS, SS;
64: #define pd PS.deleted
65: #define ps PS.space
66: #define psl PS.len
67: #define hs HS.space
68: #define hsl HS.len
69:
1.7 millert 70: static inline int applies(struct s_command *);
71: static void flush_appends(void);
72: static void lputs(char *);
73: static inline int regexec_e(regex_t *, const char *, int, int, size_t);
74: static void regsub(SPACE *, char *, char *);
75: static int substitute(struct s_command *);
1.1 deraadt 76:
77: struct s_appends *appends; /* Array of pointers to strings to append. */
78: static int appendx; /* Index into appends array. */
79: int appendnum; /* Size of appends array. */
80:
81: static int lastaddr; /* Set by applies if last address of a range. */
82: static int sdone; /* If any substitutes since last line input. */
83: /* Iov structure for 'w' commands. */
84: static regex_t *defpreg;
85: size_t maxnsub;
86: regmatch_t *match;
87:
88: #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); }
89:
90: void
91: process()
92: {
93: struct s_command *cp;
94: SPACE tspace;
95: size_t len, oldpsl;
96: char *p;
97:
98: for (linenum = 0; mf_fgets(&PS, REPLACE);) {
99: pd = 0;
100: top:
101: cp = prog;
102: redirect:
103: while (cp != NULL) {
104: if (!applies(cp)) {
105: cp = cp->next;
106: continue;
107: }
108: switch (cp->code) {
109: case '{':
110: cp = cp->u.c;
111: goto redirect;
112: case 'a':
113: if (appendx >= appendnum)
114: appends = xrealloc(appends,
115: sizeof(struct s_appends) *
116: (appendnum *= 2));
117: appends[appendx].type = AP_STRING;
118: appends[appendx].s = cp->t;
119: appends[appendx].len = strlen(cp->t);
120: appendx++;
121: break;
122: case 'b':
123: cp = cp->u.c;
124: goto redirect;
125: case 'c':
126: pd = 1;
127: psl = 0;
128: if (cp->a2 == NULL || lastaddr)
129: (void)printf("%s", cp->t);
130: break;
131: case 'd':
132: pd = 1;
133: goto new;
134: case 'D':
135: if (pd)
136: goto new;
1.8 millert 137: if (psl == 0 ||
138: (p = memchr(ps, '\n', psl - 1)) == NULL) {
1.1 deraadt 139: pd = 1;
140: goto new;
141: } else {
142: psl -= (p + 1) - ps;
143: memmove(ps, p + 1, psl);
144: goto top;
145: }
146: case 'g':
147: cspace(&PS, hs, hsl, REPLACE);
148: break;
149: case 'G':
1.5 deraadt 150: if (hs == NULL)
151: cspace(&HS, "\n", 1, REPLACE);
1.1 deraadt 152: cspace(&PS, hs, hsl, 0);
153: break;
154: case 'h':
155: cspace(&HS, ps, psl, REPLACE);
156: break;
157: case 'H':
158: cspace(&HS, ps, psl, 0);
159: break;
160: case 'i':
161: (void)printf("%s", cp->t);
162: break;
163: case 'l':
164: lputs(ps);
165: break;
166: case 'n':
167: if (!nflag && !pd)
168: OUT(ps)
169: flush_appends();
170: if (!mf_fgets(&PS, REPLACE))
171: exit(0);
172: pd = 0;
173: break;
174: case 'N':
175: flush_appends();
176: if (!mf_fgets(&PS, 0)) {
177: if (!nflag && !pd)
178: OUT(ps)
179: exit(0);
180: }
181: break;
182: case 'p':
183: if (pd)
184: break;
185: OUT(ps)
186: break;
187: case 'P':
188: if (pd)
189: break;
1.8 millert 190: if (psl != 0 &&
191: (p = memchr(ps, '\n', psl - 1)) != NULL) {
1.1 deraadt 192: oldpsl = psl;
193: psl = (p + 1) - ps;
194: }
195: OUT(ps)
196: if (p != NULL)
197: psl = oldpsl;
198: break;
199: case 'q':
200: if (!nflag && !pd)
201: OUT(ps)
202: flush_appends();
203: exit(0);
204: case 'r':
205: if (appendx >= appendnum)
206: appends = xrealloc(appends,
207: sizeof(struct s_appends) *
208: (appendnum *= 2));
209: appends[appendx].type = AP_FILE;
210: appends[appendx].s = cp->t;
211: appends[appendx].len = strlen(cp->t);
212: appendx++;
213: break;
214: case 's':
215: sdone |= substitute(cp);
216: break;
217: case 't':
218: if (sdone) {
219: sdone = 0;
220: cp = cp->u.c;
221: goto redirect;
222: }
223: break;
224: case 'w':
225: if (pd)
226: break;
227: if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
228: O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
229: DEFFILEMODE)) == -1)
1.9 ! jsyn 230: err(FATAL, "%s: %s",
1.1 deraadt 231: cp->t, strerror(errno));
232: if (write(cp->u.fd, ps, psl) != psl)
1.9 ! jsyn 233: err(FATAL, "%s: %s",
1.1 deraadt 234: cp->t, strerror(errno));
235: break;
236: case 'x':
237: if (hs == NULL)
238: cspace(&HS, "\n", 1, REPLACE);
239: tspace = PS;
240: PS = HS;
241: HS = tspace;
242: break;
243: case 'y':
1.8 millert 244: if (pd || psl == 0)
1.1 deraadt 245: break;
246: for (p = ps, len = psl; --len; ++p)
1.4 deraadt 247: *p = cp->u.y[(unsigned char)*p];
1.1 deraadt 248: break;
249: case ':':
250: case '}':
251: break;
252: case '=':
253: (void)printf("%lu\n", linenum);
254: }
255: cp = cp->next;
256: } /* for all cp */
257:
258: new: if (!nflag && !pd)
259: OUT(ps)
260: flush_appends();
261: } /* for all lines */
262: }
263:
/*
 * TRUE if the address passed matches the current program state
 * (lastline, linenum, ps).  An AT_RE address is matched against the
 * pattern space without recording sub-matches, an AT_LINE address is
 * compared with the current line number, and any other address type
 * yields lastline.  The expansion is fully parenthesized so it can be
 * used inside any larger expression.
 */
#define	MATCH(a)							\
	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline)
271:
272: /*
273: * Return TRUE if the command applies to the current line. Sets the inrange
274: * flag to process ranges. Interprets the non-select (``!'') flag.
275: */
276: static inline int
277: applies(cp)
278: struct s_command *cp;
279: {
280: int r;
281:
282: lastaddr = 0;
283: if (cp->a1 == NULL && cp->a2 == NULL)
284: r = 1;
285: else if (cp->a2)
286: if (cp->inrange) {
287: if (MATCH(cp->a2)) {
288: cp->inrange = 0;
289: lastaddr = 1;
290: }
291: r = 1;
292: } else if (MATCH(cp->a1)) {
293: /*
294: * If the second address is a number less than or
295: * equal to the line number first selected, only
296: * one line shall be selected.
297: * -- POSIX 1003.2
298: */
299: if (cp->a2->type == AT_LINE &&
300: linenum >= cp->a2->u.l)
301: lastaddr = 1;
302: else
303: cp->inrange = 1;
304: r = 1;
305: } else
306: r = 0;
307: else
308: r = MATCH(cp->a1);
309: return (cp->nonsel ? ! r : r);
310: }
311:
312: /*
313: * substitute --
314: * Do substitutions in the pattern space. Currently, we build a
315: * copy of the new pattern space in the substitute space structure
316: * and then swap them.
317: */
318: static int
319: substitute(cp)
320: struct s_command *cp;
321: {
322: SPACE tspace;
323: regex_t *re;
324: size_t re_off, slen;
325: int n, lastempty;
326: char *s;
327:
328: s = ps;
329: re = cp->u.s->re;
330: if (re == NULL) {
331: if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
332: linenum = cp->u.s->linenum;
333: err(COMPILE, "\\%d not defined in the RE",
334: cp->u.s->maxbref);
335: }
336: }
337: if (!regexec_e(re, s, 0, 0, psl))
338: return (0);
339:
340: SS.len = 0; /* Clean substitute space. */
341: slen = psl;
342: n = cp->u.s->n;
343: lastempty = 1;
344:
345: switch (n) {
346: case 0: /* Global */
347: do {
348: if (lastempty || match[0].rm_so != match[0].rm_eo) {
349: /* Locate start of replaced string. */
350: re_off = match[0].rm_so;
351: /* Copy leading retained string. */
352: cspace(&SS, s, re_off, APPEND);
353: /* Add in regular expression. */
354: regsub(&SS, s, cp->u.s->new);
355: }
356:
357: /* Move past this match. */
358: if (match[0].rm_so != match[0].rm_eo) {
359: s += match[0].rm_eo;
360: slen -= match[0].rm_eo;
361: lastempty = 0;
362: } else {
363: if (match[0].rm_so == 0)
364: cspace(&SS, s, match[0].rm_so + 1,
365: APPEND);
366: else
367: cspace(&SS, s + match[0].rm_so, 1,
368: APPEND);
369: s += match[0].rm_so + 1;
370: slen -= match[0].rm_so + 1;
371: lastempty = 1;
372: }
373: } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
374: /* Copy trailing retained string. */
375: if (slen > 0)
376: cspace(&SS, s, slen, APPEND);
377: break;
378: default: /* Nth occurrence */
379: while (--n) {
380: s += match[0].rm_eo;
381: slen -= match[0].rm_eo;
382: if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
383: return (0);
384: }
385: /* FALLTHROUGH */
386: case 1: /* 1st occurrence */
387: /* Locate start of replaced string. */
388: re_off = match[0].rm_so + (s - ps);
389: /* Copy leading retained string. */
390: cspace(&SS, ps, re_off, APPEND);
391: /* Add in regular expression. */
392: regsub(&SS, s, cp->u.s->new);
393: /* Copy trailing retained string. */
394: s += match[0].rm_eo;
395: slen -= match[0].rm_eo;
396: cspace(&SS, s, slen, APPEND);
397: break;
398: }
399:
400: /*
401: * Swap the substitute space and the pattern space, and make sure
402: * that any leftover pointers into stdio memory get lost.
403: */
404: tspace = PS;
405: PS = SS;
406: SS = tspace;
407: SS.space = SS.back;
408:
409: /* Handle the 'p' flag. */
410: if (cp->u.s->p)
411: OUT(ps)
412:
413: /* Handle the 'w' flag. */
414: if (cp->u.s->wfile && !pd) {
415: if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
416: O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
1.9 ! jsyn 417: err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
1.1 deraadt 418: if (write(cp->u.s->wfd, ps, psl) != psl)
1.9 ! jsyn 419: err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
1.1 deraadt 420: }
421: return (1);
422: }
423:
424: /*
425: * Flush append requests. Always called before reading a line,
426: * therefore it also resets the substitution done (sdone) flag.
427: */
428: static void
429: flush_appends()
430: {
431: FILE *f;
432: int count, i;
433: char buf[8 * 1024];
434:
435: for (i = 0; i < appendx; i++)
436: switch (appends[i].type) {
437: case AP_STRING:
438: fwrite(appends[i].s, sizeof(char), appends[i].len,
439: stdout);
440: break;
441: case AP_FILE:
442: /*
443: * Read files probably shouldn't be cached. Since
444: * it's not an error to read a non-existent file,
445: * it's possible that another program is interacting
446: * with the sed script through the file system. It
447: * would be truly bizarre, but possible. It's probably
448: * not that big a performance win, anyhow.
449: */
450: if ((f = fopen(appends[i].s, "r")) == NULL)
451: break;
1.3 deraadt 452: while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
1.1 deraadt 453: (void)fwrite(buf, sizeof(char), count, stdout);
454: (void)fclose(f);
455: break;
456: }
457: if (ferror(stdout))
458: err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
459: appendx = sdone = 0;
460: }
461:
462: static void
463: lputs(s)
1.6 mpech 464: char *s;
1.1 deraadt 465: {
1.6 mpech 466: int count;
467: char *escapes, *p;
1.1 deraadt 468: struct winsize win;
469: static int termwidth = -1;
470:
471: if (termwidth == -1)
1.3 deraadt 472: if ((p = getenv("COLUMNS")))
1.1 deraadt 473: termwidth = atoi(p);
474: else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
475: win.ws_col > 0)
476: termwidth = win.ws_col;
477: else
478: termwidth = 60;
479:
480: for (count = 0; *s; ++s) {
481: if (count >= termwidth) {
482: (void)printf("\\\n");
483: count = 0;
484: }
485: if (isascii(*s) && isprint(*s) && *s != '\\') {
486: (void)putchar(*s);
487: count++;
488: } else {
489: escapes = "\\\a\b\f\n\r\t\v";
490: (void)putchar('\\');
1.3 deraadt 491: if ((p = strchr(escapes, *s))) {
1.1 deraadt 492: (void)putchar("\\abfnrtv"[p - escapes]);
493: count += 2;
494: } else {
495: (void)printf("%03o", *(u_char *)s);
496: count += 4;
497: }
498: }
499: }
500: (void)putchar('$');
501: (void)putchar('\n');
502: if (ferror(stdout))
503: err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
504: }
505:
506: static inline int
507: regexec_e(preg, string, eflags, nomatch, slen)
508: regex_t *preg;
509: const char *string;
510: int eflags, nomatch;
511: size_t slen;
512: {
513: int eval;
514:
515: if (preg == NULL) {
516: if (defpreg == NULL)
517: err(FATAL, "first RE may not be empty");
518: } else
519: defpreg = preg;
520:
521: /* Set anchors, discounting trailing newline (if any). */
522: if (slen > 0 && string[slen - 1] == '\n')
523: slen--;
524: match[0].rm_so = 0;
525: match[0].rm_eo = slen;
526:
527: eval = regexec(defpreg, string,
528: nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
529: switch(eval) {
530: case 0:
531: return (1);
532: case REG_NOMATCH:
533: return (0);
534: }
535: err(FATAL, "RE error: %s", strregerror(eval, defpreg));
536: /* NOTREACHED */
537: }
538:
539: /*
540: * regsub - perform substitutions after a regexp match
541: * Based on a routine by Henry Spencer
542: */
543: static void
544: regsub(sp, string, src)
545: SPACE *sp;
546: char *string, *src;
547: {
1.6 mpech 548: int len, no;
549: char c, *dst;
1.1 deraadt 550:
551: #define NEEDSP(reqlen) \
552: if (sp->len >= sp->blen - (reqlen) - 1) { \
553: sp->blen += (reqlen) + 1024; \
554: sp->space = sp->back = xrealloc(sp->back, sp->blen); \
555: dst = sp->space + sp->len; \
556: }
557:
558: dst = sp->space + sp->len;
559: while ((c = *src++) != '\0') {
560: if (c == '&')
561: no = 0;
562: else if (c == '\\' && isdigit(*src))
563: no = *src++ - '0';
564: else
565: no = -1;
566: if (no < 0) { /* Ordinary character. */
567: if (c == '\\' && (*src == '\\' || *src == '&'))
568: c = *src++;
569: NEEDSP(1);
570: *dst++ = c;
571: ++sp->len;
572: } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
573: len = match[no].rm_eo - match[no].rm_so;
574: NEEDSP(len);
575: memmove(dst, string + match[no].rm_so, len);
576: dst += len;
577: sp->len += len;
578: }
579: }
580: NEEDSP(1);
581: *dst = '\0';
582: }
583:
584: /*
585: * aspace --
586: * Append the source space to the destination space, allocating new
587: * space as necessary.
588: */
589: void
590: cspace(sp, p, len, spflag)
591: SPACE *sp;
592: char *p;
593: size_t len;
594: enum e_spflag spflag;
595: {
596: size_t tlen;
597:
598: /* Make sure SPACE has enough memory and ramp up quickly. */
599: tlen = sp->len + len + 1;
600: if (tlen > sp->blen) {
601: sp->blen = tlen + 1024;
602: sp->space = sp->back = xrealloc(sp->back, sp->blen);
603: }
604:
605: if (spflag == REPLACE)
606: sp->len = 0;
607:
608: memmove(sp->space + sp->len, p, len);
609:
610: sp->space[sp->len += len] = '\0';
611: }
612:
613: /*
614: * Close all cached opened files and report any errors
615: */
616: void
617: cfclose(cp, end)
1.6 mpech 618: struct s_command *cp, *end;
1.1 deraadt 619: {
620:
621: for (; cp != end; cp = cp->next)
622: switch(cp->code) {
623: case 's':
624: if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
625: err(FATAL,
626: "%s: %s", cp->u.s->wfile, strerror(errno));
627: cp->u.s->wfd = -1;
628: break;
629: case 'w':
630: if (cp->u.fd != -1 && close(cp->u.fd))
631: err(FATAL, "%s: %s", cp->t, strerror(errno));
632: cp->u.fd = -1;
633: break;
634: case '{':
635: cfclose(cp->u.c, cp->next);
636: break;
637: }
638: }