Annotation of src/usr.bin/sed/process.c, Revision 1.6
1.6 ! mpech 1: /* $OpenBSD: process.c,v 1.5 1999/06/06 15:42:24 deraadt Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /*-
4: * Copyright (c) 1992 Diomidis Spinellis.
5: * Copyright (c) 1992, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * This code is derived from software contributed to Berkeley by
9: * Diomidis Spinellis of Imperial College, University of London.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: * 3. All advertising materials mentioning features or use of this software
20: * must display the following acknowledgement:
21: * This product includes software developed by the University of
22: * California, Berkeley and its contributors.
23: * 4. Neither the name of the University nor the names of its contributors
24: * may be used to endorse or promote products derived from this software
25: * without specific prior written permission.
26: *
27: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37: * SUCH DAMAGE.
38: */
39:
40: #ifndef lint
41: /* from: static char sccsid[] = "@(#)process.c 8.1 (Berkeley) 6/6/93"; */
1.6 ! mpech 42: static char *rcsid = "$OpenBSD: process.c,v 1.5 1999/06/06 15:42:24 deraadt Exp $";
1.1 deraadt 43: #endif /* not lint */
44:
45: #include <sys/types.h>
46: #include <sys/stat.h>
47: #include <sys/ioctl.h>
48: #include <sys/uio.h>
49:
50: #include <ctype.h>
51: #include <errno.h>
52: #include <fcntl.h>
53: #include <limits.h>
54: #include <regex.h>
55: #include <stdio.h>
56: #include <stdlib.h>
57: #include <string.h>
58: #include <unistd.h>
59:
60: #include "defs.h"
61: #include "extern.h"
62:
/*
 * Working buffers.  PS is the pattern space the commands in process() edit
 * and SS is the scratch space substitute() builds its result in; HS is
 * presumably the sed hold space (it is what g/G/h/H/x read and write).
 */
63: static SPACE HS, PS, SS;
/* Shorthand for the PS and HS fields used throughout this file. */
64: #define pd PS.deleted
65: #define ps PS.space
66: #define psl PS.len
67: #define hs HS.space
68: #define hsl HS.len
69:
/* Prototypes for the file-local helpers defined further down. */
70: static inline int applies __P((struct s_command *));
71: static void flush_appends __P((void));
72: static void lputs __P((char *));
73: static inline int regexec_e __P((regex_t *, const char *, int, int, size_t));
74: static void regsub __P((SPACE *, char *, char *));
75: static int substitute __P((struct s_command *));
76:
/* Pending 'a'/'r' output, queued by process() and emitted by flush_appends(). */
77: struct s_appends *appends; /* Array of pointers to strings to append. */
78: static int appendx; /* Index into appends array. */
79: int appendnum; /* Size of appends array. */
80:
81: static int lastaddr; /* Set by applies if last address of a range. */
82: static int sdone; /* If any substitutes since last line input. */
83: /* Most recently used RE; regexec_e() reuses it when handed a NULL RE. */
84: static regex_t *defpreg;
/*
 * regexec_e() asks regexec() for maxnsub + 1 entries of match[].  Neither is
 * sized or allocated in this file -- NOTE(review): confirm where that happens.
 */
85: size_t maxnsub;
86: regmatch_t *match;
87:
/*
 * Write psl bytes starting at s to stdout.  Expands to a braced block, so call
 * sites omit the trailing semicolon.  The fwrite() result is not checked here.
 */
88: #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); }
89:
90: void
91: process()
92: {
93: struct s_command *cp;
94: SPACE tspace;
95: size_t len, oldpsl;
96: char *p;
97:
98: for (linenum = 0; mf_fgets(&PS, REPLACE);) {
99: pd = 0;
100: top:
101: cp = prog;
102: redirect:
103: while (cp != NULL) {
104: if (!applies(cp)) {
105: cp = cp->next;
106: continue;
107: }
108: switch (cp->code) {
109: case '{':
110: cp = cp->u.c;
111: goto redirect;
112: case 'a':
113: if (appendx >= appendnum)
114: appends = xrealloc(appends,
115: sizeof(struct s_appends) *
116: (appendnum *= 2));
117: appends[appendx].type = AP_STRING;
118: appends[appendx].s = cp->t;
119: appends[appendx].len = strlen(cp->t);
120: appendx++;
121: break;
122: case 'b':
123: cp = cp->u.c;
124: goto redirect;
125: case 'c':
126: pd = 1;
127: psl = 0;
128: if (cp->a2 == NULL || lastaddr)
129: (void)printf("%s", cp->t);
130: break;
131: case 'd':
132: pd = 1;
133: goto new;
134: case 'D':
135: if (pd)
136: goto new;
137: if ((p = memchr(ps, '\n', psl - 1)) == NULL) {
138: pd = 1;
139: goto new;
140: } else {
141: psl -= (p + 1) - ps;
142: memmove(ps, p + 1, psl);
143: goto top;
144: }
145: case 'g':
146: cspace(&PS, hs, hsl, REPLACE);
147: break;
148: case 'G':
1.5 deraadt 149: if (hs == NULL)
150: cspace(&HS, "\n", 1, REPLACE);
1.1 deraadt 151: cspace(&PS, hs, hsl, 0);
152: break;
153: case 'h':
154: cspace(&HS, ps, psl, REPLACE);
155: break;
156: case 'H':
157: cspace(&HS, ps, psl, 0);
158: break;
159: case 'i':
160: (void)printf("%s", cp->t);
161: break;
162: case 'l':
163: lputs(ps);
164: break;
165: case 'n':
166: if (!nflag && !pd)
167: OUT(ps)
168: flush_appends();
169: if (!mf_fgets(&PS, REPLACE))
170: exit(0);
171: pd = 0;
172: break;
173: case 'N':
174: flush_appends();
175: if (!mf_fgets(&PS, 0)) {
176: if (!nflag && !pd)
177: OUT(ps)
178: exit(0);
179: }
180: break;
181: case 'p':
182: if (pd)
183: break;
184: OUT(ps)
185: break;
186: case 'P':
187: if (pd)
188: break;
189: if ((p = memchr(ps, '\n', psl - 1)) != NULL) {
190: oldpsl = psl;
191: psl = (p + 1) - ps;
192: }
193: OUT(ps)
194: if (p != NULL)
195: psl = oldpsl;
196: break;
197: case 'q':
198: if (!nflag && !pd)
199: OUT(ps)
200: flush_appends();
201: exit(0);
202: case 'r':
203: if (appendx >= appendnum)
204: appends = xrealloc(appends,
205: sizeof(struct s_appends) *
206: (appendnum *= 2));
207: appends[appendx].type = AP_FILE;
208: appends[appendx].s = cp->t;
209: appends[appendx].len = strlen(cp->t);
210: appendx++;
211: break;
212: case 's':
213: sdone |= substitute(cp);
214: break;
215: case 't':
216: if (sdone) {
217: sdone = 0;
218: cp = cp->u.c;
219: goto redirect;
220: }
221: break;
222: case 'w':
223: if (pd)
224: break;
225: if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
226: O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
227: DEFFILEMODE)) == -1)
228: err(FATAL, "%s: %s\n",
229: cp->t, strerror(errno));
230: if (write(cp->u.fd, ps, psl) != psl)
231: err(FATAL, "%s: %s\n",
232: cp->t, strerror(errno));
233: break;
234: case 'x':
235: if (hs == NULL)
236: cspace(&HS, "\n", 1, REPLACE);
237: tspace = PS;
238: PS = HS;
239: HS = tspace;
240: break;
241: case 'y':
242: if (pd)
243: break;
244: for (p = ps, len = psl; --len; ++p)
1.4 deraadt 245: *p = cp->u.y[(unsigned char)*p];
1.1 deraadt 246: break;
247: case ':':
248: case '}':
249: break;
250: case '=':
251: (void)printf("%lu\n", linenum);
252: }
253: cp = cp->next;
254: } /* for all cp */
255:
256: new: if (!nflag && !pd)
257: OUT(ps)
258: flush_appends();
259: } /* for all lines */
260: }
261:
262: /*
263: * TRUE if the address passed matches the current program state
264: * (lastline, linenumber, ps).
265: */
266: #define MATCH(a) \
267: (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \
268: (a)->type == AT_LINE ? linenum == (a)->u.l : lastline
269:
270: /*
271: * Return TRUE if the command applies to the current line. Sets the inrange
272: * flag to process ranges. Interprets the non-select (``!'') flag.
273: */
274: static inline int
275: applies(cp)
276: struct s_command *cp;
277: {
278: int r;
279:
280: lastaddr = 0;
281: if (cp->a1 == NULL && cp->a2 == NULL)
282: r = 1;
283: else if (cp->a2)
284: if (cp->inrange) {
285: if (MATCH(cp->a2)) {
286: cp->inrange = 0;
287: lastaddr = 1;
288: }
289: r = 1;
290: } else if (MATCH(cp->a1)) {
291: /*
292: * If the second address is a number less than or
293: * equal to the line number first selected, only
294: * one line shall be selected.
295: * -- POSIX 1003.2
296: */
297: if (cp->a2->type == AT_LINE &&
298: linenum >= cp->a2->u.l)
299: lastaddr = 1;
300: else
301: cp->inrange = 1;
302: r = 1;
303: } else
304: r = 0;
305: else
306: r = MATCH(cp->a1);
307: return (cp->nonsel ? ! r : r);
308: }
309:
310: /*
311: * substitute --
312: * Do substitutions in the pattern space. Currently, we build a
313: * copy of the new pattern space in the substitute space structure
314: * and then swap them.
315: */
316: static int
317: substitute(cp)
318: struct s_command *cp;
319: {
320: SPACE tspace;
321: regex_t *re;
322: size_t re_off, slen;
323: int n, lastempty;
324: char *s;
325:
326: s = ps;
327: re = cp->u.s->re;
328: if (re == NULL) {
329: if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
330: linenum = cp->u.s->linenum;
331: err(COMPILE, "\\%d not defined in the RE",
332: cp->u.s->maxbref);
333: }
334: }
335: if (!regexec_e(re, s, 0, 0, psl))
336: return (0);
337:
338: SS.len = 0; /* Clean substitute space. */
339: slen = psl;
340: n = cp->u.s->n;
341: lastempty = 1;
342:
343: switch (n) {
344: case 0: /* Global */
345: do {
346: if (lastempty || match[0].rm_so != match[0].rm_eo) {
347: /* Locate start of replaced string. */
348: re_off = match[0].rm_so;
349: /* Copy leading retained string. */
350: cspace(&SS, s, re_off, APPEND);
351: /* Add in regular expression. */
352: regsub(&SS, s, cp->u.s->new);
353: }
354:
355: /* Move past this match. */
356: if (match[0].rm_so != match[0].rm_eo) {
357: s += match[0].rm_eo;
358: slen -= match[0].rm_eo;
359: lastempty = 0;
360: } else {
361: if (match[0].rm_so == 0)
362: cspace(&SS, s, match[0].rm_so + 1,
363: APPEND);
364: else
365: cspace(&SS, s + match[0].rm_so, 1,
366: APPEND);
367: s += match[0].rm_so + 1;
368: slen -= match[0].rm_so + 1;
369: lastempty = 1;
370: }
371: } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
372: /* Copy trailing retained string. */
373: if (slen > 0)
374: cspace(&SS, s, slen, APPEND);
375: break;
376: default: /* Nth occurrence */
377: while (--n) {
378: s += match[0].rm_eo;
379: slen -= match[0].rm_eo;
380: if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
381: return (0);
382: }
383: /* FALLTHROUGH */
384: case 1: /* 1st occurrence */
385: /* Locate start of replaced string. */
386: re_off = match[0].rm_so + (s - ps);
387: /* Copy leading retained string. */
388: cspace(&SS, ps, re_off, APPEND);
389: /* Add in regular expression. */
390: regsub(&SS, s, cp->u.s->new);
391: /* Copy trailing retained string. */
392: s += match[0].rm_eo;
393: slen -= match[0].rm_eo;
394: cspace(&SS, s, slen, APPEND);
395: break;
396: }
397:
398: /*
399: * Swap the substitute space and the pattern space, and make sure
400: * that any leftover pointers into stdio memory get lost.
401: */
402: tspace = PS;
403: PS = SS;
404: SS = tspace;
405: SS.space = SS.back;
406:
407: /* Handle the 'p' flag. */
408: if (cp->u.s->p)
409: OUT(ps)
410:
411: /* Handle the 'w' flag. */
412: if (cp->u.s->wfile && !pd) {
413: if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
414: O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
415: err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
416: if (write(cp->u.s->wfd, ps, psl) != psl)
417: err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
418: }
419: return (1);
420: }
421:
422: /*
423: * Flush append requests. Always called before reading a line,
424: * therefore it also resets the substitution done (sdone) flag.
425: */
426: static void
427: flush_appends()
428: {
429: FILE *f;
430: int count, i;
431: char buf[8 * 1024];
432:
433: for (i = 0; i < appendx; i++)
434: switch (appends[i].type) {
435: case AP_STRING:
436: fwrite(appends[i].s, sizeof(char), appends[i].len,
437: stdout);
438: break;
439: case AP_FILE:
440: /*
441: * Read files probably shouldn't be cached. Since
442: * it's not an error to read a non-existent file,
443: * it's possible that another program is interacting
444: * with the sed script through the file system. It
445: * would be truly bizarre, but possible. It's probably
446: * not that big a performance win, anyhow.
447: */
448: if ((f = fopen(appends[i].s, "r")) == NULL)
449: break;
1.3 deraadt 450: while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
1.1 deraadt 451: (void)fwrite(buf, sizeof(char), count, stdout);
452: (void)fclose(f);
453: break;
454: }
455: if (ferror(stdout))
456: err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
457: appendx = sdone = 0;
458: }
459:
460: static void
461: lputs(s)
1.6 ! mpech 462: char *s;
1.1 deraadt 463: {
1.6 ! mpech 464: int count;
! 465: char *escapes, *p;
1.1 deraadt 466: struct winsize win;
467: static int termwidth = -1;
468:
469: if (termwidth == -1)
1.3 deraadt 470: if ((p = getenv("COLUMNS")))
1.1 deraadt 471: termwidth = atoi(p);
472: else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
473: win.ws_col > 0)
474: termwidth = win.ws_col;
475: else
476: termwidth = 60;
477:
478: for (count = 0; *s; ++s) {
479: if (count >= termwidth) {
480: (void)printf("\\\n");
481: count = 0;
482: }
483: if (isascii(*s) && isprint(*s) && *s != '\\') {
484: (void)putchar(*s);
485: count++;
486: } else {
487: escapes = "\\\a\b\f\n\r\t\v";
488: (void)putchar('\\');
1.3 deraadt 489: if ((p = strchr(escapes, *s))) {
1.1 deraadt 490: (void)putchar("\\abfnrtv"[p - escapes]);
491: count += 2;
492: } else {
493: (void)printf("%03o", *(u_char *)s);
494: count += 4;
495: }
496: }
497: }
498: (void)putchar('$');
499: (void)putchar('\n');
500: if (ferror(stdout))
501: err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
502: }
503:
504: static inline int
505: regexec_e(preg, string, eflags, nomatch, slen)
506: regex_t *preg;
507: const char *string;
508: int eflags, nomatch;
509: size_t slen;
510: {
511: int eval;
512:
513: if (preg == NULL) {
514: if (defpreg == NULL)
515: err(FATAL, "first RE may not be empty");
516: } else
517: defpreg = preg;
518:
519: /* Set anchors, discounting trailing newline (if any). */
520: if (slen > 0 && string[slen - 1] == '\n')
521: slen--;
522: match[0].rm_so = 0;
523: match[0].rm_eo = slen;
524:
525: eval = regexec(defpreg, string,
526: nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
527: switch(eval) {
528: case 0:
529: return (1);
530: case REG_NOMATCH:
531: return (0);
532: }
533: err(FATAL, "RE error: %s", strregerror(eval, defpreg));
534: /* NOTREACHED */
535: }
536:
537: /*
538: * regsub - perform substitutions after a regexp match
539: * Based on a routine by Henry Spencer
540: */
541: static void
542: regsub(sp, string, src)
543: SPACE *sp;
544: char *string, *src;
545: {
1.6 ! mpech 546: int len, no;
! 547: char c, *dst;
1.1 deraadt 548:
549: #define NEEDSP(reqlen) \
550: if (sp->len >= sp->blen - (reqlen) - 1) { \
551: sp->blen += (reqlen) + 1024; \
552: sp->space = sp->back = xrealloc(sp->back, sp->blen); \
553: dst = sp->space + sp->len; \
554: }
555:
556: dst = sp->space + sp->len;
557: while ((c = *src++) != '\0') {
558: if (c == '&')
559: no = 0;
560: else if (c == '\\' && isdigit(*src))
561: no = *src++ - '0';
562: else
563: no = -1;
564: if (no < 0) { /* Ordinary character. */
565: if (c == '\\' && (*src == '\\' || *src == '&'))
566: c = *src++;
567: NEEDSP(1);
568: *dst++ = c;
569: ++sp->len;
570: } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
571: len = match[no].rm_eo - match[no].rm_so;
572: NEEDSP(len);
573: memmove(dst, string + match[no].rm_so, len);
574: dst += len;
575: sp->len += len;
576: }
577: }
578: NEEDSP(1);
579: *dst = '\0';
580: }
581:
582: /*
583: * aspace --
584: * Append the source space to the destination space, allocating new
585: * space as necessary.
586: */
587: void
588: cspace(sp, p, len, spflag)
589: SPACE *sp;
590: char *p;
591: size_t len;
592: enum e_spflag spflag;
593: {
594: size_t tlen;
595:
596: /* Make sure SPACE has enough memory and ramp up quickly. */
597: tlen = sp->len + len + 1;
598: if (tlen > sp->blen) {
599: sp->blen = tlen + 1024;
600: sp->space = sp->back = xrealloc(sp->back, sp->blen);
601: }
602:
603: if (spflag == REPLACE)
604: sp->len = 0;
605:
606: memmove(sp->space + sp->len, p, len);
607:
608: sp->space[sp->len += len] = '\0';
609: }
610:
611: /*
612: * Close all cached opened files and report any errors
613: */
614: void
615: cfclose(cp, end)
1.6 ! mpech 616: struct s_command *cp, *end;
1.1 deraadt 617: {
618:
619: for (; cp != end; cp = cp->next)
620: switch(cp->code) {
621: case 's':
622: if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
623: err(FATAL,
624: "%s: %s", cp->u.s->wfile, strerror(errno));
625: cp->u.s->wfd = -1;
626: break;
627: case 'w':
628: if (cp->u.fd != -1 && close(cp->u.fd))
629: err(FATAL, "%s: %s", cp->t, strerror(errno));
630: cp->u.fd = -1;
631: break;
632: case '{':
633: cfclose(cp->u.c, cp->next);
634: break;
635: }
636: }