src/usr.bin/vi/ex/ex_subst.c - view

Return to ex_subst.c CVS log

Up to [local] / src / usr.bin / vi / ex

File: [local] / src / usr.bin / vi / ex / ex_subst.c (download)

Revision 1.31, Fri Jun 23 15:06:45 2023 UTC (11 months, 2 weeks ago) by millert
Branch: MAIN
CVS Tags: OPENBSD_7_5_BASE, OPENBSD_7_5, OPENBSD_7_4_BASE, OPENBSD_7_4, HEAD
Changes since 1.30: +4 -2 lines

Fix a bug in ex's 's' command with the 'c' flag when 'number' is off.
The underlining was positioned in the wrong place.  This fixes
the problem and matches historic ex behavior.  OK op@

/*	$OpenBSD: ex_subst.c,v 1.31 2023/06/23 15:06:45 millert Exp $	*/

/*-
 * Copyright (c) 1992, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 1992, 1993, 1994, 1995, 1996
 *	Keith Bostic.  All rights reserved.
 *
 * See the LICENSE file for redistribution information.
 */

#include "config.h"

#include <sys/queue.h>
#include <sys/time.h>

#include <bitstring.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "../common/common.h"
#include "../vi/vi.h"

/*
 * Larger of two values.  This is a function-like macro, so each argument
 * may be evaluated twice; don't pass expressions with side effects.
 */
#define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))

/* Flag bits handed to s() by the substitute entry points. */
#define	SUB_FIRST	0x01		/* The 'r' flag isn't reasonable. */
#define	SUB_MUSTSETR	0x02		/* The 'r' flag is required. */

static int re_conv(SCR *, char **, size_t *, int *);
static int re_sub(SCR *, char *, char **, size_t *, size_t *, regmatch_t [10]);
static int re_tag_conv(SCR *, char **, size_t *, int *);
static int s(SCR *, EXCMD *, char *, regex_t *, u_int);

/*
 * ex_s --
 *	[line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
 *
 *	Substitute on lines matching a pattern.
 *
 *	Splits the first command argument into delimiter, pattern and
 *	replacement, compiles the pattern (or falls back to the last RE
 *	when the pattern is empty), stores the expanded replacement text
 *	in sp->repl/sp->repl_len and passes whatever follows to s() as
 *	the flag string.  Returns 0 on success, 1 on error.
 *
 * PUBLIC: int ex_s(SCR *, EXCMD *);
 */
int
ex_s(SCR *sp, EXCMD *cmdp)
{
	regex_t *re;
	size_t blen, len;
	u_int flags;
	int delim;
	char *bp, *ptrn, *rep, *p, *t;

	/*
	 * Skip leading white space.
	 *
	 * !!!
	 * Historic vi allowed any non-alphanumeric to serve as the
	 * substitution command delimiter.
	 *
	 * !!!
	 * If the arguments are empty, it's the same as &, i.e. we
	 * repeat the last substitution.
	 */
	if (cmdp->argc == 0)
		goto subagain;
	for (p = cmdp->argv[0]->bp,
	    len = cmdp->argv[0]->len; len > 0; --len, ++p) {
		/* <ctype.h> arguments must be unsigned char values. */
		if (!isblank((unsigned char)*p))
			break;
	}
	if (len == 0)
subagain:	return (ex_subagain(sp, cmdp));

	/*
	 * The delimiter keeps its plain-char value so that the later
	 * "p[0] == delim" comparisons still match; only the classification
	 * call gets the unsigned char conversion.
	 */
	delim = *p++;
	if (isalnum((unsigned char)delim) || delim == '\\')
		return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));

	/*
	 * !!!
	 * The full-blown substitute command reset the remembered
	 * state of the 'c' and 'g' suffixes.
	 */
	sp->c_suffix = sp->g_suffix = 0;

	/*
	 * Get the pattern string, toss escaping characters.
	 *
	 * !!!
	 * Historic vi accepted any of the following forms:
	 *
	 *	:s/abc/def/		change "abc" to "def"
	 *	:s/abc/def		change "abc" to "def"
	 *	:s/abc/			delete "abc"
	 *	:s/abc			delete "abc"
	 *
	 * QUOTING NOTE:
	 *
	 * Only toss an escaping character if it escapes a delimiter.
	 * This means that "s/A/\\\\f" replaces "A" with "\\f".  It
	 * would be nice to be more regular, i.e. for each layer of
	 * escaping a single escaping character is removed, but that's
	 * not how the historic vi worked.
	 */
	for (ptrn = t = p;;) {
		if (p[0] == '\0' || p[0] == delim) {
			if (p[0] == delim)
				++p;
			/*
			 * !!!
			 * Nul terminate the pattern string -- it's passed
			 * to regcomp which doesn't understand anything else.
			 */
			*t = '\0';
			break;
		}
		if (p[0] == '\\') {
			if (p[1] == delim)
				++p;
			else if (p[1] == '\\')
				*t++ = *p++;
		}
		*t++ = *p++;
	}

	/*
	 * If the pattern string is empty, use the last RE (not just the
	 * last substitution RE).
	 */
	if (*ptrn == '\0') {
		if (sp->re == NULL) {
			ex_emsg(sp, NULL, EXM_NOPREVRE);
			return (1);
		}

		/* Re-compile the RE if necessary. */
		if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
		    sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
			return (1);
		flags = 0;
	} else {
		/*
		 * !!!
		 * Compile the RE.  Historic practice is that substitutes set
		 * the search direction as well as both substitute and search
		 * RE's.  We compile the RE twice, as we don't want to bother
		 * ref counting the pattern string and (opaque) structure.
		 */
		if (re_compile(sp, ptrn, t - ptrn,
		    &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH))
			return (1);
		if (re_compile(sp, ptrn, t - ptrn,
		    &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST))
			return (1);

		flags = SUB_FIRST;
		sp->searchdir = FORWARD;
	}
	re = &sp->re_c;

	/*
	 * Get the replacement string.
	 *
	 * The special character & (\& if O_MAGIC not set) matches the
	 * entire RE.  No handling of & is required here, it's done by
	 * re_sub().
	 *
	 * The special character ~ (\~ if O_MAGIC not set) inserts the
	 * previous replacement string into this replacement string.
	 * Count ~'s to figure out how much space we need.  We could
	 * special case nonexistent last patterns or whether or not
	 * O_MAGIC is set, but it's probably not worth the effort.
	 *
	 * QUOTING NOTE:
	 *
	 * Only toss an escaping character if it escapes a delimiter or
	 * if O_MAGIC is set and it escapes a tilde.
	 *
	 * !!!
	 * If the entire replacement pattern is "%", then use the last
	 * replacement pattern.  This semantic was added to vi in System
	 * V and then percolated elsewhere, presumably around the time
	 * that it was added to their version of ed(1).
	 */
	if (p[0] == '\0' || p[0] == delim) {
		/* Empty replacement: forget any saved replacement text. */
		if (p[0] == delim)
			++p;
		free(sp->repl);
		sp->repl = NULL;
		sp->repl_len = 0;
	} else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
		/* Lone "%": keep sp->repl as is, just step over it. */
		p += p[1] == delim ? 2 : 1;
	else {
		/* Sizing pass: every '~' may expand to the old replacement. */
		for (rep = p, len = 0;
		    p[0] != '\0' && p[0] != delim; ++p, ++len)
			if (p[0] == '~')
				len += sp->repl_len;
		GET_SPACE_RET(sp, bp, blen, len);

		/* Copy pass: drop delimiter escapes and expand tildes. */
		for (t = bp, len = 0, p = rep;;) {
			if (p[0] == '\0' || p[0] == delim) {
				if (p[0] == delim)
					++p;
				break;
			}
			if (p[0] == '\\') {
				if (p[1] == delim)
					++p;
				else if (p[1] == '\\') {
					*t++ = *p++;
					++len;
				} else if (p[1] == '~') {
					++p;
					if (!O_ISSET(sp, O_MAGIC))
						goto tilde;
				}
			} else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) {
tilde:				++p;
				memcpy(t, sp->repl, sp->repl_len);
				t += sp->repl_len;
				len += sp->repl_len;
				continue;
			}
			*t++ = *p++;
			++len;
		}
		if ((sp->repl_len = len) != 0) {
			free(sp->repl);
			if ((sp->repl = malloc(len)) == NULL) {
				/*
				 * Keep the saved pointer and length in step:
				 * a non-zero length with a NULL pointer would
				 * be dereferenced by the next substitution.
				 */
				sp->repl_len = 0;
				msgq(sp, M_SYSERR, NULL);
				FREE_SPACE(sp, bp, blen);
				return (1);
			}
			memcpy(sp->repl, bp, len);
		}
		FREE_SPACE(sp, bp, blen);
	}
	return (s(sp, cmdp, p, re, flags));
}

/*
 * ex_subagain --
 *	[line [,line]] & [cgr] [count] [#lp]]
 *
 *	Repeat the substitution with the saved substitute RE and the saved
 *	replacement pattern.  Fails with EXM_NOPREVRE if no substitute RE
 *	has been saved.  Returns 0 on success, 1 on error.
 *
 * PUBLIC: int ex_subagain(SCR *, EXCMD *);
 */
int
ex_subagain(SCR *sp, EXCMD *cmdp)
{
	char *flagstr;

	/* There has to be a remembered substitute pattern. */
	if (sp->subre == NULL) {
		ex_emsg(sp, NULL, EXM_NOPREVRE);
		return (1);
	}

	/* Rebuild the compiled form when it isn't currently valid. */
	if (!F_ISSET(sp, SC_RE_SUBST)) {
		if (re_compile(sp, sp->subre, sp->subre_len,
		    NULL, NULL, &sp->subre_c, RE_C_SUBST))
			return (1);
	}

	/* Anything the user typed after the command is the flag string. */
	flagstr = cmdp->argc ? cmdp->argv[0]->bp : NULL;
	return (s(sp, cmdp, flagstr, &sp->subre_c, 0));
}

/*
 * ex_subtilde --
 *	[line [,line]] ~ [cgr] [count] [#lp]]
 *
 *	Repeat the substitution with the last RE of any kind (the search
 *	RE) and the saved replacement pattern.  Fails with EXM_NOPREVRE if
 *	no RE has been saved.  Returns 0 on success, 1 on error.
 *
 * PUBLIC: int ex_subtilde(SCR *, EXCMD *);
 */
int
ex_subtilde(SCR *sp, EXCMD *cmdp)
{
	char *flagstr;
	int stale;

	/* No remembered pattern, nothing to do. */
	if (sp->re == NULL) {
		ex_emsg(sp, NULL, EXM_NOPREVRE);
		return (1);
	}

	/* The compiled search RE is only usable while SC_RE_SEARCH is set. */
	stale = !F_ISSET(sp, SC_RE_SEARCH);
	if (stale && re_compile(sp,
	    sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
		return (1);

	if (cmdp->argc)
		flagstr = cmdp->argv[0]->bp;
	else
		flagstr = NULL;
	return (s(sp, cmdp, flagstr, &sp->re_c, 0));
}

/*
 * s --
 * Do the substitution.  This stuff is *really* tricky.  There are lots of
 * special cases, and general nastiness.  Don't mess with it unless you're
 * pretty confident.
 * 
 * The nasty part of the substitution is what happens when the replacement
 * string contains newlines.  It's a bit tricky -- consider the information
 * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
 * to build a set of newline offsets which we use to break the line up later,
 * when the replacement is done.  Don't change it unless you're *damned*
 * confident.
 */
/*
 * NEEDNEWLINE --
 *	Make sure sp->newl has room for one more newline offset, growing
 *	the array by 25 entries when newl_cnt has caught up with newl_len.
 *	If sp->newl is NULL after REALLOCARRAY, newl_len is reset and the
 *	macro executes "return (1)" in the function that invoked it.
 */
#define	NEEDNEWLINE(sp) {						\
	if ((sp)->newl_len == (sp)->newl_cnt) {				\
		(sp)->newl_len += 25;					\
		REALLOCARRAY((sp), (sp)->newl,				\
		    (sp)->newl_len, sizeof(size_t));			\
		if ((sp)->newl == NULL) {				\
			(sp)->newl_len = 0;				\
			return (1);					\
		}							\
	}								\
}

/*
 * BUILD --
 *	Append len bytes starting at l to the build buffer.  Relies on the
 *	invoking function having locals named lb (buffer), lbclen (bytes
 *	used) and lblen (bytes allocated).  The buffer grows by the larger
 *	of lbclen + len and 256.  If lb is NULL after REALLOC, lbclen is
 *	zeroed and the macro executes "return (1)" in the invoking function.
 *
 *	The expansion is a brace block, not do/while(0); callers in this
 *	file invoke it both with and without a trailing semicolon.
 */
#define	BUILD(sp, l, len) {						\
	if (lbclen + (len) > lblen) {					\
		lblen += MAXIMUM(lbclen + (len), 256);			\
		REALLOC((sp), lb, lblen);				\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	memcpy(lb + lbclen, (l), (len));				\
	lbclen += (len);						\
}

/*
 * NEEDSP --
 *	Like the growth half of BUILD: ensure the build buffer can take len
 *	more bytes without copying anything.  When the buffer is resized,
 *	pnt is re-pointed at lb + lbclen, since the old address may no
 *	longer be valid.  Same failure behavior as BUILD.
 */
#define	NEEDSP(sp, len, pnt) {						\
	if (lbclen + (len) > lblen) {					\
		lblen += MAXIMUM(lbclen + (len), 256);			\
		REALLOC((sp), lb, lblen);				\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
		(pnt) = lb + lbclen;					\
	}								\
}

/*
 * s --
 *	Common back end for the s, & and ~ commands.
 *
 *	sp:	screen; supplies the saved replacement, the c/g suffix
 *		state and the newline offset array.
 *	cmdp:	the ex command; addr1/addr2 give the line range and may be
 *		adjusted here when a count is given.
 *	s:	flag/count string following the command, or NULL if there
 *		were no arguments.  Reused below as the current line pointer.
 *	re:	compiled RE to match with ('r' switches to sp->re_c).
 *	flags:	SUB_FIRST and/or SUB_MUSTSETR.
 *
 *	Returns 0 on success, 1 on error.
 */
static int
s(SCR *sp, EXCMD *cmdp, char *s, regex_t *re, u_int flags)
{
	EVENT ev;
	MARK from, to;
	TEXTH tiq;
	recno_t elno, lno, slno;
	regmatch_t match[10];
	size_t blen, cnt, last, lbclen, lblen, len, llen;
	size_t offset, saved_offset, scno;
	int lflag, nflag, pflag, rflag;
	int didsub, do_eol_match, eflags, nempty, eval;
	int linechanged, matched, quit, rval;
	unsigned long ul;
	char *bp, *lb;

	NEEDFILE(sp, cmdp);

	/* Remember where the cursor started, for the final placement. */
	slno = sp->lno;
	scno = sp->cno;

	/*
	 * !!!
	 * Historically, the 'g' and 'c' suffixes were always toggled as flags,
	 * so ":s/A/B/" was the same as ":s/A/B/ccgg".  If O_EDCOMPATIBLE was
	 * not set, they were initialized to 0 for all substitute commands.  If
	 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
	 * specified substitute/replacement patterns (see ex_s()).
	 */
	if (!O_ISSET(sp, O_EDCOMPATIBLE))
		sp->c_suffix = sp->g_suffix = 0;

	/*
	 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
	 * it only displayed the last change.  I'd disallow them, but they are
	 * useful in combination with the [v]global commands.  In the current
	 * model the problem is combining them with the 'c' flag -- the screen
	 * would have to flip back and forth between the confirm screen and the
	 * ex print screen, which would be pretty awful.  We do display all
	 * changes, though, for what that's worth.
	 *
	 * !!!
	 * Historic vi was fairly strict about the order of "options", the
	 * count, and "flags".  I'm somewhat fuzzy on the difference between
	 * options and flags, anyway, so this is a simpler approach, and we
	 * just take them in whatever order the user gives them.  (The ex
	 * usage statement doesn't reflect this.)
	 */
	lflag = nflag = pflag = rflag = 0;
	if (s == NULL)
		goto noargs;
	/* lno doubles as "count already seen" marker while parsing flags. */
	for (lno = OOBLNO; *s != '\0'; ++s)
		switch (*s) {
		case ' ':
		case '\t':
			continue;
		case '+':
			++cmdp->flagoff;
			break;
		case '-':
			--cmdp->flagoff;
			break;
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			/* Only one count is allowed. */
			if (lno != OOBLNO)
				goto usage;
			errno = 0;
			if ((ul = strtoul(s, &s, 10)) >= UINT_MAX)
				errno = ERANGE;
			if (*s == '\0')		/* Loop increment correction. */
				--s;
			if (errno == ERANGE) {
				if (ul >= UINT_MAX)
					msgq(sp, M_ERR, "Count overflow");
				else
					msgq(sp, M_SYSERR, NULL);
				return (1);
			}
			lno = (recno_t)ul;
			/*
			 * In historic vi, the count was inclusive from the
			 * second address.
			 */
			cmdp->addr1.lno = cmdp->addr2.lno;
			cmdp->addr2.lno += lno - 1;
			/* Clamp the end of the range to the last line. */
			if (!db_exist(sp, cmdp->addr2.lno) &&
			    db_last(sp, &cmdp->addr2.lno))
				return (1);
			break;
		case '#':
			nflag = 1;
			break;
		case 'c':
			sp->c_suffix = !sp->c_suffix;

			/*
			 * Ex text structure initialization.
			 *
			 * NOTE(review): this is the only place tiq is set up,
			 * yet the ex confirmation path below uses it whenever
			 * c_suffix is set.  With O_EDCOMPATIBLE the suffix
			 * can apparently carry over from an earlier command
			 * without a 'c' being parsed here -- confirm that
			 * tiq cannot be used uninitialized.
			 */
			if (F_ISSET(sp, SC_EX)) {
				memset(&tiq, 0, sizeof(TEXTH));
				TAILQ_INIT(&tiq);
			}
			break;
		case 'g':
			sp->g_suffix = !sp->g_suffix;
			break;
		case 'l':
			lflag = 1;
			break;
		case 'p':
			pflag = 1;
			break;
		case 'r':
			if (LF_ISSET(SUB_FIRST)) {
				msgq(sp, M_ERR,
		    "Regular expression specified; r flag meaningless");
				return (1);
			}
			if (!F_ISSET(sp, SC_RE_SEARCH)) {
				ex_emsg(sp, NULL, EXM_NOPREVRE);
				return (1);
			}
			rflag = 1;
			re = &sp->re_c;
			break;
		default:
			goto usage;
		}

	if (*s != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) {
usage:		ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
		return (1);
	}

noargs:	if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
		msgq(sp, M_ERR,
"The #, l and p flags may not be combined with the c flag in vi mode");
		return (1);
	}

	/*
	 * bp:		if interactive, line cache
	 * blen:	if interactive, line cache length
	 * lb:		build buffer pointer.
	 * lbclen:	current length of built buffer.
	 * lblen:	length of build buffer.
	 */
	bp = lb = NULL;
	blen = lbclen = lblen = 0;

	/* For each line... */
	for (matched = quit = 0, lno = cmdp->addr1.lno,
	    elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {

		/* Someone's unhappy, time to stop. */
		if (INTERRUPTED(sp))
			break;

		/* Get the line. */
		if (db_get(sp, lno, DBG_FATAL, &s, &llen))
			goto err;

		/*
		 * Make a local copy if doing confirmation -- when calling
		 * the confirm routine we're likely to lose the cached copy.
		 */
		if (sp->c_suffix) {
			if (bp == NULL) {
				GET_SPACE_RET(sp, bp, blen, llen);
			} else
				ADD_SPACE_RET(sp, bp, blen, llen);
			memcpy(bp, s, llen);
			s = bp;
		}

		/* Start searching from the beginning. */
		offset = 0;
		len = llen;

		/* Reset the build buffer offset. */
		lbclen = 0;

		/* Reset empty match test variable. */
		nempty = -1;

		/*
		 * We don't want to have to do a setline if the line didn't
		 * change -- keep track of whether or not this line changed.
		 * If doing confirmations, don't want to keep setting the
		 * line if change is refused -- keep track of substitutions.
		 */
		didsub = linechanged = 0;

		/* New line, do an EOL match. */
		do_eol_match = 1;

		/* It's not nul terminated, but we pretend it is. */
		eflags = REG_STARTEND;

		/* The search area is from s + offset to the EOL.  */
nextmatch:	match[0].rm_so = offset;
		match[0].rm_eo = llen;

		/* Get the next match. */
		eval = regexec(re, (char *)s, 10, match, eflags);

		/*
		 * There wasn't a match or if there was an error, deal with
		 * it.  If there was a previous match in this line, resolve
		 * the changes into the database.  Otherwise, just move on.
		 */
		if (eval == REG_NOMATCH)
			goto endmatch;
		if (eval != 0) {
			re_error(sp, eval, re);
			goto err;
		}
		matched = 1;

		/* Only the first search can match an anchored expression. */
		eflags |= REG_NOTBOL;

		/*
		 * !!!
		 * It's possible to match 0-length strings -- for example, the
		 * command s;a*;X;, when matched against the string "aabb" will
		 * result in "XbXbX", i.e. the matches are "aa", the space
		 * between the b's and the space between the b's and the end of
		 * the string.  There is a similar space between the beginning
		 * of the string and the a's.  The rule that we use (because vi
		 * historically used it) is that any 0-length match, occurring
		 * immediately after a match, is ignored.  Otherwise, the above
		 * example would have resulted in "XXbXbX".  Another example is
		 * incorrectly using " *" to replace groups of spaces with one
		 * space.
		 *
		 * If the match is empty and at the same place as the end of the
		 * previous match, ignore the match and move forward.  If
		 * there's no more characters in the string, we were
		 * attempting to match after the last character, so quit.
		 */
		if (match[0].rm_so == nempty && match[0].rm_eo == nempty) {
			nempty = -1;
			if (len == 0)
				goto endmatch;
			BUILD(sp, s + offset, 1)
			++offset;
			--len;
			goto nextmatch;
		}

		/* Confirm change. */
		if (sp->c_suffix) {
			/*
			 * Set the cursor position for confirmation.  Note,
			 * if we matched on a '$', the cursor may be past
			 * the end of line.
			 */
			from.lno = to.lno = lno;
			from.cno = match[0].rm_so;
			to.cno = match[0].rm_eo;
			/*
			 * Both ex and vi have to correct for a change before
			 * the first character in the line.
			 */
			if (llen == 0)
				from.cno = to.cno = 0;
			if (F_ISSET(sp, SC_VI)) {
				/*
				 * Only vi has to correct for a change after
				 * the last character in the line.
				 *
				 * XXX
				 * It would be nice to change the vi code so
				 * that we could display a cursor past EOL.
				 *
				 * NOTE(review): when llen is 0 both tests
				 * below are true and "llen - 1" wraps, since
				 * llen is a size_t.  Confirm the resulting
				 * column is handled sanely by vs_refresh().
				 */
				if (to.cno >= llen)
					to.cno = llen - 1;
				if (from.cno >= llen)
					from.cno = llen - 1;

				sp->lno = from.lno;
				sp->cno = from.cno;
				if (vs_refresh(sp, 1))
					goto err;

				vs_update(sp, "Confirm change? [n]", NULL);

				if (v_event_get(sp, &ev, 0, 0))
					goto err;
				switch (ev.e_event) {
				case E_CHARACTER:
					break;
				case E_EOF:
				case E_ERR:
				case E_INTERRUPT:
					goto lquit;
				default:
					v_event_err(sp, &ev);
					goto lquit;
				}
			} else {
				/*
				 * Ex: print the line (numbered only if the
				 * number option is set), mark the match, and
				 * read the answer.  This "flags" shadows the
				 * function parameter within this block only.
				 */
				const int flags =
				    O_ISSET(sp, O_NUMBER) ? E_C_HASH : 0;
				if (ex_print(sp, cmdp, &from, &to, flags) ||
				    ex_scprint(sp, &from, &to))
					goto lquit;
				if (ex_txt(sp, &tiq, 0, TXT_CR))
					goto err;
				/* Only the first character of the reply counts. */
				ev.e_c = TAILQ_FIRST(&tiq)->lb[0];
			}

			switch (ev.e_c) {
			case CH_YES:
				break;
			default:
			case CH_NO:
				/* Refused: copy the text through unchanged. */
				didsub = 0;
				BUILD(sp, s + offset, match[0].rm_eo - offset);
				goto skip;
			case CH_QUIT:
				/* Set the quit/interrupted flags. */
lquit:				quit = 1;
				F_SET(sp->gp, G_INTERRUPTED);

				/*
				 * Resolve any changes, then return to (and
				 * exit from) the main loop.
				 */
				goto endmatch;
			}
		}

		/*
		 * Set the cursor to the last position changed, converting
		 * from 1-based to 0-based.
		 */
		sp->lno = lno;
		sp->cno = match[0].rm_so;

		/* Copy the bytes before the match into the build buffer. */
		BUILD(sp, s + offset, match[0].rm_so - offset);

		/* Substitute the matching bytes. */
		didsub = 1;
		if (re_sub(sp, s, &lb, &lbclen, &lblen, match))
			goto err;

		/* Set the change flag so we know this line was modified. */
		linechanged = 1;

		/* Move past the matched bytes. */
skip:		offset = match[0].rm_eo;
		len = llen - match[0].rm_eo;

		/* A match cannot be followed by an empty pattern. */
		nempty = match[0].rm_eo;

		/*
		 * If doing a global change with confirmation, we have to
		 * update the screen.  The basic idea is to store the line
		 * so the screen update routines can find it, and restart.
		 */
		if (didsub && sp->c_suffix && sp->g_suffix) {
			/*
			 * The new search offset will be the end of the
			 * modified line.
			 */
			saved_offset = lbclen;

			/* Copy the rest of the line. */
			if (len)
				BUILD(sp, s + offset, len)

			/* Set the new offset. */
			offset = saved_offset;

			/* Store inserted lines, adjusting the build buffer. */
			last = 0;
			if (sp->newl_cnt) {
				/*
				 * Each recorded newline ends one new line;
				 * both the current and the final line number
				 * shift down as lines are inserted.
				 */
				for (cnt = 0;
				    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
					if (db_insert(sp, lno,
					    lb + last, sp->newl[cnt] - last))
						goto err;
					last = sp->newl[cnt] + 1;
					++sp->rptlines[L_ADDED];
				}
				lbclen -= last;
				offset -= last;
				sp->newl_cnt = 0;
			}

			/* Store and retrieve the line. */
			if (db_set(sp, lno, lb + last, lbclen))
				goto err;
			if (db_get(sp, lno, DBG_FATAL, &s, &llen))
				goto err;
			ADD_SPACE_RET(sp, bp, blen, llen)
			memcpy(bp, s, llen);
			s = bp;
			len = llen - offset;

			/* Restart the build. */
			lbclen = 0;
			BUILD(sp, s, offset);

			/*
			 * If we haven't already done the after-the-string
			 * match, do one.  Set REG_NOTEOL so the '$' pattern
			 * only matches once.
			 */
			if (!do_eol_match)
				goto endmatch;
			if (offset == len) {
				do_eol_match = 0;
				eflags |= REG_NOTEOL;
			}
			goto nextmatch;
		}

		/*
		 * If it's a global:
		 *
		 * If at the end of the string, do a test for the after
		 * the string match.  Set REG_NOTEOL so the '$' pattern
		 * only matches once.
		 */
		if (sp->g_suffix && do_eol_match) {
			if (len == 0) {
				do_eol_match = 0;
				eflags |= REG_NOTEOL;
			}
			goto nextmatch;
		}

		/* Done with this line; nothing to store if it's unchanged. */
endmatch:	if (!linechanged)
			continue;

		/* Copy any remaining bytes into the build buffer. */
		if (len)
			BUILD(sp, s + offset, len)

		/* Store inserted lines, adjusting the build buffer. */
		last = 0;
		if (sp->newl_cnt) {
			for (cnt = 0;
			    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
				if (db_insert(sp,
				    lno, lb + last, sp->newl[cnt] - last))
					goto err;
				last = sp->newl[cnt] + 1;
				++sp->rptlines[L_ADDED];
			}
			lbclen -= last;
			sp->newl_cnt = 0;
		}

		/* Store the changed line. */
		if (db_set(sp, lno, lb + last, lbclen))
			goto err;

		/* Update changed line counter. */
		if (sp->rptlchange != lno) {
			sp->rptlchange = lno;
			++sp->rptlines[L_CHANGED];
		}

		/*
		 * !!!
		 * Display as necessary.  Historic practice is to only
		 * display the last line of a line split into multiple
		 * lines.
		 */
		if (lflag || nflag || pflag) {
			from.lno = to.lno = lno;
			from.cno = to.cno = 0;
			if (lflag)
				(void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
			if (nflag)
				(void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
			if (pflag)
				(void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
		}
	}

	/*
	 * !!!
	 * Historically, vi attempted to leave the cursor at the same place if
	 * the substitution was done at the current cursor position.  Otherwise
	 * it moved it to the first non-blank of the last line changed.  There
	 * were some problems: for example, :s/$/foo/ with the cursor on the
	 * last character of the line left the cursor on the last character, or
	 * the & command with multiple occurrences of the matching string in the
	 * line usually left the cursor in a fairly random position.
	 *
	 * We try to do the same thing, with the exception that if the user is
	 * doing substitution with confirmation, we move to the last line about
	 * which the user was consulted, as opposed to the last line that they
	 * actually changed.  This prevents a screen flash if the user doesn't
	 * change many of the possible lines.
	 */
	if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
		sp->cno = 0;
		(void)nonblank(sp, sp->lno, &sp->cno);
	}

	/*
	 * If not in a global command, and nothing matched, say so.
	 * Else, if none of the lines displayed, put something up.
	 */
	rval = 0;
	if (!matched) {
		if (!F_ISSET(sp, SC_EX_GLOBAL)) {
			msgq(sp, M_ERR, "No match found");
			goto err;
		}
	} else if (!lflag && !nflag && !pflag)
		F_SET(cmdp, E_AUTOPRINT);

	/* The err label is only reachable by goto; normal flow skips it. */
	if (0) {
err:		rval = 1;
	}

	/* Common cleanup for both the success and the error path. */
	if (bp != NULL)
		FREE_SPACE(sp, bp, blen);
	free(lb);
	return (rval);
}

/*
 * re_compile --
 *	Compile the RE.
 *
 *	ptrn/plen:	the pattern and its length.
 *	ptrnp/lenp:	if ptrnp is non-NULL, the pattern is new: it is
 *			converted from vi to POSIX syntax, a nul-terminated
 *			copy is saved in *ptrnp (replacing whatever was there)
 *			and, if lenp is non-NULL, its length is stored in
 *			*lenp.  If ptrnp is NULL, ptrn is handed to regcomp()
 *			as is and so must already be nul-terminated.
 *	rep:		where the compiled RE is stored.
 *	flags:		RE_C_SEARCH, RE_C_SUBST, RE_C_TAG and/or RE_C_SILENT.
 *
 *	Returns 0 on success, 1 on error.
 *
 * PUBLIC: int re_compile(SCR *,
 * PUBLIC:     char *, size_t, char **, size_t *, regex_t *, u_int);
 */
int
re_compile(SCR *sp, char *ptrn, size_t plen, char **ptrnp, size_t *lenp,
    regex_t *rep, u_int flags)
{
	size_t len;
	int reflags, replaced, rval;
	char *p;

	/* Set RE flags. */
	reflags = 0;
	if (!LF_ISSET(RE_C_TAG)) {
		if (O_ISSET(sp, O_EXTENDED))
			reflags |= REG_EXTENDED;
		if (O_ISSET(sp, O_IGNORECASE))
			reflags |= REG_ICASE;
		if (O_ISSET(sp, O_ICLOWER)) {
			/*
			 * Ignore case only if the pattern has no upper-case
			 * characters.  The cast is required: passing a
			 * negative char value to isupper() is undefined.
			 */
			for (p = ptrn, len = plen; len > 0; ++p, --len)
				if (isupper((unsigned char)*p))
					break;
			if (len == 0)
				reflags |= REG_ICASE;
		}
	}

	/* If we're replacing a saved value, clear the old one. */
	if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
		regfree(&sp->re_c);
		F_CLR(sp, SC_RE_SEARCH);
	}
	if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) {
		regfree(&sp->subre_c);
		F_CLR(sp, SC_RE_SUBST);
	}

	/*
	 * If we're saving the string, it's a pattern we haven't seen before,
	 * so convert the vi-style RE's to POSIX 1003.2 RE's.  Save a copy for
	 * later recompilation.   Free any previously saved value.
	 */
	if (ptrnp != NULL) {
		if (LF_ISSET(RE_C_TAG)) {
			if (re_tag_conv(sp, &ptrn, &plen, &replaced))
				return (1);
		} else
			if (re_conv(sp, &ptrn, &plen, &replaced))
				return (1);

		/* Discard previous pattern. */
		free(*ptrnp);
		*ptrnp = NULL;
		if (lenp != NULL)
			*lenp = plen;

		/*
		 * Copy the string into allocated memory.
		 *
		 * XXX
		 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
		 * for now.  There's just no other solution.
		 */
		MALLOC(sp, *ptrnp, plen + 1);
		if (*ptrnp != NULL) {
			memcpy(*ptrnp, ptrn, plen);
			(*ptrnp)[plen] = '\0';
		}

		/*
		 * Free up conversion-routine-allocated memory.  This has to
		 * happen whether or not the copy above succeeded.
		 */
		if (replaced)
			FREE_SPACE(sp, ptrn, 0);

		if (*ptrnp == NULL)
			return (1);

		/* Compile from the saved, nul-terminated copy. */
		ptrn = *ptrnp;
	}

	/*
	 * XXX
	 * Regcomp isn't 8-bit clean, so we just lost if the pattern
	 * contained a nul.  Bummer!
	 */
	if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
		if (!LF_ISSET(RE_C_SILENT))
			re_error(sp, rval, rep);
		return (1);
	}

	/* Record which compiled RE's are now valid. */
	if (LF_ISSET(RE_C_SEARCH))
		F_SET(sp, SC_RE_SEARCH);
	if (LF_ISSET(RE_C_SUBST))
		F_SET(sp, SC_RE_SUBST);

	return (0);
}

/*
 * re_conv --
 *	Convert vi's regular expressions into something that the
 *	POSIX 1003.2 RE functions can handle.
 *
 * There are two conversions we make to make vi's RE's (specifically
 * the global, search, and substitute patterns) work with POSIX RE's.
 * We assume that \<ptrn\> does "word" searches, which is non-standard
 * but supported by most regexp libraries.
 *
 * 1: If O_MAGIC is not set, strip backslashes from the magic character
 *    set (.[*~) that have them, and add them to the ones that don't.
 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
 *    from the last substitute command's replacement string.  If O_MAGIC
 *    is set, it's the string "~".
 *
 * !!!/XXX
 * This doesn't exactly match the historic behavior of vi because we do
 * the ~ substitution before calling the RE engine, so magic characters
 * in the replacement string will be expanded by the RE engine, and they
 * weren't historically.  It's a bug.
 *
 * On entry *ptrnp/*plenp describe the vi pattern.  If no conversion is
 * needed they are left alone and *replacedp is set to 0.  Otherwise the
 * converted pattern is built in space obtained with GET_SPACE_RET,
 * *ptrnp/*plenp are updated to describe it and *replacedp is set to 1;
 * the caller then owns that space.  Returns 0 on success.
 */
static int
re_conv(SCR *sp, char **ptrnp, size_t *plenp, int *replacedp)
{
	size_t blen, len, needlen;
	int magic;
	char *bp, *p, *t;

	/*
	 * First pass through, we figure out how much space we'll need.
	 * We do it in two passes, on the grounds that most of the time
	 * the user is doing a search and won't have magic characters.
	 * That way we can skip most of the memory allocation and copies.
	 *
	 * Every branch here must add exactly the number of bytes that the
	 * matching branch of the second pass emits, including the branches
	 * that don't set "magic": a single converted character elsewhere in
	 * the pattern is enough to make the second pass copy everything.
	 */
	magic = 0;
	for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
		switch (*p) {
		case '\\':
			if (len > 1) {
				--len;
				switch (*++p) {
				case '~':
					if (!O_ISSET(sp, O_MAGIC)) {
						magic = 1;
						needlen += sp->repl_len;
					} else
						needlen += 1;	/* "~" */
					break;
				case '.':
				case '[':
				case '*':
					if (!O_ISSET(sp, O_MAGIC)) {
						magic = 1;
						needlen += 1;
					} else
						needlen += 2;	/* "\c" */
					break;
				default:
					needlen += 2;
				}
			} else
				needlen += 1;
			break;
		case '~':
			if (O_ISSET(sp, O_MAGIC)) {
				magic = 1;
				needlen += sp->repl_len;
			} else
				needlen += 1;		/* "~" */
			break;
		case '.':
		case '[':
		case '*':
			if (!O_ISSET(sp, O_MAGIC)) {
				magic = 1;
				needlen += 2;
			} else
				needlen += 1;		/* "c" */
			break;
		default:
			needlen += 1;
			break;
		}

	/* Nothing needs converting; the caller keeps using its own string. */
	if (!magic) {
		*replacedp = 0;
		return (0);
	}

	/* Get enough memory to hold the final pattern. */
	*replacedp = 1;
	GET_SPACE_RET(sp, bp, blen, needlen);

	/* Second pass: emit the converted pattern. */
	for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
		switch (*p) {
		case '\\':
			if (len > 1) {
				--len;
				switch (*++p) {
				case '~':
					if (O_ISSET(sp, O_MAGIC))
						*t++ = '~';
					else {
						memcpy(t,
						    sp->repl, sp->repl_len);
						t += sp->repl_len;
					}
					break;
				case '.':
				case '[':
				case '*':
					if (O_ISSET(sp, O_MAGIC))
						*t++ = '\\';
					*t++ = *p;
					break;
				default:
					*t++ = '\\';
					*t++ = *p;
				}
			} else
				*t++ = '\\';
			break;
		case '~':
			if (O_ISSET(sp, O_MAGIC)) {
				memcpy(t, sp->repl, sp->repl_len);
				t += sp->repl_len;
			} else
				*t++ = '~';
			break;
		case '.':
		case '[':
		case '*':
			if (!O_ISSET(sp, O_MAGIC))
				*t++ = '\\';
			*t++ = *p;
			break;
		default:
			*t++ = *p;
			break;
		}

	*ptrnp = bp;
	*plenp = t - bp;
	return (0);
}

/*
 * re_tag_conv --
 *	Convert a tags search path into something that the POSIX
 *	1003.2 RE functions can handle.
 *
 *	A leading and trailing '/' or '?' are stripped, a leading '^' and
 *	a trailing '$' are kept as anchors, and every other RE special
 *	character is escaped so that it matches literally.  The result is
 *	always built in space obtained with GET_SPACE_RET, so *replacedp is
 *	always set to 1 and *ptrnp/*plenp are updated to describe the new
 *	pattern.  Returns 0 on success.
 */
static int
re_tag_conv(SCR *sp, char **ptrnp, size_t *plenp, int *replacedp)
{
	size_t blen, len;
	int lastdollar;
	char *bp, *p, *t;

	len = *plenp;

	/* Max memory usage is 2 times the length of the string. */
	*replacedp = 1;
	GET_SPACE_RET(sp, bp, blen, len * 2);

	p = *ptrnp;
	t = bp;

	/* If the last character is a '/' or '?', we just strip it. */
	if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
		--len;

	/* If the next-to-last or last character is a '$', it's magic. */
	if (len > 0 && p[len - 1] == '$') {
		--len;
		lastdollar = 1;
	} else
		lastdollar = 0;

	/* If the first character is a '/' or '?', we just strip it. */
	if (len > 0 && (p[0] == '/' || p[0] == '?')) {
		++p;
		--len;
	}

	/*
	 * If the first or second character is a '^', it's magic.
	 *
	 * Only look when there is something left: with an empty pattern
	 * p[0] is not part of the pattern, and decrementing a zero len
	 * would wrap around and send the loop below off the end of the
	 * buffers.
	 */
	if (len > 0 && p[0] == '^') {
		*t++ = *p++;
		--len;
	}

	/*
	 * Escape every other magic character we can find, meanwhile stripping
	 * the backslashes ctags inserts when escaping the search delimiter
	 * characters.
	 */
	for (; len > 0; --len) {
		if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
			++p;
			--len;
		} else if (strchr("^.[]$*", p[0]))
			*t++ = '\\';
		*t++ = *p++;
		/*
		 * The backslash may have been the last counted character
		 * (its delimiter was stripped above); don't let the loop
		 * decrement len past zero.
		 */
		if (len == 0)
			break;
	}
	if (lastdollar)
		*t++ = '$';

	*ptrnp = bp;
	*plenp = t - bp;
	return (0);
}

/*
 * re_error --
 *	Report a regular expression error.
 *
 *	Formats the regcomp()/regexec() error code with regerror() and
 *	queues it as an error message; if the message buffer cannot be
 *	allocated, a system error is queued instead.
 *
 * PUBLIC: void re_error(SCR *, int, regex_t *);
 */
void
re_error(SCR *sp, int errcode, regex_t *preg)
{
	size_t need;
	char *text;

	/* A zero-length request reports the buffer size the message needs. */
	need = regerror(errcode, preg, "", 0);

	text = malloc(need);
	if (text == NULL) {
		msgq(sp, M_SYSERR, NULL);
		return;
	}

	(void)regerror(errcode, preg, text, need);
	msgq(sp, M_ERR, "RE error: %s", text);
	free(text);
}

/*
 * re_sub --
 * 	Do the substitution for a regular expression.
 *
 *	Expands the saved replacement (sp->repl, sp->repl_len) for a single
 *	match and appends the result to the caller's build buffer.
 *
 *	ip:	the line that was matched; match[] offsets index into it.
 *	lbp, lbclenp, lblenp:
 *		the caller's build buffer, its used length and its allocated
 *		length.  They are copied into locals on entry (the BUILD-style
 *		macros need locals named lb, lbclen and lblen) and written
 *		back before the normal return.
 *	match:	regexec() results; entry 0 is the whole match, 1-9 the
 *		subexpressions.
 *
 *	Returns 0 on success.  The allocation macros used here return 1
 *	directly from this function on failure.
 *
 *	NOTE(review): those early returns skip the write-back at the bottom,
 *	so the caller's copy of the buffer pointer is not updated even if
 *	the buffer was resized earlier in the call.  Whether that can leave
 *	the caller with a stale pointer depends on what REALLOC does on
 *	failure -- confirm against its definition.
 */
static int
re_sub(SCR *sp, char *ip, char **lbp, size_t *lbclenp, size_t *lblenp,
    regmatch_t match[10])
{
	enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
	size_t lbclen, lblen;		/* Local copies. */
	size_t mlen;			/* Match length. */
	size_t rpl;			/* Remaining replacement length. */
	char *rp;			/* Replacement pointer. */
	int ch;
	int no;				/* Match replacement offset. */
	char *p, *t;			/* Buffer pointers. */
	char *lb;			/* Local copies. */

	lb = *lbp;			/* Get local copies. */
	lbclen = *lbclenp;
	lblen = *lblenp;

	/*
	 * QUOTING NOTE:
	 *
	 * There are some special sequences that vi provides in the
	 * replacement patterns.
	 *	 & string the RE matched (\& if nomagic set)
	 *	\# n-th regular subexpression
	 *	\E end \U, \L conversion
	 *	\e end \U, \L conversion
	 *	\l convert the next character to lower-case
	 *	\L convert to lower-case, until \E, \e, or end of replacement
	 *	\u convert the next character to upper-case
	 *	\U convert to upper-case, until \E, \e, or end of replacement
	 *
	 * Otherwise, since this is the lowest level of replacement, discard
	 * all escaping characters.  This (hopefully) matches historic practice.
	 */

	/*
	 * OUTCH --
	 *	Append one character to the build buffer through p.  When
	 *	nltrans is set and the character's key value is K_CR or K_NL,
	 *	its buffer offset is recorded in sp->newl (the character is
	 *	still stored); otherwise any pending case conversion is
	 *	applied, with the one-shot forms resetting conv afterwards.
	 */
#define	OUTCH(ch, nltrans) {						\
	CHAR_T __ch = (ch);						\
	u_int __value = KEY_VAL(sp, __ch);				\
	if ((nltrans) && (__value == K_CR || __value == K_NL)) {	\
		NEEDNEWLINE(sp);					\
		sp->newl[sp->newl_cnt++] = lbclen;			\
	} else if (conv != C_NOTSET) {					\
		switch (conv) {						\
		case C_ONELOWER:					\
			conv = C_NOTSET;				\
			/* FALLTHROUGH */				\
		case C_LOWER:						\
			if (isupper(__ch))				\
				__ch = tolower(__ch);			\
			break;						\
		case C_ONEUPPER:					\
			conv = C_NOTSET;				\
			/* FALLTHROUGH */				\
		case C_UPPER:						\
			if (islower(__ch))				\
				__ch = toupper(__ch);			\
			break;						\
		default:						\
			abort();					\
		}							\
	}								\
	NEEDSP(sp, 1, p);						\
	*p++ = __ch;							\
	++lbclen;							\
}
	conv = C_NOTSET;
	/* Walk the replacement; rpl counts the characters still unread. */
	for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
		switch (ch = *rp++) {
		case '&':
			/* With magic set, a bare & is the whole match. */
			if (O_ISSET(sp, O_MAGIC)) {
				no = 0;
				goto subzero;
			}
			break;
		case '\\':
			/* A trailing backslash is output literally. */
			if (rpl == 0)
				break;
			/* Account for the escaped character up front. */
			--rpl;
			switch (ch = *rp) {
			case '&':
				++rp;
				if (!O_ISSET(sp, O_MAGIC)) {
					no = 0;
					goto subzero;
				}
				break;
			case '0': case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
				no = *rp++ - '0';
				/* Subexpressions that didn't match add nothing. */
subzero:			if (match[no].rm_so == -1 ||
			    	    match[no].rm_eo == -1)
					break;
				/* Copy matched text; newlines in it stay as is. */
				mlen = match[no].rm_eo - match[no].rm_so;
				for (t = ip + match[no].rm_so; mlen--; ++t)
					OUTCH(*t, 0);
				continue;
			case 'e':
			case 'E':
				++rp;
				conv = C_NOTSET;
				continue;
			case 'l':
				++rp;
				conv = C_ONELOWER;
				continue;
			case 'L':
				++rp;
				conv = C_LOWER;
				continue;
			case 'u':
				++rp;
				conv = C_ONEUPPER;
				continue;
			case 'U':
				++rp;
				conv = C_UPPER;
				continue;
			default:
				/* Any other escape: drop the backslash. */
				++rp;
				break;
			}
		}
		/* Literal replacement character; newlines split the line. */
		OUTCH(ch, 1);
	}

	*lbp = lb;			/* Update caller's information. */
	*lbclenp = lbclen;
	*lblenp = lblen;
	return (0);
}