[BACK]Return to re_search.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / mg

File: [local] / src / usr.bin / mg / re_search.c (download)

Revision 1.18, Tue Aug 9 00:53:48 2005 UTC (18 years, 10 months ago) by kjell
Branch: MAIN
CVS Tags: OPENBSD_3_8_BASE, OPENBSD_3_8
Changes since 1.17: +5 -4 lines

Clean up eread handling in mg. (basically, fallout from the 'enter often
means abort' behaviour added during the hackathon). Eliminates
redundant ereply function, fixes miscellaneous cores when aborting,
and move a number of assumed pathnames into the prompt text, since
they are used there anyway. All changes consistent with emacs behavior

ok beck@ many, many moons ago.

/*	$OpenBSD: re_search.c,v 1.18 2005/08/09 00:53:48 kjell Exp $	*/

/* This file is in the public domain. */

/*
 *	regular expression search commands for Mg
 *
 * This file contains functions to implement several of gnuemacs's regular
 * expression functions for Mg.  Several of the routines below are just minor
 * re-arrangements of Mg's non-regular expression search functions.  Some of
 * them are similar in structure to the original MicroEMACS, others are
 * modifications of Rich Ellison's code.  Peter Newton re-wrote about half of
 * them from scratch.
 */

#ifdef REGEX
#include <sys/types.h>
#include <regex.h>

#include "def.h"
#include "macro.h"

#define SRCH_BEGIN	(0)		/* search sub-codes		    */
#define SRCH_FORW	(-1)
#define SRCH_BACK	(-2)
#define SRCH_NOPR	(-3)
#define SRCH_ACCM	(-4)
#define SRCH_MARK	(-5)

#define RE_NMATCH	10		/* max number of matches	    */
#define REPLEN		256		/* max length of replacement string */

char	re_pat[NPAT];			/* regex pattern		    */
int	re_srch_lastdir = SRCH_NOPR;	/* last search flags		    */
int	casefoldsearch = TRUE;		/* does search ignore case?	    */

static int	 re_doreplace(RSIZE, char *, int);
static int	 re_forwsrch(void);
static int	 re_backsrch(void);
static int	 re_readpattern(char *);
static int	 killmatches(int);
static int	 countmatches(int);

/*
 * Search forward.
 * Get a search string from the user and search for it starting at ".".  If
 * found, move "." to just after the matched characters.  display does all
 * the hard stuff.  If not found, it just prints a message.
 */
/* ARGSUSED */
int
re_forwsearch(int f, int n)
{
	int	s;

	if ((s = re_readpattern("RE Search")) != TRUE)
		return (s);
	if (re_forwsrch() == FALSE) {
		ewprintf("Search failed: \"%s\"", re_pat);
		return (FALSE);
	}
	re_srch_lastdir = SRCH_FORW;
	return (TRUE);
}

/*
 * Reverse search.
 * Get a search string from the user, and search, starting at "."
 * and proceeding toward the front of the buffer. If found "." is left
 * pointing at the first character of the pattern [the last character that
 * was matched].
 */
/* ARGSUSED */
int
re_backsearch(int f, int n)
{
	int	s;

	if ((s = re_readpattern("RE Search backward")) != TRUE)
		return (s);
	if (re_backsrch() == FALSE) {
		ewprintf("Search failed: \"%s\"", re_pat);
		return (FALSE);
	}
	re_srch_lastdir = SRCH_BACK;
	return (TRUE);
}

/*
 * Search again, using the same search string and direction as the last search
 * command.  The direction has been saved in "srch_lastdir", so you know which
 * way to go.
 *
 * XXX: This code has problems -- some incompatibility(?) with extend.c causes
 * match to fail when it should not.
 */
/* ARGSUSED */
int
re_searchagain(int f, int n)
{
	if (re_srch_lastdir == SRCH_NOPR) {
		ewprintf("No last search");
		return (FALSE);
	}
	if (re_srch_lastdir == SRCH_FORW) {
		if (re_forwsrch() == FALSE) {
			ewprintf("Search failed: \"%s\"", re_pat);
			return (FALSE);
		}
		return (TRUE);
	}
	if (re_srch_lastdir == SRCH_BACK)
		if (re_backsrch() == FALSE) {
			ewprintf("Search failed: \"%s\"", re_pat);
			return (FALSE);
		}

	return (TRUE);
}

/* Compiled regex goes here-- changed only when new pattern read */
static regex_t		re_buff;
static regmatch_t	re_match[RE_NMATCH];

/*
 * Re-Query Replace.
 *	Replace strings selectively.  Does a search and replace operation.
 */
/* ARGSUSED */
int
re_queryrepl(int f, int n)
{
	int	rcnt = 0;		/* replacements made so far	*/
	int	plen, s;		/* length of found string	*/
	char	news[NPAT], *rep;	/* replacement string		*/

	/* Casefold check */
	if (!casefoldsearch)
		f = TRUE;

	if ((s = re_readpattern("RE Query replace")) != TRUE)
		return (s);
	if ((rep = eread("Query replace %s with: ", news, NPAT,
	    EFNUL | EFNEW | EFCR, re_pat)) == NULL)
		return (ABORT);
	ewprintf("Query replacing %s with %s:", re_pat, news);

	/*
	 * Search forward repeatedly, checking each time whether to insert
	 * or not.  The "!" case makes the check always true, so it gets put
	 * into a tighter loop for efficiency.
	 */
	while (re_forwsrch() == TRUE) {
retry:
		update();
		switch (getkey(FALSE)) {
		case ' ':
			plen = re_match[0].rm_eo - re_match[0].rm_so;
			if (re_doreplace((RSIZE)plen, news, f) == FALSE)
				return (FALSE);
			rcnt++;
			break;

		case '.':
			plen = re_match[0].rm_eo - re_match[0].rm_so;
			if (re_doreplace((RSIZE)plen, news, f) == FALSE)
				return (FALSE);
			rcnt++;
			goto stopsearch;

		case CCHR('G'):				/* ^G */
			(void)ctrlg(FFRAND, 0);
		case CCHR('['):				/* ESC */
		case '`':
			goto stopsearch;
		case '!':
			do {
				plen = re_match[0].rm_eo - re_match[0].rm_so;
				if (re_doreplace((RSIZE)plen, news, f) == FALSE)
					return (FALSE);
				rcnt++;
			} while (re_forwsrch() == TRUE);
			goto stopsearch;

		case CCHR('?'):				/* To not replace */
			break;

		default:
			ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
			goto retry;
		}
	}

stopsearch:
	curwp->w_flag |= WFHARD;
	update();
	if (!inmacro) {
		if (rcnt == 0)
			ewprintf("(No replacements done)");
		else if (rcnt == 1)
			ewprintf("(1 replacement done)");
		else
			ewprintf("(%d replacements done)", rcnt);
	}
	return (TRUE);
}

/*
 * Routine re_doreplace calls lreplace to make replacements needed by
 * re_query replace.  Its reason for existence is to deal with \1, \2. etc.
 *  plen: length to remove
 *  st:   replacement string
 *  f:    case hack disable
 */
static int
re_doreplace(RSIZE plen, char *st, int f)
{
	int	 j, k, s, more, num, state;
	LINE	*clp;
	char	 repstr[REPLEN];

	clp = curwp->w_dotp;
	more = TRUE;
	j = 0;
	state = 0;
	num = 0;

	/* The following FSA parses the replacement string */
	while (more) {
		switch (state) {
		case 0:
			if (*st == '\\') {
				st++;
				state = 1;
			} else if (*st == '\0')
				more = FALSE;
			else {
				repstr[j] = *st;
				j++;
				if (j >= REPLEN)
					return (FALSE);
				st++;
			}
			break;
		case 1:
			if (*st >= '0' && *st <= '9') {
				num = *st - '0';
				st++;
				state = 2;
			} else if (*st == '\0')
				more = FALSE;
			else {
				repstr[j] = *st;
				j++;
				if (j >= REPLEN)
					return (FALSE);
				st++;
				state = 0;
			}
			break;
		case 2:
			if (*st >= '0' && *st <= '9') {
				num = 10 * num + *st - '0';
				st++;
			} else {
				if (num >= RE_NMATCH)
					return (FALSE);
				k = re_match[num].rm_eo - re_match[num].rm_so;
				if (j + k >= REPLEN)
					return (FALSE);
				bcopy(&(clp->l_text[re_match[num].rm_so]),
				    &repstr[j], k);
				j += k;
				if (*st == '\0')
					more = FALSE;
				if (*st == '\\') {
					st++;
					state = 1;
				} else {
					repstr[j] = *st;
					j++;
					if (j >= REPLEN)
						return (FALSE);
					st++;
					state = 0;
				}
			}
			break;
		}		/* switch (state) */
	}			/* while (more)   */

	repstr[j] = '\0';
	s = lreplace(plen, repstr, f);
	return (s);
}

/*
 * This routine does the real work of a forward search.  The pattern is
 * sitting in the external variable "pat".  If found, dot is updated, the
 * window system is notified of the change, and TRUE is returned.  If the
 * string isn't found, FALSE is returned.
 */
static int
re_forwsrch(void)
{
	int	 tbo, error;
	LINE	*clp;

	clp = curwp->w_dotp;
	tbo = curwp->w_doto;

	if (tbo == clp->l_used)
		/*
		 * Don't start matching past end of line -- must move to
		 * beginning of next line, unless at end of file.
		 */
		if (clp != curbp->b_linep) {
			clp = lforw(clp);
			tbo = 0;
		}
	/*
	 * Note this loop does not process the last line, but this editor
	 * always makes the last line empty so this is good.
	 */
	while (clp != (curbp->b_linep)) {
		re_match[0].rm_so = tbo;
		re_match[0].rm_eo = llength(clp);
		error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
		    REG_STARTEND);
		if (error != 0) {
			clp = lforw(clp);
			tbo = 0;
		} else {
			curwp->w_doto = re_match[0].rm_eo;
			curwp->w_dotp = clp;
			curwp->w_flag |= WFMOVE;
			return (TRUE);
		}
	}
	return (FALSE);
}

/*
 * This routine does the real work of a backward search.  The pattern is sitting
 * in the external variable "re_pat".  If found, dot is updated, the window
 * system is notified of the change, and TRUE is returned.  If the string isn't
 * found, FALSE is returned.
 */
static int
re_backsrch(void)
{
	LINE		*clp;
	int		 tbo;
	regmatch_t	 lastmatch;

	clp = curwp->w_dotp;
	tbo = curwp->w_doto;

	/* Start search one position to the left of dot */
	tbo = tbo - 1;
	if (tbo < 0) {
		/* must move up one line */
		clp = lback(clp);
		tbo = llength(clp);
	}

	/*
	 * Note this loop does not process the last line, but this editor
	 * always makes the last line empty so this is good.
	 */
	while (clp != (curbp->b_linep)) {
		re_match[0].rm_so = 0;
		re_match[0].rm_eo = llength(clp);
		lastmatch.rm_so = -1;
		/*
		 * Keep searching until we don't match any longer.  Assumes a
		 * non-match does not modify the re_match array.  We have to
		 * do this character-by-character after the first match since
		 * POSIX regexps don't give you a way to do reverse matches.
		 */
		while (!regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
		    REG_STARTEND) && re_match[0].rm_so < tbo) {
			memcpy(&lastmatch, &re_match[0], sizeof(regmatch_t));
			re_match[0].rm_so++;
			re_match[0].rm_eo = llength(clp);
		}
		if (lastmatch.rm_so == -1) {
			clp = lback(clp);
			tbo = llength(clp);
		} else {
			memcpy(&re_match[0], &lastmatch, sizeof(regmatch_t));
			curwp->w_doto = re_match[0].rm_so;
			curwp->w_dotp = clp;
			curwp->w_flag |= WFMOVE;
			return (TRUE);
		}
	}
	return (FALSE);
}

/*
 * Read a pattern.
 * Stash it in the external variable "re_pat". The "pat" is
 * not updated if the user types in an empty line. If the user typed
 * an empty line, and there is no old pattern, it is an error.
 * Display the old pattern, in the style of Jeff Lomicka. There is
 * some do-it-yourself control expansion.
 */
static int
re_readpattern(char *prompt)
{
	static int	dofree = 0;
	int		flags, error, s;
	char		tpat[NPAT], *rep;

	if (re_pat[0] == '\0')
		rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, prompt);
	else
		rep = eread("%s: (default %s) ", tpat, NPAT,
		    EFNUL | EFNEW | EFCR, prompt, re_pat);
	if (rep == NULL)
		return (ABORT);
	if (rep[0] != '\0') {
		/* New pattern given */
		(void)strlcpy(re_pat, tpat, sizeof(re_pat));
		if (casefoldsearch)
			flags = REG_EXTENDED | REG_ICASE;
		else
			flags = REG_EXTENDED;
		if (dofree)
			regfree(&re_buff);
		error = regcomp(&re_buff, re_pat, flags);
		if (error != 0) {
			char	message[256];
			regerror(error, &re_buff, message, sizeof(message));
			ewprintf("Regex Error: %s", message);
			re_pat[0] = '\0';
			return (FALSE);
		}
		dofree = 1;
		s = TRUE;
	} else if (rep[0] == '\0' && re_pat[0] != '\0')
		/* Just using old pattern */
		s = TRUE;
	else
		s = FALSE;
	return (s);
}

/*
 * Cause case to not matter in searches.  This is the default.	If called
 * with argument cause case to matter.
 */
int
setcasefold(int f, int n)
{
	if (f & FFARG) {
		casefoldsearch = FALSE;
		ewprintf("Case-fold-search unset");
	} else {
		casefoldsearch = TRUE;
		ewprintf("Case-fold-search set");
	}

	/*
	 * Invalidate the regular expression pattern since I'm too lazy to
	 * recompile it.
	 */
	re_pat[0] = '\0';
	return (TRUE);
}

/*
 * Delete all lines after dot that contain a string matching regex.
 */
int
delmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Flush lines (containing match for regexp)"))
	    != TRUE)
		return (s);

	s = killmatches(TRUE);
	return (s);
}

/*
 * Delete all lines after dot that don't contain a string matching regex.
 */
int
delnonmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Keep lines (containing match for regexp)"))
	    != TRUE)
		return (s);

	s = killmatches(FALSE);
	return (s);
}

/*
 * This function does the work of deleting matching lines.
 */
static int
killmatches(int cond)
{
	int	 s, error;
	int	 count = 0;
	LINE	*clp;

	clp = curwp->w_dotp;
	if (curwp->w_doto == llength(clp))
		/* Consider dot on next line */
		clp = lforw(clp);

	while (clp != (curbp->b_linep)) {
		/* see if line matches */
		re_match[0].rm_so = 0;
		re_match[0].rm_eo = llength(clp);
		error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
		    REG_STARTEND);

		/* Delete line when appropriate */
		if ((cond == FALSE && error) || (cond == TRUE && !error)) {
			curwp->w_doto = 0;
			curwp->w_dotp = clp;
			count++;
			s = ldelete(llength(clp) + 1, KNONE);
			clp = curwp->w_dotp;
			curwp->w_flag |= WFMOVE;
			if (s == FALSE)
				return (FALSE);
		} else
			clp = lforw(clp);
	}

	ewprintf("%d line(s) deleted", count);
	if (count > 0)
		curwp->w_flag |= WFMOVE;

	return (TRUE);
}

/*
 * Count lines matching regex.
 */
int
cntmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
		return (s);
	s = countmatches(TRUE);

	return (s);
}

/*
 * Count lines that fail to match regex.
 */
int
cntnonmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
		return (s);
	s = countmatches(FALSE);

	return (s);
}

/*
 * This function does the work of counting matching lines.
 */
int
countmatches(int cond)
{
	int	 error;
	int	 count = 0;
	LINE	*clp;

	clp = curwp->w_dotp;
	if (curwp->w_doto == llength(clp))
		/* Consider dot on next line */
		clp = lforw(clp);

	while (clp != (curbp->b_linep)) {
		/* see if line matches */
		re_match[0].rm_so = 0;
		re_match[0].rm_eo = llength(clp);
		error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
		    REG_STARTEND);

		/* Count line when appropriate */
		if ((cond == FALSE && error) || (cond == TRUE && !error))
			count++;
		clp = lforw(clp);
	}

	if (cond)
		ewprintf("Number of lines matching: %d", count);
	else
		ewprintf("Number of lines not matching: %d", count);

	return (TRUE);
}
#endif	/* REGEX */