[BACK]Return to re_search.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / mg

File: [local] / src / usr.bin / mg / re_search.c (download)

Revision 1.31, Thu Mar 19 21:22:15 2015 UTC (9 years, 2 months ago) by bcallah
Branch: MAIN
CVS Tags: OPENBSD_6_1_BASE, OPENBSD_6_1, OPENBSD_6_0_BASE, OPENBSD_6_0, OPENBSD_5_9_BASE, OPENBSD_5_9, OPENBSD_5_8_BASE, OPENBSD_5_8
Changes since 1.30: +6 -3 lines

Clean up the includes in mg.
This does the following:
Moves all POSIX headers from sysdef.h into the individual .c files so that
each file now only includes what it needs. All headers are properly sorted.
Moves the remainder of sysdef.h to other files (mostly def.h) and deletes
sysdef.h now that it's no longer contains anything.
Tweak a comment that references sysdef.h so that it no longer does that.
ok florian@

/*	$OpenBSD: re_search.c,v 1.31 2015/03/19 21:22:15 bcallah Exp $	*/

/* This file is in the public domain. */

/*
 *	regular expression search commands for Mg
 *
 * This file contains functions to implement several of gnuemacs's regular
 * expression functions for Mg.  Several of the routines below are just minor
 * re-arrangements of Mg's non-regular expression search functions.  Some of
 * them are similar in structure to the original MicroEMACS, others are
 * modifications of Rich Ellison's code.  Peter Newton re-wrote about half of
 * them from scratch.
 */

#ifdef REGEX
#include <sys/queue.h>
#include <sys/types.h>
#include <regex.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>

#include "def.h"
#include "macro.h"

#define SRCH_BEGIN	(0)		/* search sub-codes		    */
#define SRCH_FORW	(-1)
#define SRCH_BACK	(-2)
#define SRCH_NOPR	(-3)
#define SRCH_ACCM	(-4)
#define SRCH_MARK	(-5)

#define RE_NMATCH	10		/* max number of matches	    */
#define REPLEN		256		/* max length of replacement string */

char	re_pat[NPAT];			/* regex pattern		    */
int	re_srch_lastdir = SRCH_NOPR;	/* last search flags		    */
int	casefoldsearch = TRUE;		/* does search ignore case?	    */

static int	 re_doreplace(RSIZE, char *);
static int	 re_forwsrch(void);
static int	 re_backsrch(void);
static int	 re_readpattern(char *);
static int	 killmatches(int);
static int	 countmatches(int);

/*
 * Search forward.
 * Get a search string from the user and search for it starting at ".".  If
 * found, move "." to just after the matched characters.  display does all
 * the hard stuff.  If not found, it just prints a message.
 */
/* ARGSUSED */
int
re_forwsearch(int f, int n)
{
	int	s;

	if ((s = re_readpattern("RE Search")) != TRUE)
		return (s);
	if (re_forwsrch() == FALSE) {
		dobeep();
		ewprintf("Search failed: \"%s\"", re_pat);
		return (FALSE);
	}
	re_srch_lastdir = SRCH_FORW;
	return (TRUE);
}

/*
 * Reverse search.
 * Get a search string from the user, and search, starting at "."
 * and proceeding toward the front of the buffer. If found "." is left
 * pointing at the first character of the pattern [the last character that
 * was matched].
 */
/* ARGSUSED */
int
re_backsearch(int f, int n)
{
	int	s;

	if ((s = re_readpattern("RE Search backward")) != TRUE)
		return (s);
	if (re_backsrch() == FALSE) {
		dobeep();
		ewprintf("Search failed: \"%s\"", re_pat);
		return (FALSE);
	}
	re_srch_lastdir = SRCH_BACK;
	return (TRUE);
}

/*
 * Search again, using the same search string and direction as the last search
 * command.  The direction has been saved in "srch_lastdir", so you know which
 * way to go.
 *
 * XXX: This code has problems -- some incompatibility(?) with extend.c causes
 * match to fail when it should not.
 */
/* ARGSUSED */
int
re_searchagain(int f, int n)
{
	if (re_srch_lastdir == SRCH_NOPR) {
		dobeep();
		ewprintf("No last search");
		return (FALSE);
	}
	if (re_srch_lastdir == SRCH_FORW) {
		if (re_forwsrch() == FALSE) {
			dobeep();
			ewprintf("Search failed: \"%s\"", re_pat);
			return (FALSE);
		}
		return (TRUE);
	}
	if (re_srch_lastdir == SRCH_BACK)
		if (re_backsrch() == FALSE) {
			dobeep();
			ewprintf("Search failed: \"%s\"", re_pat);
			return (FALSE);
		}

	return (TRUE);
}

/* Compiled regex goes here-- changed only when new pattern read */
static regex_t		regex_buff;
static regmatch_t	regex_match[RE_NMATCH];

/*
 * Re-Query Replace.
 *	Replace strings selectively.  Does a search and replace operation.
 */
/* ARGSUSED */
int
re_queryrepl(int f, int n)
{
	int	rcnt = 0;		/* replacements made so far	*/
	int	plen, s;		/* length of found string	*/
	char	news[NPAT];		/* replacement string		*/

	if ((s = re_readpattern("RE Query replace")) != TRUE)
		return (s);
	if (eread("Query replace %s with: ", news, NPAT,
	    EFNUL | EFNEW | EFCR, re_pat) == NULL)
		return (ABORT);
	ewprintf("Query replacing %s with %s:", re_pat, news);

	/*
	 * Search forward repeatedly, checking each time whether to insert
	 * or not.  The "!" case makes the check always true, so it gets put
	 * into a tighter loop for efficiency.
	 */
	while (re_forwsrch() == TRUE) {
retry:
		update(CMODE);
		switch (getkey(FALSE)) {
		case ' ':
			plen = regex_match[0].rm_eo - regex_match[0].rm_so;
			if (re_doreplace((RSIZE)plen, news) == FALSE)
				return (FALSE);
			rcnt++;
			break;

		case '.':
			plen = regex_match[0].rm_eo - regex_match[0].rm_so;
			if (re_doreplace((RSIZE)plen, news) == FALSE)
				return (FALSE);
			rcnt++;
			goto stopsearch;

		case CCHR('G'):				/* ^G */
			(void)ctrlg(FFRAND, 0);
			goto stopsearch;
		case CCHR('['):				/* ESC */
		case '`':
			goto stopsearch;
		case '!':
			do {
				plen = regex_match[0].rm_eo - regex_match[0].rm_so;
				if (re_doreplace((RSIZE)plen, news) == FALSE)
					return (FALSE);
				rcnt++;
			} while (re_forwsrch() == TRUE);
			goto stopsearch;

		case CCHR('?'):				/* To not replace */
			break;

		default:
			ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
			goto retry;
		}
	}

stopsearch:
	curwp->w_rflag |= WFFULL;
	update(CMODE);
	if (!inmacro) {
		if (rcnt == 0)
			ewprintf("(No replacements done)");
		else if (rcnt == 1)
			ewprintf("(1 replacement done)");
		else
			ewprintf("(%d replacements done)", rcnt);
	}
	return (TRUE);
}

/*
 * Routine re_doreplace calls lreplace to make replacements needed by
 * re_query replace.  Its reason for existence is to deal with \1, \2. etc.
 *  plen: length to remove
 *  st:   replacement string
 */
static int
re_doreplace(RSIZE plen, char *st)
{
	int	 j, k, s, more, num, state;
	struct line	*clp;
	char	 repstr[REPLEN];

	clp = curwp->w_dotp;
	more = TRUE;
	j = 0;
	state = 0;
	num = 0;

	/* The following FSA parses the replacement string */
	while (more) {
		switch (state) {
		case 0:
			if (*st == '\\') {
				st++;
				state = 1;
			} else if (*st == '\0')
				more = FALSE;
			else {
				repstr[j] = *st;
				j++;
				if (j >= REPLEN)
					return (FALSE);
				st++;
			}
			break;
		case 1:
			if (*st >= '0' && *st <= '9') {
				num = *st - '0';
				st++;
				state = 2;
			} else if (*st == '\0')
				more = FALSE;
			else {
				repstr[j] = *st;
				j++;
				if (j >= REPLEN)
					return (FALSE);
				st++;
				state = 0;
			}
			break;
		case 2:
			if (*st >= '0' && *st <= '9') {
				num = 10 * num + *st - '0';
				st++;
			} else {
				if (num >= RE_NMATCH)
					return (FALSE);
				k = regex_match[num].rm_eo - regex_match[num].rm_so;
				if (j + k >= REPLEN)
					return (FALSE);
				bcopy(&(clp->l_text[regex_match[num].rm_so]),
				    &repstr[j], k);
				j += k;
				if (*st == '\0')
					more = FALSE;
				if (*st == '\\') {
					st++;
					state = 1;
				} else {
					repstr[j] = *st;
					j++;
					if (j >= REPLEN)
						return (FALSE);
					st++;
					state = 0;
				}
			}
			break;
		}		/* switch (state) */
	}			/* while (more)   */

	repstr[j] = '\0';
	s = lreplace(plen, repstr);
	return (s);
}

/*
 * This routine does the real work of a forward search.  The pattern is
 * sitting in the external variable "pat".  If found, dot is updated, the
 * window system is notified of the change, and TRUE is returned.  If the
 * string isn't found, FALSE is returned.
 */
static int
re_forwsrch(void)
{
	int	 tbo, tdotline, error;
	struct line	*clp;

	clp = curwp->w_dotp;
	tbo = curwp->w_doto;
	tdotline = curwp->w_dotline;

	if (tbo == clp->l_used)
		/*
		 * Don't start matching past end of line -- must move to
		 * beginning of next line, unless at end of file.
		 */
		if (clp != curbp->b_headp) {
			clp = lforw(clp);
			tdotline++;
			tbo = 0;
		}
	/*
	 * Note this loop does not process the last line, but this editor
	 * always makes the last line empty so this is good.
	 */
	while (clp != (curbp->b_headp)) {
		regex_match[0].rm_so = tbo;
		regex_match[0].rm_eo = llength(clp);
		error = regexec(&regex_buff, ltext(clp), RE_NMATCH, regex_match,
		    REG_STARTEND);
		if (error != 0) {
			clp = lforw(clp);
			tdotline++;
			tbo = 0;
		} else {
			curwp->w_doto = regex_match[0].rm_eo;
			curwp->w_dotp = clp;
			curwp->w_dotline = tdotline;
			curwp->w_rflag |= WFMOVE;
			return (TRUE);
		}
	}
	return (FALSE);
}

/*
 * This routine does the real work of a backward search.  The pattern is sitting
 * in the external variable "re_pat".  If found, dot is updated, the window
 * system is notified of the change, and TRUE is returned.  If the string isn't
 * found, FALSE is returned.
 */
static int
re_backsrch(void)
{
	struct line		*clp;
	int		 tbo, tdotline;
	regmatch_t	 lastmatch;

	clp = curwp->w_dotp;
	tbo = curwp->w_doto;
	tdotline = curwp->w_dotline;

	/* Start search one position to the left of dot */
	tbo = tbo - 1;
	if (tbo < 0) {
		/* must move up one line */
		clp = lback(clp);
		tdotline--;
		tbo = llength(clp);
	}

	/*
	 * Note this loop does not process the last line, but this editor
	 * always makes the last line empty so this is good.
	 */
	while (clp != (curbp->b_headp)) {
		regex_match[0].rm_so = 0;
		regex_match[0].rm_eo = llength(clp);
		lastmatch.rm_so = -1;
		/*
		 * Keep searching until we don't match any longer.  Assumes a
		 * non-match does not modify the regex_match array.  We have to
		 * do this character-by-character after the first match since
		 * POSIX regexps don't give you a way to do reverse matches.
		 */
		while (!regexec(&regex_buff, ltext(clp), RE_NMATCH, regex_match,
		    REG_STARTEND) && regex_match[0].rm_so < tbo) {
			memcpy(&lastmatch, &regex_match[0], sizeof(regmatch_t));
			regex_match[0].rm_so++;
			regex_match[0].rm_eo = llength(clp);
		}
		if (lastmatch.rm_so == -1) {
			clp = lback(clp);
			tdotline--;
			tbo = llength(clp);
		} else {
			memcpy(&regex_match[0], &lastmatch, sizeof(regmatch_t));
			curwp->w_doto = regex_match[0].rm_so;
			curwp->w_dotp = clp;
			curwp->w_dotline = tdotline;
			curwp->w_rflag |= WFMOVE;
			return (TRUE);
		}
	}
	return (FALSE);
}

/*
 * Read a pattern.
 * Stash it in the external variable "re_pat". The "pat" is
 * not updated if the user types in an empty line. If the user typed
 * an empty line, and there is no old pattern, it is an error.
 * Display the old pattern, in the style of Jeff Lomicka. There is
 * some do-it-yourself control expansion.
 */
static int
re_readpattern(char *prompt)
{
	static int	dofree = 0;
	int		flags, error, s;
	char		tpat[NPAT], *rep;

	if (re_pat[0] == '\0')
		rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, prompt);
	else
		rep = eread("%s: (default %s) ", tpat, NPAT,
		    EFNUL | EFNEW | EFCR, prompt, re_pat);
	if (rep == NULL)
		return (ABORT);
	if (rep[0] != '\0') {
		/* New pattern given */
		(void)strlcpy(re_pat, tpat, sizeof(re_pat));
		if (casefoldsearch)
			flags = REG_EXTENDED | REG_ICASE;
		else
			flags = REG_EXTENDED;
		if (dofree)
			regfree(&regex_buff);
		error = regcomp(&regex_buff, re_pat, flags);
		if (error != 0) {
			char	message[256];
			regerror(error, &regex_buff, message, sizeof(message));
			dobeep();
			ewprintf("Regex Error: %s", message);
			re_pat[0] = '\0';
			return (FALSE);
		}
		dofree = 1;
		s = TRUE;
	} else if (rep[0] == '\0' && re_pat[0] != '\0')
		/* Just using old pattern */
		s = TRUE;
	else
		s = FALSE;
	return (s);
}

/*
 * Cause case to not matter in searches.  This is the default.	If called
 * with argument cause case to matter.
 */
/* ARGSUSED*/
int
setcasefold(int f, int n)
{
	if (f & FFARG) {
		casefoldsearch = FALSE;
		ewprintf("Case-fold-search unset");
	} else {
		casefoldsearch = TRUE;
		ewprintf("Case-fold-search set");
	}

	/*
	 * Invalidate the regular expression pattern since I'm too lazy to
	 * recompile it.
	 */
	re_pat[0] = '\0';
	return (TRUE);
}

/*
 * Delete all lines after dot that contain a string matching regex.
 */
/* ARGSUSED */
int
delmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Flush lines (containing match for regexp)"))
	    != TRUE)
		return (s);

	s = killmatches(TRUE);
	return (s);
}

/*
 * Delete all lines after dot that don't contain a string matching regex.
 */
/* ARGSUSED */
int
delnonmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Keep lines (containing match for regexp)"))
	    != TRUE)
		return (s);

	s = killmatches(FALSE);
	return (s);
}

/*
 * This function does the work of deleting matching lines.
 */
static int
killmatches(int cond)
{
	int	 s, error;
	int	 count = 0;
	struct line	*clp;

	clp = curwp->w_dotp;
	if (curwp->w_doto == llength(clp))
		/* Consider dot on next line */
		clp = lforw(clp);

	while (clp != (curbp->b_headp)) {
		/* see if line matches */
		regex_match[0].rm_so = 0;
		regex_match[0].rm_eo = llength(clp);
		error = regexec(&regex_buff, ltext(clp), RE_NMATCH, regex_match,
		    REG_STARTEND);

		/* Delete line when appropriate */
		if ((cond == FALSE && error) || (cond == TRUE && !error)) {
			curwp->w_doto = 0;
			curwp->w_dotp = clp;
			count++;
			s = ldelete(llength(clp) + 1, KNONE);
			clp = curwp->w_dotp;
			curwp->w_rflag |= WFMOVE;
			if (s == FALSE)
				return (FALSE);
		} else
			clp = lforw(clp);
	}

	ewprintf("%d line(s) deleted", count);
	if (count > 0)
		curwp->w_rflag |= WFMOVE;

	return (TRUE);
}

/*
 * Count lines matching regex.
 */
/* ARGSUSED */
int
cntmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
		return (s);
	s = countmatches(TRUE);

	return (s);
}

/*
 * Count lines that fail to match regex.
 */
/* ARGSUSED */
int
cntnonmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
		return (s);
	s = countmatches(FALSE);

	return (s);
}

/*
 * This function does the work of counting matching lines.
 */
int
countmatches(int cond)
{
	int	 error;
	int	 count = 0;
	struct line	*clp;

	clp = curwp->w_dotp;
	if (curwp->w_doto == llength(clp))
		/* Consider dot on next line */
		clp = lforw(clp);

	while (clp != (curbp->b_headp)) {
		/* see if line matches */
		regex_match[0].rm_so = 0;
		regex_match[0].rm_eo = llength(clp);
		error = regexec(&regex_buff, ltext(clp), RE_NMATCH, regex_match,
		    REG_STARTEND);

		/* Count line when appropriate */
		if ((cond == FALSE && error) || (cond == TRUE && !error))
			count++;
		clp = lforw(clp);
	}

	if (cond)
		ewprintf("Number of lines matching: %d", count);
	else
		ewprintf("Number of lines not matching: %d", count);

	return (TRUE);
}
#endif	/* REGEX */