File: [local] / src / usr.bin / mg / re_search.c (download)
Revision 1.35, Wed Jul 22 13:29:05 2020 UTC (3 years, 10 months ago) by tb
Branch: MAIN
CVS Tags: OPENBSD_6_9_BASE, OPENBSD_6_9, OPENBSD_6_8_BASE, OPENBSD_6_8 Changes since 1.34: +9 -5 lines
Avoid running out of memory with query-replace-regex ^
Choosing ! (replace rest) never advances beyond the current line
and keeps inserting the replacement text, and mg eventually runs
out of memory.
Patch from Mark Willson with minor stylistic tweaks.
Looks good to Hiltjo Posthuma who would have preferred making forward
replacing on empty lines work. Mark argued that this makes it behave
consistently with the current behaviour when searching for ^ and the
point at the beginning of a non-empty line in which case the cursor does
not move either.
Also tested by krw
|
/* $OpenBSD: re_search.c,v 1.35 2020/07/22 13:29:05 tb Exp $ */
/* This file is in the public domain. */
/*
* regular expression search commands for Mg
*
* This file contains functions to implement several of gnuemacs's regular
* expression functions for Mg. Several of the routines below are just minor
* re-arrangements of Mg's non-regular expression search functions. Some of
* them are similar in structure to the original MicroEMACS, others are
* modifications of Rich Ellison's code. Peter Newton re-wrote about half of
* them from scratch.
*/
#ifdef REGEX
#include <sys/queue.h>
#include <sys/types.h>
#include <regex.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include "def.h"
#include "macro.h"
/*
 * Search sub-codes.  In this file SRCH_FORW, SRCH_BACK and SRCH_NOPR are
 * the values stored in re_srch_lastdir; the remaining codes are defined
 * here but not referenced by the regex search code below.
 */
#define SRCH_BEGIN (0) /* search sub-codes */
#define SRCH_FORW (-1)
#define SRCH_BACK (-2)
#define SRCH_NOPR (-3)
#define SRCH_ACCM (-4)
#define SRCH_MARK (-5)
/* Size of the regmatch_t array handed to regexec (whole match + groups). */
#define RE_NMATCH 10 /* max number of matches */
/* Size of the buffer in which re_doreplace builds the expanded replacement. */
#define REPLEN 256 /* max length of replacement string */
/* Text of the current pattern; an empty string means "no pattern". */
char re_pat[NPAT]; /* regex pattern */
/* Direction of the last successful search, or SRCH_NOPR if there is none. */
int re_srch_lastdir = SRCH_NOPR; /* last search flags */
/* When TRUE, re_readpattern compiles patterns with REG_ICASE. */
int casefoldsearch = TRUE; /* does search ignore case? */
/* File-local helpers, defined further down. */
static int re_doreplace(RSIZE, char *);
static int re_forwsrch(void);
static int re_backsrch(void);
static int re_readpattern(char *);
static int killmatches(int);
static int countmatches(int);
/*
 * Forward regular expression search.
 * Prompt for a pattern and look for it starting at ".".  When a match is
 * found re_forwsrch has already moved "." to the end of the match, and the
 * direction is remembered for re_searchagain.  When nothing matches, beep
 * and report the pattern that failed.
 */
/* ARGSUSED */
int
re_forwsearch(int f, int n)
{
	int	status;

	status = re_readpattern("RE Search");
	if (status != TRUE)
		return (status);

	if (re_forwsrch() != FALSE) {
		re_srch_lastdir = SRCH_FORW;
		return (TRUE);
	}

	dobeep();
	ewprintf("Search failed: \"%s\"", re_pat);
	return (FALSE);
}
/*
 * Backward regular expression search.
 * Prompt for a pattern and look for it from "." toward the front of the
 * buffer.  When a match is found re_backsrch has left "." on the first
 * character of the match, and the direction is remembered for
 * re_searchagain.  When nothing matches, beep and report the pattern.
 */
/* ARGSUSED */
int
re_backsearch(int f, int n)
{
	int	status;

	status = re_readpattern("RE Search backward");
	if (status != TRUE)
		return (status);

	if (re_backsrch() != FALSE) {
		re_srch_lastdir = SRCH_BACK;
		return (TRUE);
	}

	dobeep();
	ewprintf("Search failed: \"%s\"", re_pat);
	return (FALSE);
}
/*
 * Repeat the last search with the same pattern and direction.  The
 * direction was saved in "re_srch_lastdir" by the previous search command;
 * SRCH_NOPR there means no search has been done yet.
 *
 * XXX: This code has problems -- some incompatibility(?) with extend.c causes
 * match to fail when it should not.
 */
/* ARGSUSED */
int
re_searchagain(int f, int n)
{
	int	found;

	switch (re_srch_lastdir) {
	case SRCH_NOPR:
		dobeep();
		ewprintf("No last search");
		return (FALSE);
	case SRCH_FORW:
		found = re_forwsrch();
		break;
	case SRCH_BACK:
		found = re_backsrch();
		break;
	default:
		/* Any other recorded value: nothing to do, report success. */
		return (TRUE);
	}

	if (found == FALSE) {
		dobeep();
		ewprintf("Search failed: \"%s\"", re_pat);
		return (FALSE);
	}
	return (TRUE);
}
/*
 * Compiled regex goes here -- changed only when a new pattern is read
 * (re_readpattern is the only place that calls regcomp/regfree on it).
 */
static regex_t regex_buff;
/*
 * Match offsets.  Entry 0 doubles as the REG_STARTEND search window on the
 * way into regexec and holds the whole match on the way out; re_doreplace
 * reads the other entries to expand \N in a replacement string.
 */
static regmatch_t regex_match[RE_NMATCH];
/*
* Re-Query Replace.
* Replace strings selectively. Does a search and replace operation.
*/
/* ARGSUSED */
int
re_queryrepl(int f, int n)
{
int rcnt = 0; /* replacements made so far */
int plen, s; /* length of found string */
char news[NPAT]; /* replacement string */
if ((s = re_readpattern("RE Query replace")) != TRUE)
return (s);
if (eread("Query replace %s with: ", news, NPAT,
EFNUL | EFNEW | EFCR, re_pat) == NULL)
return (ABORT);
ewprintf("Query replacing %s with %s:", re_pat, news);
/*
* Search forward repeatedly, checking each time whether to insert
* or not. The "!" case makes the check always true, so it gets put
* into a tighter loop for efficiency.
*/
while (re_forwsrch() == TRUE) {
retry:
update(CMODE);
switch (getkey(FALSE)) {
case ' ':
plen = regex_match[0].rm_eo - regex_match[0].rm_so;
if (re_doreplace((RSIZE)plen, news) == FALSE)
return (FALSE);
rcnt++;
break;
case '.':
plen = regex_match[0].rm_eo - regex_match[0].rm_so;
if (re_doreplace((RSIZE)plen, news) == FALSE)
return (FALSE);
rcnt++;
goto stopsearch;
case CCHR('G'): /* ^G */
(void)ctrlg(FFRAND, 0);
goto stopsearch;
case CCHR('['): /* ESC */
case '`':
goto stopsearch;
case '!':
do {
plen = regex_match[0].rm_eo - regex_match[0].rm_so;
if (re_doreplace((RSIZE)plen, news) == FALSE)
return (FALSE);
rcnt++;
} while (re_forwsrch() == TRUE);
goto stopsearch;
case CCHR('?'): /* To not replace */
break;
default:
ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
goto retry;
}
}
stopsearch:
curwp->w_rflag |= WFFULL;
update(CMODE);
if (!inmacro) {
if (rcnt == 0)
ewprintf("(No replacements done)");
else if (rcnt == 1)
ewprintf("(1 replacement done)");
else
ewprintf("(%d replacements done)", rcnt);
}
return (TRUE);
}
/*
 * Routine re_doreplace calls lreplace to make replacements needed by
 * re_query replace.  Its reason for existence is to deal with \1, \2, etc.
 *
 * plen: length to remove (the length of the text just matched)
 * st:   replacement string as typed; "\N" is replaced by the text of
 *       sub-match N of the last regexec, "\c" for any other character c
 *       yields c itself, and a trailing lone backslash is dropped.
 *
 * Returns FALSE if the expanded string does not fit in REPLEN bytes or if
 * N is not below RE_NMATCH; otherwise returns whatever lreplace returns.
 */
static int
re_doreplace(RSIZE plen, char *st)
{
	int		 j, k, s, more, num, state;
	struct line	*clp;
	char		 repstr[REPLEN];

	clp = curwp->w_dotp;
	more = TRUE;
	j = 0;
	state = 0;
	num = 0;

	/*
	 * The following FSA parses the replacement string:
	 *   state 0: copying ordinary characters
	 *   state 1: just saw a backslash
	 *   state 2: collecting the digits of a back-reference
	 */
	while (more) {
		switch (state) {
		case 0:
			if (*st == '\\') {
				st++;
				state = 1;
			} else if (*st == '\0')
				more = FALSE;
			else {
				repstr[j] = *st;
				j++;
				if (j >= REPLEN)
					return (FALSE);
				st++;
			}
			break;
		case 1:
			if (*st >= '0' && *st <= '9') {
				num = *st - '0';
				st++;
				state = 2;
			} else if (*st == '\0')
				more = FALSE;
			else {
				repstr[j] = *st;
				j++;
				if (j >= REPLEN)
					return (FALSE);
				st++;
				state = 0;
			}
			break;
		case 2:
			if (*st >= '0' && *st <= '9') {
				/*
				 * Stop accumulating once the number is
				 * already out of range, so that a long run
				 * of digits cannot overflow num and wrap
				 * around to a seemingly valid index.
				 */
				if (num < RE_NMATCH)
					num = 10 * num + *st - '0';
				st++;
			} else {
				if (num >= RE_NMATCH)
					return (FALSE);
				/*
				 * rm_so is -1 for a group that took no part
				 * in the match; it contributes nothing.
				 */
				if (regex_match[num].rm_so >= 0) {
					k = regex_match[num].rm_eo -
					    regex_match[num].rm_so;
					if (j + k >= REPLEN)
						return (FALSE);
					memcpy(&repstr[j],
					    &clp->l_text[regex_match[num].rm_so],
					    k);
					j += k;
				}
				/*
				 * Now deal with the character that ended the
				 * number.  The end of the string only stops
				 * the loop: the terminator must not be
				 * copied nor counted against REPLEN.
				 */
				if (*st == '\0')
					more = FALSE;
				else if (*st == '\\') {
					st++;
					state = 1;
				} else {
					repstr[j] = *st;
					j++;
					if (j >= REPLEN)
						return (FALSE);
					st++;
					state = 0;
				}
			}
			break;
		} /* switch (state) */
	} /* while (more) */

	repstr[j] = '\0';
	s = lreplace(plen, repstr);
	return (s);
}
/*
* This routine does the real work of a forward search. The pattern is
* sitting in the external variable "pat". If found, dot is updated, the
* window system is notified of the change, and TRUE is returned. If the
* string isn't found, FALSE is returned.
*/
static int
re_forwsrch(void)
{
int re_flags, tbo, tdotline, error;
struct line *clp;
clp = curwp->w_dotp;
tbo = curwp->w_doto;
tdotline = curwp->w_dotline;
if (tbo == clp->l_used)
/*
* Don't start matching past end of line -- must move to
* beginning of next line, unless line is empty or at
* end of file.
*/
if (clp != curbp->b_headp && llength(clp) != 0) {
clp = lforw(clp);
tdotline++;
tbo = 0;
}
/*
* Note this loop does not process the last line, but this editor
* always makes the last line empty so this is good.
*/
while (clp != (curbp->b_headp)) {
re_flags = REG_STARTEND;
if (tbo != 0)
re_flags |= REG_NOTBOL;
regex_match[0].rm_so = tbo;
regex_match[0].rm_eo = llength(clp);
error = regexec(®ex_buff, ltext(clp) ? ltext(clp) : "",
RE_NMATCH, regex_match, re_flags);
if (error != 0) {
clp = lforw(clp);
tdotline++;
tbo = 0;
} else {
curwp->w_doto = regex_match[0].rm_eo;
curwp->w_dotp = clp;
curwp->w_dotline = tdotline;
curwp->w_rflag |= WFMOVE;
return (TRUE);
}
}
return (FALSE);
}
/*
* This routine does the real work of a backward search. The pattern is sitting
* in the external variable "re_pat". If found, dot is updated, the window
* system is notified of the change, and TRUE is returned. If the string isn't
* found, FALSE is returned.
*/
static int
re_backsrch(void)
{
struct line *clp;
int tbo, tdotline;
regmatch_t lastmatch;
clp = curwp->w_dotp;
tbo = curwp->w_doto;
tdotline = curwp->w_dotline;
/* Start search one position to the left of dot */
tbo = tbo - 1;
if (tbo < 0) {
/* must move up one line */
clp = lback(clp);
tdotline--;
tbo = llength(clp);
}
/*
* Note this loop does not process the last line, but this editor
* always makes the last line empty so this is good.
*/
while (clp != (curbp->b_headp)) {
regex_match[0].rm_so = 0;
regex_match[0].rm_eo = llength(clp);
lastmatch.rm_so = -1;
/*
* Keep searching until we don't match any longer. Assumes a
* non-match does not modify the regex_match array. We have to
* do this character-by-character after the first match since
* POSIX regexps don't give you a way to do reverse matches.
*/
while (!regexec(®ex_buff, ltext(clp) ? ltext(clp) : "",
RE_NMATCH, regex_match, REG_STARTEND) &&
regex_match[0].rm_so <= tbo) {
memcpy(&lastmatch, ®ex_match[0], sizeof(regmatch_t));
regex_match[0].rm_so++;
regex_match[0].rm_eo = llength(clp);
}
if (lastmatch.rm_so == -1) {
clp = lback(clp);
tdotline--;
tbo = llength(clp);
} else {
memcpy(®ex_match[0], &lastmatch, sizeof(regmatch_t));
curwp->w_doto = regex_match[0].rm_so;
curwp->w_dotp = clp;
curwp->w_dotline = tdotline;
curwp->w_rflag |= WFMOVE;
return (TRUE);
}
}
return (FALSE);
}
/*
* Read a pattern.
* Stash it in the external variable "re_pat". The "pat" is
* not updated if the user types in an empty line. If the user typed
* an empty line, and there is no old pattern, it is an error.
* Display the old pattern, in the style of Jeff Lomicka. There is
* some do-it-yourself control expansion.
*/
static int
re_readpattern(char *re_prompt)
{
static int dofree = 0;
int flags, error, s;
char tpat[NPAT], *rep;
if (re_pat[0] == '\0')
rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, re_prompt);
else
rep = eread("%s (default %s): ", tpat, NPAT,
EFNUL | EFNEW | EFCR, re_prompt, re_pat);
if (rep == NULL)
return (ABORT);
if (rep[0] != '\0') {
/* New pattern given */
(void)strlcpy(re_pat, tpat, sizeof(re_pat));
if (casefoldsearch)
flags = REG_EXTENDED | REG_ICASE;
else
flags = REG_EXTENDED;
if (dofree)
regfree(®ex_buff);
error = regcomp(®ex_buff, re_pat, flags);
if (error != 0) {
char message[256];
regerror(error, ®ex_buff, message, sizeof(message));
dobeep();
ewprintf("Regex Error: %s", message);
re_pat[0] = '\0';
return (FALSE);
}
dofree = 1;
s = TRUE;
} else if (rep[0] == '\0' && re_pat[0] != '\0')
/* Just using old pattern */
s = TRUE;
else
s = FALSE;
return (s);
}
/*
 * Choose whether searches ignore case.  Called without an argument, case
 * is ignored (the default); called with an argument, case matters.
 * The new setting is announced in the echo line.
 */
/* ARGSUSED*/
int
setcasefold(int f, int n)
{
	casefoldsearch = (f & FFARG) ? FALSE : TRUE;

	if (casefoldsearch == TRUE)
		ewprintf("Case-fold-search set");
	else
		ewprintf("Case-fold-search unset");

	/*
	 * The compiled pattern was built with the old setting.  Instead of
	 * recompiling it, forget the pattern text so that the next prompt
	 * asks for a new one.
	 */
	re_pat[0] = '\0';
	return (TRUE);
}
/*
 * Prompt for a regex and delete every line from dot onward that contains
 * a match for it.
 */
/* ARGSUSED */
int
delmatchlines(int f, int n)
{
	int	status;

	status = re_readpattern("Flush lines (containing match for regexp)");
	if (status != TRUE)
		return (status);

	return (killmatches(TRUE));
}
/*
 * Prompt for a regex and delete every line from dot onward that does not
 * contain a match for it.
 */
/* ARGSUSED */
int
delnonmatchlines(int f, int n)
{
	int	status;

	status = re_readpattern("Keep lines (containing match for regexp)");
	if (status != TRUE)
		return (status);

	return (killmatches(FALSE));
}
/*
* This function does the work of deleting matching lines.
*/
static int
killmatches(int cond)
{
int s, error;
int count = 0;
struct line *clp;
clp = curwp->w_dotp;
if (curwp->w_doto == llength(clp))
/* Consider dot on next line */
clp = lforw(clp);
while (clp != (curbp->b_headp)) {
/* see if line matches */
regex_match[0].rm_so = 0;
regex_match[0].rm_eo = llength(clp);
error = regexec(®ex_buff, ltext(clp) ? ltext(clp) : "",
RE_NMATCH, regex_match, REG_STARTEND);
/* Delete line when appropriate */
if ((cond == FALSE && error) || (cond == TRUE && !error)) {
curwp->w_doto = 0;
curwp->w_dotp = clp;
count++;
s = ldelete(llength(clp) + 1, KNONE);
clp = curwp->w_dotp;
curwp->w_rflag |= WFMOVE;
if (s == FALSE)
return (FALSE);
} else
clp = lforw(clp);
}
ewprintf("%d line(s) deleted", count);
if (count > 0)
curwp->w_rflag |= WFMOVE;
return (TRUE);
}
/*
 * Prompt for a regex and report how many lines from dot onward match it.
 */
/* ARGSUSED */
int
cntmatchlines(int f, int n)
{
	int	status;

	status = re_readpattern("Count lines (matching regexp)");
	if (status != TRUE)
		return (status);

	return (countmatches(TRUE));
}
/*
 * Prompt for a regex and report how many lines from dot onward fail to
 * match it.
 */
/* ARGSUSED */
int
cntnonmatchlines(int f, int n)
{
	int	status;

	status = re_readpattern("Count lines (not matching regexp)");
	if (status != TRUE)
		return (status);

	return (countmatches(FALSE));
}
/*
* This function does the work of counting matching lines.
*/
int
countmatches(int cond)
{
int error;
int count = 0;
struct line *clp;
clp = curwp->w_dotp;
if (curwp->w_doto == llength(clp))
/* Consider dot on next line */
clp = lforw(clp);
while (clp != (curbp->b_headp)) {
/* see if line matches */
regex_match[0].rm_so = 0;
regex_match[0].rm_eo = llength(clp);
error = regexec(®ex_buff, ltext(clp) ? ltext(clp) : "",
RE_NMATCH, regex_match, REG_STARTEND);
/* Count line when appropriate */
if ((cond == FALSE && error) || (cond == TRUE && !error))
count++;
clp = lforw(clp);
}
if (cond)
ewprintf("Number of lines matching: %d", count);
else
ewprintf("Number of lines not matching: %d", count);
return (TRUE);
}
#endif /* REGEX */