File: [local] / src / usr.bin / mg / re_search.c (download)
Revision 1.11, Sat Feb 16 21:27:49 2002 UTC (22 years, 4 months ago) by millert
Branch: MAIN
CVS Tags: OPENBSD_3_3_BASE, OPENBSD_3_3, OPENBSD_3_2_BASE, OPENBSD_3_2, OPENBSD_3_1_BASE, OPENBSD_3_1 Changes since 1.10: +7 -7 lines
Part one of userland __P removal. Done with a simple regexp with some minor hand editing to make comments line up correctly. Another pass is forthcoming that handles the cases that could not be done automatically.
|
/* $OpenBSD: re_search.c,v 1.11 2002/02/16 21:27:49 millert Exp $ */
/*
* regular expression search commands for Mg
*
* This file contains functions to implement several of gnuemacs's regular
* expression functions for Mg. Several of the routines below are just minor
* re-arrangements of Mg's non-regular expression search functions. Some of
* them are similar in structure to the original MicroEMACS, others are
* modifications of Rich Ellison's code. Peter Newton re-wrote about half of
* them from scratch.
*/
#ifdef REGEX
#include <sys/types.h>
#include <regex.h>
#include "def.h"
#include "macro.h"
#define SRCH_BEGIN (0) /* search sub-codes */
#define SRCH_FORW (-1)
#define SRCH_BACK (-2)
#define SRCH_NOPR (-3)
#define SRCH_ACCM (-4)
#define SRCH_MARK (-5)
#define RE_NMATCH 10 /* max number of matches */
#define REPLEN 256 /* max length of replacement string */
char re_pat[NPAT]; /* regex pattern */
int re_srch_lastdir = SRCH_NOPR; /* last search flags */
int casefoldsearch = TRUE; /* does search ignore case? */
static int re_doreplace(RSIZE, char *, int);
static int re_forwsrch(void);
static int re_backsrch(void);
static int re_readpattern(char *);
static int killmatches(int);
static int countmatches(int);
/*
* Search forward.
* Get a search string from the user and search for it starting at ".". If
* found, move "." to just after the matched characters. display does all
* the hard stuff. If not found, it just prints a message.
*/
/* ARGSUSED */
int
re_forwsearch(f, n)
int f, n;
{
int s;
if ((s = re_readpattern("RE Search")) != TRUE)
return (s);
if (re_forwsrch() == FALSE) {
ewprintf("Search failed: \"%s\"", re_pat);
return (FALSE);
}
re_srch_lastdir = SRCH_FORW;
return (TRUE);
}
/*
* Reverse search.
* Get a search string from the user, and search, starting at "."
* and proceeding toward the front of the buffer. If found "." is left
* pointing at the first character of the pattern [the last character that
* was matched].
*/
/* ARGSUSED */
int
re_backsearch(f, n)
int f, n;
{
int s;
if ((s = re_readpattern("RE Search backward")) != TRUE)
return (s);
if (re_backsrch() == FALSE) {
ewprintf("Search failed: \"%s\"", re_pat);
return (FALSE);
}
re_srch_lastdir = SRCH_BACK;
return (TRUE);
}
/*
* Search again, using the same search string and direction as the last search
* command. The direction has been saved in "srch_lastdir", so you know which
* way to go.
*
* XXX: This code has problems -- some incompatibility(?) with extend.c causes
* match to fail when it should not.
*/
/* ARGSUSED */
int
re_searchagain(f, n)
int f, n;
{
if (re_srch_lastdir == SRCH_NOPR) {
ewprintf("No last search");
return (FALSE);
}
if (re_srch_lastdir == SRCH_FORW) {
if (re_forwsrch() == FALSE) {
ewprintf("Search failed: \"%s\"", re_pat);
return (FALSE);
}
return (TRUE);
}
if (re_srch_lastdir == SRCH_BACK)
if (re_backsrch() == FALSE) {
ewprintf("Search failed: \"%s\"", re_pat);
return (FALSE);
}
return (TRUE);
}
/* Compiled regex goes here-- changed only when new pattern read */
static regex_t re_buff;
static regmatch_t re_match[RE_NMATCH];
/*
* Re-Query Replace.
* Replace strings selectively. Does a search and replace operation.
*/
/* ARGSUSED */
int
re_queryrepl(f, n)
int f, n;
{
int s;
int rcnt = 0; /* replacements made so far */
int plen; /* length of found string */
char news[NPAT]; /* replacement string */
/* Casefold check */
if (!casefoldsearch)
f = TRUE;
if ((s = re_readpattern("RE Query replace")) != TRUE)
return (s);
if ((s =
ereply("Query replace %s with: ", news, NPAT, re_pat)) == ABORT)
return (s);
if (s == FALSE)
news[0] = '\0';
ewprintf("Query replacing %s with %s:", re_pat, news);
/*
* Search forward repeatedly, checking each time whether to insert
* or not. The "!" case makes the check always true, so it gets put
* into a tighter loop for efficiency.
*/
while (re_forwsrch() == TRUE) {
retry:
update();
switch (getkey(FALSE)) {
case ' ':
plen = re_match[0].rm_eo - re_match[0].rm_so;
if (re_doreplace((RSIZE)plen, news, f) == FALSE)
return (FALSE);
rcnt++;
break;
case '.':
plen = re_match[0].rm_eo - re_match[0].rm_so;
if (re_doreplace((RSIZE)plen, news, f) == FALSE)
return (FALSE);
rcnt++;
goto stopsearch;
case CCHR('G'): /* ^G */
(void)ctrlg(FFRAND, 0);
case CCHR('['): /* ESC */
case '`':
goto stopsearch;
case '!':
do {
plen = re_match[0].rm_eo - re_match[0].rm_so;
if (re_doreplace((RSIZE)plen, news, f) == FALSE)
return (FALSE);
rcnt++;
} while (re_forwsrch() == TRUE);
goto stopsearch;
case CCHR('?'): /* To not replace */
break;
default:
ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
goto retry;
}
}
stopsearch:
curwp->w_flag |= WFHARD;
update();
if (!inmacro) {
if (rcnt == 0)
ewprintf("(No replacements done)");
else if (rcnt == 1)
ewprintf("(1 replacement done)");
else
ewprintf("(%d replacements done)", rcnt);
}
return TRUE;
}
/*
* Routine re_doreplace calls lreplace to make replacements needed by
* re_query replace. Its reason for existence is to deal with \1, \2. etc.
*/
static int
re_doreplace(plen, st, f)
RSIZE plen; /* length to remove */
char *st; /* replacement string */
int f; /* case hack disable */
{
int j, k, s, more, num, state;
LINE *clp;
char repstr[REPLEN];
clp = curwp->w_dotp;
more = TRUE;
j = 0;
state = 0;
num = 0;
/* The following FSA parses the replacement string */
while (more) {
switch (state) {
case 0:
if (*st == '\\') {
st++;
state = 1;
} else if (*st == '\0')
more = FALSE;
else {
repstr[j] = *st;
j++;
if (j >= REPLEN)
return (FALSE);
st++;
}
break;
case 1:
if (*st >= '0' && *st <= '9') {
num = *st - '0';
st++;
state = 2;
} else if (*st == '\0')
more = FALSE;
else {
repstr[j] = *st;
j++;
if (j >= REPLEN)
return (FALSE);
st++;
state = 0;
}
break;
case 2:
if (*st >= '0' && *st <= '9') {
num = 10 * num + *st - '0';
st++;
} else {
if (num >= RE_NMATCH)
return (FALSE);
k = re_match[num].rm_eo - re_match[num].rm_so;
if (j + k >= REPLEN)
return (FALSE);
bcopy(&(clp->l_text[re_match[num].rm_so]),
&repstr[j], k);
j += k;
if (*st == '\0')
more = FALSE;
if (*st == '\\') {
st++;
state = 1;
} else {
repstr[j] = *st;
j++;
if (j >= REPLEN)
return (FALSE);
st++;
state = 0;
}
}
break;
} /* switch (state) */
} /* while (more) */
repstr[j] = '\0';
s = lreplace(plen, repstr, f);
return (s);
}
/*
* This routine does the real work of a forward search. The pattern is
* sitting in the external variable "pat". If found, dot is updated, the
* window system is notified of the change, and TRUE is returned. If the
* string isn't found, FALSE is returned.
*/
static int
re_forwsrch()
{
int tbo, error;
LINE *clp;
clp = curwp->w_dotp;
tbo = curwp->w_doto;
if (tbo == clp->l_used)
/*
* Don't start matching past end of line -- must move to
* beginning of next line, unless at end of file.
*/
if (clp != curbp->b_linep) {
clp = lforw(clp);
tbo = 0;
}
/*
* Note this loop does not process the last line, but this editor
* always makes the last line empty so this is good.
*/
while (clp != (curbp->b_linep)) {
re_match[0].rm_so = tbo;
re_match[0].rm_eo = llength(clp);
error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
REG_STARTEND);
if (error != 0) {
clp = lforw(clp);
tbo = 0;
} else {
curwp->w_doto = re_match[0].rm_eo;
curwp->w_dotp = clp;
curwp->w_flag |= WFMOVE;
return (TRUE);
}
}
return (FALSE);
}
/*
* This routine does the real work of a backward search. The pattern is sitting
* in the external variable "re_pat". If found, dot is updated, the window
* system is notified of the change, and TRUE is returned. If the string isn't
* found, FALSE is returned.
*/
static int
re_backsrch()
{
LINE *clp;
int tbo;
regmatch_t lastmatch;
clp = curwp->w_dotp;
tbo = curwp->w_doto;
/* Start search one position to the left of dot */
tbo = tbo - 1;
if (tbo < 0) {
/* must move up one line */
clp = lback(clp);
tbo = llength(clp);
}
/*
* Note this loop does not process the last line, but this editor
* always makes the last line empty so this is good.
*/
while (clp != (curbp->b_linep)) {
re_match[0].rm_so = 0;
re_match[0].rm_eo = llength(clp);
lastmatch.rm_so = -1;
/*
* Keep searching until we don't match any longer. Assumes a
* non-match does not modify the re_match array. We have to
* do this character-by-character after the first match since
* POSIX regexps don't give you a way to do reverse matches.
*/
while (!regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
REG_STARTEND) && re_match[0].rm_so < tbo) {
memcpy(&lastmatch, &re_match[0], sizeof(regmatch_t));
re_match[0].rm_so++;
re_match[0].rm_eo = llength(clp);
}
if (lastmatch.rm_so == -1) {
clp = lback(clp);
tbo = llength(clp);
} else {
memcpy(&re_match[0], &lastmatch, sizeof(regmatch_t));
curwp->w_doto = re_match[0].rm_so;
curwp->w_dotp = clp;
curwp->w_flag |= WFMOVE;
return (TRUE);
}
}
return (FALSE);
}
/*
* Read a pattern.
* Stash it in the external variable "re_pat". The "pat" is
* not updated if the user types in an empty line. If the user typed
* an empty line, and there is no old pattern, it is an error.
* Display the old pattern, in the style of Jeff Lomicka. There is
* some do-it-yourself control expansion.
*/
static int
re_readpattern(prompt)
char *prompt;
{
int s, flags, error;
char tpat[NPAT];
static int dofree = 0;
if (re_pat[0] == '\0')
s = ereply("%s: ", tpat, NPAT, prompt);
else
s = ereply("%s: (default %s) ", tpat, NPAT, prompt, re_pat);
if (s == TRUE) {
/* New pattern given */
(void)strlcpy(re_pat, tpat, sizeof re_pat);
if (casefoldsearch)
flags = REG_EXTENDED | REG_ICASE;
else
flags = REG_EXTENDED;
if (dofree)
regfree(&re_buff);
error = regcomp(&re_buff, re_pat, flags);
if (error != 0) {
char message[256];
regerror(error, &re_buff, message, sizeof(message));
ewprintf("Regex Error: %s", message);
re_pat[0] = '\0';
return (FALSE);
}
dofree = 1;
} else if (s == FALSE && re_pat[0] != '\0')
/* Just using old pattern */
s = TRUE;
return (s);
}
/*
* Cause case to not matter in searches. This is the default. If called
* with argument cause case to matter.
*/
int
setcasefold(f, n)
int f, n;
{
if (f & FFARG) {
casefoldsearch = FALSE;
ewprintf("Case-fold-search unset");
} else {
casefoldsearch = TRUE;
ewprintf("Case-fold-search set");
}
/*
* Invalidate the regular expression pattern since I'm too lazy to
* recompile it.
*/
re_pat[0] = '\0';
return (TRUE);
}
/*
* Delete all lines after dot that contain a string matching regex
*/
int
delmatchlines(f, n)
int f, n;
{
int s;
if ((s = re_readpattern("Flush lines (containing match for regexp)"))
!= TRUE)
return (s);
s = killmatches(TRUE);
return (s);
}
/*
* Delete all lines after dot that don't contain a string matching regex
*/
int
delnonmatchlines(f, n)
int f, n;
{
int s;
if ((s = re_readpattern("Keep lines (containing match for regexp)"))
!= TRUE)
return (s);
s = killmatches(FALSE);
return (s);
}
/*
* This function does the work of deleting matching lines
*/
static int
killmatches(cond)
int cond;
{
int s, error;
int count = 0;
LINE *clp;
clp = curwp->w_dotp;
if (curwp->w_doto == llength(clp))
/* Consider dot on next line */
clp = lforw(clp);
while (clp != (curbp->b_linep)) {
/* see if line matches */
re_match[0].rm_so = 0;
re_match[0].rm_eo = llength(clp);
error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
REG_STARTEND);
/* Delete line when appropriate */
if ((cond == FALSE && error) || (cond == TRUE && !error)) {
curwp->w_doto = 0;
curwp->w_dotp = clp;
count++;
s = ldelete(llength(clp) + 1, KNONE);
clp = curwp->w_dotp;
curwp->w_flag |= WFMOVE;
if (s == FALSE)
return (FALSE);
} else
clp = lforw(clp);
}
ewprintf("%d line(s) deleted", count);
if (count > 0)
curwp->w_flag |= WFMOVE;
return (TRUE);
}
/*
* Count lines matching regex
*/
int
cntmatchlines(f, n)
int f, n;
{
int s;
if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
return (s);
s = countmatches(TRUE);
return (s);
}
/*
* Count lines that fail to match regex
*/
int
cntnonmatchlines(f, n)
int f, n;
{
int s;
if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
return (s);
s = countmatches(FALSE);
return (s);
}
/*
* This function does the work of counting matching lines.
*/
int
countmatches(cond)
int cond;
{
int error;
int count = 0;
LINE *clp;
clp = curwp->w_dotp;
if (curwp->w_doto == llength(clp))
/* Consider dot on next line */
clp = lforw(clp);
while (clp != (curbp->b_linep)) {
/* see if line matches */
re_match[0].rm_so = 0;
re_match[0].rm_eo = llength(clp);
error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
REG_STARTEND);
/* Count line when appropriate */
if ((cond == FALSE && error) || (cond == TRUE && !error))
count++;
clp = lforw(clp);
}
if (cond)
ewprintf("Number of lines matching: %d", count);
else
ewprintf("Number of lines not matching: %d", count);
return (TRUE);
}
#endif /* REGEX */