Annotation of src/usr.bin/mg/re_search.c, Revision 1.11
1.11 ! millert 1: /* $OpenBSD: re_search.c,v 1.10 2002/02/13 03:03:49 vincent Exp $ */
1.7 niklas 2:
1.1 deraadt 3: /*
1.6 millert 4: * regular expression search commands for Mg
1.1 deraadt 5: *
1.6 millert 6: * This file contains functions to implement several of gnuemacs's regular
7: * expression functions for Mg. Several of the routines below are just minor
8: * re-arrangements of Mg's non-regular expression search functions. Some of
1.9 mickey 9: * them are similar in structure to the original MicroEMACS, others are
1.6 millert 10: * modifications of Rich Ellison's code. Peter Newton re-wrote about half of
11: * them from scratch.
1.1 deraadt 12: */
13:
1.6 millert 14: #ifdef REGEX
1.2 millert 15: #include <sys/types.h>
16: #include <regex.h>
17:
1.6 millert 18: #include "def.h"
19: #include "macro.h"
1.1 deraadt 20:
1.6 millert 21: #define SRCH_BEGIN (0) /* search sub-codes */
1.1 deraadt 22: #define SRCH_FORW (-1)
23: #define SRCH_BACK (-2)
24: #define SRCH_NOPR (-3)
25: #define SRCH_ACCM (-4)
26: #define SRCH_MARK (-5)
27:
1.6 millert 28: #define RE_NMATCH 10 /* max number of matches */
29: #define REPLEN 256 /* max length of replacement string */
1.2 millert 30:
1.6 millert 31: char re_pat[NPAT]; /* regex pattern */
32: int re_srch_lastdir = SRCH_NOPR; /* last search flags */
33: int casefoldsearch = TRUE; /* does search ignore case? */
34:
1.11 ! millert 35: static int re_doreplace(RSIZE, char *, int);
! 36: static int re_forwsrch(void);
! 37: static int re_backsrch(void);
! 38: static int re_readpattern(char *);
! 39: static int killmatches(int);
! 40: static int countmatches(int);
1.1 deraadt 41:
42: /*
43: * Search forward.
1.9 mickey 44: * Get a search string from the user and search for it starting at ".". If
45: * found, move "." to just after the matched characters. display does all
1.6 millert 46: * the hard stuff. If not found, it just prints a message.
1.1 deraadt 47: */
1.5 millert 48: /* ARGSUSED */
1.6 millert 49: int
1.5 millert 50: re_forwsearch(f, n)
1.6 millert 51: int f, n;
1.5 millert 52: {
1.6 millert 53: int s;
1.1 deraadt 54:
1.5 millert 55: if ((s = re_readpattern("RE Search")) != TRUE)
1.1 deraadt 56: return (s);
57: if (re_forwsrch() == FALSE) {
58: ewprintf("Search failed: \"%s\"", re_pat);
59: return (FALSE);
60: }
61: re_srch_lastdir = SRCH_FORW;
62: return (TRUE);
63: }
64:
65: /*
66: * Reverse search.
67: * Get a search string from the user, and search, starting at "."
68: * and proceeding toward the front of the buffer. If found "." is left
69: * pointing at the first character of the pattern [the last character that
70: * was matched].
71: */
1.5 millert 72: /* ARGSUSED */
1.6 millert 73: int
1.5 millert 74: re_backsearch(f, n)
1.6 millert 75: int f, n;
1.5 millert 76: {
1.6 millert 77: int s;
1.1 deraadt 78:
1.5 millert 79: if ((s = re_readpattern("RE Search backward")) != TRUE)
1.1 deraadt 80: return (s);
81: if (re_backsrch() == FALSE) {
82: ewprintf("Search failed: \"%s\"", re_pat);
83: return (FALSE);
84: }
85: re_srch_lastdir = SRCH_BACK;
86: return (TRUE);
87: }
88:
89: /*
1.9 mickey 90: * Search again, using the same search string and direction as the last search
91: * command. The direction has been saved in "srch_lastdir", so you know which
1.6 millert 92: * way to go.
93: *
94: * XXX: This code has problems -- some incompatibility(?) with extend.c causes
95: * match to fail when it should not.
1.1 deraadt 96: */
1.5 millert 97: /* ARGSUSED */
1.6 millert 98: int
1.5 millert 99: re_searchagain(f, n)
1.6 millert 100: int f, n;
1.5 millert 101: {
102: if (re_srch_lastdir == SRCH_NOPR) {
103: ewprintf("No last search");
104: return (FALSE);
105: }
106: if (re_srch_lastdir == SRCH_FORW) {
107: if (re_forwsrch() == FALSE) {
108: ewprintf("Search failed: \"%s\"", re_pat);
109: return (FALSE);
110: }
111: return (TRUE);
112: }
1.9 mickey 113: if (re_srch_lastdir == SRCH_BACK)
1.5 millert 114: if (re_backsrch() == FALSE) {
115: ewprintf("Search failed: \"%s\"", re_pat);
116: return (FALSE);
117: }
1.6 millert 118:
119: return (TRUE);
1.1 deraadt 120: }
121:
122: /* Compiled regex goes here-- changed only when new pattern read */
1.6 millert 123: static regex_t re_buff;
124: static regmatch_t re_match[RE_NMATCH];
1.1 deraadt 125:
126: /*
127: * Re-Query Replace.
128: * Replace strings selectively. Does a search and replace operation.
129: */
1.5 millert 130: /* ARGSUSED */
1.6 millert 131: int
1.5 millert 132: re_queryrepl(f, n)
1.6 millert 133: int f, n;
1.5 millert 134: {
1.6 millert 135: int s;
136: int rcnt = 0; /* replacements made so far */
137: int plen; /* length of found string */
138: char news[NPAT]; /* replacement string */
1.1 deraadt 139:
140: /* Casefold check */
1.5 millert 141: if (!casefoldsearch)
142: f = TRUE;
1.1 deraadt 143:
1.5 millert 144: if ((s = re_readpattern("RE Query replace")) != TRUE)
1.1 deraadt 145: return (s);
1.9 mickey 146: if ((s =
1.6 millert 147: ereply("Query replace %s with: ", news, NPAT, re_pat)) == ABORT)
1.1 deraadt 148: return (s);
149: if (s == FALSE)
150: news[0] = '\0';
151: ewprintf("Query replacing %s with %s:", re_pat, news);
152:
153: /*
154: * Search forward repeatedly, checking each time whether to insert
155: * or not. The "!" case makes the check always true, so it gets put
156: * into a tighter loop for efficiency.
157: */
158: while (re_forwsrch() == TRUE) {
1.5 millert 159: retry:
1.1 deraadt 160: update();
161: switch (getkey(FALSE)) {
162: case ' ':
1.2 millert 163: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.6 millert 164: if (re_doreplace((RSIZE)plen, news, f) == FALSE)
1.1 deraadt 165: return (FALSE);
166: rcnt++;
167: break;
168:
169: case '.':
1.2 millert 170: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.6 millert 171: if (re_doreplace((RSIZE)plen, news, f) == FALSE)
1.1 deraadt 172: return (FALSE);
173: rcnt++;
174: goto stopsearch;
175:
1.6 millert 176: case CCHR('G'): /* ^G */
1.8 art 177: (void)ctrlg(FFRAND, 0);
1.6 millert 178: case CCHR('['): /* ESC */
1.1 deraadt 179: case '`':
180: goto stopsearch;
181: case '!':
182: do {
1.2 millert 183: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.6 millert 184: if (re_doreplace((RSIZE)plen, news, f) == FALSE)
1.1 deraadt 185: return (FALSE);
186: rcnt++;
187: } while (re_forwsrch() == TRUE);
188: goto stopsearch;
189:
1.6 millert 190: case CCHR('?'): /* To not replace */
1.1 deraadt 191: break;
192:
193: default:
1.5 millert 194: ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
1.1 deraadt 195: goto retry;
196: }
197: }
1.6 millert 198:
1.1 deraadt 199: stopsearch:
200: curwp->w_flag |= WFHARD;
201: update();
202: if (!inmacro) {
203: if (rcnt == 0)
204: ewprintf("(No replacements done)");
205: else if (rcnt == 1)
206: ewprintf("(1 replacement done)");
207: else
208: ewprintf("(%d replacements done)", rcnt);
209: }
210: return TRUE;
211: }
212:
1.5 millert 213: /*
214: * Routine re_doreplace calls lreplace to make replacements needed by
215: * re_query replace. Its reason for existence is to deal with \1, \2. etc.
1.1 deraadt 216: */
1.6 millert 217: static int
1.1 deraadt 218: re_doreplace(plen, st, f)
1.9 mickey 219: RSIZE plen; /* length to remove */
220: char *st; /* replacement string */
221: int f; /* case hack disable */
1.6 millert 222: {
223: int j, k, s, more, num, state;
224: LINE *clp;
225: char repstr[REPLEN];
1.5 millert 226:
227: clp = curwp->w_dotp;
228: more = TRUE;
229: j = 0;
230: state = 0;
1.6 millert 231: num = 0;
1.5 millert 232:
233: /* The following FSA parses the replacement string */
234: while (more) {
235: switch (state) {
236: case 0:
237: if (*st == '\\') {
238: st++;
239: state = 1;
240: } else if (*st == '\0')
241: more = FALSE;
242: else {
243: repstr[j] = *st;
244: j++;
245: if (j >= REPLEN)
246: return (FALSE);
247: st++;
248: }
249: break;
250: case 1:
251: if (*st >= '0' && *st <= '9') {
252: num = *st - '0';
253: st++;
254: state = 2;
255: } else if (*st == '\0')
256: more = FALSE;
257: else {
258: repstr[j] = *st;
259: j++;
260: if (j >= REPLEN)
261: return (FALSE);
262: st++;
263: state = 0;
264: }
265: break;
266: case 2:
267: if (*st >= '0' && *st <= '9') {
268: num = 10 * num + *st - '0';
269: st++;
270: } else {
271: if (num >= RE_NMATCH)
272: return (FALSE);
273: k = re_match[num].rm_eo - re_match[num].rm_so;
274: if (j + k >= REPLEN)
275: return (FALSE);
1.9 mickey 276: bcopy(&(clp->l_text[re_match[num].rm_so]),
1.6 millert 277: &repstr[j], k);
1.5 millert 278: j += k;
279: if (*st == '\0')
280: more = FALSE;
281: if (*st == '\\') {
282: st++;
283: state = 1;
284: } else {
285: repstr[j] = *st;
286: j++;
287: if (j >= REPLEN)
288: return (FALSE);
289: st++;
290: state = 0;
291: }
292: }
293: break;
1.6 millert 294: } /* switch (state) */
295: } /* while (more) */
1.1 deraadt 296:
1.5 millert 297: repstr[j] = '\0';
298: s = lreplace(plen, repstr, f);
299: return (s);
1.1 deraadt 300: }
301:
302:
303:
304: /*
1.9 mickey 305: * This routine does the real work of a forward search. The pattern is
306: * sitting in the external variable "pat". If found, dot is updated, the
1.6 millert 307: * window system is notified of the change, and TRUE is returned. If the
1.1 deraadt 308: * string isn't found, FALSE is returned.
309: */
1.6 millert 310: static int
1.5 millert 311: re_forwsrch()
312: {
1.6 millert 313: int tbo, error;
314: LINE *clp;
1.5 millert 315:
316: clp = curwp->w_dotp;
317: tbo = curwp->w_doto;
318:
319: if (tbo == clp->l_used)
320: /*
1.6 millert 321: * Don't start matching past end of line -- must move to
322: * beginning of next line, unless at end of file.
1.5 millert 323: */
324: if (clp != curbp->b_linep) {
325: clp = lforw(clp);
326: tbo = 0;
327: }
328: /*
329: * Note this loop does not process the last line, but this editor
330: * always makes the last line empty so this is good.
331: */
332: while (clp != (curbp->b_linep)) {
333: re_match[0].rm_so = tbo;
334: re_match[0].rm_eo = llength(clp);
1.9 mickey 335: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
1.6 millert 336: REG_STARTEND);
337: if (error != 0) {
1.5 millert 338: clp = lforw(clp);
339: tbo = 0;
340: } else {
341: curwp->w_doto = re_match[0].rm_eo;
342: curwp->w_dotp = clp;
343: curwp->w_flag |= WFMOVE;
344: return (TRUE);
345: }
346: }
347: return (FALSE);
1.1 deraadt 348: }
349:
350:
351: /*
1.6 millert 352: * This routine does the real work of a backward search. The pattern is sitting
1.9 mickey 353: * in the external variable "re_pat". If found, dot is updated, the window
354: * system is notified of the change, and TRUE is returned. If the string isn't
1.6 millert 355: * found, FALSE is returned.
1.1 deraadt 356: */
1.6 millert 357: static int
1.5 millert 358: re_backsrch()
359: {
1.6 millert 360: LINE *clp;
361: int tbo;
362: regmatch_t lastmatch;
1.5 millert 363:
364: clp = curwp->w_dotp;
365: tbo = curwp->w_doto;
366:
367: /* Start search one position to the left of dot */
368: tbo = tbo - 1;
369: if (tbo < 0) {
370: /* must move up one line */
371: clp = lback(clp);
372: tbo = llength(clp);
373: }
1.6 millert 374:
1.5 millert 375: /*
376: * Note this loop does not process the last line, but this editor
377: * always makes the last line empty so this is good.
378: */
379: while (clp != (curbp->b_linep)) {
380: re_match[0].rm_so = 0;
381: re_match[0].rm_eo = llength(clp);
382: lastmatch.rm_so = -1;
383: /*
384: * Keep searching until we don't match any longer. Assumes a
385: * non-match does not modify the re_match array. We have to
386: * do this character-by-character after the first match since
387: * POSIX regexps don't give you a way to do reverse matches.
388: */
389: while (!regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
390: REG_STARTEND) && re_match[0].rm_so < tbo) {
391: memcpy(&lastmatch, &re_match[0], sizeof(regmatch_t));
392: re_match[0].rm_so++;
393: re_match[0].rm_eo = llength(clp);
394: }
395: if (lastmatch.rm_so == -1) {
396: clp = lback(clp);
397: tbo = llength(clp);
398: } else {
399: memcpy(&re_match[0], &lastmatch, sizeof(regmatch_t));
400: curwp->w_doto = re_match[0].rm_so;
401: curwp->w_dotp = clp;
402: curwp->w_flag |= WFMOVE;
403: return (TRUE);
404: }
405: }
406: return (FALSE);
1.1 deraadt 407: }
408:
409:
410: /*
411: * Read a pattern.
412: * Stash it in the external variable "re_pat". The "pat" is
413: * not updated if the user types in an empty line. If the user typed
414: * an empty line, and there is no old pattern, it is an error.
415: * Display the old pattern, in the style of Jeff Lomicka. There is
416: * some do-it-yourself control expansion.
417: */
1.6 millert 418: static int
1.5 millert 419: re_readpattern(prompt)
1.6 millert 420: char *prompt;
1.5 millert 421: {
1.6 millert 422: int s, flags, error;
423: char tpat[NPAT];
424: static int dofree = 0;
1.5 millert 425:
426: if (re_pat[0] == '\0')
427: s = ereply("%s: ", tpat, NPAT, prompt);
428: else
429: s = ereply("%s: (default %s) ", tpat, NPAT, prompt, re_pat);
1.1 deraadt 430:
431: if (s == TRUE) {
1.5 millert 432: /* New pattern given */
1.10 vincent 433: (void)strlcpy(re_pat, tpat, sizeof re_pat);
1.5 millert 434: if (casefoldsearch)
435: flags = REG_EXTENDED | REG_ICASE;
436: else
437: flags = REG_EXTENDED;
438: if (dofree)
439: regfree(&re_buff);
440: error = regcomp(&re_buff, re_pat, flags);
1.6 millert 441: if (error != 0) {
442: char message[256];
1.5 millert 443: regerror(error, &re_buff, message, sizeof(message));
444: ewprintf("Regex Error: %s", message);
445: re_pat[0] = '\0';
446: return (FALSE);
447: }
448: dofree = 1;
449: } else if (s == FALSE && re_pat[0] != '\0')
450: /* Just using old pattern */
451: s = TRUE;
1.1 deraadt 452: return (s);
453: }
454:
1.5 millert 455: /*
456: * Cause case to not matter in searches. This is the default. If called
457: * with argument cause case to matter.
1.1 deraadt 458: */
1.6 millert 459: int
1.5 millert 460: setcasefold(f, n)
1.6 millert 461: int f, n;
1.5 millert 462: {
463: if (f & FFARG) {
464: casefoldsearch = FALSE;
465: ewprintf("Case-fold-search unset");
466: } else {
467: casefoldsearch = TRUE;
468: ewprintf("Case-fold-search set");
469: }
1.1 deraadt 470:
1.5 millert 471: /*
472: * Invalidate the regular expression pattern since I'm too lazy to
473: * recompile it.
474: */
475: re_pat[0] = '\0';
476: return (TRUE);
1.6 millert 477: }
1.1 deraadt 478:
479:
1.5 millert 480: /*
481: * Delete all lines after dot that contain a string matching regex
1.1 deraadt 482: */
1.6 millert 483: int
1.5 millert 484: delmatchlines(f, n)
1.6 millert 485: int f, n;
1.5 millert 486: {
1.6 millert 487: int s;
1.1 deraadt 488:
1.9 mickey 489: if ((s = re_readpattern("Flush lines (containing match for regexp)"))
1.6 millert 490: != TRUE)
1.5 millert 491: return (s);
1.1 deraadt 492:
1.5 millert 493: s = killmatches(TRUE);
494: return (s);
1.1 deraadt 495: }
496:
1.5 millert 497: /*
498: * Delete all lines after dot that don't contain a string matching regex
1.1 deraadt 499: */
1.6 millert 500: int
1.5 millert 501: delnonmatchlines(f, n)
1.6 millert 502: int f, n;
1.5 millert 503: {
1.6 millert 504: int s;
1.1 deraadt 505:
1.9 mickey 506: if ((s = re_readpattern("Keep lines (containing match for regexp)"))
1.6 millert 507: != TRUE)
1.5 millert 508: return (s);
1.1 deraadt 509:
1.5 millert 510: s = killmatches(FALSE);
511: return (s);
1.1 deraadt 512: }
513:
1.9 mickey 514: /*
515: * This function does the work of deleting matching lines
1.6 millert 516: */
517: static int
1.1 deraadt 518: killmatches(cond)
1.6 millert 519: int cond;
1.1 deraadt 520: {
1.6 millert 521: int s, error;
522: int count = 0;
523: LINE *clp;
1.5 millert 524:
525: clp = curwp->w_dotp;
526: if (curwp->w_doto == llength(clp))
527: /* Consider dot on next line */
528: clp = lforw(clp);
529:
530: while (clp != (curbp->b_linep)) {
531: /* see if line matches */
532: re_match[0].rm_so = 0;
533: re_match[0].rm_eo = llength(clp);
1.9 mickey 534: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
1.6 millert 535: REG_STARTEND);
1.5 millert 536:
537: /* Delete line when appropriate */
538: if ((cond == FALSE && error) || (cond == TRUE && !error)) {
539: curwp->w_doto = 0;
540: curwp->w_dotp = clp;
541: count++;
542: s = ldelete(llength(clp) + 1, KNONE);
543: clp = curwp->w_dotp;
544: curwp->w_flag |= WFMOVE;
545: if (s == FALSE)
546: return (FALSE);
547: } else
548: clp = lforw(clp);
549: }
1.1 deraadt 550:
1.5 millert 551: ewprintf("%d line(s) deleted", count);
552: if (count > 0)
553: curwp->w_flag |= WFMOVE;
1.1 deraadt 554:
1.5 millert 555: return (TRUE);
1.1 deraadt 556: }
557:
1.5 millert 558: /*
559: * Count lines matching regex
1.1 deraadt 560: */
1.6 millert 561: int
1.5 millert 562: cntmatchlines(f, n)
1.6 millert 563: int f, n;
1.5 millert 564: {
1.6 millert 565: int s;
1.1 deraadt 566:
1.5 millert 567: if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
568: return (s);
569: s = countmatches(TRUE);
570: return (s);
1.1 deraadt 571: }
572:
1.5 millert 573: /*
574: * Count lines that fail to match regex
1.1 deraadt 575: */
1.6 millert 576: int
1.5 millert 577: cntnonmatchlines(f, n)
1.6 millert 578: int f, n;
1.5 millert 579: {
1.6 millert 580: int s;
1.1 deraadt 581:
1.5 millert 582: if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
583: return (s);
1.1 deraadt 584:
1.5 millert 585: s = countmatches(FALSE);
1.1 deraadt 586:
1.5 millert 587: return (s);
1.1 deraadt 588: }
589:
1.6 millert 590: /*
591: * This function does the work of counting matching lines.
592: */
593: int
1.1 deraadt 594: countmatches(cond)
1.6 millert 595: int cond;
1.1 deraadt 596: {
1.6 millert 597: int error;
598: int count = 0;
599: LINE *clp;
1.5 millert 600:
601: clp = curwp->w_dotp;
602: if (curwp->w_doto == llength(clp))
603: /* Consider dot on next line */
604: clp = lforw(clp);
605:
606: while (clp != (curbp->b_linep)) {
607: /* see if line matches */
608: re_match[0].rm_so = 0;
609: re_match[0].rm_eo = llength(clp);
1.9 mickey 610: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
1.6 millert 611: REG_STARTEND);
1.5 millert 612:
613: /* Count line when appropriate */
614: if ((cond == FALSE && error) || (cond == TRUE && !error))
615: count++;
616: clp = lforw(clp);
617: }
1.1 deraadt 618:
1.5 millert 619: if (cond)
620: ewprintf("Number of lines matching: %d", count);
621: else
622: ewprintf("Number of lines not matching: %d", count);
623:
624: return (TRUE);
1.1 deraadt 625: }
1.6 millert 626: #endif /* REGEX */