Annotation of src/usr.bin/mg/re_search.c, Revision 1.15
1.15 ! kjell 1: /* $OpenBSD: re_search.c,v 1.14 2005/04/03 02:09:28 db Exp $ */
1.7 niklas 2:
1.1 deraadt 3: /*
1.6 millert 4: * regular expression search commands for Mg
1.1 deraadt 5: *
1.6 millert 6: * This file contains functions to implement several of gnuemacs's regular
7: * expression functions for Mg. Several of the routines below are just minor
8: * re-arrangements of Mg's non-regular expression search functions. Some of
1.9 mickey 9: * them are similar in structure to the original MicroEMACS, others are
1.6 millert 10: * modifications of Rich Ellison's code. Peter Newton re-wrote about half of
11: * them from scratch.
1.1 deraadt 12: */
13:
1.6 millert 14: #ifdef REGEX
1.2 millert 15: #include <sys/types.h>
16: #include <regex.h>
17:
1.6 millert 18: #include "def.h"
19: #include "macro.h"
1.1 deraadt 20:
1.6 millert 21: #define SRCH_BEGIN (0) /* search sub-codes; only FORW, BACK and NOPR are referenced in this file */
1.1 deraadt 22: #define SRCH_FORW (-1) /* last search went forward */
23: #define SRCH_BACK (-2) /* last search went backward */
24: #define SRCH_NOPR (-3) /* no previous search recorded */
25: #define SRCH_ACCM (-4)
26: #define SRCH_MARK (-5)
27:
1.6 millert 28: #define RE_NMATCH 10 /* entries in re_match[]; back-reference numbers must be below this */
29: #define REPLEN 256 /* size of the expanded replacement buffer in re_doreplace() */
1.2 millert 30:
1.6 millert 31: char re_pat[NPAT]; /* text of the last pattern read; empty string means none */
32: int re_srch_lastdir = SRCH_NOPR; /* direction of the last search: SRCH_FORW, SRCH_BACK or SRCH_NOPR */
33: int casefoldsearch = TRUE; /* TRUE: patterns are compiled with REG_ICASE */
34:
1.11 millert 35: static int re_doreplace(RSIZE, char *, int);
36: static int re_forwsrch(void);
37: static int re_backsrch(void);
38: static int re_readpattern(char *);
39: static int killmatches(int);
40: static int countmatches(int);
1.1 deraadt 41:
42: /*
43: * Search forward.
1.9 mickey 44: * Get a search string from the user and search for it starting at ".". If
45: * found, move "." to just after the matched characters. display does all
1.6 millert 46: * the hard stuff. If not found, it just prints a message.
1.1 deraadt 47: */
1.5 millert 48: /* ARGSUSED */
1.6 millert 49: int
1.12 cloder 50: re_forwsearch(int f, int n)
1.5 millert 51: {
1.6 millert 52: int s;
1.1 deraadt 53:
1.5 millert 54: if ((s = re_readpattern("RE Search")) != TRUE)
1.1 deraadt 55: return (s);
56: if (re_forwsrch() == FALSE) {
57: ewprintf("Search failed: \"%s\"", re_pat);
58: return (FALSE);
59: }
60: re_srch_lastdir = SRCH_FORW;
61: return (TRUE);
62: }
63:
64: /*
65: * Reverse search.
1.14 db 66: * Get a search string from the user, and search, starting at "."
1.1 deraadt 67: * and proceeding toward the front of the buffer. If found "." is left
68: * pointing at the first character of the pattern [the last character that
69: * was matched].
70: */
1.5 millert 71: /* ARGSUSED */
1.6 millert 72: int
1.12 cloder 73: re_backsearch(int f, int n)
1.5 millert 74: {
1.6 millert 75: int s;
1.1 deraadt 76:
1.5 millert 77: if ((s = re_readpattern("RE Search backward")) != TRUE)
1.1 deraadt 78: return (s);
79: if (re_backsrch() == FALSE) {
80: ewprintf("Search failed: \"%s\"", re_pat);
81: return (FALSE);
82: }
83: re_srch_lastdir = SRCH_BACK;
84: return (TRUE);
85: }
86:
87: /*
1.9 mickey 88: * Search again, using the same search string and direction as the last search
89: * command. The direction has been saved in "srch_lastdir", so you know which
1.6 millert 90: * way to go.
91: *
92: * XXX: This code has problems -- some incompatibility(?) with extend.c causes
93: * match to fail when it should not.
1.1 deraadt 94: */
1.5 millert 95: /* ARGSUSED */
1.6 millert 96: int
1.12 cloder 97: re_searchagain(int f, int n)
1.5 millert 98: {
99: if (re_srch_lastdir == SRCH_NOPR) {
100: ewprintf("No last search");
101: return (FALSE);
102: }
103: if (re_srch_lastdir == SRCH_FORW) {
104: if (re_forwsrch() == FALSE) {
105: ewprintf("Search failed: \"%s\"", re_pat);
106: return (FALSE);
107: }
108: return (TRUE);
109: }
1.9 mickey 110: if (re_srch_lastdir == SRCH_BACK)
1.5 millert 111: if (re_backsrch() == FALSE) {
112: ewprintf("Search failed: \"%s\"", re_pat);
113: return (FALSE);
114: }
1.6 millert 115:
116: return (TRUE);
1.1 deraadt 117: }
118:
119: /* Compiled form of re_pat; rebuilt by re_readpattern() only when a new pattern is read */
1.6 millert 120: static regex_t re_buff;
121: static regmatch_t re_match[RE_NMATCH]; /* offsets from the latest regexec(); [0] is the whole match */
1.1 deraadt 122:
123: /*
124: * Re-Query Replace.
125: * Replace strings selectively. Does a search and replace operation.
126: */
1.5 millert 127: /* ARGSUSED */
1.6 millert 128: int
1.12 cloder 129: re_queryrepl(int f, int n)
1.5 millert 130: {
1.14 db 131: int rcnt = 0; /* replacements made so far */
1.13 vincent 132: int plen, s; /* length of found string */
133: char news[NPAT], *rep; /* replacement string */
1.1 deraadt 134:
135: /* Casefold check */
1.5 millert 136: if (!casefoldsearch)
137: f = TRUE;
1.1 deraadt 138:
1.5 millert 139: if ((s = re_readpattern("RE Query replace")) != TRUE)
1.1 deraadt 140: return (s);
1.13 vincent 141: if ((rep =
142: ereply("Query replace %s with: ", news, NPAT, re_pat)) == NULL)
143: return (ABORT);
1.1 deraadt 144: ewprintf("Query replacing %s with %s:", re_pat, news);
145:
146: /*
147: * Search forward repeatedly, checking each time whether to insert
148: * or not. The "!" case makes the check always true, so it gets put
149: * into a tighter loop for efficiency.
150: */
151: while (re_forwsrch() == TRUE) {
1.5 millert 152: retry:
1.1 deraadt 153: update();
154: switch (getkey(FALSE)) {
155: case ' ':
1.2 millert 156: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.6 millert 157: if (re_doreplace((RSIZE)plen, news, f) == FALSE)
1.1 deraadt 158: return (FALSE);
159: rcnt++;
160: break;
161:
162: case '.':
1.2 millert 163: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.6 millert 164: if (re_doreplace((RSIZE)plen, news, f) == FALSE)
1.1 deraadt 165: return (FALSE);
166: rcnt++;
167: goto stopsearch;
168:
1.6 millert 169: case CCHR('G'): /* ^G */
1.8 art 170: (void)ctrlg(FFRAND, 0);
1.6 millert 171: case CCHR('['): /* ESC */
1.1 deraadt 172: case '`':
173: goto stopsearch;
174: case '!':
175: do {
1.2 millert 176: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.6 millert 177: if (re_doreplace((RSIZE)plen, news, f) == FALSE)
1.1 deraadt 178: return (FALSE);
179: rcnt++;
180: } while (re_forwsrch() == TRUE);
181: goto stopsearch;
182:
1.6 millert 183: case CCHR('?'): /* To not replace */
1.1 deraadt 184: break;
185:
186: default:
1.5 millert 187: ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
1.1 deraadt 188: goto retry;
189: }
190: }
1.6 millert 191:
1.1 deraadt 192: stopsearch:
193: curwp->w_flag |= WFHARD;
194: update();
195: if (!inmacro) {
196: if (rcnt == 0)
197: ewprintf("(No replacements done)");
198: else if (rcnt == 1)
199: ewprintf("(1 replacement done)");
200: else
201: ewprintf("(%d replacements done)", rcnt);
202: }
1.14 db 203: return (TRUE);
1.1 deraadt 204: }
205:
1.5 millert 206: /*
207: * Routine re_doreplace calls lreplace to make replacements needed by
208: * re_query replace. Its reason for existence is to deal with \1, \2. etc.
1.12 cloder 209: * plen: length to remove
210: * st: replacement string
211: * f: case hack disable
1.1 deraadt 212: */
1.6 millert 213: static int
1.12 cloder 214: re_doreplace(RSIZE plen, char *st, int f)
1.6 millert 215: {
216: int j, k, s, more, num, state;
217: LINE *clp;
218: char repstr[REPLEN];
1.5 millert 219:
220: clp = curwp->w_dotp;
221: more = TRUE;
222: j = 0;
223: state = 0;
1.6 millert 224: num = 0;
1.5 millert 225:
226: /* The following FSA parses the replacement string */
227: while (more) {
228: switch (state) {
229: case 0:
230: if (*st == '\\') {
231: st++;
232: state = 1;
233: } else if (*st == '\0')
234: more = FALSE;
235: else {
236: repstr[j] = *st;
237: j++;
238: if (j >= REPLEN)
239: return (FALSE);
240: st++;
241: }
242: break;
243: case 1:
244: if (*st >= '0' && *st <= '9') {
245: num = *st - '0';
246: st++;
247: state = 2;
248: } else if (*st == '\0')
249: more = FALSE;
250: else {
251: repstr[j] = *st;
252: j++;
253: if (j >= REPLEN)
254: return (FALSE);
255: st++;
256: state = 0;
257: }
258: break;
259: case 2:
260: if (*st >= '0' && *st <= '9') {
261: num = 10 * num + *st - '0';
262: st++;
263: } else {
264: if (num >= RE_NMATCH)
265: return (FALSE);
266: k = re_match[num].rm_eo - re_match[num].rm_so;
267: if (j + k >= REPLEN)
268: return (FALSE);
1.9 mickey 269: bcopy(&(clp->l_text[re_match[num].rm_so]),
1.6 millert 270: &repstr[j], k);
1.5 millert 271: j += k;
272: if (*st == '\0')
273: more = FALSE;
274: if (*st == '\\') {
275: st++;
276: state = 1;
277: } else {
278: repstr[j] = *st;
279: j++;
280: if (j >= REPLEN)
281: return (FALSE);
282: st++;
283: state = 0;
284: }
285: }
286: break;
1.6 millert 287: } /* switch (state) */
288: } /* while (more) */
1.1 deraadt 289:
1.5 millert 290: repstr[j] = '\0';
291: s = lreplace(plen, repstr, f);
292: return (s);
1.1 deraadt 293: }
294:
295: /*
1.9 mickey 296: * This routine does the real work of a forward search. The pattern is
297: * sitting in the external variable "pat". If found, dot is updated, the
1.6 millert 298: * window system is notified of the change, and TRUE is returned. If the
1.1 deraadt 299: * string isn't found, FALSE is returned.
300: */
1.6 millert 301: static int
1.12 cloder 302: re_forwsrch(void)
1.5 millert 303: {
1.6 millert 304: int tbo, error;
305: LINE *clp;
1.5 millert 306:
307: clp = curwp->w_dotp;
308: tbo = curwp->w_doto;
309:
310: if (tbo == clp->l_used)
311: /*
1.6 millert 312: * Don't start matching past end of line -- must move to
313: * beginning of next line, unless at end of file.
1.5 millert 314: */
315: if (clp != curbp->b_linep) {
316: clp = lforw(clp);
317: tbo = 0;
318: }
319: /*
320: * Note this loop does not process the last line, but this editor
321: * always makes the last line empty so this is good.
322: */
323: while (clp != (curbp->b_linep)) {
324: re_match[0].rm_so = tbo;
325: re_match[0].rm_eo = llength(clp);
1.9 mickey 326: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
1.6 millert 327: REG_STARTEND);
328: if (error != 0) {
1.5 millert 329: clp = lforw(clp);
330: tbo = 0;
331: } else {
332: curwp->w_doto = re_match[0].rm_eo;
333: curwp->w_dotp = clp;
334: curwp->w_flag |= WFMOVE;
335: return (TRUE);
336: }
337: }
338: return (FALSE);
1.1 deraadt 339: }
340:
341: /*
1.6 millert 342: * This routine does the real work of a backward search. The pattern is sitting
1.9 mickey 343: * in the external variable "re_pat". If found, dot is updated, the window
344: * system is notified of the change, and TRUE is returned. If the string isn't
1.6 millert 345: * found, FALSE is returned.
1.1 deraadt 346: */
1.6 millert 347: static int
1.12 cloder 348: re_backsrch(void)
1.5 millert 349: {
1.6 millert 350: LINE *clp;
351: int tbo;
352: regmatch_t lastmatch;
1.5 millert 353:
354: clp = curwp->w_dotp;
355: tbo = curwp->w_doto;
356:
357: /* Start search one position to the left of dot */
358: tbo = tbo - 1;
359: if (tbo < 0) {
360: /* must move up one line */
361: clp = lback(clp);
362: tbo = llength(clp);
363: }
1.6 millert 364:
1.5 millert 365: /*
366: * Note this loop does not process the last line, but this editor
367: * always makes the last line empty so this is good.
368: */
369: while (clp != (curbp->b_linep)) {
370: re_match[0].rm_so = 0;
371: re_match[0].rm_eo = llength(clp);
372: lastmatch.rm_so = -1;
373: /*
374: * Keep searching until we don't match any longer. Assumes a
375: * non-match does not modify the re_match array. We have to
376: * do this character-by-character after the first match since
377: * POSIX regexps don't give you a way to do reverse matches.
378: */
379: while (!regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
380: REG_STARTEND) && re_match[0].rm_so < tbo) {
381: memcpy(&lastmatch, &re_match[0], sizeof(regmatch_t));
382: re_match[0].rm_so++;
383: re_match[0].rm_eo = llength(clp);
384: }
385: if (lastmatch.rm_so == -1) {
386: clp = lback(clp);
387: tbo = llength(clp);
388: } else {
389: memcpy(&re_match[0], &lastmatch, sizeof(regmatch_t));
390: curwp->w_doto = re_match[0].rm_so;
391: curwp->w_dotp = clp;
392: curwp->w_flag |= WFMOVE;
393: return (TRUE);
394: }
395: }
396: return (FALSE);
1.1 deraadt 397: }
398:
399: /*
400: * Read a pattern.
401: * Stash it in the external variable "re_pat". The "pat" is
402: * not updated if the user types in an empty line. If the user typed
403: * an empty line, and there is no old pattern, it is an error.
404: * Display the old pattern, in the style of Jeff Lomicka. There is
405: * some do-it-yourself control expansion.
406: */
1.6 millert 407: static int
1.12 cloder 408: re_readpattern(char *prompt)
1.5 millert 409: {
1.6 millert 410: static int dofree = 0;
1.13 vincent 411: int flags, error, s;
412: char tpat[NPAT], *rep;
1.5 millert 413:
414: if (re_pat[0] == '\0')
1.13 vincent 415: rep = ereply("%s: ", tpat, NPAT, prompt);
1.5 millert 416: else
1.15 ! kjell 417: rep = eread("%s: (default %s) ", tpat, NPAT,
! 418: EFNUL | EFNEW | EFCR, prompt, re_pat);
1.1 deraadt 419:
1.13 vincent 420: if (rep != NULL && *rep != '\0') {
1.5 millert 421: /* New pattern given */
1.14 db 422: (void)strlcpy(re_pat, tpat, sizeof(re_pat));
1.5 millert 423: if (casefoldsearch)
424: flags = REG_EXTENDED | REG_ICASE;
425: else
426: flags = REG_EXTENDED;
427: if (dofree)
428: regfree(&re_buff);
429: error = regcomp(&re_buff, re_pat, flags);
1.6 millert 430: if (error != 0) {
431: char message[256];
1.5 millert 432: regerror(error, &re_buff, message, sizeof(message));
433: ewprintf("Regex Error: %s", message);
434: re_pat[0] = '\0';
435: return (FALSE);
436: }
437: dofree = 1;
1.13 vincent 438: s = TRUE;
439: } else if (rep[0] == '\0' && re_pat[0] != '\0')
1.5 millert 440: /* Just using old pattern */
441: s = TRUE;
1.13 vincent 442: else
443: s = FALSE;
1.1 deraadt 444: return (s);
445: }
446:
1.5 millert 447: /*
448: * Cause case to not matter in searches. This is the default. If called
449: * with argument cause case to matter.
1.1 deraadt 450: */
1.6 millert 451: int
1.12 cloder 452: setcasefold(int f, int n)
1.5 millert 453: {
454: if (f & FFARG) {
455: casefoldsearch = FALSE;
456: ewprintf("Case-fold-search unset");
457: } else {
458: casefoldsearch = TRUE;
459: ewprintf("Case-fold-search set");
460: }
1.1 deraadt 461:
1.5 millert 462: /*
463: * Invalidate the regular expression pattern since I'm too lazy to
464: * recompile it.
465: */
466: re_pat[0] = '\0';
467: return (TRUE);
1.6 millert 468: }
1.1 deraadt 469:
1.5 millert 470: /*
1.14 db 471: * Delete all lines after dot that contain a string matching regex.
1.1 deraadt 472: */
1.6 millert 473: int
1.12 cloder 474: delmatchlines(int f, int n)
1.5 millert 475: {
1.6 millert 476: int s;
1.1 deraadt 477:
1.9 mickey 478: if ((s = re_readpattern("Flush lines (containing match for regexp)"))
1.6 millert 479: != TRUE)
1.5 millert 480: return (s);
1.1 deraadt 481:
1.5 millert 482: s = killmatches(TRUE);
483: return (s);
1.1 deraadt 484: }
485:
1.5 millert 486: /*
1.14 db 487: * Delete all lines after dot that don't contain a string matching regex.
1.1 deraadt 488: */
1.6 millert 489: int
1.12 cloder 490: delnonmatchlines(int f, int n)
1.5 millert 491: {
1.6 millert 492: int s;
1.1 deraadt 493:
1.9 mickey 494: if ((s = re_readpattern("Keep lines (containing match for regexp)"))
1.6 millert 495: != TRUE)
1.5 millert 496: return (s);
1.1 deraadt 497:
1.5 millert 498: s = killmatches(FALSE);
499: return (s);
1.1 deraadt 500: }
501:
1.9 mickey 502: /*
1.14 db 503: * This function does the work of deleting matching lines.
1.6 millert 504: */
505: static int
1.12 cloder 506: killmatches(int cond)
1.1 deraadt 507: {
1.6 millert 508: int s, error;
509: int count = 0;
510: LINE *clp;
1.5 millert 511:
512: clp = curwp->w_dotp;
513: if (curwp->w_doto == llength(clp))
514: /* Consider dot on next line */
515: clp = lforw(clp);
516:
517: while (clp != (curbp->b_linep)) {
518: /* see if line matches */
519: re_match[0].rm_so = 0;
520: re_match[0].rm_eo = llength(clp);
1.9 mickey 521: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
1.6 millert 522: REG_STARTEND);
1.5 millert 523:
524: /* Delete line when appropriate */
525: if ((cond == FALSE && error) || (cond == TRUE && !error)) {
526: curwp->w_doto = 0;
527: curwp->w_dotp = clp;
528: count++;
529: s = ldelete(llength(clp) + 1, KNONE);
530: clp = curwp->w_dotp;
531: curwp->w_flag |= WFMOVE;
532: if (s == FALSE)
533: return (FALSE);
534: } else
535: clp = lforw(clp);
536: }
1.1 deraadt 537:
1.5 millert 538: ewprintf("%d line(s) deleted", count);
539: if (count > 0)
540: curwp->w_flag |= WFMOVE;
1.1 deraadt 541:
1.5 millert 542: return (TRUE);
1.1 deraadt 543: }
544:
1.5 millert 545: /*
1.14 db 546: * Count lines matching regex.
1.1 deraadt 547: */
1.6 millert 548: int
1.12 cloder 549: cntmatchlines(int f, int n)
1.5 millert 550: {
1.6 millert 551: int s;
1.1 deraadt 552:
1.5 millert 553: if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
554: return (s);
555: s = countmatches(TRUE);
1.14 db 556:
1.5 millert 557: return (s);
1.1 deraadt 558: }
559:
1.5 millert 560: /*
1.14 db 561: * Count lines that fail to match regex.
1.1 deraadt 562: */
1.6 millert 563: int
1.12 cloder 564: cntnonmatchlines(int f, int n)
1.5 millert 565: {
1.6 millert 566: int s;
1.1 deraadt 567:
1.5 millert 568: if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
569: return (s);
570: s = countmatches(FALSE);
1.1 deraadt 571:
1.5 millert 572: return (s);
1.1 deraadt 573: }
574:
1.6 millert 575: /*
576: * This function does the work of counting matching lines.
577: */
578: int
1.12 cloder 579: countmatches(int cond)
1.1 deraadt 580: {
1.6 millert 581: int error;
582: int count = 0;
583: LINE *clp;
1.5 millert 584:
585: clp = curwp->w_dotp;
586: if (curwp->w_doto == llength(clp))
587: /* Consider dot on next line */
588: clp = lforw(clp);
589:
590: while (clp != (curbp->b_linep)) {
591: /* see if line matches */
592: re_match[0].rm_so = 0;
593: re_match[0].rm_eo = llength(clp);
1.9 mickey 594: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
1.6 millert 595: REG_STARTEND);
1.5 millert 596:
597: /* Count line when appropriate */
598: if ((cond == FALSE && error) || (cond == TRUE && !error))
599: count++;
600: clp = lforw(clp);
601: }
1.1 deraadt 602:
1.5 millert 603: if (cond)
604: ewprintf("Number of lines matching: %d", count);
605: else
606: ewprintf("Number of lines not matching: %d", count);
607:
608: return (TRUE);
1.1 deraadt 609: }
1.6 millert 610: #endif /* REGEX */