Annotation of src/usr.bin/mg/re_search.c, Revision 1.18
1.18 ! kjell 1: /* $OpenBSD: re_search.c,v 1.17 2005/06/14 18:14:40 kjell Exp $ */
1.17 kjell 2:
3: /* This file is in the public domain. */
1.7 niklas 4:
1.1 deraadt 5: /*
1.6 millert 6: * regular expression search commands for Mg
1.1 deraadt 7: *
1.6 millert 8: * This file contains functions to implement several of gnuemacs's regular
9: * expression functions for Mg. Several of the routines below are just minor
10: * re-arrangements of Mg's non-regular expression search functions. Some of
1.9 mickey 11: * them are similar in structure to the original MicroEMACS, others are
1.6 millert 12: * modifications of Rich Ellison's code. Peter Newton re-wrote about half of
13: * them from scratch.
1.1 deraadt 14: */
15:
1.6 millert 16: #ifdef REGEX
1.2 millert 17: #include <sys/types.h>
18: #include <regex.h>
19:
1.6 millert 20: #include "def.h"
21: #include "macro.h"
1.1 deraadt 22:
1.6 millert 23: #define SRCH_BEGIN (0) /* search sub-codes */
1.1 deraadt 24: #define SRCH_FORW (-1)
25: #define SRCH_BACK (-2)
26: #define SRCH_NOPR (-3)
27: #define SRCH_ACCM (-4)
28: #define SRCH_MARK (-5)
29:
1.6 millert 30: #define RE_NMATCH 10 /* max number of matches */
31: #define REPLEN 256 /* max length of replacement string */
1.2 millert 32:
1.6 millert 33: char re_pat[NPAT]; /* regex pattern */
34: int re_srch_lastdir = SRCH_NOPR; /* last search flags */
35: int casefoldsearch = TRUE; /* does search ignore case? */
36:
1.11 millert 37: static int re_doreplace(RSIZE, char *, int);
38: static int re_forwsrch(void);
39: static int re_backsrch(void);
40: static int re_readpattern(char *);
41: static int killmatches(int);
42: static int countmatches(int);
1.1 deraadt 43:
44: /*
45: * Search forward.
1.9 mickey 46: * Get a search string from the user and search for it starting at ".". If
47: * found, move "." to just after the matched characters. display does all
1.6 millert 48: * the hard stuff. If not found, it just prints a message.
1.1 deraadt 49: */
1.5 millert 50: /* ARGSUSED */
1.6 millert 51: int
1.12 cloder 52: re_forwsearch(int f, int n)
1.5 millert 53: {
1.6 millert 54: int s;
1.1 deraadt 55:
1.5 millert 56: if ((s = re_readpattern("RE Search")) != TRUE)
1.1 deraadt 57: return (s);
58: if (re_forwsrch() == FALSE) {
59: ewprintf("Search failed: \"%s\"", re_pat);
60: return (FALSE);
61: }
62: re_srch_lastdir = SRCH_FORW;
63: return (TRUE);
64: }
65:
66: /*
67: * Reverse search.
1.14 db 68: * Get a search string from the user, and search, starting at "."
1.1 deraadt 69: * and proceeding toward the front of the buffer. If found "." is left
70: * pointing at the first character of the pattern [the last character that
71: * was matched].
72: */
1.5 millert 73: /* ARGSUSED */
1.6 millert 74: int
1.12 cloder 75: re_backsearch(int f, int n)
1.5 millert 76: {
1.6 millert 77: int s;
1.1 deraadt 78:
1.5 millert 79: if ((s = re_readpattern("RE Search backward")) != TRUE)
1.1 deraadt 80: return (s);
81: if (re_backsrch() == FALSE) {
82: ewprintf("Search failed: \"%s\"", re_pat);
83: return (FALSE);
84: }
85: re_srch_lastdir = SRCH_BACK;
86: return (TRUE);
87: }
88:
89: /*
1.9 mickey 90: * Search again, using the same search string and direction as the last search
91: * command. The direction has been saved in "srch_lastdir", so you know which
1.6 millert 92: * way to go.
93: *
94: * XXX: This code has problems -- some incompatibility(?) with extend.c causes
95: * match to fail when it should not.
1.1 deraadt 96: */
1.5 millert 97: /* ARGSUSED */
1.6 millert 98: int
1.12 cloder 99: re_searchagain(int f, int n)
1.5 millert 100: {
101: if (re_srch_lastdir == SRCH_NOPR) {
102: ewprintf("No last search");
103: return (FALSE);
104: }
105: if (re_srch_lastdir == SRCH_FORW) {
106: if (re_forwsrch() == FALSE) {
107: ewprintf("Search failed: \"%s\"", re_pat);
108: return (FALSE);
109: }
110: return (TRUE);
111: }
1.9 mickey 112: if (re_srch_lastdir == SRCH_BACK)
1.5 millert 113: if (re_backsrch() == FALSE) {
114: ewprintf("Search failed: \"%s\"", re_pat);
115: return (FALSE);
116: }
1.6 millert 117:
118: return (TRUE);
1.1 deraadt 119: }
120:
121: /* Compiled regex goes here-- changed only when new pattern read */
1.6 millert 122: static regex_t re_buff;
123: static regmatch_t re_match[RE_NMATCH];
1.1 deraadt 124:
125: /*
126: * Re-Query Replace.
127: * Replace strings selectively. Does a search and replace operation.
128: */
1.5 millert 129: /* ARGSUSED */
1.6 millert 130: int
1.12 cloder 131: re_queryrepl(int f, int n)
1.5 millert 132: {
1.14 db 133: int rcnt = 0; /* replacements made so far */
1.13 vincent 134: int plen, s; /* length of found string */
135: char news[NPAT], *rep; /* replacement string */
1.1 deraadt 136:
137: /* Casefold check */
1.5 millert 138: if (!casefoldsearch)
139: f = TRUE;
1.1 deraadt 140:
1.5 millert 141: if ((s = re_readpattern("RE Query replace")) != TRUE)
1.1 deraadt 142: return (s);
1.16 kjell 143: if ((rep = eread("Query replace %s with: ", news, NPAT,
144: EFNUL | EFNEW | EFCR, re_pat)) == NULL)
1.13 vincent 145: return (ABORT);
1.1 deraadt 146: ewprintf("Query replacing %s with %s:", re_pat, news);
147:
148: /*
149: * Search forward repeatedly, checking each time whether to insert
150: * or not. The "!" case makes the check always true, so it gets put
151: * into a tighter loop for efficiency.
152: */
153: while (re_forwsrch() == TRUE) {
1.5 millert 154: retry:
1.1 deraadt 155: update();
156: switch (getkey(FALSE)) {
157: case ' ':
1.2 millert 158: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.6 millert 159: if (re_doreplace((RSIZE)plen, news, f) == FALSE)
1.1 deraadt 160: return (FALSE);
161: rcnt++;
162: break;
163:
164: case '.':
1.2 millert 165: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.6 millert 166: if (re_doreplace((RSIZE)plen, news, f) == FALSE)
1.1 deraadt 167: return (FALSE);
168: rcnt++;
169: goto stopsearch;
170:
1.6 millert 171: case CCHR('G'): /* ^G */
1.8 art 172: (void)ctrlg(FFRAND, 0);
1.6 millert 173: case CCHR('['): /* ESC */
1.1 deraadt 174: case '`':
175: goto stopsearch;
176: case '!':
177: do {
1.2 millert 178: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.6 millert 179: if (re_doreplace((RSIZE)plen, news, f) == FALSE)
1.1 deraadt 180: return (FALSE);
181: rcnt++;
182: } while (re_forwsrch() == TRUE);
183: goto stopsearch;
184:
1.6 millert 185: case CCHR('?'): /* To not replace */
1.1 deraadt 186: break;
187:
188: default:
1.5 millert 189: ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
1.1 deraadt 190: goto retry;
191: }
192: }
1.6 millert 193:
1.1 deraadt 194: stopsearch:
195: curwp->w_flag |= WFHARD;
196: update();
197: if (!inmacro) {
198: if (rcnt == 0)
199: ewprintf("(No replacements done)");
200: else if (rcnt == 1)
201: ewprintf("(1 replacement done)");
202: else
203: ewprintf("(%d replacements done)", rcnt);
204: }
1.14 db 205: return (TRUE);
1.1 deraadt 206: }
207:
1.5 millert 208: /*
209: * Routine re_doreplace calls lreplace to make replacements needed by
210: * re_query replace. Its reason for existence is to deal with \1, \2. etc.
1.12 cloder 211: * plen: length to remove
212: * st: replacement string
213: * f: case hack disable
1.1 deraadt 214: */
1.6 millert 215: static int
1.12 cloder 216: re_doreplace(RSIZE plen, char *st, int f)
1.6 millert 217: {
218: int j, k, s, more, num, state;
219: LINE *clp;
220: char repstr[REPLEN];
1.5 millert 221:
222: clp = curwp->w_dotp;
223: more = TRUE;
224: j = 0;
225: state = 0;
1.6 millert 226: num = 0;
1.5 millert 227:
228: /* The following FSA parses the replacement string */
229: while (more) {
230: switch (state) {
231: case 0:
232: if (*st == '\\') {
233: st++;
234: state = 1;
235: } else if (*st == '\0')
236: more = FALSE;
237: else {
238: repstr[j] = *st;
239: j++;
240: if (j >= REPLEN)
241: return (FALSE);
242: st++;
243: }
244: break;
245: case 1:
246: if (*st >= '0' && *st <= '9') {
247: num = *st - '0';
248: st++;
249: state = 2;
250: } else if (*st == '\0')
251: more = FALSE;
252: else {
253: repstr[j] = *st;
254: j++;
255: if (j >= REPLEN)
256: return (FALSE);
257: st++;
258: state = 0;
259: }
260: break;
261: case 2:
262: if (*st >= '0' && *st <= '9') {
263: num = 10 * num + *st - '0';
264: st++;
265: } else {
266: if (num >= RE_NMATCH)
267: return (FALSE);
268: k = re_match[num].rm_eo - re_match[num].rm_so;
269: if (j + k >= REPLEN)
270: return (FALSE);
1.9 mickey 271: bcopy(&(clp->l_text[re_match[num].rm_so]),
1.6 millert 272: &repstr[j], k);
1.5 millert 273: j += k;
274: if (*st == '\0')
275: more = FALSE;
276: if (*st == '\\') {
277: st++;
278: state = 1;
279: } else {
280: repstr[j] = *st;
281: j++;
282: if (j >= REPLEN)
283: return (FALSE);
284: st++;
285: state = 0;
286: }
287: }
288: break;
1.6 millert 289: } /* switch (state) */
290: } /* while (more) */
1.1 deraadt 291:
1.5 millert 292: repstr[j] = '\0';
293: s = lreplace(plen, repstr, f);
294: return (s);
1.1 deraadt 295: }
296:
297: /*
1.9 mickey 298: * This routine does the real work of a forward search. The pattern is
299: * sitting in the external variable "pat". If found, dot is updated, the
1.6 millert 300: * window system is notified of the change, and TRUE is returned. If the
1.1 deraadt 301: * string isn't found, FALSE is returned.
302: */
1.6 millert 303: static int
1.12 cloder 304: re_forwsrch(void)
1.5 millert 305: {
1.6 millert 306: int tbo, error;
307: LINE *clp;
1.5 millert 308:
309: clp = curwp->w_dotp;
310: tbo = curwp->w_doto;
311:
312: if (tbo == clp->l_used)
313: /*
1.6 millert 314: * Don't start matching past end of line -- must move to
315: * beginning of next line, unless at end of file.
1.5 millert 316: */
317: if (clp != curbp->b_linep) {
318: clp = lforw(clp);
319: tbo = 0;
320: }
321: /*
322: * Note this loop does not process the last line, but this editor
323: * always makes the last line empty so this is good.
324: */
325: while (clp != (curbp->b_linep)) {
326: re_match[0].rm_so = tbo;
327: re_match[0].rm_eo = llength(clp);
1.9 mickey 328: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
1.6 millert 329: REG_STARTEND);
330: if (error != 0) {
1.5 millert 331: clp = lforw(clp);
332: tbo = 0;
333: } else {
334: curwp->w_doto = re_match[0].rm_eo;
335: curwp->w_dotp = clp;
336: curwp->w_flag |= WFMOVE;
337: return (TRUE);
338: }
339: }
340: return (FALSE);
1.1 deraadt 341: }
342:
343: /*
1.6 millert 344: * This routine does the real work of a backward search. The pattern is sitting
1.9 mickey 345: * in the external variable "re_pat". If found, dot is updated, the window
346: * system is notified of the change, and TRUE is returned. If the string isn't
1.6 millert 347: * found, FALSE is returned.
1.1 deraadt 348: */
1.6 millert 349: static int
1.12 cloder 350: re_backsrch(void)
1.5 millert 351: {
1.6 millert 352: LINE *clp;
353: int tbo;
354: regmatch_t lastmatch;
1.5 millert 355:
356: clp = curwp->w_dotp;
357: tbo = curwp->w_doto;
358:
359: /* Start search one position to the left of dot */
360: tbo = tbo - 1;
361: if (tbo < 0) {
362: /* must move up one line */
363: clp = lback(clp);
364: tbo = llength(clp);
365: }
1.6 millert 366:
1.5 millert 367: /*
368: * Note this loop does not process the last line, but this editor
369: * always makes the last line empty so this is good.
370: */
371: while (clp != (curbp->b_linep)) {
372: re_match[0].rm_so = 0;
373: re_match[0].rm_eo = llength(clp);
374: lastmatch.rm_so = -1;
375: /*
376: * Keep searching until we don't match any longer. Assumes a
377: * non-match does not modify the re_match array. We have to
378: * do this character-by-character after the first match since
379: * POSIX regexps don't give you a way to do reverse matches.
380: */
381: while (!regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
382: REG_STARTEND) && re_match[0].rm_so < tbo) {
383: memcpy(&lastmatch, &re_match[0], sizeof(regmatch_t));
384: re_match[0].rm_so++;
385: re_match[0].rm_eo = llength(clp);
386: }
387: if (lastmatch.rm_so == -1) {
388: clp = lback(clp);
389: tbo = llength(clp);
390: } else {
391: memcpy(&re_match[0], &lastmatch, sizeof(regmatch_t));
392: curwp->w_doto = re_match[0].rm_so;
393: curwp->w_dotp = clp;
394: curwp->w_flag |= WFMOVE;
395: return (TRUE);
396: }
397: }
398: return (FALSE);
1.1 deraadt 399: }
400:
401: /*
402: * Read a pattern.
403: * Stash it in the external variable "re_pat". The "pat" is
404: * not updated if the user types in an empty line. If the user typed
405: * an empty line, and there is no old pattern, it is an error.
406: * Display the old pattern, in the style of Jeff Lomicka. There is
407: * some do-it-yourself control expansion.
408: */
1.6 millert 409: static int
1.12 cloder 410: re_readpattern(char *prompt)
1.5 millert 411: {
1.6 millert 412: static int dofree = 0;
1.13 vincent 413: int flags, error, s;
414: char tpat[NPAT], *rep;
1.5 millert 415:
416: if (re_pat[0] == '\0')
1.18 ! kjell 417: rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, prompt);
1.5 millert 418: else
1.15 kjell 419: rep = eread("%s: (default %s) ", tpat, NPAT,
420: EFNUL | EFNEW | EFCR, prompt, re_pat);
1.18 ! kjell 421: if (rep == NULL)
! 422: return (ABORT);
! 423: if (rep[0] != '\0') {
1.5 millert 424: /* New pattern given */
1.14 db 425: (void)strlcpy(re_pat, tpat, sizeof(re_pat));
1.5 millert 426: if (casefoldsearch)
427: flags = REG_EXTENDED | REG_ICASE;
428: else
429: flags = REG_EXTENDED;
430: if (dofree)
431: regfree(&re_buff);
432: error = regcomp(&re_buff, re_pat, flags);
1.6 millert 433: if (error != 0) {
434: char message[256];
1.5 millert 435: regerror(error, &re_buff, message, sizeof(message));
436: ewprintf("Regex Error: %s", message);
437: re_pat[0] = '\0';
438: return (FALSE);
439: }
440: dofree = 1;
1.13 vincent 441: s = TRUE;
442: } else if (rep[0] == '\0' && re_pat[0] != '\0')
1.5 millert 443: /* Just using old pattern */
444: s = TRUE;
1.13 vincent 445: else
446: s = FALSE;
1.1 deraadt 447: return (s);
448: }
449:
1.5 millert 450: /*
451: * Cause case to not matter in searches. This is the default. If called
452: * with argument cause case to matter.
1.1 deraadt 453: */
1.6 millert 454: int
1.12 cloder 455: setcasefold(int f, int n)
1.5 millert 456: {
457: if (f & FFARG) {
458: casefoldsearch = FALSE;
459: ewprintf("Case-fold-search unset");
460: } else {
461: casefoldsearch = TRUE;
462: ewprintf("Case-fold-search set");
463: }
1.1 deraadt 464:
1.5 millert 465: /*
466: * Invalidate the regular expression pattern since I'm too lazy to
467: * recompile it.
468: */
469: re_pat[0] = '\0';
470: return (TRUE);
1.6 millert 471: }
1.1 deraadt 472:
1.5 millert 473: /*
1.14 db 474: * Delete all lines after dot that contain a string matching regex.
1.1 deraadt 475: */
1.6 millert 476: int
1.12 cloder 477: delmatchlines(int f, int n)
1.5 millert 478: {
1.6 millert 479: int s;
1.1 deraadt 480:
1.9 mickey 481: if ((s = re_readpattern("Flush lines (containing match for regexp)"))
1.6 millert 482: != TRUE)
1.5 millert 483: return (s);
1.1 deraadt 484:
1.5 millert 485: s = killmatches(TRUE);
486: return (s);
1.1 deraadt 487: }
488:
1.5 millert 489: /*
1.14 db 490: * Delete all lines after dot that don't contain a string matching regex.
1.1 deraadt 491: */
1.6 millert 492: int
1.12 cloder 493: delnonmatchlines(int f, int n)
1.5 millert 494: {
1.6 millert 495: int s;
1.1 deraadt 496:
1.9 mickey 497: if ((s = re_readpattern("Keep lines (containing match for regexp)"))
1.6 millert 498: != TRUE)
1.5 millert 499: return (s);
1.1 deraadt 500:
1.5 millert 501: s = killmatches(FALSE);
502: return (s);
1.1 deraadt 503: }
504:
1.9 mickey 505: /*
1.14 db 506: * This function does the work of deleting matching lines.
1.6 millert 507: */
508: static int
1.12 cloder 509: killmatches(int cond)
1.1 deraadt 510: {
1.6 millert 511: int s, error;
512: int count = 0;
513: LINE *clp;
1.5 millert 514:
515: clp = curwp->w_dotp;
516: if (curwp->w_doto == llength(clp))
517: /* Consider dot on next line */
518: clp = lforw(clp);
519:
520: while (clp != (curbp->b_linep)) {
521: /* see if line matches */
522: re_match[0].rm_so = 0;
523: re_match[0].rm_eo = llength(clp);
1.9 mickey 524: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
1.6 millert 525: REG_STARTEND);
1.5 millert 526:
527: /* Delete line when appropriate */
528: if ((cond == FALSE && error) || (cond == TRUE && !error)) {
529: curwp->w_doto = 0;
530: curwp->w_dotp = clp;
531: count++;
532: s = ldelete(llength(clp) + 1, KNONE);
533: clp = curwp->w_dotp;
534: curwp->w_flag |= WFMOVE;
535: if (s == FALSE)
536: return (FALSE);
537: } else
538: clp = lforw(clp);
539: }
1.1 deraadt 540:
1.5 millert 541: ewprintf("%d line(s) deleted", count);
542: if (count > 0)
543: curwp->w_flag |= WFMOVE;
1.1 deraadt 544:
1.5 millert 545: return (TRUE);
1.1 deraadt 546: }
547:
1.5 millert 548: /*
1.14 db 549: * Count lines matching regex.
1.1 deraadt 550: */
1.6 millert 551: int
1.12 cloder 552: cntmatchlines(int f, int n)
1.5 millert 553: {
1.6 millert 554: int s;
1.1 deraadt 555:
1.5 millert 556: if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
557: return (s);
558: s = countmatches(TRUE);
1.14 db 559:
1.5 millert 560: return (s);
1.1 deraadt 561: }
562:
1.5 millert 563: /*
1.14 db 564: * Count lines that fail to match regex.
1.1 deraadt 565: */
1.6 millert 566: int
1.12 cloder 567: cntnonmatchlines(int f, int n)
1.5 millert 568: {
1.6 millert 569: int s;
1.1 deraadt 570:
1.5 millert 571: if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
572: return (s);
573: s = countmatches(FALSE);
1.1 deraadt 574:
1.5 millert 575: return (s);
1.1 deraadt 576: }
577:
1.6 millert 578: /*
579: * This function does the work of counting matching lines.
580: */
581: int
1.12 cloder 582: countmatches(int cond)
1.1 deraadt 583: {
1.6 millert 584: int error;
585: int count = 0;
586: LINE *clp;
1.5 millert 587:
588: clp = curwp->w_dotp;
589: if (curwp->w_doto == llength(clp))
590: /* Consider dot on next line */
591: clp = lforw(clp);
592:
593: while (clp != (curbp->b_linep)) {
594: /* see if line matches */
595: re_match[0].rm_so = 0;
596: re_match[0].rm_eo = llength(clp);
1.9 mickey 597: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
1.6 millert 598: REG_STARTEND);
1.5 millert 599:
600: /* Count line when appropriate */
601: if ((cond == FALSE && error) || (cond == TRUE && !error))
602: count++;
603: clp = lforw(clp);
604: }
1.1 deraadt 605:
1.5 millert 606: if (cond)
607: ewprintf("Number of lines matching: %d", count);
608: else
609: ewprintf("Number of lines not matching: %d", count);
610:
611: return (TRUE);
1.1 deraadt 612: }
1.6 millert 613: #endif /* REGEX */