Annotation of src/usr.bin/mg/re_search.c, Revision 1.23
1.23 ! kjell 1: /* $OpenBSD: re_search.c,v 1.22 2005/12/13 06:01:27 kjell Exp $ */
1.17 kjell 2:
3: /* This file is in the public domain. */
1.7 niklas 4:
1.1 deraadt 5: /*
1.6 millert 6: * regular expression search commands for Mg
1.1 deraadt 7: *
1.6 millert 8: * This file contains functions to implement several of gnuemacs's regular
9: * expression functions for Mg. Several of the routines below are just minor
10: * re-arrangements of Mg's non-regular expression search functions. Some of
1.9 mickey 11: * them are similar in structure to the original MicroEMACS, others are
1.6 millert 12: * modifications of Rich Ellison's code. Peter Newton re-wrote about half of
13: * them from scratch.
1.1 deraadt 14: */
15:
1.6 millert 16: #ifdef REGEX
1.2 millert 17: #include <sys/types.h>
18: #include <regex.h>
19:
1.6 millert 20: #include "def.h"
21: #include "macro.h"
1.1 deraadt 22:
1.6 millert 23: #define SRCH_BEGIN (0) /* search sub-codes */
1.1 deraadt 24: #define SRCH_FORW (-1)
25: #define SRCH_BACK (-2)
26: #define SRCH_NOPR (-3)
27: #define SRCH_ACCM (-4)
28: #define SRCH_MARK (-5)
29:
1.6 millert 30: #define RE_NMATCH 10 /* max number of matches */
31: #define REPLEN 256 /* max length of replacement string */
1.2 millert 32:
1.6 millert 33: char re_pat[NPAT]; /* regex pattern */
34: int re_srch_lastdir = SRCH_NOPR; /* last search flags */
35: int casefoldsearch = TRUE; /* does search ignore case? */
36:
1.20 kjell 37: static int re_doreplace(RSIZE, char *);
1.11 millert 38: static int re_forwsrch(void);
39: static int re_backsrch(void);
40: static int re_readpattern(char *);
41: static int killmatches(int);
42: static int countmatches(int);
1.1 deraadt 43:
44: /*
45: * Search forward.
1.9 mickey 46: * Get a search string from the user and search for it starting at ".". If
47: * found, move "." to just after the matched characters. display does all
1.6 millert 48: * the hard stuff. If not found, it just prints a message.
1.1 deraadt 49: */
1.5 millert 50: /* ARGSUSED */
1.6 millert 51: int
1.12 cloder 52: re_forwsearch(int f, int n)
1.5 millert 53: {
1.6 millert 54: int s;
1.1 deraadt 55:
1.5 millert 56: if ((s = re_readpattern("RE Search")) != TRUE)
1.1 deraadt 57: return (s);
58: if (re_forwsrch() == FALSE) {
59: ewprintf("Search failed: \"%s\"", re_pat);
60: return (FALSE);
61: }
62: re_srch_lastdir = SRCH_FORW;
63: return (TRUE);
64: }
65:
66: /*
67: * Reverse search.
1.14 db 68: * Get a search string from the user, and search, starting at "."
1.1 deraadt 69: * and proceeding toward the front of the buffer. If found "." is left
70: * pointing at the first character of the pattern [the last character that
71: * was matched].
72: */
1.5 millert 73: /* ARGSUSED */
1.6 millert 74: int
1.12 cloder 75: re_backsearch(int f, int n)
1.5 millert 76: {
1.6 millert 77: int s;
1.1 deraadt 78:
1.5 millert 79: if ((s = re_readpattern("RE Search backward")) != TRUE)
1.1 deraadt 80: return (s);
81: if (re_backsrch() == FALSE) {
82: ewprintf("Search failed: \"%s\"", re_pat);
83: return (FALSE);
84: }
85: re_srch_lastdir = SRCH_BACK;
86: return (TRUE);
87: }
88:
89: /*
1.9 mickey 90: * Search again, using the same search string and direction as the last search
91: * command. The direction has been saved in "srch_lastdir", so you know which
1.6 millert 92: * way to go.
93: *
94: * XXX: This code has problems -- some incompatibility(?) with extend.c causes
95: * match to fail when it should not.
1.1 deraadt 96: */
1.5 millert 97: /* ARGSUSED */
1.6 millert 98: int
1.12 cloder 99: re_searchagain(int f, int n)
1.5 millert 100: {
101: if (re_srch_lastdir == SRCH_NOPR) {
102: ewprintf("No last search");
103: return (FALSE);
104: }
105: if (re_srch_lastdir == SRCH_FORW) {
106: if (re_forwsrch() == FALSE) {
107: ewprintf("Search failed: \"%s\"", re_pat);
108: return (FALSE);
109: }
110: return (TRUE);
111: }
1.9 mickey 112: if (re_srch_lastdir == SRCH_BACK)
1.5 millert 113: if (re_backsrch() == FALSE) {
114: ewprintf("Search failed: \"%s\"", re_pat);
115: return (FALSE);
116: }
1.6 millert 117:
118: return (TRUE);
1.1 deraadt 119: }
120:
121: /* Compiled regex goes here-- changed only when new pattern read */
1.6 millert 122: static regex_t re_buff;
123: static regmatch_t re_match[RE_NMATCH];
1.1 deraadt 124:
125: /*
126: * Re-Query Replace.
127: * Replace strings selectively. Does a search and replace operation.
128: */
1.5 millert 129: /* ARGSUSED */
1.6 millert 130: int
1.12 cloder 131: re_queryrepl(int f, int n)
1.5 millert 132: {
1.14 db 133: int rcnt = 0; /* replacements made so far */
1.13 vincent 134: int plen, s; /* length of found string */
1.19 deraadt 135: char news[NPAT]; /* replacement string */
1.1 deraadt 136:
1.5 millert 137: if ((s = re_readpattern("RE Query replace")) != TRUE)
1.1 deraadt 138: return (s);
1.19 deraadt 139: if (eread("Query replace %s with: ", news, NPAT,
140: EFNUL | EFNEW | EFCR, re_pat) == NULL)
1.13 vincent 141: return (ABORT);
1.1 deraadt 142: ewprintf("Query replacing %s with %s:", re_pat, news);
143:
144: /*
145: * Search forward repeatedly, checking each time whether to insert
146: * or not. The "!" case makes the check always true, so it gets put
147: * into a tighter loop for efficiency.
148: */
149: while (re_forwsrch() == TRUE) {
1.5 millert 150: retry:
1.1 deraadt 151: update();
152: switch (getkey(FALSE)) {
153: case ' ':
1.2 millert 154: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.20 kjell 155: if (re_doreplace((RSIZE)plen, news) == FALSE)
1.1 deraadt 156: return (FALSE);
157: rcnt++;
158: break;
159:
160: case '.':
1.2 millert 161: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.20 kjell 162: if (re_doreplace((RSIZE)plen, news) == FALSE)
1.1 deraadt 163: return (FALSE);
164: rcnt++;
165: goto stopsearch;
166:
1.6 millert 167: case CCHR('G'): /* ^G */
1.8 art 168: (void)ctrlg(FFRAND, 0);
1.22 kjell 169: goto stopsearch;
1.6 millert 170: case CCHR('['): /* ESC */
1.1 deraadt 171: case '`':
172: goto stopsearch;
173: case '!':
174: do {
1.2 millert 175: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.20 kjell 176: if (re_doreplace((RSIZE)plen, news) == FALSE)
1.1 deraadt 177: return (FALSE);
178: rcnt++;
179: } while (re_forwsrch() == TRUE);
180: goto stopsearch;
181:
1.6 millert 182: case CCHR('?'): /* To not replace */
1.1 deraadt 183: break;
184:
185: default:
1.5 millert 186: ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
1.1 deraadt 187: goto retry;
188: }
189: }
1.6 millert 190:
1.1 deraadt 191: stopsearch:
1.23 ! kjell 192: curwp->w_flag |= WFFULL;
1.1 deraadt 193: update();
194: if (!inmacro) {
195: if (rcnt == 0)
196: ewprintf("(No replacements done)");
197: else if (rcnt == 1)
198: ewprintf("(1 replacement done)");
199: else
200: ewprintf("(%d replacements done)", rcnt);
201: }
1.14 db 202: return (TRUE);
1.1 deraadt 203: }
204:
1.5 millert 205: /*
206: * Routine re_doreplace calls lreplace to make replacements needed by
207: * re_query replace. Its reason for existence is to deal with \1, \2. etc.
1.12 cloder 208: * plen: length to remove
209: * st: replacement string
1.1 deraadt 210: */
1.6 millert 211: static int
1.20 kjell 212: re_doreplace(RSIZE plen, char *st)
1.6 millert 213: {
214: int j, k, s, more, num, state;
1.21 deraadt 215: struct line *clp;
1.6 millert 216: char repstr[REPLEN];
1.5 millert 217:
218: clp = curwp->w_dotp;
219: more = TRUE;
220: j = 0;
221: state = 0;
1.6 millert 222: num = 0;
1.5 millert 223:
224: /* The following FSA parses the replacement string */
225: while (more) {
226: switch (state) {
227: case 0:
228: if (*st == '\\') {
229: st++;
230: state = 1;
231: } else if (*st == '\0')
232: more = FALSE;
233: else {
234: repstr[j] = *st;
235: j++;
236: if (j >= REPLEN)
237: return (FALSE);
238: st++;
239: }
240: break;
241: case 1:
242: if (*st >= '0' && *st <= '9') {
243: num = *st - '0';
244: st++;
245: state = 2;
246: } else if (*st == '\0')
247: more = FALSE;
248: else {
249: repstr[j] = *st;
250: j++;
251: if (j >= REPLEN)
252: return (FALSE);
253: st++;
254: state = 0;
255: }
256: break;
257: case 2:
258: if (*st >= '0' && *st <= '9') {
259: num = 10 * num + *st - '0';
260: st++;
261: } else {
262: if (num >= RE_NMATCH)
263: return (FALSE);
264: k = re_match[num].rm_eo - re_match[num].rm_so;
265: if (j + k >= REPLEN)
266: return (FALSE);
1.9 mickey 267: bcopy(&(clp->l_text[re_match[num].rm_so]),
1.6 millert 268: &repstr[j], k);
1.5 millert 269: j += k;
270: if (*st == '\0')
271: more = FALSE;
272: if (*st == '\\') {
273: st++;
274: state = 1;
275: } else {
276: repstr[j] = *st;
277: j++;
278: if (j >= REPLEN)
279: return (FALSE);
280: st++;
281: state = 0;
282: }
283: }
284: break;
1.6 millert 285: } /* switch (state) */
286: } /* while (more) */
1.1 deraadt 287:
1.5 millert 288: repstr[j] = '\0';
1.20 kjell 289: s = lreplace(plen, repstr);
1.5 millert 290: return (s);
1.1 deraadt 291: }
292:
293: /*
1.9 mickey 294: * This routine does the real work of a forward search. The pattern is
295: * sitting in the external variable "pat". If found, dot is updated, the
1.6 millert 296: * window system is notified of the change, and TRUE is returned. If the
1.1 deraadt 297: * string isn't found, FALSE is returned.
298: */
1.6 millert 299: static int
1.12 cloder 300: re_forwsrch(void)
1.5 millert 301: {
1.6 millert 302: int tbo, error;
1.21 deraadt 303: struct line *clp;
1.5 millert 304:
305: clp = curwp->w_dotp;
306: tbo = curwp->w_doto;
307:
308: if (tbo == clp->l_used)
309: /*
1.6 millert 310: * Don't start matching past end of line -- must move to
311: * beginning of next line, unless at end of file.
1.5 millert 312: */
313: if (clp != curbp->b_linep) {
314: clp = lforw(clp);
315: tbo = 0;
316: }
317: /*
318: * Note this loop does not process the last line, but this editor
319: * always makes the last line empty so this is good.
320: */
321: while (clp != (curbp->b_linep)) {
322: re_match[0].rm_so = tbo;
323: re_match[0].rm_eo = llength(clp);
1.9 mickey 324: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
1.6 millert 325: REG_STARTEND);
326: if (error != 0) {
1.5 millert 327: clp = lforw(clp);
328: tbo = 0;
329: } else {
330: curwp->w_doto = re_match[0].rm_eo;
331: curwp->w_dotp = clp;
332: curwp->w_flag |= WFMOVE;
333: return (TRUE);
334: }
335: }
336: return (FALSE);
1.1 deraadt 337: }
338:
339: /*
1.6 millert 340: * This routine does the real work of a backward search. The pattern is sitting
1.9 mickey 341: * in the external variable "re_pat". If found, dot is updated, the window
342: * system is notified of the change, and TRUE is returned. If the string isn't
1.6 millert 343: * found, FALSE is returned.
1.1 deraadt 344: */
1.6 millert 345: static int
1.12 cloder 346: re_backsrch(void)
1.5 millert 347: {
1.21 deraadt 348: struct line *clp;
1.6 millert 349: int tbo;
350: regmatch_t lastmatch;
1.5 millert 351:
352: clp = curwp->w_dotp;
353: tbo = curwp->w_doto;
354:
355: /* Start search one position to the left of dot */
356: tbo = tbo - 1;
357: if (tbo < 0) {
358: /* must move up one line */
359: clp = lback(clp);
360: tbo = llength(clp);
361: }
1.6 millert 362:
1.5 millert 363: /*
364: * Note this loop does not process the last line, but this editor
365: * always makes the last line empty so this is good.
366: */
367: while (clp != (curbp->b_linep)) {
368: re_match[0].rm_so = 0;
369: re_match[0].rm_eo = llength(clp);
370: lastmatch.rm_so = -1;
371: /*
372: * Keep searching until we don't match any longer. Assumes a
373: * non-match does not modify the re_match array. We have to
374: * do this character-by-character after the first match since
375: * POSIX regexps don't give you a way to do reverse matches.
376: */
377: while (!regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
378: REG_STARTEND) && re_match[0].rm_so < tbo) {
379: memcpy(&lastmatch, &re_match[0], sizeof(regmatch_t));
380: re_match[0].rm_so++;
381: re_match[0].rm_eo = llength(clp);
382: }
383: if (lastmatch.rm_so == -1) {
384: clp = lback(clp);
385: tbo = llength(clp);
386: } else {
387: memcpy(&re_match[0], &lastmatch, sizeof(regmatch_t));
388: curwp->w_doto = re_match[0].rm_so;
389: curwp->w_dotp = clp;
390: curwp->w_flag |= WFMOVE;
391: return (TRUE);
392: }
393: }
394: return (FALSE);
1.1 deraadt 395: }
396:
397: /*
398: * Read a pattern.
399: * Stash it in the external variable "re_pat". The "pat" is
400: * not updated if the user types in an empty line. If the user typed
401: * an empty line, and there is no old pattern, it is an error.
402: * Display the old pattern, in the style of Jeff Lomicka. There is
403: * some do-it-yourself control expansion.
404: */
1.6 millert 405: static int
1.12 cloder 406: re_readpattern(char *prompt)
1.5 millert 407: {
1.6 millert 408: static int dofree = 0;
1.13 vincent 409: int flags, error, s;
410: char tpat[NPAT], *rep;
1.5 millert 411:
412: if (re_pat[0] == '\0')
1.18 kjell 413: rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, prompt);
1.5 millert 414: else
1.15 kjell 415: rep = eread("%s: (default %s) ", tpat, NPAT,
416: EFNUL | EFNEW | EFCR, prompt, re_pat);
1.18 kjell 417: if (rep == NULL)
418: return (ABORT);
419: if (rep[0] != '\0') {
1.5 millert 420: /* New pattern given */
1.14 db 421: (void)strlcpy(re_pat, tpat, sizeof(re_pat));
1.5 millert 422: if (casefoldsearch)
423: flags = REG_EXTENDED | REG_ICASE;
424: else
425: flags = REG_EXTENDED;
426: if (dofree)
427: regfree(&re_buff);
428: error = regcomp(&re_buff, re_pat, flags);
1.6 millert 429: if (error != 0) {
430: char message[256];
1.5 millert 431: regerror(error, &re_buff, message, sizeof(message));
432: ewprintf("Regex Error: %s", message);
433: re_pat[0] = '\0';
434: return (FALSE);
435: }
436: dofree = 1;
1.13 vincent 437: s = TRUE;
438: } else if (rep[0] == '\0' && re_pat[0] != '\0')
1.5 millert 439: /* Just using old pattern */
440: s = TRUE;
1.13 vincent 441: else
442: s = FALSE;
1.1 deraadt 443: return (s);
444: }
445:
1.5 millert 446: /*
447: * Cause case to not matter in searches. This is the default. If called
448: * with argument cause case to matter.
1.1 deraadt 449: */
1.22 kjell 450: /* ARGSUSED*/
1.6 millert 451: int
1.12 cloder 452: setcasefold(int f, int n)
1.5 millert 453: {
454: if (f & FFARG) {
455: casefoldsearch = FALSE;
456: ewprintf("Case-fold-search unset");
457: } else {
458: casefoldsearch = TRUE;
459: ewprintf("Case-fold-search set");
460: }
1.1 deraadt 461:
1.5 millert 462: /*
463: * Invalidate the regular expression pattern since I'm too lazy to
464: * recompile it.
465: */
466: re_pat[0] = '\0';
467: return (TRUE);
1.6 millert 468: }
1.1 deraadt 469:
1.5 millert 470: /*
1.14 db 471: * Delete all lines after dot that contain a string matching regex.
1.1 deraadt 472: */
1.22 kjell 473: /* ARGSUSED */
1.6 millert 474: int
1.12 cloder 475: delmatchlines(int f, int n)
1.5 millert 476: {
1.6 millert 477: int s;
1.1 deraadt 478:
1.9 mickey 479: if ((s = re_readpattern("Flush lines (containing match for regexp)"))
1.6 millert 480: != TRUE)
1.5 millert 481: return (s);
1.1 deraadt 482:
1.5 millert 483: s = killmatches(TRUE);
484: return (s);
1.1 deraadt 485: }
486:
1.5 millert 487: /*
1.14 db 488: * Delete all lines after dot that don't contain a string matching regex.
1.1 deraadt 489: */
1.22 kjell 490: /* ARGSUSED */
1.6 millert 491: int
1.12 cloder 492: delnonmatchlines(int f, int n)
1.5 millert 493: {
1.6 millert 494: int s;
1.1 deraadt 495:
1.9 mickey 496: if ((s = re_readpattern("Keep lines (containing match for regexp)"))
1.6 millert 497: != TRUE)
1.5 millert 498: return (s);
1.1 deraadt 499:
1.5 millert 500: s = killmatches(FALSE);
501: return (s);
1.1 deraadt 502: }
503:
1.9 mickey 504: /*
1.14 db 505: * This function does the work of deleting matching lines.
1.6 millert 506: */
507: static int
1.12 cloder 508: killmatches(int cond)
1.1 deraadt 509: {
1.6 millert 510: int s, error;
511: int count = 0;
1.21 deraadt 512: struct line *clp;
1.5 millert 513:
514: clp = curwp->w_dotp;
515: if (curwp->w_doto == llength(clp))
516: /* Consider dot on next line */
517: clp = lforw(clp);
518:
519: while (clp != (curbp->b_linep)) {
520: /* see if line matches */
521: re_match[0].rm_so = 0;
522: re_match[0].rm_eo = llength(clp);
1.9 mickey 523: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
1.6 millert 524: REG_STARTEND);
1.5 millert 525:
526: /* Delete line when appropriate */
527: if ((cond == FALSE && error) || (cond == TRUE && !error)) {
528: curwp->w_doto = 0;
529: curwp->w_dotp = clp;
530: count++;
531: s = ldelete(llength(clp) + 1, KNONE);
532: clp = curwp->w_dotp;
533: curwp->w_flag |= WFMOVE;
534: if (s == FALSE)
535: return (FALSE);
536: } else
537: clp = lforw(clp);
538: }
1.1 deraadt 539:
1.5 millert 540: ewprintf("%d line(s) deleted", count);
541: if (count > 0)
542: curwp->w_flag |= WFMOVE;
1.1 deraadt 543:
1.5 millert 544: return (TRUE);
1.1 deraadt 545: }
546:
1.5 millert 547: /*
1.14 db 548: * Count lines matching regex.
1.1 deraadt 549: */
1.22 kjell 550: /* ARGSUSED */
1.6 millert 551: int
1.12 cloder 552: cntmatchlines(int f, int n)
1.5 millert 553: {
1.6 millert 554: int s;
1.1 deraadt 555:
1.5 millert 556: if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
557: return (s);
558: s = countmatches(TRUE);
1.14 db 559:
1.5 millert 560: return (s);
1.1 deraadt 561: }
562:
1.5 millert 563: /*
1.14 db 564: * Count lines that fail to match regex.
1.1 deraadt 565: */
1.22 kjell 566: /* ARGSUSED */
1.6 millert 567: int
1.12 cloder 568: cntnonmatchlines(int f, int n)
1.5 millert 569: {
1.6 millert 570: int s;
1.1 deraadt 571:
1.5 millert 572: if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
573: return (s);
574: s = countmatches(FALSE);
1.1 deraadt 575:
1.5 millert 576: return (s);
1.1 deraadt 577: }
578:
1.6 millert 579: /*
580: * This function does the work of counting matching lines.
581: */
582: int
1.12 cloder 583: countmatches(int cond)
1.1 deraadt 584: {
1.6 millert 585: int error;
586: int count = 0;
1.21 deraadt 587: struct line *clp;
1.5 millert 588:
589: clp = curwp->w_dotp;
590: if (curwp->w_doto == llength(clp))
591: /* Consider dot on next line */
592: clp = lforw(clp);
593:
594: while (clp != (curbp->b_linep)) {
595: /* see if line matches */
596: re_match[0].rm_so = 0;
597: re_match[0].rm_eo = llength(clp);
1.9 mickey 598: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
1.6 millert 599: REG_STARTEND);
1.5 millert 600:
601: /* Count line when appropriate */
602: if ((cond == FALSE && error) || (cond == TRUE && !error))
603: count++;
604: clp = lforw(clp);
605: }
1.1 deraadt 606:
1.5 millert 607: if (cond)
608: ewprintf("Number of lines matching: %d", count);
609: else
610: ewprintf("Number of lines not matching: %d", count);
611:
612: return (TRUE);
1.1 deraadt 613: }
1.6 millert 614: #endif /* REGEX */