Annotation of src/usr.bin/mg/re_search.c, Revision 1.26
1.26 ! kjell 1: /* $OpenBSD: re_search.c,v 1.25 2009/06/04 02:23:37 kjell Exp $ */
1.17 kjell 2:
3: /* This file is in the public domain. */
1.7 niklas 4:
1.1 deraadt 5: /*
1.6 millert 6: * regular expression search commands for Mg
1.1 deraadt 7: *
1.6 millert 8: * This file contains functions to implement several of gnuemacs's regular
9: * expression functions for Mg. Several of the routines below are just minor
10: * re-arrangements of Mg's non-regular expression search functions. Some of
1.9 mickey 11: * them are similar in structure to the original MicroEMACS, others are
1.6 millert 12: * modifications of Rich Ellison's code. Peter Newton re-wrote about half of
13: * them from scratch.
1.1 deraadt 14: */
15:
1.6 millert 16: #ifdef REGEX
1.26 ! kjell 17: #include "def.h"
! 18:
1.2 millert 19: #include <sys/types.h>
20: #include <regex.h>
21:
1.6 millert 22: #include "macro.h"
1.1 deraadt 23:
1.6 millert 24: #define SRCH_BEGIN (0) /* search sub-codes */
1.1 deraadt 25: #define SRCH_FORW (-1)
26: #define SRCH_BACK (-2)
27: #define SRCH_NOPR (-3)
28: #define SRCH_ACCM (-4)
29: #define SRCH_MARK (-5)
30:
1.6 millert 31: #define RE_NMATCH 10 /* max number of matches */
32: #define REPLEN 256 /* max length of replacement string */
1.2 millert 33:
1.6 millert 34: char re_pat[NPAT]; /* regex pattern */
35: int re_srch_lastdir = SRCH_NOPR; /* last search flags */
36: int casefoldsearch = TRUE; /* does search ignore case? */
37:
1.20 kjell 38: static int re_doreplace(RSIZE, char *);
1.11 millert 39: static int re_forwsrch(void);
40: static int re_backsrch(void);
41: static int re_readpattern(char *);
42: static int killmatches(int);
43: static int countmatches(int);
1.1 deraadt 44:
45: /*
46: * Search forward.
1.9 mickey 47: * Get a search string from the user and search for it starting at ".". If
48: * found, move "." to just after the matched characters. display does all
1.6 millert 49: * the hard stuff. If not found, it just prints a message.
1.1 deraadt 50: */
1.5 millert 51: /* ARGSUSED */
1.6 millert 52: int
1.12 cloder 53: re_forwsearch(int f, int n)
1.5 millert 54: {
1.6 millert 55: int s;
1.1 deraadt 56:
1.5 millert 57: if ((s = re_readpattern("RE Search")) != TRUE)
1.1 deraadt 58: return (s);
59: if (re_forwsrch() == FALSE) {
60: ewprintf("Search failed: \"%s\"", re_pat);
61: return (FALSE);
62: }
63: re_srch_lastdir = SRCH_FORW;
64: return (TRUE);
65: }
66:
67: /*
68: * Reverse search.
1.14 db 69: * Get a search string from the user, and search, starting at "."
1.1 deraadt 70: * and proceeding toward the front of the buffer. If found "." is left
71: * pointing at the first character of the pattern [the last character that
72: * was matched].
73: */
1.5 millert 74: /* ARGSUSED */
1.6 millert 75: int
1.12 cloder 76: re_backsearch(int f, int n)
1.5 millert 77: {
1.6 millert 78: int s;
1.1 deraadt 79:
1.5 millert 80: if ((s = re_readpattern("RE Search backward")) != TRUE)
1.1 deraadt 81: return (s);
82: if (re_backsrch() == FALSE) {
83: ewprintf("Search failed: \"%s\"", re_pat);
84: return (FALSE);
85: }
86: re_srch_lastdir = SRCH_BACK;
87: return (TRUE);
88: }
89:
90: /*
1.9 mickey 91: * Search again, using the same search string and direction as the last search
92: * command. The direction has been saved in "srch_lastdir", so you know which
1.6 millert 93: * way to go.
94: *
95: * XXX: This code has problems -- some incompatibility(?) with extend.c causes
96: * match to fail when it should not.
1.1 deraadt 97: */
1.5 millert 98: /* ARGSUSED */
1.6 millert 99: int
1.12 cloder 100: re_searchagain(int f, int n)
1.5 millert 101: {
102: if (re_srch_lastdir == SRCH_NOPR) {
103: ewprintf("No last search");
104: return (FALSE);
105: }
106: if (re_srch_lastdir == SRCH_FORW) {
107: if (re_forwsrch() == FALSE) {
108: ewprintf("Search failed: \"%s\"", re_pat);
109: return (FALSE);
110: }
111: return (TRUE);
112: }
1.9 mickey 113: if (re_srch_lastdir == SRCH_BACK)
1.5 millert 114: if (re_backsrch() == FALSE) {
115: ewprintf("Search failed: \"%s\"", re_pat);
116: return (FALSE);
117: }
1.6 millert 118:
119: return (TRUE);
1.1 deraadt 120: }
121:
122: /* Compiled regex goes here-- changed only when new pattern read */
1.6 millert 123: static regex_t re_buff;
124: static regmatch_t re_match[RE_NMATCH];
1.1 deraadt 125:
126: /*
127: * Re-Query Replace.
128: * Replace strings selectively. Does a search and replace operation.
129: */
1.5 millert 130: /* ARGSUSED */
1.6 millert 131: int
1.12 cloder 132: re_queryrepl(int f, int n)
1.5 millert 133: {
1.14 db 134: int rcnt = 0; /* replacements made so far */
1.13 vincent 135: int plen, s; /* length of found string */
1.19 deraadt 136: char news[NPAT]; /* replacement string */
1.1 deraadt 137:
1.5 millert 138: if ((s = re_readpattern("RE Query replace")) != TRUE)
1.1 deraadt 139: return (s);
1.19 deraadt 140: if (eread("Query replace %s with: ", news, NPAT,
141: EFNUL | EFNEW | EFCR, re_pat) == NULL)
1.13 vincent 142: return (ABORT);
1.1 deraadt 143: ewprintf("Query replacing %s with %s:", re_pat, news);
144:
145: /*
146: * Search forward repeatedly, checking each time whether to insert
147: * or not. The "!" case makes the check always true, so it gets put
148: * into a tighter loop for efficiency.
149: */
150: while (re_forwsrch() == TRUE) {
1.5 millert 151: retry:
1.1 deraadt 152: update();
153: switch (getkey(FALSE)) {
154: case ' ':
1.2 millert 155: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.20 kjell 156: if (re_doreplace((RSIZE)plen, news) == FALSE)
1.1 deraadt 157: return (FALSE);
158: rcnt++;
159: break;
160:
161: case '.':
1.2 millert 162: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.20 kjell 163: if (re_doreplace((RSIZE)plen, news) == FALSE)
1.1 deraadt 164: return (FALSE);
165: rcnt++;
166: goto stopsearch;
167:
1.6 millert 168: case CCHR('G'): /* ^G */
1.8 art 169: (void)ctrlg(FFRAND, 0);
1.22 kjell 170: goto stopsearch;
1.6 millert 171: case CCHR('['): /* ESC */
1.1 deraadt 172: case '`':
173: goto stopsearch;
174: case '!':
175: do {
1.2 millert 176: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.20 kjell 177: if (re_doreplace((RSIZE)plen, news) == FALSE)
1.1 deraadt 178: return (FALSE);
179: rcnt++;
180: } while (re_forwsrch() == TRUE);
181: goto stopsearch;
182:
1.6 millert 183: case CCHR('?'): /* To not replace */
1.1 deraadt 184: break;
185:
186: default:
1.5 millert 187: ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
1.1 deraadt 188: goto retry;
189: }
190: }
1.6 millert 191:
1.1 deraadt 192: stopsearch:
1.25 kjell 193: curwp->w_rflag |= WFFULL;
1.1 deraadt 194: update();
195: if (!inmacro) {
196: if (rcnt == 0)
197: ewprintf("(No replacements done)");
198: else if (rcnt == 1)
199: ewprintf("(1 replacement done)");
200: else
201: ewprintf("(%d replacements done)", rcnt);
202: }
1.14 db 203: return (TRUE);
1.1 deraadt 204: }
205:
1.5 millert 206: /*
207: * Routine re_doreplace calls lreplace to make replacements needed by
208: * re_query replace. Its reason for existence is to deal with \1, \2. etc.
1.12 cloder 209: * plen: length to remove
210: * st: replacement string
1.1 deraadt 211: */
1.6 millert 212: static int
1.20 kjell 213: re_doreplace(RSIZE plen, char *st)
1.6 millert 214: {
215: int j, k, s, more, num, state;
1.21 deraadt 216: struct line *clp;
1.6 millert 217: char repstr[REPLEN];
1.5 millert 218:
219: clp = curwp->w_dotp;
220: more = TRUE;
221: j = 0;
222: state = 0;
1.6 millert 223: num = 0;
1.5 millert 224:
225: /* The following FSA parses the replacement string */
226: while (more) {
227: switch (state) {
228: case 0:
229: if (*st == '\\') {
230: st++;
231: state = 1;
232: } else if (*st == '\0')
233: more = FALSE;
234: else {
235: repstr[j] = *st;
236: j++;
237: if (j >= REPLEN)
238: return (FALSE);
239: st++;
240: }
241: break;
242: case 1:
243: if (*st >= '0' && *st <= '9') {
244: num = *st - '0';
245: st++;
246: state = 2;
247: } else if (*st == '\0')
248: more = FALSE;
249: else {
250: repstr[j] = *st;
251: j++;
252: if (j >= REPLEN)
253: return (FALSE);
254: st++;
255: state = 0;
256: }
257: break;
258: case 2:
259: if (*st >= '0' && *st <= '9') {
260: num = 10 * num + *st - '0';
261: st++;
262: } else {
263: if (num >= RE_NMATCH)
264: return (FALSE);
265: k = re_match[num].rm_eo - re_match[num].rm_so;
266: if (j + k >= REPLEN)
267: return (FALSE);
1.9 mickey 268: bcopy(&(clp->l_text[re_match[num].rm_so]),
1.6 millert 269: &repstr[j], k);
1.5 millert 270: j += k;
271: if (*st == '\0')
272: more = FALSE;
273: if (*st == '\\') {
274: st++;
275: state = 1;
276: } else {
277: repstr[j] = *st;
278: j++;
279: if (j >= REPLEN)
280: return (FALSE);
281: st++;
282: state = 0;
283: }
284: }
285: break;
1.6 millert 286: } /* switch (state) */
287: } /* while (more) */
1.1 deraadt 288:
1.5 millert 289: repstr[j] = '\0';
1.20 kjell 290: s = lreplace(plen, repstr);
1.5 millert 291: return (s);
1.1 deraadt 292: }
293:
294: /*
1.9 mickey 295: * This routine does the real work of a forward search. The pattern is
296: * sitting in the external variable "pat". If found, dot is updated, the
1.6 millert 297: * window system is notified of the change, and TRUE is returned. If the
1.1 deraadt 298: * string isn't found, FALSE is returned.
299: */
1.6 millert 300: static int
1.12 cloder 301: re_forwsrch(void)
1.5 millert 302: {
1.6 millert 303: int tbo, error;
1.21 deraadt 304: struct line *clp;
1.5 millert 305:
306: clp = curwp->w_dotp;
307: tbo = curwp->w_doto;
308:
309: if (tbo == clp->l_used)
310: /*
1.6 millert 311: * Don't start matching past end of line -- must move to
312: * beginning of next line, unless at end of file.
1.5 millert 313: */
1.24 kjell 314: if (clp != curbp->b_headp) {
1.5 millert 315: clp = lforw(clp);
316: tbo = 0;
317: }
318: /*
319: * Note this loop does not process the last line, but this editor
320: * always makes the last line empty so this is good.
321: */
1.24 kjell 322: while (clp != (curbp->b_headp)) {
1.5 millert 323: re_match[0].rm_so = tbo;
324: re_match[0].rm_eo = llength(clp);
1.9 mickey 325: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
1.6 millert 326: REG_STARTEND);
327: if (error != 0) {
1.5 millert 328: clp = lforw(clp);
329: tbo = 0;
330: } else {
331: curwp->w_doto = re_match[0].rm_eo;
332: curwp->w_dotp = clp;
1.25 kjell 333: curwp->w_rflag |= WFMOVE;
1.5 millert 334: return (TRUE);
335: }
336: }
337: return (FALSE);
1.1 deraadt 338: }
339:
340: /*
1.6 millert 341: * This routine does the real work of a backward search. The pattern is sitting
1.9 mickey 342: * in the external variable "re_pat". If found, dot is updated, the window
343: * system is notified of the change, and TRUE is returned. If the string isn't
1.6 millert 344: * found, FALSE is returned.
1.1 deraadt 345: */
1.6 millert 346: static int
1.12 cloder 347: re_backsrch(void)
1.5 millert 348: {
1.21 deraadt 349: struct line *clp;
1.6 millert 350: int tbo;
351: regmatch_t lastmatch;
1.5 millert 352:
353: clp = curwp->w_dotp;
354: tbo = curwp->w_doto;
355:
356: /* Start search one position to the left of dot */
357: tbo = tbo - 1;
358: if (tbo < 0) {
359: /* must move up one line */
360: clp = lback(clp);
361: tbo = llength(clp);
362: }
1.6 millert 363:
1.5 millert 364: /*
365: * Note this loop does not process the last line, but this editor
366: * always makes the last line empty so this is good.
367: */
1.24 kjell 368: while (clp != (curbp->b_headp)) {
1.5 millert 369: re_match[0].rm_so = 0;
370: re_match[0].rm_eo = llength(clp);
371: lastmatch.rm_so = -1;
372: /*
373: * Keep searching until we don't match any longer. Assumes a
374: * non-match does not modify the re_match array. We have to
375: * do this character-by-character after the first match since
376: * POSIX regexps don't give you a way to do reverse matches.
377: */
378: while (!regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
379: REG_STARTEND) && re_match[0].rm_so < tbo) {
380: memcpy(&lastmatch, &re_match[0], sizeof(regmatch_t));
381: re_match[0].rm_so++;
382: re_match[0].rm_eo = llength(clp);
383: }
384: if (lastmatch.rm_so == -1) {
385: clp = lback(clp);
386: tbo = llength(clp);
387: } else {
388: memcpy(&re_match[0], &lastmatch, sizeof(regmatch_t));
389: curwp->w_doto = re_match[0].rm_so;
390: curwp->w_dotp = clp;
1.25 kjell 391: curwp->w_rflag |= WFMOVE;
1.5 millert 392: return (TRUE);
393: }
394: }
395: return (FALSE);
1.1 deraadt 396: }
397:
398: /*
399: * Read a pattern.
400: * Stash it in the external variable "re_pat". The "pat" is
401: * not updated if the user types in an empty line. If the user typed
402: * an empty line, and there is no old pattern, it is an error.
403: * Display the old pattern, in the style of Jeff Lomicka. There is
404: * some do-it-yourself control expansion.
405: */
1.6 millert 406: static int
1.12 cloder 407: re_readpattern(char *prompt)
1.5 millert 408: {
1.6 millert 409: static int dofree = 0;
1.13 vincent 410: int flags, error, s;
411: char tpat[NPAT], *rep;
1.5 millert 412:
413: if (re_pat[0] == '\0')
1.18 kjell 414: rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, prompt);
1.5 millert 415: else
1.15 kjell 416: rep = eread("%s: (default %s) ", tpat, NPAT,
417: EFNUL | EFNEW | EFCR, prompt, re_pat);
1.18 kjell 418: if (rep == NULL)
419: return (ABORT);
420: if (rep[0] != '\0') {
1.5 millert 421: /* New pattern given */
1.14 db 422: (void)strlcpy(re_pat, tpat, sizeof(re_pat));
1.5 millert 423: if (casefoldsearch)
424: flags = REG_EXTENDED | REG_ICASE;
425: else
426: flags = REG_EXTENDED;
427: if (dofree)
428: regfree(&re_buff);
429: error = regcomp(&re_buff, re_pat, flags);
1.6 millert 430: if (error != 0) {
431: char message[256];
1.5 millert 432: regerror(error, &re_buff, message, sizeof(message));
433: ewprintf("Regex Error: %s", message);
434: re_pat[0] = '\0';
435: return (FALSE);
436: }
437: dofree = 1;
1.13 vincent 438: s = TRUE;
439: } else if (rep[0] == '\0' && re_pat[0] != '\0')
1.5 millert 440: /* Just using old pattern */
441: s = TRUE;
1.13 vincent 442: else
443: s = FALSE;
1.1 deraadt 444: return (s);
445: }
446:
1.5 millert 447: /*
448: * Cause case to not matter in searches. This is the default. If called
449: * with argument cause case to matter.
1.1 deraadt 450: */
1.22 kjell 451: /* ARGSUSED*/
1.6 millert 452: int
1.12 cloder 453: setcasefold(int f, int n)
1.5 millert 454: {
455: if (f & FFARG) {
456: casefoldsearch = FALSE;
457: ewprintf("Case-fold-search unset");
458: } else {
459: casefoldsearch = TRUE;
460: ewprintf("Case-fold-search set");
461: }
1.1 deraadt 462:
1.5 millert 463: /*
464: * Invalidate the regular expression pattern since I'm too lazy to
465: * recompile it.
466: */
467: re_pat[0] = '\0';
468: return (TRUE);
1.6 millert 469: }
1.1 deraadt 470:
1.5 millert 471: /*
1.14 db 472: * Delete all lines after dot that contain a string matching regex.
1.1 deraadt 473: */
1.22 kjell 474: /* ARGSUSED */
1.6 millert 475: int
1.12 cloder 476: delmatchlines(int f, int n)
1.5 millert 477: {
1.6 millert 478: int s;
1.1 deraadt 479:
1.9 mickey 480: if ((s = re_readpattern("Flush lines (containing match for regexp)"))
1.6 millert 481: != TRUE)
1.5 millert 482: return (s);
1.1 deraadt 483:
1.5 millert 484: s = killmatches(TRUE);
485: return (s);
1.1 deraadt 486: }
487:
1.5 millert 488: /*
1.14 db 489: * Delete all lines after dot that don't contain a string matching regex.
1.1 deraadt 490: */
1.22 kjell 491: /* ARGSUSED */
1.6 millert 492: int
1.12 cloder 493: delnonmatchlines(int f, int n)
1.5 millert 494: {
1.6 millert 495: int s;
1.1 deraadt 496:
1.9 mickey 497: if ((s = re_readpattern("Keep lines (containing match for regexp)"))
1.6 millert 498: != TRUE)
1.5 millert 499: return (s);
1.1 deraadt 500:
1.5 millert 501: s = killmatches(FALSE);
502: return (s);
1.1 deraadt 503: }
504:
1.9 mickey 505: /*
1.14 db 506: * This function does the work of deleting matching lines.
1.6 millert 507: */
508: static int
1.12 cloder 509: killmatches(int cond)
1.1 deraadt 510: {
1.6 millert 511: int s, error;
512: int count = 0;
1.21 deraadt 513: struct line *clp;
1.5 millert 514:
515: clp = curwp->w_dotp;
516: if (curwp->w_doto == llength(clp))
517: /* Consider dot on next line */
518: clp = lforw(clp);
519:
1.24 kjell 520: while (clp != (curbp->b_headp)) {
1.5 millert 521: /* see if line matches */
522: re_match[0].rm_so = 0;
523: re_match[0].rm_eo = llength(clp);
1.9 mickey 524: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
1.6 millert 525: REG_STARTEND);
1.5 millert 526:
527: /* Delete line when appropriate */
528: if ((cond == FALSE && error) || (cond == TRUE && !error)) {
529: curwp->w_doto = 0;
530: curwp->w_dotp = clp;
531: count++;
532: s = ldelete(llength(clp) + 1, KNONE);
533: clp = curwp->w_dotp;
1.25 kjell 534: curwp->w_rflag |= WFMOVE;
1.5 millert 535: if (s == FALSE)
536: return (FALSE);
537: } else
538: clp = lforw(clp);
539: }
1.1 deraadt 540:
1.5 millert 541: ewprintf("%d line(s) deleted", count);
542: if (count > 0)
1.25 kjell 543: curwp->w_rflag |= WFMOVE;
1.1 deraadt 544:
1.5 millert 545: return (TRUE);
1.1 deraadt 546: }
547:
1.5 millert 548: /*
1.14 db 549: * Count lines matching regex.
1.1 deraadt 550: */
1.22 kjell 551: /* ARGSUSED */
1.6 millert 552: int
1.12 cloder 553: cntmatchlines(int f, int n)
1.5 millert 554: {
1.6 millert 555: int s;
1.1 deraadt 556:
1.5 millert 557: if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
558: return (s);
559: s = countmatches(TRUE);
1.14 db 560:
1.5 millert 561: return (s);
1.1 deraadt 562: }
563:
1.5 millert 564: /*
1.14 db 565: * Count lines that fail to match regex.
1.1 deraadt 566: */
1.22 kjell 567: /* ARGSUSED */
1.6 millert 568: int
1.12 cloder 569: cntnonmatchlines(int f, int n)
1.5 millert 570: {
1.6 millert 571: int s;
1.1 deraadt 572:
1.5 millert 573: if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
574: return (s);
575: s = countmatches(FALSE);
1.1 deraadt 576:
1.5 millert 577: return (s);
1.1 deraadt 578: }
579:
1.6 millert 580: /*
581: * This function does the work of counting matching lines.
582: */
583: int
1.12 cloder 584: countmatches(int cond)
1.1 deraadt 585: {
1.6 millert 586: int error;
587: int count = 0;
1.21 deraadt 588: struct line *clp;
1.5 millert 589:
590: clp = curwp->w_dotp;
591: if (curwp->w_doto == llength(clp))
592: /* Consider dot on next line */
593: clp = lforw(clp);
594:
1.24 kjell 595: while (clp != (curbp->b_headp)) {
1.5 millert 596: /* see if line matches */
597: re_match[0].rm_so = 0;
598: re_match[0].rm_eo = llength(clp);
1.9 mickey 599: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
1.6 millert 600: REG_STARTEND);
1.5 millert 601:
602: /* Count line when appropriate */
603: if ((cond == FALSE && error) || (cond == TRUE && !error))
604: count++;
605: clp = lforw(clp);
606: }
1.1 deraadt 607:
1.5 millert 608: if (cond)
609: ewprintf("Number of lines matching: %d", count);
610: else
611: ewprintf("Number of lines not matching: %d", count);
612:
613: return (TRUE);
1.1 deraadt 614: }
1.6 millert 615: #endif /* REGEX */