/* Annotation of src/usr.bin/mg/re_search.c, Revision 1.30 */
/*	$OpenBSD: re_search.c,v 1.29 2013/12/20 18:44:13 florian Exp $	*/

/* This file is in the public domain. */

/*
 *	regular expression search commands for Mg
 *
 * This file contains functions to implement several of GNU Emacs's regular
 * expression functions for Mg.  Several of the routines below are just minor
 * re-arrangements of Mg's non-regular expression search functions.  Some of
 * them are similar in structure to the original MicroEMACS, others are
 * modifications of Rich Ellison's code.  Peter Newton re-wrote about half of
 * them from scratch.
 */

#ifdef REGEX
#include "def.h"

#include <sys/types.h>
#include <regex.h>
#include <string.h>

#include "macro.h"

1.6 millert 24: #define SRCH_BEGIN (0) /* search sub-codes */
1.1 deraadt 25: #define SRCH_FORW (-1)
26: #define SRCH_BACK (-2)
27: #define SRCH_NOPR (-3)
28: #define SRCH_ACCM (-4)
29: #define SRCH_MARK (-5)
30:
1.6 millert 31: #define RE_NMATCH 10 /* max number of matches */
32: #define REPLEN 256 /* max length of replacement string */
1.2 millert 33:
1.6 millert 34: char re_pat[NPAT]; /* regex pattern */
35: int re_srch_lastdir = SRCH_NOPR; /* last search flags */
36: int casefoldsearch = TRUE; /* does search ignore case? */
37:
1.20 kjell 38: static int re_doreplace(RSIZE, char *);
1.11 millert 39: static int re_forwsrch(void);
40: static int re_backsrch(void);
41: static int re_readpattern(char *);
42: static int killmatches(int);
43: static int countmatches(int);
1.1 deraadt 44:
45: /*
46: * Search forward.
1.9 mickey 47: * Get a search string from the user and search for it starting at ".". If
48: * found, move "." to just after the matched characters. display does all
1.6 millert 49: * the hard stuff. If not found, it just prints a message.
1.1 deraadt 50: */
1.5 millert 51: /* ARGSUSED */
1.6 millert 52: int
1.12 cloder 53: re_forwsearch(int f, int n)
1.5 millert 54: {
1.6 millert 55: int s;
1.1 deraadt 56:
1.5 millert 57: if ((s = re_readpattern("RE Search")) != TRUE)
1.1 deraadt 58: return (s);
59: if (re_forwsrch() == FALSE) {
1.30 ! lum 60: dobeep();
1.1 deraadt 61: ewprintf("Search failed: \"%s\"", re_pat);
62: return (FALSE);
63: }
64: re_srch_lastdir = SRCH_FORW;
65: return (TRUE);
66: }
67:
68: /*
69: * Reverse search.
1.14 db 70: * Get a search string from the user, and search, starting at "."
1.1 deraadt 71: * and proceeding toward the front of the buffer. If found "." is left
72: * pointing at the first character of the pattern [the last character that
73: * was matched].
74: */
1.5 millert 75: /* ARGSUSED */
1.6 millert 76: int
1.12 cloder 77: re_backsearch(int f, int n)
1.5 millert 78: {
1.6 millert 79: int s;
1.1 deraadt 80:
1.5 millert 81: if ((s = re_readpattern("RE Search backward")) != TRUE)
1.1 deraadt 82: return (s);
83: if (re_backsrch() == FALSE) {
1.30 ! lum 84: dobeep();
1.1 deraadt 85: ewprintf("Search failed: \"%s\"", re_pat);
86: return (FALSE);
87: }
88: re_srch_lastdir = SRCH_BACK;
89: return (TRUE);
90: }
91:
92: /*
1.9 mickey 93: * Search again, using the same search string and direction as the last search
94: * command. The direction has been saved in "srch_lastdir", so you know which
1.6 millert 95: * way to go.
96: *
97: * XXX: This code has problems -- some incompatibility(?) with extend.c causes
98: * match to fail when it should not.
1.1 deraadt 99: */
1.5 millert 100: /* ARGSUSED */
1.6 millert 101: int
1.12 cloder 102: re_searchagain(int f, int n)
1.5 millert 103: {
104: if (re_srch_lastdir == SRCH_NOPR) {
1.30 ! lum 105: dobeep();
1.5 millert 106: ewprintf("No last search");
107: return (FALSE);
108: }
109: if (re_srch_lastdir == SRCH_FORW) {
110: if (re_forwsrch() == FALSE) {
1.30 ! lum 111: dobeep();
1.5 millert 112: ewprintf("Search failed: \"%s\"", re_pat);
113: return (FALSE);
114: }
115: return (TRUE);
116: }
1.9 mickey 117: if (re_srch_lastdir == SRCH_BACK)
1.5 millert 118: if (re_backsrch() == FALSE) {
1.30 ! lum 119: dobeep();
1.5 millert 120: ewprintf("Search failed: \"%s\"", re_pat);
121: return (FALSE);
122: }
1.6 millert 123:
124: return (TRUE);
1.1 deraadt 125: }
126:
127: /* Compiled regex goes here-- changed only when new pattern read */
1.28 jasper 128: static regex_t regex_buff;
129: static regmatch_t regex_match[RE_NMATCH];
1.1 deraadt 130:
131: /*
132: * Re-Query Replace.
133: * Replace strings selectively. Does a search and replace operation.
134: */
1.5 millert 135: /* ARGSUSED */
1.6 millert 136: int
1.12 cloder 137: re_queryrepl(int f, int n)
1.5 millert 138: {
1.14 db 139: int rcnt = 0; /* replacements made so far */
1.13 vincent 140: int plen, s; /* length of found string */
1.19 deraadt 141: char news[NPAT]; /* replacement string */
1.1 deraadt 142:
1.5 millert 143: if ((s = re_readpattern("RE Query replace")) != TRUE)
1.1 deraadt 144: return (s);
1.19 deraadt 145: if (eread("Query replace %s with: ", news, NPAT,
146: EFNUL | EFNEW | EFCR, re_pat) == NULL)
1.13 vincent 147: return (ABORT);
1.1 deraadt 148: ewprintf("Query replacing %s with %s:", re_pat, news);
149:
150: /*
151: * Search forward repeatedly, checking each time whether to insert
152: * or not. The "!" case makes the check always true, so it gets put
153: * into a tighter loop for efficiency.
154: */
155: while (re_forwsrch() == TRUE) {
1.5 millert 156: retry:
1.27 lum 157: update(CMODE);
1.1 deraadt 158: switch (getkey(FALSE)) {
159: case ' ':
1.28 jasper 160: plen = regex_match[0].rm_eo - regex_match[0].rm_so;
1.20 kjell 161: if (re_doreplace((RSIZE)plen, news) == FALSE)
1.1 deraadt 162: return (FALSE);
163: rcnt++;
164: break;
165:
166: case '.':
1.28 jasper 167: plen = regex_match[0].rm_eo - regex_match[0].rm_so;
1.20 kjell 168: if (re_doreplace((RSIZE)plen, news) == FALSE)
1.1 deraadt 169: return (FALSE);
170: rcnt++;
171: goto stopsearch;
172:
1.6 millert 173: case CCHR('G'): /* ^G */
1.8 art 174: (void)ctrlg(FFRAND, 0);
1.22 kjell 175: goto stopsearch;
1.6 millert 176: case CCHR('['): /* ESC */
1.1 deraadt 177: case '`':
178: goto stopsearch;
179: case '!':
180: do {
1.28 jasper 181: plen = regex_match[0].rm_eo - regex_match[0].rm_so;
1.20 kjell 182: if (re_doreplace((RSIZE)plen, news) == FALSE)
1.1 deraadt 183: return (FALSE);
184: rcnt++;
185: } while (re_forwsrch() == TRUE);
186: goto stopsearch;
187:
1.6 millert 188: case CCHR('?'): /* To not replace */
1.1 deraadt 189: break;
190:
191: default:
1.5 millert 192: ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
1.1 deraadt 193: goto retry;
194: }
195: }
1.6 millert 196:
1.1 deraadt 197: stopsearch:
1.25 kjell 198: curwp->w_rflag |= WFFULL;
1.27 lum 199: update(CMODE);
1.1 deraadt 200: if (!inmacro) {
201: if (rcnt == 0)
202: ewprintf("(No replacements done)");
203: else if (rcnt == 1)
204: ewprintf("(1 replacement done)");
205: else
206: ewprintf("(%d replacements done)", rcnt);
207: }
1.14 db 208: return (TRUE);
1.1 deraadt 209: }
210:
1.5 millert 211: /*
212: * Routine re_doreplace calls lreplace to make replacements needed by
213: * re_query replace. Its reason for existence is to deal with \1, \2. etc.
1.12 cloder 214: * plen: length to remove
215: * st: replacement string
1.1 deraadt 216: */
1.6 millert 217: static int
1.20 kjell 218: re_doreplace(RSIZE plen, char *st)
1.6 millert 219: {
220: int j, k, s, more, num, state;
1.21 deraadt 221: struct line *clp;
1.6 millert 222: char repstr[REPLEN];
1.5 millert 223:
224: clp = curwp->w_dotp;
225: more = TRUE;
226: j = 0;
227: state = 0;
1.6 millert 228: num = 0;
1.5 millert 229:
230: /* The following FSA parses the replacement string */
231: while (more) {
232: switch (state) {
233: case 0:
234: if (*st == '\\') {
235: st++;
236: state = 1;
237: } else if (*st == '\0')
238: more = FALSE;
239: else {
240: repstr[j] = *st;
241: j++;
242: if (j >= REPLEN)
243: return (FALSE);
244: st++;
245: }
246: break;
247: case 1:
248: if (*st >= '0' && *st <= '9') {
249: num = *st - '0';
250: st++;
251: state = 2;
252: } else if (*st == '\0')
253: more = FALSE;
254: else {
255: repstr[j] = *st;
256: j++;
257: if (j >= REPLEN)
258: return (FALSE);
259: st++;
260: state = 0;
261: }
262: break;
263: case 2:
264: if (*st >= '0' && *st <= '9') {
265: num = 10 * num + *st - '0';
266: st++;
267: } else {
268: if (num >= RE_NMATCH)
269: return (FALSE);
1.28 jasper 270: k = regex_match[num].rm_eo - regex_match[num].rm_so;
1.5 millert 271: if (j + k >= REPLEN)
272: return (FALSE);
1.28 jasper 273: bcopy(&(clp->l_text[regex_match[num].rm_so]),
1.6 millert 274: &repstr[j], k);
1.5 millert 275: j += k;
276: if (*st == '\0')
277: more = FALSE;
278: if (*st == '\\') {
279: st++;
280: state = 1;
281: } else {
282: repstr[j] = *st;
283: j++;
284: if (j >= REPLEN)
285: return (FALSE);
286: st++;
287: state = 0;
288: }
289: }
290: break;
1.6 millert 291: } /* switch (state) */
292: } /* while (more) */
1.1 deraadt 293:
1.5 millert 294: repstr[j] = '\0';
1.20 kjell 295: s = lreplace(plen, repstr);
1.5 millert 296: return (s);
1.1 deraadt 297: }
298:
299: /*
1.9 mickey 300: * This routine does the real work of a forward search. The pattern is
301: * sitting in the external variable "pat". If found, dot is updated, the
1.6 millert 302: * window system is notified of the change, and TRUE is returned. If the
1.1 deraadt 303: * string isn't found, FALSE is returned.
304: */
1.6 millert 305: static int
1.12 cloder 306: re_forwsrch(void)
1.5 millert 307: {
1.29 florian 308: int tbo, tdotline, error;
1.21 deraadt 309: struct line *clp;
1.5 millert 310:
311: clp = curwp->w_dotp;
312: tbo = curwp->w_doto;
1.29 florian 313: tdotline = curwp->w_dotline;
1.5 millert 314:
315: if (tbo == clp->l_used)
316: /*
1.6 millert 317: * Don't start matching past end of line -- must move to
318: * beginning of next line, unless at end of file.
1.5 millert 319: */
1.24 kjell 320: if (clp != curbp->b_headp) {
1.5 millert 321: clp = lforw(clp);
1.29 florian 322: tdotline++;
1.5 millert 323: tbo = 0;
324: }
325: /*
326: * Note this loop does not process the last line, but this editor
327: * always makes the last line empty so this is good.
328: */
1.24 kjell 329: while (clp != (curbp->b_headp)) {
1.28 jasper 330: regex_match[0].rm_so = tbo;
331: regex_match[0].rm_eo = llength(clp);
332: error = regexec(®ex_buff, ltext(clp), RE_NMATCH, regex_match,
1.6 millert 333: REG_STARTEND);
334: if (error != 0) {
1.5 millert 335: clp = lforw(clp);
1.29 florian 336: tdotline++;
1.5 millert 337: tbo = 0;
338: } else {
1.28 jasper 339: curwp->w_doto = regex_match[0].rm_eo;
1.5 millert 340: curwp->w_dotp = clp;
1.29 florian 341: curwp->w_dotline = tdotline;
1.25 kjell 342: curwp->w_rflag |= WFMOVE;
1.5 millert 343: return (TRUE);
344: }
345: }
346: return (FALSE);
1.1 deraadt 347: }
348:
349: /*
1.6 millert 350: * This routine does the real work of a backward search. The pattern is sitting
1.9 mickey 351: * in the external variable "re_pat". If found, dot is updated, the window
352: * system is notified of the change, and TRUE is returned. If the string isn't
1.6 millert 353: * found, FALSE is returned.
1.1 deraadt 354: */
1.6 millert 355: static int
1.12 cloder 356: re_backsrch(void)
1.5 millert 357: {
1.21 deraadt 358: struct line *clp;
1.29 florian 359: int tbo, tdotline;
1.6 millert 360: regmatch_t lastmatch;
1.5 millert 361:
362: clp = curwp->w_dotp;
363: tbo = curwp->w_doto;
1.29 florian 364: tdotline = curwp->w_dotline;
1.5 millert 365:
366: /* Start search one position to the left of dot */
367: tbo = tbo - 1;
368: if (tbo < 0) {
369: /* must move up one line */
370: clp = lback(clp);
1.29 florian 371: tdotline--;
1.5 millert 372: tbo = llength(clp);
373: }
1.6 millert 374:
1.5 millert 375: /*
376: * Note this loop does not process the last line, but this editor
377: * always makes the last line empty so this is good.
378: */
1.24 kjell 379: while (clp != (curbp->b_headp)) {
1.28 jasper 380: regex_match[0].rm_so = 0;
381: regex_match[0].rm_eo = llength(clp);
1.5 millert 382: lastmatch.rm_so = -1;
383: /*
384: * Keep searching until we don't match any longer. Assumes a
1.28 jasper 385: * non-match does not modify the regex_match array. We have to
1.5 millert 386: * do this character-by-character after the first match since
387: * POSIX regexps don't give you a way to do reverse matches.
388: */
1.28 jasper 389: while (!regexec(®ex_buff, ltext(clp), RE_NMATCH, regex_match,
390: REG_STARTEND) && regex_match[0].rm_so < tbo) {
391: memcpy(&lastmatch, ®ex_match[0], sizeof(regmatch_t));
392: regex_match[0].rm_so++;
393: regex_match[0].rm_eo = llength(clp);
1.5 millert 394: }
395: if (lastmatch.rm_so == -1) {
396: clp = lback(clp);
1.29 florian 397: tdotline--;
1.5 millert 398: tbo = llength(clp);
399: } else {
1.28 jasper 400: memcpy(®ex_match[0], &lastmatch, sizeof(regmatch_t));
401: curwp->w_doto = regex_match[0].rm_so;
1.5 millert 402: curwp->w_dotp = clp;
1.29 florian 403: curwp->w_dotline = tdotline;
1.25 kjell 404: curwp->w_rflag |= WFMOVE;
1.5 millert 405: return (TRUE);
406: }
407: }
408: return (FALSE);
1.1 deraadt 409: }
410:
411: /*
412: * Read a pattern.
413: * Stash it in the external variable "re_pat". The "pat" is
414: * not updated if the user types in an empty line. If the user typed
415: * an empty line, and there is no old pattern, it is an error.
416: * Display the old pattern, in the style of Jeff Lomicka. There is
417: * some do-it-yourself control expansion.
418: */
1.6 millert 419: static int
1.12 cloder 420: re_readpattern(char *prompt)
1.5 millert 421: {
1.6 millert 422: static int dofree = 0;
1.13 vincent 423: int flags, error, s;
424: char tpat[NPAT], *rep;
1.5 millert 425:
426: if (re_pat[0] == '\0')
1.18 kjell 427: rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, prompt);
1.5 millert 428: else
1.15 kjell 429: rep = eread("%s: (default %s) ", tpat, NPAT,
430: EFNUL | EFNEW | EFCR, prompt, re_pat);
1.18 kjell 431: if (rep == NULL)
432: return (ABORT);
433: if (rep[0] != '\0') {
1.5 millert 434: /* New pattern given */
1.14 db 435: (void)strlcpy(re_pat, tpat, sizeof(re_pat));
1.5 millert 436: if (casefoldsearch)
437: flags = REG_EXTENDED | REG_ICASE;
438: else
439: flags = REG_EXTENDED;
440: if (dofree)
1.28 jasper 441: regfree(®ex_buff);
442: error = regcomp(®ex_buff, re_pat, flags);
1.6 millert 443: if (error != 0) {
444: char message[256];
1.28 jasper 445: regerror(error, ®ex_buff, message, sizeof(message));
1.30 ! lum 446: dobeep();
1.5 millert 447: ewprintf("Regex Error: %s", message);
448: re_pat[0] = '\0';
449: return (FALSE);
450: }
451: dofree = 1;
1.13 vincent 452: s = TRUE;
453: } else if (rep[0] == '\0' && re_pat[0] != '\0')
1.5 millert 454: /* Just using old pattern */
455: s = TRUE;
1.13 vincent 456: else
457: s = FALSE;
1.1 deraadt 458: return (s);
459: }
460:
1.5 millert 461: /*
462: * Cause case to not matter in searches. This is the default. If called
463: * with argument cause case to matter.
1.1 deraadt 464: */
1.22 kjell 465: /* ARGSUSED*/
1.6 millert 466: int
1.12 cloder 467: setcasefold(int f, int n)
1.5 millert 468: {
469: if (f & FFARG) {
470: casefoldsearch = FALSE;
471: ewprintf("Case-fold-search unset");
472: } else {
473: casefoldsearch = TRUE;
474: ewprintf("Case-fold-search set");
475: }
1.1 deraadt 476:
1.5 millert 477: /*
478: * Invalidate the regular expression pattern since I'm too lazy to
479: * recompile it.
480: */
481: re_pat[0] = '\0';
482: return (TRUE);
1.6 millert 483: }
1.1 deraadt 484:
1.5 millert 485: /*
1.14 db 486: * Delete all lines after dot that contain a string matching regex.
1.1 deraadt 487: */
1.22 kjell 488: /* ARGSUSED */
1.6 millert 489: int
1.12 cloder 490: delmatchlines(int f, int n)
1.5 millert 491: {
1.6 millert 492: int s;
1.1 deraadt 493:
1.9 mickey 494: if ((s = re_readpattern("Flush lines (containing match for regexp)"))
1.6 millert 495: != TRUE)
1.5 millert 496: return (s);
1.1 deraadt 497:
1.5 millert 498: s = killmatches(TRUE);
499: return (s);
1.1 deraadt 500: }
501:
1.5 millert 502: /*
1.14 db 503: * Delete all lines after dot that don't contain a string matching regex.
1.1 deraadt 504: */
1.22 kjell 505: /* ARGSUSED */
1.6 millert 506: int
1.12 cloder 507: delnonmatchlines(int f, int n)
1.5 millert 508: {
1.6 millert 509: int s;
1.1 deraadt 510:
1.9 mickey 511: if ((s = re_readpattern("Keep lines (containing match for regexp)"))
1.6 millert 512: != TRUE)
1.5 millert 513: return (s);
1.1 deraadt 514:
1.5 millert 515: s = killmatches(FALSE);
516: return (s);
1.1 deraadt 517: }
518:
1.9 mickey 519: /*
1.14 db 520: * This function does the work of deleting matching lines.
1.6 millert 521: */
522: static int
1.12 cloder 523: killmatches(int cond)
1.1 deraadt 524: {
1.6 millert 525: int s, error;
526: int count = 0;
1.21 deraadt 527: struct line *clp;
1.5 millert 528:
529: clp = curwp->w_dotp;
530: if (curwp->w_doto == llength(clp))
531: /* Consider dot on next line */
532: clp = lforw(clp);
533:
1.24 kjell 534: while (clp != (curbp->b_headp)) {
1.5 millert 535: /* see if line matches */
1.28 jasper 536: regex_match[0].rm_so = 0;
537: regex_match[0].rm_eo = llength(clp);
538: error = regexec(®ex_buff, ltext(clp), RE_NMATCH, regex_match,
1.6 millert 539: REG_STARTEND);
1.5 millert 540:
541: /* Delete line when appropriate */
542: if ((cond == FALSE && error) || (cond == TRUE && !error)) {
543: curwp->w_doto = 0;
544: curwp->w_dotp = clp;
545: count++;
546: s = ldelete(llength(clp) + 1, KNONE);
547: clp = curwp->w_dotp;
1.25 kjell 548: curwp->w_rflag |= WFMOVE;
1.5 millert 549: if (s == FALSE)
550: return (FALSE);
551: } else
552: clp = lforw(clp);
553: }
1.1 deraadt 554:
1.5 millert 555: ewprintf("%d line(s) deleted", count);
556: if (count > 0)
1.25 kjell 557: curwp->w_rflag |= WFMOVE;
1.1 deraadt 558:
1.5 millert 559: return (TRUE);
1.1 deraadt 560: }
561:
1.5 millert 562: /*
1.14 db 563: * Count lines matching regex.
1.1 deraadt 564: */
1.22 kjell 565: /* ARGSUSED */
1.6 millert 566: int
1.12 cloder 567: cntmatchlines(int f, int n)
1.5 millert 568: {
1.6 millert 569: int s;
1.1 deraadt 570:
1.5 millert 571: if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
572: return (s);
573: s = countmatches(TRUE);
1.14 db 574:
1.5 millert 575: return (s);
1.1 deraadt 576: }
577:
1.5 millert 578: /*
1.14 db 579: * Count lines that fail to match regex.
1.1 deraadt 580: */
1.22 kjell 581: /* ARGSUSED */
1.6 millert 582: int
1.12 cloder 583: cntnonmatchlines(int f, int n)
1.5 millert 584: {
1.6 millert 585: int s;
1.1 deraadt 586:
1.5 millert 587: if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
588: return (s);
589: s = countmatches(FALSE);
1.1 deraadt 590:
1.5 millert 591: return (s);
1.1 deraadt 592: }
593:
1.6 millert 594: /*
595: * This function does the work of counting matching lines.
596: */
597: int
1.12 cloder 598: countmatches(int cond)
1.1 deraadt 599: {
1.6 millert 600: int error;
601: int count = 0;
1.21 deraadt 602: struct line *clp;
1.5 millert 603:
604: clp = curwp->w_dotp;
605: if (curwp->w_doto == llength(clp))
606: /* Consider dot on next line */
607: clp = lforw(clp);
608:
1.24 kjell 609: while (clp != (curbp->b_headp)) {
1.5 millert 610: /* see if line matches */
1.28 jasper 611: regex_match[0].rm_so = 0;
612: regex_match[0].rm_eo = llength(clp);
613: error = regexec(®ex_buff, ltext(clp), RE_NMATCH, regex_match,
1.6 millert 614: REG_STARTEND);
1.5 millert 615:
616: /* Count line when appropriate */
617: if ((cond == FALSE && error) || (cond == TRUE && !error))
618: count++;
619: clp = lforw(clp);
620: }
1.1 deraadt 621:
1.5 millert 622: if (cond)
623: ewprintf("Number of lines matching: %d", count);
624: else
625: ewprintf("Number of lines not matching: %d", count);
626:
627: return (TRUE);
1.1 deraadt 628: }
#endif	/* REGEX */