Annotation of src/usr.bin/mg/re_search.c, Revision 1.31
1.31 ! bcallah 1: /* $OpenBSD: re_search.c,v 1.30 2014/03/20 07:47:29 lum Exp $ */
1.17 kjell 2:
3: /* This file is in the public domain. */
1.7 niklas 4:
1.1 deraadt 5: /*
1.6 millert 6: * regular expression search commands for Mg
1.1 deraadt 7: *
1.6 millert 8: * This file contains functions to implement several of gnuemacs's regular
9: * expression functions for Mg. Several of the routines below are just minor
10: * re-arrangements of Mg's non-regular expression search functions. Some of
1.9 mickey 11: * them are similar in structure to the original MicroEMACS, others are
1.6 millert 12: * modifications of Rich Ellison's code. Peter Newton re-wrote about half of
13: * them from scratch.
1.1 deraadt 14: */
15:
1.6 millert 16: #ifdef REGEX
1.31 ! bcallah 17: #include <sys/queue.h>
1.2 millert 18: #include <sys/types.h>
19: #include <regex.h>
1.31 ! bcallah 20: #include <signal.h>
! 21: #include <stdio.h>
! 22: #include <string.h>
1.2 millert 23:
1.31 ! bcallah 24: #include "def.h"
1.6 millert 25: #include "macro.h"
1.1 deraadt 26:
/*
 * Search sub-codes.  Within this file only SRCH_FORW, SRCH_BACK and
 * SRCH_NOPR are used, as values of re_srch_lastdir.
 */
#define SRCH_BEGIN	(0)		/* search sub-codes */
#define SRCH_FORW	(-1)		/* last search ran forward */
#define SRCH_BACK	(-2)		/* last search ran backward */
#define SRCH_NOPR	(-3)		/* no previous search to repeat */
#define SRCH_ACCM	(-4)
#define SRCH_MARK	(-5)

/*
 * RE_NMATCH sizes the regex_match[] array handed to regexec() and bounds
 * the \N group numbers accepted in a replacement string; REPLEN sizes the
 * buffer in which re_doreplace() builds the expanded replacement.
 */
#define RE_NMATCH	10		/* max number of matches */
#define REPLEN		256		/* max length of replacement string */

char	re_pat[NPAT];			/* regex pattern */
int	re_srch_lastdir = SRCH_NOPR;	/* last search flags */
int	casefoldsearch = TRUE;		/* does search ignore case? */

/* Helpers private to this file. */
static int	 re_doreplace(RSIZE, char *);
static int	 re_forwsrch(void);
static int	 re_backsrch(void);
static int	 re_readpattern(char *);
static int	 killmatches(int);
static int	 countmatches(int);
1.1 deraadt 47:
48: /*
49: * Search forward.
1.9 mickey 50: * Get a search string from the user and search for it starting at ".". If
51: * found, move "." to just after the matched characters. display does all
1.6 millert 52: * the hard stuff. If not found, it just prints a message.
1.1 deraadt 53: */
1.5 millert 54: /* ARGSUSED */
1.6 millert 55: int
1.12 cloder 56: re_forwsearch(int f, int n)
1.5 millert 57: {
1.6 millert 58: int s;
1.1 deraadt 59:
1.5 millert 60: if ((s = re_readpattern("RE Search")) != TRUE)
1.1 deraadt 61: return (s);
62: if (re_forwsrch() == FALSE) {
1.30 lum 63: dobeep();
1.1 deraadt 64: ewprintf("Search failed: \"%s\"", re_pat);
65: return (FALSE);
66: }
67: re_srch_lastdir = SRCH_FORW;
68: return (TRUE);
69: }
70:
71: /*
72: * Reverse search.
1.14 db 73: * Get a search string from the user, and search, starting at "."
1.1 deraadt 74: * and proceeding toward the front of the buffer. If found "." is left
75: * pointing at the first character of the pattern [the last character that
76: * was matched].
77: */
1.5 millert 78: /* ARGSUSED */
1.6 millert 79: int
1.12 cloder 80: re_backsearch(int f, int n)
1.5 millert 81: {
1.6 millert 82: int s;
1.1 deraadt 83:
1.5 millert 84: if ((s = re_readpattern("RE Search backward")) != TRUE)
1.1 deraadt 85: return (s);
86: if (re_backsrch() == FALSE) {
1.30 lum 87: dobeep();
1.1 deraadt 88: ewprintf("Search failed: \"%s\"", re_pat);
89: return (FALSE);
90: }
91: re_srch_lastdir = SRCH_BACK;
92: return (TRUE);
93: }
94:
95: /*
1.9 mickey 96: * Search again, using the same search string and direction as the last search
97: * command. The direction has been saved in "srch_lastdir", so you know which
1.6 millert 98: * way to go.
99: *
100: * XXX: This code has problems -- some incompatibility(?) with extend.c causes
101: * match to fail when it should not.
1.1 deraadt 102: */
1.5 millert 103: /* ARGSUSED */
1.6 millert 104: int
1.12 cloder 105: re_searchagain(int f, int n)
1.5 millert 106: {
107: if (re_srch_lastdir == SRCH_NOPR) {
1.30 lum 108: dobeep();
1.5 millert 109: ewprintf("No last search");
110: return (FALSE);
111: }
112: if (re_srch_lastdir == SRCH_FORW) {
113: if (re_forwsrch() == FALSE) {
1.30 lum 114: dobeep();
1.5 millert 115: ewprintf("Search failed: \"%s\"", re_pat);
116: return (FALSE);
117: }
118: return (TRUE);
119: }
1.9 mickey 120: if (re_srch_lastdir == SRCH_BACK)
1.5 millert 121: if (re_backsrch() == FALSE) {
1.30 lum 122: dobeep();
1.5 millert 123: ewprintf("Search failed: \"%s\"", re_pat);
124: return (FALSE);
125: }
1.6 millert 126:
127: return (TRUE);
1.1 deraadt 128: }
129:
/*
 * Compiled regex goes here -- changed only when a new pattern is read by
 * re_readpattern().  regex_match[] receives the match offsets from every
 * regexec() call in this file; entry 0 is the whole match and also carries
 * the REG_STARTEND search window on input.
 */
static regex_t		regex_buff;
static regmatch_t	regex_match[RE_NMATCH];
1.1 deraadt 133:
134: /*
135: * Re-Query Replace.
136: * Replace strings selectively. Does a search and replace operation.
137: */
1.5 millert 138: /* ARGSUSED */
1.6 millert 139: int
1.12 cloder 140: re_queryrepl(int f, int n)
1.5 millert 141: {
1.14 db 142: int rcnt = 0; /* replacements made so far */
1.13 vincent 143: int plen, s; /* length of found string */
1.19 deraadt 144: char news[NPAT]; /* replacement string */
1.1 deraadt 145:
1.5 millert 146: if ((s = re_readpattern("RE Query replace")) != TRUE)
1.1 deraadt 147: return (s);
1.19 deraadt 148: if (eread("Query replace %s with: ", news, NPAT,
149: EFNUL | EFNEW | EFCR, re_pat) == NULL)
1.13 vincent 150: return (ABORT);
1.1 deraadt 151: ewprintf("Query replacing %s with %s:", re_pat, news);
152:
153: /*
154: * Search forward repeatedly, checking each time whether to insert
155: * or not. The "!" case makes the check always true, so it gets put
156: * into a tighter loop for efficiency.
157: */
158: while (re_forwsrch() == TRUE) {
1.5 millert 159: retry:
1.27 lum 160: update(CMODE);
1.1 deraadt 161: switch (getkey(FALSE)) {
162: case ' ':
1.28 jasper 163: plen = regex_match[0].rm_eo - regex_match[0].rm_so;
1.20 kjell 164: if (re_doreplace((RSIZE)plen, news) == FALSE)
1.1 deraadt 165: return (FALSE);
166: rcnt++;
167: break;
168:
169: case '.':
1.28 jasper 170: plen = regex_match[0].rm_eo - regex_match[0].rm_so;
1.20 kjell 171: if (re_doreplace((RSIZE)plen, news) == FALSE)
1.1 deraadt 172: return (FALSE);
173: rcnt++;
174: goto stopsearch;
175:
1.6 millert 176: case CCHR('G'): /* ^G */
1.8 art 177: (void)ctrlg(FFRAND, 0);
1.22 kjell 178: goto stopsearch;
1.6 millert 179: case CCHR('['): /* ESC */
1.1 deraadt 180: case '`':
181: goto stopsearch;
182: case '!':
183: do {
1.28 jasper 184: plen = regex_match[0].rm_eo - regex_match[0].rm_so;
1.20 kjell 185: if (re_doreplace((RSIZE)plen, news) == FALSE)
1.1 deraadt 186: return (FALSE);
187: rcnt++;
188: } while (re_forwsrch() == TRUE);
189: goto stopsearch;
190:
1.6 millert 191: case CCHR('?'): /* To not replace */
1.1 deraadt 192: break;
193:
194: default:
1.5 millert 195: ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
1.1 deraadt 196: goto retry;
197: }
198: }
1.6 millert 199:
1.1 deraadt 200: stopsearch:
1.25 kjell 201: curwp->w_rflag |= WFFULL;
1.27 lum 202: update(CMODE);
1.1 deraadt 203: if (!inmacro) {
204: if (rcnt == 0)
205: ewprintf("(No replacements done)");
206: else if (rcnt == 1)
207: ewprintf("(1 replacement done)");
208: else
209: ewprintf("(%d replacements done)", rcnt);
210: }
1.14 db 211: return (TRUE);
1.1 deraadt 212: }
213:
1.5 millert 214: /*
215: * Routine re_doreplace calls lreplace to make replacements needed by
216: * re_query replace. Its reason for existence is to deal with \1, \2. etc.
1.12 cloder 217: * plen: length to remove
218: * st: replacement string
1.1 deraadt 219: */
1.6 millert 220: static int
1.20 kjell 221: re_doreplace(RSIZE plen, char *st)
1.6 millert 222: {
223: int j, k, s, more, num, state;
1.21 deraadt 224: struct line *clp;
1.6 millert 225: char repstr[REPLEN];
1.5 millert 226:
227: clp = curwp->w_dotp;
228: more = TRUE;
229: j = 0;
230: state = 0;
1.6 millert 231: num = 0;
1.5 millert 232:
233: /* The following FSA parses the replacement string */
234: while (more) {
235: switch (state) {
236: case 0:
237: if (*st == '\\') {
238: st++;
239: state = 1;
240: } else if (*st == '\0')
241: more = FALSE;
242: else {
243: repstr[j] = *st;
244: j++;
245: if (j >= REPLEN)
246: return (FALSE);
247: st++;
248: }
249: break;
250: case 1:
251: if (*st >= '0' && *st <= '9') {
252: num = *st - '0';
253: st++;
254: state = 2;
255: } else if (*st == '\0')
256: more = FALSE;
257: else {
258: repstr[j] = *st;
259: j++;
260: if (j >= REPLEN)
261: return (FALSE);
262: st++;
263: state = 0;
264: }
265: break;
266: case 2:
267: if (*st >= '0' && *st <= '9') {
268: num = 10 * num + *st - '0';
269: st++;
270: } else {
271: if (num >= RE_NMATCH)
272: return (FALSE);
1.28 jasper 273: k = regex_match[num].rm_eo - regex_match[num].rm_so;
1.5 millert 274: if (j + k >= REPLEN)
275: return (FALSE);
1.28 jasper 276: bcopy(&(clp->l_text[regex_match[num].rm_so]),
1.6 millert 277: &repstr[j], k);
1.5 millert 278: j += k;
279: if (*st == '\0')
280: more = FALSE;
281: if (*st == '\\') {
282: st++;
283: state = 1;
284: } else {
285: repstr[j] = *st;
286: j++;
287: if (j >= REPLEN)
288: return (FALSE);
289: st++;
290: state = 0;
291: }
292: }
293: break;
1.6 millert 294: } /* switch (state) */
295: } /* while (more) */
1.1 deraadt 296:
1.5 millert 297: repstr[j] = '\0';
1.20 kjell 298: s = lreplace(plen, repstr);
1.5 millert 299: return (s);
1.1 deraadt 300: }
301:
302: /*
1.9 mickey 303: * This routine does the real work of a forward search. The pattern is
304: * sitting in the external variable "pat". If found, dot is updated, the
1.6 millert 305: * window system is notified of the change, and TRUE is returned. If the
1.1 deraadt 306: * string isn't found, FALSE is returned.
307: */
1.6 millert 308: static int
1.12 cloder 309: re_forwsrch(void)
1.5 millert 310: {
1.29 florian 311: int tbo, tdotline, error;
1.21 deraadt 312: struct line *clp;
1.5 millert 313:
314: clp = curwp->w_dotp;
315: tbo = curwp->w_doto;
1.29 florian 316: tdotline = curwp->w_dotline;
1.5 millert 317:
318: if (tbo == clp->l_used)
319: /*
1.6 millert 320: * Don't start matching past end of line -- must move to
321: * beginning of next line, unless at end of file.
1.5 millert 322: */
1.24 kjell 323: if (clp != curbp->b_headp) {
1.5 millert 324: clp = lforw(clp);
1.29 florian 325: tdotline++;
1.5 millert 326: tbo = 0;
327: }
328: /*
329: * Note this loop does not process the last line, but this editor
330: * always makes the last line empty so this is good.
331: */
1.24 kjell 332: while (clp != (curbp->b_headp)) {
1.28 jasper 333: regex_match[0].rm_so = tbo;
334: regex_match[0].rm_eo = llength(clp);
335: error = regexec(®ex_buff, ltext(clp), RE_NMATCH, regex_match,
1.6 millert 336: REG_STARTEND);
337: if (error != 0) {
1.5 millert 338: clp = lforw(clp);
1.29 florian 339: tdotline++;
1.5 millert 340: tbo = 0;
341: } else {
1.28 jasper 342: curwp->w_doto = regex_match[0].rm_eo;
1.5 millert 343: curwp->w_dotp = clp;
1.29 florian 344: curwp->w_dotline = tdotline;
1.25 kjell 345: curwp->w_rflag |= WFMOVE;
1.5 millert 346: return (TRUE);
347: }
348: }
349: return (FALSE);
1.1 deraadt 350: }
351:
352: /*
1.6 millert 353: * This routine does the real work of a backward search. The pattern is sitting
1.9 mickey 354: * in the external variable "re_pat". If found, dot is updated, the window
355: * system is notified of the change, and TRUE is returned. If the string isn't
1.6 millert 356: * found, FALSE is returned.
1.1 deraadt 357: */
1.6 millert 358: static int
1.12 cloder 359: re_backsrch(void)
1.5 millert 360: {
1.21 deraadt 361: struct line *clp;
1.29 florian 362: int tbo, tdotline;
1.6 millert 363: regmatch_t lastmatch;
1.5 millert 364:
365: clp = curwp->w_dotp;
366: tbo = curwp->w_doto;
1.29 florian 367: tdotline = curwp->w_dotline;
1.5 millert 368:
369: /* Start search one position to the left of dot */
370: tbo = tbo - 1;
371: if (tbo < 0) {
372: /* must move up one line */
373: clp = lback(clp);
1.29 florian 374: tdotline--;
1.5 millert 375: tbo = llength(clp);
376: }
1.6 millert 377:
1.5 millert 378: /*
379: * Note this loop does not process the last line, but this editor
380: * always makes the last line empty so this is good.
381: */
1.24 kjell 382: while (clp != (curbp->b_headp)) {
1.28 jasper 383: regex_match[0].rm_so = 0;
384: regex_match[0].rm_eo = llength(clp);
1.5 millert 385: lastmatch.rm_so = -1;
386: /*
387: * Keep searching until we don't match any longer. Assumes a
1.28 jasper 388: * non-match does not modify the regex_match array. We have to
1.5 millert 389: * do this character-by-character after the first match since
390: * POSIX regexps don't give you a way to do reverse matches.
391: */
1.28 jasper 392: while (!regexec(®ex_buff, ltext(clp), RE_NMATCH, regex_match,
393: REG_STARTEND) && regex_match[0].rm_so < tbo) {
394: memcpy(&lastmatch, ®ex_match[0], sizeof(regmatch_t));
395: regex_match[0].rm_so++;
396: regex_match[0].rm_eo = llength(clp);
1.5 millert 397: }
398: if (lastmatch.rm_so == -1) {
399: clp = lback(clp);
1.29 florian 400: tdotline--;
1.5 millert 401: tbo = llength(clp);
402: } else {
1.28 jasper 403: memcpy(®ex_match[0], &lastmatch, sizeof(regmatch_t));
404: curwp->w_doto = regex_match[0].rm_so;
1.5 millert 405: curwp->w_dotp = clp;
1.29 florian 406: curwp->w_dotline = tdotline;
1.25 kjell 407: curwp->w_rflag |= WFMOVE;
1.5 millert 408: return (TRUE);
409: }
410: }
411: return (FALSE);
1.1 deraadt 412: }
413:
414: /*
415: * Read a pattern.
416: * Stash it in the external variable "re_pat". The "pat" is
417: * not updated if the user types in an empty line. If the user typed
418: * an empty line, and there is no old pattern, it is an error.
419: * Display the old pattern, in the style of Jeff Lomicka. There is
420: * some do-it-yourself control expansion.
421: */
1.6 millert 422: static int
1.12 cloder 423: re_readpattern(char *prompt)
1.5 millert 424: {
1.6 millert 425: static int dofree = 0;
1.13 vincent 426: int flags, error, s;
427: char tpat[NPAT], *rep;
1.5 millert 428:
429: if (re_pat[0] == '\0')
1.18 kjell 430: rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, prompt);
1.5 millert 431: else
1.15 kjell 432: rep = eread("%s: (default %s) ", tpat, NPAT,
433: EFNUL | EFNEW | EFCR, prompt, re_pat);
1.18 kjell 434: if (rep == NULL)
435: return (ABORT);
436: if (rep[0] != '\0') {
1.5 millert 437: /* New pattern given */
1.14 db 438: (void)strlcpy(re_pat, tpat, sizeof(re_pat));
1.5 millert 439: if (casefoldsearch)
440: flags = REG_EXTENDED | REG_ICASE;
441: else
442: flags = REG_EXTENDED;
443: if (dofree)
1.28 jasper 444: regfree(®ex_buff);
445: error = regcomp(®ex_buff, re_pat, flags);
1.6 millert 446: if (error != 0) {
447: char message[256];
1.28 jasper 448: regerror(error, ®ex_buff, message, sizeof(message));
1.30 lum 449: dobeep();
1.5 millert 450: ewprintf("Regex Error: %s", message);
451: re_pat[0] = '\0';
452: return (FALSE);
453: }
454: dofree = 1;
1.13 vincent 455: s = TRUE;
456: } else if (rep[0] == '\0' && re_pat[0] != '\0')
1.5 millert 457: /* Just using old pattern */
458: s = TRUE;
1.13 vincent 459: else
460: s = FALSE;
1.1 deraadt 461: return (s);
462: }
463:
1.5 millert 464: /*
465: * Cause case to not matter in searches. This is the default. If called
466: * with argument cause case to matter.
1.1 deraadt 467: */
1.22 kjell 468: /* ARGSUSED*/
1.6 millert 469: int
1.12 cloder 470: setcasefold(int f, int n)
1.5 millert 471: {
472: if (f & FFARG) {
473: casefoldsearch = FALSE;
474: ewprintf("Case-fold-search unset");
475: } else {
476: casefoldsearch = TRUE;
477: ewprintf("Case-fold-search set");
478: }
1.1 deraadt 479:
1.5 millert 480: /*
481: * Invalidate the regular expression pattern since I'm too lazy to
482: * recompile it.
483: */
484: re_pat[0] = '\0';
485: return (TRUE);
1.6 millert 486: }
1.1 deraadt 487:
1.5 millert 488: /*
1.14 db 489: * Delete all lines after dot that contain a string matching regex.
1.1 deraadt 490: */
1.22 kjell 491: /* ARGSUSED */
1.6 millert 492: int
1.12 cloder 493: delmatchlines(int f, int n)
1.5 millert 494: {
1.6 millert 495: int s;
1.1 deraadt 496:
1.9 mickey 497: if ((s = re_readpattern("Flush lines (containing match for regexp)"))
1.6 millert 498: != TRUE)
1.5 millert 499: return (s);
1.1 deraadt 500:
1.5 millert 501: s = killmatches(TRUE);
502: return (s);
1.1 deraadt 503: }
504:
1.5 millert 505: /*
1.14 db 506: * Delete all lines after dot that don't contain a string matching regex.
1.1 deraadt 507: */
1.22 kjell 508: /* ARGSUSED */
1.6 millert 509: int
1.12 cloder 510: delnonmatchlines(int f, int n)
1.5 millert 511: {
1.6 millert 512: int s;
1.1 deraadt 513:
1.9 mickey 514: if ((s = re_readpattern("Keep lines (containing match for regexp)"))
1.6 millert 515: != TRUE)
1.5 millert 516: return (s);
1.1 deraadt 517:
1.5 millert 518: s = killmatches(FALSE);
519: return (s);
1.1 deraadt 520: }
521:
1.9 mickey 522: /*
1.14 db 523: * This function does the work of deleting matching lines.
1.6 millert 524: */
525: static int
1.12 cloder 526: killmatches(int cond)
1.1 deraadt 527: {
1.6 millert 528: int s, error;
529: int count = 0;
1.21 deraadt 530: struct line *clp;
1.5 millert 531:
532: clp = curwp->w_dotp;
533: if (curwp->w_doto == llength(clp))
534: /* Consider dot on next line */
535: clp = lforw(clp);
536:
1.24 kjell 537: while (clp != (curbp->b_headp)) {
1.5 millert 538: /* see if line matches */
1.28 jasper 539: regex_match[0].rm_so = 0;
540: regex_match[0].rm_eo = llength(clp);
541: error = regexec(®ex_buff, ltext(clp), RE_NMATCH, regex_match,
1.6 millert 542: REG_STARTEND);
1.5 millert 543:
544: /* Delete line when appropriate */
545: if ((cond == FALSE && error) || (cond == TRUE && !error)) {
546: curwp->w_doto = 0;
547: curwp->w_dotp = clp;
548: count++;
549: s = ldelete(llength(clp) + 1, KNONE);
550: clp = curwp->w_dotp;
1.25 kjell 551: curwp->w_rflag |= WFMOVE;
1.5 millert 552: if (s == FALSE)
553: return (FALSE);
554: } else
555: clp = lforw(clp);
556: }
1.1 deraadt 557:
1.5 millert 558: ewprintf("%d line(s) deleted", count);
559: if (count > 0)
1.25 kjell 560: curwp->w_rflag |= WFMOVE;
1.1 deraadt 561:
1.5 millert 562: return (TRUE);
1.1 deraadt 563: }
564:
1.5 millert 565: /*
1.14 db 566: * Count lines matching regex.
1.1 deraadt 567: */
1.22 kjell 568: /* ARGSUSED */
1.6 millert 569: int
1.12 cloder 570: cntmatchlines(int f, int n)
1.5 millert 571: {
1.6 millert 572: int s;
1.1 deraadt 573:
1.5 millert 574: if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
575: return (s);
576: s = countmatches(TRUE);
1.14 db 577:
1.5 millert 578: return (s);
1.1 deraadt 579: }
580:
1.5 millert 581: /*
1.14 db 582: * Count lines that fail to match regex.
1.1 deraadt 583: */
1.22 kjell 584: /* ARGSUSED */
1.6 millert 585: int
1.12 cloder 586: cntnonmatchlines(int f, int n)
1.5 millert 587: {
1.6 millert 588: int s;
1.1 deraadt 589:
1.5 millert 590: if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
591: return (s);
592: s = countmatches(FALSE);
1.1 deraadt 593:
1.5 millert 594: return (s);
1.1 deraadt 595: }
596:
1.6 millert 597: /*
598: * This function does the work of counting matching lines.
599: */
600: int
1.12 cloder 601: countmatches(int cond)
1.1 deraadt 602: {
1.6 millert 603: int error;
604: int count = 0;
1.21 deraadt 605: struct line *clp;
1.5 millert 606:
607: clp = curwp->w_dotp;
608: if (curwp->w_doto == llength(clp))
609: /* Consider dot on next line */
610: clp = lforw(clp);
611:
1.24 kjell 612: while (clp != (curbp->b_headp)) {
1.5 millert 613: /* see if line matches */
1.28 jasper 614: regex_match[0].rm_so = 0;
615: regex_match[0].rm_eo = llength(clp);
616: error = regexec(®ex_buff, ltext(clp), RE_NMATCH, regex_match,
1.6 millert 617: REG_STARTEND);
1.5 millert 618:
619: /* Count line when appropriate */
620: if ((cond == FALSE && error) || (cond == TRUE && !error))
621: count++;
622: clp = lforw(clp);
623: }
1.1 deraadt 624:
1.5 millert 625: if (cond)
626: ewprintf("Number of lines matching: %d", count);
627: else
628: ewprintf("Number of lines not matching: %d", count);
629:
630: return (TRUE);
1.1 deraadt 631: }
1.6 millert 632: #endif /* REGEX */