Annotation of src/usr.bin/mg/re_search.c, Revision 1.12
1.12 ! cloder 1: /* $OpenBSD: re_search.c,v 1.11 2002/02/16 21:27:49 millert Exp $ */
1.7 niklas 2:
1.1 deraadt 3: /*
1.6 millert 4: * regular expression search commands for Mg
1.1 deraadt 5: *
1.6 millert 6: * This file contains functions to implement several of gnuemacs's regular
7: * expression functions for Mg. Several of the routines below are just minor
8: * re-arrangements of Mg's non-regular expression search functions. Some of
1.9 mickey 9: * them are similar in structure to the original MicroEMACS, others are
1.6 millert 10: * modifications of Rich Ellison's code. Peter Newton re-wrote about half of
11: * them from scratch.
1.1 deraadt 12: */
13:
1.6 millert 14: #ifdef REGEX
1.2 millert 15: #include <sys/types.h>
16: #include <regex.h>
17:
1.6 millert 18: #include "def.h"
19: #include "macro.h"
1.1 deraadt 20:
1.6 millert 21: #define SRCH_BEGIN (0) /* search sub-codes */
1.1 deraadt 22: #define SRCH_FORW (-1)
23: #define SRCH_BACK (-2)
24: #define SRCH_NOPR (-3)
25: #define SRCH_ACCM (-4)
26: #define SRCH_MARK (-5)
27:
1.6 millert 28: #define RE_NMATCH 10 /* max number of matches */
29: #define REPLEN 256 /* max length of replacement string */
1.2 millert 30:
1.6 millert 31: char re_pat[NPAT]; /* regex pattern */
32: int re_srch_lastdir = SRCH_NOPR; /* last search flags */
33: int casefoldsearch = TRUE; /* does search ignore case? */
34:
1.11 millert 35: static int re_doreplace(RSIZE, char *, int);
36: static int re_forwsrch(void);
37: static int re_backsrch(void);
38: static int re_readpattern(char *);
39: static int killmatches(int);
40: static int countmatches(int);
1.1 deraadt 41:
42: /*
43: * Search forward.
1.9 mickey 44: * Get a search string from the user and search for it starting at ".". If
45: * found, move "." to just after the matched characters. display does all
1.6 millert 46: * the hard stuff. If not found, it just prints a message.
1.1 deraadt 47: */
1.5 millert 48: /* ARGSUSED */
1.6 millert 49: int
1.12 ! cloder 50: re_forwsearch(int f, int n)
1.5 millert 51: {
1.6 millert 52: int s;
1.1 deraadt 53:
1.5 millert 54: if ((s = re_readpattern("RE Search")) != TRUE)
1.1 deraadt 55: return (s);
56: if (re_forwsrch() == FALSE) {
57: ewprintf("Search failed: \"%s\"", re_pat);
58: return (FALSE);
59: }
60: re_srch_lastdir = SRCH_FORW;
61: return (TRUE);
62: }
63:
64: /*
65: * Reverse search.
66: * Get a search string from the user, and search, starting at "."
67: * and proceeding toward the front of the buffer. If found "." is left
68: * pointing at the first character of the pattern [the last character that
69: * was matched].
70: */
1.5 millert 71: /* ARGSUSED */
1.6 millert 72: int
1.12 ! cloder 73: re_backsearch(int f, int n)
1.5 millert 74: {
1.6 millert 75: int s;
1.1 deraadt 76:
1.5 millert 77: if ((s = re_readpattern("RE Search backward")) != TRUE)
1.1 deraadt 78: return (s);
79: if (re_backsrch() == FALSE) {
80: ewprintf("Search failed: \"%s\"", re_pat);
81: return (FALSE);
82: }
83: re_srch_lastdir = SRCH_BACK;
84: return (TRUE);
85: }
86:
87: /*
1.9 mickey 88: * Search again, using the same search string and direction as the last search
89: * command. The direction has been saved in "srch_lastdir", so you know which
1.6 millert 90: * way to go.
91: *
92: * XXX: This code has problems -- some incompatibility(?) with extend.c causes
93: * match to fail when it should not.
1.1 deraadt 94: */
1.5 millert 95: /* ARGSUSED */
1.6 millert 96: int
1.12 ! cloder 97: re_searchagain(int f, int n)
1.5 millert 98: {
99: if (re_srch_lastdir == SRCH_NOPR) {
100: ewprintf("No last search");
101: return (FALSE);
102: }
103: if (re_srch_lastdir == SRCH_FORW) {
104: if (re_forwsrch() == FALSE) {
105: ewprintf("Search failed: \"%s\"", re_pat);
106: return (FALSE);
107: }
108: return (TRUE);
109: }
1.9 mickey 110: if (re_srch_lastdir == SRCH_BACK)
1.5 millert 111: if (re_backsrch() == FALSE) {
112: ewprintf("Search failed: \"%s\"", re_pat);
113: return (FALSE);
114: }
1.6 millert 115:
116: return (TRUE);
1.1 deraadt 117: }
118:
119: /* Compiled regex goes here-- changed only when new pattern read */
1.6 millert 120: static regex_t re_buff;
121: static regmatch_t re_match[RE_NMATCH];
1.1 deraadt 122:
123: /*
124: * Re-Query Replace.
125: * Replace strings selectively. Does a search and replace operation.
126: */
1.5 millert 127: /* ARGSUSED */
1.6 millert 128: int
1.12 ! cloder 129: re_queryrepl(int f, int n)
1.5 millert 130: {
1.6 millert 131: int s;
132: int rcnt = 0; /* replacements made so far */
133: int plen; /* length of found string */
134: char news[NPAT]; /* replacement string */
1.1 deraadt 135:
136: /* Casefold check */
1.5 millert 137: if (!casefoldsearch)
138: f = TRUE;
1.1 deraadt 139:
1.5 millert 140: if ((s = re_readpattern("RE Query replace")) != TRUE)
1.1 deraadt 141: return (s);
1.9 mickey 142: if ((s =
1.6 millert 143: ereply("Query replace %s with: ", news, NPAT, re_pat)) == ABORT)
1.1 deraadt 144: return (s);
145: if (s == FALSE)
146: news[0] = '\0';
147: ewprintf("Query replacing %s with %s:", re_pat, news);
148:
149: /*
150: * Search forward repeatedly, checking each time whether to insert
151: * or not. The "!" case makes the check always true, so it gets put
152: * into a tighter loop for efficiency.
153: */
154: while (re_forwsrch() == TRUE) {
1.5 millert 155: retry:
1.1 deraadt 156: update();
157: switch (getkey(FALSE)) {
158: case ' ':
1.2 millert 159: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.6 millert 160: if (re_doreplace((RSIZE)plen, news, f) == FALSE)
1.1 deraadt 161: return (FALSE);
162: rcnt++;
163: break;
164:
165: case '.':
1.2 millert 166: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.6 millert 167: if (re_doreplace((RSIZE)plen, news, f) == FALSE)
1.1 deraadt 168: return (FALSE);
169: rcnt++;
170: goto stopsearch;
171:
1.6 millert 172: case CCHR('G'): /* ^G */
1.8 art 173: (void)ctrlg(FFRAND, 0);
1.6 millert 174: case CCHR('['): /* ESC */
1.1 deraadt 175: case '`':
176: goto stopsearch;
177: case '!':
178: do {
1.2 millert 179: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.6 millert 180: if (re_doreplace((RSIZE)plen, news, f) == FALSE)
1.1 deraadt 181: return (FALSE);
182: rcnt++;
183: } while (re_forwsrch() == TRUE);
184: goto stopsearch;
185:
1.6 millert 186: case CCHR('?'): /* To not replace */
1.1 deraadt 187: break;
188:
189: default:
1.5 millert 190: ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
1.1 deraadt 191: goto retry;
192: }
193: }
1.6 millert 194:
1.1 deraadt 195: stopsearch:
196: curwp->w_flag |= WFHARD;
197: update();
198: if (!inmacro) {
199: if (rcnt == 0)
200: ewprintf("(No replacements done)");
201: else if (rcnt == 1)
202: ewprintf("(1 replacement done)");
203: else
204: ewprintf("(%d replacements done)", rcnt);
205: }
206: return TRUE;
207: }
208:
1.5 millert 209: /*
210: * Routine re_doreplace calls lreplace to make replacements needed by
211: * re_query replace. Its reason for existence is to deal with \1, \2. etc.
1.12 ! cloder 212: * plen: length to remove
! 213: * st: replacement string
! 214: * f: case hack disable
1.1 deraadt 215: */
1.6 millert 216: static int
1.12 ! cloder 217: re_doreplace(RSIZE plen, char *st, int f)
1.6 millert 218: {
219: int j, k, s, more, num, state;
220: LINE *clp;
221: char repstr[REPLEN];
1.5 millert 222:
223: clp = curwp->w_dotp;
224: more = TRUE;
225: j = 0;
226: state = 0;
1.6 millert 227: num = 0;
1.5 millert 228:
229: /* The following FSA parses the replacement string */
230: while (more) {
231: switch (state) {
232: case 0:
233: if (*st == '\\') {
234: st++;
235: state = 1;
236: } else if (*st == '\0')
237: more = FALSE;
238: else {
239: repstr[j] = *st;
240: j++;
241: if (j >= REPLEN)
242: return (FALSE);
243: st++;
244: }
245: break;
246: case 1:
247: if (*st >= '0' && *st <= '9') {
248: num = *st - '0';
249: st++;
250: state = 2;
251: } else if (*st == '\0')
252: more = FALSE;
253: else {
254: repstr[j] = *st;
255: j++;
256: if (j >= REPLEN)
257: return (FALSE);
258: st++;
259: state = 0;
260: }
261: break;
262: case 2:
263: if (*st >= '0' && *st <= '9') {
264: num = 10 * num + *st - '0';
265: st++;
266: } else {
267: if (num >= RE_NMATCH)
268: return (FALSE);
269: k = re_match[num].rm_eo - re_match[num].rm_so;
270: if (j + k >= REPLEN)
271: return (FALSE);
1.9 mickey 272: bcopy(&(clp->l_text[re_match[num].rm_so]),
1.6 millert 273: &repstr[j], k);
1.5 millert 274: j += k;
275: if (*st == '\0')
276: more = FALSE;
277: if (*st == '\\') {
278: st++;
279: state = 1;
280: } else {
281: repstr[j] = *st;
282: j++;
283: if (j >= REPLEN)
284: return (FALSE);
285: st++;
286: state = 0;
287: }
288: }
289: break;
1.6 millert 290: } /* switch (state) */
291: } /* while (more) */
1.1 deraadt 292:
1.5 millert 293: repstr[j] = '\0';
294: s = lreplace(plen, repstr, f);
295: return (s);
1.1 deraadt 296: }
297:
298:
299:
300: /*
1.9 mickey 301: * This routine does the real work of a forward search. The pattern is
302: * sitting in the external variable "pat". If found, dot is updated, the
1.6 millert 303: * window system is notified of the change, and TRUE is returned. If the
1.1 deraadt 304: * string isn't found, FALSE is returned.
305: */
1.6 millert 306: static int
1.12 ! cloder 307: re_forwsrch(void)
1.5 millert 308: {
1.6 millert 309: int tbo, error;
310: LINE *clp;
1.5 millert 311:
312: clp = curwp->w_dotp;
313: tbo = curwp->w_doto;
314:
315: if (tbo == clp->l_used)
316: /*
1.6 millert 317: * Don't start matching past end of line -- must move to
318: * beginning of next line, unless at end of file.
1.5 millert 319: */
320: if (clp != curbp->b_linep) {
321: clp = lforw(clp);
322: tbo = 0;
323: }
324: /*
325: * Note this loop does not process the last line, but this editor
326: * always makes the last line empty so this is good.
327: */
328: while (clp != (curbp->b_linep)) {
329: re_match[0].rm_so = tbo;
330: re_match[0].rm_eo = llength(clp);
1.9 mickey 331: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
1.6 millert 332: REG_STARTEND);
333: if (error != 0) {
1.5 millert 334: clp = lforw(clp);
335: tbo = 0;
336: } else {
337: curwp->w_doto = re_match[0].rm_eo;
338: curwp->w_dotp = clp;
339: curwp->w_flag |= WFMOVE;
340: return (TRUE);
341: }
342: }
343: return (FALSE);
1.1 deraadt 344: }
345:
346:
347: /*
1.6 millert 348: * This routine does the real work of a backward search. The pattern is sitting
1.9 mickey 349: * in the external variable "re_pat". If found, dot is updated, the window
350: * system is notified of the change, and TRUE is returned. If the string isn't
1.6 millert 351: * found, FALSE is returned.
1.1 deraadt 352: */
1.6 millert 353: static int
1.12 ! cloder 354: re_backsrch(void)
1.5 millert 355: {
1.6 millert 356: LINE *clp;
357: int tbo;
358: regmatch_t lastmatch;
1.5 millert 359:
360: clp = curwp->w_dotp;
361: tbo = curwp->w_doto;
362:
363: /* Start search one position to the left of dot */
364: tbo = tbo - 1;
365: if (tbo < 0) {
366: /* must move up one line */
367: clp = lback(clp);
368: tbo = llength(clp);
369: }
1.6 millert 370:
1.5 millert 371: /*
372: * Note this loop does not process the last line, but this editor
373: * always makes the last line empty so this is good.
374: */
375: while (clp != (curbp->b_linep)) {
376: re_match[0].rm_so = 0;
377: re_match[0].rm_eo = llength(clp);
378: lastmatch.rm_so = -1;
379: /*
380: * Keep searching until we don't match any longer. Assumes a
381: * non-match does not modify the re_match array. We have to
382: * do this character-by-character after the first match since
383: * POSIX regexps don't give you a way to do reverse matches.
384: */
385: while (!regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
386: REG_STARTEND) && re_match[0].rm_so < tbo) {
387: memcpy(&lastmatch, &re_match[0], sizeof(regmatch_t));
388: re_match[0].rm_so++;
389: re_match[0].rm_eo = llength(clp);
390: }
391: if (lastmatch.rm_so == -1) {
392: clp = lback(clp);
393: tbo = llength(clp);
394: } else {
395: memcpy(&re_match[0], &lastmatch, sizeof(regmatch_t));
396: curwp->w_doto = re_match[0].rm_so;
397: curwp->w_dotp = clp;
398: curwp->w_flag |= WFMOVE;
399: return (TRUE);
400: }
401: }
402: return (FALSE);
1.1 deraadt 403: }
404:
405:
406: /*
407: * Read a pattern.
408: * Stash it in the external variable "re_pat". The "pat" is
409: * not updated if the user types in an empty line. If the user typed
410: * an empty line, and there is no old pattern, it is an error.
411: * Display the old pattern, in the style of Jeff Lomicka. There is
412: * some do-it-yourself control expansion.
413: */
1.6 millert 414: static int
1.12 ! cloder 415: re_readpattern(char *prompt)
1.5 millert 416: {
1.6 millert 417: int s, flags, error;
418: char tpat[NPAT];
419: static int dofree = 0;
1.5 millert 420:
421: if (re_pat[0] == '\0')
422: s = ereply("%s: ", tpat, NPAT, prompt);
423: else
424: s = ereply("%s: (default %s) ", tpat, NPAT, prompt, re_pat);
1.1 deraadt 425:
426: if (s == TRUE) {
1.5 millert 427: /* New pattern given */
1.10 vincent 428: (void)strlcpy(re_pat, tpat, sizeof re_pat);
1.5 millert 429: if (casefoldsearch)
430: flags = REG_EXTENDED | REG_ICASE;
431: else
432: flags = REG_EXTENDED;
433: if (dofree)
434: regfree(&re_buff);
435: error = regcomp(&re_buff, re_pat, flags);
1.6 millert 436: if (error != 0) {
437: char message[256];
1.5 millert 438: regerror(error, &re_buff, message, sizeof(message));
439: ewprintf("Regex Error: %s", message);
440: re_pat[0] = '\0';
441: return (FALSE);
442: }
443: dofree = 1;
444: } else if (s == FALSE && re_pat[0] != '\0')
445: /* Just using old pattern */
446: s = TRUE;
1.1 deraadt 447: return (s);
448: }
449:
1.5 millert 450: /*
451: * Cause case to not matter in searches. This is the default. If called
452: * with argument cause case to matter.
1.1 deraadt 453: */
1.6 millert 454: int
1.12 ! cloder 455: setcasefold(int f, int n)
1.5 millert 456: {
457: if (f & FFARG) {
458: casefoldsearch = FALSE;
459: ewprintf("Case-fold-search unset");
460: } else {
461: casefoldsearch = TRUE;
462: ewprintf("Case-fold-search set");
463: }
1.1 deraadt 464:
1.5 millert 465: /*
466: * Invalidate the regular expression pattern since I'm too lazy to
467: * recompile it.
468: */
469: re_pat[0] = '\0';
470: return (TRUE);
1.6 millert 471: }
1.1 deraadt 472:
473:
1.5 millert 474: /*
475: * Delete all lines after dot that contain a string matching regex
1.1 deraadt 476: */
1.6 millert 477: int
1.12 ! cloder 478: delmatchlines(int f, int n)
1.5 millert 479: {
1.6 millert 480: int s;
1.1 deraadt 481:
1.9 mickey 482: if ((s = re_readpattern("Flush lines (containing match for regexp)"))
1.6 millert 483: != TRUE)
1.5 millert 484: return (s);
1.1 deraadt 485:
1.5 millert 486: s = killmatches(TRUE);
487: return (s);
1.1 deraadt 488: }
489:
1.5 millert 490: /*
491: * Delete all lines after dot that don't contain a string matching regex
1.1 deraadt 492: */
1.6 millert 493: int
1.12 ! cloder 494: delnonmatchlines(int f, int n)
1.5 millert 495: {
1.6 millert 496: int s;
1.1 deraadt 497:
1.9 mickey 498: if ((s = re_readpattern("Keep lines (containing match for regexp)"))
1.6 millert 499: != TRUE)
1.5 millert 500: return (s);
1.1 deraadt 501:
1.5 millert 502: s = killmatches(FALSE);
503: return (s);
1.1 deraadt 504: }
505:
1.9 mickey 506: /*
507: * This function does the work of deleting matching lines
1.6 millert 508: */
509: static int
1.12 ! cloder 510: killmatches(int cond)
1.1 deraadt 511: {
1.6 millert 512: int s, error;
513: int count = 0;
514: LINE *clp;
1.5 millert 515:
516: clp = curwp->w_dotp;
517: if (curwp->w_doto == llength(clp))
518: /* Consider dot on next line */
519: clp = lforw(clp);
520:
521: while (clp != (curbp->b_linep)) {
522: /* see if line matches */
523: re_match[0].rm_so = 0;
524: re_match[0].rm_eo = llength(clp);
1.9 mickey 525: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
1.6 millert 526: REG_STARTEND);
1.5 millert 527:
528: /* Delete line when appropriate */
529: if ((cond == FALSE && error) || (cond == TRUE && !error)) {
530: curwp->w_doto = 0;
531: curwp->w_dotp = clp;
532: count++;
533: s = ldelete(llength(clp) + 1, KNONE);
534: clp = curwp->w_dotp;
535: curwp->w_flag |= WFMOVE;
536: if (s == FALSE)
537: return (FALSE);
538: } else
539: clp = lforw(clp);
540: }
1.1 deraadt 541:
1.5 millert 542: ewprintf("%d line(s) deleted", count);
543: if (count > 0)
544: curwp->w_flag |= WFMOVE;
1.1 deraadt 545:
1.5 millert 546: return (TRUE);
1.1 deraadt 547: }
548:
1.5 millert 549: /*
550: * Count lines matching regex
1.1 deraadt 551: */
1.6 millert 552: int
1.12 ! cloder 553: cntmatchlines(int f, int n)
1.5 millert 554: {
1.6 millert 555: int s;
1.1 deraadt 556:
1.5 millert 557: if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
558: return (s);
559: s = countmatches(TRUE);
560: return (s);
1.1 deraadt 561: }
562:
1.5 millert 563: /*
564: * Count lines that fail to match regex
1.1 deraadt 565: */
1.6 millert 566: int
1.12 ! cloder 567: cntnonmatchlines(int f, int n)
1.5 millert 568: {
1.6 millert 569: int s;
1.1 deraadt 570:
1.5 millert 571: if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
572: return (s);
1.1 deraadt 573:
1.5 millert 574: s = countmatches(FALSE);
1.1 deraadt 575:
1.5 millert 576: return (s);
1.1 deraadt 577: }
578:
1.6 millert 579: /*
580: * This function does the work of counting matching lines.
581: */
582: int
1.12 ! cloder 583: countmatches(int cond)
1.1 deraadt 584: {
1.6 millert 585: int error;
586: int count = 0;
587: LINE *clp;
1.5 millert 588:
589: clp = curwp->w_dotp;
590: if (curwp->w_doto == llength(clp))
591: /* Consider dot on next line */
592: clp = lforw(clp);
593:
594: while (clp != (curbp->b_linep)) {
595: /* see if line matches */
596: re_match[0].rm_so = 0;
597: re_match[0].rm_eo = llength(clp);
1.9 mickey 598: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match,
1.6 millert 599: REG_STARTEND);
1.5 millert 600:
601: /* Count line when appropriate */
602: if ((cond == FALSE && error) || (cond == TRUE && !error))
603: count++;
604: clp = lforw(clp);
605: }
1.1 deraadt 606:
1.5 millert 607: if (cond)
608: ewprintf("Number of lines matching: %d", count);
609: else
610: ewprintf("Number of lines not matching: %d", count);
611:
612: return (TRUE);
1.1 deraadt 613: }
1.6 millert 614: #endif /* REGEX */