Annotation of src/usr.bin/mg/re_search.c, Revision 1.4
1.1 deraadt 1: /*
2: * regular expression search commands for
3: * MicroGnuEmacs
4: *
5: * This file contains functions to implement several of gnuemacs'
6: * regular expression functions for MicroGnuEmacs. Several of
7: * the routines below are just minor rearrangements of the MicroGnuEmacs
8: * non-regular expression search functions. Hence some of them date back
9: * in essential structure to the original MicroEMACS; others are modifications
10: * of Rich Ellison's code. I, Peter Newton, wrote about half from scratch.
11: */
12:
13:
14: #ifdef REGEX
1.2 millert 15: #include <sys/types.h>
16: #include <regex.h>
17:
1.1 deraadt 18: #include "def.h"
19: #include "macro.h"
20:
/*
 * Search sub-codes.  re_srch_lastdir holds one of these; the code in this
 * file only ever stores or tests SRCH_NOPR, SRCH_FORW and SRCH_BACK.
 */
#define SRCH_BEGIN (0) /* Search sub-codes. */
#define SRCH_FORW (-1)
#define SRCH_BACK (-2)
#define SRCH_NOPR (-3)
#define SRCH_ACCM (-4)
#define SRCH_MARK (-5)

/*
 * Size of the re_match[] array handed to regexec(): entry 0 is the whole
 * match, the remaining entries are parenthesised subexpressions.
 */
#define RE_NMATCH 10 /* max number of matches */

/* Text of the current pattern; an empty string means "no pattern". */
char re_pat[NPAT]; /* Regex pattern */
/* Direction of the last successful search, for re_searchagain(). */
int re_srch_lastdir = SRCH_NOPR; /* Last search flags. */
/* When TRUE, re_readpattern() compiles patterns with REG_ICASE. */
int casefoldsearch = TRUE; /* Does search ignore case ? */
33:
/*
 * Upper-case mapping table, indexed by character code (0..0377).
 * Every entry maps to itself except 0141..0172 ('a'..'z'), which map to
 * 0101..0132 ('A'..'Z').  The comment on each row gives the index of the
 * row's first entry.
 */
static char upcase[0400] = {
	/* 0000 */ 000, 001, 002, 003, 004, 005, 006, 007,
	/* 0010 */ 010, 011, 012, 013, 014, 015, 016, 017,
	/* 0020 */ 020, 021, 022, 023, 024, 025, 026, 027,
	/* 0030 */ 030, 031, 032, 033, 034, 035, 036, 037,
	/* 0040 */ 040, 041, 042, 043, 044, 045, 046, 047,
	/* 0050 */ 050, 051, 052, 053, 054, 055, 056, 057,
	/* 0060 */ 060, 061, 062, 063, 064, 065, 066, 067,
	/* 0070 */ 070, 071, 072, 073, 074, 075, 076, 077,
	/* 0100 */ 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
	/* 0110 */ 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
	/* 0120 */ 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
	/* 0130 */ 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
	/* 0140: '`' stays, 'a'..'g' fold to 'A'..'G' */
	/* 0140 */ 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
	/* 0150 */ 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
	/* 0160 */ 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
	/* 0170: 'x'..'z' fold, '{' '|' '}' '~' DEL stay */
	/* 0170 */ 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
	/* 0200 */ 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
	/* 0210 */ 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
	/* 0220 */ 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
	/* 0230 */ 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
	/* 0240 */ 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
	/* 0250 */ 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
	/* 0260 */ 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
	/* 0270 */ 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
	/* 0300 */ 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
	/* 0310 */ 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
	/* 0320 */ 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
	/* 0330 */ 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
	/* 0340 */ 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
	/* 0350 */ 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
	/* 0360 */ 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
	/* 0370 */ 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
};
70:
71: /*
72: * Search forward.
73: * Get a search string from the user, and search for it,
74: * starting at ".". If found, "." gets moved to just after the
75: * matched characters, and display does all the hard stuff.
76: * If not found, it just prints a message.
77: */
78: /*ARGSUSED*/
79: re_forwsearch(f, n) {
80: register int s;
81:
82: if ((s=re_readpattern("RE Search")) != TRUE)
83: return (s);
84: if (re_forwsrch() == FALSE) {
85: ewprintf("Search failed: \"%s\"", re_pat);
86: return (FALSE);
87: }
88: re_srch_lastdir = SRCH_FORW;
89: return (TRUE);
90: }
91:
92: /*
93: * Reverse search.
94: * Get a search string from the user, and search, starting at "."
95: * and proceeding toward the front of the buffer. If found "." is left
96: * pointing at the first character of the pattern [the last character that
97: * was matched].
98: */
99: /*ARGSUSED*/
100: re_backsearch(f, n) {
101: register int s;
102:
103: if ((s=re_readpattern("RE Search backward")) != TRUE)
104: return (s);
105: if (re_backsrch() == FALSE) {
106: ewprintf("Search failed: \"%s\"", re_pat);
107: return (FALSE);
108: }
109: re_srch_lastdir = SRCH_BACK;
110: return (TRUE);
111: }
112:
113:
114:
115: /*
116: * Search again, using the same search string
117: * and direction as the last search command. The direction
118: * has been saved in "srch_lastdir", so you know which way
119: * to go.
120: */
121: /*ARGSUSED*/
122: /* This code has problems-- some incompatibility(?) with
123: extend.c causes match to fail when it should not.
124: */
125: re_searchagain(f, n) {
126:
127: if (re_srch_lastdir == SRCH_NOPR) {
128: ewprintf("No last search");
129: return (FALSE);
130: }
131:
132: if (re_srch_lastdir == SRCH_FORW) {
133: if (re_forwsrch() == FALSE) {
134: ewprintf("Search failed: \"%s\"", re_pat);
135: return (FALSE);
136: }
137: return (TRUE);
138: }
139: if (re_srch_lastdir == SRCH_BACK) {
140: if (re_backsrch() == FALSE) {
141: ewprintf("Search failed: \"%s\"", re_pat);
142: return (FALSE);
143: }
144: return (TRUE);
145: }
146: }
147:
148:
/*
 * Compiled regex goes here-- changed only when new pattern read
 * (re_readpattern() is the one place that calls regcomp() on it).
 */
static regex_t re_buff;
/*
 * Match offsets filled in by regexec().  Entry 0 is the whole match;
 * re_doreplace() reads the other entries for \N references.
 */
static regmatch_t re_match[RE_NMATCH];
1.1 deraadt 152:
153: /*
154: * Re-Query Replace.
155: * Replace strings selectively. Does a search and replace operation.
156: */
157: /*ARGSUSED*/
158: re_queryrepl(f, n) {
159: register int s;
160: register int rcnt = 0; /* Replacements made so far */
161: register int plen; /* length of found string */
162: char news[NPAT]; /* replacement string */
163:
164: /* Casefold check */
165: if (!casefoldsearch) f = TRUE;
166:
167: if ((s=re_readpattern("RE Query replace")) != TRUE)
168: return (s);
169: if ((s=ereply("Query replace %s with: ",news, NPAT, re_pat)) == ABORT)
170: return (s);
171: if (s == FALSE)
172: news[0] = '\0';
173: ewprintf("Query replacing %s with %s:", re_pat, news);
174:
175: /*
176: * Search forward repeatedly, checking each time whether to insert
177: * or not. The "!" case makes the check always true, so it gets put
178: * into a tighter loop for efficiency.
179: */
180:
181: while (re_forwsrch() == TRUE) {
182: retry:
183: update();
184: switch (getkey(FALSE)) {
185: case ' ':
1.2 millert 186: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.1 deraadt 187: if (re_doreplace((RSIZE) plen, news, f) == FALSE)
188: return (FALSE);
189: rcnt++;
190: break;
191:
192: case '.':
1.2 millert 193: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.1 deraadt 194: if (re_doreplace((RSIZE) plen, news, f) == FALSE)
195: return (FALSE);
196: rcnt++;
197: goto stopsearch;
198:
199: case CCHR('G'): /* ^G */
200: (VOID) ctrlg(FFRAND, 0);
201: case CCHR('['): /* ESC */
202: case '`':
203: goto stopsearch;
204:
205: case '!':
206: do {
1.2 millert 207: plen = re_match[0].rm_eo - re_match[0].rm_so;
1.1 deraadt 208: if (re_doreplace((RSIZE) plen, news, f) == FALSE)
209: return (FALSE);
210: rcnt++;
211: } while (re_forwsrch() == TRUE);
212: goto stopsearch;
213:
214: case CCHR('?'): /* To not replace */
215: break;
216:
217: default:
218: ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
219: goto retry;
220: }
221: }
222: stopsearch:
223: curwp->w_flag |= WFHARD;
224: update();
225: if (!inmacro) {
226: if (rcnt == 0)
227: ewprintf("(No replacements done)");
228: else if (rcnt == 1)
229: ewprintf("(1 replacement done)");
230: else
231: ewprintf("(%d replacements done)", rcnt);
232: }
233: return TRUE;
234: }
235:
236:
237:
238: /* Routine re_doreplace calls lreplace to make replacements needed by
239: * re_query replace. Its reason for existence is to deal with \1,
240: * \2. etc.
241: */
242:
243: /* Maximum length of replacement string */
244: #define REPLEN 256
245:
246: re_doreplace(plen, st, f)
247: register RSIZE plen; /* length to remove */
248: char *st; /* replacement string */
249: int f; /* case hack disable */
250: {
251: int s;
252: int num, k;
253: register int j;
254: int more, state;
255: LINE *clp;
256: char repstr[REPLEN];
257:
258: clp = curwp->w_dotp;
259: more = TRUE;
260: j = 0;
261: state = 0;
262:
263: /* The following FSA parses the replacement string */
264: while (more) {
265: switch (state) {
266:
267: case 0: if (*st == '\\') {
268: st++;
269: state = 1;
270: }
271: else if (*st == '\0')
272: more = FALSE;
273: else {
274: repstr[j] = *st;
275: j++; if (j >= REPLEN) return(FALSE);
276: st++;
277: }
278: break;
279: case 1: if (*st >= '0' && *st <= '9') {
280: num = *st - '0';
281: st++;
282: state = 2;
283: }
284: else if (*st == '\0')
285: more = FALSE;
286: else {
287: repstr[j] = *st;
288: j++; if (j >= REPLEN) return(FALSE);
289: st++;
290: state = 0;
291: }
292: break;
293: case 2: if (*st >= '0' && *st <= '9') {
294: num = 10*num + *st - '0';
295: st++;
296: }
297: else {
1.2 millert 298: if (num >= RE_NMATCH) return(FALSE);
299: k = re_match[num].rm_eo - re_match[num].rm_so;
1.1 deraadt 300: if (j+k >= REPLEN) return(FALSE);
1.2 millert 301: bcopy(&(clp->l_text[re_match[num].rm_so]), &repstr[j], k);
1.1 deraadt 302: j += k;
303: if (*st == '\0')
304: more = FALSE;
305: if (*st == '\\') {
306: st++;
307: state = 1;
308: }
309: else {
310: repstr[j] = *st;
311: j++; if (j >= REPLEN) return(FALSE);
312: st++;
313: state = 0;
314: }
315: }
316: break;
317: } /* end case */
318: } /* end while */
319:
320: repstr[j] = '\0';
321:
322: s = lreplace(plen, repstr, f);
323:
324: return(s);
325: }
326:
327:
328:
329: /*
330: * This routine does the real work of a
331: * forward search. The pattern is sitting in the external
332: * variable "pat". If found, dot is updated, the window system
333: * is notified of the change, and TRUE is returned. If the
334: * string isn't found, FALSE is returned.
335: */
336: re_forwsrch() {
337:
338: register LINE *clp;
339: register int tbo;
1.2 millert 340: int error, plen;
1.1 deraadt 341:
342: clp = curwp->w_dotp;
343: tbo = curwp->w_doto;
344:
345: if (tbo == clp->l_used)
346: /* Don't start matching off end of line-- must
347: * move to beginning of next line, unless at end
348: */
349: if (clp != curbp->b_linep) {
350: clp = lforw(clp);
351: tbo = 0;
352: }
353:
354:
355: /* Note this loop does not process the last line, but this editor
356: always makes the last line empty so this is good.
357: */
358:
359: while (clp != (curbp->b_linep)) {
360:
1.2 millert 361: re_match[0].rm_so = tbo;
362: re_match[0].rm_eo = llength(clp);
363: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match, REG_STARTEND);
1.1 deraadt 364:
1.2 millert 365: if (error) {
1.1 deraadt 366: clp = lforw(clp);
367: tbo = 0;
1.2 millert 368: } else {
369: curwp->w_doto = re_match[0].rm_eo;
1.1 deraadt 370: curwp->w_dotp = clp;
371: curwp->w_flag |= WFMOVE;
372: return (TRUE);
373: }
374:
375: }
376:
377: return(FALSE);
378:
379: }
380:
381:
382: /*
383: * This routine does the real work of a
384: * backward search. The pattern is sitting in the external
385: * variable "re_pat". If found, dot is updated, the window system
386: * is notified of the change, and TRUE is returned. If the
387: * string isn't found, FALSE is returned.
388: */
389: re_backsrch() {
390:
391: register LINE *clp;
392: register int tbo;
1.3 millert 393: regmatch_t lastmatch;
1.2 millert 394: char m[1];
1.1 deraadt 395:
396: clp = curwp->w_dotp;
397: tbo = curwp->w_doto;
398:
399: /* Start search one position to the left of dot */
400: tbo = tbo - 1;
401: if (tbo < 0) {
402: /* must move up one line */
403: clp = lback(clp);
404: tbo = llength(clp);
405: }
406:
407: /* Note this loop does not process the last line, but this editor
408: always makes the last line empty so this is good.
409: */
410:
411: while (clp != (curbp->b_linep)) {
412:
1.3 millert 413: re_match[0].rm_so = 0;
1.4 ! millert 414: re_match[0].rm_eo = llength(clp);
1.3 millert 415: lastmatch.rm_so = -1;
416: /* Keep searching until we don't match any longer. Assumes a non-match
1.4 ! millert 417: does not modify the re_match array. We have to do this
! 418: character-by-character after the first match since POSIX regexps don't
! 419: give you a way to do reverse matches.
1.3 millert 420: */
1.4 ! millert 421: while (!regexec(&re_buff, ltext(clp), RE_NMATCH, re_match, REG_STARTEND) &&
! 422: re_match[0].rm_so < tbo) {
1.3 millert 423: memcpy(&lastmatch, &re_match[0], sizeof(regmatch_t));
1.4 ! millert 424: re_match[0].rm_so++;
! 425: re_match[0].rm_eo = llength(clp);
1.3 millert 426: }
1.1 deraadt 427:
1.3 millert 428: if (lastmatch.rm_so == -1) {
1.1 deraadt 429: clp = lback(clp);
430: tbo = llength(clp);
1.2 millert 431: } else {
1.3 millert 432: memcpy(&re_match[0], &lastmatch, sizeof(regmatch_t));
1.2 millert 433: curwp->w_doto = re_match[0].rm_so;
1.1 deraadt 434: curwp->w_dotp = clp;
435: curwp->w_flag |= WFMOVE;
436: return (TRUE);
437: }
438:
439: }
440:
441: return(FALSE);
442:
443: }
444:
445:
446: /*
447: * Read a pattern.
448: * Stash it in the external variable "re_pat". The "pat" is
449: * not updated if the user types in an empty line. If the user typed
450: * an empty line, and there is no old pattern, it is an error.
451: * Display the old pattern, in the style of Jeff Lomicka. There is
452: * some do-it-yourself control expansion.
453: */
454: re_readpattern(prompt) char *prompt; {
1.2 millert 455: int s;
456: int flags;
457: int error;
1.1 deraadt 458: char tpat[NPAT];
1.2 millert 459: static int dofree = 0;
1.1 deraadt 460:
461: if (re_pat[0] == '\0') s = ereply("%s: ", tpat, NPAT, prompt);
462: else s = ereply("%s: (default %s) ", tpat, NPAT, prompt, re_pat);
463:
464: if (s == TRUE) {
465: /* New pattern given */
466: (VOID) strcpy(re_pat, tpat);
467: if (casefoldsearch)
1.2 millert 468: flags = REG_EXTENDED|REG_ICASE;
1.1 deraadt 469: else
1.2 millert 470: flags = REG_EXTENDED;
471: if (dofree)
472: regfree(&re_buff);
473: error = regcomp(&re_buff, re_pat, flags);
474: if (error) {
475: char message[256];
476: regerror(error, &re_buff, message, sizeof(message));
1.1 deraadt 477: ewprintf("Regex Error: %s", message);
478: re_pat[0] = '\0';
479: return(FALSE);
480: }
1.2 millert 481: dofree = 1;
1.1 deraadt 482: }
483: else if (s==FALSE && re_pat[0]!='\0')
484: /* Just using old pattern */
485: s = TRUE;
486: return (s);
487: }
488:
489:
490:
491: /* Cause case to not matter in searches. This is the default. If
492: * called with argument cause case to matter.
493: */
494: setcasefold(f, n) {
495:
496: if (f & FFARG) {
497: casefoldsearch = FALSE;
498: ewprintf("Case-fold-search unset");
499: }
500: else {
501: casefoldsearch = TRUE;
502: ewprintf("Case-fold-search set");
503: }
504:
505: /* Invalidate the regular expression pattern since I'm too lazy
506: * to recompile it.
507: */
508:
509: re_pat[0] = '\0';
510:
511: return(TRUE);
512:
513: } /* end setcasefold */
514:
515:
516: /* Delete all lines after dot that contain a string matching regex
517: */
518: delmatchlines(f, n) {
519: int s;
520:
521: if ((s=re_readpattern("Flush lines (containing match for regexp)")) != TRUE)
522: return (s);
523:
524: s = killmatches(TRUE);
525:
526: return(s);
527: }
528:
529:
530:
531: /* Delete all lines after dot that don't contain a string matching regex
532: */
533: delnonmatchlines(f, n) {
534: int s;
535:
536:
537: if ((s=re_readpattern("Keep lines (containing match for regexp)")) != TRUE)
538: return (s);
539:
540: s = killmatches(FALSE);
541:
542: return(s);
543: }
544:
545:
546:
547: /* This function does the work of deleting matching lines */
548: killmatches(cond)
549: int cond;
550: {
1.2 millert 551: int s, error;
1.1 deraadt 552: int count = 0;
553: LINE *clp;
554:
555: clp = curwp->w_dotp;
556: if (curwp->w_doto == llength(clp))
557: /* Consider dot on next line */
558: clp = lforw(clp);
559:
560: while (clp != (curbp->b_linep)) {
561:
562: /* see if line matches */
1.2 millert 563: re_match[0].rm_so = 0;
564: re_match[0].rm_eo = llength(clp);
565: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match, REG_STARTEND);
566:
1.1 deraadt 567: /* Delete line when appropriate */
1.2 millert 568: if ((cond == FALSE && error) || (cond == TRUE && !error)) {
1.1 deraadt 569: curwp->w_doto = 0;
570: curwp->w_dotp = clp;
571: count++;
572: s = ldelete(llength(clp)+1, KNONE);
573: clp = curwp->w_dotp;
574: curwp->w_flag |= WFMOVE;
575: if (s == FALSE) return(FALSE);
576: }
577: else
578: clp = lforw(clp);
579: }
580:
581: ewprintf("%d line(s) deleted", count);
582: if (count > 0) curwp->w_flag |= WFMOVE;
583:
584: return(TRUE);
585: }
586:
587:
588: petersfunc(f, n) {
589:
590: int s;
591: LINE *clp;
592: char c;
593:
594: curwp->w_doto = 0;
595: s = ldelete(llength(curwp->w_dotp)+1, KNONE);
596: curwp->w_flag |= WFMOVE;
597: return(s);
598:
599: }
600:
601:
602: /* Count lines matching regex
603: */
604: cntmatchlines(f, n) {
605: int s;
606:
607: if ((s=re_readpattern("Count lines (matching regexp)")) != TRUE)
608: return (s);
609:
610: s = countmatches(TRUE);
611:
612: return(s);
613: }
614:
615:
616:
617: /* Count lines that fail to match regex
618: */
619: cntnonmatchlines(f, n) {
620: int s;
621:
622:
623: if ((s=re_readpattern("Count lines (not matching regexp)")) != TRUE)
624: return (s);
625:
626: s = countmatches(FALSE);
627:
628: return(s);
629: }
630:
631:
632:
633: /* This function does the work of counting matching lines */
634: countmatches(cond)
635: int cond;
636: {
1.2 millert 637: int s, error;
1.1 deraadt 638: int count = 0;
639: LINE *clp;
640:
641: clp = curwp->w_dotp;
642: if (curwp->w_doto == llength(clp))
643: /* Consider dot on next line */
644: clp = lforw(clp);
645:
646: while (clp != (curbp->b_linep)) {
647:
648: /* see if line matches */
1.2 millert 649: re_match[0].rm_so = 0;
650: re_match[0].rm_eo = llength(clp);
651: error = regexec(&re_buff, ltext(clp), RE_NMATCH, re_match, REG_STARTEND);
652:
1.1 deraadt 653: /* Count line when appropriate */
1.2 millert 654: if ((cond == FALSE && error) || (cond == TRUE && !error)) count++;
1.1 deraadt 655: clp = lforw(clp);
656: }
657:
658: if (cond)
659: ewprintf("Number of lines matching: %d", count);
660: else
661: ewprintf("Number of lines not matching: %d", count);
662:
663: return(TRUE);
664: }
665: #endif