Annotation of src/usr.bin/less/pattern.c, Revision 1.4
1.1 shadchin 1: /*
1.3 shadchin 2: * Copyright (C) 1984-2012 Mark Nudelman
1.1 shadchin 3: *
4: * You may distribute under the terms of either the GNU General Public
5: * License or the Less License, as specified in the README file.
6: *
1.3 shadchin 7: * For more information, see the README file.
1.1 shadchin 8: */
9:
10: /*
11: * Routines to do pattern matching.
12: */
13:
14: #include "less.h"
15: #include "pattern.h"
16:
17: extern int caseless;
1.2 millert 18: extern int less_is_more;
1.1 shadchin 19:
20: /*
21: * Compile a search pattern, for future use by match_pattern.
22: */
23: static int
24: compile_pattern2(pattern, search_type, comp_pattern)
25: char *pattern;
26: int search_type;
27: void **comp_pattern;
28: {
1.3 shadchin 29: if (search_type & SRCH_NO_REGEX)
30: return (0);
31: {
32: #if HAVE_GNU_REGEX
33: struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
34: ecalloc(1, sizeof(struct re_pattern_buffer));
35: struct re_pattern_buffer **pcomp =
36: (struct re_pattern_buffer **) comp_pattern;
37: re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
38: if (re_compile_pattern(pattern, strlen(pattern), comp))
1.1 shadchin 39: {
1.3 shadchin 40: free(comp);
41: error("Invalid pattern", NULL_PARG);
42: return (-1);
43: }
44: if (*pcomp != NULL)
45: regfree(*pcomp);
46: *pcomp = comp;
47: #endif
1.1 shadchin 48: #if HAVE_POSIX_REGCOMP
1.3 shadchin 49: regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
50: regex_t **pcomp = (regex_t **) comp_pattern;
51: if (regcomp(comp, pattern, less_is_more ? 0 : REGCOMP_FLAG))
52: {
53: free(comp);
54: error("Invalid pattern", NULL_PARG);
55: return (-1);
56: }
57: if (*pcomp != NULL)
58: regfree(*pcomp);
59: *pcomp = comp;
1.1 shadchin 60: #endif
61: #if HAVE_PCRE
1.3 shadchin 62: pcre *comp;
63: pcre **pcomp = (pcre **) comp_pattern;
64: constant char *errstring;
65: int erroffset;
66: PARG parg;
67: comp = pcre_compile(pattern, 0,
68: &errstring, &erroffset, NULL);
69: if (comp == NULL)
70: {
71: parg.p_string = (char *) errstring;
72: error("%s", &parg);
73: return (-1);
74: }
75: *pcomp = comp;
1.1 shadchin 76: #endif
77: #if HAVE_RE_COMP
1.3 shadchin 78: PARG parg;
79: int *pcomp = (int *) comp_pattern;
80: if ((parg.p_string = re_comp(pattern)) != NULL)
81: {
82: error("%s", &parg);
83: return (-1);
84: }
85: *pcomp = 1;
1.1 shadchin 86: #endif
87: #if HAVE_REGCMP
1.3 shadchin 88: char *comp;
89: char **pcomp = (char **) comp_pattern;
90: if ((comp = regcmp(pattern, 0)) == NULL)
91: {
92: error("Invalid pattern", NULL_PARG);
93: return (-1);
94: }
95: if (pcomp != NULL)
96: free(*pcomp);
97: *pcomp = comp;
1.1 shadchin 98: #endif
99: #if HAVE_V8_REGCOMP
1.3 shadchin 100: struct regexp *comp;
101: struct regexp **pcomp = (struct regexp **) comp_pattern;
102: if ((comp = regcomp(pattern)) == NULL)
103: {
104: /*
105: * regcomp has already printed an error message
106: * via regerror().
107: */
108: return (-1);
109: }
110: if (*pcomp != NULL)
111: free(*pcomp);
112: *pcomp = comp;
1.1 shadchin 113: #endif
1.3 shadchin 114: }
1.1 shadchin 115: return (0);
116: }
117:
118: /*
119: * Like compile_pattern2, but convert the pattern to lowercase if necessary.
120: */
121: public int
122: compile_pattern(pattern, search_type, comp_pattern)
123: char *pattern;
124: int search_type;
125: void **comp_pattern;
126: {
127: char *cvt_pattern;
128: int result;
129:
130: if (caseless != OPT_ONPLUS)
131: cvt_pattern = pattern;
132: else
133: {
134: cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
135: cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
136: }
137: result = compile_pattern2(cvt_pattern, search_type, comp_pattern);
138: if (cvt_pattern != pattern)
139: free(cvt_pattern);
140: return (result);
141: }
142:
143: /*
144: * Forget that we have a compiled pattern.
145: */
146: public void
147: uncompile_pattern(pattern)
148: void **pattern;
149: {
1.3 shadchin 150: #if HAVE_GNU_REGEX
151: struct re_pattern_buffer **pcomp = (struct re_pattern_buffer **) pattern;
152: if (*pcomp != NULL)
153: regfree(*pcomp);
154: *pcomp = NULL;
155: #endif
1.1 shadchin 156: #if HAVE_POSIX_REGCOMP
157: regex_t **pcomp = (regex_t **) pattern;
158: if (*pcomp != NULL)
159: regfree(*pcomp);
160: *pcomp = NULL;
161: #endif
162: #if HAVE_PCRE
163: pcre **pcomp = (pcre **) pattern;
164: if (*pcomp != NULL)
165: pcre_free(*pcomp);
166: *pcomp = NULL;
167: #endif
168: #if HAVE_RE_COMP
169: int *pcomp = (int *) pattern;
170: *pcomp = 0;
171: #endif
172: #if HAVE_REGCMP
173: char **pcomp = (char **) pattern;
174: if (*pcomp != NULL)
175: free(*pcomp);
176: *pcomp = NULL;
177: #endif
178: #if HAVE_V8_REGCOMP
179: struct regexp **pcomp = (struct regexp **) pattern;
180: if (*pcomp != NULL)
181: free(*pcomp);
182: *pcomp = NULL;
183: #endif
184: }
185:
186: /*
187: * Is a compiled pattern null?
188: */
189: public int
190: is_null_pattern(pattern)
191: void *pattern;
192: {
1.3 shadchin 193: #if HAVE_GNU_REGEX
194: return (pattern == NULL);
195: #endif
1.1 shadchin 196: #if HAVE_POSIX_REGCOMP
197: return (pattern == NULL);
198: #endif
199: #if HAVE_PCRE
200: return (pattern == NULL);
201: #endif
202: #if HAVE_RE_COMP
203: return (pattern == 0);
204: #endif
205: #if HAVE_REGCMP
206: return (pattern == NULL);
207: #endif
208: #if HAVE_V8_REGCOMP
209: return (pattern == NULL);
210: #endif
211: }
212:
/*
 * Simple pattern matching function.
 * It supports no metacharacters like *, etc.
 *
 * Looks for the first occurrence of the pattern_len bytes at pattern
 * within the buf_len bytes at buf.  On a match, *pfound receives the
 * start of the match and *pend the position just past it; either
 * pointer may be NULL if the caller does not want that value.
 *
 * Returns 1 if the pattern was found, 0 otherwise.
 */
static int
match(pattern, pattern_len, buf, buf_len, pfound, pend)
	char *pattern;
	int pattern_len;
	char *buf;
	int buf_len;
	char **pfound, **pend;
{
	register char *pp, *lp;
	register char *pattern_end = pattern + pattern_len;
	register char *buf_end = buf + buf_len;

	for ( ;  buf < buf_end;  buf++)
	{
		/*
		 * Check both bounds before dereferencing, so nothing
		 * at or beyond buf_end or pattern_end is ever read.
		 */
		for (pp = pattern, lp = buf;
		     pp < pattern_end && lp < buf_end && *pp == *lp;
		     pp++, lp++)
			continue;
		/* The whole pattern was consumed: match starts at buf. */
		if (pp == pattern_end)
		{
			if (pfound != NULL)
				*pfound = buf;
			if (pend != NULL)
				*pend = lp;
			return (1);
		}
	}
	return (0);
}
245:
246: /*
247: * Perform a pattern match with the previously compiled pattern.
248: * Set sp and ep to the start and end of the matched string.
249: */
250: public int
251: match_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type)
252: void *pattern;
253: char *tpattern;
254: char *line;
255: int line_len;
256: char **sp;
257: char **ep;
258: int notbol;
259: int search_type;
260: {
261: int matched;
1.3 shadchin 262: #if HAVE_GNU_REGEX
263: struct re_pattern_buffer *spattern = (struct re_pattern_buffer *) pattern;
264: #endif
1.1 shadchin 265: #if HAVE_POSIX_REGCOMP
266: regex_t *spattern = (regex_t *) pattern;
267: #endif
268: #if HAVE_PCRE
269: pcre *spattern = (pcre *) pattern;
270: #endif
271: #if HAVE_RE_COMP
272: int spattern = (int) pattern;
273: #endif
274: #if HAVE_REGCMP
275: char *spattern = (char *) pattern;
276: #endif
277: #if HAVE_V8_REGCOMP
278: struct regexp *spattern = (struct regexp *) pattern;
279: #endif
280:
1.3 shadchin 281: #if NO_REGEX
282: search_type |= SRCH_NO_REGEX;
283: #endif
1.1 shadchin 284: if (search_type & SRCH_NO_REGEX)
285: matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep);
286: else
287: {
1.3 shadchin 288: #if HAVE_GNU_REGEX
289: {
290: struct re_registers search_regs;
291: regoff_t *starts = (regoff_t *) ecalloc(1, sizeof (regoff_t));
292: regoff_t *ends = (regoff_t *) ecalloc(1, sizeof (regoff_t));
293: spattern->not_bol = notbol;
294: re_set_registers(spattern, &search_regs, 1, starts, ends);
295: matched = re_search(spattern, line, line_len, 0, line_len, &search_regs) >= 0;
296: if (matched)
297: {
298: *sp = line + search_regs.start[0];
299: *ep = line + search_regs.end[0];
300: }
301: free(starts);
302: free(ends);
303: }
304: #endif
1.1 shadchin 305: #if HAVE_POSIX_REGCOMP
306: {
307: regmatch_t rm;
308: int flags = (notbol) ? REG_NOTBOL : 0;
1.4 ! guenther 309: #ifdef REG_STARTEND
! 310: flags |= REG_STARTEND;
! 311: rm.rm_so = 0;
! 312: rm.rm_eo = line_len;
! 313: #endif
1.1 shadchin 314: matched = !regexec(spattern, line, 1, &rm, flags);
315: if (matched)
316: {
317: #ifndef __WATCOMC__
318: *sp = line + rm.rm_so;
319: *ep = line + rm.rm_eo;
320: #else
321: *sp = rm.rm_sp;
322: *ep = rm.rm_ep;
323: #endif
324: }
325: }
326: #endif
327: #if HAVE_PCRE
328: {
329: int flags = (notbol) ? PCRE_NOTBOL : 0;
330: int ovector[3];
331: matched = pcre_exec(spattern, NULL, line, line_len,
332: 0, flags, ovector, 3) >= 0;
333: if (matched)
334: {
335: *sp = line + ovector[0];
336: *ep = line + ovector[1];
337: }
338: }
339: #endif
340: #if HAVE_RE_COMP
341: matched = (re_exec(line) == 1);
342: /*
343: * re_exec doesn't seem to provide a way to get the matched string.
344: */
345: *sp = *ep = NULL;
346: #endif
347: #if HAVE_REGCMP
348: *ep = regex(spattern, line);
349: matched = (*ep != NULL);
350: if (matched)
351: *sp = __loc1;
352: #endif
353: #if HAVE_V8_REGCOMP
354: #if HAVE_REGEXEC2
355: matched = regexec2(spattern, line, notbol);
356: #else
357: matched = regexec(spattern, line);
358: #endif
359: if (matched)
360: {
361: *sp = spattern->startp[0];
362: *ep = spattern->endp[0];
363: }
364: #endif
365: }
366: matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
367: ((search_type & SRCH_NO_MATCH) && !matched);
368: return (matched);
369: }
370: