version 1.5, 2003/06/23 07:52:18 |
version 1.6, 2003/06/23 22:05:23 |
|
|
|
|
static int linesqueued; |
static int linesqueued; |
static int procline(str_t *l, int); |
static int procline(str_t *l, int); |
|
static int grep_search(fastgrep_t *, unsigned char *, int); |
|
static int grep_cmp(const unsigned char *, const unsigned char *, size_t); |
|
static void grep_revstr(unsigned char *, int); |
|
|
int |
int |
grep_tree(char **argv) |
grep_tree(char **argv) |
|
|
pmatch.rm_so = 0; |
pmatch.rm_so = 0; |
pmatch.rm_eo = l->len; |
pmatch.rm_eo = l->len; |
for (c = i = 0; i < patterns; i++) { |
for (c = i = 0; i < patterns; i++) { |
r = regexec(&r_pattern[i], l->dat, 0, &pmatch, eflags); |
if (fg_pattern[i].pattern) |
|
r = grep_search(&fg_pattern[i], (unsigned char *)l->dat, |
|
l->len); |
|
else |
|
r = regexec(&r_pattern[i], l->dat, 0, &pmatch, eflags); |
if (r == REG_NOMATCH && t == 0) |
if (r == REG_NOMATCH && t == 0) |
continue; |
continue; |
if (r == 0) { |
if (r == 0) { |
|
|
return c; |
return c; |
} |
} |
|
|
|
/* |
|
* Returns: -1 on failure |
|
* 0 on success |
|
*/ |
|
int |
|
fastcomp(fastgrep_t *fg, const char *pattern) |
|
{ |
|
int i; |
|
int bol = 0; |
|
int eol = 0; |
|
int origPatternLen; |
|
int shiftPatternLen; |
|
int hasDot = 0; |
|
int firstHalfDot = -1; |
|
int firstLastHalfDot = -1; |
|
int lastHalfDot = 0; |
|
|
|
/* Initialize. */ |
|
origPatternLen = fg->patternLen = strlen(pattern); |
|
fg->bol = 0; |
|
fg->eol = 0; |
|
fg->reversedSearch = 0; |
|
|
|
/* Remove end-of-line character ('$'). */ |
|
if (pattern[fg->patternLen - 1] == '$') { |
|
eol++; |
|
fg->eol = 1; |
|
fg->patternLen--; |
|
boleol = 1; |
|
} |
|
|
|
/* Remove beginning-of-line character ('^'). */ |
|
if (pattern[0] == '^') { |
|
bol++; |
|
fg->bol = 1; |
|
fg->patternLen--; |
|
boleol = 1; |
|
} |
|
|
|
/* |
|
* Copy pattern minus '^' and '$' characters at the beginning and ending of |
|
* the string respectively. |
|
*/ |
|
fg->pattern = grep_strdup(pattern + bol); |
|
|
|
/* Look for ways to cheat...er...avoid the full regex engine. */ |
|
for (i = 0; i < fg->patternLen; i++) |
|
{ |
|
/* Can still cheat? */ |
|
if ((isalnum(fg->pattern[i])) || isspace(fg->pattern[i]) || |
|
(fg->pattern[i] == '_') || (fg->pattern[i] == ',') || |
|
(fg->pattern[i] == '^') || (fg->pattern[i] == '$') || |
|
(fg->pattern[i] == '=') || (fg->pattern[i] == '-') || |
|
(fg->pattern[i] == ':') || (fg->pattern[i] == '/')) { |
|
/* As long as it is good, upper case it for later. */ |
|
if (iflag) |
|
fg->pattern[i] = toupper(fg->pattern[i]); |
|
} else if (fg->pattern[i] == '.') { |
|
hasDot = i; |
|
if (i < fg->patternLen / 2) { |
|
if (firstHalfDot < -1) |
|
/* Closest dot to the beginning */ |
|
firstHalfDot = i; |
|
} else { |
|
/* Closest dot to the end of the pattern. */ |
|
lastHalfDot = i; |
|
if (firstLastHalfDot < 0) |
|
firstLastHalfDot = i; |
|
} |
|
} else { |
|
/* Free memory and let others know this is empty. */ |
|
free(fg->pattern); |
|
fg->pattern = NULL; |
|
return (-1); |
|
} |
|
} |
|
|
|
/* |
|
* Determine if a reverse search would be faster based on the placement |
|
* of the dots. |
|
*/ |
|
if ((!(lflag || cflag)) && ((!(bol || eol)) && |
|
((lastHalfDot) && ((firstHalfDot < 0) || |
|
((fg->patternLen - (lastHalfDot + 1)) < firstHalfDot))))) { |
|
fg->reversedSearch = 1; |
|
hasDot = fg->patternLen - (firstHalfDot < 0 ? |
|
firstLastHalfDot : firstHalfDot) - 1; |
|
grep_revstr(fg->pattern, fg->patternLen); |
|
} |
|
|
|
/* |
|
* Normal Quick Search would require a shift based on the position the |
|
* next character after the comparison is within the pattern. With |
|
* wildcards, the position of the last dot effects the maximum shift |
|
* distance. |
|
* The closer to the end the wild card is the slower the search. A |
|
* reverse version of this algorithm would be useful for wildcards near |
|
* the end of the string. |
|
* |
|
* Examples: |
|
* Pattern Max shift |
|
* ------- --------- |
|
* this 5 |
|
* .his 4 |
|
* t.is 3 |
|
* th.s 2 |
|
* thi. 1 |
|
*/ |
|
|
|
/* Adjust the shift based on location of the last dot ('.'). */ |
|
shiftPatternLen = fg->patternLen - hasDot; |
|
|
|
/* Preprocess pattern. */ |
|
for (i = 0; i <= UCHAR_MAX; i++) |
|
fg->qsBc[i] = shiftPatternLen; |
|
for (i = hasDot + 1; i < fg->patternLen; i++) { |
|
fg->qsBc[fg->pattern[i]] = fg->patternLen - i; |
|
/* |
|
* If case is ignored, make the jump apply to both upper and |
|
* lower cased characters. As the pattern is stored in upper |
|
* case, apply the same to the lower case equivalents. |
|
*/ |
|
if (iflag) |
|
fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i; |
|
} |
|
|
|
/* |
|
* Put pattern back to normal after pre-processing to allow for easy |
|
* comparisons later. |
|
*/ |
|
if (fg->reversedSearch) |
|
grep_revstr(fg->pattern, fg->patternLen); |
|
|
|
return (0); |
|
} |
|
|
|
/*
 * Search `data` (dataLen bytes) for the pattern pre-compiled in `fg`.
 *
 * Anchored patterns (fg->bol / fg->eol) are compared exactly once at the
 * start or end of the data.  Otherwise the data is scanned with the shift
 * table fg->qsBc, from the end towards the beginning when
 * fg->reversedSearch is set and from the beginning otherwise.
 *
 * Returns:	0 if the pattern matches
 *		REG_NOMATCH otherwise
 */
static int
grep_search(fastgrep_t *fg, unsigned char *data, int dataLen)
{
	int j;
	int rtrnVal = REG_NOMATCH;

	/* No point in going farther if we do not have enough data. */
	if (dataLen < fg->patternLen)
		return (rtrnVal);

	/* Only try once at the beginning or ending of the line. */
	if (fg->bol || fg->eol) {
		/* Simple text comparison. */
		/* Determine where in data to start search at. */
		if (fg->eol)
			j = dataLen - fg->patternLen;
		else
			j = 0;
		/* With both anchors the line must be exactly the pattern. */
		if (!((fg->bol && fg->eol) && (dataLen != fg->patternLen)))
			if (grep_cmp(fg->pattern, data + j,
			    fg->patternLen) == -1)
				rtrnVal = 0;
	} else if (fg->reversedSearch) {
		/*
		 * Quick Search algorithm, right to left.  `j` is the index one
		 * past the end of the current window, so the window is
		 * data[j - patternLen .. j - 1] and the byte driving the shift
		 * is the one just before it.
		 */
		j = dataLen;
		do {
			if (grep_cmp(fg->pattern, data + j - fg->patternLen,
			    fg->patternLen) == -1) {
				rtrnVal = 0;
				break;
			}

			/*
			 * Shift if within bounds, otherwise, we are done.
			 * When the window already starts at data[0] there is
			 * no preceding byte to look at.
			 */
			if (j == fg->patternLen)
				break;
			else
				j -= fg->qsBc[data[j - fg->patternLen - 1]];
		} while (j >= fg->patternLen);
	} else {
		/*
		 * Quick Search algorithm, left to right.  `j` is the start of
		 * the current window; the byte just past it drives the shift.
		 */
		j = 0;
		do {
			if (grep_cmp(fg->pattern, data + j,
			    fg->patternLen) == -1) {
				rtrnVal = 0;
				break;
			}

			/* Shift if within bounds, otherwise, we are done. */
			if (j + fg->patternLen == dataLen)
				break;
			else
				j += fg->qsBc[data[j + fg->patternLen]];
		} while (j <= (dataLen - fg->patternLen));
	}

	return (rtrnVal);
}
|
|
|
|
void * |
void * |
grep_malloc(size_t size) |
grep_malloc(size_t size) |
{ |
{ |
|
|
if ((ptr = realloc(ptr, size)) == NULL) |
if ((ptr = realloc(ptr, size)) == NULL) |
err(1, "realloc"); |
err(1, "realloc"); |
return ptr; |
return ptr; |
|
} |
|
|
|
unsigned char * |
|
grep_strdup(const char *str) |
|
{ |
|
unsigned char *ptr; |
|
|
|
if ((ptr = (unsigned char *)strdup(str)) == NULL) |
|
err(1, "strdup"); |
|
return ptr; |
|
} |
|
|
|
/* |
|
* Returns: i >= 0 on failure (position that it failed) |
|
* -1 on success |
|
*/ |
|
int |
|
grep_cmp(const unsigned char *pattern, const unsigned char *data, |
|
size_t len) |
|
{ |
|
int i; |
|
|
|
for (i = 0; i < len; i++) { |
|
if (((pattern[i] == data[i]) || (pattern[i] == '.')) || |
|
(iflag && pattern[i] == toupper(data[i]))) |
|
continue; |
|
return (i); |
|
} |
|
|
|
return (-1); |
|
} |
|
|
|
/*
 * Reverse the first `len` bytes of `str` in place.
 *
 * Bytes at or beyond `len` (including any terminating NUL) are untouched.
 * A `len` of 1 or less leaves the buffer unchanged.
 */
static void
grep_revstr(unsigned char *str, int len)
{
	int lo = 0;
	int hi = len - 1;

	/* Swap from both ends inward; the middle byte of an odd run stays. */
	while (lo < hi) {
		/* Same type as the data: no signed-char conversion of bytes >= 0x80. */
		unsigned char tmp = str[lo];

		str[lo] = str[hi];
		str[hi] = tmp;
		lo++;
		hi--;
	}
}
|
|
void |
void |