Annotation of src/usr.bin/grep/grep.c, Revision 1.1
1.1 ! deraadt 1: /* $OpenBSD$ */
! 2:
! 3: /*-
! 4: * Copyright (c) 2000 Carson Harding. All rights reserved.
! 5: * This code was written and contributed to OpenBSD by Carson Harding.
! 6: *
! 7: * Redistribution and use in source and binary forms, with or without
! 8: * modification, are permitted provided that the following conditions
! 9: * are met:
! 10: * 1. Redistributions of source code must retain the above copyright
! 11: * notice, this list of conditions and the following disclaimer.
! 12: * 2. Redistributions in binary form must reproduce the above copyright
! 13: * notice, this list of conditions and the following disclaimer in the
! 14: * documentation and/or other materials provided with the distribution.
! 15: * 3. Neither the name of the author, or the names of contributors may be
! 16: * used to endorse or promote products derived from this software without
! 17: * specific prior written permission.
! 18: *
! 19: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
! 20: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 21: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 22: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
! 23: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 24: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 25: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 26: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 27: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 28: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 29: * SUCH DAMAGE.
! 30: */
! 31:
! 32: #ifndef lint
! 33: static char rcsid[] = "$OpenBSD$";
! 34: #endif /* not lint */
! 35:
! 36: #include <sys/types.h>
! 37: #include <stdio.h>
! 38: #include <stdlib.h>
! 39: #include <unistd.h>
! 40: #include <regex.h>
! 41: #include <string.h>
! 42: #include <ctype.h>
! 43: #include <sys/param.h>
! 44: #include <fts.h>
! 45: #include <err.h>
! 46:
extern char *__progname;

/*
 * Forward declarations.
 */

/* diagnostics */
void	 usage(void);
void	 err_regerror(int r, regex_t *rexp);

/* searching */
int	 grep_files(int regexc, regex_t *regexv, char **files);
int	 grep_tree(int regexc, regex_t *regexv, char **paths);
int	 grep_file(int regexc, regex_t *rexp, char *fname);

/* pattern collection and compilation */
void	 arg_patt(char *s);
char	*chop_patt(char *s, size_t *len);
void	 add_patt(char *s, size_t len);
void	 load_patt(char *fname);
regex_t	*regcomp_patt(int pattc, char *pattvp[], int cflags);

/*
 * Option flags.  All start out zero and are set while main()
 * parses the command line.
 */

/* -b: prefix each line with its byte offset */
int f_bytecount;
/* -c: print only a count of selected lines */
int f_countonly;
/* -h: never prefix output with the file name */
int f_nofname;
/* -l: print only the names of files with a selected line */
int f_fnameonly;
/* -s: suppress error messages about unreadable files */
int f_suppress;
/* -n: prefix each line with its line number */
int f_lineno;
/* -q: no output at all, only the exit status */
int f_quiet;
/* -w: the match must be delimited as a word */
int f_wmatch;
/* -x: the match must span the entire line */
int f_xmatch;
/* -z: with -l, end each file name with NUL instead of newline */
int f_zerobyte;
/* regexec() result that selects a line: 0 normally, REG_NOMATCH for -v */
int f_match;
/* several inputs are being searched: prefix output with file names */
int f_multifile;
/* an empty pattern was given, so every input line matches */
int f_matchall;
/* an error was seen; influences the exit status */
int f_error;

/* default traversal flags handed to fts_open() */
int f_ftsflags = FTS_LOGICAL|FTS_NOCHDIR|FTS_NOSTAT;

/* -D: temporary debugging flag */
int f_debug;

/*
 * Pattern table, filled from -e, -f and the pattern operand.
 */
#define START_PATT_SZ 8		/* initial capacity of pattv */
char **pattv;			/* collected pattern strings */
int pattc;			/* number of strings stored in pattv */
int pattn;			/* patterns seen, empty ones included */
! 86:
! 87: int
! 88: main(int argc, char **argv)
! 89: {
! 90: int c;
! 91: int ch;
! 92: int cflags; /* flags to regcomp() */
! 93: int sawfile; /* did we see a pattern file? */
! 94: regex_t *regexv; /* start of array of compiled patterns */
! 95:
! 96: int (*grepf)(int regexc, regex_t *regexv, char **argv);
! 97:
! 98: sawfile = 0;
! 99: cflags = REG_BASIC|REG_NEWLINE;
! 100: grepf = grep_files;
! 101:
! 102: if (*__progname == 'e')
! 103: cflags |= REG_EXTENDED;
! 104: else if (*__progname == 'f')
! 105: cflags |= REG_NOSPEC;
! 106:
! 107: while ((ch = getopt(argc, argv, "DEFRHLPXabce:f:hilnqsvwxz")) != -1) {
! 108: switch(ch) {
! 109: case 'D':
! 110: f_debug = 1;
! 111: break;
! 112: case 'E':
! 113: cflags |= REG_EXTENDED;
! 114: break;
! 115: case 'F':
! 116: cflags |= REG_NOSPEC;
! 117: break;
! 118: case 'H':
! 119: f_ftsflags |= FTS_COMFOLLOW;
! 120: break;
! 121: case 'L':
! 122: f_ftsflags |= FTS_LOGICAL;
! 123: break;
! 124: case 'P':
! 125: f_ftsflags |= FTS_PHYSICAL;
! 126: break;
! 127: case 'R':
! 128: grepf = grep_tree;
! 129: /*
! 130: * If walking the tree we don't know how many files
! 131: * we'll actually find. So assume multiple, if
! 132: * you don't want names, there's always -h ....
! 133: */
! 134: f_multifile = 1;
! 135: break;
! 136: case 'X':
! 137: f_ftsflags |= FTS_XDEV;
! 138: break;
! 139: case 'a':
! 140: /*
! 141: * Silently eat -a; we don't use the default
! 142: * behaviour it toggles off in gnugrep.
! 143: */
! 144: break;
! 145: case 'b':
! 146: f_bytecount = 1;
! 147: break;
! 148: case 'c':
! 149: f_countonly = 1;
! 150: break;
! 151: case 'e':
! 152: arg_patt(optarg);
! 153: break;
! 154: case 'f':
! 155: load_patt(optarg);
! 156: sawfile = 1;
! 157: break;
! 158: case 'h':
! 159: f_nofname = 1;
! 160: break;
! 161: case 'i':
! 162: cflags |= REG_ICASE;
! 163: break;
! 164: case 'l':
! 165: f_fnameonly = 1;
! 166: break;
! 167: case 'n':
! 168: f_lineno = 1;
! 169: break;
! 170: case 'q':
! 171: f_quiet = 1;
! 172: break;
! 173: case 's':
! 174: f_suppress = 1;
! 175: break;
! 176: case 'v':
! 177: f_match = REG_NOMATCH;
! 178: break;
! 179: case 'w':
! 180: f_wmatch = 1;
! 181: break;
! 182: case 'x':
! 183: f_xmatch = 1;
! 184: break;
! 185: case 'z':
! 186: f_zerobyte = 1;
! 187: break;
! 188: default:
! 189: usage();
! 190: break;
! 191: }
! 192: }
! 193:
! 194: if ((cflags & REG_EXTENDED) && (cflags & REG_NOSPEC))
! 195: usage();
! 196:
! 197: /*
! 198: * If we read one or more pattern files, and still
! 199: * didn't end up with any pattern, any pattern file
! 200: * we read was empty. This is different than failing
! 201: * to provide a pattern as an argument, and we fail
! 202: * on this case as if we had searched and found
! 203: * no matches. (At least this is what GNU grep and
! 204: * Solaris's grep do.)
! 205: */
! 206: if (!pattn && !argv[optind]) {
! 207: if (sawfile)
! 208: exit(1);
! 209: else usage();
! 210: }
! 211:
! 212: if (!pattn) {
! 213: arg_patt(argv[optind]);
! 214: optind++;
! 215: }
! 216:
! 217: /* why bother ... just do nothing sooner */
! 218: if (f_matchall && f_match == REG_NOMATCH)
! 219: exit(1);
! 220:
! 221: regexv = regcomp_patt(pattc, pattv, cflags);
! 222:
! 223: if (optind == argc) {
! 224: c = grep_file(pattc, regexv, NULL);
! 225: } else {
! 226: if (argc - optind > 1 && !f_nofname)
! 227: f_multifile = 1;
! 228: c = (*grepf)(pattc, regexv, &argv[optind]);
! 229: }
! 230:
! 231: /* XX ugh */
! 232: if (f_error) {
! 233: if (c && f_quiet)
! 234: exit(0);
! 235: else
! 236: exit(2);
! 237: } else if (c)
! 238: exit(0);
! 239: else
! 240: exit(1);
! 241: }
! 242:
! 243: void
! 244: usage(void)
! 245: {
! 246: fprintf(stderr, "usage: %s [-E|-F] [-abchilnqsvwx] [-RXH[-L|-P]]"
! 247: " {patt | -e patt | -f patt_file} [files]\n",
! 248: __progname);
! 249: exit(2);
! 250: }
! 251:
! 252: /*
! 253: * Patterns as arguments may have embedded newlines.
! 254: * When read from file, these are detected by fgetln();
! 255: * in arguments we have to find and cut out the segments.
! 256: */
! 257: void
! 258: arg_patt(char *s)
! 259: {
! 260: size_t len;
! 261: char *sp;
! 262:
! 263: if (f_debug)
! 264: fprintf(stderr, "arg_patt(\"%s\")\n", s);
! 265:
! 266: len = strlen(s);
! 267: if (!len) { /* got "" on the command-line */
! 268: add_patt(s, len);
! 269: return;
! 270: }
! 271: for (sp = chop_patt(s, &len); sp; sp = chop_patt(NULL, &len)) {
! 272: if (f_debug) {
! 273: fprintf(stderr, "adding pattern \"");
! 274: fwrite(sp, len, 1, stderr);
! 275: fprintf(stderr, "\", length %lu\n",(unsigned long)len);
! 276: if (pattc > 20) {
! 277: fprintf(stderr, "too many, exiting ...\n");
! 278: exit(2);
! 279: }
! 280: }
! 281: add_patt(sp, len);
! 282: }
! 283: }
! 284:
/*
 * Kind of like strtok(), but for a counted buffer: call it memtok().
 *
 * First call: pass the buffer in s and its size in *len.
 * Later calls: pass NULL for s to continue with the same buffer.
 *
 * Returns a pointer to the next newline-delimited segment and
 * stores the segment's length (newline excluded) in *len, or
 * returns NULL once the buffer is used up; *len is left alone in
 * that case.  The buffer is not modified.  Two adjacent newlines
 * yield an empty segment; a trailing newline does not.
 *
 * The position is kept in static storage, so only one buffer can
 * be walked at a time.
 */
char *
chop_patt(char *s, size_t *len)
{
	static char *save_s;	/* start of the unread remainder */
	static size_t save_n;	/* bytes left in the remainder */
	char *nl;

	if (s != NULL)
		save_n = *len;
	else
		s = save_s;

	if (save_n == 0 || s == NULL) {
		save_s = NULL;
		return NULL;
	}

	nl = memchr(s, '\n', save_n);
	if (nl != NULL) {
		*len = (size_t)(nl - s);	/* returned segment */
		save_n -= *len + 1;		/* segment plus its newline */
		save_s = nl + 1;		/* resume past the newline */
	} else {
		*len = save_n;			/* the rest is one segment */
		save_n = 0;
		save_s = NULL;
	}

	return s;
}
! 317:
! 318: /*
! 319: * Start with an array for 8 patterns, and double it
! 320: * each time we outgrow it. If pattern is empty (0 length),
! 321: * or if f_matchall is already set, set f_matchall and return.
! 322: * No use adding a pattern if all input is going to match
! 323: * anyhow.
! 324: */
! 325: void
! 326: add_patt(char *s, size_t len)
! 327: {
! 328: char *p;
! 329: static size_t pattmax = START_PATT_SZ;
! 330: static size_t sumlen;
! 331:
! 332: pattn++;
! 333: sumlen += len;
! 334:
! 335: if (!len || f_matchall) {
! 336: f_matchall = 1;
! 337: return;
! 338: }
! 339:
! 340: if (!pattv) {
! 341: pattv = malloc(START_PATT_SZ * sizeof(char *));
! 342: if (!pattv)
! 343: err(2, "malloc");
! 344: pattc = 0;
! 345: } else if (pattc >= pattmax) {
! 346: pattmax *= 2;
! 347: pattv = realloc(pattv, pattmax * sizeof(char *));
! 348: if (!pattv)
! 349: err(2, "realloc");
! 350: }
! 351: p = malloc(len+1);
! 352: if (!p) err(2, "malloc");
! 353: memmove(p, s, len);
! 354: p[len] = '\0';
! 355: pattv[pattc++] = p;
! 356: }
! 357:
/*
 * Load patterns from the file fname, one pattern per line.
 * The newline ending a line is not part of the pattern; an
 * empty line therefore adds an empty pattern.
 *
 * Exits with status 2 if the file cannot be opened or read.
 */
void
load_patt(char *fname)
{
	FILE *fr;
	char *buf;
	size_t len;

	fr = fopen(fname, "r");
	if (fr == NULL)
		err(2, "%s", fname);	/* never use a file name as the format */

	while ((buf = fgetln(fr, &len)) != NULL) {
		/* add_patt() copies len bytes, so just shorten the count. */
		if (len > 0 && buf[len - 1] == '\n')
			len--;
		add_patt(buf, len);
	}
	/* fgetln() returns NULL for both end-of-file and a read error. */
	if (ferror(fr))
		err(2, "%s", fname);

	fclose(fr);
}
! 378:
! 379: /*
! 380: * Compile the collected pattern strings into an array
! 381: * of regex_t.
! 382: */
! 383: regex_t *
! 384: regcomp_patt(int lpattc, char *lpattv[], int cflags)
! 385: {
! 386: int i;
! 387: int r;
! 388: regex_t *rxv;
! 389:
! 390: if (f_matchall)
! 391: return NULL;
! 392:
! 393: rxv = malloc(sizeof(regex_t) * lpattc);
! 394: if (!rxv)
! 395: err(2, "malloc");
! 396: for (i = 0; i < lpattc; i++) {
! 397: if ((r = regcomp(&rxv[i], lpattv[i], cflags)) != 0)
! 398: err_regerror(r, &rxv[i]);
! 399: }
! 400: return rxv;
! 401: }
! 402:
/*
 * Report the regcomp() failure code r for rexp and exit with
 * status 2.
 */
void
err_regerror(int r, regex_t *rexp)
{
	char *msg;
	size_t need;

	/* A first call without a buffer yields the size the text needs. */
	need = regerror(r, rexp, NULL, 0);
	if ((msg = malloc(need)) == NULL)
		err(2, "malloc");
	(void)regerror(r, rexp, msg, need);
	errx(2, "%s", msg);
}
! 419:
/*
 * Search every file named in the NULL-terminated array files and
 * return the sum of the per-file results.  Exists so that main()
 * can select between this and grep_tree() with a function pointer.
 */
int
grep_files(int regexc, regex_t *regexv, char **files)
{
	int total = 0;

	while (*files != NULL) {
		total += grep_file(regexc, regexv, *files);
		files++;
	}

	return total;
}
! 435:
! 436: /*
! 437: * Modified from James Howard and Dag-Erling Co?dan Sm?rgrav's grep:
! 438: * add FTS_D to FTS_DP (especially since D was the one being used)
! 439: * pass in regex_t array, and set fts flags above in main().
! 440: */
! 441: int
! 442: grep_tree(int regexc, regex_t *regexv, char **paths)
! 443: {
! 444: int c;
! 445: FTS *fts;
! 446: FTSENT *p;
! 447:
! 448: c = 0;
! 449:
! 450: if (!(fts = fts_open(paths, f_ftsflags, (int (*) ()) NULL)))
! 451: err(2, "fts_open");
! 452: while ((p = fts_read(fts)) != NULL) {
! 453: switch (p->fts_info) {
! 454: case FTS_D:
! 455: case FTS_DP:
! 456: case FTS_DNR:
! 457: break;
! 458: case FTS_ERR:
! 459: errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
! 460: break;
! 461: default:
! 462: if (f_debug)
! 463: printf("%s\n", p->fts_path);
! 464: c += grep_file(regexc, regexv, p->fts_path);
! 465: break;
! 466: }
! 467: }
! 468:
! 469: return c;
! 470: }
! 471:
! 472: /*
! 473: * Open and grep the named file. If fname is NULL, read
! 474: * from stdin.
! 475: */
! 476:
! 477: #define isword(x) (isalnum(x) || (x) == '_')
! 478:
! 479: int
! 480: grep_file(int regexc, regex_t *regexv, char *fname)
! 481: {
! 482: int i;
! 483: int c;
! 484: int n;
! 485: int r;
! 486: int match;
! 487: char *buf;
! 488: size_t b;
! 489: size_t len;
! 490: FILE *fr;
! 491: regmatch_t pmatch[1];
! 492: regoff_t so, eo;
! 493:
! 494: b = 0; /* byte count */
! 495: c = 0; /* match count */
! 496: n = 0; /* line count */
! 497:
! 498: if (!fname) {
! 499: fr = stdin;
! 500: fname = "(standard input)";
! 501: } else {
! 502: fr = fopen(fname, "r");
! 503: if (!fr) {
! 504: if (!f_suppress)
! 505: warn("%s", fname);
! 506: f_error = 1;
! 507: return 0;
! 508: }
! 509: }
! 510:
! 511: while ((buf = fgetln(fr, &len)) != NULL) {
! 512: n++;
! 513: if (f_matchall)
! 514: goto printmatch;
! 515: match = 0;
! 516: for (i = 0; i < regexc; i++) {
! 517: pmatch[0].rm_so = 0;
! 518: pmatch[0].rm_eo = len-1;
! 519: r = regexec(®exv[i], buf, 1, pmatch, REG_STARTEND);
! 520: if (r == f_match) {
! 521: /*
! 522: * XX gnu grep allows both -w and -x;
! 523: * XX but seems bizarre. sometimes -w seems
! 524: * XX to override, at other times, not.
! 525: * XX Need to figure that out.
! 526: * XX It seems logical to go with the most
! 527: * XX restrictive argument: -x, as -x is
! 528: * XX a boundary case of -w anyhow.
! 529: */
! 530: if (f_xmatch) {
! 531: if (pmatch[0].rm_so != 0 ||
! 532: pmatch[0].rm_eo != len-1)
! 533: continue;
! 534: } else if (f_wmatch) {
! 535: so = pmatch[0].rm_so;
! 536: eo = pmatch[0].rm_eo;
! 537: if (!((so == 0 || !isword(buf[so-1])) &&
! 538: (eo == len || !isword(buf[eo]))))
! 539: continue;
! 540: }
! 541: match = 1;
! 542: break;
! 543: }
! 544: /* XX test for regexec() errors ?? */
! 545: }
! 546: if (match) {
! 547: printmatch:
! 548: c++;
! 549: if (f_fnameonly || f_quiet)
! 550: break;
! 551: if (f_countonly)
! 552: continue;
! 553: if (f_multifile && !f_nofname)
! 554: printf("%s:", fname);
! 555: if (f_lineno)
! 556: printf("%d:", n);
! 557: if (f_bytecount)
! 558: printf("%lu:", (unsigned long)b);
! 559: fwrite(buf, len, 1, stdout);
! 560: }
! 561: /* save position in stream before next line */
! 562: b += len;
! 563: }
! 564:
! 565: if (!buf && ferror(fr)) {
! 566: warn("%s", fname);
! 567: f_error = 1;
! 568: /*
! 569: * XX or do we spit out what result we did have?
! 570: */
! 571: } else if (!f_quiet) {
! 572: /*
! 573: * XX test -c and -l together: gnu grep
! 574: * XX allows (although ugly), do others?
! 575: */
! 576: if (f_countonly) {
! 577: if (f_multifile)
! 578: printf("%s:", fname);
! 579: printf("%d\n", c);
! 580: }
! 581: if (c && f_fnameonly) {
! 582: fputs(fname, stdout);
! 583: if (f_zerobyte)
! 584: fputc('\0', stdout);
! 585: else
! 586: fputc('\n', stdout);
! 587: }
! 588: }
! 589:
! 590: if (fr != stdin)
! 591: fclose(fr);
! 592:
! 593: return c;
! 594: }
! 595: