Annotation of src/usr.bin/csplit/csplit.c, Revision 1.1
1.1 ! millert 1: /* $OpenBSD$ */
! 2: /* $FreeBSD: src/usr.bin/csplit/csplit.c,v 1.9 2004/03/22 11:15:03 tjr Exp $ */
! 3:
! 4: /*-
! 5: * Copyright (c) 2002 Tim J. Robbins.
! 6: * All rights reserved.
! 7: *
! 8: * Redistribution and use in source and binary forms, with or without
! 9: * modification, are permitted provided that the following conditions
! 10: * are met:
! 11: * 1. Redistributions of source code must retain the above copyright
! 12: * notice, this list of conditions and the following disclaimer.
! 13: * 2. Redistributions in binary form must reproduce the above copyright
! 14: * notice, this list of conditions and the following disclaimer in the
! 15: * documentation and/or other materials provided with the distribution.
! 16: *
! 17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
! 18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
! 21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 27: * SUCH DAMAGE.
! 28: */
! 29:
! 30: /*
! 31: * csplit -- split files based on context
! 32: *
! 33: * This utility splits its input into numbered output files by line number
! 34: * or by a regular expression. Regular expression matches have an optional
! 35: * offset with them, allowing the split to occur a specified number of
! 36: * lines before or after the match.
! 37: *
! 38: * To handle negative offsets, we stop reading when the match occurs and
! 39: * store the offset that the file should have been split at, then use
! 40: * this output file as input until all the "overflowed" lines have been read.
! 41: * The file is then closed and truncated to the correct length.
! 42: *
! 43: * We assume that the output files are seekable (i.e. they cannot be
! 44: * symlinks to named pipes or character devices), but make no such
! 45: * assumption about the input.
! 46: */
! 47:
! 48: #include <sys/types.h>
! 49:
! 50: #include <ctype.h>
! 51: #include <err.h>
! 52: #include <errno.h>
! 53: #include <limits.h>
! 54: #include <locale.h>
! 55: #include <regex.h>
! 56: #include <signal.h>
! 57: #include <stdint.h>
! 58: #include <stdio.h>
! 59: #include <stdlib.h>
! 60: #include <string.h>
! 61: #include <unistd.h>
! 62:
/* Forward declarations for the helpers defined later in this file. */
void	 cleanup(void);			/* unlink partial output */
void	 do_lineno(const char *expr);	/* split at a line number */
void	 do_rexp(const char *expr);	/* split at a regexp match */
char	*getline(void);			/* next input line, or NULL */
void	 handlesig(int sig);		/* HUP/INT/TERM handler */
FILE	*newfile(void);			/* open the next output file */
void	 toomuch(FILE *ofp, long n);	/* push back `n' lines */
void	 usage(void);			/* print synopsis and exit */
! 71:
! 72: /*
! 73: * Command line options
! 74: */
! 75: const char *prefix; /* File name prefix */
! 76: long sufflen; /* Number of decimal digits for suffix */
! 77: int sflag; /* Suppress output of file names */
! 78: int kflag; /* Keep output if error occurs */
! 79:
! 80: /*
! 81: * Other miscellaneous globals (XXX too many)
! 82: */
! 83: long lineno; /* Current line number in input file */
! 84: long reps; /* Number of repetitions for this pattern */
! 85: long nfiles; /* Number of files output so far */
! 86: long maxfiles; /* Maximum number of files we can create */
! 87: char currfile[PATH_MAX]; /* Current output file */
! 88: const char *infn; /* Name of the input file */
! 89: FILE *infile; /* Input file handle */
! 90: FILE *overfile; /* Overflow file for toomuch() */
! 91: off_t truncofs; /* Offset this file should be truncated at */
! 92: int doclean; /* Should cleanup() remove output? */
! 93:
! 94: int
! 95: main(int argc, char *argv[])
! 96: {
! 97: struct sigaction sa;
! 98: long i;
! 99: int ch;
! 100: const char *expr;
! 101: char *ep, *p;
! 102: FILE *ofp;
! 103:
! 104: setlocale(LC_ALL, "");
! 105:
! 106: kflag = sflag = 0;
! 107: prefix = "xx";
! 108: sufflen = 2;
! 109: while ((ch = getopt(argc, argv, "f:kn:s")) != -1) {
! 110: switch (ch) {
! 111: case 'f':
! 112: prefix = optarg;
! 113: break;
! 114: case 'k':
! 115: kflag = 1;
! 116: break;
! 117: case 'n':
! 118: errno = 0;
! 119: sufflen = strtol(optarg, &ep, 10);
! 120: if (sufflen <= 0 || *ep != '\0' || errno != 0)
! 121: errx(1, "%s: bad suffix length", optarg);
! 122: break;
! 123: case 's':
! 124: sflag = 1;
! 125: break;
! 126: default:
! 127: usage();
! 128: /*NOTREACHED*/
! 129: }
! 130: }
! 131:
! 132: if (sufflen + strlen(prefix) >= PATH_MAX)
! 133: errx(1, "name too long");
! 134:
! 135: argc -= optind;
! 136: argv += optind;
! 137:
! 138: if ((infn = *argv++) == NULL)
! 139: usage();
! 140: if (strcmp(infn, "-") == 0) {
! 141: infile = stdin;
! 142: infn = "stdin";
! 143: } else if ((infile = fopen(infn, "r")) == NULL)
! 144: err(1, "%s", infn);
! 145:
! 146: if (!kflag) {
! 147: doclean = 1;
! 148: atexit(cleanup);
! 149: sa.sa_flags = 0;
! 150: sa.sa_handler = handlesig;
! 151: sigemptyset(&sa.sa_mask);
! 152: sigaddset(&sa.sa_mask, SIGHUP);
! 153: sigaddset(&sa.sa_mask, SIGINT);
! 154: sigaddset(&sa.sa_mask, SIGTERM);
! 155: sigaction(SIGHUP, &sa, NULL);
! 156: sigaction(SIGINT, &sa, NULL);
! 157: sigaction(SIGTERM, &sa, NULL);
! 158: }
! 159:
! 160: lineno = 0;
! 161: nfiles = 0;
! 162: truncofs = 0;
! 163: overfile = NULL;
! 164:
! 165: /* Ensure 10^sufflen < LONG_MAX. */
! 166: for (maxfiles = 1, i = 0; i < sufflen; i++) {
! 167: if (maxfiles > LONG_MAX / 10)
! 168: errx(1, "%ld: suffix too long (limit %ld)",
! 169: sufflen, i);
! 170: maxfiles *= 10;
! 171: }
! 172:
! 173: /* Create files based on supplied patterns. */
! 174: while (nfiles < maxfiles - 1 && (expr = *argv++) != NULL) {
! 175: /* Look ahead & see if this pattern has any repetitions. */
! 176: if (*argv != NULL && **argv == '{') {
! 177: errno = 0;
! 178: reps = strtol(*argv + 1, &ep, 10);
! 179: if (reps < 0 || *ep != '}' || errno != 0)
! 180: errx(1, "%s: bad repetition count", *argv + 1);
! 181: argv++;
! 182: } else
! 183: reps = 0;
! 184:
! 185: if (*expr == '/' || *expr == '%') {
! 186: do {
! 187: do_rexp(expr);
! 188: } while (reps-- != 0 && nfiles < maxfiles - 1);
! 189: } else if (isdigit((unsigned char)*expr))
! 190: do_lineno(expr);
! 191: else
! 192: errx(1, "%s: unrecognised pattern", expr);
! 193: }
! 194:
! 195: /* Copy the rest into a new file. */
! 196: if (!feof(infile)) {
! 197: ofp = newfile();
! 198: while ((p = getline()) != NULL && fputs(p, ofp) == 0)
! 199: ;
! 200: if (!sflag)
! 201: printf("%jd\n", (intmax_t)ftello(ofp));
! 202: if (fclose(ofp) != 0)
! 203: err(1, "%s", currfile);
! 204: }
! 205:
! 206: toomuch(NULL, 0);
! 207: doclean = 0;
! 208:
! 209: return (0);
! 210: }
! 211:
/* Print the command synopsis on stderr and exit with status 1. */
void
usage(void)
{
	extern char *__progname;
	static const char synopsis[] =
	    "usage: %s [-ks] [-f prefix] [-n number] file args ...\n";

	fprintf(stderr, synopsis, __progname);
	exit(1);
}
! 222:
/*
 * Signal handler installed by main() for SIGHUP, SIGINT and SIGTERM:
 * report the signal on stderr, remove the output via cleanup() and
 * terminate immediately with status 2.
 */
void
handlesig(int sig)
{
	static const char notice[] = "csplit: caught signal, cleaning up\n";
	const size_t len = sizeof(notice) - 1;	/* exclude the NUL */

	write(STDERR_FILENO, notice, len);
	cleanup();
	_exit(2);
}
! 232:
! 233: /* Create a new output file. */
! 234: FILE *
! 235: newfile(void)
! 236: {
! 237: FILE *fp;
! 238:
! 239: if ((size_t)snprintf(currfile, sizeof(currfile), "%s%0*ld", prefix,
! 240: (int)sufflen, nfiles) >= sizeof(currfile))
! 241: errx(1, "%s: %s", currfile, strerror(ENAMETOOLONG));
! 242: if ((fp = fopen(currfile, "w+")) == NULL)
! 243: err(1, "%s", currfile);
! 244: nfiles++;
! 245:
! 246: return (fp);
! 247: }
! 248:
! 249: /* Remove partial output, called before exiting. */
! 250: void
! 251: cleanup(void)
! 252: {
! 253: char fnbuf[PATH_MAX];
! 254: long i;
! 255:
! 256: if (!doclean)
! 257: return;
! 258:
! 259: /*
! 260: * NOTE: One cannot portably assume to be able to call snprintf()
! 261: * from inside a signal handler. It does, however, appear to be safe
! 262: * to do on FreeBSD. The solution to this problem is worse than the
! 263: * problem itself.
! 264: */
! 265:
! 266: for (i = 0; i < nfiles; i++) {
! 267: snprintf(fnbuf, sizeof(fnbuf), "%s%0*ld", prefix,
! 268: (int)sufflen, i);
! 269: unlink(fnbuf);
! 270: }
! 271: }
! 272:
! 273: /* Read a line from the input into a static buffer. */
! 274: char *
! 275: getline(void)
! 276: {
! 277: static char lbuf[LINE_MAX];
! 278: FILE *src;
! 279:
! 280: src = overfile != NULL ? overfile : infile;
! 281:
! 282: again: if (fgets(lbuf, sizeof(lbuf), src) == NULL) {
! 283: if (src == overfile) {
! 284: src = infile;
! 285: goto again;
! 286: }
! 287: return (NULL);
! 288: }
! 289: if (ferror(src))
! 290: err(1, "%s", infn);
! 291: lineno++;
! 292:
! 293: return (lbuf);
! 294: }
! 295:
! 296: /* Conceptually rewind the input (as obtained by getline()) back `n' lines. */
! 297: void
! 298: toomuch(FILE *ofp, long n)
! 299: {
! 300: char buf[BUFSIZ];
! 301: size_t i, nread;
! 302:
! 303: if (overfile != NULL) {
! 304: /*
! 305: * Truncate the previous file we overflowed into back to
! 306: * the correct length, close it.
! 307: */
! 308: if (fflush(overfile) != 0)
! 309: err(1, "overflow");
! 310: if (ftruncate(fileno(overfile), truncofs) != 0)
! 311: err(1, "overflow");
! 312: if (fclose(overfile) != 0)
! 313: err(1, "overflow");
! 314: overfile = NULL;
! 315: }
! 316:
! 317: if (n == 0)
! 318: /* Just tidying up */
! 319: return;
! 320:
! 321: lineno -= n;
! 322:
! 323: /*
! 324: * Wind the overflow file backwards to `n' lines before the
! 325: * current one.
! 326: */
! 327: do {
! 328: if (ftello(ofp) < (off_t)sizeof(buf))
! 329: rewind(ofp);
! 330: else
! 331: fseeko(ofp, -(off_t)sizeof(buf), SEEK_CUR);
! 332: if (ferror(ofp))
! 333: errx(1, "%s: can't seek", currfile);
! 334: if ((nread = fread(buf, 1, sizeof(buf), ofp)) == 0)
! 335: errx(1, "can't read overflowed output");
! 336: if (fseeko(ofp, -(off_t)nread, SEEK_CUR) != 0)
! 337: err(1, "%s", currfile);
! 338: for (i = 1; i <= nread; i++)
! 339: if (buf[nread - i] == '\n' && n-- == 0)
! 340: break;
! 341: if (ftello(ofp) == 0)
! 342: break;
! 343: } while (n > 0);
! 344: if (fseeko(ofp, (off_t)(nread - i + 1), SEEK_CUR) != 0)
! 345: err(1, "%s", currfile);
! 346:
! 347: /*
! 348: * getline() will read from here. Next call will truncate to
! 349: * truncofs in this file.
! 350: */
! 351: overfile = ofp;
! 352: truncofs = ftello(overfile);
! 353: }
! 354:
! 355: /* Handle splits for /regexp/ and %regexp% patterns. */
! 356: void
! 357: do_rexp(const char *expr)
! 358: {
! 359: regex_t cre;
! 360: intmax_t nwritten;
! 361: long ofs;
! 362: int first;
! 363: char *ecopy, *ep, *p, *pofs, *re;
! 364: FILE *ofp;
! 365:
! 366: if ((ecopy = strdup(expr)) == NULL)
! 367: err(1, "strdup");
! 368:
! 369: re = ecopy + 1;
! 370: if ((pofs = strrchr(ecopy, *expr)) == NULL || pofs[-1] == '\\')
! 371: errx(1, "%s: missing trailing %c", expr, *expr);
! 372: *pofs++ = '\0';
! 373:
! 374: if (*pofs != '\0') {
! 375: errno = 0;
! 376: ofs = strtol(pofs, &ep, 10);
! 377: if (*ep != '\0' || errno != 0)
! 378: errx(1, "%s: bad offset", pofs);
! 379: } else
! 380: ofs = 0;
! 381:
! 382: if (regcomp(&cre, re, REG_BASIC|REG_NOSUB) != 0)
! 383: errx(1, "%s: bad regular expression", re);
! 384:
! 385: if (*expr == '/')
! 386: /* /regexp/: Save results to a file. */
! 387: ofp = newfile();
! 388: else {
! 389: /* %regexp%: Make a temporary file for overflow. */
! 390: if ((ofp = tmpfile()) == NULL)
! 391: err(1, "tmpfile");
! 392: }
! 393:
! 394: /* Read and output lines until we get a match. */
! 395: first = 1;
! 396: while ((p = getline()) != NULL) {
! 397: if (fputs(p, ofp) != 0)
! 398: break;
! 399: if (!first && regexec(&cre, p, 0, NULL, 0) == 0)
! 400: break;
! 401: first = 0;
! 402: }
! 403:
! 404: if (p == NULL)
! 405: errx(1, "%s: no match", re);
! 406:
! 407: if (ofs <= 0) {
! 408: /*
! 409: * Negative (or zero) offset: throw back any lines we should
! 410: * not have read yet.
! 411: */
! 412: if (p != NULL) {
! 413: toomuch(ofp, -ofs + 1);
! 414: nwritten = (intmax_t)truncofs;
! 415: } else
! 416: nwritten = (intmax_t)ftello(ofp);
! 417: } else {
! 418: /*
! 419: * Positive offset: copy the requested number of lines
! 420: * after the match.
! 421: */
! 422: while (--ofs > 0 && (p = getline()) != NULL)
! 423: fputs(p, ofp);
! 424: toomuch(NULL, 0);
! 425: nwritten = (intmax_t)ftello(ofp);
! 426: if (fclose(ofp) != 0)
! 427: err(1, "%s", currfile);
! 428: }
! 429:
! 430: if (!sflag && *expr == '/')
! 431: printf("%jd\n", nwritten);
! 432:
! 433: regfree(&cre);
! 434: free(ecopy);
! 435: }
! 436:
! 437: /* Handle splits based on line number. */
! 438: void
! 439: do_lineno(const char *expr)
! 440: {
! 441: long lastline, tgtline;
! 442: char *ep, *p;
! 443: FILE *ofp;
! 444:
! 445: errno = 0;
! 446: tgtline = strtol(expr, &ep, 10);
! 447: if (tgtline <= 0 || errno != 0 || *ep != '\0')
! 448: errx(1, "%s: bad line number", expr);
! 449: lastline = tgtline;
! 450: if (lastline <= lineno)
! 451: errx(1, "%s: can't go backwards", expr);
! 452:
! 453: while (nfiles < maxfiles - 1) {
! 454: ofp = newfile();
! 455: while (lineno + 1 != lastline) {
! 456: if ((p = getline()) == NULL)
! 457: errx(1, "%ld: out of range", lastline);
! 458: if (fputs(p, ofp) != 0)
! 459: break;
! 460: }
! 461: if (!sflag)
! 462: printf("%jd\n", (intmax_t)ftello(ofp));
! 463: if (fclose(ofp) != 0)
! 464: err(1, "%s", currfile);
! 465: if (reps-- == 0)
! 466: break;
! 467: lastline += tgtline;
! 468: }
! 469: }