[BACK]Return to csplit.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / csplit

Annotation of src/usr.bin/csplit/csplit.c, Revision 1.1

1.1     ! millert     1: /*     $OpenBSD$       */
        !             2: /*     $FreeBSD: src/usr.bin/csplit/csplit.c,v 1.9 2004/03/22 11:15:03 tjr Exp $       */
        !             3:
        !             4: /*-
        !             5:  * Copyright (c) 2002 Tim J. Robbins.
        !             6:  * All rights reserved.
        !             7:  *
        !             8:  * Redistribution and use in source and binary forms, with or without
        !             9:  * modification, are permitted provided that the following conditions
        !            10:  * are met:
        !            11:  * 1. Redistributions of source code must retain the above copyright
        !            12:  *    notice, this list of conditions and the following disclaimer.
        !            13:  * 2. Redistributions in binary form must reproduce the above copyright
        !            14:  *    notice, this list of conditions and the following disclaimer in the
        !            15:  *    documentation and/or other materials provided with the distribution.
        !            16:  *
        !            17:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
        !            18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
        !            21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
        !            22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
        !            23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
        !            24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
        !            25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
        !            26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
        !            27:  * SUCH DAMAGE.
        !            28:  */
        !            29:
        !            30: /*
        !            31:  * csplit -- split files based on context
        !            32:  *
        !            33:  * This utility splits its input into numbered output files by line number
        !            34:  * or by a regular expression. Regular expression matches have an optional
        !            35:  * offset with them, allowing the split to occur a specified number of
        !            36:  * lines before or after the match.
        !            37:  *
        !            38:  * To handle negative offsets, we stop reading when the match occurs and
        !            39:  * store the offset that the file should have been split at, then use
        !            40:  * this output file as input until all the "overflowed" lines have been read.
        !            41:  * The file is then closed and truncated to the correct length.
        !            42:  *
        !            43:  * We assume that the output files are seekable (i.e. they cannot be
        !            44:  * symlinks to named pipes or character devices), but make no such
        !            45:  * assumption about the input.
        !            46:  */
        !            47:
        !            48: #include <sys/types.h>
        !            49:
        !            50: #include <ctype.h>
        !            51: #include <err.h>
        !            52: #include <errno.h>
        !            53: #include <limits.h>
        !            54: #include <locale.h>
        !            55: #include <regex.h>
        !            56: #include <signal.h>
        !            57: #include <stdint.h>
        !            58: #include <stdio.h>
        !            59: #include <stdlib.h>
        !            60: #include <string.h>
        !            61: #include <unistd.h>
        !            62:
        !            63: void    cleanup(void);
        !            64: void    do_lineno(const char *);
        !            65: void    do_rexp(const char *);
        !            66: char   *getline(void);
        !            67: void    handlesig(int);
        !            68: FILE   *newfile(void);
        !            69: void    toomuch(FILE *, long);
        !            70: void    usage(void);
        !            71:
        !            72: /*
        !            73:  * Command line options
        !            74:  */
        !            75: const char *prefix;            /* File name prefix */
        !            76: long    sufflen;               /* Number of decimal digits for suffix */
        !            77: int     sflag;                 /* Suppress output of file names */
        !            78: int     kflag;                 /* Keep output if error occurs */
        !            79:
        !            80: /*
        !            81:  * Other miscellaneous globals (XXX too many)
        !            82:  */
        !            83: long    lineno;                /* Current line number in input file */
        !            84: long    reps;                  /* Number of repetitions for this pattern */
        !            85: long    nfiles;                /* Number of files output so far */
        !            86: long    maxfiles;              /* Maximum number of files we can create */
        !            87: char    currfile[PATH_MAX];    /* Current output file */
        !            88: const char *infn;              /* Name of the input file */
        !            89: FILE   *infile;                /* Input file handle */
        !            90: FILE   *overfile;              /* Overflow file for toomuch() */
        !            91: off_t   truncofs;              /* Offset this file should be truncated at */
        !            92: int     doclean;               /* Should cleanup() remove output? */
        !            93:
        !            94: int
        !            95: main(int argc, char *argv[])
        !            96: {
        !            97:        struct sigaction sa;
        !            98:        long i;
        !            99:        int ch;
        !           100:        const char *expr;
        !           101:        char *ep, *p;
        !           102:        FILE *ofp;
        !           103:
        !           104:        setlocale(LC_ALL, "");
        !           105:
        !           106:        kflag = sflag = 0;
        !           107:        prefix = "xx";
        !           108:        sufflen = 2;
        !           109:        while ((ch = getopt(argc, argv, "f:kn:s")) != -1) {
        !           110:                switch (ch) {
        !           111:                case 'f':
        !           112:                        prefix = optarg;
        !           113:                        break;
        !           114:                case 'k':
        !           115:                        kflag = 1;
        !           116:                        break;
        !           117:                case 'n':
        !           118:                        errno = 0;
        !           119:                        sufflen = strtol(optarg, &ep, 10);
        !           120:                        if (sufflen <= 0 || *ep != '\0' || errno != 0)
        !           121:                                errx(1, "%s: bad suffix length", optarg);
        !           122:                        break;
        !           123:                case 's':
        !           124:                        sflag = 1;
        !           125:                        break;
        !           126:                default:
        !           127:                        usage();
        !           128:                        /*NOTREACHED*/
        !           129:                }
        !           130:        }
        !           131:
        !           132:        if (sufflen + strlen(prefix) >= PATH_MAX)
        !           133:                errx(1, "name too long");
        !           134:
        !           135:        argc -= optind;
        !           136:        argv += optind;
        !           137:
        !           138:        if ((infn = *argv++) == NULL)
        !           139:                usage();
        !           140:        if (strcmp(infn, "-") == 0) {
        !           141:                infile = stdin;
        !           142:                infn = "stdin";
        !           143:        } else if ((infile = fopen(infn, "r")) == NULL)
        !           144:                err(1, "%s", infn);
        !           145:
        !           146:        if (!kflag) {
        !           147:                doclean = 1;
        !           148:                atexit(cleanup);
        !           149:                sa.sa_flags = 0;
        !           150:                sa.sa_handler = handlesig;
        !           151:                sigemptyset(&sa.sa_mask);
        !           152:                sigaddset(&sa.sa_mask, SIGHUP);
        !           153:                sigaddset(&sa.sa_mask, SIGINT);
        !           154:                sigaddset(&sa.sa_mask, SIGTERM);
        !           155:                sigaction(SIGHUP, &sa, NULL);
        !           156:                sigaction(SIGINT, &sa, NULL);
        !           157:                sigaction(SIGTERM, &sa, NULL);
        !           158:        }
        !           159:
        !           160:        lineno = 0;
        !           161:        nfiles = 0;
        !           162:        truncofs = 0;
        !           163:        overfile = NULL;
        !           164:
        !           165:        /* Ensure 10^sufflen < LONG_MAX. */
        !           166:        for (maxfiles = 1, i = 0; i < sufflen; i++) {
        !           167:                if (maxfiles > LONG_MAX / 10)
        !           168:                        errx(1, "%ld: suffix too long (limit %ld)",
        !           169:                            sufflen, i);
        !           170:                maxfiles *= 10;
        !           171:        }
        !           172:
        !           173:        /* Create files based on supplied patterns. */
        !           174:        while (nfiles < maxfiles - 1 && (expr = *argv++) != NULL) {
        !           175:                /* Look ahead & see if this pattern has any repetitions. */
        !           176:                if (*argv != NULL && **argv == '{') {
        !           177:                        errno = 0;
        !           178:                        reps = strtol(*argv + 1, &ep, 10);
        !           179:                        if (reps < 0 || *ep != '}' || errno != 0)
        !           180:                                errx(1, "%s: bad repetition count", *argv + 1);
        !           181:                        argv++;
        !           182:                } else
        !           183:                        reps = 0;
        !           184:
        !           185:                if (*expr == '/' || *expr == '%') {
        !           186:                        do {
        !           187:                                do_rexp(expr);
        !           188:                        } while (reps-- != 0 && nfiles < maxfiles - 1);
        !           189:                } else if (isdigit((unsigned char)*expr))
        !           190:                        do_lineno(expr);
        !           191:                else
        !           192:                        errx(1, "%s: unrecognised pattern", expr);
        !           193:        }
        !           194:
        !           195:        /* Copy the rest into a new file. */
        !           196:        if (!feof(infile)) {
        !           197:                ofp = newfile();
        !           198:                while ((p = getline()) != NULL && fputs(p, ofp) == 0)
        !           199:                        ;
        !           200:                if (!sflag)
        !           201:                        printf("%jd\n", (intmax_t)ftello(ofp));
        !           202:                if (fclose(ofp) != 0)
        !           203:                        err(1, "%s", currfile);
        !           204:        }
        !           205:
        !           206:        toomuch(NULL, 0);
        !           207:        doclean = 0;
        !           208:
        !           209:        return (0);
        !           210: }
        !           211:
        !           212: void
        !           213: usage(void)
        !           214: {
        !           215:        extern char *__progname;
        !           216:
        !           217:        fprintf(stderr,
        !           218:            "usage: %s [-ks] [-f prefix] [-n number] file args ...\n",
        !           219:            __progname);
        !           220:        exit(1);
        !           221: }
        !           222:
        !           223: void
        !           224: handlesig(int sig)
        !           225: {
        !           226:        const char msg[] = "csplit: caught signal, cleaning up\n";
        !           227:
        !           228:        write(STDERR_FILENO, msg, sizeof(msg) - 1);
        !           229:        cleanup();
        !           230:        _exit(2);
        !           231: }
        !           232:
        !           233: /* Create a new output file. */
        !           234: FILE *
        !           235: newfile(void)
        !           236: {
        !           237:        FILE *fp;
        !           238:
        !           239:        if ((size_t)snprintf(currfile, sizeof(currfile), "%s%0*ld", prefix,
        !           240:            (int)sufflen, nfiles) >= sizeof(currfile))
        !           241:                errx(1, "%s: %s", currfile, strerror(ENAMETOOLONG));
        !           242:        if ((fp = fopen(currfile, "w+")) == NULL)
        !           243:                err(1, "%s", currfile);
        !           244:        nfiles++;
        !           245:
        !           246:        return (fp);
        !           247: }
        !           248:
        !           249: /* Remove partial output, called before exiting. */
        !           250: void
        !           251: cleanup(void)
        !           252: {
        !           253:        char fnbuf[PATH_MAX];
        !           254:        long i;
        !           255:
        !           256:        if (!doclean)
        !           257:                return;
        !           258:
        !           259:        /*
        !           260:         * NOTE: One cannot portably assume to be able to call snprintf()
        !           261:         * from inside a signal handler. It does, however, appear to be safe
        !           262:         * to do on FreeBSD. The solution to this problem is worse than the
        !           263:         * problem itself.
        !           264:         */
        !           265:
        !           266:        for (i = 0; i < nfiles; i++) {
        !           267:                snprintf(fnbuf, sizeof(fnbuf), "%s%0*ld", prefix,
        !           268:                    (int)sufflen, i);
        !           269:                unlink(fnbuf);
        !           270:        }
        !           271: }
        !           272:
        !           273: /* Read a line from the input into a static buffer. */
        !           274: char *
        !           275: getline(void)
        !           276: {
        !           277:        static char lbuf[LINE_MAX];
        !           278:        FILE *src;
        !           279:
        !           280:        src = overfile != NULL ? overfile : infile;
        !           281:
        !           282: again: if (fgets(lbuf, sizeof(lbuf), src) == NULL) {
        !           283:                if (src == overfile) {
        !           284:                        src = infile;
        !           285:                        goto again;
        !           286:                }
        !           287:                return (NULL);
        !           288:        }
        !           289:        if (ferror(src))
        !           290:                err(1, "%s", infn);
        !           291:        lineno++;
        !           292:
        !           293:        return (lbuf);
        !           294: }
        !           295:
        !           296: /* Conceptually rewind the input (as obtained by getline()) back `n' lines. */
        !           297: void
        !           298: toomuch(FILE *ofp, long n)
        !           299: {
        !           300:        char buf[BUFSIZ];
        !           301:        size_t i, nread;
        !           302:
        !           303:        if (overfile != NULL) {
        !           304:                /*
        !           305:                 * Truncate the previous file we overflowed into back to
        !           306:                 * the correct length, close it.
        !           307:                 */
        !           308:                if (fflush(overfile) != 0)
        !           309:                        err(1, "overflow");
        !           310:                if (ftruncate(fileno(overfile), truncofs) != 0)
        !           311:                        err(1, "overflow");
        !           312:                if (fclose(overfile) != 0)
        !           313:                        err(1, "overflow");
        !           314:                overfile = NULL;
        !           315:        }
        !           316:
        !           317:        if (n == 0)
        !           318:                /* Just tidying up */
        !           319:                return;
        !           320:
        !           321:        lineno -= n;
        !           322:
        !           323:        /*
        !           324:         * Wind the overflow file backwards to `n' lines before the
        !           325:         * current one.
        !           326:         */
        !           327:        do {
        !           328:                if (ftello(ofp) < (off_t)sizeof(buf))
        !           329:                        rewind(ofp);
        !           330:                else
        !           331:                        fseeko(ofp, -(off_t)sizeof(buf), SEEK_CUR);
        !           332:                if (ferror(ofp))
        !           333:                        errx(1, "%s: can't seek", currfile);
        !           334:                if ((nread = fread(buf, 1, sizeof(buf), ofp)) == 0)
        !           335:                        errx(1, "can't read overflowed output");
        !           336:                if (fseeko(ofp, -(off_t)nread, SEEK_CUR) != 0)
        !           337:                        err(1, "%s", currfile);
        !           338:                for (i = 1; i <= nread; i++)
        !           339:                        if (buf[nread - i] == '\n' && n-- == 0)
        !           340:                                break;
        !           341:                if (ftello(ofp) == 0)
        !           342:                        break;
        !           343:        } while (n > 0);
        !           344:        if (fseeko(ofp, (off_t)(nread - i + 1), SEEK_CUR) != 0)
        !           345:                err(1, "%s", currfile);
        !           346:
        !           347:        /*
        !           348:         * getline() will read from here. Next call will truncate to
        !           349:         * truncofs in this file.
        !           350:         */
        !           351:        overfile = ofp;
        !           352:        truncofs = ftello(overfile);
        !           353: }
        !           354:
        !           355: /* Handle splits for /regexp/ and %regexp% patterns. */
        !           356: void
        !           357: do_rexp(const char *expr)
        !           358: {
        !           359:        regex_t cre;
        !           360:        intmax_t nwritten;
        !           361:        long ofs;
        !           362:        int first;
        !           363:        char *ecopy, *ep, *p, *pofs, *re;
        !           364:        FILE *ofp;
        !           365:
        !           366:        if ((ecopy = strdup(expr)) == NULL)
        !           367:                err(1, "strdup");
        !           368:
        !           369:        re = ecopy + 1;
        !           370:        if ((pofs = strrchr(ecopy, *expr)) == NULL || pofs[-1] == '\\')
        !           371:                errx(1, "%s: missing trailing %c", expr, *expr);
        !           372:        *pofs++ = '\0';
        !           373:
        !           374:        if (*pofs != '\0') {
        !           375:                errno = 0;
        !           376:                ofs = strtol(pofs, &ep, 10);
        !           377:                if (*ep != '\0' || errno != 0)
        !           378:                        errx(1, "%s: bad offset", pofs);
        !           379:        } else
        !           380:                ofs = 0;
        !           381:
        !           382:        if (regcomp(&cre, re, REG_BASIC|REG_NOSUB) != 0)
        !           383:                errx(1, "%s: bad regular expression", re);
        !           384:
        !           385:        if (*expr == '/')
        !           386:                /* /regexp/: Save results to a file. */
        !           387:                ofp = newfile();
        !           388:        else {
        !           389:                /* %regexp%: Make a temporary file for overflow. */
        !           390:                if ((ofp = tmpfile()) == NULL)
        !           391:                        err(1, "tmpfile");
        !           392:        }
        !           393:
        !           394:        /* Read and output lines until we get a match. */
        !           395:        first = 1;
        !           396:        while ((p = getline()) != NULL) {
        !           397:                if (fputs(p, ofp) != 0)
        !           398:                        break;
        !           399:                if (!first && regexec(&cre, p, 0, NULL, 0) == 0)
        !           400:                        break;
        !           401:                first = 0;
        !           402:        }
        !           403:
        !           404:        if (p == NULL)
        !           405:                errx(1, "%s: no match", re);
        !           406:
        !           407:        if (ofs <= 0) {
        !           408:                /*
        !           409:                 * Negative (or zero) offset: throw back any lines we should
        !           410:                 * not have read yet.
        !           411:                  */
        !           412:                if (p != NULL) {
        !           413:                        toomuch(ofp, -ofs + 1);
        !           414:                        nwritten = (intmax_t)truncofs;
        !           415:                } else
        !           416:                        nwritten = (intmax_t)ftello(ofp);
        !           417:        } else {
        !           418:                /*
        !           419:                 * Positive offset: copy the requested number of lines
        !           420:                 * after the match.
        !           421:                 */
        !           422:                while (--ofs > 0 && (p = getline()) != NULL)
        !           423:                        fputs(p, ofp);
        !           424:                toomuch(NULL, 0);
        !           425:                nwritten = (intmax_t)ftello(ofp);
        !           426:                if (fclose(ofp) != 0)
        !           427:                        err(1, "%s", currfile);
        !           428:        }
        !           429:
        !           430:        if (!sflag && *expr == '/')
        !           431:                printf("%jd\n", nwritten);
        !           432:
        !           433:        regfree(&cre);
        !           434:        free(ecopy);
        !           435: }
        !           436:
        !           437: /* Handle splits based on line number. */
        !           438: void
        !           439: do_lineno(const char *expr)
        !           440: {
        !           441:        long lastline, tgtline;
        !           442:        char *ep, *p;
        !           443:        FILE *ofp;
        !           444:
        !           445:        errno = 0;
        !           446:        tgtline = strtol(expr, &ep, 10);
        !           447:        if (tgtline <= 0 || errno != 0 || *ep != '\0')
        !           448:                errx(1, "%s: bad line number", expr);
        !           449:        lastline = tgtline;
        !           450:        if (lastline <= lineno)
        !           451:                errx(1, "%s: can't go backwards", expr);
        !           452:
        !           453:        while (nfiles < maxfiles - 1) {
        !           454:                ofp = newfile();
        !           455:                while (lineno + 1 != lastline) {
        !           456:                        if ((p = getline()) == NULL)
        !           457:                                errx(1, "%ld: out of range", lastline);
        !           458:                        if (fputs(p, ofp) != 0)
        !           459:                                break;
        !           460:                }
        !           461:                if (!sflag)
        !           462:                        printf("%jd\n", (intmax_t)ftello(ofp));
        !           463:                if (fclose(ofp) != 0)
        !           464:                        err(1, "%s", currfile);
        !           465:                if (reps-- == 0)
        !           466:                        break;
        !           467:                lastline += tgtline;
        !           468:        }
        !           469: }