[BACK]Return to csplit.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / csplit

Annotation of src/usr.bin/csplit/csplit.c, Revision 1.4

1.4     ! fgsch       1: /*     $OpenBSD: csplit.c,v 1.3 2006/07/17 22:28:11 deraadt Exp $      */
1.1       millert     2: /*     $FreeBSD: src/usr.bin/csplit/csplit.c,v 1.9 2004/03/22 11:15:03 tjr Exp $       */
                      3:
                      4: /*-
                      5:  * Copyright (c) 2002 Tim J. Robbins.
                      6:  * All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  *
                     17:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
                     18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
                     21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     27:  * SUCH DAMAGE.
                     28:  */
                     29:
                     30: /*
                     31:  * csplit -- split files based on context
                     32:  *
                     33:  * This utility splits its input into numbered output files by line number
                     34:  * or by a regular expression. Regular expression matches have an optional
                     35:  * offset with them, allowing the split to occur a specified number of
                     36:  * lines before or after the match.
                     37:  *
                     38:  * To handle negative offsets, we stop reading when the match occurs and
                     39:  * store the offset that the file should have been split at, then use
                     40:  * this output file as input until all the "overflowed" lines have been read.
                     41:  * The file is then closed and truncated to the correct length.
                     42:  *
                     43:  * We assume that the output files can be seeked upon (ie. they cannot be
                     44:  * symlinks to named pipes or character devices), but make no such
                     45:  * assumption about the input.
                     46:  */
                     47:
                     48: #include <sys/types.h>
                     49:
                     50: #include <ctype.h>
                     51: #include <err.h>
                     52: #include <errno.h>
                     53: #include <limits.h>
                     54: #include <locale.h>
                     55: #include <regex.h>
                     56: #include <signal.h>
                     57: #include <stdint.h>
                     58: #include <stdio.h>
                     59: #include <stdlib.h>
                     60: #include <string.h>
                     61: #include <unistd.h>
                     62:
/*
 * Prototypes for the functions defined later in this file.
 */
void    cleanup(void);                  /* unlink the output files made so far */
void    do_lineno(const char *);        /* split at a line-number operand */
void    do_rexp(const char *);          /* split at a /regexp/ or %regexp% operand */
char   *get_line(void);                 /* read the next input line */
void    handlesig(int);                 /* signal handler: clean up and _exit */
FILE   *newfile(void);                  /* create and open the next output file */
void    toomuch(FILE *, long);          /* push back lines already read */
void    usage(void);                    /* print the synopsis and exit */
                     71:
/*
 * Command line options (defaults are assigned at the top of main()).
 */
const char *prefix;            /* File name prefix (-f); "xx" unless overridden */
long    sufflen;               /* Number of decimal digits for suffix (-n); 2 unless overridden */
int     sflag;                 /* Suppress output of file sizes (-s) */
int     kflag;                 /* Keep output if error occurs (-k) */

/*
 * Other miscellaneous globals (XXX too many)
 */
long    lineno;                /* Current line number in input file; bumped by get_line(), wound back by toomuch() */
long    reps;                  /* Number of repetitions for this pattern, from a "{n}" operand */
long    nfiles;                /* Number of files output so far; incremented by newfile() */
long    maxfiles;              /* Maximum number of files we can create (10^sufflen, computed in main()) */
char    currfile[PATH_MAX];    /* Name of the current output file, filled in by newfile() */
const char *infn;              /* Name of the input file ("stdin" when the operand is "-") */
FILE   *infile;                /* Input file handle */
FILE   *overfile;              /* Overflow file for toomuch(); NULL when there is none */
off_t   truncofs;              /* Offset the overflow file should be truncated at */
int     doclean;               /* Should cleanup() remove output? */
                     93:
                     94: int
                     95: main(int argc, char *argv[])
                     96: {
                     97:        struct sigaction sa;
                     98:        long i;
                     99:        int ch;
                    100:        const char *expr;
                    101:        char *ep, *p;
                    102:        FILE *ofp;
                    103:
                    104:        setlocale(LC_ALL, "");
                    105:
                    106:        kflag = sflag = 0;
                    107:        prefix = "xx";
                    108:        sufflen = 2;
                    109:        while ((ch = getopt(argc, argv, "f:kn:s")) != -1) {
                    110:                switch (ch) {
                    111:                case 'f':
                    112:                        prefix = optarg;
                    113:                        break;
                    114:                case 'k':
                    115:                        kflag = 1;
                    116:                        break;
                    117:                case 'n':
                    118:                        errno = 0;
                    119:                        sufflen = strtol(optarg, &ep, 10);
                    120:                        if (sufflen <= 0 || *ep != '\0' || errno != 0)
                    121:                                errx(1, "%s: bad suffix length", optarg);
                    122:                        break;
                    123:                case 's':
                    124:                        sflag = 1;
                    125:                        break;
                    126:                default:
                    127:                        usage();
                    128:                        /*NOTREACHED*/
                    129:                }
                    130:        }
                    131:
                    132:        if (sufflen + strlen(prefix) >= PATH_MAX)
                    133:                errx(1, "name too long");
                    134:
                    135:        argc -= optind;
                    136:        argv += optind;
                    137:
                    138:        if ((infn = *argv++) == NULL)
                    139:                usage();
                    140:        if (strcmp(infn, "-") == 0) {
                    141:                infile = stdin;
                    142:                infn = "stdin";
                    143:        } else if ((infile = fopen(infn, "r")) == NULL)
                    144:                err(1, "%s", infn);
                    145:
                    146:        if (!kflag) {
                    147:                doclean = 1;
                    148:                atexit(cleanup);
                    149:                sa.sa_flags = 0;
                    150:                sa.sa_handler = handlesig;
                    151:                sigemptyset(&sa.sa_mask);
                    152:                sigaddset(&sa.sa_mask, SIGHUP);
                    153:                sigaddset(&sa.sa_mask, SIGINT);
                    154:                sigaddset(&sa.sa_mask, SIGTERM);
                    155:                sigaction(SIGHUP, &sa, NULL);
                    156:                sigaction(SIGINT, &sa, NULL);
                    157:                sigaction(SIGTERM, &sa, NULL);
                    158:        }
                    159:
                    160:        lineno = 0;
                    161:        nfiles = 0;
                    162:        truncofs = 0;
                    163:        overfile = NULL;
                    164:
                    165:        /* Ensure 10^sufflen < LONG_MAX. */
                    166:        for (maxfiles = 1, i = 0; i < sufflen; i++) {
                    167:                if (maxfiles > LONG_MAX / 10)
                    168:                        errx(1, "%ld: suffix too long (limit %ld)",
                    169:                            sufflen, i);
                    170:                maxfiles *= 10;
                    171:        }
                    172:
                    173:        /* Create files based on supplied patterns. */
                    174:        while (nfiles < maxfiles - 1 && (expr = *argv++) != NULL) {
                    175:                /* Look ahead & see if this pattern has any repetitions. */
                    176:                if (*argv != NULL && **argv == '{') {
                    177:                        errno = 0;
                    178:                        reps = strtol(*argv + 1, &ep, 10);
                    179:                        if (reps < 0 || *ep != '}' || errno != 0)
                    180:                                errx(1, "%s: bad repetition count", *argv + 1);
                    181:                        argv++;
                    182:                } else
                    183:                        reps = 0;
                    184:
                    185:                if (*expr == '/' || *expr == '%') {
                    186:                        do {
                    187:                                do_rexp(expr);
                    188:                        } while (reps-- != 0 && nfiles < maxfiles - 1);
                    189:                } else if (isdigit((unsigned char)*expr))
                    190:                        do_lineno(expr);
                    191:                else
                    192:                        errx(1, "%s: unrecognised pattern", expr);
                    193:        }
                    194:
                    195:        /* Copy the rest into a new file. */
                    196:        if (!feof(infile)) {
                    197:                ofp = newfile();
1.4     ! fgsch     198:                while ((p = get_line()) != NULL && fputs(p, ofp) == 0)
1.1       millert   199:                        ;
                    200:                if (!sflag)
                    201:                        printf("%jd\n", (intmax_t)ftello(ofp));
                    202:                if (fclose(ofp) != 0)
                    203:                        err(1, "%s", currfile);
                    204:        }
                    205:
                    206:        toomuch(NULL, 0);
                    207:        doclean = 0;
                    208:
                    209:        return (0);
                    210: }
                    211:
                    212: void
                    213: usage(void)
                    214: {
                    215:        extern char *__progname;
                    216:
                    217:        fprintf(stderr,
                    218:            "usage: %s [-ks] [-f prefix] [-n number] file args ...\n",
                    219:            __progname);
                    220:        exit(1);
                    221: }
                    222:
/*
 * Signal handler: announce the interruption on standard error, remove the
 * output files via cleanup() and terminate immediately with status 2.
 * The signal number itself is not used.
 */
/* ARGSUSED */
void
handlesig(int sig)
{
	static const char notice[] = "csplit: caught signal, cleaning up\n";

	/* write(2) rather than stdio: we are inside a signal handler. */
	write(STDERR_FILENO, notice, sizeof(notice) - 1);
	cleanup();
	_exit(2);
}
                    233:
                    234: /* Create a new output file. */
                    235: FILE *
                    236: newfile(void)
                    237: {
                    238:        FILE *fp;
                    239:
                    240:        if ((size_t)snprintf(currfile, sizeof(currfile), "%s%0*ld", prefix,
                    241:            (int)sufflen, nfiles) >= sizeof(currfile))
                    242:                errx(1, "%s: %s", currfile, strerror(ENAMETOOLONG));
                    243:        if ((fp = fopen(currfile, "w+")) == NULL)
                    244:                err(1, "%s", currfile);
                    245:        nfiles++;
                    246:
                    247:        return (fp);
                    248: }
                    249:
                    250: /* Remove partial output, called before exiting. */
                    251: void
                    252: cleanup(void)
                    253: {
                    254:        char fnbuf[PATH_MAX];
                    255:        long i;
                    256:
                    257:        if (!doclean)
                    258:                return;
                    259:
                    260:        /*
1.2       millert   261:         * NOTE: One cannot portably assume to be able to call snprintf() from
                    262:         * inside a signal handler.  It is, however, safe to do on OpenBSD.
1.1       millert   263:         */
                    264:        for (i = 0; i < nfiles; i++) {
                    265:                snprintf(fnbuf, sizeof(fnbuf), "%s%0*ld", prefix,
                    266:                    (int)sufflen, i);
                    267:                unlink(fnbuf);
                    268:        }
                    269: }
                    270:
                    271: /* Read a line from the input into a static buffer. */
                    272: char *
1.4     ! fgsch     273: get_line(void)
1.1       millert   274: {
                    275:        static char lbuf[LINE_MAX];
                    276:        FILE *src;
                    277:
                    278:        src = overfile != NULL ? overfile : infile;
                    279:
                    280: again: if (fgets(lbuf, sizeof(lbuf), src) == NULL) {
                    281:                if (src == overfile) {
                    282:                        src = infile;
                    283:                        goto again;
                    284:                }
                    285:                return (NULL);
                    286:        }
                    287:        if (ferror(src))
                    288:                err(1, "%s", infn);
                    289:        lineno++;
                    290:
                    291:        return (lbuf);
                    292: }
                    293:
/*
 * Conceptually rewind the input (as obtained by get_line()) back `n' lines.
 *
 * ofp is the output stream the surplus lines were written to; when n is
 * non-zero it becomes the new overflow file that get_line() reads from.
 * When n is zero ofp is not touched (callers pass NULL) and the call only
 * finishes off a previous overflow file, if there is one.
 *
 * Side effects: may truncate and close the old `overfile'; subtracts n from
 * `lineno'; sets `overfile' and `truncofs'.  Any I/O failure is fatal.
 */
void
toomuch(FILE *ofp, long n)
{
	char buf[BUFSIZ];
	size_t i, nread;

	if (overfile != NULL) {
		/*
		 * Truncate the previous file we overflowed into back to
		 * the correct length, close it.
		 */
		if (fflush(overfile) != 0)
			err(1, "overflow");
		if (ftruncate(fileno(overfile), truncofs) != 0)
			err(1, "overflow");
		if (fclose(overfile) != 0)
			err(1, "overflow");
		overfile = NULL;
	}

	if (n == 0)
		/* Just tidying up */
		return;

	lineno -= n;

	/*
	 * Wind the overflow file backwards to `n' lines before the
	 * current one.
	 *
	 * Each pass steps back by up to sizeof(buf) bytes, reads that chunk,
	 * seeks back to the chunk start and scans the chunk from its end
	 * towards its start counting newlines.  The scan breaks on the
	 * newline seen after n others have been counted, i.e. the one that
	 * terminates the line preceding the first pushed-back line.
	 *
	 * NOTE(review): when fewer than sizeof(buf) bytes precede the current
	 * position, rewind() followed by a full-size fread() appears to
	 * re-read bytes that an earlier pass already scanned -- confirm.
	 * NOTE(review): the loop also ends once n has dropped to 0 even if
	 * the scan ran off the start of the chunk without reaching `break';
	 * the stream then looks to be left at the chunk start rather than at
	 * a line start -- confirm.
	 */
	do {
		if (ftello(ofp) < (off_t)sizeof(buf))
			rewind(ofp);
		else
			fseeko(ofp, -(off_t)sizeof(buf), SEEK_CUR);
		if (ferror(ofp))
			errx(1, "%s: can't seek", currfile);
		if ((nread = fread(buf, 1, sizeof(buf), ofp)) == 0)
			errx(1, "can't read overflowed output");
		/* Return to the start of the chunk just read. */
		if (fseeko(ofp, -(off_t)nread, SEEK_CUR) != 0)
			err(1, "%s", currfile);
		for (i = 1; i <= nread; i++)
			if (buf[nread - i] == '\n' && n-- == 0)
				break;
		/* Reached the beginning of the file: nothing further back. */
		if (ftello(ofp) == 0)
			break;
	} while (n > 0);
	/*
	 * If the scan broke on a newline at buf[nread - i], this moves to the
	 * byte just after it.  If the scan ran out, i == nread + 1 and the
	 * offset is 0, leaving the stream at the start of the chunk.
	 */
	if (fseeko(ofp, (off_t)(nread - i + 1), SEEK_CUR) != 0)
		err(1, "%s", currfile);

	/*
	 * get_line() will read from here. Next call will truncate to
	 * truncofs in this file.
	 */
	overfile = ofp;
	truncofs = ftello(overfile);
}
                    352:
                    353: /* Handle splits for /regexp/ and %regexp% patterns. */
                    354: void
                    355: do_rexp(const char *expr)
                    356: {
                    357:        regex_t cre;
                    358:        intmax_t nwritten;
                    359:        long ofs;
                    360:        int first;
                    361:        char *ecopy, *ep, *p, *pofs, *re;
                    362:        FILE *ofp;
                    363:
                    364:        if ((ecopy = strdup(expr)) == NULL)
                    365:                err(1, "strdup");
                    366:
                    367:        re = ecopy + 1;
                    368:        if ((pofs = strrchr(ecopy, *expr)) == NULL || pofs[-1] == '\\')
                    369:                errx(1, "%s: missing trailing %c", expr, *expr);
                    370:        *pofs++ = '\0';
                    371:
                    372:        if (*pofs != '\0') {
                    373:                errno = 0;
                    374:                ofs = strtol(pofs, &ep, 10);
                    375:                if (*ep != '\0' || errno != 0)
                    376:                        errx(1, "%s: bad offset", pofs);
                    377:        } else
                    378:                ofs = 0;
                    379:
                    380:        if (regcomp(&cre, re, REG_BASIC|REG_NOSUB) != 0)
                    381:                errx(1, "%s: bad regular expression", re);
                    382:
                    383:        if (*expr == '/')
                    384:                /* /regexp/: Save results to a file. */
                    385:                ofp = newfile();
                    386:        else {
                    387:                /* %regexp%: Make a temporary file for overflow. */
                    388:                if ((ofp = tmpfile()) == NULL)
                    389:                        err(1, "tmpfile");
                    390:        }
                    391:
                    392:        /* Read and output lines until we get a match. */
                    393:        first = 1;
1.4     ! fgsch     394:        while ((p = get_line()) != NULL) {
1.1       millert   395:                if (fputs(p, ofp) != 0)
                    396:                        break;
                    397:                if (!first && regexec(&cre, p, 0, NULL, 0) == 0)
                    398:                        break;
                    399:                first = 0;
                    400:        }
                    401:
                    402:        if (p == NULL)
                    403:                errx(1, "%s: no match", re);
                    404:
                    405:        if (ofs <= 0) {
                    406:                /*
                    407:                 * Negative (or zero) offset: throw back any lines we should
                    408:                 * not have read yet.
                    409:                  */
                    410:                if (p != NULL) {
                    411:                        toomuch(ofp, -ofs + 1);
                    412:                        nwritten = (intmax_t)truncofs;
                    413:                } else
                    414:                        nwritten = (intmax_t)ftello(ofp);
                    415:        } else {
                    416:                /*
                    417:                 * Positive offset: copy the requested number of lines
                    418:                 * after the match.
                    419:                 */
1.4     ! fgsch     420:                while (--ofs > 0 && (p = get_line()) != NULL)
1.1       millert   421:                        fputs(p, ofp);
                    422:                toomuch(NULL, 0);
                    423:                nwritten = (intmax_t)ftello(ofp);
                    424:                if (fclose(ofp) != 0)
                    425:                        err(1, "%s", currfile);
                    426:        }
                    427:
                    428:        if (!sflag && *expr == '/')
                    429:                printf("%jd\n", nwritten);
                    430:
                    431:        regfree(&cre);
                    432:        free(ecopy);
                    433: }
                    434:
                    435: /* Handle splits based on line number. */
                    436: void
                    437: do_lineno(const char *expr)
                    438: {
                    439:        long lastline, tgtline;
                    440:        char *ep, *p;
                    441:        FILE *ofp;
                    442:
                    443:        errno = 0;
                    444:        tgtline = strtol(expr, &ep, 10);
                    445:        if (tgtline <= 0 || errno != 0 || *ep != '\0')
                    446:                errx(1, "%s: bad line number", expr);
                    447:        lastline = tgtline;
                    448:        if (lastline <= lineno)
                    449:                errx(1, "%s: can't go backwards", expr);
                    450:
                    451:        while (nfiles < maxfiles - 1) {
                    452:                ofp = newfile();
                    453:                while (lineno + 1 != lastline) {
1.4     ! fgsch     454:                        if ((p = get_line()) == NULL)
1.1       millert   455:                                errx(1, "%ld: out of range", lastline);
                    456:                        if (fputs(p, ofp) != 0)
                    457:                                break;
                    458:                }
                    459:                if (!sflag)
                    460:                        printf("%jd\n", (intmax_t)ftello(ofp));
                    461:                if (fclose(ofp) != 0)
                    462:                        err(1, "%s", currfile);
                    463:                if (reps-- == 0)
                    464:                        break;
                    465:                lastline += tgtline;
                    466:        }
                    467: }