[BACK]Return to csplit.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / csplit

Annotation of src/usr.bin/csplit/csplit.c, Revision 1.8

1.8     ! semarie     1: /*     $OpenBSD: csplit.c,v 1.7 2015/10/09 01:37:07 deraadt Exp $      */
1.1       millert     2: /*     $FreeBSD: src/usr.bin/csplit/csplit.c,v 1.9 2004/03/22 11:15:03 tjr Exp $       */
                      3:
                      4: /*-
                      5:  * Copyright (c) 2002 Tim J. Robbins.
                      6:  * All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  *
                     17:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
                     18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
                     21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     27:  * SUCH DAMAGE.
                     28:  */
                     29:
                     30: /*
                     31:  * csplit -- split files based on context
                     32:  *
                     33:  * This utility splits its input into numbered output files by line number
                     34:  * or by a regular expression. Regular expression matches have an optional
                     35:  * offset with them, allowing the split to occur a specified number of
                     36:  * lines before or after the match.
                     37:  *
                     38:  * To handle negative offsets, we stop reading when the match occurs and
                     39:  * store the offset that the file should have been split at, then use
                     40:  * this output file as input until all the "overflowed" lines have been read.
                     41:  * The file is then closed and truncated to the correct length.
                     42:  *
                     43:  * We assume that the output files are seekable (i.e. they cannot be
                     44:  * symlinks to named pipes or character devices), but make no such
                     45:  * assumption about the input.
                     46:  */
                     47:
                     48: #include <sys/types.h>
                     49:
                     50: #include <ctype.h>
                     51: #include <err.h>
                     52: #include <errno.h>
                     53: #include <limits.h>
                     54: #include <locale.h>
                     55: #include <regex.h>
                     56: #include <signal.h>
                     57: #include <stdint.h>
                     58: #include <stdio.h>
                     59: #include <stdlib.h>
                     60: #include <string.h>
                     61: #include <unistd.h>
                     62:
                     63: void    cleanup(void);
                     64: void    do_lineno(const char *);
                     65: void    do_rexp(const char *);
1.4       fgsch      66: char   *get_line(void);
1.1       millert    67: void    handlesig(int);
                     68: FILE   *newfile(void);
                     69: void    toomuch(FILE *, long);
                     70: void    usage(void);
                     71:
                     72: /*
                     73:  * Command line options
                     74:  */
                     75: const char *prefix;            /* File name prefix */
                     76: long    sufflen;               /* Number of decimal digits for suffix */
                     77: int     sflag;                 /* Suppress output of file names */
                     78: int     kflag;                 /* Keep output if error occurs */
                     79:
                     80: /*
                     81:  * Other miscellaneous globals (XXX too many)
                     82:  */
                     83: long    lineno;                /* Current line number in input file */
                     84: long    reps;                  /* Number of repetitions for this pattern */
                     85: long    nfiles;                /* Number of files output so far */
                     86: long    maxfiles;              /* Maximum number of files we can create */
                     87: char    currfile[PATH_MAX];    /* Current output file */
                     88: const char *infn;              /* Name of the input file */
                     89: FILE   *infile;                /* Input file handle */
                     90: FILE   *overfile;              /* Overflow file for toomuch() */
                     91: off_t   truncofs;              /* Offset this file should be truncated at */
                     92: int     doclean;               /* Should cleanup() remove output? */
                     93:
                     94: int
                     95: main(int argc, char *argv[])
                     96: {
                     97:        struct sigaction sa;
                     98:        long i;
                     99:        int ch;
                    100:        const char *expr;
                    101:        char *ep, *p;
                    102:        FILE *ofp;
                    103:
                    104:        setlocale(LC_ALL, "");
                    105:
1.7       deraadt   106:        if (pledge("stdio rpath wpath cpath", NULL) == -1)
                    107:                err(1, "pledge");
1.6       deraadt   108:
1.1       millert   109:        kflag = sflag = 0;
                    110:        prefix = "xx";
                    111:        sufflen = 2;
                    112:        while ((ch = getopt(argc, argv, "f:kn:s")) != -1) {
                    113:                switch (ch) {
                    114:                case 'f':
                    115:                        prefix = optarg;
                    116:                        break;
                    117:                case 'k':
                    118:                        kflag = 1;
                    119:                        break;
                    120:                case 'n':
                    121:                        errno = 0;
                    122:                        sufflen = strtol(optarg, &ep, 10);
                    123:                        if (sufflen <= 0 || *ep != '\0' || errno != 0)
                    124:                                errx(1, "%s: bad suffix length", optarg);
                    125:                        break;
                    126:                case 's':
                    127:                        sflag = 1;
                    128:                        break;
                    129:                default:
                    130:                        usage();
                    131:                        /*NOTREACHED*/
                    132:                }
                    133:        }
                    134:
                    135:        if (sufflen + strlen(prefix) >= PATH_MAX)
                    136:                errx(1, "name too long");
                    137:
                    138:        argc -= optind;
                    139:        argv += optind;
                    140:
                    141:        if ((infn = *argv++) == NULL)
                    142:                usage();
                    143:        if (strcmp(infn, "-") == 0) {
                    144:                infile = stdin;
                    145:                infn = "stdin";
                    146:        } else if ((infile = fopen(infn, "r")) == NULL)
                    147:                err(1, "%s", infn);
                    148:
                    149:        if (!kflag) {
                    150:                doclean = 1;
                    151:                atexit(cleanup);
                    152:                sa.sa_flags = 0;
                    153:                sa.sa_handler = handlesig;
                    154:                sigemptyset(&sa.sa_mask);
                    155:                sigaddset(&sa.sa_mask, SIGHUP);
                    156:                sigaddset(&sa.sa_mask, SIGINT);
                    157:                sigaddset(&sa.sa_mask, SIGTERM);
                    158:                sigaction(SIGHUP, &sa, NULL);
                    159:                sigaction(SIGINT, &sa, NULL);
                    160:                sigaction(SIGTERM, &sa, NULL);
                    161:        }
                    162:
                    163:        lineno = 0;
                    164:        nfiles = 0;
                    165:        truncofs = 0;
                    166:        overfile = NULL;
                    167:
                    168:        /* Ensure 10^sufflen < LONG_MAX. */
                    169:        for (maxfiles = 1, i = 0; i < sufflen; i++) {
                    170:                if (maxfiles > LONG_MAX / 10)
                    171:                        errx(1, "%ld: suffix too long (limit %ld)",
                    172:                            sufflen, i);
                    173:                maxfiles *= 10;
                    174:        }
                    175:
                    176:        /* Create files based on supplied patterns. */
                    177:        while (nfiles < maxfiles - 1 && (expr = *argv++) != NULL) {
                    178:                /* Look ahead & see if this pattern has any repetitions. */
                    179:                if (*argv != NULL && **argv == '{') {
                    180:                        errno = 0;
                    181:                        reps = strtol(*argv + 1, &ep, 10);
                    182:                        if (reps < 0 || *ep != '}' || errno != 0)
                    183:                                errx(1, "%s: bad repetition count", *argv + 1);
                    184:                        argv++;
                    185:                } else
                    186:                        reps = 0;
                    187:
                    188:                if (*expr == '/' || *expr == '%') {
                    189:                        do {
                    190:                                do_rexp(expr);
                    191:                        } while (reps-- != 0 && nfiles < maxfiles - 1);
                    192:                } else if (isdigit((unsigned char)*expr))
                    193:                        do_lineno(expr);
                    194:                else
                    195:                        errx(1, "%s: unrecognised pattern", expr);
                    196:        }
                    197:
                    198:        /* Copy the rest into a new file. */
                    199:        if (!feof(infile)) {
                    200:                ofp = newfile();
1.4       fgsch     201:                while ((p = get_line()) != NULL && fputs(p, ofp) == 0)
1.1       millert   202:                        ;
                    203:                if (!sflag)
                    204:                        printf("%jd\n", (intmax_t)ftello(ofp));
                    205:                if (fclose(ofp) != 0)
                    206:                        err(1, "%s", currfile);
                    207:        }
                    208:
                    209:        toomuch(NULL, 0);
                    210:        doclean = 0;
                    211:
                    212:        return (0);
                    213: }
                    214:
                    /* Write the command synopsis to stderr and exit with status 1. */
                    void
                    usage(void)
                    {
                            extern char *__progname;

                            (void)fprintf(stderr,
                                "usage: %s [-ks] [-f prefix] [-n number] file args ...\n",
                                __progname);
                            exit(1);
                    }
                    225:
/*
 * Signal handler installed by main() for SIGHUP, SIGINT and SIGTERM:
 * print a short notice, remove the output files and exit with status 2.
 * The signal number itself is not used.
 */
/* ARGSUSED */
void
handlesig(int sig)
{
        static const char notice[] = "csplit: caught signal, cleaning up\n";

        (void)sig;
        /* write(2) and _exit(2) are used directly; stdio is not touched here. */
        (void)write(STDERR_FILENO, notice, sizeof(notice) - 1);
        cleanup();
        _exit(2);
}
                    236:
                    237: /* Create a new output file. */
                    238: FILE *
                    239: newfile(void)
                    240: {
                    241:        FILE *fp;
                    242:
                    243:        if ((size_t)snprintf(currfile, sizeof(currfile), "%s%0*ld", prefix,
                    244:            (int)sufflen, nfiles) >= sizeof(currfile))
1.5       guenther  245:                errc(1, ENAMETOOLONG, "%s", currfile);
1.1       millert   246:        if ((fp = fopen(currfile, "w+")) == NULL)
                    247:                err(1, "%s", currfile);
                    248:        nfiles++;
                    249:
                    250:        return (fp);
                    251: }
                    252:
                    253: /* Remove partial output, called before exiting. */
                    254: void
                    255: cleanup(void)
                    256: {
                    257:        char fnbuf[PATH_MAX];
                    258:        long i;
                    259:
                    260:        if (!doclean)
                    261:                return;
                    262:
                    263:        /*
1.2       millert   264:         * NOTE: One cannot portably assume to be able to call snprintf() from
                    265:         * inside a signal handler.  It is, however, safe to do on OpenBSD.
1.1       millert   266:         */
                    267:        for (i = 0; i < nfiles; i++) {
                    268:                snprintf(fnbuf, sizeof(fnbuf), "%s%0*ld", prefix,
                    269:                    (int)sufflen, i);
                    270:                unlink(fnbuf);
                    271:        }
                    272: }
                    273:
                    274: /* Read a line from the input into a static buffer. */
                    275: char *
1.4       fgsch     276: get_line(void)
1.1       millert   277: {
                    278:        static char lbuf[LINE_MAX];
                    279:        FILE *src;
                    280:
                    281:        src = overfile != NULL ? overfile : infile;
                    282:
                    283: again: if (fgets(lbuf, sizeof(lbuf), src) == NULL) {
                    284:                if (src == overfile) {
                    285:                        src = infile;
                    286:                        goto again;
                    287:                }
                    288:                return (NULL);
                    289:        }
                    290:        if (ferror(src))
                    291:                err(1, "%s", infn);
                    292:        lineno++;
                    293:
                    294:        return (lbuf);
                    295: }
                    296:
1.4       fgsch     297: /* Conceptually rewind the input (as obtained by get_line()) back `n' lines. */
1.1       millert   298: void
                    299: toomuch(FILE *ofp, long n)
                    300: {
                    301:        char buf[BUFSIZ];
                    302:        size_t i, nread;
                    303:
                    304:        if (overfile != NULL) {
                    305:                /*
                    306:                 * Truncate the previous file we overflowed into back to
                    307:                 * the correct length, close it.
                    308:                 */
                    309:                if (fflush(overfile) != 0)
                    310:                        err(1, "overflow");
                    311:                if (ftruncate(fileno(overfile), truncofs) != 0)
                    312:                        err(1, "overflow");
                    313:                if (fclose(overfile) != 0)
                    314:                        err(1, "overflow");
                    315:                overfile = NULL;
                    316:        }
                    317:
                    318:        if (n == 0)
                    319:                /* Just tidying up */
                    320:                return;
                    321:
                    322:        lineno -= n;
                    323:
                    324:        /*
                    325:         * Wind the overflow file backwards to `n' lines before the
                    326:         * current one.
                    327:         */
                    328:        do {
                    329:                if (ftello(ofp) < (off_t)sizeof(buf))
                    330:                        rewind(ofp);
                    331:                else
                    332:                        fseeko(ofp, -(off_t)sizeof(buf), SEEK_CUR);
                    333:                if (ferror(ofp))
                    334:                        errx(1, "%s: can't seek", currfile);
                    335:                if ((nread = fread(buf, 1, sizeof(buf), ofp)) == 0)
                    336:                        errx(1, "can't read overflowed output");
                    337:                if (fseeko(ofp, -(off_t)nread, SEEK_CUR) != 0)
                    338:                        err(1, "%s", currfile);
                    339:                for (i = 1; i <= nread; i++)
                    340:                        if (buf[nread - i] == '\n' && n-- == 0)
                    341:                                break;
                    342:                if (ftello(ofp) == 0)
                    343:                        break;
                    344:        } while (n > 0);
                    345:        if (fseeko(ofp, (off_t)(nread - i + 1), SEEK_CUR) != 0)
                    346:                err(1, "%s", currfile);
                    347:
                    348:        /*
1.4       fgsch     349:         * get_line() will read from here. Next call will truncate to
1.1       millert   350:         * truncofs in this file.
                    351:         */
                    352:        overfile = ofp;
                    353:        truncofs = ftello(overfile);
                    354: }
                    355:
                    356: /* Handle splits for /regexp/ and %regexp% patterns. */
                    357: void
                    358: do_rexp(const char *expr)
                    359: {
                    360:        regex_t cre;
                    361:        intmax_t nwritten;
                    362:        long ofs;
                    363:        int first;
                    364:        char *ecopy, *ep, *p, *pofs, *re;
                    365:        FILE *ofp;
                    366:
                    367:        if ((ecopy = strdup(expr)) == NULL)
                    368:                err(1, "strdup");
                    369:
                    370:        re = ecopy + 1;
                    371:        if ((pofs = strrchr(ecopy, *expr)) == NULL || pofs[-1] == '\\')
                    372:                errx(1, "%s: missing trailing %c", expr, *expr);
                    373:        *pofs++ = '\0';
                    374:
                    375:        if (*pofs != '\0') {
                    376:                errno = 0;
                    377:                ofs = strtol(pofs, &ep, 10);
                    378:                if (*ep != '\0' || errno != 0)
                    379:                        errx(1, "%s: bad offset", pofs);
                    380:        } else
                    381:                ofs = 0;
                    382:
                    383:        if (regcomp(&cre, re, REG_BASIC|REG_NOSUB) != 0)
                    384:                errx(1, "%s: bad regular expression", re);
                    385:
                    386:        if (*expr == '/')
                    387:                /* /regexp/: Save results to a file. */
                    388:                ofp = newfile();
                    389:        else {
                    390:                /* %regexp%: Make a temporary file for overflow. */
                    391:                if ((ofp = tmpfile()) == NULL)
                    392:                        err(1, "tmpfile");
                    393:        }
                    394:
                    395:        /* Read and output lines until we get a match. */
                    396:        first = 1;
1.4       fgsch     397:        while ((p = get_line()) != NULL) {
1.1       millert   398:                if (fputs(p, ofp) != 0)
                    399:                        break;
                    400:                if (!first && regexec(&cre, p, 0, NULL, 0) == 0)
                    401:                        break;
                    402:                first = 0;
                    403:        }
                    404:
                    405:        if (p == NULL)
                    406:                errx(1, "%s: no match", re);
                    407:
                    408:        if (ofs <= 0) {
                    409:                /*
                    410:                 * Negative (or zero) offset: throw back any lines we should
                    411:                 * not have read yet.
                    412:                  */
                    413:                if (p != NULL) {
                    414:                        toomuch(ofp, -ofs + 1);
                    415:                        nwritten = (intmax_t)truncofs;
                    416:                } else
                    417:                        nwritten = (intmax_t)ftello(ofp);
                    418:        } else {
                    419:                /*
                    420:                 * Positive offset: copy the requested number of lines
                    421:                 * after the match.
                    422:                 */
1.4       fgsch     423:                while (--ofs > 0 && (p = get_line()) != NULL)
1.1       millert   424:                        fputs(p, ofp);
                    425:                toomuch(NULL, 0);
                    426:                nwritten = (intmax_t)ftello(ofp);
                    427:                if (fclose(ofp) != 0)
                    428:                        err(1, "%s", currfile);
                    429:        }
                    430:
                    431:        if (!sflag && *expr == '/')
                    432:                printf("%jd\n", nwritten);
                    433:
                    434:        regfree(&cre);
                    435:        free(ecopy);
                    436: }
                    437:
                    438: /* Handle splits based on line number. */
                    439: void
                    440: do_lineno(const char *expr)
                    441: {
                    442:        long lastline, tgtline;
                    443:        char *ep, *p;
                    444:        FILE *ofp;
                    445:
                    446:        errno = 0;
                    447:        tgtline = strtol(expr, &ep, 10);
                    448:        if (tgtline <= 0 || errno != 0 || *ep != '\0')
                    449:                errx(1, "%s: bad line number", expr);
                    450:        lastline = tgtline;
                    451:        if (lastline <= lineno)
                    452:                errx(1, "%s: can't go backwards", expr);
                    453:
                    454:        while (nfiles < maxfiles - 1) {
                    455:                ofp = newfile();
                    456:                while (lineno + 1 != lastline) {
1.4       fgsch     457:                        if ((p = get_line()) == NULL)
1.1       millert   458:                                errx(1, "%ld: out of range", lastline);
                    459:                        if (fputs(p, ofp) != 0)
                    460:                                break;
                    461:                }
                    462:                if (!sflag)
                    463:                        printf("%jd\n", (intmax_t)ftello(ofp));
                    464:                if (fclose(ofp) != 0)
                    465:                        err(1, "%s", currfile);
                    466:                if (reps-- == 0)
                    467:                        break;
                    468:                lastline += tgtline;
                    469:        }
                    470: }