[BACK]Return to csplit.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / csplit

Annotation of src/usr.bin/csplit/csplit.c, Revision 1.9

1.9     ! schwarze    1: /*     $OpenBSD: csplit.c,v 1.8 2015/10/11 17:43:03 semarie Exp $      */
1.1       millert     2: /*     $FreeBSD: src/usr.bin/csplit/csplit.c,v 1.9 2004/03/22 11:15:03 tjr Exp $       */
                      3:
                      4: /*-
                      5:  * Copyright (c) 2002 Tim J. Robbins.
                      6:  * All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  *
                     17:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
                     18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
                     21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     27:  * SUCH DAMAGE.
                     28:  */
                     29:
                     30: /*
                     31:  * csplit -- split files based on context
                     32:  *
                     33:  * This utility splits its input into numbered output files by line number
                     34:  * or by a regular expression. Regular expression matches have an optional
                     35:  * offset with them, allowing the split to occur a specified number of
                     36:  * lines before or after the match.
                     37:  *
                     38:  * To handle negative offsets, we stop reading when the match occurs and
                     39:  * store the offset that the file should have been split at, then use
                     40:  * this output file as input until all the "overflowed" lines have been read.
                     41:  * The file is then closed and truncated to the correct length.
                     42:  *
                     43:  * We assume that the output files can be seeked upon (ie. they cannot be
                     44:  * symlinks to named pipes or character devices), but make no such
                     45:  * assumption about the input.
                     46:  */
                     47:
                     48: #include <sys/types.h>
                     49:
                     50: #include <ctype.h>
                     51: #include <err.h>
                     52: #include <errno.h>
                     53: #include <limits.h>
                     54: #include <regex.h>
                     55: #include <signal.h>
                     56: #include <stdint.h>
                     57: #include <stdio.h>
                     58: #include <stdlib.h>
                     59: #include <string.h>
                     60: #include <unistd.h>
                     61:
                     62: void    cleanup(void);
                     63: void    do_lineno(const char *);
                     64: void    do_rexp(const char *);
1.4       fgsch      65: char   *get_line(void);
1.1       millert    66: void    handlesig(int);
                     67: FILE   *newfile(void);
                     68: void    toomuch(FILE *, long);
1.9     ! schwarze   69: static void __dead usage(void);
1.1       millert    70:
                     71: /*
                     72:  * Command line options
                     73:  */
                     74: const char *prefix;            /* File name prefix */
                     75: long    sufflen;               /* Number of decimal digits for suffix */
                     76: int     sflag;                 /* Suppress output of file names */
                     77: int     kflag;                 /* Keep output if error occurs */
                     78:
                     79: /*
                     80:  * Other miscellaneous globals (XXX too many)
                     81:  */
                     82: long    lineno;                /* Current line number in input file */
                     83: long    reps;                  /* Number of repetitions for this pattern */
                     84: long    nfiles;                /* Number of files output so far */
                     85: long    maxfiles;              /* Maximum number of files we can create */
                     86: char    currfile[PATH_MAX];    /* Current output file */
                     87: const char *infn;              /* Name of the input file */
                     88: FILE   *infile;                /* Input file handle */
                     89: FILE   *overfile;              /* Overflow file for toomuch() */
                     90: off_t   truncofs;              /* Offset this file should be truncated at */
                     91: int     doclean;               /* Should cleanup() remove output? */
                     92:
                     93: int
                     94: main(int argc, char *argv[])
                     95: {
                     96:        struct sigaction sa;
                     97:        long i;
                     98:        int ch;
                     99:        const char *expr;
                    100:        char *ep, *p;
                    101:        FILE *ofp;
                    102:
1.7       deraadt   103:        if (pledge("stdio rpath wpath cpath", NULL) == -1)
                    104:                err(1, "pledge");
1.6       deraadt   105:
1.1       millert   106:        kflag = sflag = 0;
                    107:        prefix = "xx";
                    108:        sufflen = 2;
                    109:        while ((ch = getopt(argc, argv, "f:kn:s")) != -1) {
                    110:                switch (ch) {
                    111:                case 'f':
                    112:                        prefix = optarg;
                    113:                        break;
                    114:                case 'k':
                    115:                        kflag = 1;
                    116:                        break;
                    117:                case 'n':
                    118:                        errno = 0;
                    119:                        sufflen = strtol(optarg, &ep, 10);
                    120:                        if (sufflen <= 0 || *ep != '\0' || errno != 0)
                    121:                                errx(1, "%s: bad suffix length", optarg);
                    122:                        break;
                    123:                case 's':
                    124:                        sflag = 1;
                    125:                        break;
                    126:                default:
                    127:                        usage();
                    128:                }
                    129:        }
                    130:
                    131:        if (sufflen + strlen(prefix) >= PATH_MAX)
                    132:                errx(1, "name too long");
                    133:
                    134:        argc -= optind;
                    135:        argv += optind;
                    136:
                    137:        if ((infn = *argv++) == NULL)
                    138:                usage();
                    139:        if (strcmp(infn, "-") == 0) {
                    140:                infile = stdin;
                    141:                infn = "stdin";
                    142:        } else if ((infile = fopen(infn, "r")) == NULL)
                    143:                err(1, "%s", infn);
                    144:
                    145:        if (!kflag) {
                    146:                doclean = 1;
                    147:                atexit(cleanup);
                    148:                sa.sa_flags = 0;
                    149:                sa.sa_handler = handlesig;
                    150:                sigemptyset(&sa.sa_mask);
                    151:                sigaddset(&sa.sa_mask, SIGHUP);
                    152:                sigaddset(&sa.sa_mask, SIGINT);
                    153:                sigaddset(&sa.sa_mask, SIGTERM);
                    154:                sigaction(SIGHUP, &sa, NULL);
                    155:                sigaction(SIGINT, &sa, NULL);
                    156:                sigaction(SIGTERM, &sa, NULL);
                    157:        }
                    158:
                    159:        lineno = 0;
                    160:        nfiles = 0;
                    161:        truncofs = 0;
                    162:        overfile = NULL;
                    163:
                    164:        /* Ensure 10^sufflen < LONG_MAX. */
                    165:        for (maxfiles = 1, i = 0; i < sufflen; i++) {
                    166:                if (maxfiles > LONG_MAX / 10)
                    167:                        errx(1, "%ld: suffix too long (limit %ld)",
                    168:                            sufflen, i);
                    169:                maxfiles *= 10;
                    170:        }
                    171:
                    172:        /* Create files based on supplied patterns. */
                    173:        while (nfiles < maxfiles - 1 && (expr = *argv++) != NULL) {
                    174:                /* Look ahead & see if this pattern has any repetitions. */
                    175:                if (*argv != NULL && **argv == '{') {
                    176:                        errno = 0;
                    177:                        reps = strtol(*argv + 1, &ep, 10);
                    178:                        if (reps < 0 || *ep != '}' || errno != 0)
                    179:                                errx(1, "%s: bad repetition count", *argv + 1);
                    180:                        argv++;
                    181:                } else
                    182:                        reps = 0;
                    183:
                    184:                if (*expr == '/' || *expr == '%') {
                    185:                        do {
                    186:                                do_rexp(expr);
                    187:                        } while (reps-- != 0 && nfiles < maxfiles - 1);
                    188:                } else if (isdigit((unsigned char)*expr))
                    189:                        do_lineno(expr);
                    190:                else
                    191:                        errx(1, "%s: unrecognised pattern", expr);
                    192:        }
                    193:
                    194:        /* Copy the rest into a new file. */
                    195:        if (!feof(infile)) {
                    196:                ofp = newfile();
1.4       fgsch     197:                while ((p = get_line()) != NULL && fputs(p, ofp) == 0)
1.1       millert   198:                        ;
                    199:                if (!sflag)
                    200:                        printf("%jd\n", (intmax_t)ftello(ofp));
                    201:                if (fclose(ofp) != 0)
                    202:                        err(1, "%s", currfile);
                    203:        }
                    204:
                    205:        toomuch(NULL, 0);
                    206:        doclean = 0;
                    207:
                    208:        return (0);
                    209: }
                    210:
1.9     ! schwarze  211: static void __dead
1.1       millert   212: usage(void)
                    213: {
                    214:        extern char *__progname;
                    215:
                    216:        fprintf(stderr,
                    217:            "usage: %s [-ks] [-f prefix] [-n number] file args ...\n",
                    218:            __progname);
                    219:        exit(1);
                    220: }
                    221:
/*
 * Signal handler: announce the signal on standard error, remove the
 * output files via cleanup() and terminate with status 2 without running
 * the atexit handlers.
 */
/* ARGSUSED */
void
handlesig(int sig)
{
	const char notice[] = "csplit: caught signal, cleaning up\n";
	const size_t len = sizeof(notice) - 1;	/* without the NUL */

	write(STDERR_FILENO, notice, len);
	cleanup();
	_exit(2);
}
                    232:
                    233: /* Create a new output file. */
                    234: FILE *
                    235: newfile(void)
                    236: {
                    237:        FILE *fp;
                    238:
                    239:        if ((size_t)snprintf(currfile, sizeof(currfile), "%s%0*ld", prefix,
                    240:            (int)sufflen, nfiles) >= sizeof(currfile))
1.5       guenther  241:                errc(1, ENAMETOOLONG, "%s", currfile);
1.1       millert   242:        if ((fp = fopen(currfile, "w+")) == NULL)
                    243:                err(1, "%s", currfile);
                    244:        nfiles++;
                    245:
                    246:        return (fp);
                    247: }
                    248:
                    249: /* Remove partial output, called before exiting. */
                    250: void
                    251: cleanup(void)
                    252: {
                    253:        char fnbuf[PATH_MAX];
                    254:        long i;
                    255:
                    256:        if (!doclean)
                    257:                return;
                    258:
                    259:        /*
1.2       millert   260:         * NOTE: One cannot portably assume to be able to call snprintf() from
                    261:         * inside a signal handler.  It is, however, safe to do on OpenBSD.
1.1       millert   262:         */
                    263:        for (i = 0; i < nfiles; i++) {
                    264:                snprintf(fnbuf, sizeof(fnbuf), "%s%0*ld", prefix,
                    265:                    (int)sufflen, i);
                    266:                unlink(fnbuf);
                    267:        }
                    268: }
                    269:
                    270: /* Read a line from the input into a static buffer. */
                    271: char *
1.4       fgsch     272: get_line(void)
1.1       millert   273: {
                    274:        static char lbuf[LINE_MAX];
                    275:        FILE *src;
                    276:
                    277:        src = overfile != NULL ? overfile : infile;
                    278:
                    279: again: if (fgets(lbuf, sizeof(lbuf), src) == NULL) {
                    280:                if (src == overfile) {
                    281:                        src = infile;
                    282:                        goto again;
                    283:                }
                    284:                return (NULL);
                    285:        }
                    286:        if (ferror(src))
                    287:                err(1, "%s", infn);
                    288:        lineno++;
                    289:
                    290:        return (lbuf);
                    291: }
                    292:
1.4       fgsch     293: /* Conceptually rewind the input (as obtained by get_line()) back `n' lines. */
1.1       millert   294: void
                    295: toomuch(FILE *ofp, long n)
                    296: {
                    297:        char buf[BUFSIZ];
                    298:        size_t i, nread;
                    299:
                    300:        if (overfile != NULL) {
                    301:                /*
                    302:                 * Truncate the previous file we overflowed into back to
                    303:                 * the correct length, close it.
                    304:                 */
                    305:                if (fflush(overfile) != 0)
                    306:                        err(1, "overflow");
                    307:                if (ftruncate(fileno(overfile), truncofs) != 0)
                    308:                        err(1, "overflow");
                    309:                if (fclose(overfile) != 0)
                    310:                        err(1, "overflow");
                    311:                overfile = NULL;
                    312:        }
                    313:
                    314:        if (n == 0)
                    315:                /* Just tidying up */
                    316:                return;
                    317:
                    318:        lineno -= n;
                    319:
                    320:        /*
                    321:         * Wind the overflow file backwards to `n' lines before the
                    322:         * current one.
                    323:         */
                    324:        do {
                    325:                if (ftello(ofp) < (off_t)sizeof(buf))
                    326:                        rewind(ofp);
                    327:                else
                    328:                        fseeko(ofp, -(off_t)sizeof(buf), SEEK_CUR);
                    329:                if (ferror(ofp))
                    330:                        errx(1, "%s: can't seek", currfile);
                    331:                if ((nread = fread(buf, 1, sizeof(buf), ofp)) == 0)
                    332:                        errx(1, "can't read overflowed output");
                    333:                if (fseeko(ofp, -(off_t)nread, SEEK_CUR) != 0)
                    334:                        err(1, "%s", currfile);
                    335:                for (i = 1; i <= nread; i++)
                    336:                        if (buf[nread - i] == '\n' && n-- == 0)
                    337:                                break;
                    338:                if (ftello(ofp) == 0)
                    339:                        break;
                    340:        } while (n > 0);
                    341:        if (fseeko(ofp, (off_t)(nread - i + 1), SEEK_CUR) != 0)
                    342:                err(1, "%s", currfile);
                    343:
                    344:        /*
1.4       fgsch     345:         * get_line() will read from here. Next call will truncate to
1.1       millert   346:         * truncofs in this file.
                    347:         */
                    348:        overfile = ofp;
                    349:        truncofs = ftello(overfile);
                    350: }
                    351:
                    352: /* Handle splits for /regexp/ and %regexp% patterns. */
                    353: void
                    354: do_rexp(const char *expr)
                    355: {
                    356:        regex_t cre;
                    357:        intmax_t nwritten;
                    358:        long ofs;
                    359:        int first;
                    360:        char *ecopy, *ep, *p, *pofs, *re;
                    361:        FILE *ofp;
                    362:
                    363:        if ((ecopy = strdup(expr)) == NULL)
                    364:                err(1, "strdup");
                    365:
                    366:        re = ecopy + 1;
                    367:        if ((pofs = strrchr(ecopy, *expr)) == NULL || pofs[-1] == '\\')
                    368:                errx(1, "%s: missing trailing %c", expr, *expr);
                    369:        *pofs++ = '\0';
                    370:
                    371:        if (*pofs != '\0') {
                    372:                errno = 0;
                    373:                ofs = strtol(pofs, &ep, 10);
                    374:                if (*ep != '\0' || errno != 0)
                    375:                        errx(1, "%s: bad offset", pofs);
                    376:        } else
                    377:                ofs = 0;
                    378:
                    379:        if (regcomp(&cre, re, REG_BASIC|REG_NOSUB) != 0)
                    380:                errx(1, "%s: bad regular expression", re);
                    381:
                    382:        if (*expr == '/')
                    383:                /* /regexp/: Save results to a file. */
                    384:                ofp = newfile();
                    385:        else {
                    386:                /* %regexp%: Make a temporary file for overflow. */
                    387:                if ((ofp = tmpfile()) == NULL)
                    388:                        err(1, "tmpfile");
                    389:        }
                    390:
                    391:        /* Read and output lines until we get a match. */
                    392:        first = 1;
1.4       fgsch     393:        while ((p = get_line()) != NULL) {
1.1       millert   394:                if (fputs(p, ofp) != 0)
                    395:                        break;
                    396:                if (!first && regexec(&cre, p, 0, NULL, 0) == 0)
                    397:                        break;
                    398:                first = 0;
                    399:        }
                    400:
                    401:        if (p == NULL)
                    402:                errx(1, "%s: no match", re);
                    403:
                    404:        if (ofs <= 0) {
                    405:                /*
                    406:                 * Negative (or zero) offset: throw back any lines we should
                    407:                 * not have read yet.
                    408:                  */
                    409:                if (p != NULL) {
                    410:                        toomuch(ofp, -ofs + 1);
                    411:                        nwritten = (intmax_t)truncofs;
                    412:                } else
                    413:                        nwritten = (intmax_t)ftello(ofp);
                    414:        } else {
                    415:                /*
                    416:                 * Positive offset: copy the requested number of lines
                    417:                 * after the match.
                    418:                 */
1.4       fgsch     419:                while (--ofs > 0 && (p = get_line()) != NULL)
1.1       millert   420:                        fputs(p, ofp);
                    421:                toomuch(NULL, 0);
                    422:                nwritten = (intmax_t)ftello(ofp);
                    423:                if (fclose(ofp) != 0)
                    424:                        err(1, "%s", currfile);
                    425:        }
                    426:
                    427:        if (!sflag && *expr == '/')
                    428:                printf("%jd\n", nwritten);
                    429:
                    430:        regfree(&cre);
                    431:        free(ecopy);
                    432: }
                    433:
                    434: /* Handle splits based on line number. */
                    435: void
                    436: do_lineno(const char *expr)
                    437: {
                    438:        long lastline, tgtline;
                    439:        char *ep, *p;
                    440:        FILE *ofp;
                    441:
                    442:        errno = 0;
                    443:        tgtline = strtol(expr, &ep, 10);
                    444:        if (tgtline <= 0 || errno != 0 || *ep != '\0')
                    445:                errx(1, "%s: bad line number", expr);
                    446:        lastline = tgtline;
                    447:        if (lastline <= lineno)
                    448:                errx(1, "%s: can't go backwards", expr);
                    449:
                    450:        while (nfiles < maxfiles - 1) {
                    451:                ofp = newfile();
                    452:                while (lineno + 1 != lastline) {
1.4       fgsch     453:                        if ((p = get_line()) == NULL)
1.1       millert   454:                                errx(1, "%ld: out of range", lastline);
                    455:                        if (fputs(p, ofp) != 0)
                    456:                                break;
                    457:                }
                    458:                if (!sflag)
                    459:                        printf("%jd\n", (intmax_t)ftello(ofp));
                    460:                if (fclose(ofp) != 0)
                    461:                        err(1, "%s", currfile);
                    462:                if (reps-- == 0)
                    463:                        break;
                    464:                lastline += tgtline;
                    465:        }
                    466: }