[BACK]Return to csplit.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / csplit

Annotation of src/usr.bin/csplit/csplit.c, Revision 1.12

1.12    ! guenther    1: /*     $OpenBSD: csplit.c,v 1.11 2022/12/22 19:53:22 kn Exp $  */
1.1       millert     2: /*     $FreeBSD: src/usr.bin/csplit/csplit.c,v 1.9 2004/03/22 11:15:03 tjr Exp $       */
                      3:
                      4: /*-
                      5:  * Copyright (c) 2002 Tim J. Robbins.
                      6:  * All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  *
                     17:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
                     18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
                     21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     27:  * SUCH DAMAGE.
                     28:  */
                     29:
                     30: /*
                     31:  * csplit -- split files based on context
                     32:  *
                     33:  * This utility splits its input into numbered output files by line number
                     34:  * or by a regular expression. Regular expression matches have an optional
                     35:  * offset with them, allowing the split to occur a specified number of
                     36:  * lines before or after the match.
                     37:  *
                     38:  * To handle negative offsets, we stop reading when the match occurs and
                     39:  * store the offset that the file should have been split at, then use
                     40:  * this output file as input until all the "overflowed" lines have been read.
                     41:  * The file is then closed and truncated to the correct length.
                     42:  *
                     43:  * We assume that the output files are seekable (i.e. they cannot be
                     44:  * symlinks to named pipes or character devices), but make no such
                     45:  * assumption about the input.
                     46:  */
                     47:
                     48: #include <sys/types.h>
                     49:
                     50: #include <ctype.h>
                     51: #include <err.h>
                     52: #include <errno.h>
                     53: #include <limits.h>
                     54: #include <regex.h>
                     55: #include <signal.h>
                     56: #include <stdint.h>
                     57: #include <stdio.h>
                     58: #include <stdlib.h>
                     59: #include <string.h>
                     60: #include <unistd.h>
                     61:
                     62: void    cleanup(void);
                     63: void    do_lineno(const char *);
                     64: void    do_rexp(const char *);
1.4       fgsch      65: char   *get_line(void);
1.1       millert    66: void    handlesig(int);
                     67: FILE   *newfile(void);
                     68: void    toomuch(FILE *, long);
1.9       schwarze   69: static void __dead usage(void);
1.1       millert    70:
                     /*
                      * Command line options.
                      */

                     /* File name prefix (-f). */
                     const char *prefix;
                     /* Number of decimal digits for the suffix (-n). */
                     long sufflen;
                     /* Suppress output of file sizes (-s). */
                     int sflag;
                     /* Keep output files if an error occurs (-k). */
                     int kflag;

                     /*
                      * Other miscellaneous globals (XXX too many).
                      */

                     /* Current line number in the input file. */
                     long lineno;
                     /* Number of repetitions for the pattern being processed. */
                     long reps;
                     /* Number of files output so far. */
                     long nfiles;
                     /* Maximum number of files we can create. */
                     long maxfiles;
                     /* Name of the current output file. */
                     char currfile[PATH_MAX];
                     /* Name of the input file. */
                     const char *infn;
                     /* Input file handle. */
                     FILE *infile;
                     /* Overflow file for toomuch(). */
                     FILE *overfile;
                     /* Offset the overflow file should be truncated at. */
                     off_t truncofs;
                     /* Should cleanup() remove output? */
                     int doclean;
                     92:
                     93: int
                     94: main(int argc, char *argv[])
                     95: {
                     96:        struct sigaction sa;
                     97:        long i;
                     98:        int ch;
                     99:        const char *expr;
                    100:        char *ep, *p;
                    101:        FILE *ofp;
                    102:
1.7       deraadt   103:        if (pledge("stdio rpath wpath cpath", NULL) == -1)
                    104:                err(1, "pledge");
1.6       deraadt   105:
1.1       millert   106:        kflag = sflag = 0;
                    107:        prefix = "xx";
                    108:        sufflen = 2;
                    109:        while ((ch = getopt(argc, argv, "f:kn:s")) != -1) {
                    110:                switch (ch) {
                    111:                case 'f':
                    112:                        prefix = optarg;
                    113:                        break;
                    114:                case 'k':
                    115:                        kflag = 1;
                    116:                        break;
                    117:                case 'n':
                    118:                        errno = 0;
                    119:                        sufflen = strtol(optarg, &ep, 10);
                    120:                        if (sufflen <= 0 || *ep != '\0' || errno != 0)
                    121:                                errx(1, "%s: bad suffix length", optarg);
                    122:                        break;
                    123:                case 's':
                    124:                        sflag = 1;
                    125:                        break;
                    126:                default:
                    127:                        usage();
                    128:                }
                    129:        }
                    130:
                    131:        if (sufflen + strlen(prefix) >= PATH_MAX)
                    132:                errx(1, "name too long");
                    133:
                    134:        argc -= optind;
                    135:        argv += optind;
                    136:
                    137:        if ((infn = *argv++) == NULL)
                    138:                usage();
                    139:        if (strcmp(infn, "-") == 0) {
                    140:                infile = stdin;
                    141:                infn = "stdin";
                    142:        } else if ((infile = fopen(infn, "r")) == NULL)
                    143:                err(1, "%s", infn);
                    144:
                    145:        if (!kflag) {
                    146:                doclean = 1;
                    147:                atexit(cleanup);
                    148:                sa.sa_flags = 0;
                    149:                sa.sa_handler = handlesig;
                    150:                sigemptyset(&sa.sa_mask);
                    151:                sigaddset(&sa.sa_mask, SIGHUP);
                    152:                sigaddset(&sa.sa_mask, SIGINT);
                    153:                sigaddset(&sa.sa_mask, SIGTERM);
                    154:                sigaction(SIGHUP, &sa, NULL);
                    155:                sigaction(SIGINT, &sa, NULL);
                    156:                sigaction(SIGTERM, &sa, NULL);
                    157:        }
                    158:
                    159:        lineno = 0;
                    160:        nfiles = 0;
                    161:        truncofs = 0;
                    162:        overfile = NULL;
                    163:
                    164:        /* Ensure 10^sufflen < LONG_MAX. */
                    165:        for (maxfiles = 1, i = 0; i < sufflen; i++) {
                    166:                if (maxfiles > LONG_MAX / 10)
                    167:                        errx(1, "%ld: suffix too long (limit %ld)",
                    168:                            sufflen, i);
                    169:                maxfiles *= 10;
                    170:        }
                    171:
                    172:        /* Create files based on supplied patterns. */
                    173:        while (nfiles < maxfiles - 1 && (expr = *argv++) != NULL) {
                    174:                /* Look ahead & see if this pattern has any repetitions. */
                    175:                if (*argv != NULL && **argv == '{') {
                    176:                        errno = 0;
                    177:                        reps = strtol(*argv + 1, &ep, 10);
                    178:                        if (reps < 0 || *ep != '}' || errno != 0)
                    179:                                errx(1, "%s: bad repetition count", *argv + 1);
                    180:                        argv++;
                    181:                } else
                    182:                        reps = 0;
                    183:
                    184:                if (*expr == '/' || *expr == '%') {
                    185:                        do {
                    186:                                do_rexp(expr);
                    187:                        } while (reps-- != 0 && nfiles < maxfiles - 1);
                    188:                } else if (isdigit((unsigned char)*expr))
                    189:                        do_lineno(expr);
                    190:                else
                    191:                        errx(1, "%s: unrecognised pattern", expr);
                    192:        }
                    193:
                    194:        /* Copy the rest into a new file. */
                    195:        if (!feof(infile)) {
                    196:                ofp = newfile();
1.4       fgsch     197:                while ((p = get_line()) != NULL && fputs(p, ofp) == 0)
1.1       millert   198:                        ;
                    199:                if (!sflag)
                    200:                        printf("%jd\n", (intmax_t)ftello(ofp));
                    201:                if (fclose(ofp) != 0)
                    202:                        err(1, "%s", currfile);
                    203:        }
                    204:
                    205:        toomuch(NULL, 0);
                    206:        doclean = 0;
                    207:
                    208:        return (0);
                    209: }
                    210:
1.9       schwarze  211: static void __dead
1.1       millert   212: usage(void)
                    213: {
                    214:        extern char *__progname;
                    215:
                    216:        fprintf(stderr,
1.11      kn        217:            "usage: %s [-ks] [-f prefix] [-n number] file arg ...\n",
1.1       millert   218:            __progname);
                    219:        exit(1);
                    220: }
                    221:
                    /*
                     * Signal handler: announce the signal on stderr, remove any output
                     * through cleanup(), and leave immediately with status 2.
                     */
                    void
                    handlesig(int sig)
                    {
                    	static const char notice[] = "csplit: caught signal, cleaning up\n";

                    	(void)sig;	/* the same action is taken for every signal */
                    	(void)write(STDERR_FILENO, notice, sizeof(notice) - 1);
                    	cleanup();
                    	_exit(2);
                    }
                    231:
                    232: /* Create a new output file. */
                    233: FILE *
                    234: newfile(void)
                    235: {
                    236:        FILE *fp;
                    237:
                    238:        if ((size_t)snprintf(currfile, sizeof(currfile), "%s%0*ld", prefix,
                    239:            (int)sufflen, nfiles) >= sizeof(currfile))
1.5       guenther  240:                errc(1, ENAMETOOLONG, "%s", currfile);
1.1       millert   241:        if ((fp = fopen(currfile, "w+")) == NULL)
                    242:                err(1, "%s", currfile);
                    243:        nfiles++;
                    244:
                    245:        return (fp);
                    246: }
                    247:
                    248: /* Remove partial output, called before exiting. */
                    249: void
                    250: cleanup(void)
                    251: {
                    252:        char fnbuf[PATH_MAX];
                    253:        long i;
                    254:
                    255:        if (!doclean)
                    256:                return;
                    257:
                    258:        /*
1.2       millert   259:         * NOTE: One cannot portably assume to be able to call snprintf() from
                    260:         * inside a signal handler.  It is, however, safe to do on OpenBSD.
1.1       millert   261:         */
                    262:        for (i = 0; i < nfiles; i++) {
                    263:                snprintf(fnbuf, sizeof(fnbuf), "%s%0*ld", prefix,
                    264:                    (int)sufflen, i);
                    265:                unlink(fnbuf);
                    266:        }
                    267: }
                    268:
                    269: /* Read a line from the input into a static buffer. */
                    270: char *
1.4       fgsch     271: get_line(void)
1.1       millert   272: {
                    273:        static char lbuf[LINE_MAX];
                    274:        FILE *src;
                    275:
                    276:        src = overfile != NULL ? overfile : infile;
                    277:
                    278: again: if (fgets(lbuf, sizeof(lbuf), src) == NULL) {
                    279:                if (src == overfile) {
                    280:                        src = infile;
                    281:                        goto again;
                    282:                }
                    283:                return (NULL);
                    284:        }
                    285:        if (ferror(src))
                    286:                err(1, "%s", infn);
                    287:        lineno++;
                    288:
                    289:        return (lbuf);
                    290: }
                    291:
1.4       fgsch     292: /* Conceptually rewind the input (as obtained by get_line()) back `n' lines. */
1.1       millert   293: void
                    294: toomuch(FILE *ofp, long n)
                    295: {
                    296:        char buf[BUFSIZ];
                    297:        size_t i, nread;
                    298:
                    299:        if (overfile != NULL) {
                    300:                /*
                    301:                 * Truncate the previous file we overflowed into back to
                    302:                 * the correct length, close it.
                    303:                 */
                    304:                if (fflush(overfile) != 0)
                    305:                        err(1, "overflow");
                    306:                if (ftruncate(fileno(overfile), truncofs) != 0)
                    307:                        err(1, "overflow");
                    308:                if (fclose(overfile) != 0)
                    309:                        err(1, "overflow");
                    310:                overfile = NULL;
                    311:        }
                    312:
                    313:        if (n == 0)
                    314:                /* Just tidying up */
                    315:                return;
                    316:
                    317:        lineno -= n;
                    318:
                    319:        /*
                    320:         * Wind the overflow file backwards to `n' lines before the
                    321:         * current one.
                    322:         */
                    323:        do {
                    324:                if (ftello(ofp) < (off_t)sizeof(buf))
                    325:                        rewind(ofp);
                    326:                else
                    327:                        fseeko(ofp, -(off_t)sizeof(buf), SEEK_CUR);
                    328:                if (ferror(ofp))
                    329:                        errx(1, "%s: can't seek", currfile);
                    330:                if ((nread = fread(buf, 1, sizeof(buf), ofp)) == 0)
                    331:                        errx(1, "can't read overflowed output");
                    332:                if (fseeko(ofp, -(off_t)nread, SEEK_CUR) != 0)
                    333:                        err(1, "%s", currfile);
                    334:                for (i = 1; i <= nread; i++)
                    335:                        if (buf[nread - i] == '\n' && n-- == 0)
                    336:                                break;
                    337:                if (ftello(ofp) == 0)
                    338:                        break;
                    339:        } while (n > 0);
                    340:        if (fseeko(ofp, (off_t)(nread - i + 1), SEEK_CUR) != 0)
                    341:                err(1, "%s", currfile);
                    342:
                    343:        /*
1.4       fgsch     344:         * get_line() will read from here. Next call will truncate to
1.1       millert   345:         * truncofs in this file.
                    346:         */
                    347:        overfile = ofp;
                    348:        truncofs = ftello(overfile);
                    349: }
                    350:
                    351: /* Handle splits for /regexp/ and %regexp% patterns. */
                    352: void
                    353: do_rexp(const char *expr)
                    354: {
                    355:        regex_t cre;
                    356:        intmax_t nwritten;
                    357:        long ofs;
                    358:        int first;
                    359:        char *ecopy, *ep, *p, *pofs, *re;
                    360:        FILE *ofp;
                    361:
                    362:        if ((ecopy = strdup(expr)) == NULL)
                    363:                err(1, "strdup");
                    364:
                    365:        re = ecopy + 1;
                    366:        if ((pofs = strrchr(ecopy, *expr)) == NULL || pofs[-1] == '\\')
                    367:                errx(1, "%s: missing trailing %c", expr, *expr);
                    368:        *pofs++ = '\0';
                    369:
                    370:        if (*pofs != '\0') {
                    371:                errno = 0;
                    372:                ofs = strtol(pofs, &ep, 10);
                    373:                if (*ep != '\0' || errno != 0)
                    374:                        errx(1, "%s: bad offset", pofs);
                    375:        } else
                    376:                ofs = 0;
                    377:
                    378:        if (regcomp(&cre, re, REG_BASIC|REG_NOSUB) != 0)
                    379:                errx(1, "%s: bad regular expression", re);
                    380:
                    381:        if (*expr == '/')
                    382:                /* /regexp/: Save results to a file. */
                    383:                ofp = newfile();
                    384:        else {
                    385:                /* %regexp%: Make a temporary file for overflow. */
                    386:                if ((ofp = tmpfile()) == NULL)
                    387:                        err(1, "tmpfile");
                    388:        }
                    389:
                    390:        /* Read and output lines until we get a match. */
                    391:        first = 1;
1.4       fgsch     392:        while ((p = get_line()) != NULL) {
1.1       millert   393:                if (fputs(p, ofp) != 0)
                    394:                        break;
                    395:                if (!first && regexec(&cre, p, 0, NULL, 0) == 0)
                    396:                        break;
                    397:                first = 0;
                    398:        }
                    399:
1.10      millert   400:        if (p == NULL) {
                    401:                toomuch(NULL, 0);
1.1       millert   402:                errx(1, "%s: no match", re);
1.10      millert   403:        }
1.1       millert   404:
                    405:        if (ofs <= 0) {
                    406:                /*
                    407:                 * Negative (or zero) offset: throw back any lines we should
                    408:                 * not have read yet.
                    409:                  */
                    410:                if (p != NULL) {
                    411:                        toomuch(ofp, -ofs + 1);
                    412:                        nwritten = (intmax_t)truncofs;
                    413:                } else
                    414:                        nwritten = (intmax_t)ftello(ofp);
                    415:        } else {
                    416:                /*
                    417:                 * Positive offset: copy the requested number of lines
                    418:                 * after the match.
                    419:                 */
1.4       fgsch     420:                while (--ofs > 0 && (p = get_line()) != NULL)
1.1       millert   421:                        fputs(p, ofp);
                    422:                toomuch(NULL, 0);
                    423:                nwritten = (intmax_t)ftello(ofp);
                    424:                if (fclose(ofp) != 0)
                    425:                        err(1, "%s", currfile);
                    426:        }
                    427:
                    428:        if (!sflag && *expr == '/')
                    429:                printf("%jd\n", nwritten);
                    430:
                    431:        regfree(&cre);
                    432:        free(ecopy);
                    433: }
                    434:
                    435: /* Handle splits based on line number. */
                    436: void
                    437: do_lineno(const char *expr)
                    438: {
                    439:        long lastline, tgtline;
                    440:        char *ep, *p;
                    441:        FILE *ofp;
                    442:
                    443:        errno = 0;
                    444:        tgtline = strtol(expr, &ep, 10);
                    445:        if (tgtline <= 0 || errno != 0 || *ep != '\0')
                    446:                errx(1, "%s: bad line number", expr);
                    447:        lastline = tgtline;
                    448:        if (lastline <= lineno)
                    449:                errx(1, "%s: can't go backwards", expr);
                    450:
                    451:        while (nfiles < maxfiles - 1) {
                    452:                ofp = newfile();
                    453:                while (lineno + 1 != lastline) {
1.4       fgsch     454:                        if ((p = get_line()) == NULL)
1.1       millert   455:                                errx(1, "%ld: out of range", lastline);
                    456:                        if (fputs(p, ofp) != 0)
                    457:                                break;
                    458:                }
                    459:                if (!sflag)
                    460:                        printf("%jd\n", (intmax_t)ftello(ofp));
                    461:                if (fclose(ofp) != 0)
                    462:                        err(1, "%s", currfile);
                    463:                if (reps-- == 0)
                    464:                        break;
                    465:                lastline += tgtline;
                    466:        }
                    467: }