Annotation of src/usr.bin/wc/wc.c, Revision 1.20
1.20 ! schwarze 1: /* $OpenBSD: wc.c,v 1.19 2015/10/09 01:37:09 deraadt Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /*
1.3 millert 4: * Copyright (c) 1980, 1987, 1991, 1993
5: * The Regents of the University of California. All rights reserved.
1.1 deraadt 6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
1.9 millert 15: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 16: * may be used to endorse or promote products derived from this software
17: * without specific prior written permission.
18: *
19: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29: * SUCH DAMAGE.
30: */
31:
1.17 deraadt 32: #include <sys/param.h> /* MAXBSIZE */
33: #include <sys/stat.h>
34: #include <sys/file.h>
1.1 deraadt 35: #include <stdio.h>
36: #include <stdlib.h>
37: #include <string.h>
38: #include <locale.h>
39: #include <ctype.h>
1.3 millert 40: #include <err.h>
1.1 deraadt 41: #include <unistd.h>
1.11 espie 42: #include <util.h>
1.20 ! schwarze 43: #include <wchar.h>
! 44: #include <wctype.h>
1.1 deraadt 45:
1.3 millert 46: int64_t tlinect, twordct, tcharct;	/* running line/word/char totals over all inputs; printed as "total" */
1.20 ! schwarze 47: int doline, doword, dochar, humanchar, multibyte;	/* set from -l, -w, -c/-m, -h; multibyte only by -m when MB_CUR_MAX > 1 */
1.3 millert 48: int rval;	/* process exit status; set to 1 whenever an input fails */
49: extern char *__progname;	/* program name, used in the usage message */
50:
1.7 millert 51: void print_counts(int64_t, int64_t, int64_t, char *);	/* lines, words, chars, name (NULL prints no name) */
1.14 deraadt 52: void format_and_print(long long);	/* prints one count column, honoring humanchar */
1.7 millert 53: void cnt(char *);	/* counts one file; NULL means standard input */
1.1 deraadt 54:
55: int
1.8 deraadt 56: main(int argc, char *argv[])
1.1 deraadt 57: {
1.6 mpech 58: int ch;
1.1 deraadt 59:
1.20 ! schwarze 60: setlocale(LC_CTYPE, "");
1.18 deraadt 61:
1.19 deraadt 62: if (pledge("stdio rpath", NULL) == -1)
63: err(1, "pledge");
1.1 deraadt 64:
1.11 espie 65: while ((ch = getopt(argc, argv, "lwchm")) != -1)
1.16 okan 66: switch(ch) {
1.1 deraadt 67: case 'l':
68: doline = 1;
69: break;
70: case 'w':
71: doword = 1;
72: break;
1.20 ! schwarze 73: case 'm':
! 74: if (MB_CUR_MAX > 1)
! 75: multibyte = 1;
! 76: /* FALLTHROUGH */
1.1 deraadt 77: case 'c':
78: dochar = 1;
79: break;
1.11 espie 80: case 'h':
81: humanchar = 1;
82: break;
1.1 deraadt 83: case '?':
84: default:
1.3 millert 85: (void)fprintf(stderr,
1.11 espie 86: "usage: %s [-c | -m] [-hlw] [file ...]\n",
1.3 millert 87: __progname);
1.1 deraadt 88: exit(1);
89: }
90: argv += optind;
91: argc -= optind;
92:
93: /*
94: * wc is unusual in that its flags are on by default, so,
95: * if you don't get any arguments, you have to turn them
96: * all on.
97: */
1.3 millert 98: if (!doline && !doword && !dochar)
1.1 deraadt 99: doline = doword = dochar = 1;
100:
101: if (!*argv) {
102: cnt((char *)NULL);
103: } else {
104: int dototal = (argc > 1);
105:
106: do {
107: cnt(*argv);
108: } while(*++argv);
109:
1.3 millert 110: if (dototal)
1.10 deraadt 111: print_counts(tlinect, twordct, tcharct, "total");
1.1 deraadt 112: }
113:
114: exit(rval);
115: }
116:
1.3 millert 117: void
1.8 deraadt 118: cnt(char *file)
1.1 deraadt 119: {
1.20 ! schwarze 120: static char *buf;
! 121: static ssize_t bufsz;
! 122:
! 123: FILE *stream;
! 124: char *C;
! 125: wchar_t wc;
1.6 mpech 126: short gotsp;
1.20 ! schwarze 127: ssize_t len;
1.6 mpech 128: int64_t linect, wordct, charct;
1.1 deraadt 129: struct stat sbuf;
130: int fd;
131:
132: linect = wordct = charct = 0;
1.20 ! schwarze 133: stream = NULL;
1.1 deraadt 134: if (file) {
135: if ((fd = open(file, O_RDONLY, 0)) < 0) {
1.3 millert 136: warn("%s", file);
1.1 deraadt 137: rval = 1;
138: return;
139: }
140: } else {
141: fd = STDIN_FILENO;
142: }
1.10 deraadt 143:
1.20 ! schwarze 144: if (!doword && !multibyte) {
! 145: if (bufsz < MAXBSIZE &&
! 146: (buf = realloc(buf, MAXBSIZE)) == NULL)
! 147: err(1, NULL);
1.1 deraadt 148: /*
1.3 millert 149: * Line counting is split out because it's a lot
1.1 deraadt 150: * faster to get lines than to get words, since
151: * the word count requires some logic.
152: */
153: if (doline) {
1.3 millert 154: while ((len = read(fd, buf, MAXBSIZE)) > 0) {
1.1 deraadt 155: charct += len;
156: for (C = buf; len--; ++C)
157: if (*C == '\n')
158: ++linect;
159: }
160: if (len == -1) {
1.3 millert 161: warn("%s", file);
1.1 deraadt 162: rval = 1;
163: }
164: }
165: /*
1.3 millert 166: * If all we need is the number of characters and
1.1 deraadt 167: * it's a directory or a regular or linked file, just
168: * stat the puppy. We avoid testing for it not being
169: * a special device in case someone adds a new type
170: * of inode.
171: */
172: else if (dochar) {
1.3 millert 173: mode_t ifmt;
1.1 deraadt 174:
175: if (fstat(fd, &sbuf)) {
1.3 millert 176: warn("%s", file);
1.1 deraadt 177: rval = 1;
178: } else {
179: ifmt = sbuf.st_mode & S_IFMT;
180: if (ifmt == S_IFREG || ifmt == S_IFLNK
1.3 millert 181: || ifmt == S_IFDIR) {
1.1 deraadt 182: charct = sbuf.st_size;
183: } else {
1.3 millert 184: while ((len = read(fd, buf, MAXBSIZE)) > 0)
1.1 deraadt 185: charct += len;
186: if (len == -1) {
1.3 millert 187: warn("%s", file);
1.1 deraadt 188: rval = 1;
189: }
190: }
191: }
192: }
1.3 millert 193: } else {
1.20 ! schwarze 194: if (file == NULL)
! 195: stream = stdin;
! 196: else if ((stream = fdopen(fd, "r")) == NULL) {
! 197: warn("%s", file);
! 198: close(fd);
! 199: rval = 1;
! 200: return;
! 201: }
! 202:
! 203: /*
! 204: * Do it the hard way.
! 205: * According to POSIX, a word is a "maximal string of
! 206: * characters delimited by whitespace." Nothing is said
! 207: * about a character being printing or non-printing.
! 208: */
1.1 deraadt 209: gotsp = 1;
1.20 ! schwarze 210: while ((len = getline(&buf, &bufsz, stream)) > 0) {
! 211: if (multibyte) {
! 212: for (C = buf; *C != '\0'; C += len) {
! 213: ++charct;
! 214: len = mbtowc(&wc, C, MB_CUR_MAX);
! 215: if (len == -1) {
! 216: (void)mbtowc(NULL, NULL,
! 217: MB_CUR_MAX);
! 218: len = 1;
! 219: wc = L' ';
! 220: }
! 221: if (iswspace(wc)) {
! 222: gotsp = 1;
! 223: if (wc == L'\n')
! 224: ++linect;
! 225: } else if (gotsp) {
! 226: gotsp = 0;
! 227: ++wordct;
! 228: }
! 229: }
! 230: } else {
! 231: charct += len;
! 232: for (C = buf; *C != '\0'; ++C) {
! 233: if (isspace((unsigned char)*C)) {
! 234: gotsp = 1;
! 235: if (*C == '\n')
! 236: ++linect;
! 237: } else if (gotsp) {
1.1 deraadt 238: gotsp = 0;
239: ++wordct;
240: }
241: }
242: }
243: }
1.20 ! schwarze 244: if (ferror(stream)) {
1.3 millert 245: warn("%s", file);
1.1 deraadt 246: rval = 1;
247: }
248: }
249:
1.12 otto 250: print_counts(linect, wordct, charct, file);
1.1 deraadt 251:
1.3 millert 252: /*
253: * Don't bother checking doline, doword, or dochar -- speeds
1.10 deraadt 254: * up the common case
1.3 millert 255: */
1.1 deraadt 256: tlinect += linect;
257: twordct += wordct;
258: tcharct += charct;
259:
1.20 ! schwarze 260: if ((stream == NULL ? close(fd) : fclose(stream)) != 0) {
1.3 millert 261: warn("%s", file);
1.1 deraadt 262: rval = 1;
263: }
264: }
265:
1.11 espie 266: void
267: format_and_print(long long v)
268: {
269: if (humanchar) {
270: char result[FMT_SCALED_STRSIZE];
271:
272: (void)fmt_scaled(v, result);
273: (void)printf("%7s", result);
274: } else {
275: (void)printf(" %7lld", v);
276: }
277: }
278:
1.1 deraadt 279: void
1.8 deraadt 280: print_counts(int64_t lines, int64_t words, int64_t chars, char *name)
1.1 deraadt 281: {
282: if (doline)
1.11 espie 283: format_and_print((long long)lines);
1.1 deraadt 284: if (doword)
1.11 espie 285: format_and_print((long long)words);
1.1 deraadt 286: if (dochar)
1.11 espie 287: format_and_print((long long)chars);
1.1 deraadt 288:
1.12 otto 289: if (name)
290: (void)printf(" %s\n", name);
291: else
292: (void)printf("\n");
1.1 deraadt 293: }