Annotation of src/usr.bin/wc/wc.c, Revision 1.8
1.8 ! deraadt 1: /* $OpenBSD: wc.c,v 1.7 2002/02/16 21:27:58 millert Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /*
1.3 millert 4: * Copyright (c) 1980, 1987, 1991, 1993
5: * The Regents of the University of California. All rights reserved.
1.1 deraadt 6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: * 3. All advertising materials mentioning features or use of this software
16: * must display the following acknowledgement:
17: * This product includes software developed by the University of
18: * California, Berkeley and its contributors.
19: * 4. Neither the name of the University nor the names of its contributors
20: * may be used to endorse or promote products derived from this software
21: * without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: */
35:
36: #ifndef lint
1.3 millert 37: static char copyright[] =
38: "@(#) Copyright (c) 1980, 1987, 1991, 1993\n\
39: The Regents of the University of California. All rights reserved.\n";
1.1 deraadt 40: #endif /* not lint */
41:
42: #ifndef lint
1.3 millert 43: #if 0
44: static char sccsid[] = "@(#)wc.c 8.2 (Berkeley) 5/2/95";
45: #else
1.8 ! deraadt 46: static char rcsid[] = "$OpenBSD: wc.c,v 1.7 2002/02/16 21:27:58 millert Exp $";
1.3 millert 47: #endif
1.1 deraadt 48: #endif /* not lint */
49:
50: #include <stdio.h>
51: #include <stdlib.h>
52: #include <string.h>
53: #include <locale.h>
54: #include <ctype.h>
1.3 millert 55: #include <err.h>
1.1 deraadt 56: #include <sys/param.h>
57: #include <sys/stat.h>
58: #include <sys/file.h>
59: #include <unistd.h>
60:
/* Running totals over every input counted; main() prints them as "total". */
int64_t tlinect;
int64_t twordct;
int64_t tcharct;

/* Output selectors: non-zero when the matching count is to be printed. */
int doline;
int doword;
int dochar;

/* Exit status handed to exit(); set to 1 once any input reports an error. */
int rval;

/* Program name, defined outside this file; used in the usage message. */
extern char *__progname;

void print_counts(int64_t, int64_t, int64_t, char *);
void cnt(char *);
1.1 deraadt 68:
69: int
1.8 ! deraadt 70: main(int argc, char *argv[])
1.1 deraadt 71: {
1.6 mpech 72: int ch;
1.1 deraadt 73:
74: setlocale(LC_ALL, "");
75:
76: while ((ch = getopt(argc, argv, "lwcm")) != -1)
77: switch((char)ch) {
78: case 'l':
79: doline = 1;
80: break;
81: case 'w':
82: doword = 1;
83: break;
84: case 'c':
85: case 'm':
86: dochar = 1;
87: break;
88: case '?':
89: default:
1.3 millert 90: (void)fprintf(stderr,
91: "usage: %s [-c | -m] [-lw] [file ...]\n",
92: __progname);
1.1 deraadt 93: exit(1);
94: }
95: argv += optind;
96: argc -= optind;
97:
98: /*
99: * wc is unusual in that its flags are on by default, so,
100: * if you don't get any arguments, you have to turn them
101: * all on.
102: */
1.3 millert 103: if (!doline && !doword && !dochar)
1.1 deraadt 104: doline = doword = dochar = 1;
105:
106: if (!*argv) {
107: cnt((char *)NULL);
108: } else {
109: int dototal = (argc > 1);
110:
111: do {
112: cnt(*argv);
113: } while(*++argv);
114:
1.3 millert 115: if (dototal)
116: print_counts(tlinect, twordct, tcharct, "total");
1.1 deraadt 117: }
118:
119: exit(rval);
120: }
121:
1.3 millert 122: void
1.8 ! deraadt 123: cnt(char *file)
1.1 deraadt 124: {
1.6 mpech 125: u_char *C;
126: short gotsp;
127: int len;
128: int64_t linect, wordct, charct;
1.1 deraadt 129: struct stat sbuf;
130: int fd;
131: u_char buf[MAXBSIZE];
132:
133: linect = wordct = charct = 0;
134: if (file) {
135: if ((fd = open(file, O_RDONLY, 0)) < 0) {
1.3 millert 136: warn("%s", file);
1.1 deraadt 137: rval = 1;
138: return;
139: }
140: } else {
141: fd = STDIN_FILENO;
142: }
143:
144: if (!doword) {
145: /*
1.3 millert 146: * Line counting is split out because it's a lot
1.1 deraadt 147: * faster to get lines than to get words, since
148: * the word count requires some logic.
149: */
150: if (doline) {
1.3 millert 151: while ((len = read(fd, buf, MAXBSIZE)) > 0) {
1.1 deraadt 152: charct += len;
153: for (C = buf; len--; ++C)
154: if (*C == '\n')
155: ++linect;
156: }
157: if (len == -1) {
1.3 millert 158: warn("%s", file);
1.1 deraadt 159: rval = 1;
160: }
161: }
162: /*
1.3 millert 163: * If all we need is the number of characters and
1.1 deraadt 164: * it's a directory or a regular or linked file, just
165: * stat the puppy. We avoid testing for it not being
166: * a special device in case someone adds a new type
167: * of inode.
168: */
169: else if (dochar) {
1.3 millert 170: mode_t ifmt;
1.1 deraadt 171:
172: if (fstat(fd, &sbuf)) {
1.3 millert 173: warn("%s", file);
1.1 deraadt 174: rval = 1;
175: } else {
176: ifmt = sbuf.st_mode & S_IFMT;
177: if (ifmt == S_IFREG || ifmt == S_IFLNK
1.3 millert 178: || ifmt == S_IFDIR) {
1.1 deraadt 179: charct = sbuf.st_size;
180: } else {
1.3 millert 181: while ((len = read(fd, buf, MAXBSIZE)) > 0)
1.1 deraadt 182: charct += len;
183: if (len == -1) {
1.3 millert 184: warn("%s", file);
1.1 deraadt 185: rval = 1;
186: }
187: }
188: }
189: }
1.3 millert 190: } else {
191: /* Do it the hard way... */
1.1 deraadt 192: gotsp = 1;
193: while ((len = read(fd, buf, MAXBSIZE)) > 0) {
1.3 millert 194: /*
195: * This loses in the presence of multi-byte characters.
196: * To do it right would require a function to return a
197: * character while knowing how many bytes it consumed.
198: */
1.1 deraadt 199: charct += len;
200: for (C = buf; len--; ++C) {
201: if (isspace (*C)) {
202: gotsp = 1;
1.3 millert 203: if (*C == '\n')
1.1 deraadt 204: ++linect;
205: } else {
206: /*
207: * This line implements the POSIX
208: * spec, i.e. a word is a "maximal
209: * string of characters delimited by
210: * whitespace." Notice nothing was
211: * said about a character being
212: * printing or non-printing.
213: */
214: if (gotsp) {
215: gotsp = 0;
216: ++wordct;
217: }
218: }
219: }
220: }
221: if (len == -1) {
1.3 millert 222: warn("%s", file);
1.1 deraadt 223: rval = 1;
224: }
225: }
226:
1.3 millert 227: print_counts(linect, wordct, charct, file ? file : "");
1.1 deraadt 228:
1.3 millert 229: /*
230: * Don't bother checking doline, doword, or dochar -- speeds
231: * up the common case
232: */
1.1 deraadt 233: tlinect += linect;
234: twordct += wordct;
235: tcharct += charct;
236:
1.3 millert 237: if (close(fd) != 0) {
238: warn("%s", file);
1.1 deraadt 239: rval = 1;
240: }
241: }
242:
243: void
1.8 ! deraadt 244: print_counts(int64_t lines, int64_t words, int64_t chars, char *name)
1.1 deraadt 245: {
246:
247: if (doline)
1.5 deraadt 248: (void)printf(" %7lld", (long long)lines);
1.1 deraadt 249: if (doword)
1.5 deraadt 250: (void)printf(" %7lld", (long long)words);
1.1 deraadt 251: if (dochar)
1.5 deraadt 252: (void)printf(" %7lld", (long long)chars);
1.1 deraadt 253:
1.3 millert 254: (void)printf(" %s\n", name);
1.1 deraadt 255: }