Annotation of src/usr.bin/wc/wc.c, Revision 1.21
1.21 ! fcambus 1: /* $OpenBSD: wc.c,v 1.20 2015/12/08 01:00:45 schwarze Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /*
1.3 millert 4: * Copyright (c) 1980, 1987, 1991, 1993
5: * The Regents of the University of California. All rights reserved.
1.1 deraadt 6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
1.9 millert 15: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 16: * may be used to endorse or promote products derived from this software
17: * without specific prior written permission.
18: *
19: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29: * SUCH DAMAGE.
30: */
31:
1.17 deraadt 32: #include <sys/param.h> /* MAXBSIZE */
33: #include <sys/stat.h>
34: #include <sys/file.h>
1.1 deraadt 35: #include <stdio.h>
36: #include <stdlib.h>
37: #include <locale.h>
38: #include <ctype.h>
1.3 millert 39: #include <err.h>
1.1 deraadt 40: #include <unistd.h>
1.11 espie 41: #include <util.h>
1.20 schwarze 42: #include <wchar.h>
43: #include <wctype.h>
1.1 deraadt 44:
/* Running totals over every input processed; printed as the "total" row. */
int64_t	tlinect;
int64_t	twordct;
int64_t	tcharct;

/* Output selectors set from the command line: -l, -w, -c/-m, -h. */
int	doline;
int	doword;
int	dochar;
int	humanchar;
/* Set by -m only when the current locale has MB_CUR_MAX > 1. */
int	multibyte;

/* Exit status of the program; set to 1 when any input fails. */
int	rval;

/* Program name, used in the usage message. */
extern char *__progname;

static void	print_counts(int64_t, int64_t, int64_t, char *);
static void	format_and_print(int64_t);
static void	cnt(char *);
1.1 deraadt 53:
54: int
1.8 deraadt 55: main(int argc, char *argv[])
1.1 deraadt 56: {
1.6 mpech 57: int ch;
1.1 deraadt 58:
1.20 schwarze 59: setlocale(LC_CTYPE, "");
1.18 deraadt 60:
1.19 deraadt 61: if (pledge("stdio rpath", NULL) == -1)
62: err(1, "pledge");
1.1 deraadt 63:
1.11 espie 64: while ((ch = getopt(argc, argv, "lwchm")) != -1)
1.16 okan 65: switch(ch) {
1.1 deraadt 66: case 'l':
67: doline = 1;
68: break;
69: case 'w':
70: doword = 1;
71: break;
1.20 schwarze 72: case 'm':
73: if (MB_CUR_MAX > 1)
74: multibyte = 1;
75: /* FALLTHROUGH */
1.1 deraadt 76: case 'c':
77: dochar = 1;
78: break;
1.11 espie 79: case 'h':
80: humanchar = 1;
81: break;
1.1 deraadt 82: case '?':
83: default:
1.21 ! fcambus 84: fprintf(stderr,
1.11 espie 85: "usage: %s [-c | -m] [-hlw] [file ...]\n",
1.3 millert 86: __progname);
1.21 ! fcambus 87: return 1;
1.1 deraadt 88: }
89: argv += optind;
90: argc -= optind;
91:
92: /*
93: * wc is unusual in that its flags are on by default, so,
94: * if you don't get any arguments, you have to turn them
95: * all on.
96: */
1.3 millert 97: if (!doline && !doword && !dochar)
1.1 deraadt 98: doline = doword = dochar = 1;
99:
100: if (!*argv) {
1.21 ! fcambus 101: cnt(NULL);
1.1 deraadt 102: } else {
103: int dototal = (argc > 1);
104:
105: do {
106: cnt(*argv);
107: } while(*++argv);
108:
1.3 millert 109: if (dototal)
1.10 deraadt 110: print_counts(tlinect, twordct, tcharct, "total");
1.1 deraadt 111: }
112:
1.21 ! fcambus 113: return rval;
1.1 deraadt 114: }
115:
1.21 ! fcambus 116: static void
1.8 deraadt 117: cnt(char *file)
1.1 deraadt 118: {
1.20 schwarze 119: static char *buf;
1.21 ! fcambus 120: static size_t bufsz;
1.20 schwarze 121:
122: FILE *stream;
123: char *C;
124: wchar_t wc;
1.6 mpech 125: short gotsp;
1.20 schwarze 126: ssize_t len;
1.6 mpech 127: int64_t linect, wordct, charct;
1.1 deraadt 128: struct stat sbuf;
129: int fd;
130:
131: linect = wordct = charct = 0;
1.20 schwarze 132: stream = NULL;
1.1 deraadt 133: if (file) {
134: if ((fd = open(file, O_RDONLY, 0)) < 0) {
1.3 millert 135: warn("%s", file);
1.1 deraadt 136: rval = 1;
137: return;
138: }
139: } else {
140: fd = STDIN_FILENO;
141: }
1.10 deraadt 142:
1.20 schwarze 143: if (!doword && !multibyte) {
144: if (bufsz < MAXBSIZE &&
145: (buf = realloc(buf, MAXBSIZE)) == NULL)
146: err(1, NULL);
1.1 deraadt 147: /*
1.3 millert 148: * Line counting is split out because it's a lot
1.1 deraadt 149: * faster to get lines than to get words, since
150: * the word count requires some logic.
151: */
152: if (doline) {
1.3 millert 153: while ((len = read(fd, buf, MAXBSIZE)) > 0) {
1.1 deraadt 154: charct += len;
155: for (C = buf; len--; ++C)
156: if (*C == '\n')
157: ++linect;
158: }
159: if (len == -1) {
1.3 millert 160: warn("%s", file);
1.1 deraadt 161: rval = 1;
162: }
163: }
164: /*
1.3 millert 165: * If all we need is the number of characters and
1.1 deraadt 166: * it's a directory or a regular or linked file, just
167: * stat the puppy. We avoid testing for it not being
168: * a special device in case someone adds a new type
169: * of inode.
170: */
171: else if (dochar) {
1.3 millert 172: mode_t ifmt;
1.1 deraadt 173:
174: if (fstat(fd, &sbuf)) {
1.3 millert 175: warn("%s", file);
1.1 deraadt 176: rval = 1;
177: } else {
178: ifmt = sbuf.st_mode & S_IFMT;
179: if (ifmt == S_IFREG || ifmt == S_IFLNK
1.3 millert 180: || ifmt == S_IFDIR) {
1.1 deraadt 181: charct = sbuf.st_size;
182: } else {
1.3 millert 183: while ((len = read(fd, buf, MAXBSIZE)) > 0)
1.1 deraadt 184: charct += len;
185: if (len == -1) {
1.3 millert 186: warn("%s", file);
1.1 deraadt 187: rval = 1;
188: }
189: }
190: }
191: }
1.3 millert 192: } else {
1.20 schwarze 193: if (file == NULL)
194: stream = stdin;
195: else if ((stream = fdopen(fd, "r")) == NULL) {
196: warn("%s", file);
197: close(fd);
198: rval = 1;
199: return;
200: }
201:
202: /*
203: * Do it the hard way.
204: * According to POSIX, a word is a "maximal string of
205: * characters delimited by whitespace." Nothing is said
206: * about a character being printing or non-printing.
207: */
1.1 deraadt 208: gotsp = 1;
1.20 schwarze 209: while ((len = getline(&buf, &bufsz, stream)) > 0) {
210: if (multibyte) {
211: for (C = buf; *C != '\0'; C += len) {
212: ++charct;
213: len = mbtowc(&wc, C, MB_CUR_MAX);
214: if (len == -1) {
1.21 ! fcambus 215: mbtowc(NULL, NULL,
1.20 schwarze 216: MB_CUR_MAX);
217: len = 1;
218: wc = L' ';
219: }
220: if (iswspace(wc)) {
221: gotsp = 1;
222: if (wc == L'\n')
223: ++linect;
224: } else if (gotsp) {
225: gotsp = 0;
226: ++wordct;
227: }
228: }
229: } else {
230: charct += len;
231: for (C = buf; *C != '\0'; ++C) {
232: if (isspace((unsigned char)*C)) {
233: gotsp = 1;
234: if (*C == '\n')
235: ++linect;
236: } else if (gotsp) {
1.1 deraadt 237: gotsp = 0;
238: ++wordct;
239: }
240: }
241: }
242: }
1.20 schwarze 243: if (ferror(stream)) {
1.3 millert 244: warn("%s", file);
1.1 deraadt 245: rval = 1;
246: }
247: }
248:
1.12 otto 249: print_counts(linect, wordct, charct, file);
1.1 deraadt 250:
1.3 millert 251: /*
252: * Don't bother checking doline, doword, or dochar -- speeds
1.10 deraadt 253: * up the common case
1.3 millert 254: */
1.1 deraadt 255: tlinect += linect;
256: twordct += wordct;
257: tcharct += charct;
258:
1.20 schwarze 259: if ((stream == NULL ? close(fd) : fclose(stream)) != 0) {
1.3 millert 260: warn("%s", file);
1.1 deraadt 261: rval = 1;
262: }
263: }
264:
1.21 ! fcambus 265: static void
! 266: format_and_print(int64_t v)
1.11 espie 267: {
268: if (humanchar) {
269: char result[FMT_SCALED_STRSIZE];
270:
1.21 ! fcambus 271: fmt_scaled((long long)v, result);
! 272: printf("%7s", result);
1.11 espie 273: } else {
1.21 ! fcambus 274: printf(" %7lld", v);
1.11 espie 275: }
276: }
277:
1.21 ! fcambus 278: static void
1.8 deraadt 279: print_counts(int64_t lines, int64_t words, int64_t chars, char *name)
1.1 deraadt 280: {
281: if (doline)
1.21 ! fcambus 282: format_and_print(lines);
1.1 deraadt 283: if (doword)
1.21 ! fcambus 284: format_and_print(words);
1.1 deraadt 285: if (dochar)
1.21 ! fcambus 286: format_and_print(chars);
1.1 deraadt 287:
1.12 otto 288: if (name)
1.21 ! fcambus 289: printf(" %s\n", name);
1.12 otto 290: else
1.21 ! fcambus 291: printf("\n");
1.1 deraadt 292: }