Annotation of src/usr.bin/wc/wc.c, Revision 1.30
1.30 ! cheloha 1: /* $OpenBSD: wc.c,v 1.29 2021/11/28 19:28:42 deraadt Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /*
1.3 millert 4: * Copyright (c) 1980, 1987, 1991, 1993
5: * The Regents of the University of California. All rights reserved.
1.1 deraadt 6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
1.9 millert 15: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 16: * may be used to endorse or promote products derived from this software
17: * without specific prior written permission.
18: *
19: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29: * SUCH DAMAGE.
30: */
31:
1.17 deraadt 32: #include <sys/stat.h>
1.22 guenther 33:
34: #include <fcntl.h>
1.1 deraadt 35: #include <stdio.h>
36: #include <stdlib.h>
37: #include <locale.h>
38: #include <ctype.h>
1.3 millert 39: #include <err.h>
1.1 deraadt 40: #include <unistd.h>
1.11 espie 41: #include <util.h>
1.20 schwarze 42: #include <wchar.h>
43: #include <wctype.h>
1.1 deraadt 44:
/* Size in bytes of the buffer that cnt() fills with read(2). */
#define _MAXBSIZE (64 * 1024)

/* Totals accumulated by cnt() over every input; shown on the "total" line. */
int64_t tlinect;
int64_t twordct;
int64_t tcharct;

/* Command-line switches, set in main(). */
int doline;		/* -l: report line counts */
int doword;		/* -w: report word counts */
int dochar;		/* -c or -m: report character counts */
int humanchar;		/* -h: print counts through fmt_scaled() */
int multibyte;		/* -m given and MB_CUR_MAX > 1 */

/* Value returned by main(); set to 1 whenever an input fails. */
int rval;

/* Program name printed in the usage message. */
extern char *__progname;

static void print_counts(int64_t lines, int64_t words, int64_t chars,
    const char *name);
static void format_and_print(int64_t v);
static void cnt(const char *path);
1.1 deraadt 55:
56: int
1.8 deraadt 57: main(int argc, char *argv[])
1.1 deraadt 58: {
1.6 mpech 59: int ch;
1.1 deraadt 60:
1.20 schwarze 61: setlocale(LC_CTYPE, "");
1.18 deraadt 62:
1.19 deraadt 63: if (pledge("stdio rpath", NULL) == -1)
64: err(1, "pledge");
1.1 deraadt 65:
1.11 espie 66: while ((ch = getopt(argc, argv, "lwchm")) != -1)
1.16 okan 67: switch(ch) {
1.1 deraadt 68: case 'l':
69: doline = 1;
70: break;
71: case 'w':
72: doword = 1;
73: break;
1.20 schwarze 74: case 'm':
75: if (MB_CUR_MAX > 1)
76: multibyte = 1;
77: /* FALLTHROUGH */
1.1 deraadt 78: case 'c':
79: dochar = 1;
80: break;
1.11 espie 81: case 'h':
82: humanchar = 1;
83: break;
1.1 deraadt 84: case '?':
85: default:
1.21 fcambus 86: fprintf(stderr,
1.11 espie 87: "usage: %s [-c | -m] [-hlw] [file ...]\n",
1.3 millert 88: __progname);
1.21 fcambus 89: return 1;
1.1 deraadt 90: }
91: argv += optind;
92: argc -= optind;
93:
94: /*
95: * wc is unusual in that its flags are on by default, so,
96: * if you don't get any arguments, you have to turn them
97: * all on.
98: */
1.3 millert 99: if (!doline && !doword && !dochar)
1.1 deraadt 100: doline = doword = dochar = 1;
101:
102: if (!*argv) {
1.21 fcambus 103: cnt(NULL);
1.1 deraadt 104: } else {
105: int dototal = (argc > 1);
106:
107: do {
108: cnt(*argv);
109: } while(*++argv);
110:
1.3 millert 111: if (dototal)
1.10 deraadt 112: print_counts(tlinect, twordct, tcharct, "total");
1.1 deraadt 113: }
114:
1.21 fcambus 115: return rval;
1.1 deraadt 116: }
117:
1.21 fcambus 118: static void
1.28 cheloha 119: cnt(const char *path)
1.1 deraadt 120: {
1.20 schwarze 121: static char *buf;
1.21 fcambus 122: static size_t bufsz;
1.20 schwarze 123:
124: FILE *stream;
1.28 cheloha 125: const char *file;
1.20 schwarze 126: char *C;
127: wchar_t wc;
1.6 mpech 128: short gotsp;
1.20 schwarze 129: ssize_t len;
1.6 mpech 130: int64_t linect, wordct, charct;
1.1 deraadt 131: struct stat sbuf;
132: int fd;
133:
134: linect = wordct = charct = 0;
1.20 schwarze 135: stream = NULL;
1.28 cheloha 136: if (path != NULL) {
137: file = path;
1.27 deraadt 138: if ((fd = open(file, O_RDONLY)) == -1) {
1.3 millert 139: warn("%s", file);
1.1 deraadt 140: rval = 1;
141: return;
142: }
143: } else {
1.28 cheloha 144: file = "(stdin)";
1.1 deraadt 145: fd = STDIN_FILENO;
146: }
1.10 deraadt 147:
1.30 ! cheloha 148: if (!multibyte) {
1.29 deraadt 149: if (bufsz < _MAXBSIZE &&
150: (buf = realloc(buf, _MAXBSIZE)) == NULL)
1.20 schwarze 151: err(1, NULL);
1.30 ! cheloha 152:
! 153: /*
! 154: * According to POSIX, a word is a "maximal string of
! 155: * characters delimited by whitespace." Nothing is said
! 156: * about a character being printing or non-printing.
! 157: */
! 158: if (doword) {
! 159: gotsp = 1;
! 160: while ((len = read(fd, buf, _MAXBSIZE)) > 0) {
! 161: charct += len;
! 162: for (C = buf; len--; ++C) {
! 163: if (isspace((unsigned char)*C)) {
! 164: gotsp = 1;
! 165: if (*C == '\n')
! 166: ++linect;
! 167: } else if (gotsp) {
! 168: gotsp = 0;
! 169: ++wordct;
! 170: }
! 171: }
! 172: }
! 173: if (len == -1) {
! 174: warn("%s", file);
! 175: rval = 1;
! 176: }
! 177: }
1.1 deraadt 178: /*
1.3 millert 179: * Line counting is split out because it's a lot
1.1 deraadt 180: * faster to get lines than to get words, since
181: * the word count requires some logic.
182: */
1.30 ! cheloha 183: else if (doline) {
1.29 deraadt 184: while ((len = read(fd, buf, _MAXBSIZE)) > 0) {
1.1 deraadt 185: charct += len;
186: for (C = buf; len--; ++C)
187: if (*C == '\n')
188: ++linect;
189: }
190: if (len == -1) {
1.3 millert 191: warn("%s", file);
1.1 deraadt 192: rval = 1;
193: }
194: }
195: /*
1.3 millert 196: * If all we need is the number of characters and
1.1 deraadt 197: * it's a directory or a regular or linked file, just
198: * stat the puppy. We avoid testing for it not being
199: * a special device in case someone adds a new type
200: * of inode.
201: */
202: else if (dochar) {
1.3 millert 203: mode_t ifmt;
1.1 deraadt 204:
205: if (fstat(fd, &sbuf)) {
1.3 millert 206: warn("%s", file);
1.1 deraadt 207: rval = 1;
208: } else {
209: ifmt = sbuf.st_mode & S_IFMT;
210: if (ifmt == S_IFREG || ifmt == S_IFLNK
1.3 millert 211: || ifmt == S_IFDIR) {
1.1 deraadt 212: charct = sbuf.st_size;
213: } else {
1.29 deraadt 214: while ((len = read(fd, buf, _MAXBSIZE)) > 0)
1.1 deraadt 215: charct += len;
216: if (len == -1) {
1.3 millert 217: warn("%s", file);
1.1 deraadt 218: rval = 1;
219: }
220: }
221: }
222: }
1.3 millert 223: } else {
1.28 cheloha 224: if (path == NULL)
1.20 schwarze 225: stream = stdin;
226: else if ((stream = fdopen(fd, "r")) == NULL) {
227: warn("%s", file);
228: close(fd);
229: rval = 1;
230: return;
231: }
232:
1.1 deraadt 233: gotsp = 1;
1.20 schwarze 234: while ((len = getline(&buf, &bufsz, stream)) > 0) {
1.30 ! cheloha 235: const char *end = buf + len;
! 236: for (C = buf; C < end; C += len) {
! 237: ++charct;
! 238: len = mbtowc(&wc, C, MB_CUR_MAX);
! 239: if (len == -1) {
! 240: mbtowc(NULL, NULL,
! 241: MB_CUR_MAX);
! 242: len = 1;
! 243: wc = L'?';
! 244: } else if (len == 0)
! 245: len = 1;
! 246: if (iswspace(wc)) {
! 247: gotsp = 1;
! 248: if (wc == L'\n')
! 249: ++linect;
! 250: } else if (gotsp) {
! 251: gotsp = 0;
! 252: ++wordct;
1.1 deraadt 253: }
254: }
255: }
1.20 schwarze 256: if (ferror(stream)) {
1.3 millert 257: warn("%s", file);
1.1 deraadt 258: rval = 1;
259: }
260: }
261:
1.28 cheloha 262: print_counts(linect, wordct, charct, path);
1.1 deraadt 263:
1.3 millert 264: /*
265: * Don't bother checking doline, doword, or dochar -- speeds
1.10 deraadt 266: * up the common case
1.3 millert 267: */
1.1 deraadt 268: tlinect += linect;
269: twordct += wordct;
270: tcharct += charct;
271:
1.20 schwarze 272: if ((stream == NULL ? close(fd) : fclose(stream)) != 0) {
1.3 millert 273: warn("%s", file);
1.1 deraadt 274: rval = 1;
275: }
276: }
277:
1.21 fcambus 278: static void
279: format_and_print(int64_t v)
1.11 espie 280: {
281: if (humanchar) {
282: char result[FMT_SCALED_STRSIZE];
283:
1.21 fcambus 284: fmt_scaled((long long)v, result);
285: printf("%7s", result);
1.11 espie 286: } else {
1.21 fcambus 287: printf(" %7lld", v);
1.11 espie 288: }
289: }
290:
1.21 fcambus 291: static void
1.28 cheloha 292: print_counts(int64_t lines, int64_t words, int64_t chars, const char *name)
1.1 deraadt 293: {
294: if (doline)
1.21 fcambus 295: format_and_print(lines);
1.1 deraadt 296: if (doword)
1.21 fcambus 297: format_and_print(words);
1.1 deraadt 298: if (dochar)
1.21 fcambus 299: format_and_print(chars);
1.1 deraadt 300:
1.12 otto 301: if (name)
1.21 fcambus 302: printf(" %s\n", name);
1.12 otto 303: else
1.21 fcambus 304: printf("\n");
1.1 deraadt 305: }