Annotation of src/usr.bin/uniq/uniq.c, Revision 1.24
1.24 ! schwarze 1: /* $OpenBSD: uniq.c,v 1.23 2015/11/02 20:25:42 mmcc Exp $ */
1.1 deraadt 2: /* $NetBSD: uniq.c,v 1.7 1995/08/31 22:03:48 jtc Exp $ */
3:
4: /*
5: * Copyright (c) 1989, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * This code is derived from software contributed to Berkeley by
9: * Case Larsen.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
1.14 millert 19: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 20: * may be used to endorse or promote products derived from this software
21: * without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: */
35:
1.15 ray 36: #include <ctype.h>
37: #include <err.h>
1.1 deraadt 38: #include <errno.h>
1.15 ray 39: #include <limits.h>
1.24 ! schwarze 40: #include <locale.h>
1.1 deraadt 41: #include <stdio.h>
42: #include <stdlib.h>
43: #include <string.h>
44: #include <unistd.h>
1.24 ! schwarze 45: #include <wchar.h>
! 46: #include <wctype.h>
1.1 deraadt 47:
48: #define MAXLINELEN (8 * 1024)
49:
50: int cflag, dflag, uflag;
51: int numchars, numfields, repeats;
52:
1.9 millert 53: FILE *file(char *, char *);
54: void show(FILE *, char *);
55: char *skip(char *);
56: void obsolete(char *[]);
1.15 ray 57: __dead void usage(void);
1.1 deraadt 58:
59: int
1.10 deraadt 60: main(int argc, char *argv[])
1.1 deraadt 61: {
1.8 mpech 62: char *t1, *t2;
1.10 deraadt 63: FILE *ifp = NULL, *ofp = NULL;
1.1 deraadt 64: int ch;
1.15 ray 65: char *prevline, *thisline;
1.1 deraadt 66:
1.24 ! schwarze 67: setlocale(LC_CTYPE, "");
! 68:
1.22 deraadt 69: if (pledge("stdio rpath wpath cpath", NULL) == -1)
70: err(1, "pledge");
1.20 deraadt 71:
1.1 deraadt 72: obsolete(argv);
1.15 ray 73: while ((ch = getopt(argc, argv, "cdf:s:u")) != -1) {
74: const char *errstr;
75:
1.1 deraadt 76: switch (ch) {
77: case 'c':
78: cflag = 1;
79: break;
80: case 'd':
81: dflag = 1;
82: break;
83: case 'f':
1.15 ray 84: numfields = (int)strtonum(optarg, 0, INT_MAX,
85: &errstr);
86: if (errstr)
87: errx(1, "field skip value is %s: %s",
88: errstr, optarg);
1.1 deraadt 89: break;
90: case 's':
1.15 ray 91: numchars = (int)strtonum(optarg, 0, INT_MAX,
92: &errstr);
93: if (errstr)
94: errx(1,
95: "character skip value is %s: %s",
96: errstr, optarg);
1.1 deraadt 97: break;
98: case 'u':
99: uflag = 1;
100: break;
101: default:
102: usage();
1.15 ray 103: }
1.1 deraadt 104: }
105:
1.11 millert 106: argc -= optind;
1.15 ray 107: argv += optind;
1.1 deraadt 108:
1.16 kili 109: /* If neither -d nor -u are set, default is -d -u. */
110: if (!dflag && !uflag)
1.1 deraadt 111: dflag = uflag = 1;
112:
113: switch(argc) {
114: case 0:
115: ifp = stdin;
116: ofp = stdout;
117: break;
118: case 1:
119: ifp = file(argv[0], "r");
120: ofp = stdout;
121: break;
122: case 2:
123: ifp = file(argv[0], "r");
124: ofp = file(argv[1], "w");
125: break;
126: default:
127: usage();
128: }
1.20 deraadt 129:
1.22 deraadt 130: if (pledge("stdio", NULL) == -1)
131: err(1, "pledge");
1.1 deraadt 132:
133: prevline = malloc(MAXLINELEN);
134: thisline = malloc(MAXLINELEN);
135: if (prevline == NULL || thisline == NULL)
1.5 mickey 136: err(1, "malloc");
1.1 deraadt 137:
138: if (fgets(prevline, MAXLINELEN, ifp) == NULL)
139: exit(0);
140:
141: while (fgets(thisline, MAXLINELEN, ifp)) {
142: /* If requested get the chosen fields + character offsets. */
143: if (numfields || numchars) {
144: t1 = skip(thisline);
145: t2 = skip(prevline);
146: } else {
147: t1 = thisline;
148: t2 = prevline;
149: }
150:
151: /* If different, print; set previous to new value. */
152: if (strcmp(t1, t2)) {
153: show(ofp, prevline);
154: t1 = prevline;
155: prevline = thisline;
156: thisline = t1;
157: repeats = 0;
158: } else
159: ++repeats;
160: }
161: show(ofp, prevline);
162: exit(0);
163: }
164:
165: /*
166: * show --
167: * Output a line depending on the flags and number of repetitions
168: * of the line.
169: */
170: void
1.10 deraadt 171: show(FILE *ofp, char *str)
1.1 deraadt 172: {
1.16 kili 173: if ((dflag && repeats) || (uflag && !repeats)) {
174: if (cflag)
175: (void)fprintf(ofp, "%4d %s", repeats + 1, str);
176: else
177: (void)fprintf(ofp, "%s", str);
178: }
1.1 deraadt 179: }
180:
181: char *
1.10 deraadt 182: skip(char *str)
1.1 deraadt 183: {
1.24 ! schwarze 184: wchar_t wc;
1.16 kili 185: int nchars, nfields;
1.24 ! schwarze 186: int len;
! 187: int field_started;
1.1 deraadt 188:
1.16 kili 189: for (nfields = numfields; nfields && *str; nfields--) {
1.24 ! schwarze 190: /* Skip one field, including preceding blanks. */
! 191: for (field_started = 0; *str != '\0'; str += len) {
! 192: if ((len = mbtowc(&wc, str, MB_CUR_MAX)) == -1) {
! 193: (void)mbtowc(NULL, NULL, MB_CUR_MAX);
! 194: wc = L'?';
! 195: len = 1;
! 196: }
! 197: if (iswblank(wc)) {
! 198: if (field_started)
! 199: break;
! 200: } else
! 201: field_started = 1;
! 202: }
1.16 kili 203: }
1.24 ! schwarze 204:
! 205: /* Skip some additional characters. */
! 206: for (nchars = numchars; nchars-- && *str != '\0'; str += len)
! 207: if ((len = mblen(str, MB_CUR_MAX)) == -1)
! 208: len = 1;
! 209:
1.15 ray 210: return (str);
1.1 deraadt 211: }
212:
/*
 * file --
 *	Return a stream for name.  The name "-" selects a standard
 *	stream: stdin when mode begins with 'r', stdout otherwise.  Any
 *	other name is opened with fopen(3) in the given mode; on failure
 *	the program exits with status 1 after reporting the name.
 */
FILE *
file(char *name, char *mode)
{
	FILE *stream;

	if (name[0] == '-' && name[1] == '\0') {
		if (mode[0] == 'r')
			return (stdin);
		return (stdout);
	}

	stream = fopen(name, mode);
	if (stream == NULL)
		err(1, "%s", name);
	return (stream);
}
224:
/*
 * obsolete --
 *	Rewrite old-style numeric arguments in place so that getopt(3)
 *	can parse them: "-N" becomes "-fN" and "+N" becomes "-sN".
 *	Scanning starts at argv[1] and stops at "--" or at the first
 *	argument that begins with neither '-' nor '+'.
 */
void
obsolete(char *argv[])
{
	char **argp;
	char *arg, *repl;
	size_t size;

	for (argp = argv + 1; (arg = *argp) != NULL; argp++) {
		/* Return if "--" or not an option of any form. */
		if (arg[0] == '-') {
			if (arg[1] == '-')
				return;
		} else if (arg[0] != '+')
			return;

		/* Only a leading digit marks an old-style option. */
		if (!isdigit((unsigned char)arg[1]))
			continue;

		/*
		 * Allocate room for the dash, the new option letter and
		 * the original argument text, then build the replacement.
		 */
		size = strlen(arg) + 3;
		repl = malloc(size);
		if (repl == NULL)
			err(1, "malloc");
		(void)snprintf(repl, size, "-%c%s",
		    arg[0] == '+' ? 's' : 'f', arg + 1);
		*argp = repl;
	}
}
253:
1.15 ray 254: __dead void
1.10 deraadt 255: usage(void)
1.1 deraadt 256: {
1.15 ray 257: extern char *__progname;
1.16 kili 258:
1.1 deraadt 259: (void)fprintf(stderr,
1.17 kili 260: "usage: %s [-c] [-d | -u] [-f fields] [-s chars] [input_file [output_file]]\n",
1.15 ray 261: __progname);
1.1 deraadt 262: exit(1);
263: }