Annotation of src/usr.bin/look/look.c, Revision 1.1.1.1
1.1 deraadt 1: /* $NetBSD: look.c,v 1.7 1995/08/31 22:41:02 jtc Exp $ */
2:
3: /*-
4: * Copyright (c) 1991, 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
8: * David Hitz of Auspex Systems, Inc.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: * 3. All advertising materials mentioning features or use of this software
19: * must display the following acknowledgement:
20: * This product includes software developed by the University of
21: * California, Berkeley and its contributors.
22: * 4. Neither the name of the University nor the names of its contributors
23: * may be used to endorse or promote products derived from this software
24: * without specific prior written permission.
25: *
26: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36: * SUCH DAMAGE.
37: */
38:
39: #ifndef lint
40: static char copyright[] =
41: "@(#) Copyright (c) 1991, 1993\n\
42: The Regents of the University of California. All rights reserved.\n";
43: #endif /* not lint */
44:
45: #ifndef lint
46: #if 0
47: static char sccsid[] = "@(#)look.c 8.2 (Berkeley) 5/4/95";
48: #endif
49: static char rcsid[] = "$NetBSD: look.c,v 1.7 1995/08/31 22:41:02 jtc Exp $";
50: #endif /* not lint */
51:
52: /*
53: * look -- find lines in a sorted list.
54: *
55: * The man page said that TABs and SPACEs participate in -d comparisons.
56: * In fact, they were ignored. This implements historic practice, not
57: * the manual page.
58: */
59:
60: #include <sys/types.h>
61: #include <sys/mman.h>
62: #include <sys/stat.h>
63:
64: #include <ctype.h>
65: #include <errno.h>
66: #include <fcntl.h>
67: #include <limits.h>
68: #include <stdio.h>
69: #include <stdlib.h>
70: #include <string.h>
71: #include <unistd.h>
72: #include <err.h>
73:
74: #include "pathnames.h"
75:
/*
 * Result codes produced by compare().
 */
#define	EQUAL		0
#define	GREATER		1
#define	LESS		(-1)

/*
 * Value DICT yields for a character that must not take part in a
 * comparison.  It is not a character value, which is why DICT works on
 * integers rather than chars.
 */
#define	NO_COMPARE	(-2)

/*
 * FOLD and DICT map a character to the normal form used for comparison,
 * as selected by the user's flags:
 *
 *	FOLD	maps an ASCII upper-case letter to lower case and leaves
 *		every other value untouched.
 *	DICT	keeps ASCII alphanumerics and turns everything else into
 *		NO_COMPARE.
 *
 * Both macros evaluate their argument more than once.
 */
#define	FOLD(c)	((isascii(c) && isupper(c)) ? tolower(c) : (c))
#define	DICT(c)	((isascii(c) && isalnum(c)) ? (c) : NO_COMPARE)
90:
91: int dflag, fflag;
92:
93: char *binary_search __P((char *, char *, char *));
94: int compare __P((char *, char *, char *));
95: char *linear_search __P((char *, char *, char *));
96: int look __P((char *, char *, char *));
97: void print_from __P((char *, char *, char *));
98: void usage __P((void));
99:
100: int
101: main(argc, argv)
102: int argc;
103: char *argv[];
104: {
105: struct stat sb;
106: int ch, fd, termchar;
107: char *back, *file, *front, *string, *p;
108:
109: file = _PATH_WORDS;
110: termchar = '\0';
111: while ((ch = getopt(argc, argv, "dft:")) != EOF)
112: switch(ch) {
113: case 'd':
114: dflag = 1;
115: break;
116: case 'f':
117: fflag = 1;
118: break;
119: case 't':
120: termchar = *optarg;
121: break;
122: case '?':
123: default:
124: usage();
125: }
126: argc -= optind;
127: argv += optind;
128:
129: switch (argc) {
130: case 2: /* Don't set -df for user. */
131: string = *argv++;
132: file = *argv;
133: break;
134: case 1: /* But set -df by default. */
135: dflag = fflag = 1;
136: string = *argv;
137: break;
138: default:
139: usage();
140: }
141:
142: if (termchar != '\0' && (p = strchr(string, termchar)) != NULL)
143: *++p = '\0';
144:
145: if ((fd = open(file, O_RDONLY, 0)) < 0 || fstat(fd, &sb))
146: err(2, "%s", file);
147: if (sb.st_size > SIZE_T_MAX)
148: err(2, "%s: %s", file, strerror(EFBIG));
149: if ((front = mmap(NULL,
150: (size_t)sb.st_size, PROT_READ, 0, fd, (off_t)0)) == NULL)
151: err(2, "%s", file);
152: back = front + sb.st_size;
153: exit(look(string, front, back));
154: }
155:
156: int
157: look(string, front, back)
158: char *string, *front, *back;
159: {
160: register int ch;
161: register char *readp, *writep;
162:
163: /* Reformat string string to avoid doing it multiple times later. */
164: for (readp = writep = string; ch = *readp++;) {
165: if (fflag)
166: ch = FOLD(ch);
167: if (dflag)
168: ch = DICT(ch);
169: if (ch != NO_COMPARE)
170: *(writep++) = ch;
171: }
172: *writep = '\0';
173:
174: front = binary_search(string, front, back);
175: front = linear_search(string, front, back);
176:
177: if (front)
178: print_from(string, front, back);
179: return (front ? 0 : 1);
180: }
181:
182:
183: /*
184: * Binary search for "string" in memory between "front" and "back".
185: *
186: * This routine is expected to return a pointer to the start of a line at
187: * *or before* the first word matching "string". Relaxing the constraint
188: * this way simplifies the algorithm.
189: *
190: * Invariants:
191: * front points to the beginning of a line at or before the first
192: * matching string.
193: *
194: * back points to the beginning of a line at or after the first
195: * matching line.
196: *
197: * Base of the Invariants.
198: * front = NULL;
199: * back = EOF;
200: *
201: * Advancing the Invariants:
202: *
203: * p = first newline after halfway point from front to back.
204: *
205: * If the string at "p" is not greater than the string to match,
206: * p is the new front. Otherwise it is the new back.
207: *
208: * Termination:
209: *
210: * The definition of the routine allows it to return at any point,
211: * since front is always at or before the line to print.
212: *
213: * In fact, it returns when the chosen "p" equals "back". This
214: * implies that there exists a string at least half as long as
215: * (back - front), which in turn implies that a linear search will
216: * be no more expensive than the cost of simply printing a string or two.
217: *
218: * Trying to continue with binary search at this point would be
219: * more trouble than it's worth.
220: */
/*
 * Advance p to just past the next '\n', or to back if no newline is
 * found first.  p must be a modifiable lvalue; it is evaluated more
 * than once.
 *
 * The do/while (0) wrapper makes the macro a single statement that
 * needs its own terminating ';', so it is safe in an unbraced if/else.
 */
#define	SKIP_PAST_NEWLINE(p, back)					\
	do {								\
		while ((p) < (back) && *(p)++ != '\n')			\
			continue;					\
	} while (0)
223:
224: char *
225: binary_search(string, front, back)
226: register char *string, *front, *back;
227: {
228: register char *p;
229:
230: p = front + (back - front) / 2;
231: SKIP_PAST_NEWLINE(p, back);
232:
233: /*
234: * If the file changes underneath us, make sure we don't
235: * infinitely loop.
236: */
237: while (p < back && back > front) {
238: if (compare(string, p, back) == GREATER)
239: front = p;
240: else
241: back = p;
242: p = front + (back - front) / 2;
243: SKIP_PAST_NEWLINE(p, back);
244: }
245: return (front);
246: }
247:
248: /*
249: * Find the first line that starts with string, linearly searching from front
250: * to back.
251: *
252: * Return NULL for no such line.
253: *
254: * This routine assumes:
255: *
256: * o front points at the first character in a line.
257: * o front is before or at the first line to be printed.
258: */
259: char *
260: linear_search(string, front, back)
261: char *string, *front, *back;
262: {
263: while (front < back) {
264: switch (compare(string, front, back)) {
265: case EQUAL: /* Found it. */
266: return (front);
267: break;
268: case LESS: /* No such string. */
269: return (NULL);
270: break;
271: case GREATER: /* Keep going. */
272: break;
273: }
274: SKIP_PAST_NEWLINE(front, back);
275: }
276: return (NULL);
277: }
278:
279: /*
280: * Print as many lines as match string, starting at front.
281: */
282: void
283: print_from(string, front, back)
284: register char *string, *front, *back;
285: {
286: for (; front < back && compare(string, front, back) == EQUAL; ++front) {
287: for (; front < back && *front != '\n'; ++front)
288: if (putchar(*front) == EOF)
289: err(2, "stdout");
290: if (putchar('\n') == EOF)
291: err(2, "stdout");
292: }
293: }
294:
295: /*
296: * Return LESS, GREATER, or EQUAL depending on how the string1 compares with
297: * string2 (s1 ??? s2).
298: *
299: * o Matches up to len(s1) are EQUAL.
300: * o Matches up to len(s2) are GREATER.
301: *
302: * Compare understands about the -f and -d flags, and treats comparisons
303: * appropriately.
304: *
305: * The string "s1" is null terminated. The string s2 is '\n' terminated (or
306: * "back" terminated).
307: */
308: int
309: compare(s1, s2, back)
310: register char *s1, *s2, *back;
311: {
312: register int ch;
313:
314: for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) {
315: ch = *s2;
316: if (fflag)
317: ch = FOLD(ch);
318: if (dflag)
319: ch = DICT(ch);
320:
321: if (ch == NO_COMPARE) {
322: ++s2; /* Ignore character in comparison. */
323: continue;
324: }
325: if (*s1 != ch)
326: return (*s1 < ch ? LESS : GREATER);
327: }
328: return (*s1 ? GREATER : EQUAL);
329: }
330:
/*
 * Write the usage synopsis to standard error and exit with status 2.
 */
void
usage()
{
	static const char synopsis[] =
	    "usage: look [-df] [-t char] string [file]\n";

	(void)fputs(synopsis, stderr);
	exit(2);
}