Annotation of src/usr.bin/look/look.c, Revision 1.9
1.9 ! millert 1: /* $OpenBSD: look.c,v 1.8 2002/03/01 00:51:08 millert Exp $ */
1.1 deraadt 2: /* $NetBSD: look.c,v 1.7 1995/08/31 22:41:02 jtc Exp $ */
3:
4: /*-
5: * Copyright (c) 1991, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * This code is derived from software contributed to Berkeley by
9: * David Hitz of Auspex Systems, Inc.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
1.9 ! millert 19: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 20: * may be used to endorse or promote products derived from this software
21: * without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: */
35:
36: #ifndef lint
37: static char copyright[] =
38: "@(#) Copyright (c) 1991, 1993\n\
39: The Regents of the University of California. All rights reserved.\n";
40: #endif /* not lint */
41:
42: #ifndef lint
43: #if 0
44: static char sccsid[] = "@(#)look.c 8.2 (Berkeley) 5/4/95";
45: #endif
1.9 ! millert 46: static char rcsid[] = "$OpenBSD: look.c,v 1.8 2002/03/01 00:51:08 millert Exp $";
1.1 deraadt 47: #endif /* not lint */
48:
49: /*
50: * look -- find lines in a sorted list.
51: *
52: * The man page said that TABs and SPACEs participate in -d comparisons.
53: * In fact, they were ignored. This implements historic practice, not
54: * the manual page.
55: */
56:
57: #include <sys/types.h>
58: #include <sys/mman.h>
59: #include <sys/stat.h>
60:
61: #include <ctype.h>
62: #include <errno.h>
63: #include <fcntl.h>
64: #include <limits.h>
65: #include <stdio.h>
66: #include <stdlib.h>
67: #include <string.h>
68: #include <unistd.h>
69: #include <err.h>
70:
71: #include "pathnames.h"
72:
73: /*
74: * FOLD and DICT convert characters to a normal form for comparison,
75: * according to the user specified flags.
76: *
77: * DICT expects integers because it uses a non-character value to
78: * indicate a character which should not participate in comparisons.
79: */
80: #define EQUAL 0
81: #define GREATER 1
82: #define LESS (-1)
83: #define NO_COMPARE (-2)
84:
85: #define FOLD(c) (isascii(c) && isupper(c) ? tolower(c) : (c))
86: #define DICT(c) (isascii(c) && isalnum(c) ? (c) : NO_COMPARE)
87:
88: int dflag, fflag;
89:
1.7 millert 90: char *binary_search(char *, char *, char *);
91: int compare(char *, char *, char *);
92: char *linear_search(char *, char *, char *);
93: int look(char *, char *, char *);
94: void print_from(char *, char *, char *);
95: void usage(void);
1.1 deraadt 96:
97: int
98: main(argc, argv)
99: int argc;
100: char *argv[];
101: {
102: struct stat sb;
103: int ch, fd, termchar;
104: char *back, *file, *front, *string, *p;
105:
106: file = _PATH_WORDS;
107: termchar = '\0';
1.3 millert 108: while ((ch = getopt(argc, argv, "dft:")) != -1)
1.1 deraadt 109: switch(ch) {
110: case 'd':
111: dflag = 1;
112: break;
113: case 'f':
114: fflag = 1;
115: break;
116: case 't':
117: termchar = *optarg;
118: break;
119: case '?':
120: default:
121: usage();
122: }
123: argc -= optind;
124: argv += optind;
125:
126: switch (argc) {
127: case 2: /* Don't set -df for user. */
128: string = *argv++;
129: file = *argv;
130: break;
131: case 1: /* But set -df by default. */
132: dflag = fflag = 1;
133: string = *argv;
134: break;
135: default:
136: usage();
137: }
138:
139: if (termchar != '\0' && (p = strchr(string, termchar)) != NULL)
140: *++p = '\0';
141:
142: if ((fd = open(file, O_RDONLY, 0)) < 0 || fstat(fd, &sb))
143: err(2, "%s", file);
144: if (sb.st_size > SIZE_T_MAX)
1.8 millert 145: errx(2, "%s: %s", file, strerror(EFBIG));
1.1 deraadt 146: if ((front = mmap(NULL,
1.5 art 147: (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, (off_t)0)) == MAP_FAILED)
1.1 deraadt 148: err(2, "%s", file);
149: back = front + sb.st_size;
150: exit(look(string, front, back));
151: }
152:
153: int
154: look(string, front, back)
155: char *string, *front, *back;
156: {
1.6 mpech 157: int ch;
158: char *readp, *writep;
1.1 deraadt 159:
160: /* Reformat string string to avoid doing it multiple times later. */
161: for (readp = writep = string; ch = *readp++;) {
162: if (fflag)
163: ch = FOLD(ch);
164: if (dflag)
165: ch = DICT(ch);
166: if (ch != NO_COMPARE)
167: *(writep++) = ch;
168: }
169: *writep = '\0';
170:
171: front = binary_search(string, front, back);
172: front = linear_search(string, front, back);
173:
174: if (front)
175: print_from(string, front, back);
176: return (front ? 0 : 1);
177: }
178:
179:
180: /*
181: * Binary search for "string" in memory between "front" and "back".
182: *
183: * This routine is expected to return a pointer to the start of a line at
184: * *or before* the first word matching "string". Relaxing the constraint
185: * this way simplifies the algorithm.
186: *
187: * Invariants:
188: * front points to the beginning of a line at or before the first
189: * matching string.
190: *
191: * back points to the beginning of a line at or after the first
192: * matching line.
193: *
194: * Base of the Invariants.
195: * front = NULL;
196: * back = EOF;
197: *
198: * Advancing the Invariants:
199: *
200: * p = first newline after halfway point from front to back.
201: *
202: * If the string at "p" is not greater than the string to match,
203: * p is the new front. Otherwise it is the new back.
204: *
205: * Termination:
206: *
207: * The definition of the routine allows it return at any point,
208: * since front is always at or before the line to print.
209: *
210: * In fact, it returns when the chosen "p" equals "back". This
211: * implies that there exists a string is least half as long as
212: * (back - front), which in turn implies that a linear search will
213: * be no more expensive than the cost of simply printing a string or two.
214: *
215: * Trying to continue with binary search at this point would be
216: * more trouble than it's worth.
217: */
218: #define SKIP_PAST_NEWLINE(p, back) \
219: while (p < back && *p++ != '\n');
220:
221: char *
222: binary_search(string, front, back)
1.6 mpech 223: char *string, *front, *back;
1.1 deraadt 224: {
1.6 mpech 225: char *p;
1.1 deraadt 226:
227: p = front + (back - front) / 2;
228: SKIP_PAST_NEWLINE(p, back);
229:
230: /*
231: * If the file changes underneath us, make sure we don't
232: * infinitely loop.
233: */
234: while (p < back && back > front) {
235: if (compare(string, p, back) == GREATER)
236: front = p;
237: else
238: back = p;
239: p = front + (back - front) / 2;
240: SKIP_PAST_NEWLINE(p, back);
241: }
242: return (front);
243: }
244:
245: /*
246: * Find the first line that starts with string, linearly searching from front
247: * to back.
248: *
249: * Return NULL for no such line.
250: *
251: * This routine assumes:
252: *
253: * o front points at the first character in a line.
254: * o front is before or at the first line to be printed.
255: */
256: char *
257: linear_search(string, front, back)
258: char *string, *front, *back;
259: {
260: while (front < back) {
261: switch (compare(string, front, back)) {
262: case EQUAL: /* Found it. */
263: return (front);
264: break;
265: case LESS: /* No such string. */
266: return (NULL);
267: break;
268: case GREATER: /* Keep going. */
269: break;
270: }
271: SKIP_PAST_NEWLINE(front, back);
272: }
273: return (NULL);
274: }
275:
276: /*
277: * Print as many lines as match string, starting at front.
278: */
279: void
280: print_from(string, front, back)
1.6 mpech 281: char *string, *front, *back;
1.1 deraadt 282: {
283: for (; front < back && compare(string, front, back) == EQUAL; ++front) {
284: for (; front < back && *front != '\n'; ++front)
285: if (putchar(*front) == EOF)
286: err(2, "stdout");
287: if (putchar('\n') == EOF)
288: err(2, "stdout");
289: }
290: }
291:
292: /*
293: * Return LESS, GREATER, or EQUAL depending on how the string1 compares with
294: * string2 (s1 ??? s2).
295: *
296: * o Matches up to len(s1) are EQUAL.
297: * o Matches up to len(s2) are GREATER.
298: *
299: * Compare understands about the -f and -d flags, and treats comparisons
300: * appropriately.
301: *
302: * The string "s1" is null terminated. The string s2 is '\n' terminated (or
303: * "back" terminated).
304: */
305: int
306: compare(s1, s2, back)
1.6 mpech 307: char *s1, *s2, *back;
1.1 deraadt 308: {
1.6 mpech 309: int ch;
1.1 deraadt 310:
311: for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) {
312: ch = *s2;
313: if (fflag)
314: ch = FOLD(ch);
315: if (dflag)
316: ch = DICT(ch);
317:
318: if (ch == NO_COMPARE) {
319: ++s2; /* Ignore character in comparison. */
320: continue;
321: }
322: if (*s1 != ch)
323: return (*s1 < ch ? LESS : GREATER);
324: }
325: return (*s1 ? GREATER : EQUAL);
326: }
327:
328: void
329: usage()
330: {
331: (void)fprintf(stderr, "usage: look [-df] [-t char] string [file]\n");
332: exit(2);
333: }