Annotation of src/usr.bin/look/look.c, Revision 1.8
1.8 ! millert 1: /* $OpenBSD: look.c,v 1.7 2002/02/16 21:27:48 millert Exp $ */
1.1 deraadt 2: /* $NetBSD: look.c,v 1.7 1995/08/31 22:41:02 jtc Exp $ */
3:
4: /*-
5: * Copyright (c) 1991, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * This code is derived from software contributed to Berkeley by
9: * David Hitz of Auspex Systems, Inc.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: * 3. All advertising materials mentioning features or use of this software
20: * must display the following acknowledgement:
21: * This product includes software developed by the University of
22: * California, Berkeley and its contributors.
23: * 4. Neither the name of the University nor the names of its contributors
24: * may be used to endorse or promote products derived from this software
25: * without specific prior written permission.
26: *
27: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37: * SUCH DAMAGE.
38: */
39:
40: #ifndef lint
41: static char copyright[] =
42: "@(#) Copyright (c) 1991, 1993\n\
43: The Regents of the University of California. All rights reserved.\n";
44: #endif /* not lint */
45:
46: #ifndef lint
47: #if 0
48: static char sccsid[] = "@(#)look.c 8.2 (Berkeley) 5/4/95";
49: #endif
1.8 ! millert 50: static char rcsid[] = "$OpenBSD: look.c,v 1.7 2002/02/16 21:27:48 millert Exp $";
1.1 deraadt 51: #endif /* not lint */
52:
53: /*
54: * look -- find lines in a sorted list.
55: *
56: * The man page said that TABs and SPACEs participate in -d comparisons.
57: * In fact, they were ignored. This implements historic practice, not
58: * the manual page.
59: */
60:
61: #include <sys/types.h>
62: #include <sys/mman.h>
63: #include <sys/stat.h>
64:
65: #include <ctype.h>
66: #include <errno.h>
67: #include <fcntl.h>
68: #include <limits.h>
69: #include <stdio.h>
70: #include <stdlib.h>
71: #include <string.h>
72: #include <unistd.h>
73: #include <err.h>
74:
75: #include "pathnames.h"
76:
77: /*
78: * FOLD and DICT convert characters to a normal form for comparison,
79: * according to the user specified flags.
80: *
81: * DICT expects integers because it uses a non-character value to
82: * indicate a character which should not participate in comparisons.
83: */
84: #define EQUAL 0
85: #define GREATER 1
86: #define LESS (-1)
87: #define NO_COMPARE (-2)
88:
89: #define FOLD(c) (isascii(c) && isupper(c) ? tolower(c) : (c))
90: #define DICT(c) (isascii(c) && isalnum(c) ? (c) : NO_COMPARE)
91:
92: int dflag, fflag;
93:
1.7 millert 94: char *binary_search(char *, char *, char *);
95: int compare(char *, char *, char *);
96: char *linear_search(char *, char *, char *);
97: int look(char *, char *, char *);
98: void print_from(char *, char *, char *);
99: void usage(void);
1.1 deraadt 100:
101: int
102: main(argc, argv)
103: int argc;
104: char *argv[];
105: {
106: struct stat sb;
107: int ch, fd, termchar;
108: char *back, *file, *front, *string, *p;
109:
110: file = _PATH_WORDS;
111: termchar = '\0';
1.3 millert 112: while ((ch = getopt(argc, argv, "dft:")) != -1)
1.1 deraadt 113: switch(ch) {
114: case 'd':
115: dflag = 1;
116: break;
117: case 'f':
118: fflag = 1;
119: break;
120: case 't':
121: termchar = *optarg;
122: break;
123: case '?':
124: default:
125: usage();
126: }
127: argc -= optind;
128: argv += optind;
129:
130: switch (argc) {
131: case 2: /* Don't set -df for user. */
132: string = *argv++;
133: file = *argv;
134: break;
135: case 1: /* But set -df by default. */
136: dflag = fflag = 1;
137: string = *argv;
138: break;
139: default:
140: usage();
141: }
142:
143: if (termchar != '\0' && (p = strchr(string, termchar)) != NULL)
144: *++p = '\0';
145:
146: if ((fd = open(file, O_RDONLY, 0)) < 0 || fstat(fd, &sb))
147: err(2, "%s", file);
148: if (sb.st_size > SIZE_T_MAX)
1.8 ! millert 149: errx(2, "%s: %s", file, strerror(EFBIG));
1.1 deraadt 150: if ((front = mmap(NULL,
1.5 art 151: (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, (off_t)0)) == MAP_FAILED)
1.1 deraadt 152: err(2, "%s", file);
153: back = front + sb.st_size;
154: exit(look(string, front, back));
155: }
156:
157: int
158: look(string, front, back)
159: char *string, *front, *back;
160: {
1.6 mpech 161: int ch;
162: char *readp, *writep;
1.1 deraadt 163:
164: /* Reformat string string to avoid doing it multiple times later. */
165: for (readp = writep = string; ch = *readp++;) {
166: if (fflag)
167: ch = FOLD(ch);
168: if (dflag)
169: ch = DICT(ch);
170: if (ch != NO_COMPARE)
171: *(writep++) = ch;
172: }
173: *writep = '\0';
174:
175: front = binary_search(string, front, back);
176: front = linear_search(string, front, back);
177:
178: if (front)
179: print_from(string, front, back);
180: return (front ? 0 : 1);
181: }
182:
183:
184: /*
185: * Binary search for "string" in memory between "front" and "back".
186: *
187: * This routine is expected to return a pointer to the start of a line at
188: * *or before* the first word matching "string". Relaxing the constraint
189: * this way simplifies the algorithm.
190: *
191: * Invariants:
192: * front points to the beginning of a line at or before the first
193: * matching string.
194: *
195: * back points to the beginning of a line at or after the first
196: * matching line.
197: *
198: * Base of the Invariants.
199: * front = NULL;
200: * back = EOF;
201: *
202: * Advancing the Invariants:
203: *
204: * p = first newline after halfway point from front to back.
205: *
206: * If the string at "p" is not greater than the string to match,
207: * p is the new front. Otherwise it is the new back.
208: *
209: * Termination:
210: *
211: * The definition of the routine allows it return at any point,
212: * since front is always at or before the line to print.
213: *
214: * In fact, it returns when the chosen "p" equals "back". This
215: * implies that there exists a string is least half as long as
216: * (back - front), which in turn implies that a linear search will
217: * be no more expensive than the cost of simply printing a string or two.
218: *
219: * Trying to continue with binary search at this point would be
220: * more trouble than it's worth.
221: */
222: #define SKIP_PAST_NEWLINE(p, back) \
223: while (p < back && *p++ != '\n');
224:
225: char *
226: binary_search(string, front, back)
1.6 mpech 227: char *string, *front, *back;
1.1 deraadt 228: {
1.6 mpech 229: char *p;
1.1 deraadt 230:
231: p = front + (back - front) / 2;
232: SKIP_PAST_NEWLINE(p, back);
233:
234: /*
235: * If the file changes underneath us, make sure we don't
236: * infinitely loop.
237: */
238: while (p < back && back > front) {
239: if (compare(string, p, back) == GREATER)
240: front = p;
241: else
242: back = p;
243: p = front + (back - front) / 2;
244: SKIP_PAST_NEWLINE(p, back);
245: }
246: return (front);
247: }
248:
249: /*
250: * Find the first line that starts with string, linearly searching from front
251: * to back.
252: *
253: * Return NULL for no such line.
254: *
255: * This routine assumes:
256: *
257: * o front points at the first character in a line.
258: * o front is before or at the first line to be printed.
259: */
260: char *
261: linear_search(string, front, back)
262: char *string, *front, *back;
263: {
264: while (front < back) {
265: switch (compare(string, front, back)) {
266: case EQUAL: /* Found it. */
267: return (front);
268: break;
269: case LESS: /* No such string. */
270: return (NULL);
271: break;
272: case GREATER: /* Keep going. */
273: break;
274: }
275: SKIP_PAST_NEWLINE(front, back);
276: }
277: return (NULL);
278: }
279:
280: /*
281: * Print as many lines as match string, starting at front.
282: */
283: void
284: print_from(string, front, back)
1.6 mpech 285: char *string, *front, *back;
1.1 deraadt 286: {
287: for (; front < back && compare(string, front, back) == EQUAL; ++front) {
288: for (; front < back && *front != '\n'; ++front)
289: if (putchar(*front) == EOF)
290: err(2, "stdout");
291: if (putchar('\n') == EOF)
292: err(2, "stdout");
293: }
294: }
295:
296: /*
297: * Return LESS, GREATER, or EQUAL depending on how the string1 compares with
298: * string2 (s1 ??? s2).
299: *
300: * o Matches up to len(s1) are EQUAL.
301: * o Matches up to len(s2) are GREATER.
302: *
303: * Compare understands about the -f and -d flags, and treats comparisons
304: * appropriately.
305: *
306: * The string "s1" is null terminated. The string s2 is '\n' terminated (or
307: * "back" terminated).
308: */
309: int
310: compare(s1, s2, back)
1.6 mpech 311: char *s1, *s2, *back;
1.1 deraadt 312: {
1.6 mpech 313: int ch;
1.1 deraadt 314:
315: for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) {
316: ch = *s2;
317: if (fflag)
318: ch = FOLD(ch);
319: if (dflag)
320: ch = DICT(ch);
321:
322: if (ch == NO_COMPARE) {
323: ++s2; /* Ignore character in comparison. */
324: continue;
325: }
326: if (*s1 != ch)
327: return (*s1 < ch ? LESS : GREATER);
328: }
329: return (*s1 ? GREATER : EQUAL);
330: }
331:
332: void
333: usage()
334: {
335: (void)fprintf(stderr, "usage: look [-df] [-t char] string [file]\n");
336: exit(2);
337: }