Annotation of src/usr.bin/look/look.c, Revision 1.12
1.12 ! sobrado 1: /* $OpenBSD: look.c,v 1.11 2005/06/25 17:00:35 niallo Exp $ */
1.1 deraadt 2: /* $NetBSD: look.c,v 1.7 1995/08/31 22:41:02 jtc Exp $ */
3:
4: /*-
5: * Copyright (c) 1991, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * This code is derived from software contributed to Berkeley by
9: * David Hitz of Auspex Systems, Inc.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
1.9 millert 19: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 20: * may be used to endorse or promote products derived from this software
21: * without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: */
35:
36: #ifndef lint
37: static char copyright[] =
38: "@(#) Copyright (c) 1991, 1993\n\
39: The Regents of the University of California. All rights reserved.\n";
40: #endif /* not lint */
41:
42: #ifndef lint
43: #if 0
44: static char sccsid[] = "@(#)look.c 8.2 (Berkeley) 5/4/95";
45: #endif
1.12 ! sobrado 46: static char rcsid[] = "$OpenBSD: look.c,v 1.11 2005/06/25 17:00:35 niallo Exp $";
1.1 deraadt 47: #endif /* not lint */
48:
49: /*
50: * look -- find lines in a sorted list.
51: *
52: * The man page said that TABs and SPACEs participate in -d comparisons.
53: * In fact, they were ignored. This implements historic practice, not
54: * the manual page.
55: */
56:
57: #include <sys/types.h>
58: #include <sys/mman.h>
59: #include <sys/stat.h>
60:
61: #include <ctype.h>
62: #include <errno.h>
63: #include <fcntl.h>
64: #include <limits.h>
65: #include <stdio.h>
66: #include <stdlib.h>
67: #include <string.h>
68: #include <unistd.h>
69: #include <err.h>
70:
71: #include "pathnames.h"
72:
73: /*
74: * FOLD and DICT convert characters to a normal form for comparison,
75: * according to the user specified flags.
76: *
77: * DICT expects integers because it uses a non-character value to
78: * indicate a character which should not participate in comparisons.
79: */
80: #define EQUAL 0
81: #define GREATER 1
82: #define LESS (-1)
83: #define NO_COMPARE (-2)
84:
85: #define FOLD(c) (isascii(c) && isupper(c) ? tolower(c) : (c))
86: #define DICT(c) (isascii(c) && isalnum(c) ? (c) : NO_COMPARE)
87:
88: int dflag, fflag;
89:
1.7 millert 90: char *binary_search(char *, char *, char *);
91: int compare(char *, char *, char *);
92: char *linear_search(char *, char *, char *);
93: int look(char *, char *, char *);
94: void print_from(char *, char *, char *);
95: void usage(void);
1.1 deraadt 96:
97: int
1.10 deraadt 98: main(int argc, char *argv[])
1.1 deraadt 99: {
100: struct stat sb;
101: int ch, fd, termchar;
102: char *back, *file, *front, *string, *p;
103:
104: file = _PATH_WORDS;
105: termchar = '\0';
1.3 millert 106: while ((ch = getopt(argc, argv, "dft:")) != -1)
1.1 deraadt 107: switch(ch) {
108: case 'd':
109: dflag = 1;
110: break;
111: case 'f':
112: fflag = 1;
113: break;
114: case 't':
115: termchar = *optarg;
116: break;
117: case '?':
118: default:
119: usage();
120: }
121: argc -= optind;
122: argv += optind;
123:
124: switch (argc) {
125: case 2: /* Don't set -df for user. */
126: string = *argv++;
127: file = *argv;
128: break;
129: case 1: /* But set -df by default. */
130: dflag = fflag = 1;
131: string = *argv;
132: break;
133: default:
134: usage();
135: }
136:
137: if (termchar != '\0' && (p = strchr(string, termchar)) != NULL)
138: *++p = '\0';
139:
140: if ((fd = open(file, O_RDONLY, 0)) < 0 || fstat(fd, &sb))
141: err(2, "%s", file);
142: if (sb.st_size > SIZE_T_MAX)
1.8 millert 143: errx(2, "%s: %s", file, strerror(EFBIG));
1.1 deraadt 144: if ((front = mmap(NULL,
1.5 art 145: (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, (off_t)0)) == MAP_FAILED)
1.1 deraadt 146: err(2, "%s", file);
147: back = front + sb.st_size;
148: exit(look(string, front, back));
149: }
150:
151: int
1.10 deraadt 152: look(char *string, char *front, char *back)
1.1 deraadt 153: {
1.6 mpech 154: int ch;
155: char *readp, *writep;
1.1 deraadt 156:
1.11 niallo 157: /* Reformat string to avoid doing it multiple times later. */
1.1 deraadt 158: for (readp = writep = string; ch = *readp++;) {
159: if (fflag)
160: ch = FOLD(ch);
161: if (dflag)
162: ch = DICT(ch);
163: if (ch != NO_COMPARE)
164: *(writep++) = ch;
165: }
166: *writep = '\0';
167:
168: front = binary_search(string, front, back);
169: front = linear_search(string, front, back);
170:
171: if (front)
172: print_from(string, front, back);
173: return (front ? 0 : 1);
174: }
175:
176:
177: /*
178: * Binary search for "string" in memory between "front" and "back".
179: *
180: * This routine is expected to return a pointer to the start of a line at
181: * *or before* the first word matching "string". Relaxing the constraint
182: * this way simplifies the algorithm.
183: *
184: * Invariants:
185: * front points to the beginning of a line at or before the first
186: * matching string.
187: *
188: * back points to the beginning of a line at or after the first
189: * matching line.
190: *
191: * Base of the Invariants.
192: * front = NULL;
193: * back = EOF;
194: *
195: * Advancing the Invariants:
196: *
197: * p = first newline after halfway point from front to back.
198: *
199: * If the string at "p" is not greater than the string to match,
200: * p is the new front. Otherwise it is the new back.
201: *
202: * Termination:
203: *
204: * The definition of the routine allows it return at any point,
205: * since front is always at or before the line to print.
206: *
207: * In fact, it returns when the chosen "p" equals "back". This
208: * implies that there exists a string is least half as long as
209: * (back - front), which in turn implies that a linear search will
210: * be no more expensive than the cost of simply printing a string or two.
211: *
212: * Trying to continue with binary search at this point would be
213: * more trouble than it's worth.
214: */
215: #define SKIP_PAST_NEWLINE(p, back) \
216: while (p < back && *p++ != '\n');
217:
218: char *
1.10 deraadt 219: binary_search(char *string, char *front, char *back)
1.1 deraadt 220: {
1.6 mpech 221: char *p;
1.1 deraadt 222:
223: p = front + (back - front) / 2;
224: SKIP_PAST_NEWLINE(p, back);
225:
226: /*
227: * If the file changes underneath us, make sure we don't
228: * infinitely loop.
229: */
230: while (p < back && back > front) {
231: if (compare(string, p, back) == GREATER)
232: front = p;
233: else
234: back = p;
235: p = front + (back - front) / 2;
236: SKIP_PAST_NEWLINE(p, back);
237: }
238: return (front);
239: }
240:
241: /*
242: * Find the first line that starts with string, linearly searching from front
243: * to back.
244: *
245: * Return NULL for no such line.
246: *
247: * This routine assumes:
248: *
249: * o front points at the first character in a line.
250: * o front is before or at the first line to be printed.
251: */
252: char *
1.10 deraadt 253: linear_search(char *string, char *front, char *back)
1.1 deraadt 254: {
255: while (front < back) {
256: switch (compare(string, front, back)) {
257: case EQUAL: /* Found it. */
258: return (front);
259: break;
260: case LESS: /* No such string. */
261: return (NULL);
262: break;
263: case GREATER: /* Keep going. */
264: break;
265: }
266: SKIP_PAST_NEWLINE(front, back);
267: }
268: return (NULL);
269: }
270:
271: /*
272: * Print as many lines as match string, starting at front.
273: */
274: void
1.10 deraadt 275: print_from(char *string, char *front, char *back)
1.1 deraadt 276: {
277: for (; front < back && compare(string, front, back) == EQUAL; ++front) {
278: for (; front < back && *front != '\n'; ++front)
279: if (putchar(*front) == EOF)
280: err(2, "stdout");
281: if (putchar('\n') == EOF)
282: err(2, "stdout");
283: }
284: }
285:
286: /*
287: * Return LESS, GREATER, or EQUAL depending on how the string1 compares with
288: * string2 (s1 ??? s2).
289: *
290: * o Matches up to len(s1) are EQUAL.
291: * o Matches up to len(s2) are GREATER.
292: *
293: * Compare understands about the -f and -d flags, and treats comparisons
294: * appropriately.
295: *
296: * The string "s1" is null terminated. The string s2 is '\n' terminated (or
297: * "back" terminated).
298: */
299: int
1.10 deraadt 300: compare(char *s1, char *s2, char *back)
1.1 deraadt 301: {
1.6 mpech 302: int ch;
1.1 deraadt 303:
304: for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) {
305: ch = *s2;
306: if (fflag)
307: ch = FOLD(ch);
308: if (dflag)
309: ch = DICT(ch);
310:
311: if (ch == NO_COMPARE) {
312: ++s2; /* Ignore character in comparison. */
313: continue;
314: }
315: if (*s1 != ch)
316: return (*s1 < ch ? LESS : GREATER);
317: }
318: return (*s1 ? GREATER : EQUAL);
319: }
320:
321: void
1.10 deraadt 322: usage(void)
1.1 deraadt 323: {
1.12 ! sobrado 324: (void)fprintf(stderr,
! 325: "usage: look [-df] [-t termchar] string [file]\n");
1.1 deraadt 326: exit(2);
327: }