Annotation of src/usr.bin/look/look.c, Revision 1.16
1.16 ! millert 1: /* $OpenBSD: look.c,v 1.15 2014/05/20 01:25:23 guenther Exp $ */
1.1 deraadt 2: /* $NetBSD: look.c,v 1.7 1995/08/31 22:41:02 jtc Exp $ */
3:
4: /*-
5: * Copyright (c) 1991, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * This code is derived from software contributed to Berkeley by
9: * David Hitz of Auspex Systems, Inc.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
1.9 millert 19: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 20: * may be used to endorse or promote products derived from this software
21: * without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: */
35:
36: /*
37: * look -- find lines in a sorted list.
38: *
39: * The man page said that TABs and SPACEs participate in -d comparisons.
40: * In fact, they were ignored. This implements historic practice, not
41: * the manual page.
42: */
43:
44: #include <sys/types.h>
45: #include <sys/mman.h>
46: #include <sys/stat.h>
47:
48: #include <ctype.h>
49: #include <errno.h>
50: #include <fcntl.h>
1.16 ! millert 51: #include <stdint.h>
1.1 deraadt 52: #include <stdio.h>
53: #include <stdlib.h>
54: #include <string.h>
55: #include <unistd.h>
56: #include <err.h>
57:
58: #include "pathnames.h"
59:
60: /*
61: * FOLD and DICT convert characters to a normal form for comparison,
62: * according to the user specified flags.
63: *
64: * DICT expects integers because it uses a non-character value to
65: * indicate a character which should not participate in comparisons.
66: */
67: #define EQUAL 0
68: #define GREATER 1
69: #define LESS (-1)
70: #define NO_COMPARE (-2)
71:
72: #define FOLD(c) (isascii(c) && isupper(c) ? tolower(c) : (c))
73: #define DICT(c) (isascii(c) && isalnum(c) ? (c) : NO_COMPARE)
74:
75: int dflag, fflag;
76:
1.7 millert 77: char *binary_search(char *, char *, char *);
78: int compare(char *, char *, char *);
79: char *linear_search(char *, char *, char *);
80: int look(char *, char *, char *);
81: void print_from(char *, char *, char *);
82: void usage(void);
1.1 deraadt 83:
84: int
1.10 deraadt 85: main(int argc, char *argv[])
1.1 deraadt 86: {
87: struct stat sb;
88: int ch, fd, termchar;
89: char *back, *file, *front, *string, *p;
90:
91: file = _PATH_WORDS;
92: termchar = '\0';
1.3 millert 93: while ((ch = getopt(argc, argv, "dft:")) != -1)
1.1 deraadt 94: switch(ch) {
95: case 'd':
96: dflag = 1;
97: break;
98: case 'f':
99: fflag = 1;
100: break;
101: case 't':
102: termchar = *optarg;
103: break;
104: case '?':
105: default:
106: usage();
107: }
108: argc -= optind;
109: argv += optind;
110:
111: switch (argc) {
112: case 2: /* Don't set -df for user. */
113: string = *argv++;
114: file = *argv;
115: break;
116: case 1: /* But set -df by default. */
117: dflag = fflag = 1;
118: string = *argv;
119: break;
120: default:
121: usage();
122: }
123:
124: if (termchar != '\0' && (p = strchr(string, termchar)) != NULL)
125: *++p = '\0';
126:
127: if ((fd = open(file, O_RDONLY, 0)) < 0 || fstat(fd, &sb))
128: err(2, "%s", file);
1.16 ! millert 129: if (sb.st_size > SIZE_MAX)
1.15 guenther 130: errc(2, EFBIG, "%s", file);
1.1 deraadt 131: if ((front = mmap(NULL,
1.5 art 132: (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, (off_t)0)) == MAP_FAILED)
1.1 deraadt 133: err(2, "%s", file);
134: back = front + sb.st_size;
135: exit(look(string, front, back));
136: }
137:
138: int
1.10 deraadt 139: look(char *string, char *front, char *back)
1.1 deraadt 140: {
1.6 mpech 141: int ch;
142: char *readp, *writep;
1.1 deraadt 143:
1.11 niallo 144: /* Reformat string to avoid doing it multiple times later. */
1.1 deraadt 145: for (readp = writep = string; ch = *readp++;) {
146: if (fflag)
1.14 deraadt 147: ch = FOLD((unsigned char)ch);
1.1 deraadt 148: if (dflag)
1.14 deraadt 149: ch = DICT((unsigned char)ch);
1.1 deraadt 150: if (ch != NO_COMPARE)
151: *(writep++) = ch;
152: }
153: *writep = '\0';
154:
155: front = binary_search(string, front, back);
156: front = linear_search(string, front, back);
157:
158: if (front)
159: print_from(string, front, back);
160: return (front ? 0 : 1);
161: }
162:
163:
164: /*
165: * Binary search for "string" in memory between "front" and "back".
166: *
167: * This routine is expected to return a pointer to the start of a line at
168: * *or before* the first word matching "string". Relaxing the constraint
169: * this way simplifies the algorithm.
170: *
171: * Invariants:
172: * front points to the beginning of a line at or before the first
173: * matching string.
174: *
175: * back points to the beginning of a line at or after the first
176: * matching line.
177: *
178: * Base of the Invariants.
179: * front = NULL;
180: * back = EOF;
181: *
182: * Advancing the Invariants:
183: *
184: * p = first newline after halfway point from front to back.
185: *
186: * If the string at "p" is not greater than the string to match,
187: * p is the new front. Otherwise it is the new back.
188: *
189: * Termination:
190: *
191: * The definition of the routine allows it return at any point,
192: * since front is always at or before the line to print.
193: *
194: * In fact, it returns when the chosen "p" equals "back". This
195: * implies that there exists a string is least half as long as
196: * (back - front), which in turn implies that a linear search will
197: * be no more expensive than the cost of simply printing a string or two.
198: *
199: * Trying to continue with binary search at this point would be
200: * more trouble than it's worth.
201: */
202: #define SKIP_PAST_NEWLINE(p, back) \
203: while (p < back && *p++ != '\n');
204:
205: char *
1.10 deraadt 206: binary_search(char *string, char *front, char *back)
1.1 deraadt 207: {
1.6 mpech 208: char *p;
1.1 deraadt 209:
210: p = front + (back - front) / 2;
211: SKIP_PAST_NEWLINE(p, back);
212:
213: /*
214: * If the file changes underneath us, make sure we don't
215: * infinitely loop.
216: */
217: while (p < back && back > front) {
218: if (compare(string, p, back) == GREATER)
219: front = p;
220: else
221: back = p;
222: p = front + (back - front) / 2;
223: SKIP_PAST_NEWLINE(p, back);
224: }
225: return (front);
226: }
227:
228: /*
229: * Find the first line that starts with string, linearly searching from front
230: * to back.
231: *
232: * Return NULL for no such line.
233: *
234: * This routine assumes:
235: *
236: * o front points at the first character in a line.
237: * o front is before or at the first line to be printed.
238: */
239: char *
1.10 deraadt 240: linear_search(char *string, char *front, char *back)
1.1 deraadt 241: {
242: while (front < back) {
243: switch (compare(string, front, back)) {
244: case EQUAL: /* Found it. */
245: return (front);
246: break;
247: case LESS: /* No such string. */
248: return (NULL);
249: break;
250: case GREATER: /* Keep going. */
251: break;
252: }
253: SKIP_PAST_NEWLINE(front, back);
254: }
255: return (NULL);
256: }
257:
258: /*
259: * Print as many lines as match string, starting at front.
260: */
261: void
1.10 deraadt 262: print_from(char *string, char *front, char *back)
1.1 deraadt 263: {
264: for (; front < back && compare(string, front, back) == EQUAL; ++front) {
265: for (; front < back && *front != '\n'; ++front)
266: if (putchar(*front) == EOF)
267: err(2, "stdout");
268: if (putchar('\n') == EOF)
269: err(2, "stdout");
270: }
271: }
272:
273: /*
274: * Return LESS, GREATER, or EQUAL depending on how the string1 compares with
275: * string2 (s1 ??? s2).
276: *
277: * o Matches up to len(s1) are EQUAL.
278: * o Matches up to len(s2) are GREATER.
279: *
280: * Compare understands about the -f and -d flags, and treats comparisons
281: * appropriately.
282: *
283: * The string "s1" is null terminated. The string s2 is '\n' terminated (or
284: * "back" terminated).
285: */
286: int
1.10 deraadt 287: compare(char *s1, char *s2, char *back)
1.1 deraadt 288: {
1.6 mpech 289: int ch;
1.1 deraadt 290:
291: for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) {
292: ch = *s2;
293: if (fflag)
1.14 deraadt 294: ch = FOLD((unsigned char)ch);
1.1 deraadt 295: if (dflag)
1.14 deraadt 296: ch = DICT((unsigned char)ch);
1.1 deraadt 297:
298: if (ch == NO_COMPARE) {
299: ++s2; /* Ignore character in comparison. */
300: continue;
301: }
302: if (*s1 != ch)
303: return (*s1 < ch ? LESS : GREATER);
304: }
305: return (*s1 ? GREATER : EQUAL);
306: }
307:
308: void
1.10 deraadt 309: usage(void)
1.1 deraadt 310: {
1.12 sobrado 311: (void)fprintf(stderr,
312: "usage: look [-df] [-t termchar] string [file]\n");
1.1 deraadt 313: exit(2);
314: }