Annotation of src/usr.bin/look/look.c, Revision 1.17
1.17 ! deraadt 1: /* $OpenBSD: look.c,v 1.16 2015/02/06 23:21:59 millert Exp $ */
1.1 deraadt 2: /* $NetBSD: look.c,v 1.7 1995/08/31 22:41:02 jtc Exp $ */
3:
4: /*-
5: * Copyright (c) 1991, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * This code is derived from software contributed to Berkeley by
9: * David Hitz of Auspex Systems, Inc.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
1.9 millert 19: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 20: * may be used to endorse or promote products derived from this software
21: * without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: */
35:
36: /*
37: * look -- find lines in a sorted list.
38: *
39: * The man page said that TABs and SPACEs participate in -d comparisons.
40: * In fact, they were ignored. This implements historic practice, not
41: * the manual page.
42: */
43:
44: #include <sys/types.h>
45: #include <sys/mman.h>
46: #include <sys/stat.h>
47:
48: #include <ctype.h>
49: #include <errno.h>
50: #include <fcntl.h>
1.16 millert 51: #include <stdint.h>
1.1 deraadt 52: #include <stdio.h>
53: #include <stdlib.h>
54: #include <string.h>
55: #include <unistd.h>
56: #include <err.h>
57:
58: #include "pathnames.h"
59:
60: /*
61: * FOLD and DICT convert characters to a normal form for comparison,
62: * according to the user specified flags.
63: *
64: * DICT expects integers because it uses a non-character value to
65: * indicate a character which should not participate in comparisons.
66: */
67: #define EQUAL 0
68: #define GREATER 1
69: #define LESS (-1)
70: #define NO_COMPARE (-2)
71:
72: #define FOLD(c) (isascii(c) && isupper(c) ? tolower(c) : (c))
73: #define DICT(c) (isascii(c) && isalnum(c) ? (c) : NO_COMPARE)
74:
75: int dflag, fflag;
76:
1.7 millert 77: char *binary_search(char *, char *, char *);
78: int compare(char *, char *, char *);
79: char *linear_search(char *, char *, char *);
80: int look(char *, char *, char *);
81: void print_from(char *, char *, char *);
82: void usage(void);
1.1 deraadt 83:
84: int
1.10 deraadt 85: main(int argc, char *argv[])
1.1 deraadt 86: {
87: struct stat sb;
88: int ch, fd, termchar;
89: char *back, *file, *front, *string, *p;
1.17 ! deraadt 90:
! 91: if (tame("stdio rpath", NULL) == -1)
! 92: err(1, "tame");
1.1 deraadt 93:
94: file = _PATH_WORDS;
95: termchar = '\0';
1.3 millert 96: while ((ch = getopt(argc, argv, "dft:")) != -1)
1.1 deraadt 97: switch(ch) {
98: case 'd':
99: dflag = 1;
100: break;
101: case 'f':
102: fflag = 1;
103: break;
104: case 't':
105: termchar = *optarg;
106: break;
107: case '?':
108: default:
109: usage();
110: }
111: argc -= optind;
112: argv += optind;
113:
114: switch (argc) {
115: case 2: /* Don't set -df for user. */
116: string = *argv++;
117: file = *argv;
118: break;
119: case 1: /* But set -df by default. */
120: dflag = fflag = 1;
121: string = *argv;
122: break;
123: default:
124: usage();
125: }
126:
127: if (termchar != '\0' && (p = strchr(string, termchar)) != NULL)
128: *++p = '\0';
129:
130: if ((fd = open(file, O_RDONLY, 0)) < 0 || fstat(fd, &sb))
131: err(2, "%s", file);
1.16 millert 132: if (sb.st_size > SIZE_MAX)
1.15 guenther 133: errc(2, EFBIG, "%s", file);
1.1 deraadt 134: if ((front = mmap(NULL,
1.5 art 135: (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, (off_t)0)) == MAP_FAILED)
1.1 deraadt 136: err(2, "%s", file);
137: back = front + sb.st_size;
138: exit(look(string, front, back));
139: }
140:
141: int
1.10 deraadt 142: look(char *string, char *front, char *back)
1.1 deraadt 143: {
1.6 mpech 144: int ch;
145: char *readp, *writep;
1.1 deraadt 146:
1.11 niallo 147: /* Reformat string to avoid doing it multiple times later. */
1.1 deraadt 148: for (readp = writep = string; ch = *readp++;) {
149: if (fflag)
1.14 deraadt 150: ch = FOLD((unsigned char)ch);
1.1 deraadt 151: if (dflag)
1.14 deraadt 152: ch = DICT((unsigned char)ch);
1.1 deraadt 153: if (ch != NO_COMPARE)
154: *(writep++) = ch;
155: }
156: *writep = '\0';
157:
158: front = binary_search(string, front, back);
159: front = linear_search(string, front, back);
160:
161: if (front)
162: print_from(string, front, back);
163: return (front ? 0 : 1);
164: }
165:
166:
167: /*
168: * Binary search for "string" in memory between "front" and "back".
169: *
170: * This routine is expected to return a pointer to the start of a line at
171: * *or before* the first word matching "string". Relaxing the constraint
172: * this way simplifies the algorithm.
173: *
174: * Invariants:
175: * front points to the beginning of a line at or before the first
176: * matching string.
177: *
178: * back points to the beginning of a line at or after the first
179: * matching line.
180: *
181: * Base of the Invariants.
182: * front = NULL;
183: * back = EOF;
184: *
185: * Advancing the Invariants:
186: *
187: * p = first newline after halfway point from front to back.
188: *
189: * If the string at "p" is not greater than the string to match,
190: * p is the new front. Otherwise it is the new back.
191: *
192: * Termination:
193: *
194: * The definition of the routine allows it return at any point,
195: * since front is always at or before the line to print.
196: *
197: * In fact, it returns when the chosen "p" equals "back". This
198: * implies that there exists a string is least half as long as
199: * (back - front), which in turn implies that a linear search will
200: * be no more expensive than the cost of simply printing a string or two.
201: *
202: * Trying to continue with binary search at this point would be
203: * more trouble than it's worth.
204: */
205: #define SKIP_PAST_NEWLINE(p, back) \
206: while (p < back && *p++ != '\n');
207:
208: char *
1.10 deraadt 209: binary_search(char *string, char *front, char *back)
1.1 deraadt 210: {
1.6 mpech 211: char *p;
1.1 deraadt 212:
213: p = front + (back - front) / 2;
214: SKIP_PAST_NEWLINE(p, back);
215:
216: /*
217: * If the file changes underneath us, make sure we don't
218: * infinitely loop.
219: */
220: while (p < back && back > front) {
221: if (compare(string, p, back) == GREATER)
222: front = p;
223: else
224: back = p;
225: p = front + (back - front) / 2;
226: SKIP_PAST_NEWLINE(p, back);
227: }
228: return (front);
229: }
230:
231: /*
232: * Find the first line that starts with string, linearly searching from front
233: * to back.
234: *
235: * Return NULL for no such line.
236: *
237: * This routine assumes:
238: *
239: * o front points at the first character in a line.
240: * o front is before or at the first line to be printed.
241: */
242: char *
1.10 deraadt 243: linear_search(char *string, char *front, char *back)
1.1 deraadt 244: {
245: while (front < back) {
246: switch (compare(string, front, back)) {
247: case EQUAL: /* Found it. */
248: return (front);
249: break;
250: case LESS: /* No such string. */
251: return (NULL);
252: break;
253: case GREATER: /* Keep going. */
254: break;
255: }
256: SKIP_PAST_NEWLINE(front, back);
257: }
258: return (NULL);
259: }
260:
261: /*
262: * Print as many lines as match string, starting at front.
263: */
264: void
1.10 deraadt 265: print_from(char *string, char *front, char *back)
1.1 deraadt 266: {
267: for (; front < back && compare(string, front, back) == EQUAL; ++front) {
268: for (; front < back && *front != '\n'; ++front)
269: if (putchar(*front) == EOF)
270: err(2, "stdout");
271: if (putchar('\n') == EOF)
272: err(2, "stdout");
273: }
274: }
275:
276: /*
277: * Return LESS, GREATER, or EQUAL depending on how the string1 compares with
278: * string2 (s1 ??? s2).
279: *
280: * o Matches up to len(s1) are EQUAL.
281: * o Matches up to len(s2) are GREATER.
282: *
283: * Compare understands about the -f and -d flags, and treats comparisons
284: * appropriately.
285: *
286: * The string "s1" is null terminated. The string s2 is '\n' terminated (or
287: * "back" terminated).
288: */
289: int
1.10 deraadt 290: compare(char *s1, char *s2, char *back)
1.1 deraadt 291: {
1.6 mpech 292: int ch;
1.1 deraadt 293:
294: for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) {
295: ch = *s2;
296: if (fflag)
1.14 deraadt 297: ch = FOLD((unsigned char)ch);
1.1 deraadt 298: if (dflag)
1.14 deraadt 299: ch = DICT((unsigned char)ch);
1.1 deraadt 300:
301: if (ch == NO_COMPARE) {
302: ++s2; /* Ignore character in comparison. */
303: continue;
304: }
305: if (*s1 != ch)
306: return (*s1 < ch ? LESS : GREATER);
307: }
308: return (*s1 ? GREATER : EQUAL);
309: }
310:
311: void
1.10 deraadt 312: usage(void)
1.1 deraadt 313: {
1.12 sobrado 314: (void)fprintf(stderr,
315: "usage: look [-df] [-t termchar] string [file]\n");
1.1 deraadt 316: exit(2);
317: }