Annotation of src/usr.bin/look/look.c, Revision 1.24
1.24 ! beck 1: /* $OpenBSD: look.c,v 1.23 2019/06/28 13:35:01 deraadt Exp $ */
1.1 deraadt 2: /* $NetBSD: look.c,v 1.7 1995/08/31 22:41:02 jtc Exp $ */
3:
4: /*-
5: * Copyright (c) 1991, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * This code is derived from software contributed to Berkeley by
9: * David Hitz of Auspex Systems, Inc.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
1.9 millert 19: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 20: * may be used to endorse or promote products derived from this software
21: * without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: */
35:
36: /*
37: * look -- find lines in a sorted list.
1.21 krw 38: *
1.1 deraadt 39: * The man page said that TABs and SPACEs participate in -d comparisons.
40: * In fact, they were ignored. This implements historic practice, not
41: * the manual page.
42: */
43:
44: #include <sys/types.h>
45: #include <sys/mman.h>
46: #include <sys/stat.h>
47:
48: #include <ctype.h>
49: #include <errno.h>
50: #include <fcntl.h>
1.16 millert 51: #include <stdint.h>
1.1 deraadt 52: #include <stdio.h>
53: #include <stdlib.h>
54: #include <string.h>
55: #include <unistd.h>
56: #include <err.h>
57:
58: #include "pathnames.h"
59:
60: #define EQUAL 0
61: #define GREATER 1
62: #define LESS (-1)
63:
64: int dflag, fflag;
65:
1.7 millert 66: char *binary_search(char *, char *, char *);
67: int compare(char *, char *, char *);
68: char *linear_search(char *, char *, char *);
69: int look(char *, char *, char *);
70: void print_from(char *, char *, char *);
71: void usage(void);
1.1 deraadt 72:
73: int
1.10 deraadt 74: main(int argc, char *argv[])
1.1 deraadt 75: {
76: struct stat sb;
77: int ch, fd, termchar;
78: char *back, *file, *front, *string, *p;
1.17 deraadt 79:
1.1 deraadt 80: file = _PATH_WORDS;
81: termchar = '\0';
1.3 millert 82: while ((ch = getopt(argc, argv, "dft:")) != -1)
1.1 deraadt 83: switch(ch) {
84: case 'd':
85: dflag = 1;
86: break;
87: case 'f':
88: fflag = 1;
89: break;
90: case 't':
91: termchar = *optarg;
92: break;
93: case '?':
94: default:
95: usage();
96: }
97: argc -= optind;
98: argv += optind;
99:
100: switch (argc) {
101: case 2: /* Don't set -df for user. */
102: string = *argv++;
103: file = *argv;
104: break;
105: case 1: /* But set -df by default. */
106: dflag = fflag = 1;
107: string = *argv;
108: break;
109: default:
110: usage();
111: }
1.22 mestre 112:
113: if (unveil(file, "r") == -1)
1.24 ! beck 114: err(2, "unveil %s", file);
1.22 mestre 115: if (pledge("stdio rpath", NULL) == -1)
116: err(2, "pledge");
1.1 deraadt 117:
118: if (termchar != '\0' && (p = strchr(string, termchar)) != NULL)
119: *++p = '\0';
120:
1.23 deraadt 121: if ((fd = open(file, O_RDONLY, 0)) == -1 || fstat(fd, &sb) == -1)
1.1 deraadt 122: err(2, "%s", file);
1.16 millert 123: if (sb.st_size > SIZE_MAX)
1.15 guenther 124: errc(2, EFBIG, "%s", file);
1.1 deraadt 125: if ((front = mmap(NULL,
1.5 art 126: (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, (off_t)0)) == MAP_FAILED)
1.1 deraadt 127: err(2, "%s", file);
128: back = front + sb.st_size;
129: exit(look(string, front, back));
130: }
131:
132: int
1.10 deraadt 133: look(char *string, char *front, char *back)
1.1 deraadt 134: {
1.6 mpech 135: int ch;
136: char *readp, *writep;
1.1 deraadt 137:
1.11 niallo 138: /* Reformat string to avoid doing it multiple times later. */
1.20 krw 139: for (readp = writep = string; (ch = *readp++);) {
1.1 deraadt 140: if (fflag)
1.19 millert 141: ch = tolower((unsigned char)ch);
142: if (!dflag || isalnum((unsigned char)ch))
1.1 deraadt 143: *(writep++) = ch;
144: }
145: *writep = '\0';
146:
147: front = binary_search(string, front, back);
148: front = linear_search(string, front, back);
149:
150: if (front)
151: print_from(string, front, back);
152: return (front ? 0 : 1);
153: }
154:
155:
156: /*
157: * Binary search for "string" in memory between "front" and "back".
1.21 krw 158: *
1.1 deraadt 159: * This routine is expected to return a pointer to the start of a line at
160: * *or before* the first word matching "string". Relaxing the constraint
161: * this way simplifies the algorithm.
1.21 krw 162: *
1.1 deraadt 163: * Invariants:
1.21 krw 164: * front points to the beginning of a line at or before the first
1.1 deraadt 165: * matching string.
1.21 krw 166: *
167: * back points to the beginning of a line at or after the first
1.1 deraadt 168: * matching line.
1.21 krw 169: *
1.1 deraadt 170: * Base of the Invariants.
1.21 krw 171: * front = NULL;
1.1 deraadt 172: * back = EOF;
1.21 krw 173: *
1.1 deraadt 174: * Advancing the Invariants:
1.21 krw 175: *
176: * p = first newline after halfway point from front to back.
177: *
178: * If the string at "p" is not greater than the string to match,
1.1 deraadt 179: * p is the new front. Otherwise it is the new back.
1.21 krw 180: *
1.1 deraadt 181: * Termination:
1.21 krw 182: *
183: * The definition of the routine allows it return at any point,
1.1 deraadt 184: * since front is always at or before the line to print.
1.21 krw 185: *
186: * In fact, it returns when the chosen "p" equals "back". This
187: * implies that there exists a string is least half as long as
188: * (back - front), which in turn implies that a linear search will
1.1 deraadt 189: * be no more expensive than the cost of simply printing a string or two.
1.21 krw 190: *
191: * Trying to continue with binary search at this point would be
1.1 deraadt 192: * more trouble than it's worth.
193: */
194: #define SKIP_PAST_NEWLINE(p, back) \
195: while (p < back && *p++ != '\n');
196:
197: char *
1.10 deraadt 198: binary_search(char *string, char *front, char *back)
1.1 deraadt 199: {
1.6 mpech 200: char *p;
1.1 deraadt 201:
202: p = front + (back - front) / 2;
203: SKIP_PAST_NEWLINE(p, back);
204:
205: /*
206: * If the file changes underneath us, make sure we don't
207: * infinitely loop.
208: */
209: while (p < back && back > front) {
210: if (compare(string, p, back) == GREATER)
211: front = p;
212: else
213: back = p;
214: p = front + (back - front) / 2;
215: SKIP_PAST_NEWLINE(p, back);
216: }
217: return (front);
218: }
219:
220: /*
221: * Find the first line that starts with string, linearly searching from front
222: * to back.
1.21 krw 223: *
1.1 deraadt 224: * Return NULL for no such line.
1.21 krw 225: *
1.1 deraadt 226: * This routine assumes:
1.21 krw 227: *
228: * o front points at the first character in a line.
1.1 deraadt 229: * o front is before or at the first line to be printed.
230: */
231: char *
1.10 deraadt 232: linear_search(char *string, char *front, char *back)
1.1 deraadt 233: {
234: while (front < back) {
235: switch (compare(string, front, back)) {
236: case EQUAL: /* Found it. */
237: return (front);
238: break;
239: case LESS: /* No such string. */
240: return (NULL);
241: break;
242: case GREATER: /* Keep going. */
243: break;
244: }
245: SKIP_PAST_NEWLINE(front, back);
246: }
247: return (NULL);
248: }
249:
250: /*
251: * Print as many lines as match string, starting at front.
252: */
1.21 krw 253: void
1.10 deraadt 254: print_from(char *string, char *front, char *back)
1.1 deraadt 255: {
256: for (; front < back && compare(string, front, back) == EQUAL; ++front) {
257: for (; front < back && *front != '\n'; ++front)
258: if (putchar(*front) == EOF)
259: err(2, "stdout");
260: if (putchar('\n') == EOF)
261: err(2, "stdout");
262: }
263: }
264:
265: /*
266: * Return LESS, GREATER, or EQUAL depending on how the string1 compares with
267: * string2 (s1 ??? s2).
1.21 krw 268: *
269: * o Matches up to len(s1) are EQUAL.
1.1 deraadt 270: * o Matches up to len(s2) are GREATER.
1.21 krw 271: *
1.1 deraadt 272: * Compare understands about the -f and -d flags, and treats comparisons
273: * appropriately.
1.21 krw 274: *
1.1 deraadt 275: * The string "s1" is null terminated. The string s2 is '\n' terminated (or
276: * "back" terminated).
277: */
278: int
1.10 deraadt 279: compare(char *s1, char *s2, char *back)
1.1 deraadt 280: {
1.6 mpech 281: int ch;
1.1 deraadt 282:
283: for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) {
284: ch = *s2;
285: if (fflag)
1.19 millert 286: ch = tolower((unsigned char)ch);
287: if (dflag && !isalnum((unsigned char)ch)) {
1.1 deraadt 288: ++s2; /* Ignore character in comparison. */
289: continue;
290: }
291: if (*s1 != ch)
292: return (*s1 < ch ? LESS : GREATER);
293: }
294: return (*s1 ? GREATER : EQUAL);
295: }
296:
297: void
1.10 deraadt 298: usage(void)
1.1 deraadt 299: {
1.12 sobrado 300: (void)fprintf(stderr,
301: "usage: look [-df] [-t termchar] string [file]\n");
1.1 deraadt 302: exit(2);
303: }