Annotation of src/usr.bin/hexdump/parse.c, Revision 1.1.1.1
1.1 deraadt 1: /*
2: * Copyright (c) 1989 The Regents of the University of California.
3: * All rights reserved.
4: *
5: * Redistribution and use in source and binary forms, with or without
6: * modification, are permitted provided that the following conditions
7: * are met:
8: * 1. Redistributions of source code must retain the above copyright
9: * notice, this list of conditions and the following disclaimer.
10: * 2. Redistributions in binary form must reproduce the above copyright
11: * notice, this list of conditions and the following disclaimer in the
12: * documentation and/or other materials provided with the distribution.
13: * 3. All advertising materials mentioning features or use of this software
14: * must display the following acknowledgement:
15: * This product includes software developed by the University of
16: * California, Berkeley and its contributors.
17: * 4. Neither the name of the University nor the names of its contributors
18: * may be used to endorse or promote products derived from this software
19: * without specific prior written permission.
20: *
21: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31: * SUCH DAMAGE.
32: */
33:
34: #ifndef lint
35: /*static char sccsid[] = "from: @(#)parse.c 5.6 (Berkeley) 3/9/91";*/
36: static char rcsid[] = "$Id: parse.c,v 1.3 1994/05/20 15:57:26 pk Exp $";
37: #endif /* not lint */
38:
39: #include <sys/types.h>
40: #include <sys/file.h>
41: #include <stdio.h>
42: #include <stdlib.h>
43: #include <ctype.h>
44: #include <string.h>
45: #include "hexdump.h"
46:
47: FU *endfu; /* format at end-of-data */
48:
49: addfile(name)
50: char *name;
51: {
52: register char *p;
53: FILE *fp;
54: int ch;
55: char buf[2048 + 1];
56:
57: if (!(fp = fopen(name, "r"))) {
58: (void)fprintf(stderr, "hexdump: can't read %s.\n", name);
59: exit(1);
60: }
61: while (fgets(buf, sizeof(buf), fp)) {
62: if (!(p = index(buf, '\n'))) {
63: (void)fprintf(stderr, "hexdump: line too long.\n");
64: while ((ch = getchar()) != '\n' && ch != EOF);
65: continue;
66: }
67: *p = '\0';
68: for (p = buf; *p && isspace(*p); ++p);
69: if (!*p || *p == '#')
70: continue;
71: add(p);
72: }
73: (void)fclose(fp);
74: }
75:
76: add(fmt)
77: char *fmt;
78: {
79: register char *p;
80: static FS **nextfs;
81: FS *tfs;
82: FU *tfu, **nextfu;
83: char *savep, *emalloc();
84:
85: /* start new linked list of format units */
86: /* NOSTRICT */
87: tfs = (FS *)emalloc(sizeof(FS));
88: if (!fshead)
89: fshead = tfs;
90: else
91: *nextfs = tfs;
92: nextfs = &tfs->nextfs;
93: nextfu = &tfs->nextfu;
94:
95: /* take the format string and break it up into format units */
96: for (p = fmt;;) {
97: /* skip leading white space */
98: for (; isspace(*p); ++p);
99: if (!*p)
100: break;
101:
102: /* allocate a new format unit and link it in */
103: /* NOSTRICT */
104: tfu = (FU *)emalloc(sizeof(FU));
105: *nextfu = tfu;
106: nextfu = &tfu->nextfu;
107: tfu->reps = 1;
108:
109: /* if leading digit, repetition count */
110: if (isdigit(*p)) {
111: for (savep = p; isdigit(*p); ++p);
112: if (!isspace(*p) && *p != '/')
113: badfmt(fmt);
114: /* may overwrite either white space or slash */
115: tfu->reps = atoi(savep);
116: tfu->flags = F_SETREP;
117: /* skip trailing white space */
118: for (++p; isspace(*p); ++p);
119: }
120:
121: /* skip slash and trailing white space */
122: if (*p == '/')
123: while (isspace(*++p));
124:
125: /* byte count */
126: if (isdigit(*p)) {
127: for (savep = p; isdigit(*p); ++p);
128: if (!isspace(*p))
129: badfmt(fmt);
130: tfu->bcnt = atoi(savep);
131: /* skip trailing white space */
132: for (++p; isspace(*p); ++p);
133: }
134:
135: /* format */
136: if (*p != '"')
137: badfmt(fmt);
138: for (savep = ++p; *p != '"';)
139: if (*p++ == 0)
140: badfmt(fmt);
141: if (!(tfu->fmt = malloc(p - savep + 1)))
142: nomem();
143: (void) strncpy(tfu->fmt, savep, p - savep);
144: tfu->fmt[p - savep] = '\0';
145: escape(tfu->fmt);
146: p++;
147: }
148: }
149:
150: static char *spec = ".#-+ 0123456789";
151: size(fs)
152: FS *fs;
153: {
154: register FU *fu;
155: register int bcnt, cursize;
156: register char *fmt;
157: int prec;
158:
159: /* figure out the data block size needed for each format unit */
160: for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
161: if (fu->bcnt) {
162: cursize += fu->bcnt * fu->reps;
163: continue;
164: }
165: for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
166: if (*fmt != '%')
167: continue;
168: /*
169: * skip any special chars -- save precision in
170: * case it's a %s format.
171: */
172: while (index(spec + 1, *++fmt));
173: if (*fmt == '.' && isdigit(*++fmt)) {
174: prec = atoi(fmt);
175: while (isdigit(*++fmt));
176: }
177: switch(*fmt) {
178: case 'c':
179: bcnt += 1;
180: break;
181: case 'd': case 'i': case 'o': case 'u':
182: case 'x': case 'X':
183: bcnt += 4;
184: break;
185: case 'e': case 'E': case 'f': case 'g': case 'G':
186: bcnt += 8;
187: break;
188: case 's':
189: bcnt += prec;
190: break;
191: case '_':
192: switch(*++fmt) {
193: case 'c': case 'p': case 'u':
194: bcnt += 1;
195: break;
196: }
197: }
198: }
199: cursize += bcnt * fu->reps;
200: }
201: return(cursize);
202: }
203:
204: rewrite(fs)
205: FS *fs;
206: {
207: enum { NOTOKAY, USEBCNT, USEPREC } sokay;
208: register PR *pr, **nextpr;
209: register FU *fu;
210: register char *p1, *p2;
211: char savech, *fmtp;
212: int nconv, prec;
213:
214: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
215: /*
216: * break each format unit into print units; each
217: * conversion character gets its own.
218: */
219: for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
220: /* NOSTRICT */
221: pr = (PR *)emalloc(sizeof(PR));
222: if (!fu->nextpr)
223: fu->nextpr = pr;
224: else
225: *nextpr = pr;
226:
227: /* skip preceding text and up to the next % sign */
228: for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
229:
230: /* only text in the string */
231: if (!*p1) {
232: pr->fmt = fmtp;
233: pr->flags = F_TEXT;
234: break;
235: }
236:
237: /*
238: * get precision for %s -- if have a byte count, don't
239: * need it.
240: */
241: if (fu->bcnt) {
242: sokay = USEBCNT;
243: /* skip to conversion character */
244: for (++p1; index(spec, *p1); ++p1);
245: } else {
246: /* skip any special chars, field width */
247: while (index(spec + 1, *++p1));
248: if (*p1 == '.' && isdigit(*++p1)) {
249: sokay = USEPREC;
250: prec = atoi(p1);
251: while (isdigit(*++p1));
252: }
253: else
254: sokay = NOTOKAY;
255: }
256:
257: p2 = p1 + 1; /* set end pointer */
258:
259: /*
260: * figure out the byte count for each conversion;
261: * rewrite the format as necessary, set up blank-
262: * padding for end of data.
263: */
264: switch(*p1) {
265: case 'c':
266: pr->flags = F_CHAR;
267: switch(fu->bcnt) {
268: case 0: case 1:
269: pr->bcnt = 1;
270: break;
271: default:
272: p1[1] = '\0';
273: badcnt(p1);
274: }
275: break;
276: case 'd': case 'i':
277: pr->flags = F_INT;
278: goto sw1;
279: case 'l':
280: ++p2;
281: switch(p1[1]) {
282: case 'd': case 'i':
283: ++p1;
284: pr->flags = F_INT;
285: goto sw1;
286: case 'o': case 'u': case 'x': case 'X':
287: ++p1;
288: pr->flags = F_UINT;
289: goto sw1;
290: default:
291: p1[2] = '\0';
292: badconv(p1);
293: }
294: /* NOTREACHED */
295: case 'o': case 'u': case 'x': case 'X':
296: pr->flags = F_UINT;
297: sw1: switch(fu->bcnt) {
298: case 0: case 4:
299: pr->bcnt = 4;
300: break;
301: case 1:
302: pr->bcnt = 1;
303: break;
304: case 2:
305: pr->bcnt = 2;
306: break;
307: default:
308: p1[1] = '\0';
309: badcnt(p1);
310: }
311: break;
312: case 'e': case 'E': case 'f': case 'g': case 'G':
313: pr->flags = F_DBL;
314: switch(fu->bcnt) {
315: case 0: case 8:
316: pr->bcnt = 8;
317: break;
318: case 4:
319: pr->bcnt = 4;
320: break;
321: default:
322: p1[1] = '\0';
323: badcnt(p1);
324: }
325: break;
326: case 's':
327: pr->flags = F_STR;
328: switch(sokay) {
329: case NOTOKAY:
330: badsfmt();
331: case USEBCNT:
332: pr->bcnt = fu->bcnt;
333: break;
334: case USEPREC:
335: pr->bcnt = prec;
336: break;
337: }
338: break;
339: case '_':
340: ++p2;
341: switch(p1[1]) {
342: case 'A':
343: endfu = fu;
344: fu->flags |= F_IGNORE;
345: /* FALLTHROUGH */
346: case 'a':
347: pr->flags = F_ADDRESS;
348: ++p2;
349: switch(p1[2]) {
350: case 'd': case 'o': case'x':
351: *p1 = 'q';
352: p1[1] = p1[2];
353: break;
354: default:
355: p1[3] = '\0';
356: badconv(p1);
357: }
358: break;
359: case 'c':
360: pr->flags = F_C;
361: /* *p1 = 'c'; set in conv_c */
362: goto sw2;
363: case 'p':
364: pr->flags = F_P;
365: *p1 = 'c';
366: goto sw2;
367: case 'u':
368: pr->flags = F_U;
369: /* *p1 = 'c'; set in conv_u */
370: sw2: switch(fu->bcnt) {
371: case 0: case 1:
372: pr->bcnt = 1;
373: break;
374: default:
375: p1[2] = '\0';
376: badcnt(p1);
377: }
378: break;
379: default:
380: p1[2] = '\0';
381: badconv(p1);
382: }
383: break;
384: default:
385: p1[1] = '\0';
386: badconv(p1);
387: }
388:
389: /*
390: * copy to PR format string, set conversion character
391: * pointer, update original.
392: */
393: savech = *p2;
394: p1[(pr->flags&F_ADDRESS)?2:1] = '\0';
395: if (!(pr->fmt = strdup(fmtp)))
396: nomem();
397: *p2 = savech;
398: pr->cchar = pr->fmt + (p1 - fmtp);
399: fmtp = p2;
400:
401: /* only one conversion character if byte count */
402: if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++) {
403: (void)fprintf(stderr,
404: "hexdump: byte count with multiple conversion characters.\n");
405: exit(1);
406: }
407: }
408: /*
409: * if format unit byte count not specified, figure it out
410: * so can adjust rep count later.
411: */
412: if (!fu->bcnt)
413: for (pr = fu->nextpr; pr; pr = pr->nextpr)
414: fu->bcnt += pr->bcnt;
415: }
416: /*
417: * if the format string interprets any data at all, and it's
418: * not the same as the blocksize, and its last format unit
419: * interprets any data at all, and has no iteration count,
420: * repeat it as necessary.
421: *
422: * if, rep count is greater than 1, no trailing whitespace
423: * gets output from the last iteration of the format unit.
424: */
425: for (fu = fs->nextfu;; fu = fu->nextfu) {
426: if (!fu->nextfu && fs->bcnt < blocksize &&
427: !(fu->flags&F_SETREP) && fu->bcnt)
428: fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
429: if (fu->reps > 1) {
430: for (pr = fu->nextpr;; pr = pr->nextpr)
431: if (!pr->nextpr)
432: break;
433: for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
434: p2 = isspace(*p1) ? p1 : NULL;
435: if (p2)
436: pr->nospace = p2;
437: }
438: if (!fu->nextfu)
439: break;
440: }
441: }
442:
443:
/*
 * escape --
 *	Replace, in place, the backslash escape sequences in the string
 *	`p1': \a \b \f \n \r \t \v become the corresponding control
 *	character, a backslash followed by any other character becomes
 *	that character.  All other characters are kept, moved down as the
 *	string shrinks.  A single backslash at the very end of the string
 *	is kept literally.
 *
 *	The result is never longer than the input.  Always returns 0.
 */
int
escape(char *p1)
{
	char *p2;	/* write position; never ahead of p1 */

	/* alphabetic escape sequences have to be done in place */
	for (p2 = p1;; ++p1, ++p2) {
		if (*p1 == '\0') {
			*p2 = '\0';
			break;
		}
		if (*p1 != '\\') {
			/* ordinary character: move it down to its new place */
			*p2 = *p1;
			continue;
		}
		switch (*++p1) {
		case '\0':
			/* lone trailing backslash: keep it and stop here */
			*p2++ = '\\';
			*p2 = '\0';
			return (0);
		case 'a':
			/* *p2 = '\a'; */
			*p2 = '\007';
			break;
		case 'b':
			*p2 = '\b';
			break;
		case 'f':
			*p2 = '\f';
			break;
		case 'n':
			*p2 = '\n';
			break;
		case 'r':
			*p2 = '\r';
			break;
		case 't':
			*p2 = '\t';
			break;
		case 'v':
			*p2 = '\v';
			break;
		default:
			*p2 = *p1;
			break;
		}
	}
	return (0);
}
485:
/*
 * badcnt --
 *	Complain on stderr that the byte count does not fit the conversion
 *	named by the string `s', then terminate with exit status 1.
 *	Does not return.
 */
int
badcnt(char *s)
{
	(void)fprintf(stderr,
	    "hexdump: bad byte count for conversion character %s.\n", s);
	exit(1);
}
493:
/*
 * badsfmt --
 *	Complain on stderr about a %s conversion that has neither a
 *	precision nor a byte count, then terminate with exit status 1.
 *	Does not return.
 */
int
badsfmt()
{
	(void)fprintf(stderr,
	    "hexdump: %%s requires a precision or a byte count.\n");
	exit(1);
}
500:
/*
 * badfmt --
 *	Print the rejected format string `fmt' inside braces on stderr and
 *	terminate with exit status 1.  Does not return.
 */
int
badfmt(char *fmt)
{
	(void)fprintf(stderr, "hexdump: bad format {%s}\n", fmt);
	exit(1);
}
507:
/*
 * badconv --
 *	Complain on stderr about the conversion whose text (without the
 *	leading '%') is the string `ch', then terminate with exit status 1.
 *	Does not return.
 */
int
badconv(char *ch)
{
	(void)fprintf(stderr, "hexdump: bad conversion character %%%s.\n", ch);
	exit(1);
}