Annotation of src/usr.bin/hexdump/parse.c, Revision 1.4
1.4 ! provos 1: /* $OpenBSD: parse.c,v 1.3 1997/01/17 07:12:39 millert Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /*
4: * Copyright (c) 1989 The Regents of the University of California.
5: * All rights reserved.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: * 3. All advertising materials mentioning features or use of this software
16: * must display the following acknowledgement:
17: * This product includes software developed by the University of
18: * California, Berkeley and its contributors.
19: * 4. Neither the name of the University nor the names of its contributors
20: * may be used to endorse or promote products derived from this software
21: * without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: */
35:
36: #ifndef lint
37: /*static char sccsid[] = "from: @(#)parse.c 5.6 (Berkeley) 3/9/91";*/
1.4 ! provos 38: static char rcsid[] = "$OpenBSD: parse.c,v 1.3 1997/01/17 07:12:39 millert Exp $";
1.1 deraadt 39: #endif /* not lint */
40:
41: #include <sys/types.h>
42: #include <sys/file.h>
43: #include <stdio.h>
44: #include <stdlib.h>
45: #include <ctype.h>
46: #include <string.h>
47: #include "hexdump.h"
48:
49: FU *endfu; /* format unit to use at end-of-data; rewrite() points this at the unit whose format contains a %_A conversion */
50:
/*
 * addfile --
 *	Read format strings from the file `name', one per line, and pass
 *	each one to add().  Leading white space is skipped; empty lines and
 *	lines whose first non-blank character is '#' are ignored.
 *
 *	If the file cannot be opened a message is printed and the program
 *	exits with status 1.  A line with no newline inside the buffer
 *	(which includes a final line lacking a terminating newline) is
 *	reported as too long and discarded.
 *
 *	The return type is int, as implied by the original definition;
 *	the function always returns 0 and the value carries no information.
 */
int
addfile(name)
	char *name;
{
	register char *p;
	FILE *fp;
	int ch;
	char buf[2048 + 1];

	if (!(fp = fopen(name, "r"))) {
		(void)fprintf(stderr, "hexdump: can't read %s.\n", name);
		exit(1);
	}
	while (fgets(buf, sizeof(buf), fp)) {
		if (!(p = strchr(buf, '\n'))) {
			(void)fprintf(stderr, "hexdump: line too long.\n");
			/*
			 * Throw away the rest of the over-long line.  This
			 * must read from the format file, not from standard
			 * input, or the following fgets() resumes in the
			 * middle of the same line.
			 */
			while ((ch = getc(fp)) != '\n' && ch != EOF)
				;
			continue;
		}
		*p = '\0';
		/* <ctype.h> functions need a value representable as unsigned char */
		for (p = buf; *p && isspace((unsigned char)*p); ++p)
			;
		if (!*p || *p == '#')
			continue;
		add(p);
	}
	(void)fclose(fp);
	return (0);
}
77:
78: add(fmt)
79: char *fmt;
80: {
81: register char *p;
82: static FS **nextfs;
83: FS *tfs;
84: FU *tfu, **nextfu;
85: char *savep, *emalloc();
86:
87: /* start new linked list of format units */
88: /* NOSTRICT */
89: tfs = (FS *)emalloc(sizeof(FS));
90: if (!fshead)
91: fshead = tfs;
92: else
93: *nextfs = tfs;
94: nextfs = &tfs->nextfs;
95: nextfu = &tfs->nextfu;
96:
97: /* take the format string and break it up into format units */
98: for (p = fmt;;) {
99: /* skip leading white space */
100: for (; isspace(*p); ++p);
101: if (!*p)
102: break;
103:
104: /* allocate a new format unit and link it in */
105: /* NOSTRICT */
106: tfu = (FU *)emalloc(sizeof(FU));
107: *nextfu = tfu;
108: nextfu = &tfu->nextfu;
109: tfu->reps = 1;
110:
111: /* if leading digit, repetition count */
112: if (isdigit(*p)) {
113: for (savep = p; isdigit(*p); ++p);
114: if (!isspace(*p) && *p != '/')
115: badfmt(fmt);
116: /* may overwrite either white space or slash */
117: tfu->reps = atoi(savep);
118: tfu->flags = F_SETREP;
119: /* skip trailing white space */
120: for (++p; isspace(*p); ++p);
121: }
122:
123: /* skip slash and trailing white space */
124: if (*p == '/')
125: while (isspace(*++p));
126:
127: /* byte count */
128: if (isdigit(*p)) {
129: for (savep = p; isdigit(*p); ++p);
130: if (!isspace(*p))
131: badfmt(fmt);
132: tfu->bcnt = atoi(savep);
133: /* skip trailing white space */
134: for (++p; isspace(*p); ++p);
135: }
136:
137: /* format */
138: if (*p != '"')
139: badfmt(fmt);
140: for (savep = ++p; *p != '"';)
141: if (*p++ == 0)
142: badfmt(fmt);
143: if (!(tfu->fmt = malloc(p - savep + 1)))
144: nomem();
145: (void) strncpy(tfu->fmt, savep, p - savep);
146: tfu->fmt[p - savep] = '\0';
147: escape(tfu->fmt);
148: p++;
149: }
1.4 ! provos 150: /* no single fu in fmt */
! 151: if (tfs->nextfu == NULL)
! 152: badfmt(fmt);
1.1 deraadt 153: }
154:
155: static char *spec = ".#-+ 0123456789";
156: size(fs)
157: FS *fs;
158: {
159: register FU *fu;
160: register int bcnt, cursize;
161: register char *fmt;
162: int prec;
163:
164: /* figure out the data block size needed for each format unit */
165: for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
166: if (fu->bcnt) {
167: cursize += fu->bcnt * fu->reps;
168: continue;
169: }
170: for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
171: if (*fmt != '%')
172: continue;
173: /*
174: * skip any special chars -- save precision in
175: * case it's a %s format.
176: */
1.3 millert 177: while (strchr(spec + 1, *++fmt));
1.1 deraadt 178: if (*fmt == '.' && isdigit(*++fmt)) {
179: prec = atoi(fmt);
180: while (isdigit(*++fmt));
181: }
182: switch(*fmt) {
183: case 'c':
184: bcnt += 1;
185: break;
186: case 'd': case 'i': case 'o': case 'u':
187: case 'x': case 'X':
188: bcnt += 4;
189: break;
190: case 'e': case 'E': case 'f': case 'g': case 'G':
191: bcnt += 8;
192: break;
193: case 's':
194: bcnt += prec;
195: break;
196: case '_':
197: switch(*++fmt) {
198: case 'c': case 'p': case 'u':
199: bcnt += 1;
200: break;
201: }
202: }
203: }
204: cursize += bcnt * fu->reps;
205: }
206: return(cursize);
207: }
208:
209: rewrite(fs)
210: FS *fs;
211: {
212: enum { NOTOKAY, USEBCNT, USEPREC } sokay;
213: register PR *pr, **nextpr;
214: register FU *fu;
215: register char *p1, *p2;
216: char savech, *fmtp;
217: int nconv, prec;
218:
219: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
220: /*
221: * break each format unit into print units; each
222: * conversion character gets its own.
223: */
224: for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
225: /* NOSTRICT */
226: pr = (PR *)emalloc(sizeof(PR));
227: if (!fu->nextpr)
228: fu->nextpr = pr;
229: else
230: *nextpr = pr;
231:
232: /* skip preceding text and up to the next % sign */
233: for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
234:
235: /* only text in the string */
236: if (!*p1) {
237: pr->fmt = fmtp;
238: pr->flags = F_TEXT;
239: break;
240: }
241:
242: /*
243: * get precision for %s -- if have a byte count, don't
244: * need it.
245: */
246: if (fu->bcnt) {
247: sokay = USEBCNT;
248: /* skip to conversion character */
1.3 millert 249: for (++p1; strchr(spec, *p1); ++p1);
1.1 deraadt 250: } else {
251: /* skip any special chars, field width */
1.3 millert 252: while (strchr(spec + 1, *++p1));
1.1 deraadt 253: if (*p1 == '.' && isdigit(*++p1)) {
254: sokay = USEPREC;
255: prec = atoi(p1);
256: while (isdigit(*++p1));
257: }
258: else
259: sokay = NOTOKAY;
260: }
261:
262: p2 = p1 + 1; /* set end pointer */
263:
264: /*
265: * figure out the byte count for each conversion;
266: * rewrite the format as necessary, set up blank-
267: * padding for end of data.
268: */
269: switch(*p1) {
270: case 'c':
271: pr->flags = F_CHAR;
272: switch(fu->bcnt) {
273: case 0: case 1:
274: pr->bcnt = 1;
275: break;
276: default:
277: p1[1] = '\0';
278: badcnt(p1);
279: }
280: break;
281: case 'd': case 'i':
282: pr->flags = F_INT;
283: goto sw1;
284: case 'l':
285: ++p2;
286: switch(p1[1]) {
287: case 'd': case 'i':
288: ++p1;
289: pr->flags = F_INT;
290: goto sw1;
291: case 'o': case 'u': case 'x': case 'X':
292: ++p1;
293: pr->flags = F_UINT;
294: goto sw1;
295: default:
296: p1[2] = '\0';
297: badconv(p1);
298: }
299: /* NOTREACHED */
300: case 'o': case 'u': case 'x': case 'X':
301: pr->flags = F_UINT;
302: sw1: switch(fu->bcnt) {
303: case 0: case 4:
304: pr->bcnt = 4;
305: break;
306: case 1:
307: pr->bcnt = 1;
308: break;
309: case 2:
310: pr->bcnt = 2;
311: break;
312: default:
313: p1[1] = '\0';
314: badcnt(p1);
315: }
316: break;
317: case 'e': case 'E': case 'f': case 'g': case 'G':
318: pr->flags = F_DBL;
319: switch(fu->bcnt) {
320: case 0: case 8:
321: pr->bcnt = 8;
322: break;
323: case 4:
324: pr->bcnt = 4;
325: break;
326: default:
327: p1[1] = '\0';
328: badcnt(p1);
329: }
330: break;
331: case 's':
332: pr->flags = F_STR;
333: switch(sokay) {
334: case NOTOKAY:
335: badsfmt();
336: case USEBCNT:
337: pr->bcnt = fu->bcnt;
338: break;
339: case USEPREC:
340: pr->bcnt = prec;
341: break;
342: }
343: break;
344: case '_':
345: ++p2;
346: switch(p1[1]) {
347: case 'A':
348: endfu = fu;
349: fu->flags |= F_IGNORE;
350: /* FALLTHROUGH */
351: case 'a':
352: pr->flags = F_ADDRESS;
353: ++p2;
354: switch(p1[2]) {
355: case 'd': case 'o': case'x':
356: *p1 = 'q';
357: p1[1] = p1[2];
358: break;
359: default:
360: p1[3] = '\0';
361: badconv(p1);
362: }
363: break;
364: case 'c':
365: pr->flags = F_C;
366: /* *p1 = 'c'; set in conv_c */
367: goto sw2;
368: case 'p':
369: pr->flags = F_P;
370: *p1 = 'c';
371: goto sw2;
372: case 'u':
373: pr->flags = F_U;
374: /* *p1 = 'c'; set in conv_u */
375: sw2: switch(fu->bcnt) {
376: case 0: case 1:
377: pr->bcnt = 1;
378: break;
379: default:
380: p1[2] = '\0';
381: badcnt(p1);
382: }
383: break;
384: default:
385: p1[2] = '\0';
386: badconv(p1);
387: }
388: break;
389: default:
390: p1[1] = '\0';
391: badconv(p1);
392: }
393:
394: /*
395: * copy to PR format string, set conversion character
396: * pointer, update original.
397: */
398: savech = *p2;
399: p1[(pr->flags&F_ADDRESS)?2:1] = '\0';
400: if (!(pr->fmt = strdup(fmtp)))
401: nomem();
402: *p2 = savech;
403: pr->cchar = pr->fmt + (p1 - fmtp);
404: fmtp = p2;
405:
406: /* only one conversion character if byte count */
407: if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++) {
408: (void)fprintf(stderr,
409: "hexdump: byte count with multiple conversion characters.\n");
410: exit(1);
411: }
412: }
413: /*
414: * if format unit byte count not specified, figure it out
415: * so can adjust rep count later.
416: */
417: if (!fu->bcnt)
418: for (pr = fu->nextpr; pr; pr = pr->nextpr)
419: fu->bcnt += pr->bcnt;
420: }
421: /*
422: * if the format string interprets any data at all, and it's
423: * not the same as the blocksize, and its last format unit
424: * interprets any data at all, and has no iteration count,
425: * repeat it as necessary.
426: *
427: * if, rep count is greater than 1, no trailing whitespace
428: * gets output from the last iteration of the format unit.
429: */
430: for (fu = fs->nextfu;; fu = fu->nextfu) {
431: if (!fu->nextfu && fs->bcnt < blocksize &&
432: !(fu->flags&F_SETREP) && fu->bcnt)
433: fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
434: if (fu->reps > 1) {
435: for (pr = fu->nextpr;; pr = pr->nextpr)
436: if (!pr->nextpr)
437: break;
438: for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
439: p2 = isspace(*p1) ? p1 : NULL;
440: if (p2)
441: pr->nospace = p2;
442: }
443: if (!fu->nextfu)
444: break;
445: }
446: }
447:
448:
/*
 * escape --
 *	Convert backslash escape sequences in the string `p1' in place.
 *	\a \b \f \n \r \t and \v become the corresponding control
 *	character; a backslash followed by any other character yields that
 *	character.  A lone backslash at the very end of the string is kept
 *	as is.  The result is never longer than the input.
 *
 *	The return type is int, as implied by the original definition;
 *	the function always returns 0 and the value carries no information.
 */
int
escape(p1)
	register char *p1;
{
	register char *p2;

	/* alphabetic escape sequences have to be done in place */
	for (p2 = p1;; ++p1, ++p2) {
		if (*p1 != '\\') {
			/*
			 * ordinary characters must be moved down as well:
			 * once an escape has been collapsed the write
			 * position lags behind the read position
			 */
			*p2 = *p1;
			if (!*p1)
				break;
			continue;
		}
		switch (*++p1) {
		case '\0':
			/*
			 * trailing backslash: keep it and stop, so the
			 * read position never passes the terminator
			 */
			*p2 = '\\';
			*++p2 = '\0';
			return (0);
		case 'a':
			/* *p2 = '\a'; */
			*p2 = '\007';
			break;
		case 'b':
			*p2 = '\b';
			break;
		case 'f':
			*p2 = '\f';
			break;
		case 'n':
			*p2 = '\n';
			break;
		case 'r':
			*p2 = '\r';
			break;
		case 't':
			*p2 = '\t';
			break;
		case 'v':
			*p2 = '\v';
			break;
		default:
			*p2 = *p1;
			break;
		}
	}
	return (0);
}
490:
/*
 * badcnt --
 *	Report that the byte count given for a format unit is not valid for
 *	the conversion character(s) in `s', then exit with status 1.
 */
badcnt(s)
	char *s;
{
	FILE *errout = stderr;

	fprintf(errout,
	    "hexdump: bad byte count for conversion character %s.\n",
	    s);
	exit(1);
}
498:
/*
 * badsfmt --
 *	Report a %s conversion that has neither a precision nor a byte
 *	count, then exit with status 1.
 */
badsfmt()
{
	FILE *errout = stderr;

	fprintf(errout,
	    "hexdump: %%s requires a precision or a byte count.\n");
	exit(1);
}
505:
/*
 * badfmt --
 *	Report the malformed format string `fmt', printed between braces,
 *	then exit with status 1.
 */
badfmt(fmt)
	char *fmt;
{
	FILE *errout = stderr;

	fprintf(errout, "hexdump: bad format {%s}\n", fmt);
	exit(1);
}
512:
/*
 * badconv --
 *	Report the unsupported conversion in `ch' (the text that followed
 *	the '%'), then exit with status 1.
 */
badconv(ch)
	char *ch;
{
	FILE *errout = stderr;

	fprintf(errout, "hexdump: bad conversion character %%%s.\n", ch);
	exit(1);
}
518: }