Annotation of src/usr.bin/hexdump/parse.c, Revision 1.18
1.18 ! tb 1: /* $OpenBSD: parse.c,v 1.17 2009/10/27 23:59:39 deraadt Exp $ */
1.9 pvalchev 2: /* $NetBSD: parse.c,v 1.12 2001/12/07 13:37:39 bjh21 Exp $ */
1.2 deraadt 3:
1.1 deraadt 4: /*
1.9 pvalchev 5: * Copyright (c) 1989, 1993
6: * The Regents of the University of California. All rights reserved.
1.1 deraadt 7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
1.12 millert 16: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: */
32:
#include <sys/types.h>
#include <sys/file.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "hexdump.h"
45:
46: FU *endfu; /* format unit for end-of-data; rewrite() points it at the unit containing a %_A conversion */
47:
/*
 * addfile --
 *	Read format strings from the file `name', one per line, and pass each
 *	one to add().  Leading white space is skipped; lines that are empty
 *	after that, or whose first non-blank character is '#', are ignored.
 *	Exits through err() if the file cannot be opened or if the copy made
 *	for an unterminated last line cannot be allocated.
 */
void
addfile(char *name)
{
	FILE *stream;
	size_t linelen;
	char *line, *copy, *start;

	stream = fopen(name, "r");
	if (stream == NULL)
		err(1, "fopen %s", name);

	copy = NULL;
	for (;;) {
		line = fgetln(stream, &linelen);
		if (line == NULL)
			break;

		if (line[linelen - 1] != '\n') {
			/* EOF without EOL: terminate a private copy instead */
			copy = malloc(linelen + 1);
			if (copy == NULL)
				err(1, NULL);
			memcpy(copy, line, linelen);
			copy[linelen] = '\0';
			line = copy;
		} else
			line[linelen - 1] = '\0';

		start = line;
		while (isspace((unsigned char)*start))
			start++;
		if (*start == '\0' || *start == '#')
			continue;
		add(start);
	}
	free(copy);
	(void)fclose(stream);
}
78:
1.6 pvalchev 79: void
1.13 deraadt 80: add(const char *fmt)
1.1 deraadt 81: {
1.9 pvalchev 82: const char *p;
1.1 deraadt 83: static FS **nextfs;
84: FS *tfs;
85: FU *tfu, **nextfu;
1.9 pvalchev 86: const char *savep;
1.1 deraadt 87:
88: /* start new linked list of format units */
1.18 ! tb 89: if ((tfs = calloc(1, sizeof(FS))) == NULL)
! 90: err(1, NULL);
1.1 deraadt 91: if (!fshead)
92: fshead = tfs;
93: else
94: *nextfs = tfs;
95: nextfs = &tfs->nextfs;
96: nextfu = &tfs->nextfu;
97:
98: /* take the format string and break it up into format units */
99: for (p = fmt;;) {
100: /* skip leading white space */
1.9 pvalchev 101: for (; isspace((unsigned char)*p); ++p);
1.1 deraadt 102: if (!*p)
103: break;
104:
105: /* allocate a new format unit and link it in */
1.18 ! tb 106: if ((tfu = calloc(1, sizeof(FU))) == NULL)
! 107: err(1, NULL);
1.1 deraadt 108: *nextfu = tfu;
109: nextfu = &tfu->nextfu;
110: tfu->reps = 1;
111:
112: /* if leading digit, repetition count */
1.9 pvalchev 113: if (isdigit((unsigned char)*p)) {
114: for (savep = p; isdigit((unsigned char)*p); ++p);
115: if (!isspace((unsigned char)*p) && *p != '/')
116: badfmt(fmt);
1.1 deraadt 117: /* may overwrite either white space or slash */
118: tfu->reps = atoi(savep);
119: tfu->flags = F_SETREP;
120: /* skip trailing white space */
1.9 pvalchev 121: for (++p; isspace((unsigned char)*p); ++p);
1.1 deraadt 122: }
123:
124: /* skip slash and trailing white space */
125: if (*p == '/')
1.9 pvalchev 126: while (isspace((unsigned char)*++p));
1.1 deraadt 127:
128: /* byte count */
1.9 pvalchev 129: if (isdigit((unsigned char)*p)) {
130: for (savep = p; isdigit((unsigned char)*p); ++p);
131: if (!isspace((unsigned char)*p))
132: badfmt(fmt);
1.1 deraadt 133: tfu->bcnt = atoi(savep);
134: /* skip trailing white space */
1.9 pvalchev 135: for (++p; isspace((unsigned char)*p); ++p);
1.1 deraadt 136: }
137:
138: /* format */
139: if (*p != '"')
1.9 pvalchev 140: badfmt(fmt);
1.1 deraadt 141: for (savep = ++p; *p != '"';)
142: if (*p++ == 0)
1.9 pvalchev 143: badfmt(fmt);
1.18 ! tb 144: if ((tfu->fmt = malloc(p - savep + 1)) == NULL)
! 145: err(1, NULL);
1.1 deraadt 146: (void) strncpy(tfu->fmt, savep, p - savep);
147: tfu->fmt[p - savep] = '\0';
148: escape(tfu->fmt);
149: p++;
150: }
151: }
152:
1.7 mickey 153: static const char *spec = ".#-+ 0123456789"; /* characters skipped between '%' and the conversion character; callers pass spec + 1 to leave out the '.' */
1.9 pvalchev 154:
1.6 pvalchev 155: int
1.13 deraadt 156: size(FS *fs)
1.1 deraadt 157: {
1.8 mpech 158: FU *fu;
159: int bcnt, cursize;
160: char *fmt;
1.1 deraadt 161: int prec;
162:
163: /* figure out the data block size needed for each format unit */
164: for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
165: if (fu->bcnt) {
166: cursize += fu->bcnt * fu->reps;
167: continue;
168: }
169: for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
170: if (*fmt != '%')
171: continue;
172: /*
173: * skip any special chars -- save precision in
174: * case it's a %s format.
175: */
1.14 otto 176: while (*++fmt && strchr(spec + 1, *fmt));
1.9 pvalchev 177: if (*fmt == '.' && isdigit((unsigned char)*++fmt)) {
1.1 deraadt 178: prec = atoi(fmt);
1.9 pvalchev 179: while (isdigit((unsigned char)*++fmt));
1.1 deraadt 180: }
181: switch(*fmt) {
182: case 'c':
183: bcnt += 1;
184: break;
185: case 'd': case 'i': case 'o': case 'u':
186: case 'x': case 'X':
187: bcnt += 4;
188: break;
189: case 'e': case 'E': case 'f': case 'g': case 'G':
190: bcnt += 8;
191: break;
192: case 's':
193: bcnt += prec;
194: break;
195: case '_':
196: switch(*++fmt) {
197: case 'c': case 'p': case 'u':
198: bcnt += 1;
199: break;
200: }
201: }
202: }
203: cursize += bcnt * fu->reps;
204: }
1.9 pvalchev 205: return (cursize);
1.1 deraadt 206: }
207:
1.6 pvalchev 208: void
1.13 deraadt 209: rewrite(FS *fs)
1.1 deraadt 210: {
211: enum { NOTOKAY, USEBCNT, USEPREC } sokay;
1.8 mpech 212: PR *pr, **nextpr;
213: FU *fu;
214: char *p1, *p2;
1.9 pvalchev 215: char savech, *fmtp, cs[3];
1.1 deraadt 216: int nconv, prec;
1.11 deraadt 217: size_t len;
1.1 deraadt 218:
1.9 pvalchev 219: nextpr = NULL;
220: prec = 0;
1.1 deraadt 221: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
222: /*
1.9 pvalchev 223: * Break each format unit into print units; each conversion
224: * character gets its own.
1.1 deraadt 225: */
226: for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
1.18 ! tb 227: if ((pr = calloc(1, sizeof(PR))) == NULL)
! 228: err(1, NULL);
1.1 deraadt 229: if (!fu->nextpr)
230: fu->nextpr = pr;
231: else
232: *nextpr = pr;
233:
1.9 pvalchev 234: /* Skip preceding text and up to the next % sign. */
1.1 deraadt 235: for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
236:
1.9 pvalchev 237: /* Only text in the string. */
1.1 deraadt 238: if (!*p1) {
239: pr->fmt = fmtp;
240: pr->flags = F_TEXT;
241: break;
242: }
243:
244: /*
1.9 pvalchev 245: * Get precision for %s -- if have a byte count, don't
1.1 deraadt 246: * need it.
247: */
248: if (fu->bcnt) {
249: sokay = USEBCNT;
1.9 pvalchev 250: /* Skip to conversion character. */
1.14 otto 251: for (++p1; *p1 && strchr(spec, *p1); ++p1);
1.1 deraadt 252: } else {
1.9 pvalchev 253: /* Skip any special chars, field width. */
1.14 otto 254: while (*++p1 && strchr(spec + 1, *p1));
1.9 pvalchev 255: if (*p1 == '.' &&
256: isdigit((unsigned char)*++p1)) {
1.1 deraadt 257: sokay = USEPREC;
258: prec = atoi(p1);
1.9 pvalchev 259: while (isdigit((unsigned char)*++p1))
260: continue;
261: } else
1.1 deraadt 262: sokay = NOTOKAY;
263: }
264:
1.14 otto 265: p2 = *p1 ? p1 + 1 : p1; /* Set end pointer. */
1.9 pvalchev 266: cs[0] = *p1; /* Set conversion string. */
267: cs[1] = '\0';
1.1 deraadt 268:
269: /*
1.9 pvalchev 270: * Figure out the byte count for each conversion;
1.1 deraadt 271: * rewrite the format as necessary, set up blank-
272: * padding for end of data.
273: */
1.9 pvalchev 274: switch(cs[0]) {
1.1 deraadt 275: case 'c':
276: pr->flags = F_CHAR;
277: switch(fu->bcnt) {
278: case 0: case 1:
279: pr->bcnt = 1;
280: break;
281: default:
1.9 pvalchev 282: p1[1] = '\0';
283: badcnt(p1);
1.1 deraadt 284: }
285: break;
286: case 'd': case 'i':
287: case 'o': case 'u': case 'x': case 'X':
1.14 otto 288: if (cs[0] == 'd' || cs[0] == 'i')
289: pr->flags = F_INT;
290: else
291: pr->flags = F_UINT;
292:
293: cs[2] = '\0';
1.9 pvalchev 294: cs[1] = cs[0];
295: cs[0] = 'q';
296: switch(fu->bcnt) {
1.1 deraadt 297: case 0: case 4:
298: pr->bcnt = 4;
299: break;
300: case 1:
301: pr->bcnt = 1;
302: break;
303: case 2:
304: pr->bcnt = 2;
305: break;
1.9 pvalchev 306: case 8:
307: pr->bcnt = 8;
308: break;
1.1 deraadt 309: default:
1.9 pvalchev 310: p1[1] = '\0';
311: badcnt(p1);
1.1 deraadt 312: }
313: break;
314: case 'e': case 'E': case 'f': case 'g': case 'G':
315: pr->flags = F_DBL;
316: switch(fu->bcnt) {
317: case 0: case 8:
318: pr->bcnt = 8;
319: break;
320: case 4:
321: pr->bcnt = 4;
322: break;
323: default:
1.9 pvalchev 324: p1[1] = '\0';
325: badcnt(p1);
1.1 deraadt 326: }
327: break;
328: case 's':
329: pr->flags = F_STR;
330: switch(sokay) {
331: case NOTOKAY:
1.9 pvalchev 332: badsfmt();
1.1 deraadt 333: case USEBCNT:
334: pr->bcnt = fu->bcnt;
335: break;
336: case USEPREC:
337: pr->bcnt = prec;
338: break;
339: }
340: break;
341: case '_':
342: ++p2;
343: switch(p1[1]) {
344: case 'A':
345: endfu = fu;
346: fu->flags |= F_IGNORE;
347: /* FALLTHROUGH */
348: case 'a':
349: pr->flags = F_ADDRESS;
350: ++p2;
351: switch(p1[2]) {
352: case 'd': case 'o': case'x':
1.9 pvalchev 353: cs[0] = 'q';
354: cs[1] = p1[2];
355: cs[2] = '\0';
1.1 deraadt 356: break;
357: default:
1.14 otto 358: if (p1[2])
359: p1[3] = '\0';
1.9 pvalchev 360: badconv(p1);
1.1 deraadt 361: }
362: break;
363: case 'c':
364: case 'p':
365: case 'u':
1.14 otto 366: if (p1[1] == 'c') {
367: pr->flags = F_C;
368: /* cs[0] = 'c'; set in conv_c */
369: } else if (p1[1] == 'p') {
370: pr->flags = F_P;
371: cs[0] = 'c';
372: } else {
373: pr->flags = F_U;
374: /* cs[0] = 'c'; set in conv_u */
375: }
376:
377: switch(fu->bcnt) {
1.1 deraadt 378: case 0: case 1:
379: pr->bcnt = 1;
380: break;
381: default:
382: p1[2] = '\0';
1.9 pvalchev 383: badcnt(p1);
1.1 deraadt 384: }
385: break;
386: default:
1.14 otto 387: if (p1[1])
388: p1[2] = '\0';
1.9 pvalchev 389: badconv(p1);
1.1 deraadt 390: }
391: break;
392: default:
1.14 otto 393: if (cs[0])
394: p1[1] = '\0';
1.9 pvalchev 395: badconv(p1);
1.1 deraadt 396: }
397:
398: /*
1.9 pvalchev 399: * Copy to PR format string, set conversion character
1.1 deraadt 400: * pointer, update original.
401: */
402: savech = *p2;
1.9 pvalchev 403: p1[0] = '\0';
1.11 deraadt 404: len = strlen(fmtp) + strlen(cs) + 1;
1.18 ! tb 405: if ((pr->fmt = calloc(1, len)) == NULL)
! 406: err(1, NULL);
1.11 deraadt 407: snprintf(pr->fmt, len, "%s%s", fmtp, cs);
1.1 deraadt 408: *p2 = savech;
409: pr->cchar = pr->fmt + (p1 - fmtp);
410: fmtp = p2;
411:
1.9 pvalchev 412: /* Only one conversion character if byte count. */
1.7 mickey 413: if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
414: errx(1,
1.9 pvalchev 415: "byte count with multiple conversion characters");
1.1 deraadt 416: }
417: /*
1.9 pvalchev 418: * If format unit byte count not specified, figure it out
1.1 deraadt 419: * so can adjust rep count later.
420: */
421: if (!fu->bcnt)
422: for (pr = fu->nextpr; pr; pr = pr->nextpr)
423: fu->bcnt += pr->bcnt;
424: }
425: /*
1.9 pvalchev 426: * If the format string interprets any data at all, and it's
1.1 deraadt 427: * not the same as the blocksize, and its last format unit
428: * interprets any data at all, and has no iteration count,
429: * repeat it as necessary.
430: *
1.9 pvalchev 431: * If, rep count is greater than 1, no trailing whitespace
1.1 deraadt 432: * gets output from the last iteration of the format unit.
433: */
1.9 pvalchev 434: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
1.1 deraadt 435: if (!fu->nextfu && fs->bcnt < blocksize &&
436: !(fu->flags&F_SETREP) && fu->bcnt)
437: fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
438: if (fu->reps > 1) {
1.14 otto 439: if (!fu->nextpr)
440: break;
1.1 deraadt 441: for (pr = fu->nextpr;; pr = pr->nextpr)
442: if (!pr->nextpr)
443: break;
444: for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
1.9 pvalchev 445: p2 = isspace((unsigned char)*p1) ? p1 : NULL;
1.1 deraadt 446: if (p2)
447: pr->nospace = p2;
448: }
449: }
1.9 pvalchev 450: #ifdef DEBUG
451: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
452: (void)printf("fmt:");
453: for (pr = fu->nextpr; pr; pr = pr->nextpr)
454: (void)printf(" {%s}", pr->fmt);
455: (void)printf("\n");
456: }
457: #endif
1.1 deraadt 458: }
459:
/*
 * escape --
 *	Replace backslash escape sequences in the string `p1' in place.
 *	\a \b \f \n \r \t \v become the corresponding control characters;
 *	a backslash followed by any other character yields that character.
 *	A lone backslash at the very end of the string is kept as is.
 *	The result is never longer than the input.
 */
void
escape(char *p1)
{
	char *src, *dst;
	char c;

	src = dst = p1;
	while ((c = *src++) != '\0') {
		if (c != '\\') {
			*dst++ = c;
			continue;
		}

		/* look at the character following the backslash */
		c = *src++;
		switch (c) {
		case '\0':
			/* incomplete escape sequence: keep the backslash */
			*dst++ = '\\';
			*dst = '\0';
			return;
		case 'a':
			c = '\007';
			break;
		case 'b':
			c = '\b';
			break;
		case 'f':
			c = '\f';
			break;
		case 'n':
			c = '\n';
			break;
		case 'r':
			c = '\r';
			break;
		case 't':
			c = '\t';
			break;
		case 'v':
			c = '\v';
			break;
		default:
			/* unknown escape: the character stands for itself */
			break;
		}
		*dst++ = c;
	}
	*dst = '\0';
}
507:
/*
 * badcnt --
 *	Report that the byte count given for conversion `s' is not one the
 *	conversion accepts, and exit with status 1.
 */
void
badcnt(char *s)
{
	errx(1, "%s: bad byte count", s);
	/* NOTREACHED */
}
513:
/*
 * badsfmt --
 *	Report a %s conversion that has neither a precision nor a byte
 *	count, and exit with status 1.
 */
void
badsfmt(void)
{
	errx(1, "%%s: requires a precision or a byte count");
	/* NOTREACHED */
}
519:
/*
 * badfmt --
 *	Report that the format string `fmt' could not be parsed, quoting it
 *	in the message, and exit with status 1.
 */
void
badfmt(const char *fmt)
{
	errx(1, "\"%s\": bad format", fmt);
	/* NOTREACHED */
}
525:
/*
 * badconv --
 *	Report an unknown conversion and exit with status 1.  `ch' is the
 *	conversion text without its leading '%'; the message adds it back.
 */
void
badconv(char *ch)
{
	errx(1, "%%%s: bad conversion character", ch);
	/* NOTREACHED */
}