Annotation of src/usr.bin/hexdump/parse.c, Revision 1.19
1.19 ! mmcc 1: /* $OpenBSD: parse.c,v 1.18 2016/02/09 01:29:12 tb Exp $ */
1.9 pvalchev 2: /* $NetBSD: parse.c,v 1.12 2001/12/07 13:37:39 bjh21 Exp $ */
1.2 deraadt 3:
1.1 deraadt 4: /*
1.9 pvalchev 5: * Copyright (c) 1989, 1993
6: * The Regents of the University of California. All rights reserved.
1.1 deraadt 7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
1.12 millert 16: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: */
32:
#include <sys/types.h>
#include <sys/file.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "hexdump.h"
45:
46: FU *endfu; /* format at end-of-data: the format unit holding a %_A conversion, set by rewrite() */
47:
/*
 * addfile --
 *	Read format strings from the file `name', one per line, and pass
 *	each one to add().  Leading white space is skipped; lines that are
 *	empty or whose first non-blank character is '#' are ignored.
 *	Exits through err() if the file cannot be opened or a read fails.
 */
void
addfile(char *name)
{
	FILE *fp;
	char *line, *p;
	size_t cap;
	ssize_t len;

	if ((fp = fopen(name, "r")) == NULL)
		err(1, "fopen %s", name);

	/* getline() allocates and grows `line' itself; it is reused per line. */
	line = NULL;
	cap = 0;
	while ((len = getline(&line, &cap, fp)) != -1) {
		/* The buffer is always NUL-terminated; only strip the newline. */
		if (len > 0 && line[len - 1] == '\n')
			line[len - 1] = '\0';
		for (p = line; isspace((unsigned char)*p); ++p)
			continue;
		if (!*p || *p == '#')
			continue;
		/* add() duplicates the text it keeps, so reusing `line' is safe. */
		add(p);
	}
	/* getline() returns -1 for both EOF and error; tell them apart. */
	if (ferror(fp))
		err(1, "%s", name);
	free(line);
	(void)fclose(fp);
}
78:
1.6 pvalchev 79: void
1.13 deraadt 80: add(const char *fmt)
1.1 deraadt 81: {
1.9 pvalchev 82: const char *p;
1.1 deraadt 83: static FS **nextfs;
84: FS *tfs;
85: FU *tfu, **nextfu;
1.9 pvalchev 86: const char *savep;
1.1 deraadt 87:
88: /* start new linked list of format units */
1.18 tb 89: if ((tfs = calloc(1, sizeof(FS))) == NULL)
90: err(1, NULL);
1.1 deraadt 91: if (!fshead)
92: fshead = tfs;
93: else
94: *nextfs = tfs;
95: nextfs = &tfs->nextfs;
96: nextfu = &tfs->nextfu;
97:
98: /* take the format string and break it up into format units */
99: for (p = fmt;;) {
100: /* skip leading white space */
1.9 pvalchev 101: for (; isspace((unsigned char)*p); ++p);
1.1 deraadt 102: if (!*p)
103: break;
104:
105: /* allocate a new format unit and link it in */
1.18 tb 106: if ((tfu = calloc(1, sizeof(FU))) == NULL)
107: err(1, NULL);
1.1 deraadt 108: *nextfu = tfu;
109: nextfu = &tfu->nextfu;
110: tfu->reps = 1;
111:
112: /* if leading digit, repetition count */
1.9 pvalchev 113: if (isdigit((unsigned char)*p)) {
114: for (savep = p; isdigit((unsigned char)*p); ++p);
115: if (!isspace((unsigned char)*p) && *p != '/')
116: badfmt(fmt);
1.1 deraadt 117: /* may overwrite either white space or slash */
118: tfu->reps = atoi(savep);
119: tfu->flags = F_SETREP;
120: /* skip trailing white space */
1.9 pvalchev 121: for (++p; isspace((unsigned char)*p); ++p);
1.1 deraadt 122: }
123:
124: /* skip slash and trailing white space */
125: if (*p == '/')
1.9 pvalchev 126: while (isspace((unsigned char)*++p));
1.1 deraadt 127:
128: /* byte count */
1.9 pvalchev 129: if (isdigit((unsigned char)*p)) {
130: for (savep = p; isdigit((unsigned char)*p); ++p);
131: if (!isspace((unsigned char)*p))
132: badfmt(fmt);
1.1 deraadt 133: tfu->bcnt = atoi(savep);
134: /* skip trailing white space */
1.9 pvalchev 135: for (++p; isspace((unsigned char)*p); ++p);
1.1 deraadt 136: }
137:
138: /* format */
139: if (*p != '"')
1.9 pvalchev 140: badfmt(fmt);
1.1 deraadt 141: for (savep = ++p; *p != '"';)
142: if (*p++ == 0)
1.9 pvalchev 143: badfmt(fmt);
1.19 ! mmcc 144: tfu->fmt = strndup(savep, p - savep);
! 145: if (tfu->fmt == NULL)
1.18 tb 146: err(1, NULL);
1.1 deraadt 147: escape(tfu->fmt);
148: p++;
149: }
150: }
151:
1.7 mickey 152: static const char *spec = ".#-+ 0123456789"; /* precision dot, flag and digit characters that may follow '%'; spec + 1 leaves out the '.' */
1.9 pvalchev 153:
1.6 pvalchev 154: int
1.13 deraadt 155: size(FS *fs)
1.1 deraadt 156: {
1.8 mpech 157: FU *fu;
158: int bcnt, cursize;
159: char *fmt;
1.1 deraadt 160: int prec;
161:
162: /* figure out the data block size needed for each format unit */
163: for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
164: if (fu->bcnt) {
165: cursize += fu->bcnt * fu->reps;
166: continue;
167: }
168: for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
169: if (*fmt != '%')
170: continue;
171: /*
172: * skip any special chars -- save precision in
173: * case it's a %s format.
174: */
1.14 otto 175: while (*++fmt && strchr(spec + 1, *fmt));
1.9 pvalchev 176: if (*fmt == '.' && isdigit((unsigned char)*++fmt)) {
1.1 deraadt 177: prec = atoi(fmt);
1.9 pvalchev 178: while (isdigit((unsigned char)*++fmt));
1.1 deraadt 179: }
180: switch(*fmt) {
181: case 'c':
182: bcnt += 1;
183: break;
184: case 'd': case 'i': case 'o': case 'u':
185: case 'x': case 'X':
186: bcnt += 4;
187: break;
188: case 'e': case 'E': case 'f': case 'g': case 'G':
189: bcnt += 8;
190: break;
191: case 's':
192: bcnt += prec;
193: break;
194: case '_':
195: switch(*++fmt) {
196: case 'c': case 'p': case 'u':
197: bcnt += 1;
198: break;
199: }
200: }
201: }
202: cursize += bcnt * fu->reps;
203: }
1.9 pvalchev 204: return (cursize);
1.1 deraadt 205: }
206:
1.6 pvalchev 207: void
1.13 deraadt 208: rewrite(FS *fs)
1.1 deraadt 209: {
210: enum { NOTOKAY, USEBCNT, USEPREC } sokay;
1.8 mpech 211: PR *pr, **nextpr;
212: FU *fu;
213: char *p1, *p2;
1.9 pvalchev 214: char savech, *fmtp, cs[3];
1.1 deraadt 215: int nconv, prec;
1.11 deraadt 216: size_t len;
1.1 deraadt 217:
1.9 pvalchev 218: nextpr = NULL;
219: prec = 0;
1.1 deraadt 220: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
221: /*
1.9 pvalchev 222: * Break each format unit into print units; each conversion
223: * character gets its own.
1.1 deraadt 224: */
225: for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
1.18 tb 226: if ((pr = calloc(1, sizeof(PR))) == NULL)
227: err(1, NULL);
1.1 deraadt 228: if (!fu->nextpr)
229: fu->nextpr = pr;
230: else
231: *nextpr = pr;
232:
1.9 pvalchev 233: /* Skip preceding text and up to the next % sign. */
1.1 deraadt 234: for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
235:
1.9 pvalchev 236: /* Only text in the string. */
1.1 deraadt 237: if (!*p1) {
238: pr->fmt = fmtp;
239: pr->flags = F_TEXT;
240: break;
241: }
242:
243: /*
1.9 pvalchev 244: * Get precision for %s -- if have a byte count, don't
1.1 deraadt 245: * need it.
246: */
247: if (fu->bcnt) {
248: sokay = USEBCNT;
1.9 pvalchev 249: /* Skip to conversion character. */
1.14 otto 250: for (++p1; *p1 && strchr(spec, *p1); ++p1);
1.1 deraadt 251: } else {
1.9 pvalchev 252: /* Skip any special chars, field width. */
1.14 otto 253: while (*++p1 && strchr(spec + 1, *p1));
1.9 pvalchev 254: if (*p1 == '.' &&
255: isdigit((unsigned char)*++p1)) {
1.1 deraadt 256: sokay = USEPREC;
257: prec = atoi(p1);
1.9 pvalchev 258: while (isdigit((unsigned char)*++p1))
259: continue;
260: } else
1.1 deraadt 261: sokay = NOTOKAY;
262: }
263:
1.14 otto 264: p2 = *p1 ? p1 + 1 : p1; /* Set end pointer. */
1.9 pvalchev 265: cs[0] = *p1; /* Set conversion string. */
266: cs[1] = '\0';
1.1 deraadt 267:
268: /*
1.9 pvalchev 269: * Figure out the byte count for each conversion;
1.1 deraadt 270: * rewrite the format as necessary, set up blank-
271: * padding for end of data.
272: */
1.9 pvalchev 273: switch(cs[0]) {
1.1 deraadt 274: case 'c':
275: pr->flags = F_CHAR;
276: switch(fu->bcnt) {
277: case 0: case 1:
278: pr->bcnt = 1;
279: break;
280: default:
1.9 pvalchev 281: p1[1] = '\0';
282: badcnt(p1);
1.1 deraadt 283: }
284: break;
285: case 'd': case 'i':
286: case 'o': case 'u': case 'x': case 'X':
1.14 otto 287: if (cs[0] == 'd' || cs[0] == 'i')
288: pr->flags = F_INT;
289: else
290: pr->flags = F_UINT;
291:
292: cs[2] = '\0';
1.9 pvalchev 293: cs[1] = cs[0];
294: cs[0] = 'q';
295: switch(fu->bcnt) {
1.1 deraadt 296: case 0: case 4:
297: pr->bcnt = 4;
298: break;
299: case 1:
300: pr->bcnt = 1;
301: break;
302: case 2:
303: pr->bcnt = 2;
304: break;
1.9 pvalchev 305: case 8:
306: pr->bcnt = 8;
307: break;
1.1 deraadt 308: default:
1.9 pvalchev 309: p1[1] = '\0';
310: badcnt(p1);
1.1 deraadt 311: }
312: break;
313: case 'e': case 'E': case 'f': case 'g': case 'G':
314: pr->flags = F_DBL;
315: switch(fu->bcnt) {
316: case 0: case 8:
317: pr->bcnt = 8;
318: break;
319: case 4:
320: pr->bcnt = 4;
321: break;
322: default:
1.9 pvalchev 323: p1[1] = '\0';
324: badcnt(p1);
1.1 deraadt 325: }
326: break;
327: case 's':
328: pr->flags = F_STR;
329: switch(sokay) {
330: case NOTOKAY:
1.9 pvalchev 331: badsfmt();
1.1 deraadt 332: case USEBCNT:
333: pr->bcnt = fu->bcnt;
334: break;
335: case USEPREC:
336: pr->bcnt = prec;
337: break;
338: }
339: break;
340: case '_':
341: ++p2;
342: switch(p1[1]) {
343: case 'A':
344: endfu = fu;
345: fu->flags |= F_IGNORE;
346: /* FALLTHROUGH */
347: case 'a':
348: pr->flags = F_ADDRESS;
349: ++p2;
350: switch(p1[2]) {
351: case 'd': case 'o': case'x':
1.9 pvalchev 352: cs[0] = 'q';
353: cs[1] = p1[2];
354: cs[2] = '\0';
1.1 deraadt 355: break;
356: default:
1.14 otto 357: if (p1[2])
358: p1[3] = '\0';
1.9 pvalchev 359: badconv(p1);
1.1 deraadt 360: }
361: break;
362: case 'c':
363: case 'p':
364: case 'u':
1.14 otto 365: if (p1[1] == 'c') {
366: pr->flags = F_C;
367: /* cs[0] = 'c'; set in conv_c */
368: } else if (p1[1] == 'p') {
369: pr->flags = F_P;
370: cs[0] = 'c';
371: } else {
372: pr->flags = F_U;
373: /* cs[0] = 'c'; set in conv_u */
374: }
375:
376: switch(fu->bcnt) {
1.1 deraadt 377: case 0: case 1:
378: pr->bcnt = 1;
379: break;
380: default:
381: p1[2] = '\0';
1.9 pvalchev 382: badcnt(p1);
1.1 deraadt 383: }
384: break;
385: default:
1.14 otto 386: if (p1[1])
387: p1[2] = '\0';
1.9 pvalchev 388: badconv(p1);
1.1 deraadt 389: }
390: break;
391: default:
1.14 otto 392: if (cs[0])
393: p1[1] = '\0';
1.9 pvalchev 394: badconv(p1);
1.1 deraadt 395: }
396:
397: /*
1.9 pvalchev 398: * Copy to PR format string, set conversion character
1.1 deraadt 399: * pointer, update original.
400: */
401: savech = *p2;
1.9 pvalchev 402: p1[0] = '\0';
1.11 deraadt 403: len = strlen(fmtp) + strlen(cs) + 1;
1.18 tb 404: if ((pr->fmt = calloc(1, len)) == NULL)
405: err(1, NULL);
1.11 deraadt 406: snprintf(pr->fmt, len, "%s%s", fmtp, cs);
1.1 deraadt 407: *p2 = savech;
408: pr->cchar = pr->fmt + (p1 - fmtp);
409: fmtp = p2;
410:
1.9 pvalchev 411: /* Only one conversion character if byte count. */
1.7 mickey 412: if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
413: errx(1,
1.9 pvalchev 414: "byte count with multiple conversion characters");
1.1 deraadt 415: }
416: /*
1.9 pvalchev 417: * If format unit byte count not specified, figure it out
1.1 deraadt 418: * so can adjust rep count later.
419: */
420: if (!fu->bcnt)
421: for (pr = fu->nextpr; pr; pr = pr->nextpr)
422: fu->bcnt += pr->bcnt;
423: }
424: /*
1.9 pvalchev 425: * If the format string interprets any data at all, and it's
1.1 deraadt 426: * not the same as the blocksize, and its last format unit
427: * interprets any data at all, and has no iteration count,
428: * repeat it as necessary.
429: *
1.9 pvalchev 430: * If, rep count is greater than 1, no trailing whitespace
1.1 deraadt 431: * gets output from the last iteration of the format unit.
432: */
1.9 pvalchev 433: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
1.1 deraadt 434: if (!fu->nextfu && fs->bcnt < blocksize &&
435: !(fu->flags&F_SETREP) && fu->bcnt)
436: fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
437: if (fu->reps > 1) {
1.14 otto 438: if (!fu->nextpr)
439: break;
1.1 deraadt 440: for (pr = fu->nextpr;; pr = pr->nextpr)
441: if (!pr->nextpr)
442: break;
443: for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
1.9 pvalchev 444: p2 = isspace((unsigned char)*p1) ? p1 : NULL;
1.1 deraadt 445: if (p2)
446: pr->nospace = p2;
447: }
448: }
1.9 pvalchev 449: #ifdef DEBUG
450: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
451: (void)printf("fmt:");
452: for (pr = fu->nextpr; pr; pr = pr->nextpr)
453: (void)printf(" {%s}", pr->fmt);
454: (void)printf("\n");
455: }
456: #endif
1.1 deraadt 457: }
458:
/*
 * escape --
 *	Rewrite the string `p1' in place, replacing backslash escape
 *	sequences with the characters they stand for: \a \b \f \n \r \t \v
 *	map to the matching control characters, and a backslash before
 *	any other character yields that character.  A lone backslash at
 *	the very end of the string is kept as is.  The result is never
 *	longer than the input.
 */
void
escape(char *p1)
{
	const char *in;		/* next character to read */
	char *out;		/* next position to write */
	char c;

	in = out = p1;
	while ((c = *in++) != '\0') {
		if (c != '\\') {
			*out++ = c;
			continue;
		}

		/* `in' now sits on the character after the backslash. */
		switch (*in) {
		case '\0':
			/* Incomplete escape sequence: keep the backslash. */
			*out++ = '\\';
			*out = '\0';
			return;
		case 'a':
			c = '\007';	/* i.e. '\a' */
			break;
		case 'b':
			c = '\b';
			break;
		case 'f':
			c = '\f';
			break;
		case 'n':
			c = '\n';
			break;
		case 'r':
			c = '\r';
			break;
		case 't':
			c = '\t';
			break;
		case 'v':
			c = '\v';
			break;
		default:
			c = *in;
			break;
		}
		in++;
		*out++ = c;
	}
	*out = '\0';
}
506:
/*
 * badcnt --
 *	Report that the byte count given for conversion `s' is not
 *	supported and exit with status 1.
 */
void
badcnt(char *s)
{
	warnx("%s: bad byte count", s);
	exit(1);
}
512:
/*
 * badsfmt --
 *	Report a %s conversion that has neither a precision nor a byte
 *	count and exit with status 1.
 */
void
badsfmt(void)
{
	warnx("%%s: requires a precision or a byte count");
	exit(1);
}
518:
/*
 * badfmt --
 *	Report that the format string `fmt' is malformed and exit with
 *	status 1.
 */
void
badfmt(const char *fmt)
{
	warnx("\"%s\": bad format", fmt);
	exit(1);
}
524:
/*
 * badconv --
 *	Report that `ch' (the text following a '%') is not a valid
 *	conversion and exit with status 1.
 */
void
badconv(char *ch)
{
	warnx("%%%s: bad conversion character", ch);
	exit(1);
}