Annotation of src/usr.bin/hexdump/parse.c, Revision 1.20
1.20 ! mmcc 1: /* $OpenBSD: parse.c,v 1.19 2016/02/09 02:13:12 mmcc Exp $ */
1.9 pvalchev 2: /* $NetBSD: parse.c,v 1.12 2001/12/07 13:37:39 bjh21 Exp $ */
1.2 deraadt 3:
1.1 deraadt 4: /*
1.9 pvalchev 5: * Copyright (c) 1989, 1993
6: * The Regents of the University of California. All rights reserved.
1.1 deraadt 7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
1.12 millert 16: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: */
32:
#include <sys/types.h>
#include <sys/file.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "hexdump.h"
45:
/*
 * Format unit to use at end-of-data; rewrite() points this at the format
 * unit containing a %_A conversion.
 */
FU *endfu; /* format at end-of-data */

/*
 * File-local helpers.  Each bad*() function reports a malformed format
 * specification through errx(3) and does not return; escape() expands
 * backslash sequences in a format string in place.
 */
static __dead void badcnt(char *);
static __dead void badconv(char *);
static __dead void badfmt(const char *);
static __dead void badsfmt(void);
static void escape(char *);
! 53:
/*
 * addfile --
 *	Read format strings from the file `name', one per line, and pass each
 *	of them to add().  Leading white space on a line is skipped; lines
 *	that are empty after that, or whose first non-blank character is '#',
 *	are ignored.
 *
 *	Exits through err(3) if the file cannot be opened or if reading it
 *	fails.
 */
void
addfile(char *name)
{
	FILE *fp;
	size_t bufsize;
	ssize_t len;
	char *buf, *p;

	if ((fp = fopen(name, "r")) == NULL)
		err(1, "fopen %s", name);

	/* getline() allocates and grows the line buffer for us. */
	buf = NULL;
	bufsize = 0;
	while ((len = getline(&buf, &bufsize, fp)) != -1) {
		/*
		 * The line is always NUL-terminated, even when the file
		 * ends without a newline; only the newline has to go.
		 */
		if (len > 0 && buf[len - 1] == '\n')
			buf[len - 1] = '\0';
		for (p = buf; isspace((unsigned char)*p); ++p)
			continue;
		if (!*p || *p == '#')
			continue;
		add(p);
	}
	/* getline() returns -1 for both EOF and error; tell them apart. */
	if (ferror(fp))
		err(1, "%s", name);
	free(buf);
	(void)fclose(fp);
}
84:
1.6 pvalchev 85: void
1.13 deraadt 86: add(const char *fmt)
1.1 deraadt 87: {
1.9 pvalchev 88: const char *p;
1.1 deraadt 89: static FS **nextfs;
90: FS *tfs;
91: FU *tfu, **nextfu;
1.9 pvalchev 92: const char *savep;
1.1 deraadt 93:
94: /* start new linked list of format units */
1.18 tb 95: if ((tfs = calloc(1, sizeof(FS))) == NULL)
96: err(1, NULL);
1.1 deraadt 97: if (!fshead)
98: fshead = tfs;
99: else
100: *nextfs = tfs;
101: nextfs = &tfs->nextfs;
102: nextfu = &tfs->nextfu;
103:
104: /* take the format string and break it up into format units */
105: for (p = fmt;;) {
106: /* skip leading white space */
1.9 pvalchev 107: for (; isspace((unsigned char)*p); ++p);
1.1 deraadt 108: if (!*p)
109: break;
110:
111: /* allocate a new format unit and link it in */
1.18 tb 112: if ((tfu = calloc(1, sizeof(FU))) == NULL)
113: err(1, NULL);
1.1 deraadt 114: *nextfu = tfu;
115: nextfu = &tfu->nextfu;
116: tfu->reps = 1;
117:
118: /* if leading digit, repetition count */
1.9 pvalchev 119: if (isdigit((unsigned char)*p)) {
120: for (savep = p; isdigit((unsigned char)*p); ++p);
121: if (!isspace((unsigned char)*p) && *p != '/')
122: badfmt(fmt);
1.1 deraadt 123: /* may overwrite either white space or slash */
124: tfu->reps = atoi(savep);
125: tfu->flags = F_SETREP;
126: /* skip trailing white space */
1.9 pvalchev 127: for (++p; isspace((unsigned char)*p); ++p);
1.1 deraadt 128: }
129:
130: /* skip slash and trailing white space */
131: if (*p == '/')
1.9 pvalchev 132: while (isspace((unsigned char)*++p));
1.1 deraadt 133:
134: /* byte count */
1.9 pvalchev 135: if (isdigit((unsigned char)*p)) {
136: for (savep = p; isdigit((unsigned char)*p); ++p);
137: if (!isspace((unsigned char)*p))
138: badfmt(fmt);
1.1 deraadt 139: tfu->bcnt = atoi(savep);
140: /* skip trailing white space */
1.9 pvalchev 141: for (++p; isspace((unsigned char)*p); ++p);
1.1 deraadt 142: }
143:
144: /* format */
145: if (*p != '"')
1.9 pvalchev 146: badfmt(fmt);
1.1 deraadt 147: for (savep = ++p; *p != '"';)
148: if (*p++ == 0)
1.9 pvalchev 149: badfmt(fmt);
1.19 mmcc 150: tfu->fmt = strndup(savep, p - savep);
151: if (tfu->fmt == NULL)
1.18 tb 152: err(1, NULL);
1.1 deraadt 153: escape(tfu->fmt);
154: p++;
155: }
156: }
157:
/*
 * Characters that size() and rewrite() skip over between a '%' and its
 * conversion character.  Callers that want to stop at a '.' so they can
 * read the precision scan with spec + 1, which leaves the '.' out.
 */
static const char *spec = ".#-+ 0123456789";
1.9 pvalchev 159:
1.6 pvalchev 160: int
1.13 deraadt 161: size(FS *fs)
1.1 deraadt 162: {
1.8 mpech 163: FU *fu;
164: int bcnt, cursize;
165: char *fmt;
1.1 deraadt 166: int prec;
167:
168: /* figure out the data block size needed for each format unit */
169: for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
170: if (fu->bcnt) {
171: cursize += fu->bcnt * fu->reps;
172: continue;
173: }
174: for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
175: if (*fmt != '%')
176: continue;
177: /*
178: * skip any special chars -- save precision in
179: * case it's a %s format.
180: */
1.14 otto 181: while (*++fmt && strchr(spec + 1, *fmt));
1.9 pvalchev 182: if (*fmt == '.' && isdigit((unsigned char)*++fmt)) {
1.1 deraadt 183: prec = atoi(fmt);
1.9 pvalchev 184: while (isdigit((unsigned char)*++fmt));
1.1 deraadt 185: }
186: switch(*fmt) {
187: case 'c':
188: bcnt += 1;
189: break;
190: case 'd': case 'i': case 'o': case 'u':
191: case 'x': case 'X':
192: bcnt += 4;
193: break;
194: case 'e': case 'E': case 'f': case 'g': case 'G':
195: bcnt += 8;
196: break;
197: case 's':
198: bcnt += prec;
199: break;
200: case '_':
201: switch(*++fmt) {
202: case 'c': case 'p': case 'u':
203: bcnt += 1;
204: break;
205: }
206: }
207: }
208: cursize += bcnt * fu->reps;
209: }
1.9 pvalchev 210: return (cursize);
1.1 deraadt 211: }
212:
1.6 pvalchev 213: void
1.13 deraadt 214: rewrite(FS *fs)
1.1 deraadt 215: {
216: enum { NOTOKAY, USEBCNT, USEPREC } sokay;
1.8 mpech 217: PR *pr, **nextpr;
218: FU *fu;
219: char *p1, *p2;
1.9 pvalchev 220: char savech, *fmtp, cs[3];
1.1 deraadt 221: int nconv, prec;
1.11 deraadt 222: size_t len;
1.1 deraadt 223:
1.9 pvalchev 224: nextpr = NULL;
225: prec = 0;
1.1 deraadt 226: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
227: /*
1.9 pvalchev 228: * Break each format unit into print units; each conversion
229: * character gets its own.
1.1 deraadt 230: */
231: for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
1.18 tb 232: if ((pr = calloc(1, sizeof(PR))) == NULL)
233: err(1, NULL);
1.1 deraadt 234: if (!fu->nextpr)
235: fu->nextpr = pr;
236: else
237: *nextpr = pr;
238:
1.9 pvalchev 239: /* Skip preceding text and up to the next % sign. */
1.1 deraadt 240: for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
241:
1.9 pvalchev 242: /* Only text in the string. */
1.1 deraadt 243: if (!*p1) {
244: pr->fmt = fmtp;
245: pr->flags = F_TEXT;
246: break;
247: }
248:
249: /*
1.9 pvalchev 250: * Get precision for %s -- if have a byte count, don't
1.1 deraadt 251: * need it.
252: */
253: if (fu->bcnt) {
254: sokay = USEBCNT;
1.9 pvalchev 255: /* Skip to conversion character. */
1.14 otto 256: for (++p1; *p1 && strchr(spec, *p1); ++p1);
1.1 deraadt 257: } else {
1.9 pvalchev 258: /* Skip any special chars, field width. */
1.14 otto 259: while (*++p1 && strchr(spec + 1, *p1));
1.9 pvalchev 260: if (*p1 == '.' &&
261: isdigit((unsigned char)*++p1)) {
1.1 deraadt 262: sokay = USEPREC;
263: prec = atoi(p1);
1.9 pvalchev 264: while (isdigit((unsigned char)*++p1))
265: continue;
266: } else
1.1 deraadt 267: sokay = NOTOKAY;
268: }
269:
1.14 otto 270: p2 = *p1 ? p1 + 1 : p1; /* Set end pointer. */
1.9 pvalchev 271: cs[0] = *p1; /* Set conversion string. */
272: cs[1] = '\0';
1.1 deraadt 273:
274: /*
1.9 pvalchev 275: * Figure out the byte count for each conversion;
1.1 deraadt 276: * rewrite the format as necessary, set up blank-
277: * padding for end of data.
278: */
1.9 pvalchev 279: switch(cs[0]) {
1.1 deraadt 280: case 'c':
281: pr->flags = F_CHAR;
282: switch(fu->bcnt) {
283: case 0: case 1:
284: pr->bcnt = 1;
285: break;
286: default:
1.9 pvalchev 287: p1[1] = '\0';
288: badcnt(p1);
1.1 deraadt 289: }
290: break;
291: case 'd': case 'i':
292: case 'o': case 'u': case 'x': case 'X':
1.14 otto 293: if (cs[0] == 'd' || cs[0] == 'i')
294: pr->flags = F_INT;
295: else
296: pr->flags = F_UINT;
297:
298: cs[2] = '\0';
1.9 pvalchev 299: cs[1] = cs[0];
300: cs[0] = 'q';
301: switch(fu->bcnt) {
1.1 deraadt 302: case 0: case 4:
303: pr->bcnt = 4;
304: break;
305: case 1:
306: pr->bcnt = 1;
307: break;
308: case 2:
309: pr->bcnt = 2;
310: break;
1.9 pvalchev 311: case 8:
312: pr->bcnt = 8;
313: break;
1.1 deraadt 314: default:
1.9 pvalchev 315: p1[1] = '\0';
316: badcnt(p1);
1.1 deraadt 317: }
318: break;
319: case 'e': case 'E': case 'f': case 'g': case 'G':
320: pr->flags = F_DBL;
321: switch(fu->bcnt) {
322: case 0: case 8:
323: pr->bcnt = 8;
324: break;
325: case 4:
326: pr->bcnt = 4;
327: break;
328: default:
1.9 pvalchev 329: p1[1] = '\0';
330: badcnt(p1);
1.1 deraadt 331: }
332: break;
333: case 's':
334: pr->flags = F_STR;
335: switch(sokay) {
336: case NOTOKAY:
1.9 pvalchev 337: badsfmt();
1.1 deraadt 338: case USEBCNT:
339: pr->bcnt = fu->bcnt;
340: break;
341: case USEPREC:
342: pr->bcnt = prec;
343: break;
344: }
345: break;
346: case '_':
347: ++p2;
348: switch(p1[1]) {
349: case 'A':
350: endfu = fu;
351: fu->flags |= F_IGNORE;
352: /* FALLTHROUGH */
353: case 'a':
354: pr->flags = F_ADDRESS;
355: ++p2;
356: switch(p1[2]) {
357: case 'd': case 'o': case'x':
1.9 pvalchev 358: cs[0] = 'q';
359: cs[1] = p1[2];
360: cs[2] = '\0';
1.1 deraadt 361: break;
362: default:
1.14 otto 363: if (p1[2])
364: p1[3] = '\0';
1.9 pvalchev 365: badconv(p1);
1.1 deraadt 366: }
367: break;
368: case 'c':
369: case 'p':
370: case 'u':
1.14 otto 371: if (p1[1] == 'c') {
372: pr->flags = F_C;
373: /* cs[0] = 'c'; set in conv_c */
374: } else if (p1[1] == 'p') {
375: pr->flags = F_P;
376: cs[0] = 'c';
377: } else {
378: pr->flags = F_U;
379: /* cs[0] = 'c'; set in conv_u */
380: }
381:
382: switch(fu->bcnt) {
1.1 deraadt 383: case 0: case 1:
384: pr->bcnt = 1;
385: break;
386: default:
387: p1[2] = '\0';
1.9 pvalchev 388: badcnt(p1);
1.1 deraadt 389: }
390: break;
391: default:
1.14 otto 392: if (p1[1])
393: p1[2] = '\0';
1.9 pvalchev 394: badconv(p1);
1.1 deraadt 395: }
396: break;
397: default:
1.14 otto 398: if (cs[0])
399: p1[1] = '\0';
1.9 pvalchev 400: badconv(p1);
1.1 deraadt 401: }
402:
403: /*
1.9 pvalchev 404: * Copy to PR format string, set conversion character
1.1 deraadt 405: * pointer, update original.
406: */
407: savech = *p2;
1.9 pvalchev 408: p1[0] = '\0';
1.11 deraadt 409: len = strlen(fmtp) + strlen(cs) + 1;
1.18 tb 410: if ((pr->fmt = calloc(1, len)) == NULL)
411: err(1, NULL);
1.11 deraadt 412: snprintf(pr->fmt, len, "%s%s", fmtp, cs);
1.1 deraadt 413: *p2 = savech;
414: pr->cchar = pr->fmt + (p1 - fmtp);
415: fmtp = p2;
416:
1.9 pvalchev 417: /* Only one conversion character if byte count. */
1.7 mickey 418: if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
419: errx(1,
1.9 pvalchev 420: "byte count with multiple conversion characters");
1.1 deraadt 421: }
422: /*
1.9 pvalchev 423: * If format unit byte count not specified, figure it out
1.1 deraadt 424: * so can adjust rep count later.
425: */
426: if (!fu->bcnt)
427: for (pr = fu->nextpr; pr; pr = pr->nextpr)
428: fu->bcnt += pr->bcnt;
429: }
430: /*
1.9 pvalchev 431: * If the format string interprets any data at all, and it's
1.1 deraadt 432: * not the same as the blocksize, and its last format unit
433: * interprets any data at all, and has no iteration count,
434: * repeat it as necessary.
435: *
1.9 pvalchev 436: * If, rep count is greater than 1, no trailing whitespace
1.1 deraadt 437: * gets output from the last iteration of the format unit.
438: */
1.9 pvalchev 439: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
1.1 deraadt 440: if (!fu->nextfu && fs->bcnt < blocksize &&
441: !(fu->flags&F_SETREP) && fu->bcnt)
442: fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
443: if (fu->reps > 1) {
1.14 otto 444: if (!fu->nextpr)
445: break;
1.1 deraadt 446: for (pr = fu->nextpr;; pr = pr->nextpr)
447: if (!pr->nextpr)
448: break;
449: for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
1.9 pvalchev 450: p2 = isspace((unsigned char)*p1) ? p1 : NULL;
1.1 deraadt 451: if (p2)
452: pr->nospace = p2;
453: }
454: }
1.9 pvalchev 455: #ifdef DEBUG
456: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
457: (void)printf("fmt:");
458: for (pr = fu->nextpr; pr; pr = pr->nextpr)
459: (void)printf(" {%s}", pr->fmt);
460: (void)printf("\n");
461: }
462: #endif
1.1 deraadt 463: }
464:
/*
 * escape --
 *	Expand the backslash escape sequences in the string p1, in place.
 *	\a, \b, \f, \n, \r, \t and \v are replaced by the control character
 *	they name; a backslash in front of any other character is dropped
 *	and that character is kept; a backslash at the very end of the
 *	string is kept as it is.
 */
static void
escape(char *p1)
{
	static const char names[] = "abfnrtv";
	static const char values[] = "\007\b\f\n\r\t\v";
	const char *hit;
	char *in, *out;
	char ch;

	/* The result is never longer than the input: convert in place. */
	in = out = p1;
	while ((ch = *in++) != '\0') {
		if (ch != '\\') {
			*out++ = ch;
			continue;
		}
		ch = *in++;
		if (ch == '\0') {
			/* incomplete escape sequence */
			*out++ = '\\';
			break;
		}
		hit = strchr(names, ch);
		*out++ = hit != NULL ? values[hit - names] : ch;
	}
	*out = '\0';
}
512:
/*
 * badcnt --
 *	Report that the byte count is not valid for the conversion `s'
 *	and exit with status 1.
 */
static __dead void
badcnt(char *s)
{
	errx(1, "%s: bad byte count", s);
}
518:
/*
 * badsfmt --
 *	Report a %s conversion that has neither a precision nor a byte
 *	count and exit with status 1.
 */
static __dead void
badsfmt(void)
{
	errx(1, "%%s: requires a precision or a byte count");
}
524:
/*
 * badfmt --
 *	Report the format string `fmt' as malformed and exit with
 *	status 1.
 */
static __dead void
badfmt(const char *fmt)
{
	errx(1, "\"%s\": bad format", fmt);
}
530:
/*
 * badconv --
 *	Report the conversion `ch' (printed with a leading '%') as
 *	unsupported and exit with status 1.
 */
static __dead void
badconv(char *ch)
{
	errx(1, "%%%s: bad conversion character", ch);
}
1.1 deraadt 535: }