Annotation of src/usr.bin/hexdump/parse.c, Revision 1.21
1.21 ! guenther 1: /* $OpenBSD: parse.c,v 1.20 2016/03/15 04:19:13 mmcc Exp $ */
1.9 pvalchev 2: /* $NetBSD: parse.c,v 1.12 2001/12/07 13:37:39 bjh21 Exp $ */
1.2 deraadt 3:
1.1 deraadt 4: /*
1.9 pvalchev 5: * Copyright (c) 1989, 1993
6: * The Regents of the University of California. All rights reserved.
1.1 deraadt 7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
1.12 millert 16: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: */
32:
#include <sys/types.h>
#include <sys/file.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "hexdump.h"
45:
46: FU *endfu; /* format at end-of-data */
47:
1.20 mmcc 48: static __dead void badcnt(char *);
49: static __dead void badconv(char *);
50: static __dead void badfmt(const char *);
51: static __dead void badsfmt(void);
52: static void escape(char *);
53:
/*
 * Read format strings from the file `name', one per line, and pass each
 * one to add().  Leading white space is skipped; lines that are empty or
 * whose first non-blank character is '#' are ignored.  Exits through
 * err(3) if the file cannot be opened or memory cannot be allocated.
 */
void
addfile(char *name)
{
	FILE *fp;
	char *line, *copy, *start;
	size_t n;

	fp = fopen(name, "r");
	if (fp == NULL)
		err(1, "fopen %s", name);

	copy = NULL;
	while ((line = fgetln(fp, &n)) != NULL) {
		if (line[n - 1] != '\n') {
			/*
			 * Last line of the file with no newline: the buffer
			 * from fgetln(3) has no room for a terminator, so
			 * work on a NUL-terminated private copy instead.
			 */
			copy = malloc(n + 1);
			if (copy == NULL)
				err(1, NULL);
			memcpy(copy, line, n);
			copy[n] = '\0';
			line = copy;
		} else
			line[n - 1] = '\0';

		/* Find the first non-blank character of the line. */
		start = line;
		while (isspace((unsigned char)*start))
			start++;
		if (*start == '\0' || *start == '#')
			continue;
		add(start);
	}
	free(copy);
	(void)fclose(fp);
}
84:
1.6 pvalchev 85: void
1.13 deraadt 86: add(const char *fmt)
1.1 deraadt 87: {
1.9 pvalchev 88: const char *p;
1.1 deraadt 89: static FS **nextfs;
90: FS *tfs;
91: FU *tfu, **nextfu;
1.9 pvalchev 92: const char *savep;
1.1 deraadt 93:
94: /* start new linked list of format units */
1.18 tb 95: if ((tfs = calloc(1, sizeof(FS))) == NULL)
96: err(1, NULL);
1.1 deraadt 97: if (!fshead)
98: fshead = tfs;
99: else
100: *nextfs = tfs;
101: nextfs = &tfs->nextfs;
102: nextfu = &tfs->nextfu;
103:
104: /* take the format string and break it up into format units */
105: for (p = fmt;;) {
106: /* skip leading white space */
1.9 pvalchev 107: for (; isspace((unsigned char)*p); ++p);
1.1 deraadt 108: if (!*p)
109: break;
110:
111: /* allocate a new format unit and link it in */
1.18 tb 112: if ((tfu = calloc(1, sizeof(FU))) == NULL)
113: err(1, NULL);
1.1 deraadt 114: *nextfu = tfu;
115: nextfu = &tfu->nextfu;
116: tfu->reps = 1;
117:
118: /* if leading digit, repetition count */
1.9 pvalchev 119: if (isdigit((unsigned char)*p)) {
120: for (savep = p; isdigit((unsigned char)*p); ++p);
121: if (!isspace((unsigned char)*p) && *p != '/')
122: badfmt(fmt);
1.1 deraadt 123: /* may overwrite either white space or slash */
124: tfu->reps = atoi(savep);
125: tfu->flags = F_SETREP;
126: /* skip trailing white space */
1.9 pvalchev 127: for (++p; isspace((unsigned char)*p); ++p);
1.1 deraadt 128: }
129:
130: /* skip slash and trailing white space */
131: if (*p == '/')
1.9 pvalchev 132: while (isspace((unsigned char)*++p));
1.1 deraadt 133:
134: /* byte count */
1.9 pvalchev 135: if (isdigit((unsigned char)*p)) {
136: for (savep = p; isdigit((unsigned char)*p); ++p);
137: if (!isspace((unsigned char)*p))
138: badfmt(fmt);
1.1 deraadt 139: tfu->bcnt = atoi(savep);
140: /* skip trailing white space */
1.9 pvalchev 141: for (++p; isspace((unsigned char)*p); ++p);
1.1 deraadt 142: }
143:
144: /* format */
145: if (*p != '"')
1.9 pvalchev 146: badfmt(fmt);
1.1 deraadt 147: for (savep = ++p; *p != '"';)
148: if (*p++ == 0)
1.9 pvalchev 149: badfmt(fmt);
1.19 mmcc 150: tfu->fmt = strndup(savep, p - savep);
151: if (tfu->fmt == NULL)
1.18 tb 152: err(1, NULL);
1.1 deraadt 153: escape(tfu->fmt);
154: p++;
155: }
156: }
157:
1.7 mickey 158: static const char *spec = ".#-+ 0123456789";
1.9 pvalchev 159:
1.6 pvalchev 160: int
1.13 deraadt 161: size(FS *fs)
1.1 deraadt 162: {
1.8 mpech 163: FU *fu;
164: int bcnt, cursize;
165: char *fmt;
1.1 deraadt 166: int prec;
167:
168: /* figure out the data block size needed for each format unit */
169: for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
170: if (fu->bcnt) {
171: cursize += fu->bcnt * fu->reps;
172: continue;
173: }
174: for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
175: if (*fmt != '%')
176: continue;
177: /*
178: * skip any special chars -- save precision in
179: * case it's a %s format.
180: */
1.14 otto 181: while (*++fmt && strchr(spec + 1, *fmt));
1.9 pvalchev 182: if (*fmt == '.' && isdigit((unsigned char)*++fmt)) {
1.1 deraadt 183: prec = atoi(fmt);
1.9 pvalchev 184: while (isdigit((unsigned char)*++fmt));
1.1 deraadt 185: }
186: switch(*fmt) {
187: case 'c':
188: bcnt += 1;
189: break;
190: case 'd': case 'i': case 'o': case 'u':
191: case 'x': case 'X':
192: bcnt += 4;
193: break;
194: case 'e': case 'E': case 'f': case 'g': case 'G':
195: bcnt += 8;
196: break;
197: case 's':
198: bcnt += prec;
199: break;
200: case '_':
201: switch(*++fmt) {
202: case 'c': case 'p': case 'u':
203: bcnt += 1;
204: break;
205: }
206: }
207: }
208: cursize += bcnt * fu->reps;
209: }
1.9 pvalchev 210: return (cursize);
1.1 deraadt 211: }
212:
1.6 pvalchev 213: void
1.13 deraadt 214: rewrite(FS *fs)
1.1 deraadt 215: {
216: enum { NOTOKAY, USEBCNT, USEPREC } sokay;
1.8 mpech 217: PR *pr, **nextpr;
218: FU *fu;
219: char *p1, *p2;
1.21 ! guenther 220: char savech, *fmtp, cs[4];
1.1 deraadt 221: int nconv, prec;
1.11 deraadt 222: size_t len;
1.1 deraadt 223:
1.9 pvalchev 224: nextpr = NULL;
225: prec = 0;
1.1 deraadt 226: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
227: /*
1.9 pvalchev 228: * Break each format unit into print units; each conversion
229: * character gets its own.
1.1 deraadt 230: */
231: for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
1.18 tb 232: if ((pr = calloc(1, sizeof(PR))) == NULL)
233: err(1, NULL);
1.1 deraadt 234: if (!fu->nextpr)
235: fu->nextpr = pr;
236: else
237: *nextpr = pr;
238:
1.9 pvalchev 239: /* Skip preceding text and up to the next % sign. */
1.1 deraadt 240: for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
241:
1.9 pvalchev 242: /* Only text in the string. */
1.1 deraadt 243: if (!*p1) {
244: pr->fmt = fmtp;
245: pr->flags = F_TEXT;
246: break;
247: }
248:
249: /*
1.9 pvalchev 250: * Get precision for %s -- if have a byte count, don't
1.1 deraadt 251: * need it.
252: */
253: if (fu->bcnt) {
254: sokay = USEBCNT;
1.9 pvalchev 255: /* Skip to conversion character. */
1.14 otto 256: for (++p1; *p1 && strchr(spec, *p1); ++p1);
1.1 deraadt 257: } else {
1.9 pvalchev 258: /* Skip any special chars, field width. */
1.14 otto 259: while (*++p1 && strchr(spec + 1, *p1));
1.9 pvalchev 260: if (*p1 == '.' &&
261: isdigit((unsigned char)*++p1)) {
1.1 deraadt 262: sokay = USEPREC;
263: prec = atoi(p1);
1.9 pvalchev 264: while (isdigit((unsigned char)*++p1))
265: continue;
266: } else
1.1 deraadt 267: sokay = NOTOKAY;
268: }
269:
1.14 otto 270: p2 = *p1 ? p1 + 1 : p1; /* Set end pointer. */
1.9 pvalchev 271: cs[0] = *p1; /* Set conversion string. */
272: cs[1] = '\0';
1.1 deraadt 273:
274: /*
1.9 pvalchev 275: * Figure out the byte count for each conversion;
1.1 deraadt 276: * rewrite the format as necessary, set up blank-
277: * padding for end of data.
278: */
1.9 pvalchev 279: switch(cs[0]) {
1.1 deraadt 280: case 'c':
281: pr->flags = F_CHAR;
282: switch(fu->bcnt) {
283: case 0: case 1:
284: pr->bcnt = 1;
285: break;
286: default:
1.9 pvalchev 287: p1[1] = '\0';
288: badcnt(p1);
1.1 deraadt 289: }
290: break;
291: case 'd': case 'i':
292: case 'o': case 'u': case 'x': case 'X':
1.14 otto 293: if (cs[0] == 'd' || cs[0] == 'i')
294: pr->flags = F_INT;
295: else
296: pr->flags = F_UINT;
297:
1.21 ! guenther 298: cs[3] = '\0';
! 299: cs[2] = cs[0];
! 300: cs[1] = 'l';
! 301: cs[0] = 'l';
1.9 pvalchev 302: switch(fu->bcnt) {
1.1 deraadt 303: case 0: case 4:
304: pr->bcnt = 4;
305: break;
306: case 1:
307: pr->bcnt = 1;
308: break;
309: case 2:
310: pr->bcnt = 2;
311: break;
1.9 pvalchev 312: case 8:
313: pr->bcnt = 8;
314: break;
1.1 deraadt 315: default:
1.9 pvalchev 316: p1[1] = '\0';
317: badcnt(p1);
1.1 deraadt 318: }
319: break;
320: case 'e': case 'E': case 'f': case 'g': case 'G':
321: pr->flags = F_DBL;
322: switch(fu->bcnt) {
323: case 0: case 8:
324: pr->bcnt = 8;
325: break;
326: case 4:
327: pr->bcnt = 4;
328: break;
329: default:
1.9 pvalchev 330: p1[1] = '\0';
331: badcnt(p1);
1.1 deraadt 332: }
333: break;
334: case 's':
335: pr->flags = F_STR;
336: switch(sokay) {
337: case NOTOKAY:
1.9 pvalchev 338: badsfmt();
1.1 deraadt 339: case USEBCNT:
340: pr->bcnt = fu->bcnt;
341: break;
342: case USEPREC:
343: pr->bcnt = prec;
344: break;
345: }
346: break;
347: case '_':
348: ++p2;
349: switch(p1[1]) {
350: case 'A':
351: endfu = fu;
352: fu->flags |= F_IGNORE;
353: /* FALLTHROUGH */
354: case 'a':
355: pr->flags = F_ADDRESS;
356: ++p2;
357: switch(p1[2]) {
358: case 'd': case 'o': case'x':
1.21 ! guenther 359: cs[0] = 'l';
! 360: cs[1] = 'l';
! 361: cs[2] = p1[2];
! 362: cs[3] = '\0';
1.1 deraadt 363: break;
364: default:
1.14 otto 365: if (p1[2])
366: p1[3] = '\0';
1.9 pvalchev 367: badconv(p1);
1.1 deraadt 368: }
369: break;
370: case 'c':
371: case 'p':
372: case 'u':
1.14 otto 373: if (p1[1] == 'c') {
374: pr->flags = F_C;
375: /* cs[0] = 'c'; set in conv_c */
376: } else if (p1[1] == 'p') {
377: pr->flags = F_P;
378: cs[0] = 'c';
379: } else {
380: pr->flags = F_U;
381: /* cs[0] = 'c'; set in conv_u */
382: }
383:
384: switch(fu->bcnt) {
1.1 deraadt 385: case 0: case 1:
386: pr->bcnt = 1;
387: break;
388: default:
389: p1[2] = '\0';
1.9 pvalchev 390: badcnt(p1);
1.1 deraadt 391: }
392: break;
393: default:
1.14 otto 394: if (p1[1])
395: p1[2] = '\0';
1.9 pvalchev 396: badconv(p1);
1.1 deraadt 397: }
398: break;
399: default:
1.14 otto 400: if (cs[0])
401: p1[1] = '\0';
1.9 pvalchev 402: badconv(p1);
1.1 deraadt 403: }
404:
405: /*
1.9 pvalchev 406: * Copy to PR format string, set conversion character
1.1 deraadt 407: * pointer, update original.
408: */
409: savech = *p2;
1.9 pvalchev 410: p1[0] = '\0';
1.11 deraadt 411: len = strlen(fmtp) + strlen(cs) + 1;
1.18 tb 412: if ((pr->fmt = calloc(1, len)) == NULL)
413: err(1, NULL);
1.11 deraadt 414: snprintf(pr->fmt, len, "%s%s", fmtp, cs);
1.1 deraadt 415: *p2 = savech;
416: pr->cchar = pr->fmt + (p1 - fmtp);
417: fmtp = p2;
418:
1.9 pvalchev 419: /* Only one conversion character if byte count. */
1.7 mickey 420: if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
421: errx(1,
1.9 pvalchev 422: "byte count with multiple conversion characters");
1.1 deraadt 423: }
424: /*
1.9 pvalchev 425: * If format unit byte count not specified, figure it out
1.1 deraadt 426: * so can adjust rep count later.
427: */
428: if (!fu->bcnt)
429: for (pr = fu->nextpr; pr; pr = pr->nextpr)
430: fu->bcnt += pr->bcnt;
431: }
432: /*
1.9 pvalchev 433: * If the format string interprets any data at all, and it's
1.1 deraadt 434: * not the same as the blocksize, and its last format unit
435: * interprets any data at all, and has no iteration count,
436: * repeat it as necessary.
437: *
1.9 pvalchev 438: * If, rep count is greater than 1, no trailing whitespace
1.1 deraadt 439: * gets output from the last iteration of the format unit.
440: */
1.9 pvalchev 441: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
1.1 deraadt 442: if (!fu->nextfu && fs->bcnt < blocksize &&
443: !(fu->flags&F_SETREP) && fu->bcnt)
444: fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
445: if (fu->reps > 1) {
1.14 otto 446: if (!fu->nextpr)
447: break;
1.1 deraadt 448: for (pr = fu->nextpr;; pr = pr->nextpr)
449: if (!pr->nextpr)
450: break;
451: for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
1.9 pvalchev 452: p2 = isspace((unsigned char)*p1) ? p1 : NULL;
1.1 deraadt 453: if (p2)
454: pr->nospace = p2;
455: }
456: }
1.9 pvalchev 457: #ifdef DEBUG
458: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
459: (void)printf("fmt:");
460: for (pr = fu->nextpr; pr; pr = pr->nextpr)
461: (void)printf(" {%s}", pr->fmt);
462: (void)printf("\n");
463: }
464: #endif
1.1 deraadt 465: }
466:
/*
 * Decode backslash escapes in the string `p1', in place.  \a \b \f \n
 * \r \t and \v become the corresponding control character; a backslash
 * before any other character yields that character.  A lone backslash
 * at the end of the string is kept as is.  The result is never longer
 * than the input, so the write position never overtakes the read
 * position.
 */
static void
escape(char *p1)
{
	char *src, *dst;

	for (src = dst = p1; *src != '\0'; src++, dst++) {
		if (*src != '\\') {
			*dst = *src;
			continue;
		}
		switch (*++src) {
		case '\0':
			/* incomplete escape sequence: keep the backslash */
			*dst++ = '\\';
			*dst = '\0';
			return;
		case 'a':
			/* written as octal rather than '\a' */
			*dst = '\007';
			break;
		case 'b':
			*dst = '\b';
			break;
		case 'f':
			*dst = '\f';
			break;
		case 'n':
			*dst = '\n';
			break;
		case 'r':
			*dst = '\r';
			break;
		case 't':
			*dst = '\t';
			break;
		case 'v':
			*dst = '\v';
			break;
		default:
			*dst = *src;
			break;
		}
	}
	*dst = '\0';
}
514:
1.20 mmcc 515: static __dead void
1.13 deraadt 516: badcnt(char *s)
1.9 pvalchev 517: {
518: errx(1, "%s: bad byte count", s);
519: }
520:
1.20 mmcc 521: static __dead void
1.13 deraadt 522: badsfmt(void)
1.9 pvalchev 523: {
1.10 mpech 524: errx(1, "%%s: requires a precision or a byte count");
1.9 pvalchev 525: }
526:
1.20 mmcc 527: static __dead void
1.13 deraadt 528: badfmt(const char *fmt)
1.9 pvalchev 529: {
1.10 mpech 530: errx(1, "\"%s\": bad format", fmt);
1.9 pvalchev 531: }
532:
1.20 mmcc 533: static __dead void
1.13 deraadt 534: badconv(char *ch)
1.9 pvalchev 535: {
1.10 mpech 536: errx(1, "%%%s: bad conversion character", ch);
1.1 deraadt 537: }