Annotation of src/usr.bin/hexdump/parse.c, Revision 1.22
1.22 ! tb 1: /* $OpenBSD: parse.c,v 1.21 2016/08/24 03:13:45 guenther Exp $ */
1.9 pvalchev 2: /* $NetBSD: parse.c,v 1.12 2001/12/07 13:37:39 bjh21 Exp $ */
1.2 deraadt 3:
1.1 deraadt 4: /*
1.9 pvalchev 5: * Copyright (c) 1989, 1993
6: * The Regents of the University of California. All rights reserved.
1.1 deraadt 7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
1.12 millert 16: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: */
32:
33: #include <sys/types.h>
34: #include <sys/file.h>
1.9 pvalchev 35:
36: #include <ctype.h>
37: #include <err.h>
38: #include <errno.h>
39: #include <fcntl.h>
1.1 deraadt 40: #include <stdio.h>
41: #include <stdlib.h>
42: #include <string.h>
1.9 pvalchev 43:
1.1 deraadt 44: #include "hexdump.h"
45:
/*
 * Format unit to be used once the input is exhausted.  It is assigned in
 * rewrite() when a format unit containing a %_A conversion is parsed.
 */
46: FU *endfu; /* format at end-of-data */
47:
/*
 * File-local helpers.  The bad*() routines each print a diagnostic with
 * errx(3) using exit status 1 and do not return; escape() translates
 * backslash escape sequences of a format string in place.
 */
1.20 mmcc 48: static __dead void badcnt(char *);
49: static __dead void badconv(char *);
50: static __dead void badfmt(const char *);
51: static __dead void badsfmt(void);
52: static void escape(char *);
53:
/*
 * addfile --
 *	Read format strings from the file `name', one per line, and hand
 *	each of them to add().  Leading white space is skipped; lines that
 *	are empty after that, or whose first non-blank character is '#',
 *	are ignored.  Exits through err(3) if the file cannot be opened or
 *	memory runs out.
 */
void
addfile(char *name)
{
	FILE *fp;
	size_t linelen;
	char *line, *copy, *start;

	fp = fopen(name, "r");
	if (fp == NULL)
		err(1, "fopen %s", name);

	copy = NULL;
	for (;;) {
		line = fgetln(fp, &linelen);
		if (line == NULL)
			break;

		if (line[linelen - 1] != '\n') {
			/*
			 * Line without a terminating newline (end of file):
			 * fgetln() gives no room for a NUL, so work on a
			 * private, terminated copy instead.
			 */
			copy = malloc(linelen + 1);
			if (copy == NULL)
				err(1, NULL);
			memcpy(copy, line, linelen);
			copy[linelen] = '\0';
			line = copy;
		} else {
			/* Turn the newline into the string terminator. */
			line[linelen - 1] = '\0';
		}

		start = line;
		while (isspace((unsigned char)*start))
			start++;
		if (*start != '\0' && *start != '#')
			add(start);
	}
	free(copy);
	(void)fclose(fp);
}
84:
1.6 pvalchev 85: void
1.13 deraadt 86: add(const char *fmt)
1.1 deraadt 87: {
1.9 pvalchev 88: const char *p;
1.1 deraadt 89: static FS **nextfs;
90: FS *tfs;
91: FU *tfu, **nextfu;
1.9 pvalchev 92: const char *savep;
1.1 deraadt 93:
94: /* start new linked list of format units */
1.18 tb 95: if ((tfs = calloc(1, sizeof(FS))) == NULL)
96: err(1, NULL);
1.1 deraadt 97: if (!fshead)
98: fshead = tfs;
99: else
100: *nextfs = tfs;
101: nextfs = &tfs->nextfs;
102: nextfu = &tfs->nextfu;
103:
104: /* take the format string and break it up into format units */
105: for (p = fmt;;) {
106: /* skip leading white space */
1.9 pvalchev 107: for (; isspace((unsigned char)*p); ++p);
1.1 deraadt 108: if (!*p)
109: break;
110:
111: /* allocate a new format unit and link it in */
1.18 tb 112: if ((tfu = calloc(1, sizeof(FU))) == NULL)
113: err(1, NULL);
1.1 deraadt 114: *nextfu = tfu;
115: nextfu = &tfu->nextfu;
116: tfu->reps = 1;
117:
118: /* if leading digit, repetition count */
1.9 pvalchev 119: if (isdigit((unsigned char)*p)) {
120: for (savep = p; isdigit((unsigned char)*p); ++p);
121: if (!isspace((unsigned char)*p) && *p != '/')
122: badfmt(fmt);
1.1 deraadt 123: /* may overwrite either white space or slash */
124: tfu->reps = atoi(savep);
125: tfu->flags = F_SETREP;
126: /* skip trailing white space */
1.9 pvalchev 127: for (++p; isspace((unsigned char)*p); ++p);
1.1 deraadt 128: }
129:
130: /* skip slash and trailing white space */
131: if (*p == '/')
1.9 pvalchev 132: while (isspace((unsigned char)*++p));
1.1 deraadt 133:
134: /* byte count */
1.9 pvalchev 135: if (isdigit((unsigned char)*p)) {
136: for (savep = p; isdigit((unsigned char)*p); ++p);
137: if (!isspace((unsigned char)*p))
138: badfmt(fmt);
1.1 deraadt 139: tfu->bcnt = atoi(savep);
140: /* skip trailing white space */
1.9 pvalchev 141: for (++p; isspace((unsigned char)*p); ++p);
1.1 deraadt 142: }
143:
144: /* format */
145: if (*p != '"')
1.9 pvalchev 146: badfmt(fmt);
1.1 deraadt 147: for (savep = ++p; *p != '"';)
148: if (*p++ == 0)
1.9 pvalchev 149: badfmt(fmt);
1.22 ! tb 150: if ((tfu->fmt = strndup(savep, p - savep)) == NULL)
1.18 tb 151: err(1, NULL);
1.1 deraadt 152: escape(tfu->fmt);
153: p++;
154: }
155: }
156:
1.7 mickey 157: static const char *spec = ".#-+ 0123456789";
1.9 pvalchev 158:
1.6 pvalchev 159: int
1.13 deraadt 160: size(FS *fs)
1.1 deraadt 161: {
1.8 mpech 162: FU *fu;
163: int bcnt, cursize;
164: char *fmt;
1.1 deraadt 165: int prec;
166:
167: /* figure out the data block size needed for each format unit */
168: for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
169: if (fu->bcnt) {
170: cursize += fu->bcnt * fu->reps;
171: continue;
172: }
173: for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
174: if (*fmt != '%')
175: continue;
176: /*
177: * skip any special chars -- save precision in
178: * case it's a %s format.
179: */
1.14 otto 180: while (*++fmt && strchr(spec + 1, *fmt));
1.9 pvalchev 181: if (*fmt == '.' && isdigit((unsigned char)*++fmt)) {
1.1 deraadt 182: prec = atoi(fmt);
1.9 pvalchev 183: while (isdigit((unsigned char)*++fmt));
1.1 deraadt 184: }
185: switch(*fmt) {
186: case 'c':
187: bcnt += 1;
188: break;
189: case 'd': case 'i': case 'o': case 'u':
190: case 'x': case 'X':
191: bcnt += 4;
192: break;
193: case 'e': case 'E': case 'f': case 'g': case 'G':
194: bcnt += 8;
195: break;
196: case 's':
197: bcnt += prec;
198: break;
199: case '_':
200: switch(*++fmt) {
201: case 'c': case 'p': case 'u':
202: bcnt += 1;
203: break;
204: }
205: }
206: }
207: cursize += bcnt * fu->reps;
208: }
1.9 pvalchev 209: return (cursize);
1.1 deraadt 210: }
211:
1.6 pvalchev 212: void
1.13 deraadt 213: rewrite(FS *fs)
1.1 deraadt 214: {
215: enum { NOTOKAY, USEBCNT, USEPREC } sokay;
1.8 mpech 216: PR *pr, **nextpr;
217: FU *fu;
218: char *p1, *p2;
1.21 guenther 219: char savech, *fmtp, cs[4];
1.1 deraadt 220: int nconv, prec;
221:
1.9 pvalchev 222: nextpr = NULL;
223: prec = 0;
1.1 deraadt 224: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
225: /*
1.9 pvalchev 226: * Break each format unit into print units; each conversion
227: * character gets its own.
1.1 deraadt 228: */
229: for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
1.18 tb 230: if ((pr = calloc(1, sizeof(PR))) == NULL)
231: err(1, NULL);
1.1 deraadt 232: if (!fu->nextpr)
233: fu->nextpr = pr;
234: else
235: *nextpr = pr;
236:
1.9 pvalchev 237: /* Skip preceding text and up to the next % sign. */
1.1 deraadt 238: for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
239:
1.9 pvalchev 240: /* Only text in the string. */
1.1 deraadt 241: if (!*p1) {
242: pr->fmt = fmtp;
243: pr->flags = F_TEXT;
244: break;
245: }
246:
247: /*
1.9 pvalchev 248: * Get precision for %s -- if have a byte count, don't
1.1 deraadt 249: * need it.
250: */
251: if (fu->bcnt) {
252: sokay = USEBCNT;
1.9 pvalchev 253: /* Skip to conversion character. */
1.14 otto 254: for (++p1; *p1 && strchr(spec, *p1); ++p1);
1.1 deraadt 255: } else {
1.9 pvalchev 256: /* Skip any special chars, field width. */
1.14 otto 257: while (*++p1 && strchr(spec + 1, *p1));
1.9 pvalchev 258: if (*p1 == '.' &&
259: isdigit((unsigned char)*++p1)) {
1.1 deraadt 260: sokay = USEPREC;
261: prec = atoi(p1);
1.9 pvalchev 262: while (isdigit((unsigned char)*++p1))
263: continue;
264: } else
1.1 deraadt 265: sokay = NOTOKAY;
266: }
267:
1.14 otto 268: p2 = *p1 ? p1 + 1 : p1; /* Set end pointer. */
1.9 pvalchev 269: cs[0] = *p1; /* Set conversion string. */
270: cs[1] = '\0';
1.1 deraadt 271:
272: /*
1.9 pvalchev 273: * Figure out the byte count for each conversion;
1.1 deraadt 274: * rewrite the format as necessary, set up blank-
275: * padding for end of data.
276: */
1.9 pvalchev 277: switch(cs[0]) {
1.1 deraadt 278: case 'c':
279: pr->flags = F_CHAR;
280: switch(fu->bcnt) {
281: case 0: case 1:
282: pr->bcnt = 1;
283: break;
284: default:
1.9 pvalchev 285: p1[1] = '\0';
286: badcnt(p1);
1.1 deraadt 287: }
288: break;
289: case 'd': case 'i':
290: case 'o': case 'u': case 'x': case 'X':
1.14 otto 291: if (cs[0] == 'd' || cs[0] == 'i')
292: pr->flags = F_INT;
293: else
294: pr->flags = F_UINT;
295:
1.21 guenther 296: cs[3] = '\0';
297: cs[2] = cs[0];
298: cs[1] = 'l';
299: cs[0] = 'l';
1.9 pvalchev 300: switch(fu->bcnt) {
1.1 deraadt 301: case 0: case 4:
302: pr->bcnt = 4;
303: break;
304: case 1:
305: pr->bcnt = 1;
306: break;
307: case 2:
308: pr->bcnt = 2;
309: break;
1.9 pvalchev 310: case 8:
311: pr->bcnt = 8;
312: break;
1.1 deraadt 313: default:
1.9 pvalchev 314: p1[1] = '\0';
315: badcnt(p1);
1.1 deraadt 316: }
317: break;
318: case 'e': case 'E': case 'f': case 'g': case 'G':
319: pr->flags = F_DBL;
320: switch(fu->bcnt) {
321: case 0: case 8:
322: pr->bcnt = 8;
323: break;
324: case 4:
325: pr->bcnt = 4;
326: break;
327: default:
1.9 pvalchev 328: p1[1] = '\0';
329: badcnt(p1);
1.1 deraadt 330: }
331: break;
332: case 's':
333: pr->flags = F_STR;
334: switch(sokay) {
335: case NOTOKAY:
1.9 pvalchev 336: badsfmt();
1.1 deraadt 337: case USEBCNT:
338: pr->bcnt = fu->bcnt;
339: break;
340: case USEPREC:
341: pr->bcnt = prec;
342: break;
343: }
344: break;
345: case '_':
346: ++p2;
347: switch(p1[1]) {
348: case 'A':
349: endfu = fu;
350: fu->flags |= F_IGNORE;
351: /* FALLTHROUGH */
352: case 'a':
353: pr->flags = F_ADDRESS;
354: ++p2;
355: switch(p1[2]) {
356: case 'd': case 'o': case'x':
1.21 guenther 357: cs[0] = 'l';
358: cs[1] = 'l';
359: cs[2] = p1[2];
360: cs[3] = '\0';
1.1 deraadt 361: break;
362: default:
1.14 otto 363: if (p1[2])
364: p1[3] = '\0';
1.9 pvalchev 365: badconv(p1);
1.1 deraadt 366: }
367: break;
368: case 'c':
369: case 'p':
370: case 'u':
1.14 otto 371: if (p1[1] == 'c') {
372: pr->flags = F_C;
373: /* cs[0] = 'c'; set in conv_c */
374: } else if (p1[1] == 'p') {
375: pr->flags = F_P;
376: cs[0] = 'c';
377: } else {
378: pr->flags = F_U;
379: /* cs[0] = 'c'; set in conv_u */
380: }
381:
382: switch(fu->bcnt) {
1.1 deraadt 383: case 0: case 1:
384: pr->bcnt = 1;
385: break;
386: default:
387: p1[2] = '\0';
1.9 pvalchev 388: badcnt(p1);
1.1 deraadt 389: }
390: break;
391: default:
1.14 otto 392: if (p1[1])
393: p1[2] = '\0';
1.9 pvalchev 394: badconv(p1);
1.1 deraadt 395: }
396: break;
397: default:
1.14 otto 398: if (cs[0])
399: p1[1] = '\0';
1.9 pvalchev 400: badconv(p1);
1.1 deraadt 401: }
402:
403: /*
1.9 pvalchev 404: * Copy to PR format string, set conversion character
1.1 deraadt 405: * pointer, update original.
406: */
407: savech = *p2;
1.9 pvalchev 408: p1[0] = '\0';
1.22 ! tb 409: if (asprintf(&pr->fmt, "%s%s", fmtp, cs) == -1)
1.18 tb 410: err(1, NULL);
1.1 deraadt 411: *p2 = savech;
412: pr->cchar = pr->fmt + (p1 - fmtp);
413: fmtp = p2;
414:
1.9 pvalchev 415: /* Only one conversion character if byte count. */
1.7 mickey 416: if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
417: errx(1,
1.9 pvalchev 418: "byte count with multiple conversion characters");
1.1 deraadt 419: }
420: /*
1.9 pvalchev 421: * If format unit byte count not specified, figure it out
1.1 deraadt 422: * so can adjust rep count later.
423: */
424: if (!fu->bcnt)
425: for (pr = fu->nextpr; pr; pr = pr->nextpr)
426: fu->bcnt += pr->bcnt;
427: }
428: /*
1.9 pvalchev 429: * If the format string interprets any data at all, and it's
1.1 deraadt 430: * not the same as the blocksize, and its last format unit
431: * interprets any data at all, and has no iteration count,
432: * repeat it as necessary.
433: *
1.9 pvalchev 434: * If, rep count is greater than 1, no trailing whitespace
1.1 deraadt 435: * gets output from the last iteration of the format unit.
436: */
1.9 pvalchev 437: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
1.1 deraadt 438: if (!fu->nextfu && fs->bcnt < blocksize &&
439: !(fu->flags&F_SETREP) && fu->bcnt)
440: fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
441: if (fu->reps > 1) {
1.14 otto 442: if (!fu->nextpr)
443: break;
1.1 deraadt 444: for (pr = fu->nextpr;; pr = pr->nextpr)
445: if (!pr->nextpr)
446: break;
447: for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
1.9 pvalchev 448: p2 = isspace((unsigned char)*p1) ? p1 : NULL;
1.1 deraadt 449: if (p2)
450: pr->nospace = p2;
451: }
452: }
1.9 pvalchev 453: #ifdef DEBUG
454: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
455: (void)printf("fmt:");
456: for (pr = fu->nextpr; pr; pr = pr->nextpr)
457: (void)printf(" {%s}", pr->fmt);
458: (void)printf("\n");
459: }
460: #endif
1.1 deraadt 461: }
462:
/*
 * escape --
 *	Translate backslash escape sequences in the string `p1' in place.
 *	\a, \b, \f, \n, \r, \t and \v become the corresponding control
 *	characters; a backslash followed by any other character yields
 *	that character.  A lone backslash at the very end of the string is
 *	kept as is.  The result is never longer than the input.
 */
static void
escape(char *p1)
{
	const char *src;	/* next character to read */
	char *dst;		/* next position to write */
	char ch;

	src = dst = p1;
	while ((ch = *src++) != '\0') {
		if (ch == '\\') {
			ch = *src++;
			switch (ch) {
			case '\0':
				/* incomplete escape sequence */
				*dst++ = '\\';
				*dst = '\0';
				return;
			case 'a':
				ch = '\007';	/* i.e. '\a' */
				break;
			case 'b':
				ch = '\b';
				break;
			case 'f':
				ch = '\f';
				break;
			case 'n':
				ch = '\n';
				break;
			case 'r':
				ch = '\r';
				break;
			case 't':
				ch = '\t';
				break;
			case 'v':
				ch = '\v';
				break;
			default:
				/* unknown escape: keep the character */
				break;
			}
		}
		*dst++ = ch;
	}
	*dst = '\0';
}
510:
1.20 mmcc 511: static __dead void
1.13 deraadt 512: badcnt(char *s)
1.9 pvalchev 513: {
514: errx(1, "%s: bad byte count", s);
515: }
516:
1.20 mmcc 517: static __dead void
1.13 deraadt 518: badsfmt(void)
1.9 pvalchev 519: {
1.10 mpech 520: errx(1, "%%s: requires a precision or a byte count");
1.9 pvalchev 521: }
522:
1.20 mmcc 523: static __dead void
1.13 deraadt 524: badfmt(const char *fmt)
1.9 pvalchev 525: {
1.10 mpech 526: errx(1, "\"%s\": bad format", fmt);
1.9 pvalchev 527: }
528:
1.20 mmcc 529: static __dead void
1.13 deraadt 530: badconv(char *ch)
1.9 pvalchev 531: {
1.10 mpech 532: errx(1, "%%%s: bad conversion character", ch);
1.1 deraadt 533: }