Annotation of src/usr.bin/hexdump/parse.c, Revision 1.23
1.23 ! guenther 1: /* $OpenBSD: parse.c,v 1.22 2016/09/04 16:41:43 tb Exp $ */
1.9 pvalchev 2: /* $NetBSD: parse.c,v 1.12 2001/12/07 13:37:39 bjh21 Exp $ */
1.2 deraadt 3:
1.1 deraadt 4: /*
1.9 pvalchev 5: * Copyright (c) 1989, 1993
6: * The Regents of the University of California. All rights reserved.
1.1 deraadt 7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
1.12 millert 16: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: */
32:
#include <ctype.h>
#include <err.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "hexdump.h"
40:
41: FU *endfu; /* format at end-of-data */
42:
1.20 mmcc 43: static __dead void badcnt(char *);
44: static __dead void badconv(char *);
45: static __dead void badfmt(const char *);
46: static __dead void badsfmt(void);
47: static void escape(char *);
48:
/*
 * addfile --
 *	Read format strings from the file `name', one per line, and pass each
 *	one to add().  Leading white space on a line is skipped; empty lines
 *	and lines whose first non-blank character is '#' are ignored.
 *
 *	Exits through err(3) if the file cannot be opened or read.
 */
void
addfile(char *name)
{
	FILE *fp;
	char *line, *p;
	size_t linesize;
	ssize_t len;

	if ((fp = fopen(name, "r")) == NULL)
		err(1, "fopen %s", name);

	/*
	 * getline(3) always returns a NUL-terminated buffer, so a final
	 * line that lacks a newline needs no special treatment.
	 */
	line = NULL;
	linesize = 0;
	while ((len = getline(&line, &linesize, fp)) != -1) {
		if (len > 0 && line[len - 1] == '\n')
			line[len - 1] = '\0';
		for (p = line; isspace((unsigned char)*p); ++p)
			continue;
		if (*p == '\0' || *p == '#')
			continue;
		add(p);
	}
	/* getline() returns -1 for both EOF and failure; tell them apart. */
	if (ferror(fp))
		err(1, "%s", name);
	free(line);
	(void)fclose(fp);
}
79:
1.6 pvalchev 80: void
1.13 deraadt 81: add(const char *fmt)
1.1 deraadt 82: {
1.9 pvalchev 83: const char *p;
1.1 deraadt 84: static FS **nextfs;
85: FS *tfs;
86: FU *tfu, **nextfu;
1.9 pvalchev 87: const char *savep;
1.1 deraadt 88:
89: /* start new linked list of format units */
1.18 tb 90: if ((tfs = calloc(1, sizeof(FS))) == NULL)
91: err(1, NULL);
1.1 deraadt 92: if (!fshead)
93: fshead = tfs;
94: else
95: *nextfs = tfs;
96: nextfs = &tfs->nextfs;
97: nextfu = &tfs->nextfu;
98:
99: /* take the format string and break it up into format units */
100: for (p = fmt;;) {
101: /* skip leading white space */
1.9 pvalchev 102: for (; isspace((unsigned char)*p); ++p);
1.1 deraadt 103: if (!*p)
104: break;
105:
106: /* allocate a new format unit and link it in */
1.18 tb 107: if ((tfu = calloc(1, sizeof(FU))) == NULL)
108: err(1, NULL);
1.1 deraadt 109: *nextfu = tfu;
110: nextfu = &tfu->nextfu;
111: tfu->reps = 1;
112:
113: /* if leading digit, repetition count */
1.9 pvalchev 114: if (isdigit((unsigned char)*p)) {
115: for (savep = p; isdigit((unsigned char)*p); ++p);
116: if (!isspace((unsigned char)*p) && *p != '/')
117: badfmt(fmt);
1.1 deraadt 118: /* may overwrite either white space or slash */
119: tfu->reps = atoi(savep);
120: tfu->flags = F_SETREP;
121: /* skip trailing white space */
1.9 pvalchev 122: for (++p; isspace((unsigned char)*p); ++p);
1.1 deraadt 123: }
124:
125: /* skip slash and trailing white space */
126: if (*p == '/')
1.9 pvalchev 127: while (isspace((unsigned char)*++p));
1.1 deraadt 128:
129: /* byte count */
1.9 pvalchev 130: if (isdigit((unsigned char)*p)) {
131: for (savep = p; isdigit((unsigned char)*p); ++p);
132: if (!isspace((unsigned char)*p))
133: badfmt(fmt);
1.1 deraadt 134: tfu->bcnt = atoi(savep);
135: /* skip trailing white space */
1.9 pvalchev 136: for (++p; isspace((unsigned char)*p); ++p);
1.1 deraadt 137: }
138:
139: /* format */
140: if (*p != '"')
1.9 pvalchev 141: badfmt(fmt);
1.1 deraadt 142: for (savep = ++p; *p != '"';)
143: if (*p++ == 0)
1.9 pvalchev 144: badfmt(fmt);
1.22 tb 145: if ((tfu->fmt = strndup(savep, p - savep)) == NULL)
1.18 tb 146: err(1, NULL);
1.1 deraadt 147: escape(tfu->fmt);
148: p++;
149: }
150: }
151:
/*
 * Characters that may appear between '%' and the conversion character.
 * The '.' comes first so that scanners can pass spec + 1 to stop at the
 * precision and parse it separately.
 */
static const char *const spec = ".#-+ 0123456789";
1.9 pvalchev 153:
1.6 pvalchev 154: int
1.13 deraadt 155: size(FS *fs)
1.1 deraadt 156: {
1.8 mpech 157: FU *fu;
158: int bcnt, cursize;
159: char *fmt;
1.1 deraadt 160: int prec;
161:
162: /* figure out the data block size needed for each format unit */
163: for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
164: if (fu->bcnt) {
165: cursize += fu->bcnt * fu->reps;
166: continue;
167: }
168: for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
169: if (*fmt != '%')
170: continue;
171: /*
172: * skip any special chars -- save precision in
173: * case it's a %s format.
174: */
1.14 otto 175: while (*++fmt && strchr(spec + 1, *fmt));
1.9 pvalchev 176: if (*fmt == '.' && isdigit((unsigned char)*++fmt)) {
1.1 deraadt 177: prec = atoi(fmt);
1.9 pvalchev 178: while (isdigit((unsigned char)*++fmt));
1.1 deraadt 179: }
180: switch(*fmt) {
181: case 'c':
182: bcnt += 1;
183: break;
184: case 'd': case 'i': case 'o': case 'u':
185: case 'x': case 'X':
186: bcnt += 4;
187: break;
188: case 'e': case 'E': case 'f': case 'g': case 'G':
189: bcnt += 8;
190: break;
191: case 's':
192: bcnt += prec;
193: break;
194: case '_':
195: switch(*++fmt) {
196: case 'c': case 'p': case 'u':
197: bcnt += 1;
198: break;
199: }
200: }
201: }
202: cursize += bcnt * fu->reps;
203: }
1.9 pvalchev 204: return (cursize);
1.1 deraadt 205: }
206:
1.6 pvalchev 207: void
1.13 deraadt 208: rewrite(FS *fs)
1.1 deraadt 209: {
210: enum { NOTOKAY, USEBCNT, USEPREC } sokay;
1.8 mpech 211: PR *pr, **nextpr;
212: FU *fu;
213: char *p1, *p2;
1.21 guenther 214: char savech, *fmtp, cs[4];
1.1 deraadt 215: int nconv, prec;
216:
1.9 pvalchev 217: nextpr = NULL;
218: prec = 0;
1.1 deraadt 219: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
220: /*
1.9 pvalchev 221: * Break each format unit into print units; each conversion
222: * character gets its own.
1.1 deraadt 223: */
224: for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
1.18 tb 225: if ((pr = calloc(1, sizeof(PR))) == NULL)
226: err(1, NULL);
1.1 deraadt 227: if (!fu->nextpr)
228: fu->nextpr = pr;
229: else
230: *nextpr = pr;
231:
1.9 pvalchev 232: /* Skip preceding text and up to the next % sign. */
1.1 deraadt 233: for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
234:
1.9 pvalchev 235: /* Only text in the string. */
1.1 deraadt 236: if (!*p1) {
237: pr->fmt = fmtp;
238: pr->flags = F_TEXT;
239: break;
240: }
241:
242: /*
1.9 pvalchev 243: * Get precision for %s -- if have a byte count, don't
1.1 deraadt 244: * need it.
245: */
246: if (fu->bcnt) {
247: sokay = USEBCNT;
1.9 pvalchev 248: /* Skip to conversion character. */
1.14 otto 249: for (++p1; *p1 && strchr(spec, *p1); ++p1);
1.1 deraadt 250: } else {
1.9 pvalchev 251: /* Skip any special chars, field width. */
1.14 otto 252: while (*++p1 && strchr(spec + 1, *p1));
1.9 pvalchev 253: if (*p1 == '.' &&
254: isdigit((unsigned char)*++p1)) {
1.1 deraadt 255: sokay = USEPREC;
256: prec = atoi(p1);
1.9 pvalchev 257: while (isdigit((unsigned char)*++p1))
258: continue;
259: } else
1.1 deraadt 260: sokay = NOTOKAY;
261: }
262:
1.14 otto 263: p2 = *p1 ? p1 + 1 : p1; /* Set end pointer. */
1.9 pvalchev 264: cs[0] = *p1; /* Set conversion string. */
265: cs[1] = '\0';
1.1 deraadt 266:
267: /*
1.9 pvalchev 268: * Figure out the byte count for each conversion;
1.1 deraadt 269: * rewrite the format as necessary, set up blank-
270: * padding for end of data.
271: */
1.9 pvalchev 272: switch(cs[0]) {
1.1 deraadt 273: case 'c':
274: pr->flags = F_CHAR;
275: switch(fu->bcnt) {
276: case 0: case 1:
277: pr->bcnt = 1;
278: break;
279: default:
1.9 pvalchev 280: p1[1] = '\0';
281: badcnt(p1);
1.1 deraadt 282: }
283: break;
284: case 'd': case 'i':
285: case 'o': case 'u': case 'x': case 'X':
1.14 otto 286: if (cs[0] == 'd' || cs[0] == 'i')
287: pr->flags = F_INT;
288: else
289: pr->flags = F_UINT;
290:
1.21 guenther 291: cs[3] = '\0';
292: cs[2] = cs[0];
293: cs[1] = 'l';
294: cs[0] = 'l';
1.9 pvalchev 295: switch(fu->bcnt) {
1.1 deraadt 296: case 0: case 4:
297: pr->bcnt = 4;
298: break;
299: case 1:
300: pr->bcnt = 1;
301: break;
302: case 2:
303: pr->bcnt = 2;
304: break;
1.9 pvalchev 305: case 8:
306: pr->bcnt = 8;
307: break;
1.1 deraadt 308: default:
1.9 pvalchev 309: p1[1] = '\0';
310: badcnt(p1);
1.1 deraadt 311: }
312: break;
313: case 'e': case 'E': case 'f': case 'g': case 'G':
314: pr->flags = F_DBL;
315: switch(fu->bcnt) {
316: case 0: case 8:
317: pr->bcnt = 8;
318: break;
319: case 4:
320: pr->bcnt = 4;
321: break;
322: default:
1.9 pvalchev 323: p1[1] = '\0';
324: badcnt(p1);
1.1 deraadt 325: }
326: break;
327: case 's':
328: pr->flags = F_STR;
329: switch(sokay) {
330: case NOTOKAY:
1.9 pvalchev 331: badsfmt();
1.1 deraadt 332: case USEBCNT:
333: pr->bcnt = fu->bcnt;
334: break;
335: case USEPREC:
336: pr->bcnt = prec;
337: break;
338: }
339: break;
340: case '_':
341: ++p2;
342: switch(p1[1]) {
343: case 'A':
344: endfu = fu;
345: fu->flags |= F_IGNORE;
346: /* FALLTHROUGH */
347: case 'a':
348: pr->flags = F_ADDRESS;
349: ++p2;
350: switch(p1[2]) {
351: case 'd': case 'o': case'x':
1.21 guenther 352: cs[0] = 'l';
353: cs[1] = 'l';
354: cs[2] = p1[2];
355: cs[3] = '\0';
1.1 deraadt 356: break;
357: default:
1.14 otto 358: if (p1[2])
359: p1[3] = '\0';
1.9 pvalchev 360: badconv(p1);
1.1 deraadt 361: }
362: break;
363: case 'c':
364: case 'p':
365: case 'u':
1.14 otto 366: if (p1[1] == 'c') {
367: pr->flags = F_C;
368: /* cs[0] = 'c'; set in conv_c */
369: } else if (p1[1] == 'p') {
370: pr->flags = F_P;
371: cs[0] = 'c';
372: } else {
373: pr->flags = F_U;
374: /* cs[0] = 'c'; set in conv_u */
375: }
376:
377: switch(fu->bcnt) {
1.1 deraadt 378: case 0: case 1:
379: pr->bcnt = 1;
380: break;
381: default:
382: p1[2] = '\0';
1.9 pvalchev 383: badcnt(p1);
1.1 deraadt 384: }
385: break;
386: default:
1.14 otto 387: if (p1[1])
388: p1[2] = '\0';
1.9 pvalchev 389: badconv(p1);
1.1 deraadt 390: }
391: break;
392: default:
1.14 otto 393: if (cs[0])
394: p1[1] = '\0';
1.9 pvalchev 395: badconv(p1);
1.1 deraadt 396: }
397:
398: /*
1.9 pvalchev 399: * Copy to PR format string, set conversion character
1.1 deraadt 400: * pointer, update original.
401: */
402: savech = *p2;
1.9 pvalchev 403: p1[0] = '\0';
1.22 tb 404: if (asprintf(&pr->fmt, "%s%s", fmtp, cs) == -1)
1.18 tb 405: err(1, NULL);
1.1 deraadt 406: *p2 = savech;
407: pr->cchar = pr->fmt + (p1 - fmtp);
408: fmtp = p2;
409:
1.9 pvalchev 410: /* Only one conversion character if byte count. */
1.7 mickey 411: if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
412: errx(1,
1.9 pvalchev 413: "byte count with multiple conversion characters");
1.1 deraadt 414: }
415: /*
1.9 pvalchev 416: * If format unit byte count not specified, figure it out
1.1 deraadt 417: * so can adjust rep count later.
418: */
419: if (!fu->bcnt)
420: for (pr = fu->nextpr; pr; pr = pr->nextpr)
421: fu->bcnt += pr->bcnt;
422: }
423: /*
1.9 pvalchev 424: * If the format string interprets any data at all, and it's
1.1 deraadt 425: * not the same as the blocksize, and its last format unit
426: * interprets any data at all, and has no iteration count,
427: * repeat it as necessary.
428: *
1.9 pvalchev 429: * If, rep count is greater than 1, no trailing whitespace
1.1 deraadt 430: * gets output from the last iteration of the format unit.
431: */
1.9 pvalchev 432: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
1.1 deraadt 433: if (!fu->nextfu && fs->bcnt < blocksize &&
434: !(fu->flags&F_SETREP) && fu->bcnt)
435: fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
436: if (fu->reps > 1) {
1.14 otto 437: if (!fu->nextpr)
438: break;
1.1 deraadt 439: for (pr = fu->nextpr;; pr = pr->nextpr)
440: if (!pr->nextpr)
441: break;
442: for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
1.9 pvalchev 443: p2 = isspace((unsigned char)*p1) ? p1 : NULL;
1.1 deraadt 444: if (p2)
445: pr->nospace = p2;
446: }
447: }
1.9 pvalchev 448: #ifdef DEBUG
449: for (fu = fs->nextfu; fu; fu = fu->nextfu) {
450: (void)printf("fmt:");
451: for (pr = fu->nextpr; pr; pr = pr->nextpr)
452: (void)printf(" {%s}", pr->fmt);
453: (void)printf("\n");
454: }
455: #endif
1.1 deraadt 456: }
457:
/*
 * escape --
 *	Expand backslash escape sequences in the string `p1', in place.
 *
 *	\a, \b, \f, \n, \r, \t and \v become the corresponding control
 *	characters.  A backslash followed by any other character yields that
 *	character.  A lone backslash at the very end of the string is kept.
 *	The result is never longer than the input.
 */
static void
escape(char *p1)
{
	const char *src;
	char *dst;
	char ch;

	src = dst = p1;
	while ((ch = *src++) != '\0') {
		if (ch != '\\') {
			*dst++ = ch;
			continue;
		}

		ch = *src++;
		if (ch == '\0') {
			/* incomplete escape sequence: keep the backslash */
			*dst++ = '\\';
			break;
		}
		switch (ch) {
		case 'a':
			ch = '\007';	/* '\a' */
			break;
		case 'b':
			ch = '\b';
			break;
		case 'f':
			ch = '\f';
			break;
		case 'n':
			ch = '\n';
			break;
		case 'r':
			ch = '\r';
			break;
		case 't':
			ch = '\t';
			break;
		case 'v':
			ch = '\v';
			break;
		default:
			/* unknown escape: the character stands for itself */
			break;
		}
		*dst++ = ch;
	}
	*dst = '\0';
}
505:
1.20 mmcc 506: static __dead void
1.13 deraadt 507: badcnt(char *s)
1.9 pvalchev 508: {
509: errx(1, "%s: bad byte count", s);
510: }
511:
1.20 mmcc 512: static __dead void
1.13 deraadt 513: badsfmt(void)
1.9 pvalchev 514: {
1.10 mpech 515: errx(1, "%%s: requires a precision or a byte count");
1.9 pvalchev 516: }
517:
1.20 mmcc 518: static __dead void
1.13 deraadt 519: badfmt(const char *fmt)
1.9 pvalchev 520: {
1.10 mpech 521: errx(1, "\"%s\": bad format", fmt);
1.9 pvalchev 522: }
523:
1.20 mmcc 524: static __dead void
1.13 deraadt 525: badconv(char *ch)
1.9 pvalchev 526: {
1.10 mpech 527: errx(1, "%%%s: bad conversion character", ch);
1.1 deraadt 528: }