Annotation of src/usr.bin/hexdump/parse.c, Revision 1.15
1.15 ! ray 1: /* $OpenBSD: parse.c,v 1.14 2004/11/21 19:57:16 otto Exp $ */
1.9 pvalchev 2: /* $NetBSD: parse.c,v 1.12 2001/12/07 13:37:39 bjh21 Exp $ */
1.2 deraadt 3:
1.1 deraadt 4: /*
1.9 pvalchev 5: * Copyright (c) 1989, 1993
6: * The Regents of the University of California. All rights reserved.
1.1 deraadt 7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
1.12 millert 16: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: */
32:
33: #ifndef lint
34: /*static char sccsid[] = "from: @(#)parse.c 5.6 (Berkeley) 3/9/91";*/
1.15 ! ray 35: static char rcsid[] = "$OpenBSD: parse.c,v 1.14 2004/11/21 19:57:16 otto Exp $";
1.1 deraadt 36: #endif /* not lint */
37:
38: #include <sys/types.h>
39: #include <sys/file.h>
1.9 pvalchev 40:
41: #include <ctype.h>
42: #include <err.h>
43: #include <errno.h>
44: #include <fcntl.h>
1.1 deraadt 45: #include <stdio.h>
46: #include <stdlib.h>
47: #include <string.h>
1.9 pvalchev 48:
1.1 deraadt 49: #include "hexdump.h"
50:
FU *endfu;				/* format at end-of-data; rewrite() points it at the unit containing %_A */
52:
/*
 * addfile --
 *	Read format strings from the file `name', one per line, and pass each
 *	of them to add().  Leading white space is skipped; empty lines and
 *	lines whose first non-blank character is '#' are ignored.  Exits via
 *	err() if the file cannot be opened or memory cannot be allocated.
 */
void
addfile(char *name)
{
	FILE *fp;
	char *line, *copy, *start;
	size_t linelen;

	fp = fopen(name, "r");
	if (fp == NULL)
		err(1, "fopen %s", name);

	copy = NULL;
	while ((line = fgetln(fp, &linelen)) != NULL) {
		if (line[linelen - 1] != '\n') {
			/*
			 * Final line has no newline: fgetln()'s buffer has no
			 * room for a terminator, so work on a private copy.
			 */
			copy = malloc(linelen + 1);
			if (copy == NULL)
				err(1, NULL);
			memcpy(copy, line, linelen);
			copy[linelen] = '\0';
			line = copy;
		} else {
			/* Turn the newline into the string terminator. */
			line[linelen - 1] = '\0';
		}

		start = line;
		while (*start != '\0' && isspace((unsigned char)*start))
			start++;

		/* Only lines with real content become format strings. */
		if (*start != '\0' && *start != '#')
			add(start);
	}
	free(copy);
	(void)fclose(fp);
}
83:
1.6 pvalchev 84: void
1.13 deraadt 85: add(const char *fmt)
1.1 deraadt 86: {
1.9 pvalchev 87: const char *p;
1.1 deraadt 88: static FS **nextfs;
89: FS *tfs;
90: FU *tfu, **nextfu;
1.9 pvalchev 91: const char *savep;
1.1 deraadt 92:
93: /* start new linked list of format units */
1.9 pvalchev 94: tfs = emalloc(sizeof(FS));
1.1 deraadt 95: if (!fshead)
96: fshead = tfs;
97: else
98: *nextfs = tfs;
99: nextfs = &tfs->nextfs;
100: nextfu = &tfs->nextfu;
101:
102: /* take the format string and break it up into format units */
103: for (p = fmt;;) {
104: /* skip leading white space */
1.9 pvalchev 105: for (; isspace((unsigned char)*p); ++p);
1.1 deraadt 106: if (!*p)
107: break;
108:
109: /* allocate a new format unit and link it in */
1.9 pvalchev 110: tfu = emalloc(sizeof(FU));
1.1 deraadt 111: *nextfu = tfu;
112: nextfu = &tfu->nextfu;
113: tfu->reps = 1;
114:
115: /* if leading digit, repetition count */
1.9 pvalchev 116: if (isdigit((unsigned char)*p)) {
117: for (savep = p; isdigit((unsigned char)*p); ++p);
118: if (!isspace((unsigned char)*p) && *p != '/')
119: badfmt(fmt);
1.1 deraadt 120: /* may overwrite either white space or slash */
121: tfu->reps = atoi(savep);
122: tfu->flags = F_SETREP;
123: /* skip trailing white space */
1.9 pvalchev 124: for (++p; isspace((unsigned char)*p); ++p);
1.1 deraadt 125: }
126:
127: /* skip slash and trailing white space */
128: if (*p == '/')
1.9 pvalchev 129: while (isspace((unsigned char)*++p));
1.1 deraadt 130:
131: /* byte count */
1.9 pvalchev 132: if (isdigit((unsigned char)*p)) {
133: for (savep = p; isdigit((unsigned char)*p); ++p);
134: if (!isspace((unsigned char)*p))
135: badfmt(fmt);
1.1 deraadt 136: tfu->bcnt = atoi(savep);
137: /* skip trailing white space */
1.9 pvalchev 138: for (++p; isspace((unsigned char)*p); ++p);
1.1 deraadt 139: }
140:
141: /* format */
142: if (*p != '"')
1.9 pvalchev 143: badfmt(fmt);
1.1 deraadt 144: for (savep = ++p; *p != '"';)
145: if (*p++ == 0)
1.9 pvalchev 146: badfmt(fmt);
1.1 deraadt 147: if (!(tfu->fmt = malloc(p - savep + 1)))
1.9 pvalchev 148: nomem();
1.1 deraadt 149: (void) strncpy(tfu->fmt, savep, p - savep);
150: tfu->fmt[p - savep] = '\0';
151: escape(tfu->fmt);
152: p++;
153: }
154: }
155:
1.7 mickey 156: static const char *spec = ".#-+ 0123456789";
1.9 pvalchev 157:
1.6 pvalchev 158: int
1.13 deraadt 159: size(FS *fs)
1.1 deraadt 160: {
1.8 mpech 161: FU *fu;
162: int bcnt, cursize;
163: char *fmt;
1.1 deraadt 164: int prec;
165:
166: /* figure out the data block size needed for each format unit */
167: for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
168: if (fu->bcnt) {
169: cursize += fu->bcnt * fu->reps;
170: continue;
171: }
172: for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
173: if (*fmt != '%')
174: continue;
175: /*
176: * skip any special chars -- save precision in
177: * case it's a %s format.
178: */
1.14 otto 179: while (*++fmt && strchr(spec + 1, *fmt));
1.9 pvalchev 180: if (*fmt == '.' && isdigit((unsigned char)*++fmt)) {
1.1 deraadt 181: prec = atoi(fmt);
1.9 pvalchev 182: while (isdigit((unsigned char)*++fmt));
1.1 deraadt 183: }
184: switch(*fmt) {
185: case 'c':
186: bcnt += 1;
187: break;
188: case 'd': case 'i': case 'o': case 'u':
189: case 'x': case 'X':
190: bcnt += 4;
191: break;
192: case 'e': case 'E': case 'f': case 'g': case 'G':
193: bcnt += 8;
194: break;
195: case 's':
196: bcnt += prec;
197: break;
198: case '_':
199: switch(*++fmt) {
200: case 'c': case 'p': case 'u':
201: bcnt += 1;
202: break;
203: }
204: }
205: }
206: cursize += bcnt * fu->reps;
207: }
1.9 pvalchev 208: return (cursize);
1.1 deraadt 209: }
210:
/*
 * rewrite --
 *	Break every format unit of `fs' into a list of print units (PR), one
 *	per conversion plus at most one trailing text-only unit.  For each
 *	conversion this works out how many input bytes it consumes (pr->bcnt),
 *	what kind of data it prints (pr->flags) and builds a private format
 *	string (pr->fmt) holding the text up to the conversion followed by a
 *	possibly rewritten conversion string.  pr->cchar points at that
 *	conversion string inside pr->fmt.
 *
 *	Afterwards the byte count of units that had none is computed from
 *	their print units, the repetition count of the last unit may be
 *	raised to fill the block, and trailing white space of repeated units
 *	is marked through pr->nospace.
 *
 *	Bad byte counts and conversions are fatal (badcnt(), badconv(),
 *	badsfmt() and errx() do not return).  The unit's format string is
 *	modified in place while it is being split.
 */
void
rewrite(FS *fs)
{
	enum { NOTOKAY, USEBCNT, USEPREC } sokay;	/* where %s gets its length from */
	PR *pr, **nextpr;
	FU *fu;
	char *p1, *p2;
	char savech, *fmtp, cs[3];
	int nconv, prec;
	size_t len;

	nextpr = NULL;
	prec = 0;
	for (fu = fs->nextfu; fu; fu = fu->nextfu) {
		/*
		 * Break each format unit into print units; each conversion
		 * character gets its own.
		 */
		for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
			/* New print unit: first one hangs off fu, the rest chain up. */
			pr = emalloc(sizeof(PR));
			if (!fu->nextpr)
				fu->nextpr = pr;
			else
				*nextpr = pr;

			/* Skip preceding text and up to the next % sign. */
			for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);

			/* Only text in the string. */
			if (!*p1) {
				/* Shares the tail of fu->fmt rather than copying it. */
				pr->fmt = fmtp;
				pr->flags = F_TEXT;
				break;
			}

			/*
			 * Get precision for %s -- if have a byte count, don't
			 * need it.
			 */
			if (fu->bcnt) {
				sokay = USEBCNT;
				/* Skip to conversion character. */
				for (++p1; *p1 && strchr(spec, *p1); ++p1);
			} else {
				/* Skip any special chars, field width. */
				while (*++p1 && strchr(spec + 1, *p1));
				if (*p1 == '.' &&
				    isdigit((unsigned char)*++p1)) {
					sokay = USEPREC;
					prec = atoi(p1);
					while (isdigit((unsigned char)*++p1))
						continue;
				} else
					sokay = NOTOKAY;
			}

			/*
			 * p1 now sits on the conversion character, or on the
			 * terminating NUL if the format ended early; in the
			 * latter case p2 must not move past the NUL.
			 */
			p2 = *p1 ? p1 + 1 : p1;	/* Set end pointer. */
			cs[0] = *p1;		/* Set conversion string. */
			cs[1] = '\0';

			/*
			 * Figure out the byte count for each conversion;
			 * rewrite the format as necessary, set up blank-
			 * padding for end of data.
			 *
			 * On errors the format is cut right after the
			 * offending conversion so the message shows only it.
			 */
			switch(cs[0]) {
			case 'c':
				pr->flags = F_CHAR;
				switch(fu->bcnt) {
				case 0: case 1:
					pr->bcnt = 1;
					break;
				default:
					p1[1] = '\0';
					badcnt(p1);
				}
				break;
			case 'd': case 'i':
			case 'o': case 'u': case 'x': case 'X':
				if (cs[0] == 'd' || cs[0] == 'i')
					pr->flags = F_INT;
				else
					pr->flags = F_UINT;

				/*
				 * Prefix the conversion with 'q', e.g. "x"
				 * becomes "qx" (presumably the BSD printf
				 * quad length modifier -- confirm against
				 * the display code).
				 */
				cs[2] = '\0';
				cs[1] = cs[0];
				cs[0] = 'q';
				switch(fu->bcnt) {
				case 0: case 4:
					pr->bcnt = 4;
					break;
				case 1:
					pr->bcnt = 1;
					break;
				case 2:
					pr->bcnt = 2;
					break;
				case 8:
					pr->bcnt = 8;
					break;
				default:
					p1[1] = '\0';
					badcnt(p1);
				}
				break;
			case 'e': case 'E': case 'f': case 'g': case 'G':
				pr->flags = F_DBL;
				switch(fu->bcnt) {
				case 0: case 8:
					pr->bcnt = 8;
					break;
				case 4:
					pr->bcnt = 4;
					break;
				default:
					p1[1] = '\0';
					badcnt(p1);
				}
				break;
			case 's':
				pr->flags = F_STR;
				switch(sokay) {
				case NOTOKAY:
					/* No byte count and no precision: fatal. */
					badsfmt();
				case USEBCNT:
					pr->bcnt = fu->bcnt;
					break;
				case USEPREC:
					pr->bcnt = prec;
					break;
				}
				break;
			case '_':
				/* Two-character conversion: end pointer moves on. */
				++p2;
				switch(p1[1]) {
				case 'A':
					/* %_A marks the unit used at end of data. */
					endfu = fu;
					fu->flags |= F_IGNORE;
					/* FALLTHROUGH */
				case 'a':
					pr->flags = F_ADDRESS;
					/* Third character selects the number base. */
					++p2;
					switch(p1[2]) {
					case 'd': case 'o': case'x':
						cs[0] = 'q';
						cs[1] = p1[2];
						cs[2] = '\0';
						break;
					default:
						/* Don't write past the NUL if the format ended. */
						if (p1[2])
							p1[3] = '\0';
						badconv(p1);
					}
					break;
				case 'c':
				case 'p':
				case 'u':
					if (p1[1] == 'c') {
						pr->flags = F_C;
						/* cs[0] = 'c';	set in conv_c */
					} else if (p1[1] == 'p') {
						pr->flags = F_P;
						cs[0] = 'c';
					} else {
						pr->flags = F_U;
						/* cs[0] = 'c';	set in conv_u */
					}

					switch(fu->bcnt) {
					case 0: case 1:
						pr->bcnt = 1;
						break;
					default:
						p1[2] = '\0';
						badcnt(p1);
					}
					break;
				default:
					/* Don't write past the NUL if the format ended. */
					if (p1[1])
						p1[2] = '\0';
					badconv(p1);
				}
				break;
			default:
				/* cs[0] is NUL when the format ended after '%'. */
				if (cs[0])
					p1[1] = '\0';
				badconv(p1);
			}

			/*
			 * Copy to PR format string, set conversion character
			 * pointer, update original.
			 *
			 * Terminating the original at p1 makes fmtp the text
			 * plus '%' and flags; cs is appended to it.  cchar
			 * then points at the start of cs inside the copy.
			 *
			 * NOTE(review): nothing on the non-error path seems to
			 * modify *p2, so the save/restore of savech looks
			 * vestigial -- confirm before removing.
			 */
			savech = *p2;
			p1[0] = '\0';
			len = strlen(fmtp) + strlen(cs) + 1;
			pr->fmt = emalloc(len);
			snprintf(pr->fmt, len, "%s%s", fmtp, cs);
			*p2 = savech;
			pr->cchar = pr->fmt + (p1 - fmtp);
			fmtp = p2;

			/* Only one conversion character if byte count. */
			if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
				errx(1,
			    "byte count with multiple conversion characters");
		}
		/*
		 * If format unit byte count not specified, figure it out
		 * so can adjust rep count later.
		 */
		if (!fu->bcnt)
			for (pr = fu->nextpr; pr; pr = pr->nextpr)
				fu->bcnt += pr->bcnt;
	}
	/*
	 * If the format string interprets any data at all, and it's
	 * not the same as the blocksize, and its last format unit
	 * interprets any data at all, and has no iteration count,
	 * repeat it as necessary.
	 *
	 * If, rep count is greater than 1, no trailing whitespace
	 * gets output from the last iteration of the format unit.
	 *
	 * (fs->bcnt and blocksize are set outside this function.)
	 */
	for (fu = fs->nextfu; fu; fu = fu->nextfu) {
		if (!fu->nextfu && fs->bcnt < blocksize &&
		    !(fu->flags&F_SETREP) && fu->bcnt)
			fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
		if (fu->reps > 1) {
			/*
			 * A unit with an empty format has no print units.
			 * NOTE(review): this leaves the loop entirely, so
			 * later units are not processed -- confirm that
			 * `continue' was not intended.
			 */
			if (!fu->nextpr)
				break;
			/* Find the last print unit of this format unit. */
			for (pr = fu->nextpr;; pr = pr->nextpr)
				if (!pr->nextpr)
					break;
			/* p2 ends up at the start of the trailing white space run, if any. */
			for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
				p2 = isspace((unsigned char)*p1) ? p1 : NULL;
			if (p2)
				pr->nospace = p2;
		}
	}
#ifdef DEBUG
	/* Dump the resulting print-unit formats, one line per format unit. */
	for (fu = fs->nextfu; fu; fu = fu->nextfu) {
		(void)printf("fmt:");
		for (pr = fu->nextpr; pr; pr = pr->nextpr)
			(void)printf(" {%s}", pr->fmt);
		(void)printf("\n");
	}
#endif
}
460:
/*
 * escape --
 *	Expand backslash escape sequences in the string `p1' in place.
 *	\a \b \f \n \r \t \v become the corresponding control characters; a
 *	backslash followed by any other character yields that character.  A
 *	lone backslash at the very end of the string is kept as is.  The
 *	result is never longer than the input.
 */
void
escape(char *p1)
{
	const char *src;	/* read cursor */
	char *dst;		/* write cursor, never ahead of src */
	char ch;

	src = dst = p1;
	while ((ch = *src) != '\0') {
		src++;
		if (ch != '\\') {
			*dst++ = ch;
			continue;
		}

		ch = *src;
		if (ch == '\0') {
			/* Incomplete escape sequence: keep the backslash. */
			*dst++ = '\\';
			break;
		}
		src++;

		switch (ch) {
		case 'a':
			ch = '\007';	/* '\a' */
			break;
		case 'b':
			ch = '\b';
			break;
		case 'f':
			ch = '\f';
			break;
		case 'n':
			ch = '\n';
			break;
		case 'r':
			ch = '\r';
			break;
		case 't':
			ch = '\t';
			break;
		case 'v':
			ch = '\v';
			break;
		default:
			/* Unknown escape: the character stands for itself. */
			break;
		}
		*dst++ = ch;
	}
	*dst = '\0';
}
508:
/*
 * badcnt --
 *	Report that the byte count given for conversion `s' is not allowed
 *	and exit with status 1 (errx() does not return).
 */
void
badcnt(char *s)
{
	errx(1, "%s: bad byte count", s);
}
514:
/*
 * badsfmt --
 *	Report a %s conversion that has neither a precision nor a byte count
 *	and exit with status 1 (errx() does not return).
 */
void
badsfmt(void)
{
	/* "%%s" prints a literal "%s" in the message. */
	errx(1, "%%s: requires a precision or a byte count");
}
520:
/*
 * badfmt --
 *	Report the format string `fmt', quoted, as malformed and exit with
 *	status 1 (errx() does not return).
 */
void
badfmt(const char *fmt)
{
	errx(1, "\"%s\": bad format", fmt);
}
526:
/*
 * badconv --
 *	Report the conversion `ch' as invalid and exit with status 1
 *	(errx() does not return).  The message prefixes `ch' with a literal
 *	'%'.
 */
void
badconv(char *ch)
{
	errx(1, "%%%s: bad conversion character", ch);
}