Annotation of src/usr.bin/unifdef/unifdef.c, Revision 1.21
1.1 deraadt 1: /*
1.16 sthen 2: * Copyright (c) 2002 - 2014 Tony Finch <dot@dotat.at>
1.1 deraadt 3: *
4: * Redistribution and use in source and binary forms, with or without
5: * modification, are permitted provided that the following conditions
6: * are met:
7: * 1. Redistributions of source code must retain the above copyright
8: * notice, this list of conditions and the following disclaimer.
9: * 2. Redistributions in binary form must reproduce the above copyright
10: * notice, this list of conditions and the following disclaimer in the
11: * documentation and/or other materials provided with the distribution.
12: *
1.16 sthen 13: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1.1 deraadt 14: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1.16 sthen 16: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1.1 deraadt 17: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23: * SUCH DAMAGE.
24: */
25:
26: /*
27: * unifdef - remove ifdef'ed lines
28: *
1.16 sthen 29: * This code was derived from software contributed to Berkeley by Dave Yost.
30: * It was rewritten to support ANSI C by Tony Finch. The original version
31: * of unifdef carried the 4-clause BSD copyright licence. None of its code
32: * remains in this version (though some of the names remain) so it now
33: * carries a more liberal licence.
34: *
1.1 deraadt 35: * Wishlist:
36: * provide an option which will append the name of the
37: * appropriate symbol after #else's and #endif's
38: * provide an option which will check symbols after
39: * #else's and #endif's to see that they match their
40: * corresponding #ifdef or #ifndef
1.9 deraadt 41: *
1.16 sthen 42: * These require better buffer handling, which would also make
43: * it possible to handle all "dodgy" directives correctly.
1.1 deraadt 44: */
45:
1.16 sthen 46: #include "unifdef.h"
47:
/*
 * Identification text. version() walks this string and prints every
 * run of characters enclosed between a pair of '$' signs, so each
 * keyword below must keep both its opening and its closing '$'.
 * The leading part of the string comes from version.h.
 */
48: static const char copyright[] =
49: #include "version.h"
1.20 deraadt 50: "@(#) $Author: deraadt $\n"
1.16 sthen 51: "@(#) $URL: http://dotat.at/prog/unifdef $\n"
52: ;
1.1 deraadt 53:
/*
 * Types of input lines.
 *
 * The first ten enumerators (LT_TRUEI .. LT_ENDIF) are the directive
 * types.  Adding LT_DODGY to one of them yields the matching "dodgy"
 * type (a directive that is not on one line), so the dodgy types span
 * LT_DODGY .. LT_DODGY_LAST.  The enumerator order is significant: the
 * values index linetype_name[] and the columns of trans_table[][].
 */
typedef enum {
	LT_TRUEI,		/* a true #if with ignore flag */
	LT_FALSEI,		/* a false #if with ignore flag */
	LT_IF,			/* an unknown #if */
	LT_TRUE,		/* a true #if */
	LT_FALSE,		/* a false #if */
	LT_ELIF,		/* an unknown #elif */
	LT_ELTRUE,		/* a true #elif */
	LT_ELFALSE,		/* a false #elif */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* flag: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,		/* ordinary line */
	LT_EOF,			/* end of file */
	LT_ERROR,		/* unevaluable #if */
	LT_COUNT
} Linetype;

/* Printable names indexed by Linetype; must have LT_COUNT entries. */
static char const * const linetype_name[] = {
	"TRUEI", "FALSEI", "IF", "TRUE", "FALSE",
	"ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF",
	"DODGY TRUEI", "DODGY FALSEI",
	"DODGY IF", "DODGY TRUE", "DODGY FALSE",
	"DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE",
	"DODGY ELSE", "DODGY ENDIF",
	"PLAIN", "EOF", "ERROR"
};

/*
 * Convert an #if line type to the corresponding #elif line type
 * (LT_IF -> LT_ELIF, LT_TRUE -> LT_ELTRUE, LT_FALSE -> LT_ELFALSE).
 * The argument is parenthesised so that any expression may be passed.
 */
#define linetype_if2elif(lt) ((Linetype)((lt) - LT_IF + LT_ELIF))
/* Convert a directive line type to its dodgy counterpart. */
#define linetype_2dodgy(lt) ((Linetype)((lt) + LT_DODGY))
86:
/*
 * State of #if processing for one nesting level.  The values index the
 * rows of trans_table[][] and ifstate_name[], so the order matters.
 */
typedef enum {
	IS_OUTSIDE,
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT
} Ifstate;

/* Printable state names for debug output, one per Ifstate value. */
static char const * const ifstate_name[] = {
	[IS_OUTSIDE]       = "OUTSIDE",
	[IS_FALSE_PREFIX]  = "FALSE_PREFIX",
	[IS_TRUE_PREFIX]   = "TRUE_PREFIX",
	[IS_PASS_MIDDLE]   = "PASS_MIDDLE",
	[IS_FALSE_MIDDLE]  = "FALSE_MIDDLE",
	[IS_TRUE_MIDDLE]   = "TRUE_MIDDLE",
	[IS_PASS_ELSE]     = "PASS_ELSE",
	[IS_FALSE_ELSE]    = "FALSE_ELSE",
	[IS_TRUE_ELSE]     = "TRUE_ELSE",
	[IS_FALSE_TRAILER] = "FALSE_TRAILER"
};
108:
/*
 * State of the comment parser.  NO_COMMENT is zero (false), so the
 * state can be tested directly as a truth value.
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT,	/* star-backslash-newline in a C comment */
	CHAR_LITERAL,		/* inside '' */
	STRING_LITERAL		/* inside "" */
} Comment_state;

/* Printable names for debug output, one per Comment_state value. */
static char const * const comment_name[] = {
	[NO_COMMENT]        = "NO",
	[C_COMMENT]         = "C",
	[CXX_COMMENT]       = "CXX",
	[STARTING_COMMENT]  = "STARTING",
	[FINISHING_COMMENT] = "FINISHING",
	[CHAR_LITERAL]      = "CHAR",
	[STRING_LITERAL]    = "STRING"
};
1.7 deraadt 123:
/*
 * State of the preprocessor line parser: how much of the current line
 * could still belong to a preprocessor directive.
 */
typedef enum {
	LS_START,	/* only space and comments on this line */
	LS_HASH,	/* only space, comments, and a hash */
	LS_DIRTY	/* this line can't be a preprocessor line */
} Line_state;

/* Printable names for debug output, one per Line_state value. */
static char const * const linestate_name[] = {
	[LS_START] = "START",
	[LS_HASH]  = "HASH",
	[LS_DIRTY] = "DIRTY"
};
1.7 deraadt 134:
135: /*
1.8 deraadt 136: * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
1.7 deraadt 137: */
/*
 * MAXDEPTH sizes the ifstate[], ignoring[] and stifline[] stacks;
 * MAXLINE bounds how much is read into tline; MAXSYMS sizes the
 * symname[], value[] and ignore[] tables.
 * NOTE(review): MAXSYMS is larger than the 4095 macro identifiers the
 * cited section lists as a minimum -- presumably raised on purpose.
 */
1.8 deraadt 138: #define MAXDEPTH 64 /* maximum #if nesting */
139: #define MAXLINE 4096 /* maximum length of line */
1.16 sthen 140: #define MAXSYMS 16384 /* maximum number of symbols */
1.7 deraadt 141:
142: /*
1.9 deraadt 143: * Sometimes when editing a keyword the replacement text is longer, so
144: * we leave some space at the end of the tline buffer to accommodate this.
145: */
146: #define EDITSLOP 10
147:
148: /*
1.8 deraadt 149: * Globals.
1.7 deraadt 150: */
151:
/* Command line option flags, set once by main(). */
1.16 sthen 152: static bool compblank; /* -B: compress blank lines */
153: static bool lnblank; /* -b: blank deleted lines */
1.8 deraadt 154: static bool complement; /* -c: do the complement */
155: static bool debugging; /* -d: debugging reports */
1.16 sthen 156: static bool inplace; /* -m: modify in place */
1.9 deraadt 157: static bool iocccok; /* -e: fewer IOCCC errors */
1.16 sthen 158: static bool strictlogic; /* -K: keep ambiguous #ifs */
1.8 deraadt 159: static bool killconsts; /* -k: eval constant #ifs */
1.16 sthen 160: static bool lnnum; /* -n: add #line directives */
1.8 deraadt 161: static bool symlist; /* -s: output symbol list */
1.16 sthen 162: static bool symdepth; /* -S: output symbol depth */
1.8 deraadt 163: static bool text; /* -t: this is a text file */
164:
/*
 * Symbol table: three parallel arrays indexed by symbol number.
 * parseline() treats value[i] == NULL as "symbol i is undefined".
 */
165: static const char *symname[MAXSYMS]; /* symbol name */
166: static const char *value[MAXSYMS]; /* -Dsym=value */
167: static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */
168: static int nsyms; /* number of symbols */
169:
/* Per-file I/O state, set up by processinout(). */
170: static FILE *input; /* input file pointer */
171: static const char *filename; /* input file name */
172: static int linenum; /* current line number */
1.16 sthen 173: static const char *linefile; /* file name for #line */
174: static FILE *output; /* output file pointer */
175: static const char *ofilename; /* output file name */
176: static const char *backext; /* backup extension */
/* tempname is non-NULL only while a temporary output file exists;
   cleantemp() removes that file at exit. */
177: static char *tempname; /* avoid splatting input */
1.8 deraadt 178:
1.9 deraadt 179: static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */
/* keyword points into tline at the directive name of the current line */
1.8 deraadt 180: static char *keyword; /* used for editing #elif's */
181:
1.16 sthen 182: /*
183: * When processing a file, the output's newline style will match the
184: * input's, and unifdef correctly handles CRLF or LF endings whatever
185: * the platform's native style. The stdio streams are opened in binary
186: * mode to accommodate platforms whose native newline style is CRLF.
187: * When the output isn't a processed input file (when it is error /
188: * debug / diagnostic messages) then unifdef uses native line endings.
189: */
190:
/* newline is reset to NULL by process() and chosen by parseline() from
   the first line read. */
191: static const char *newline; /* input file format */
192: static const char newline_unix[] = "\n";
193: static const char newline_crlf[] = "\r\n";
194:
/* Parser and state machine state; the arrays are stacks indexed by depth. */
1.8 deraadt 195: static Comment_state incomment; /* comment parser state */
196: static Line_state linestate; /* #if line parser state */
197: static Ifstate ifstate[MAXDEPTH]; /* #if processor state */
198: static bool ignoring[MAXDEPTH]; /* ignore comments state */
199: static int stifline[MAXDEPTH]; /* start of current #if */
200: static int depth; /* current #if nesting */
1.16 sthen 201: static int delcount; /* count of deleted lines */
202: static unsigned blankcount; /* count of blank lines */
203: static unsigned blankmax; /* maximum recent blankcount */
/* NOTE(review): "constexpr" is a keyword in C23, so this identifier will
   not compile under -std=c23; renaming needs every user to change too. */
204: static bool constexpr; /* constant #if expression */
205: static bool zerosyms; /* to format symdepth output */
206: static bool firstsym; /* ditto */
1.8 deraadt 207:
/* exitmode comes from -x (0, 1 or 2); exitstat is set to 1 by flushline()
   whenever a line is dropped. main() combines the two at exit. */
1.16 sthen 208: static int exitmode; /* exit status mode */
1.8 deraadt 209: static int exitstat; /* program exit status */
210:
1.16 sthen 211: static void addsym1(bool, bool, char *);
212: static void addsym2(bool, const char *, const char *);
213: static char *astrcat(const char *, const char *);
214: static void cleantemp(void);
215: static void closeio(void);
1.8 deraadt 216: static void debug(const char *, ...);
1.16 sthen 217: static void debugsym(const char *, int);
218: static bool defundef(void);
219: static void defundefile(const char *);
220: static void done(void);
1.8 deraadt 221: static void error(const char *);
1.16 sthen 222: static int findsym(const char **);
1.8 deraadt 223: static void flushline(bool);
1.16 sthen 224: static void hashline(void);
225: static void help(void);
1.8 deraadt 226: static Linetype ifeval(const char **);
1.9 deraadt 227: static void ignoreoff(void);
228: static void ignoreon(void);
1.16 sthen 229: static void indirectsym(void);
1.9 deraadt 230: static void keywordedit(const char *);
1.16 sthen 231: static const char *matchsym(const char *, const char *);
1.8 deraadt 232: static void nest(void);
1.16 sthen 233: static Linetype parseline(void);
1.8 deraadt 234: static void process(void);
1.16 sthen 235: static void processinout(const char *, const char *);
236: static const char *skipargs(const char *);
1.8 deraadt 237: static const char *skipcomment(const char *);
1.16 sthen 238: static const char *skiphash(void);
239: static const char *skipline(const char *);
1.8 deraadt 240: static const char *skipsym(const char *);
241: static void state(Ifstate);
1.16 sthen 242: static void unnest(void);
1.8 deraadt 243: static void usage(void);
1.16 sthen 244: static void version(void);
245: static const char *xstrdup(const char *, const char *);
1.7 deraadt 246:
/*
 * True if character c cannot be part of an identifier, i.e. it is
 * neither alphanumeric nor an underscore.  The argument is evaluated
 * twice, so it must not have side effects; it is parenthesised so that
 * the cast and the comparison apply to the whole argument expression.
 */
#define endsym(c) (!isalnum((unsigned char)(c)) && (c) != '_')
1.7 deraadt 248:
1.8 deraadt 249: /*
250: * The main program.
251: */
1.7 deraadt 252: int
253: main(int argc, char *argv[])
254: {
1.19 deraadt 255: const char *errstr;
1.7 deraadt 256: int opt;
257:
1.16 sthen 258: while ((opt = getopt(argc, argv, "i:D:U:f:I:M:o:x:bBcdehKklmnsStV")) != -1)
1.7 deraadt 259: switch (opt) {
260: case 'i': /* treat stuff controlled by these symbols as text */
261: /*
262: * For strict backwards-compatibility the U or D
263: * should be immediately after the -i but it doesn't
264: * matter much if we relax that requirement.
265: */
266: opt = *optarg++;
267: if (opt == 'D')
1.16 sthen 268: addsym1(true, true, optarg);
1.7 deraadt 269: else if (opt == 'U')
1.16 sthen 270: addsym1(true, false, optarg);
1.7 deraadt 271: else
272: usage();
273: break;
274: case 'D': /* define a symbol */
1.16 sthen 275: addsym1(false, true, optarg);
1.7 deraadt 276: break;
277: case 'U': /* undef a symbol */
1.16 sthen 278: addsym1(false, false, optarg);
279: break;
280: case 'I': /* no-op for compatibility with cpp */
281: break;
282: case 'b': /* blank deleted lines instead of omitting them */
283: case 'l': /* backwards compatibility */
284: lnblank = true;
1.7 deraadt 285: break;
1.16 sthen 286: case 'B': /* compress blank lines around removed section */
287: compblank = true;
1.11 avsm 288: break;
1.7 deraadt 289: case 'c': /* treat -D as -U and vice versa */
290: complement = true;
291: break;
1.8 deraadt 292: case 'd':
293: debugging = true;
294: break;
1.9 deraadt 295: case 'e': /* fewer errors from dodgy lines */
296: iocccok = true;
297: break;
1.16 sthen 298: case 'f': /* definitions file */
299: defundefile(optarg);
300: break;
301: case 'h':
302: help();
303: break;
304: case 'K': /* keep ambiguous #ifs */
305: strictlogic = true;
306: break;
1.7 deraadt 307: case 'k': /* process constant #ifs */
308: killconsts = true;
309: break;
1.16 sthen 310: case 'm': /* modify in place */
311: inplace = true;
312: break;
313: case 'M': /* modify in place and keep backup */
314: inplace = true;
315: backext = optarg;
316: break;
317: case 'n': /* add #line directive after deleted lines */
318: lnnum = true;
319: break;
320: case 'o': /* output to a file */
321: ofilename = optarg;
1.7 deraadt 322: break;
323: case 's': /* only output list of symbols that control #ifs */
324: symlist = true;
325: break;
1.16 sthen 326: case 'S': /* list symbols with their nesting depth */
327: symlist = symdepth = true;
328: break;
1.8 deraadt 329: case 't': /* don't parse C comments */
1.7 deraadt 330: text = true;
331: break;
1.16 sthen 332: case 'V':
333: version();
334: break;
335: case 'x':
1.19 deraadt 336: exitmode = strtonum(optarg, 0, 2, &errstr);
337: if (errstr)
338: errx(1, "-x %s: %s", optarg, errstr);
1.16 sthen 339: break;
1.7 deraadt 340: default:
341: usage();
342: }
343: argc -= optind;
344: argv += optind;
1.16 sthen 345: if (compblank && lnblank)
346: errx(2, "-B and -b are mutually exclusive");
347: if (symlist && (ofilename != NULL || inplace || argc > 1))
348: errx(2, "-s only works with one input file");
349: if (argc > 1 && ofilename != NULL)
350: errx(2, "-o cannot be used with multiple input files");
351: if (argc > 1 && !inplace)
352: errx(2, "multiple input files require -m or -M");
1.21 ! tedu 353: if (argc == 0 && inplace)
! 354: errx(2, "can't edit stdin in place");
1.16 sthen 355: if (argc == 0)
356: argc = 1;
357: if (argc == 1 && !inplace && ofilename == NULL)
358: ofilename = "-";
359: indirectsym();
360:
361: atexit(cleantemp);
362: if (ofilename != NULL)
363: processinout(*argv, ofilename);
364: else while (argc-- > 0) {
365: processinout(*argv, *argv);
366: argv++;
367: }
368: switch(exitmode) {
369: case(0): exit(exitstat);
370: case(1): exit(!exitstat);
371: case(2): exit(0);
372: default: abort(); /* bug */
1.7 deraadt 373: }
1.16 sthen 374: }
375:
376: /*
377: * File logistics.
378: */
379: static void
380: processinout(const char *ifn, const char *ofn)
381: {
382: struct stat st;
383:
384: if (ifn == NULL || strcmp(ifn, "-") == 0) {
385: filename = "[stdin]";
386: linefile = NULL;
387: input = fbinmode(stdin);
1.7 deraadt 388: } else {
1.16 sthen 389: filename = ifn;
390: linefile = ifn;
391: input = fopen(ifn, "rb");
392: if (input == NULL)
393: err(2, "can't open %s", ifn);
394: }
395: if (strcmp(ofn, "-") == 0) {
396: output = fbinmode(stdout);
397: process();
398: return;
399: }
400: if (stat(ofn, &st) < 0) {
401: output = fopen(ofn, "wb");
402: if (output == NULL)
403: err(2, "can't create %s", ofn);
1.8 deraadt 404: process();
1.16 sthen 405: return;
406: }
407:
408: tempname = astrcat(ofn, ".XXXXXX");
409: output = mktempmode(tempname, st.st_mode);
410: if (output == NULL)
411: err(2, "can't create %s", tempname);
412:
413: process();
414:
415: if (backext != NULL) {
416: char *backname = astrcat(ofn, backext);
417: if (rename(ofn, backname) < 0)
418: err(2, "can't rename \"%s\" to \"%s\"", ofn, backname);
419: free(backname);
420: }
421: if (replace(tempname, ofn) < 0)
422: err(2, "can't rename \"%s\" to \"%s\"", tempname, ofn);
423: free(tempname);
424: tempname = NULL;
425: }
426:
427: /*
428: * For cleaning up if there is an error.
429: */
430: static void
431: cleantemp(void)
432: {
433: if (tempname != NULL)
434: remove(tempname);
435: }
436:
437: /*
438: * Self-identification functions.
439: */
440:
441: static void
442: version(void)
443: {
444: const char *c = copyright;
445: for (;;) {
446: while (*++c != '$')
447: if (*c == '\0')
448: exit(0);
449: while (*++c != '$')
450: putc(*c, stderr);
451: putc('\n', stderr);
1.7 deraadt 452: }
1.16 sthen 453: }
1.7 deraadt 454:
/*
 * Write the two-line usage summary to the stream fp.
 */
static void
synopsis(FILE *fp)
{
	static const char summary[] =
	    "usage: unifdef [-BbcdehKkmnSstV] [-[i]Dsym[=val]] [-[i]Usym] [-f defile]\n"
	    " [-M backext] [-o outfile] [-x 0 | 1 | 2] file ...\n";

	fputs(summary, fp);
}
1.1 deraadt 462:
/*
 * Report a command line error: print the synopsis on stderr and
 * terminate with exit status 2.
 */
static void
usage(void)
{
	synopsis(stderr);
	exit(2);
}
469:
/*
 * Handle -h: print the synopsis and a one-line description of every
 * option on stdout, then terminate with exit status 0.
 */
static void
help(void)
{
	static const char options[] =
	    " -Dsym=val define preprocessor symbol with given value\n"
	    " -Dsym define preprocessor symbol with value 1\n"
	    " -Usym preprocessor symbol is undefined\n"
	    " -iDsym=val \\ ignore C strings and comments\n"
	    " -iDsym ) in sections controlled by these\n"
	    " -iUsym / preprocessor symbols\n"
	    " -fpath file containing #define and #undef directives\n"
	    " -b blank lines instead of deleting them\n"
	    " -B compress blank lines around deleted section\n"
	    " -c complement (invert) keep vs. delete\n"
	    " -d debugging mode\n"
	    " -e ignore multiline preprocessor directives\n"
	    " -h print help\n"
	    " -Ipath extra include file path (ignored)\n"
	    " -K disable && and || short-circuiting\n"
	    " -k process constant #if expressions\n"
	    " -Mext modify in place and keep backups\n"
	    " -m modify input files in place\n"
	    " -n add #line directives to output\n"
	    " -opath output file name\n"
	    " -S list #if control symbols with nesting\n"
	    " -s list #if control symbols\n"
	    " -t ignore C strings and comments\n"
	    " -V print version\n"
	    " -x{012} exit status mode\n";

	synopsis(stdout);
	fputs(options, stdout);
	exit(0);
}
503:
1.8 deraadt 504: /*
505: * A state transition function alters the global #if processing state
506: * in a particular way. The table below is indexed by the current
1.16 sthen 507: * processing state and the type of the current line.
1.8 deraadt 508: *
509: * Nesting is handled by keeping a stack of states; some transition
1.9 deraadt 510: * functions increase or decrease the depth. They also maintain the
1.8 deraadt 511: * ignore state on a stack. In some complicated cases they have to
512: * alter the preprocessor directive, as follows.
513: *
514: * When we have processed a group that starts off with a known-false
515: * #if/#elif sequence (which has therefore been deleted) followed by a
1.9 deraadt 516: * #elif that we don't understand and therefore must keep, we edit the
1.16 sthen 517: * latter into a #if to keep the nesting correct. We use memcpy() to
518: * overwrite the 4 byte token "elif" with "if" followed by two spaces,
518: * i.e. exactly four bytes and no '\0' byte.
1.8 deraadt 519: *
520: * When we find a true #elif in a group, the following block will
521: * always be kept and the rest of the sequence after the next #elif or
1.9 deraadt 522: * #else will be discarded. We edit the #elif into a #else and the
1.8 deraadt 523: * following directive to #endif since this has the desired behaviour.
1.9 deraadt 524: *
525: * "Dodgy" directives are split across multiple lines, the most common
526: * example being a multi-line comment hanging off the right of the
527: * directive. We can handle them correctly only if there is no change
528: * from printing to dropping (or vice versa) caused by that directive.
529: * If the directive is the first of a group we have a choice between
530: * failing with an error, or passing it through unchanged instead of
531: * evaluating it. The latter is not the default to avoid questions from
532: * users about unifdef unexpectedly leaving behind preprocessor directives.
1.8 deraadt 533: */
/*
 * Type of a state transition function: no arguments, no result; all
 * communication goes through the globals (tline, depth, ifstate[], ...).
 *
 * Naming scheme used below, following the group comments:
 *   E...  report an error
 *   S...  drop the directive, this group's start line is not output
 *   P...  print the directive
 *   D...  drop the directive
 *   F...  first line of a group: nest() and then act like S/P/D
 *   O...  dodgy directive: an error unless -e (iocccok) was given
 *   I...  like F, and additionally turn on "ignore" for the new level
 * Each body is a fixed sequence of calls; the line is flushed first and
 * the state for the current depth is changed afterwards.
 */
534: typedef void state_fn(void);
535:
536: /* report an error */
1.16 sthen 537: static void Eelif (void) { error("Inappropriate #elif"); }
538: static void Eelse (void) { error("Inappropriate #else"); }
539: static void Eendif(void) { error("Inappropriate #endif"); }
540: static void Eeof (void) { error("Premature EOF"); }
541: static void Eioccc(void) { error("Obfuscated preprocessor control line"); }
1.8 deraadt 542: /* plain line handling */
/* print() keeps the current line, drop() discards it (see flushline) */
1.16 sthen 543: static void print (void) { flushline(true); }
544: static void drop (void) { flushline(false); }
1.8 deraadt 545: /* output lacks group's start line */
1.16 sthen 546: static void Strue (void) { drop(); ignoreoff(); state(IS_TRUE_PREFIX); }
547: static void Sfalse(void) { drop(); ignoreoff(); state(IS_FALSE_PREFIX); }
548: static void Selse (void) { drop(); state(IS_TRUE_ELSE); }
1.8 deraadt 549: /* print/pass this block */
1.16 sthen 550: static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
551: static void Pelse (void) { print(); state(IS_PASS_ELSE); }
552: static void Pendif(void) { print(); unnest(); }
1.8 deraadt 553: /* discard this block */
1.16 sthen 554: static void Dfalse(void) { drop(); ignoreoff(); state(IS_FALSE_TRAILER); }
555: static void Delif (void) { drop(); ignoreoff(); state(IS_FALSE_MIDDLE); }
556: static void Delse (void) { drop(); state(IS_FALSE_ELSE); }
557: static void Dendif(void) { drop(); unnest(); }
1.8 deraadt 558: /* first line of group */
/* nest() must come first so that the state lands on the new level */
1.16 sthen 559: static void Fdrop (void) { nest(); Dfalse(); }
560: static void Fpass (void) { nest(); Pelif(); }
561: static void Ftrue (void) { nest(); Strue(); }
562: static void Ffalse(void) { nest(); Sfalse(); }
1.9 deraadt 563: /* variable pedantry for obfuscated lines */
/* Fpass()/Pelif() call ignoreoff(), so Oiffy turns ignore on afterwards */
1.16 sthen 564: static void Oiffy (void) { if (!iocccok) Eioccc(); Fpass(); ignoreon(); }
565: static void Oif (void) { if (!iocccok) Eioccc(); Fpass(); }
566: static void Oelif (void) { if (!iocccok) Eioccc(); Pelif(); }
1.8 deraadt 567: /* ignore comments in this block */
/* ignoreon() comes last because the F functions reset the flag */
1.16 sthen 568: static void Idrop (void) { Fdrop(); ignoreon(); }
569: static void Itrue (void) { Ftrue(); ignoreon(); }
570: static void Ifalse(void) { Ffalse(); ignoreon(); }
571: /* modify this line */
572: static void Mpass (void) { memcpy(keyword, "if ", 4); Pelif(); }
573: static void Mtrue (void) { keywordedit("else"); state(IS_TRUE_MIDDLE); }
574: static void Melif (void) { keywordedit("endif"); state(IS_FALSE_TRAILER); }
575: static void Melse (void) { keywordedit("endif"); state(IS_FALSE_ELSE); }
1.8 deraadt 576:
/*
 * The state transition table.  process() calls
 * trans_table[ifstate[depth]][lineval]() for every line, so rows are
 * Ifstate values and columns are Linetype values.  Each row has three
 * source lines: the ten directive types, the same ten in their dodgy
 * form, and finally PLAIN, EOF and ERROR (see the column legend at the
 * bottom).  The ERROR column is abort() in every state.
 */
577: static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
578: /* IS_OUTSIDE */
1.9 deraadt 579: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
580: Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif,
1.16 sthen 581: print, done, abort },
1.8 deraadt 582: /* IS_FALSE_PREFIX */
1.9 deraadt 583: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
584: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
1.16 sthen 585: drop, Eeof, abort },
1.8 deraadt 586: /* IS_TRUE_PREFIX */
1.9 deraadt 587: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
588: Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
1.16 sthen 589: print, Eeof, abort },
1.8 deraadt 590: /* IS_PASS_MIDDLE */
1.9 deraadt 591: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
592: Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif,
1.16 sthen 593: print, Eeof, abort },
1.8 deraadt 594: /* IS_FALSE_MIDDLE */
1.9 deraadt 595: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
596: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
1.16 sthen 597: drop, Eeof, abort },
1.8 deraadt 598: /* IS_TRUE_MIDDLE */
1.9 deraadt 599: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
600: Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
1.16 sthen 601: print, Eeof, abort },
1.8 deraadt 602: /* IS_PASS_ELSE */
1.9 deraadt 603: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
604: Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif,
1.16 sthen 605: print, Eeof, abort },
1.8 deraadt 606: /* IS_FALSE_ELSE */
1.9 deraadt 607: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
608: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
1.16 sthen 609: drop, Eeof, abort },
1.8 deraadt 610: /* IS_TRUE_ELSE */
1.9 deraadt 611: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
612: Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc,
1.16 sthen 613: print, Eeof, abort },
1.8 deraadt 614: /* IS_FALSE_TRAILER */
1.9 deraadt 615: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
616: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
1.16 sthen 617: drop, Eeof, abort }
1.9 deraadt 618: /*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF
619: TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY)
1.16 sthen 620: PLAIN EOF ERROR */
1.8 deraadt 621: };
622:
623: /*
624: * State machine utility functions
625: */
626: static void
1.9 deraadt 627: ignoreoff(void)
628: {
1.16 sthen 629: if (depth == 0)
630: abort(); /* bug */
1.9 deraadt 631: ignoring[depth] = ignoring[depth-1];
632: }
633: static void
634: ignoreon(void)
635: {
636: ignoring[depth] = true;
637: }
638: static void
639: keywordedit(const char *replacement)
640: {
1.16 sthen 641: snprintf(keyword, tline + sizeof(tline) - keyword,
642: "%s%s", replacement, newline);
1.9 deraadt 643: print();
644: }
645: static void
1.8 deraadt 646: nest(void)
647: {
1.16 sthen 648: if (depth > MAXDEPTH-1)
649: abort(); /* bug */
650: if (depth == MAXDEPTH-1)
651: error("Too many levels of nesting");
1.8 deraadt 652: depth += 1;
653: stifline[depth] = linenum;
654: }
1.16 sthen 655: static void
656: unnest(void)
657: {
658: if (depth == 0)
659: abort(); /* bug */
660: depth -= 1;
661: }
1.8 deraadt 662: static void
663: state(Ifstate is)
664: {
665: ifstate[depth] = is;
666: }
667:
1.7 deraadt 668: /*
1.16 sthen 669: * The last state transition function. When this is called,
670: * lineval == LT_EOF, so the process() loop will terminate.
671: */
672: static void
673: done(void)
674: {
675: if (incomment)
676: error("EOF in comment");
677: closeio();
678: }
679:
680: /*
1.8 deraadt 681: * Write a line to the output or not, according to command line options.
1.16 sthen 682: * If writing fails, closeio() will print the error and exit.
1.8 deraadt 683: */
684: static void
685: flushline(bool keep)
686: {
687: if (symlist)
688: return;
1.16 sthen 689: if (keep ^ complement) {
690: bool blankline = tline[strspn(tline, " \t\r\n")] == '\0';
691: if (blankline && compblank && blankcount != blankmax) {
692: delcount += 1;
693: blankcount += 1;
694: } else {
695: if (lnnum && delcount > 0)
696: hashline();
697: if (fputs(tline, output) == EOF)
698: closeio();
699: delcount = 0;
700: blankmax = blankcount = blankline ? blankcount + 1 : 0;
701: }
702: } else {
703: if (lnblank && fputs(newline, output) == EOF)
704: closeio();
1.8 deraadt 705: exitstat = 1;
1.16 sthen 706: delcount += 1;
707: blankcount = 0;
1.7 deraadt 708: }
1.16 sthen 709: if (debugging && fflush(output) == EOF)
710: closeio();
711: }
712:
713: /*
714: * Format of #line directives depends on whether we know the input filename.
715: */
716: static void
717: hashline(void)
718: {
719: int e;
720:
721: if (linefile == NULL)
722: e = fprintf(output, "#line %d%s", linenum, newline);
723: else
724: e = fprintf(output, "#line %d \"%s\"%s",
725: linenum, linefile, newline);
726: if (e < 0)
727: closeio();
728: }
729:
730: /*
731: * Flush the output and handle errors.
732: */
733: static void
734: closeio(void)
735: {
736: /* Tidy up after findsym(). */
737: if (symdepth && !zerosyms)
738: printf("\n");
739: if (output != NULL && (ferror(output) || fclose(output) == EOF))
740: err(2, "%s: can't write to output", filename);
741: fclose(input);
1.7 deraadt 742: }
1.3 deraadt 743:
1.7 deraadt 744: /*
1.8 deraadt 745: * The driver for the state machine.
1.7 deraadt 746: */
1.8 deraadt 747: static void
748: process(void)
1.7 deraadt 749: {
1.16 sthen 750: Linetype lineval = LT_PLAIN;
751: /* When compressing blank lines, act as if the file
752: is preceded by a large number of blank lines. */
753: blankmax = blankcount = 1000;
754: zerosyms = true;
755: newline = NULL;
756: linenum = 0;
757: while (lineval != LT_EOF) {
758: lineval = parseline();
759: trans_table[ifstate[depth]][lineval]();
760: debug("process line %d %s -> %s depth %d",
761: linenum, linetype_name[lineval],
1.8 deraadt 762: ifstate_name[ifstate[depth]], depth);
1.1 deraadt 763: }
764: }
765:
1.7 deraadt 766: /*
1.8 deraadt 767: * Parse a line and determine its type. We keep the preprocessor line
1.16 sthen 768: * parser state between calls in the global variable linestate, with
769: * help from skipcomment().
1.7 deraadt 770: */
1.8 deraadt 771: static Linetype
1.16 sthen 772: parseline(void)
1.3 deraadt 773: {
1.7 deraadt 774: const char *cp;
1.8 deraadt 775: int cursym;
1.3 deraadt 776: Linetype retval;
1.8 deraadt 777: Comment_state wascomment;
1.3 deraadt 778:
1.16 sthen 779: wascomment = incomment;
780: cp = skiphash();
781: if (cp == NULL)
1.8 deraadt 782: return (LT_EOF);
1.16 sthen 783: if (newline == NULL) {
784: if (strrchr(tline, '\n') == strrchr(tline, '\r') + 1)
785: newline = newline_crlf;
786: else
787: newline = newline_unix;
788: }
789: if (*cp == '\0') {
790: retval = LT_PLAIN;
791: goto done;
1.8 deraadt 792: }
1.16 sthen 793: keyword = tline + (cp - tline);
794: if ((cp = matchsym("ifdef", keyword)) != NULL ||
795: (cp = matchsym("ifndef", keyword)) != NULL) {
796: cp = skipcomment(cp);
797: if ((cursym = findsym(&cp)) < 0)
798: retval = LT_IF;
799: else {
800: retval = (keyword[2] == 'n')
801: ? LT_FALSE : LT_TRUE;
802: if (value[cursym] == NULL)
803: retval = (retval == LT_TRUE)
804: ? LT_FALSE : LT_TRUE;
805: if (ignore[cursym])
806: retval = (retval == LT_TRUE)
807: ? LT_TRUEI : LT_FALSEI;
808: }
809: } else if ((cp = matchsym("if", keyword)) != NULL)
810: retval = ifeval(&cp);
811: else if ((cp = matchsym("elif", keyword)) != NULL)
812: retval = linetype_if2elif(ifeval(&cp));
813: else if ((cp = matchsym("else", keyword)) != NULL)
814: retval = LT_ELSE;
815: else if ((cp = matchsym("endif", keyword)) != NULL)
816: retval = LT_ENDIF;
817: else {
818: cp = skipsym(keyword);
1.9 deraadt 819: /* no way can we deal with a continuation inside a keyword */
1.16 sthen 820: if (strncmp(cp, "\\\r\n", 3) == 0 ||
821: strncmp(cp, "\\\n", 2) == 0)
1.8 deraadt 822: Eioccc();
1.16 sthen 823: cp = skipline(cp);
824: retval = LT_PLAIN;
825: goto done;
826: }
827: cp = skipcomment(cp);
828: if (*cp != '\0') {
829: cp = skipline(cp);
830: if (retval == LT_TRUE || retval == LT_FALSE ||
831: retval == LT_TRUEI || retval == LT_FALSEI)
832: retval = LT_IF;
833: if (retval == LT_ELTRUE || retval == LT_ELFALSE)
834: retval = LT_ELIF;
835: }
836: /* the following can happen if the last line of the file lacks a
837: newline or if there is too much whitespace in a directive */
838: if (linestate == LS_HASH) {
839: long len = cp - tline;
840: if (fgets(tline + len, MAXLINE - len, input) == NULL) {
841: if (ferror(input))
842: err(2, "can't read %s", filename);
843: /* append the missing newline at eof */
1.18 miod 844: strlcpy(tline + len, newline, sizeof(tline) - len);
1.16 sthen 845: cp += strlen(newline);
846: linestate = LS_START;
847: } else {
1.8 deraadt 848: linestate = LS_DIRTY;
1.7 deraadt 849: }
850: }
1.16 sthen 851: if (retval != LT_PLAIN && (wascomment || linestate != LS_START)) {
852: retval = linetype_2dodgy(retval);
853: linestate = LS_DIRTY;
1.8 deraadt 854: }
1.16 sthen 855: done:
856: debug("parser line %d state %s comment %s line", linenum,
1.8 deraadt 857: comment_name[incomment], linestate_name[linestate]);
1.7 deraadt 858: return (retval);
859: }
860:
861: /*
1.16 sthen 862: * These are the binary operators that are supported by the expression
863: * evaluator.
1.7 deraadt 864: */
1.16 sthen 865: static Linetype op_strict(long *p, long v, Linetype at, Linetype bt) {
866: if(at == LT_IF || bt == LT_IF) return (LT_IF);
867: return (*p = v, v ? LT_TRUE : LT_FALSE);
868: }
869: static Linetype op_lt(long *p, Linetype at, long a, Linetype bt, long b) {
870: return op_strict(p, a < b, at, bt);
1.8 deraadt 871: }
1.16 sthen 872: static Linetype op_gt(long *p, Linetype at, long a, Linetype bt, long b) {
873: return op_strict(p, a > b, at, bt);
1.8 deraadt 874: }
1.16 sthen 875: static Linetype op_le(long *p, Linetype at, long a, Linetype bt, long b) {
876: return op_strict(p, a <= b, at, bt);
1.8 deraadt 877: }
1.16 sthen 878: static Linetype op_ge(long *p, Linetype at, long a, Linetype bt, long b) {
879: return op_strict(p, a >= b, at, bt);
1.8 deraadt 880: }
1.16 sthen 881: static Linetype op_eq(long *p, Linetype at, long a, Linetype bt, long b) {
882: return op_strict(p, a == b, at, bt);
1.8 deraadt 883: }
1.16 sthen 884: static Linetype op_ne(long *p, Linetype at, long a, Linetype bt, long b) {
885: return op_strict(p, a != b, at, bt);
1.8 deraadt 886: }
1.16 sthen 887: static Linetype op_or(long *p, Linetype at, long a, Linetype bt, long b) {
888: if (!strictlogic && (at == LT_TRUE || bt == LT_TRUE))
889: return (*p = 1, LT_TRUE);
890: return op_strict(p, a || b, at, bt);
1.8 deraadt 891: }
1.16 sthen 892: static Linetype op_and(long *p, Linetype at, long a, Linetype bt, long b) {
893: if (!strictlogic && (at == LT_FALSE || bt == LT_FALSE))
894: return (*p = 0, LT_FALSE);
895: return op_strict(p, a && b, at, bt);
1.7 deraadt 896: }
897:
898: /*
1.8 deraadt 899: * An evaluation function takes three arguments, as follows: (1) a pointer to
900: * an element of the precedence table which lists the operators at the current
901: * level of precedence; (2) a pointer to an integer which will receive the
902: * value of the expression; and (3) a pointer to a char* that points to the
903: * expression to be evaluated and that is updated to the end of the expression
904: * when evaluation is complete. The function returns LT_FALSE if the value of
1.16 sthen 905: * the expression is zero, LT_TRUE if it is non-zero, LT_IF if the expression
906: * depends on an unknown symbol, or LT_ERROR if there is a parse failure.
1.7 deraadt 907: */
1.8 deraadt 908: struct ops;
909:
1.16 sthen 910: typedef Linetype eval_fn(const struct ops *, long *, const char **);
1.8 deraadt 911:
912: static eval_fn eval_table, eval_unary;
913:
914: /*
915: * The precedence table. Expressions involving binary operators are evaluated
916: * in a table-driven way by eval_table. When it evaluates a subexpression it
917: * calls the inner function with its first argument pointing to the next
918: * element of the table. Innermost expressions have special non-table-driven
919: * handling.
920: */
1.16 sthen 921: struct op {
922: const char *str;
923: Linetype (*fn)(long *, Linetype, long, Linetype, long);
924: };
925: struct ops {
1.8 deraadt 926: eval_fn *inner;
1.16 sthen 927: struct op op[5];
928: };
929: static const struct ops eval_ops[] = {
1.8 deraadt 930: { eval_table, { { "||", op_or } } },
931: { eval_table, { { "&&", op_and } } },
932: { eval_table, { { "==", op_eq },
933: { "!=", op_ne } } },
934: { eval_unary, { { "<=", op_le },
935: { ">=", op_ge },
936: { "<", op_lt },
937: { ">", op_gt } } }
938: };
1.7 deraadt 939:
1.16 sthen 940: /* Current operator precedence level */
941: static long prec(const struct ops *ops)
942: {
943: return (ops - eval_ops);
944: }
945:
1.7 deraadt 946: /*
947: * Function for evaluating the innermost parts of expressions,
1.16 sthen 948: * viz. !expr (expr) number defined(symbol) symbol
949: * We reset the constexpr flag in the last two cases.
1.7 deraadt 950: */
1.8 deraadt 951: static Linetype
1.16 sthen 952: eval_unary(const struct ops *ops, long *valp, const char **cpp)
1.7 deraadt 953: {
954: const char *cp;
955: char *ep;
956: int sym;
1.16 sthen 957: bool defparen;
958: Linetype lt;
1.7 deraadt 959:
960: cp = skipcomment(*cpp);
1.8 deraadt 961: if (*cp == '!') {
1.16 sthen 962: debug("eval%d !", prec(ops));
1.7 deraadt 963: cp++;
1.16 sthen 964: lt = eval_unary(ops, valp, &cp);
965: if (lt == LT_ERROR)
966: return (LT_ERROR);
967: if (lt != LT_IF) {
968: *valp = !*valp;
969: lt = *valp ? LT_TRUE : LT_FALSE;
970: }
1.7 deraadt 971: } else if (*cp == '(') {
972: cp++;
1.16 sthen 973: debug("eval%d (", prec(ops));
974: lt = eval_table(eval_ops, valp, &cp);
975: if (lt == LT_ERROR)
976: return (LT_ERROR);
1.7 deraadt 977: cp = skipcomment(cp);
978: if (*cp++ != ')')
1.16 sthen 979: return (LT_ERROR);
1.7 deraadt 980: } else if (isdigit((unsigned char)*cp)) {
1.16 sthen 981: debug("eval%d number", prec(ops));
1.7 deraadt 982: *valp = strtol(cp, &ep, 0);
1.16 sthen 983: if (ep == cp)
984: return (LT_ERROR);
985: lt = *valp ? LT_TRUE : LT_FALSE;
986: cp = ep;
987: } else if (matchsym("defined", cp) != NULL) {
1.7 deraadt 988: cp = skipcomment(cp+7);
1.16 sthen 989: if (*cp == '(') {
990: cp = skipcomment(cp+1);
991: defparen = true;
992: } else {
993: defparen = false;
994: }
995: sym = findsym(&cp);
1.7 deraadt 996: cp = skipcomment(cp);
1.16 sthen 997: if (defparen && *cp++ != ')') {
998: debug("eval%d defined missing ')'", prec(ops));
999: return (LT_ERROR);
1000: }
1001: if (sym < 0) {
1002: debug("eval%d defined unknown", prec(ops));
1003: lt = LT_IF;
1004: } else {
1005: debug("eval%d defined %s", prec(ops), symname[sym]);
1006: *valp = (value[sym] != NULL);
1007: lt = *valp ? LT_TRUE : LT_FALSE;
1008: }
1009: constexpr = false;
1.7 deraadt 1010: } else if (!endsym(*cp)) {
1.16 sthen 1011: debug("eval%d symbol", prec(ops));
1012: sym = findsym(&cp);
1013: if (sym < 0) {
1014: lt = LT_IF;
1015: cp = skipargs(cp);
1016: } else if (value[sym] == NULL) {
1.7 deraadt 1017: *valp = 0;
1.16 sthen 1018: lt = LT_FALSE;
1019: } else {
1.7 deraadt 1020: *valp = strtol(value[sym], &ep, 0);
1021: if (*ep != '\0' || ep == value[sym])
1.16 sthen 1022: return (LT_ERROR);
1023: lt = *valp ? LT_TRUE : LT_FALSE;
1024: cp = skipargs(cp);
1.7 deraadt 1025: }
1.16 sthen 1026: constexpr = false;
1027: } else {
1028: debug("eval%d bad expr", prec(ops));
1029: return (LT_ERROR);
1030: }
1.7 deraadt 1031:
1032: *cpp = cp;
1.16 sthen 1033: debug("eval%d = %d", prec(ops), *valp);
1034: return (lt);
1.7 deraadt 1035: }
1036:
1037: /*
1038: * Table-driven evaluation of binary operators.
1039: */
1.8 deraadt 1040: static Linetype
1.16 sthen 1041: eval_table(const struct ops *ops, long *valp, const char **cpp)
1.7 deraadt 1042: {
1.8 deraadt 1043: const struct op *op;
1.7 deraadt 1044: const char *cp;
1.16 sthen 1045: long val;
1046: Linetype lt, rt;
1.7 deraadt 1047:
1.16 sthen 1048: debug("eval%d", prec(ops));
1.7 deraadt 1049: cp = *cpp;
1.16 sthen 1050: lt = ops->inner(ops+1, valp, &cp);
1051: if (lt == LT_ERROR)
1052: return (LT_ERROR);
1.7 deraadt 1053: for (;;) {
1054: cp = skipcomment(cp);
1055: for (op = ops->op; op->str != NULL; op++)
1056: if (strncmp(cp, op->str, strlen(op->str)) == 0)
1057: break;
1058: if (op->str == NULL)
1059: break;
1060: cp += strlen(op->str);
1.16 sthen 1061: debug("eval%d %s", prec(ops), op->str);
1062: rt = ops->inner(ops+1, &val, &cp);
1063: if (rt == LT_ERROR)
1064: return (LT_ERROR);
1065: lt = op->fn(valp, lt, *valp, rt, val);
1.7 deraadt 1066: }
1067:
1068: *cpp = cp;
1.16 sthen 1069: debug("eval%d = %d", prec(ops), *valp);
1070: debug("eval%d lt = %s", prec(ops), linetype_name[lt]);
1071: return (lt);
1.1 deraadt 1072: }
1.7 deraadt 1073:
1.1 deraadt 1074: /*
1.7 deraadt 1075: * Evaluate the expression on a #if or #elif line. If we can work out
1076: * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
1.8 deraadt 1077: * return just a generic LT_IF.
1.1 deraadt 1078: */
1.8 deraadt 1079: static Linetype
1.7 deraadt 1080: ifeval(const char **cpp)
1081: {
1.16 sthen 1082: Linetype ret;
1083: long val = 0;
1.7 deraadt 1084:
1085: debug("eval %s", *cpp);
1.16 sthen 1086: constexpr = killconsts ? false : true;
1.8 deraadt 1087: ret = eval_table(eval_ops, &val, cpp);
1.16 sthen 1088: debug("eval = %d", val);
1089: return (constexpr ? LT_IF : ret == LT_ERROR ? LT_IF : ret);
1090: }
1091:
1092: /*
1093: * Read a line and examine its initial part to determine if it is a
1094: * preprocessor directive. Returns NULL on EOF, or a pointer to a
1095: * preprocessor directive name, or a pointer to the zero byte at the
1096: * end of the line.
1097: */
1098: static const char *
1099: skiphash(void)
1100: {
1101: const char *cp;
1102:
1103: linenum++;
1104: if (fgets(tline, MAXLINE, input) == NULL) {
1105: if (ferror(input))
1106: err(2, "can't read %s", filename);
1107: else
1108: return (NULL);
1109: }
1110: cp = skipcomment(tline);
1111: if (linestate == LS_START && *cp == '#') {
1112: linestate = LS_HASH;
1113: return (skipcomment(cp + 1));
1114: } else if (*cp == '\0') {
1115: return (cp);
1116: } else {
1117: return (skipline(cp));
1118: }
1119: }
1120:
1121: /*
1122: * Mark a line dirty and consume the rest of it, keeping track of the
1123: * lexical state.
1124: */
1125: static const char *
1126: skipline(const char *cp)
1127: {
1128: if (*cp != '\0')
1129: linestate = LS_DIRTY;
1130: while (*cp != '\0')
1131: cp = skipcomment(cp + 1);
1132: return (cp);
1.7 deraadt 1133: }
1134:
1135: /*
1.16 sthen 1136: * Skip over comments, strings, and character literals and stop at the
1137: * next character position that is not whitespace. Between calls we keep
1138: * the comment state in the global variable incomment, and we also adjust
1139: * the global variable linestate when we see a newline.
1.8 deraadt 1140: * XXX: doesn't cope with the buffer splitting inside a state transition.
1.7 deraadt 1141: */
1.8 deraadt 1142: static const char *
1.7 deraadt 1143: skipcomment(const char *cp)
1.3 deraadt 1144: {
1.8 deraadt 1145: if (text || ignoring[depth]) {
1.11 avsm 1146: for (; isspace((unsigned char)*cp); cp++)
1147: if (*cp == '\n')
1148: linestate = LS_START;
1.8 deraadt 1149: return (cp);
1150: }
1151: while (*cp != '\0')
1.16 sthen 1152: /* don't reset to LS_START after a line continuation */
1153: if (strncmp(cp, "\\\r\n", 3) == 0)
1154: cp += 3;
1155: else if (strncmp(cp, "\\\n", 2) == 0)
1.8 deraadt 1156: cp += 2;
1157: else switch (incomment) {
1158: case NO_COMMENT:
1.16 sthen 1159: if (strncmp(cp, "/\\\r\n", 4) == 0) {
1160: incomment = STARTING_COMMENT;
1161: cp += 4;
1162: } else if (strncmp(cp, "/\\\n", 3) == 0) {
1.8 deraadt 1163: incomment = STARTING_COMMENT;
1164: cp += 3;
1165: } else if (strncmp(cp, "/*", 2) == 0) {
1.3 deraadt 1166: incomment = C_COMMENT;
1.8 deraadt 1167: cp += 2;
1168: } else if (strncmp(cp, "//", 2) == 0) {
1169: incomment = CXX_COMMENT;
1170: cp += 2;
1.16 sthen 1171: } else if (strncmp(cp, "\'", 1) == 0) {
1172: incomment = CHAR_LITERAL;
1173: linestate = LS_DIRTY;
1174: cp += 1;
1175: } else if (strncmp(cp, "\"", 1) == 0) {
1176: incomment = STRING_LITERAL;
1177: linestate = LS_DIRTY;
1178: cp += 1;
1.8 deraadt 1179: } else if (strncmp(cp, "\n", 1) == 0) {
1180: linestate = LS_START;
1181: cp += 1;
1.16 sthen 1182: } else if (strchr(" \r\t", *cp) != NULL) {
1.8 deraadt 1183: cp += 1;
1184: } else
1185: return (cp);
1186: continue;
1187: case CXX_COMMENT:
1188: if (strncmp(cp, "\n", 1) == 0) {
1189: incomment = NO_COMMENT;
1190: linestate = LS_START;
1.3 deraadt 1191: }
1.8 deraadt 1192: cp += 1;
1193: continue;
1.16 sthen 1194: case CHAR_LITERAL:
1195: case STRING_LITERAL:
1196: if ((incomment == CHAR_LITERAL && cp[0] == '\'') ||
1197: (incomment == STRING_LITERAL && cp[0] == '\"')) {
1198: incomment = NO_COMMENT;
1199: cp += 1;
1200: } else if (cp[0] == '\\') {
1201: if (cp[1] == '\0')
1202: cp += 1;
1203: else
1204: cp += 2;
1205: } else if (strncmp(cp, "\n", 1) == 0) {
1206: if (incomment == CHAR_LITERAL)
1207: error("unterminated char literal");
1208: else
1209: error("unterminated string literal");
1210: } else
1211: cp += 1;
1212: continue;
1.8 deraadt 1213: case C_COMMENT:
1.16 sthen 1214: if (strncmp(cp, "*\\\r\n", 4) == 0) {
1215: incomment = FINISHING_COMMENT;
1216: cp += 4;
1217: } else if (strncmp(cp, "*\\\n", 3) == 0) {
1.8 deraadt 1218: incomment = FINISHING_COMMENT;
1219: cp += 3;
1220: } else if (strncmp(cp, "*/", 2) == 0) {
1221: incomment = NO_COMMENT;
1222: cp += 2;
1223: } else
1224: cp += 1;
1225: continue;
1226: case STARTING_COMMENT:
1227: if (*cp == '*') {
1228: incomment = C_COMMENT;
1229: cp += 1;
1230: } else if (*cp == '/') {
1.3 deraadt 1231: incomment = CXX_COMMENT;
1.8 deraadt 1232: cp += 1;
1233: } else {
1234: incomment = NO_COMMENT;
1235: linestate = LS_DIRTY;
1.3 deraadt 1236: }
1.8 deraadt 1237: continue;
1238: case FINISHING_COMMENT:
1239: if (*cp == '/') {
1240: incomment = NO_COMMENT;
1241: cp += 1;
1242: } else
1243: incomment = C_COMMENT;
1244: continue;
1245: default:
1.16 sthen 1246: abort(); /* bug */
1.3 deraadt 1247: }
1.8 deraadt 1248: return (cp);
1.1 deraadt 1249: }
1.7 deraadt 1250:
/*
 * Skip macro arguments.  If, after any blanks and comments, the text
 * does not start with '(' the position of that first non-blank is
 * returned.  Otherwise the balanced parenthesised group is skipped,
 * along with anything skipcomment() swallows after it.  If the
 * parentheses never balance the original position is returned.
 */
static const char *
skipargs(const char *cp)
{
	const char *start = cp;
	int nesting = 0;

	cp = skipcomment(cp);
	if (*cp != '(')
		return (cp);
	for (;;) {
		if (*cp == '(')
			nesting++;
		else if (*cp == ')')
			nesting--;
		cp = skipcomment(cp + 1);
		if (nesting == 0 || *cp == '\0')
			break;
	}
	/* Unbalanced: rewind and re-detect the syntax error later. */
	return (nesting == 0 ? cp : start);
}
1275:
/*
 * Step over an identifier: returns a pointer to the first character at
 * or after cp for which endsym() is true.
 */
static const char *
skipsym(const char *cp)
{
	for (; !endsym(*cp); cp++)
		continue;
	return (cp);
}
1286:
/*
 * Skip blanks and comments, then return a freshly allocated copy of
 * the identifier that follows and move *cpp past it.  If no identifier
 * is found, NULL is returned and *cpp is left unchanged.
 */
static const char *
getsym(const char **cpp)
{
	const char *begin, *finish;

	begin = skipcomment(*cpp);
	finish = skipsym(begin);
	if (finish == begin)
		return NULL;
	*cpp = finish;
	return (xstrdup(begin, finish));
}
1302:
/*
 * Test whether the symbol s is a prefix of t that ends on a symbol
 * boundary, i.e. the character of t right after the prefix satisfies
 * endsym().  Returns a pointer to that character on a match and NULL
 * otherwise.
 */
static const char *
matchsym(const char *s, const char *t)
{
	for (; *s != '\0' && *t != '\0'; ++s, ++t) {
		if (*s != *t)
			return (NULL);
	}
	if (*s != '\0' || !endsym(*t))
		return(NULL);
	return(t);
}
1322:
1323: /*
1.13 jmc 1324: * Look for the symbol in the symbol table. If it is found, we return
1.8 deraadt 1325: * the symbol table index, else we return -1.
1.1 deraadt 1326: */
1.8 deraadt 1327: static int
1.16 sthen 1328: findsym(const char **strp)
1.1 deraadt 1329: {
1.16 sthen 1330: const char *str;
1.3 deraadt 1331: int symind;
1332:
1.16 sthen 1333: str = *strp;
1334: *strp = skipsym(str);
1335: if (symlist) {
1336: if (*strp == str)
1337: return (-1);
1338: if (symdepth && firstsym)
1339: printf("%s%3d", zerosyms ? "" : "\n", depth);
1340: firstsym = zerosyms = false;
1341: printf("%s%.*s%s",
1342: symdepth ? " " : "",
1343: (int)(*strp-str), str,
1344: symdepth ? "" : "\n");
1345: /* we don't care about the value of the symbol */
1346: return (0);
1347: }
1.8 deraadt 1348: for (symind = 0; symind < nsyms; ++symind) {
1.16 sthen 1349: if (matchsym(symname[symind], str) != NULL) {
1350: debugsym("findsym", symind);
1.7 deraadt 1351: return (symind);
1.3 deraadt 1352: }
1.1 deraadt 1353: }
1.8 deraadt 1354: return (-1);
1.1 deraadt 1355: }
1.7 deraadt 1356:
1.1 deraadt 1357: /*
1.16 sthen 1358: * Resolve indirect symbol values to their final definitions.
1359: */
1360: static void
1361: indirectsym(void)
1362: {
1363: const char *cp;
1364: int changed, sym, ind;
1365:
1366: do {
1367: changed = 0;
1368: for (sym = 0; sym < nsyms; ++sym) {
1369: if (value[sym] == NULL)
1370: continue;
1371: cp = value[sym];
1372: ind = findsym(&cp);
1373: if (ind == -1 || ind == sym ||
1374: *cp != '\0' ||
1375: value[ind] == NULL ||
1376: value[ind] == value[sym])
1377: continue;
1378: debugsym("indir...", sym);
1379: value[sym] = value[ind];
1380: debugsym("...ectsym", sym);
1381: changed++;
1382: }
1383: } while (changed);
1384: }
1385:
/*
 * Add a symbol to the symbol table, specified with the format sym=val.
 * The "=val" part is only accepted when definethis is set, and symval
 * is then cut in place at the '='.  Without it the value is "1" for a
 * definition and NULL otherwise.  Anything else after the name is
 * passed to usage().
 */
static void
addsym1(bool ignorethis, bool definethis, char *symval)
{
	const char *name = symval;
	const char *rest = skipsym(name);
	const char *val;

	if (definethis && *rest == '=') {
		symval[rest - name] = '\0';
		val = rest + 1;
	} else if (*rest == '\0') {
		if (definethis)
			val = "1";
		else
			val = NULL;
	} else {
		val = rest;
		usage();
	}
	addsym2(ignorethis, name, val);
}
1406:
1407: /*
1.7 deraadt 1408: * Add a symbol to the symbol table.
1409: */
1.8 deraadt 1410: static void
1.16 sthen 1411: addsym2(bool ignorethis, const char *sym, const char *val)
1.7 deraadt 1412: {
1.16 sthen 1413: const char *cp = sym;
1.7 deraadt 1414: int symind;
1415:
1.16 sthen 1416: symind = findsym(&cp);
1.8 deraadt 1417: if (symind < 0) {
1.7 deraadt 1418: if (nsyms >= MAXSYMS)
1419: errx(2, "too many symbols");
1420: symind = nsyms++;
1421: }
1.16 sthen 1422: ignore[symind] = ignorethis;
1.7 deraadt 1423: symname[symind] = sym;
1.16 sthen 1424: value[symind] = val;
1425: debugsym("addsym", symind);
1426: }
1427:
1428: static void
1429: debugsym(const char *why, int symind)
1430: {
1431: debug("%s %s%c%s", why, symname[symind],
1432: value[symind] ? '=' : ' ',
1433: value[symind] ? value[symind] : "undef");
1434: }
1435:
1436: /*
1437: * Add symbols to the symbol table from a file containing
1438: * #define and #undef preprocessor directives.
1439: */
1440: static void
1441: defundefile(const char *fn)
1442: {
1443: filename = fn;
1444: input = fopen(fn, "rb");
1445: if (input == NULL)
1446: err(2, "can't open %s", fn);
1447: linenum = 0;
1448: while (defundef())
1449: ;
1450: if (ferror(input))
1451: err(2, "can't read %s", filename);
1452: else
1453: fclose(input);
1454: if (incomment)
1455: error("EOF in comment");
1456: }
1457:
1458: /*
1459: * Read and process one #define or #undef directive
1460: */
1461: static bool
1462: defundef(void)
1463: {
1464: const char *cp, *kw, *sym, *val, *end;
1465:
1466: cp = skiphash();
1467: if (cp == NULL)
1468: return (false);
1469: if (*cp == '\0')
1470: goto done;
1471: /* strip trailing whitespace, and do a fairly rough check to
1472: avoid unsupported multi-line preprocessor directives */
1473: end = cp + strlen(cp);
1474: while (end > tline && strchr(" \t\n\r", end[-1]) != NULL)
1475: --end;
1476: if (end > tline && end[-1] == '\\')
1477: Eioccc();
1478:
1479: kw = cp;
1480: if ((cp = matchsym("define", kw)) != NULL) {
1481: sym = getsym(&cp);
1482: if (sym == NULL)
1483: error("missing macro name in #define");
1484: if (*cp == '(') {
1485: val = "1";
1486: } else {
1487: cp = skipcomment(cp);
1488: val = (cp < end) ? xstrdup(cp, end) : "";
1489: }
1490: debug("#define");
1491: addsym2(false, sym, val);
1492: } else if ((cp = matchsym("undef", kw)) != NULL) {
1493: sym = getsym(&cp);
1494: if (sym == NULL)
1495: error("missing macro name in #undef");
1496: cp = skipcomment(cp);
1497: debug("#undef");
1498: addsym2(false, sym, NULL);
1.7 deraadt 1499: } else {
1.16 sthen 1500: error("unrecognized preprocessor directive");
1.7 deraadt 1501: }
1.16 sthen 1502: skipline(cp);
1503: done:
1504: debug("parser line %d state %s comment %s line", linenum,
1505: comment_name[incomment], linestate_name[linestate]);
1506: return (true);
1507: }
1508:
/*
 * Return newly allocated memory holding s1 followed by s2.  Exits with
 * an error if the length cannot be computed or the allocation fails.
 */
static char *
astrcat(const char *s1, const char *s2)
{
	char *joined;
	size_t need;
	int measured;

	/* first pass only measures, second pass writes */
	measured = snprintf(NULL, 0, "%s%s", s1, s2);
	if (measured < 0)
		err(2, "snprintf");
	need = (size_t)measured + 1;
	if ((joined = malloc(need)) == NULL)
		err(2, "malloc");
	snprintf(joined, need, "%s%s", s1, s2);
	return (joined);
}
1529:
/*
 * Return a newly allocated, NUL-terminated copy of the characters from
 * start up to but not including end.  The copy stops early if a NUL is
 * met before end.  Aborts if end precedes start and exits with an error
 * if the allocation fails.
 */
static const char *
xstrdup(const char *start, const char *end)
{
	size_t len, i;
	char *copy;

	if (end < start)
		abort(); /* bug */
	len = (size_t)(end - start);
	copy = malloc(len + 1);
	if (copy == NULL)
		err(2, "malloc");
	for (i = 0; i < len && start[i] != '\0'; i++)
		copy[i] = start[i];
	copy[i] = '\0';
	return (copy);
}
1547:
1.7 deraadt 1548: /*
1.8 deraadt 1549: * Diagnostics.
1.7 deraadt 1550: */
1.8 deraadt 1551: static void
1.7 deraadt 1552: debug(const char *msg, ...)
1.1 deraadt 1553: {
1.7 deraadt 1554: va_list ap;
1555:
1556: if (debugging) {
1557: va_start(ap, msg);
1558: vwarnx(msg, ap);
1559: va_end(ap);
1560: }
1.1 deraadt 1561: }
1562:
1.8 deraadt 1563: static void
1564: error(const char *msg)
1.7 deraadt 1565: {
1.8 deraadt 1566: if (depth == 0)
1.9 deraadt 1567: warnx("%s: %d: %s", filename, linenum, msg);
1.7 deraadt 1568: else
1.9 deraadt 1569: warnx("%s: %d: %s (#if line %d depth %d)",
1.8 deraadt 1570: filename, linenum, msg, stifline[depth], depth);
1.16 sthen 1571: closeio();
1.9 deraadt 1572: errx(2, "output may be truncated");
1.1 deraadt 1573: }