Annotation of src/usr.bin/unifdef/unifdef.c, Revision 1.11
1.11 ! avsm 1: /* $OpenBSD: unifdef.c,v 1.10 2003/06/03 02:56:21 millert Exp $ */
1.1 deraadt 2: /*
1.11 ! avsm 3: * Copyright (c) 2002, 2003 Tony Finch <dot@dotat.at>
1.1 deraadt 4: * Copyright (c) 1985, 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
1.7 deraadt 8: * Dave Yost. Support for #if and #elif was added by Tony Finch.
1.1 deraadt 9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
1.10 millert 18: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 19: * may be used to endorse or promote products derived from this software
20: * without specific prior written permission.
21: *
22: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32: * SUCH DAMAGE.
33: */
34:
35: #ifndef lint
1.7 deraadt 36: static const char copyright[] =
1.1 deraadt 37: "@(#) Copyright (c) 1985, 1993\n\
38: The Regents of the University of California. All rights reserved.\n";
39:
40: #if 0
41: static char sccsid[] = "@(#)unifdef.c 8.1 (Berkeley) 6/6/93";
42: #endif
1.11 ! avsm 43: static const char rcsid[] = "$OpenBSD: unifdef.c,v 1.10 2003/06/03 02:56:21 millert Exp $";
1.7 deraadt 44: #endif
1.1 deraadt 45:
46: /*
47: * unifdef - remove ifdef'ed lines
48: *
49: * Wishlist:
50: * provide an option which will append the name of the
51: * appropriate symbol after #else's and #endif's
52: * provide an option which will check symbols after
53: * #else's and #endif's to see that they match their
54: * corresponding #ifdef or #ifndef
1.7 deraadt 55: * generate #line directives in place of deleted code
1.9 deraadt 56: *
57: * The first two items above require better buffer handling, which would
58: * also make it possible to handle all "dodgy" directives correctly.
1.1 deraadt 59: */
60:
1.7 deraadt 61: #include <ctype.h>
62: #include <err.h>
63: #include <stdarg.h>
64: #include <stdbool.h>
1.1 deraadt 65: #include <stdio.h>
1.7 deraadt 66: #include <stdlib.h>
67: #include <string.h>
68: #include <unistd.h>
1.1 deraadt 69:
1.7 deraadt 70: /* types of input lines: */
/*
 * Classification of one input line.  The first ten values are the
 * directive kinds; adding LT_DODGY to any of them gives the matching
 * "dodgy" kind (a directive that is not contained on a single line).
 */
typedef enum {
	LT_TRUEI,		/* a true #if with ignore flag */
	LT_FALSEI,		/* a false #if with ignore flag */
	LT_IF,			/* an unknown #if */
	LT_TRUE,		/* a true #if */
	LT_FALSE,		/* a false #if */
	LT_ELIF,		/* an unknown #elif */
	LT_ELTRUE,		/* a true #elif */
	LT_ELFALSE,		/* a false #elif */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* flag: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,		/* ordinary line */
	LT_EOF,			/* end of file */
	LT_COUNT
} Linetype;

/* Printable names for debugging output, indexed by Linetype. */
static char const * const linetype_name[LT_COUNT] = {
	"TRUEI",
	"FALSEI",
	"IF",
	"TRUE",
	"FALSE",
	"ELIF",
	"ELTRUE",
	"ELFALSE",
	"ELSE",
	"ENDIF",
	"DODGY TRUEI",
	"DODGY FALSEI",
	"DODGY IF",
	"DODGY TRUE",
	"DODGY FALSE",
	"DODGY ELIF",
	"DODGY ELTRUE",
	"DODGY ELFALSE",
	"DODGY ELSE",
	"DODGY ENDIF",
	"PLAIN",
	"EOF"
};
1.7 deraadt 98:
1.8 deraadt 99: /* state of #if processing */
/*
 * Where we are inside the current #if ... #endif group.  One of these
 * is stored per nesting level.
 */
typedef enum {
	IS_OUTSIDE,
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT
} Ifstate;

/* Printable names for debugging output, indexed by Ifstate. */
static char const * const ifstate_name[IS_COUNT] = {
	"OUTSIDE",
	"FALSE_PREFIX",
	"TRUE_PREFIX",
	"PASS_MIDDLE",
	"FALSE_MIDDLE",
	"TRUE_MIDDLE",
	"PASS_ELSE",
	"FALSE_ELSE",
	"TRUE_ELSE",
	"FALSE_TRAILER"
};
120:
121: /* state of comment parser */
/*
 * Comment parser state.  NO_COMMENT is deliberately zero/false so the
 * state can be tested directly as a boolean.
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT	/* star-backslash-newline in a C comment */
} Comment_state;

/* Printable names for debugging output, indexed by Comment_state. */
static char const * const comment_name[] = {
	"NO",
	"C",
	"CXX",
	"STARTING",
	"FINISHING"
};
1.7 deraadt 133:
1.8 deraadt 134: /* state of preprocessor line parser */
/* How much of a possible preprocessor line has been seen so far. */
typedef enum {
	LS_START,		/* only space and comments on this line */
	LS_HASH,		/* only space, comments, and a hash */
	LS_DIRTY		/* this line can't be a preprocessor line */
} Line_state;

/* Printable names for debugging output, indexed by Line_state. */
static char const * const linestate_name[] = {
	"START",
	"HASH",
	"DIRTY"
};
1.7 deraadt 144:
145: /*
1.8 deraadt 146: * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
1.7 deraadt 147: */
1.8 deraadt 148: #define MAXDEPTH 64 /* maximum #if nesting */
149: #define MAXLINE 4096 /* maximum length of line */
150: #define MAXSYMS 4096 /* maximum number of symbols */
1.7 deraadt 151:
152: /*
1.9 deraadt 153: * Sometimes when editing a keyword the replacement text is longer, so
154: * we leave some space at the end of the tline buffer to accommodate this.
155: */
156: #define EDITSLOP 10
157:
158: /*
1.8 deraadt 159: * Globals.
1.7 deraadt 160: */
161:
1.8 deraadt 162: static bool complement; /* -c: do the complement */
163: static bool debugging; /* -d: debugging reports */
1.9 deraadt 164: static bool iocccok; /* -e: fewer IOCCC errors */
1.8 deraadt 165: static bool killconsts; /* -k: eval constant #ifs */
166: static bool lnblank; /* -l: blank deleted lines */
167: static bool symlist; /* -s: output symbol list */
168: static bool text; /* -t: this is a text file */
169:
170: static const char *symname[MAXSYMS]; /* symbol name */
171: static const char *value[MAXSYMS]; /* -Dsym=value */
172: static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */
173: static int nsyms; /* number of symbols */
174:
175: static FILE *input; /* input file pointer */
176: static const char *filename; /* input file name */
177: static int linenum; /* current line number */
178:
1.9 deraadt 179: static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */
1.8 deraadt 180: static char *keyword; /* used for editing #elif's */
181:
182: static Comment_state incomment; /* comment parser state */
183: static Line_state linestate; /* #if line parser state */
184: static Ifstate ifstate[MAXDEPTH]; /* #if processor state */
185: static bool ignoring[MAXDEPTH]; /* ignore comments state */
186: static int stifline[MAXDEPTH]; /* start of current #if */
187: static int depth; /* current #if nesting */
188: static bool keepthis; /* don't delete constant #if */
189:
190: static int exitstat; /* program exit status */
191:
192: static void addsym(bool, bool, char *);
193: static void debug(const char *, ...);
194: static void error(const char *);
195: static int findsym(const char *);
196: static void flushline(bool);
197: static Linetype getline(void);
198: static Linetype ifeval(const char **);
1.9 deraadt 199: static void ignoreoff(void);
200: static void ignoreon(void);
201: static void keywordedit(const char *);
1.8 deraadt 202: static void nest(void);
203: static void process(void);
204: static const char *skipcomment(const char *);
205: static const char *skipsym(const char *);
206: static void state(Ifstate);
207: static int strlcmp(const char *, const char *, size_t);
208: static void usage(void);
1.7 deraadt 209:
/*
 * True when c cannot be part of an identifier, i.e. it is neither a
 * letter, a digit nor an underscore.  The argument is parenthesized so
 * that expressions such as a conditional can be passed safely; it is
 * still evaluated twice, so it must not have side effects.
 */
#define endsym(c) (!isalnum((unsigned char)(c)) && (c) != '_')
211:
1.8 deraadt 212: /*
213: * The main program.
214: */
1.7 deraadt 215: int
216: main(int argc, char *argv[])
217: {
218: int opt;
219:
1.11 ! avsm 220: while ((opt = getopt(argc, argv, "i:D:U:I:cdeklst")) != -1)
1.7 deraadt 221: switch (opt) {
222: case 'i': /* treat stuff controlled by these symbols as text */
223: /*
224: * For strict backwards-compatibility the U or D
225: * should be immediately after the -i but it doesn't
226: * matter much if we relax that requirement.
227: */
228: opt = *optarg++;
229: if (opt == 'D')
230: addsym(true, true, optarg);
231: else if (opt == 'U')
232: addsym(true, false, optarg);
233: else
234: usage();
235: break;
236: case 'D': /* define a symbol */
237: addsym(false, true, optarg);
238: break;
239: case 'U': /* undef a symbol */
240: addsym(false, false, optarg);
241: break;
1.11 ! avsm 242: case 'I':
! 243: /* no-op for compatibility with cpp */
! 244: break;
1.7 deraadt 245: case 'c': /* treat -D as -U and vice versa */
246: complement = true;
247: break;
1.8 deraadt 248: case 'd':
249: debugging = true;
250: break;
1.9 deraadt 251: case 'e': /* fewer errors from dodgy lines */
252: iocccok = true;
253: break;
1.7 deraadt 254: case 'k': /* process constant #ifs */
255: killconsts = true;
256: break;
257: case 'l': /* blank deleted lines instead of omitting them */
258: lnblank = true;
259: break;
260: case 's': /* only output list of symbols that control #ifs */
261: symlist = true;
262: break;
1.8 deraadt 263: case 't': /* don't parse C comments */
1.7 deraadt 264: text = true;
265: break;
266: default:
267: usage();
268: }
269: argc -= optind;
270: argv += optind;
1.8 deraadt 271: if (nsyms == 0 && !symlist) {
1.7 deraadt 272: warnx("must -D or -U at least one symbol");
273: usage();
274: }
275: if (argc > 1) {
276: errx(2, "can only do one file");
277: } else if (argc == 1 && strcmp(*argv, "-") != 0) {
278: filename = *argv;
279: if ((input = fopen(filename, "r")) != NULL) {
1.8 deraadt 280: process();
1.7 deraadt 281: (void) fclose(input);
282: } else
283: err(2, "can't open %s", *argv);
284: } else {
285: filename = "[stdin]";
286: input = stdin;
1.8 deraadt 287: process();
1.7 deraadt 288: }
289:
1.8 deraadt 290: exit(exitstat);
1.7 deraadt 291: }
1.1 deraadt 292:
/*
 * Print the command synopsis on stderr and exit with status 2.
 * The synopsis lists -Ipath because main() accepts (and ignores) it.
 */
static void
usage(void)
{
	fprintf(stderr, "usage: unifdef [-cdeklst] [-Ipath]"
	    " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n");
	exit(2);
}
300:
301: /*
302: * A state transition function alters the global #if processing state
303: * in a particular way. The table below is indexed by the current
304: * processing state and the type of the current line. A NULL entry
1.9 deraadt 305: * indicates that processing is complete.
1.8 deraadt 306: *
307: * Nesting is handled by keeping a stack of states; some transition
1.9 deraadt 308: * functions increase or decrease the depth. They also maintain the
1.8 deraadt 309: * ignore state on a stack. In some complicated cases they have to
310: * alter the preprocessor directive, as follows.
311: *
312: * When we have processed a group that starts off with a known-false
313: * #if/#elif sequence (which has therefore been deleted) followed by a
1.9 deraadt 314: * #elif that we don't understand and therefore must keep, we edit the
1.8 deraadt 315: * latter into a #if to keep the nesting correct.
316: *
317: * When we find a true #elif in a group, the following block will
318: * always be kept and the rest of the sequence after the next #elif or
1.9 deraadt 319: * #else will be discarded. We edit the #elif into a #else and the
1.8 deraadt 320: * following directive to #endif since this has the desired behaviour.
1.9 deraadt 321: *
322: * "Dodgy" directives are split across multiple lines, the most common
323: * example being a multi-line comment hanging off the right of the
324: * directive. We can handle them correctly only if there is no change
325: * from printing to dropping (or vice versa) caused by that directive.
326: * If the directive is the first of a group we have a choice between
327: * failing with an error, or passing it through unchanged instead of
328: * evaluating it. The latter is not the default to avoid questions from
329: * users about unifdef unexpectedly leaving behind preprocessor directives.
1.8 deraadt 330: */
331: typedef void state_fn(void);
332:
333: /* report an error */
/* #elif seen where the current state does not allow one */
static void
Eelif(void)
{
	error("Inappropriate #elif");
}

/* #else seen where the current state does not allow one */
static void
Eelse(void)
{
	error("Inappropriate #else");
}

/* #endif seen where the current state does not allow one */
static void
Eendif(void)
{
	error("Inappropriate #endif");
}

/* input ended while a group was still open */
static void
Eeof(void)
{
	error("Premature EOF");
}

/* a directive we cannot handle safely */
static void
Eioccc(void)
{
	error("Obfuscated preprocessor control line");
}
363:
364: /* plain line handling */
/* hand the current line to flushline() as one to keep */
static void
print(void)
{
	flushline(true);
}

/* hand the current line to flushline() as one to discard */
static void
drop(void)
{
	flushline(false);
}
376:
377: /* output lacks group's start line */
378: static void
379: Strue(void)
380: {
381: drop();
1.9 deraadt 382: ignoreoff();
1.8 deraadt 383: state(IS_TRUE_PREFIX);
384: }
385:
386: static void
387: Sfalse(void)
388: {
389: drop();
1.9 deraadt 390: ignoreoff();
1.8 deraadt 391: state(IS_FALSE_PREFIX);
392: }
393:
394: static void
395: Selse(void)
396: {
397: drop();
398: state(IS_TRUE_ELSE);
399: }
400:
401: /* print/pass this block */
402: static void
403: Pelif(void)
404: {
405: print();
1.9 deraadt 406: ignoreoff();
1.8 deraadt 407: state(IS_PASS_MIDDLE);
408: }
409:
410: static void
411: Pelse(void)
412: {
413: print();
414: state(IS_PASS_ELSE);
415: }
416:
417: static void
418: Pendif(void)
419: {
420: print();
421: --depth;
422: }
423:
424: /* discard this block */
425: static void
426: Dfalse(void)
427: {
428: drop();
1.9 deraadt 429: ignoreoff();
1.8 deraadt 430: state(IS_FALSE_TRAILER);
431: }
432:
433: static void
434: Delif(void)
435: {
436: drop();
1.9 deraadt 437: ignoreoff();
1.8 deraadt 438: state(IS_FALSE_MIDDLE);
439: }
440:
441: static void
442: Delse(void)
443: {
444: drop();
445: state(IS_FALSE_ELSE);
446: }
447:
448: static void
449: Dendif(void)
450: {
451: drop();
452: --depth;
453: }
454:
455: /* first line of group */
/*
 * Each of these opens a new nesting level with nest() and then applies
 * the corresponding non-nesting transition to the new level.
 */

/* new group, handled as Dfalse() */
static void
Fdrop(void)
{
	nest();
	Dfalse();
}

/* new group, handled as Pelif() */
static void
Fpass(void)
{
	nest();
	Pelif();
}

/* new group, handled as Strue() */
static void
Ftrue(void)
{
	nest();
	Strue();
}

/* new group, handled as Sfalse() */
static void
Ffalse(void)
{
	nest();
	Sfalse();
}
483:
1.9 deraadt 484: /* variable pedantry for obfuscated lines */
485: static void
486: Oiffy(void)
487: {
488: if (iocccok)
489: Fpass();
490: else
491: Eioccc();
492: ignoreon();
493: }
494:
495: static void
496: Oif(void)
497: {
498: if (iocccok)
499: Fpass();
500: else
501: Eioccc();
502: }
503:
504: static void
505: Oelif(void)
506: {
507: if (iocccok)
508: Pelif();
509: else
510: Eioccc();
511: }
512:
1.8 deraadt 513: /* ignore comments in this block */
/*
 * Variants of the first-line-of-group functions that additionally
 * switch on comment ignoring for the level they have just opened.
 */

/* Fdrop() with ignoring enabled */
static void
Idrop(void)
{
	Fdrop();
	ignoreon();
}

/* Ftrue() with ignoring enabled */
static void
Itrue(void)
{
	Ftrue();
	ignoreon();
}

/* Ffalse() with ignoring enabled */
static void
Ifalse(void)
{
	Ffalse();
	ignoreon();
}
534:
1.9 deraadt 535: /* edit this line */
1.8 deraadt 536: static void
537: Mpass (void)
538: {
539: strncpy(keyword, "if ", 4);
540: Pelif();
541: }
542:
543: static void
544: Mtrue (void)
545: {
1.9 deraadt 546: keywordedit("else\n");
1.8 deraadt 547: state(IS_TRUE_MIDDLE);
548: }
549:
550: static void
551: Melif (void)
552: {
1.9 deraadt 553: keywordedit("endif\n");
1.8 deraadt 554: state(IS_FALSE_TRAILER);
555: }
556:
557: static void
558: Melse (void)
559: {
1.9 deraadt 560: keywordedit("endif\n");
1.8 deraadt 561: state(IS_FALSE_ELSE);
562: }
563:
564: static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
565: /* IS_OUTSIDE */
1.9 deraadt 566: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
567: Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif,
568: print, NULL },
1.8 deraadt 569: /* IS_FALSE_PREFIX */
1.9 deraadt 570: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
571: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
572: drop, Eeof },
1.8 deraadt 573: /* IS_TRUE_PREFIX */
1.9 deraadt 574: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
575: Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
576: print, Eeof },
1.8 deraadt 577: /* IS_PASS_MIDDLE */
1.9 deraadt 578: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
579: Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif,
580: print, Eeof },
1.8 deraadt 581: /* IS_FALSE_MIDDLE */
1.9 deraadt 582: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
583: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
584: drop, Eeof },
1.8 deraadt 585: /* IS_TRUE_MIDDLE */
1.9 deraadt 586: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
587: Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
588: print, Eeof },
1.8 deraadt 589: /* IS_PASS_ELSE */
1.9 deraadt 590: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
591: Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif,
592: print, Eeof },
1.8 deraadt 593: /* IS_FALSE_ELSE */
1.9 deraadt 594: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
595: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
596: drop, Eeof },
1.8 deraadt 597: /* IS_TRUE_ELSE */
1.9 deraadt 598: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
599: Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc,
600: print, Eeof },
1.8 deraadt 601: /* IS_FALSE_TRAILER */
1.9 deraadt 602: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
603: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
604: drop, Eeof }
605: /*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF
606: TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY)
607: PLAIN EOF */
1.8 deraadt 608: };
609:
610: /*
611: * State machine utility functions
612: */
613: static void
1.9 deraadt 614: ignoreoff(void)
615: {
616: ignoring[depth] = ignoring[depth-1];
617: }
618:
619: static void
620: ignoreon(void)
621: {
622: ignoring[depth] = true;
623: }
624:
625: static void
626: keywordedit(const char *replacement)
627: {
628: strlcpy(keyword, replacement, tline + sizeof(tline) - keyword);
629: print();
630: }
631:
632: static void
1.8 deraadt 633: nest(void)
634: {
635: depth += 1;
636: if (depth >= MAXDEPTH)
637: error("Too many levels of nesting");
638: stifline[depth] = linenum;
639: }
640:
641: static void
642: state(Ifstate is)
643: {
644: ifstate[depth] = is;
645: }
646:
1.7 deraadt 647: /*
1.8 deraadt 648: * Write a line to the output or not, according to command line options.
649: */
650: static void
651: flushline(bool keep)
652: {
653: if (symlist)
654: return;
655: if (keep ^ complement)
656: fputs(tline, stdout);
657: else {
658: if (lnblank)
659: putc('\n', stdout);
660: exitstat = 1;
1.7 deraadt 661: }
662: }
1.3 deraadt 663:
1.7 deraadt 664: /*
1.8 deraadt 665: * The driver for the state machine.
1.7 deraadt 666: */
1.8 deraadt 667: static void
668: process(void)
1.7 deraadt 669: {
670: Linetype lineval;
1.8 deraadt 671: state_fn *trans;
1.7 deraadt 672:
673: for (;;) {
674: linenum++;
1.8 deraadt 675: lineval = getline();
676: trans = trans_table[ifstate[depth]][lineval];
677: if (trans == NULL)
678: break;
679: trans();
680: debug("process %s -> %s depth %d",
681: linetype_name[lineval],
682: ifstate_name[ifstate[depth]], depth);
1.1 deraadt 683: }
1.8 deraadt 684: if (incomment)
685: error("EOF in comment");
1.1 deraadt 686: }
687:
1.7 deraadt 688: /*
1.8 deraadt 689: * Parse a line and determine its type. We keep the preprocessor line
690: * parser state between calls in a global variable.
1.7 deraadt 691: */
1.8 deraadt 692: static Linetype
693: getline(void)
1.3 deraadt 694: {
1.7 deraadt 695: const char *cp;
1.8 deraadt 696: int cursym;
697: int kwlen;
1.3 deraadt 698: Linetype retval;
1.8 deraadt 699: Comment_state wascomment;
1.3 deraadt 700:
1.8 deraadt 701: if (fgets(tline, MAXLINE, input) == NULL)
702: return (LT_EOF);
1.3 deraadt 703: retval = LT_PLAIN;
1.8 deraadt 704: wascomment = incomment;
1.7 deraadt 705: cp = skipcomment(tline);
1.8 deraadt 706: if (linestate == LS_START) {
707: if (*cp == '#') {
708: linestate = LS_HASH;
709: cp = skipcomment(cp + 1);
710: } else if (*cp != '\0')
711: linestate = LS_DIRTY;
712: }
713: if (!incomment && linestate == LS_HASH) {
714: keyword = tline + (cp - tline);
715: cp = skipsym(cp);
716: kwlen = cp - keyword;
1.9 deraadt 717: /* no way can we deal with a continuation inside a keyword */
1.8 deraadt 718: if (strncmp(cp, "\\\n", 2) == 0)
719: Eioccc();
720: if (strlcmp("ifdef", keyword, kwlen) == 0 ||
721: strlcmp("ifndef", keyword, kwlen) == 0) {
722: cp = skipcomment(cp);
723: if ((cursym = findsym(cp)) < 0)
724: retval = LT_IF;
725: else {
726: retval = (keyword[2] == 'n')
727: ? LT_FALSE : LT_TRUE;
728: if (value[cursym] == NULL)
729: retval = (retval == LT_TRUE)
730: ? LT_FALSE : LT_TRUE;
731: if (ignore[cursym])
732: retval = (retval == LT_TRUE)
733: ? LT_TRUEI : LT_FALSEI;
734: }
735: cp = skipsym(cp);
736: } else if (strlcmp("if", keyword, kwlen) == 0)
737: retval = ifeval(&cp);
738: else if (strlcmp("elif", keyword, kwlen) == 0)
739: retval = ifeval(&cp) - LT_IF + LT_ELIF;
740: else if (strlcmp("else", keyword, kwlen) == 0)
741: retval = LT_ELSE;
742: else if (strlcmp("endif", keyword, kwlen) == 0)
743: retval = LT_ENDIF;
744: else {
745: linestate = LS_DIRTY;
1.7 deraadt 746: retval = LT_PLAIN;
747: }
748: cp = skipcomment(cp);
1.8 deraadt 749: if (*cp != '\0') {
750: linestate = LS_DIRTY;
751: if (retval == LT_TRUE || retval == LT_FALSE ||
752: retval == LT_TRUEI || retval == LT_FALSEI)
753: retval = LT_IF;
754: if (retval == LT_ELTRUE || retval == LT_ELFALSE)
755: retval = LT_ELIF;
1.3 deraadt 756: }
1.9 deraadt 757: if (retval != LT_PLAIN && (wascomment || incomment)) {
758: retval += LT_DODGY;
759: if (incomment)
760: linestate = LS_DIRTY;
761: }
762: /* skipcomment should have changed the state */
1.8 deraadt 763: if (linestate == LS_HASH)
764: abort(); /* bug */
1.7 deraadt 765: }
1.8 deraadt 766: if (linestate == LS_DIRTY) {
767: while (*cp != '\0')
768: cp = skipcomment(cp + 1);
769: }
770: debug("parser %s comment %s line",
771: comment_name[incomment], linestate_name[linestate]);
1.7 deraadt 772: return (retval);
773: }
774:
775: /*
1.9 deraadt 776: * These are the operators that are supported by the expression
777: * evaluator. Note that if support for division is added then we also
778: * need short-circuiting booleans because of divide-by-zero.
1.7 deraadt 779: */
/* a < b */
static int
op_lt(int a, int b)
{
	return (b > a);
}

/* a > b */
static int
op_gt(int a, int b)
{
	return (b < a);
}

/* a <= b */
static int
op_le(int a, int b)
{
	return (!(a > b));
}

/* a >= b */
static int
op_ge(int a, int b)
{
	return (!(a < b));
}

/* a == b */
static int
op_eq(int a, int b)
{
	return (!(a != b));
}

/* a != b */
static int
op_ne(int a, int b)
{
	return (!(a == b));
}

/* logical or; both operands have already been evaluated */
static int
op_or(int a, int b)
{
	return (a != 0 || b != 0);
}

/* logical and; both operands have already been evaluated */
static int
op_and(int a, int b)
{
	return (a != 0 && b != 0);
}
827:
828: /*
1.8 deraadt 829: * An evaluation function takes three arguments, as follows: (1) a pointer to
830: * an element of the precedence table which lists the operators at the current
831: * level of precedence; (2) a pointer to an integer which will receive the
832: * value of the expression; and (3) a pointer to a char* that points to the
833: * expression to be evaluated and that is updated to the end of the expression
834: * when evaluation is complete. The function returns LT_FALSE if the value of
835: * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the
836: * expression could not be evaluated.
1.7 deraadt 837: */
1.8 deraadt 838: struct ops;
839:
840: typedef Linetype eval_fn(const struct ops *, int *, const char **);
841:
842: static eval_fn eval_table, eval_unary;
843:
844: /*
845: * The precedence table. Expressions involving binary operators are evaluated
846: * in a table-driven way by eval_table. When it evaluates a subexpression it
847: * calls the inner function with its first argument pointing to the next
848: * element of the table. Innermost expressions have special non-table-driven
849: * handling.
850: */
851: static const struct ops {
852: eval_fn *inner;
853: struct op {
854: const char *str;
855: int (*fn)(int, int);
856: } op[5];
857: } eval_ops[] = {
858: { eval_table, { { "||", op_or } } },
859: { eval_table, { { "&&", op_and } } },
860: { eval_table, { { "==", op_eq },
861: { "!=", op_ne } } },
862: { eval_unary, { { "<=", op_le },
863: { ">=", op_ge },
864: { "<", op_lt },
865: { ">", op_gt } } }
866: };
1.7 deraadt 867:
868: /*
869: * Function for evaluating the innermost parts of expressions,
870: * viz. !expr (expr) defined(symbol) symbol number
871: * We reset the keepthis flag when we find a non-constant subexpression.
872: */
1.8 deraadt 873: static Linetype
874: eval_unary(const struct ops *ops, int *valp, const char **cpp)
1.7 deraadt 875: {
876: const char *cp;
877: char *ep;
878: int sym;
879:
880: cp = skipcomment(*cpp);
1.8 deraadt 881: if (*cp == '!') {
1.7 deraadt 882: debug("eval%d !", ops - eval_ops);
883: cp++;
884: if (eval_unary(ops, valp, &cp) == LT_IF)
885: return (LT_IF);
886: *valp = !*valp;
887: } else if (*cp == '(') {
888: cp++;
889: debug("eval%d (", ops - eval_ops);
890: if (eval_table(eval_ops, valp, &cp) == LT_IF)
891: return (LT_IF);
892: cp = skipcomment(cp);
893: if (*cp++ != ')')
894: return (LT_IF);
895: } else if (isdigit((unsigned char)*cp)) {
896: debug("eval%d number", ops - eval_ops);
897: *valp = strtol(cp, &ep, 0);
898: cp = skipsym(cp);
899: } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) {
900: cp = skipcomment(cp+7);
901: debug("eval%d defined", ops - eval_ops);
902: if (*cp++ != '(')
903: return (LT_IF);
904: cp = skipcomment(cp);
905: sym = findsym(cp);
1.8 deraadt 906: if (sym < 0 && !symlist)
1.7 deraadt 907: return (LT_IF);
908: *valp = (value[sym] != NULL);
909: cp = skipsym(cp);
910: cp = skipcomment(cp);
911: if (*cp++ != ')')
912: return (LT_IF);
913: keepthis = false;
914: } else if (!endsym(*cp)) {
915: debug("eval%d symbol", ops - eval_ops);
916: sym = findsym(cp);
1.8 deraadt 917: if (sym < 0 && !symlist)
1.7 deraadt 918: return (LT_IF);
919: if (value[sym] == NULL)
920: *valp = 0;
921: else {
922: *valp = strtol(value[sym], &ep, 0);
923: if (*ep != '\0' || ep == value[sym])
924: return (LT_IF);
925: }
926: cp = skipsym(cp);
927: keepthis = false;
928: } else
929: return (LT_IF);
930:
931: *cpp = cp;
932: debug("eval%d = %d", ops - eval_ops, *valp);
933: return (*valp ? LT_TRUE : LT_FALSE);
934: }
935:
936: /*
937: * Table-driven evaluation of binary operators.
938: */
1.8 deraadt 939: static Linetype
940: eval_table(const struct ops *ops, int *valp, const char **cpp)
1.7 deraadt 941: {
1.8 deraadt 942: const struct op *op;
1.7 deraadt 943: const char *cp;
944: int val;
945:
946: debug("eval%d", ops - eval_ops);
947: cp = *cpp;
948: if (ops->inner(ops+1, valp, &cp) == LT_IF)
949: return (LT_IF);
950: for (;;) {
951: cp = skipcomment(cp);
952: for (op = ops->op; op->str != NULL; op++)
953: if (strncmp(cp, op->str, strlen(op->str)) == 0)
954: break;
955: if (op->str == NULL)
956: break;
957: cp += strlen(op->str);
958: debug("eval%d %s", ops - eval_ops, op->str);
959: if (ops->inner(ops+1, &val, &cp) == LT_IF)
1.8 deraadt 960: return (LT_IF);
1.7 deraadt 961: *valp = op->fn(*valp, val);
962: }
963:
964: *cpp = cp;
965: debug("eval%d = %d", ops - eval_ops, *valp);
966: return (*valp ? LT_TRUE : LT_FALSE);
1.1 deraadt 967: }
1.7 deraadt 968:
1.1 deraadt 969: /*
1.7 deraadt 970: * Evaluate the expression on a #if or #elif line. If we can work out
971: * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
1.8 deraadt 972: * return just a generic LT_IF.
1.1 deraadt 973: */
1.8 deraadt 974: static Linetype
1.7 deraadt 975: ifeval(const char **cpp)
976: {
1.8 deraadt 977: int ret;
1.7 deraadt 978: int val;
979:
980: debug("eval %s", *cpp);
981: keepthis = killconsts ? false : true;
1.8 deraadt 982: ret = eval_table(eval_ops, &val, cpp);
983: return (keepthis ? LT_IF : ret);
1.7 deraadt 984: }
985:
986: /*
987: * Skip over comments and stop at the next character position that is
1.11 ! avsm 988: * not whitespace. Between calls we keep the comment state in the
! 989: * global variable incomment, and we also adjust the global variable
! 990: * linestate when we see a newline.
1.8 deraadt 991: * XXX: doesn't cope with the buffer splitting inside a state transition.
1.7 deraadt 992: */
1.8 deraadt 993: static const char *
1.7 deraadt 994: skipcomment(const char *cp)
1.3 deraadt 995: {
1.8 deraadt 996: if (text || ignoring[depth]) {
1.11 ! avsm 997: for (; isspace((unsigned char)*cp); cp++)
! 998: if (*cp == '\n')
! 999: linestate = LS_START;
1.8 deraadt 1000: return (cp);
1001: }
1002: while (*cp != '\0')
1003: if (strncmp(cp, "\\\n", 2) == 0)
1004: cp += 2;
1005: else switch (incomment) {
1006: case NO_COMMENT:
1007: if (strncmp(cp, "/\\\n", 3) == 0) {
1008: incomment = STARTING_COMMENT;
1009: cp += 3;
1010: } else if (strncmp(cp, "/*", 2) == 0) {
1.3 deraadt 1011: incomment = C_COMMENT;
1.8 deraadt 1012: cp += 2;
1013: } else if (strncmp(cp, "//", 2) == 0) {
1014: incomment = CXX_COMMENT;
1015: cp += 2;
1016: } else if (strncmp(cp, "\n", 1) == 0) {
1017: linestate = LS_START;
1018: cp += 1;
1019: } else if (strchr(" \t", *cp) != NULL) {
1020: cp += 1;
1021: } else
1022: return (cp);
1023: continue;
1024: case CXX_COMMENT:
1025: if (strncmp(cp, "\n", 1) == 0) {
1026: incomment = NO_COMMENT;
1027: linestate = LS_START;
1.3 deraadt 1028: }
1.8 deraadt 1029: cp += 1;
1030: continue;
1031: case C_COMMENT:
1032: if (strncmp(cp, "*\\\n", 3) == 0) {
1033: incomment = FINISHING_COMMENT;
1034: cp += 3;
1035: } else if (strncmp(cp, "*/", 2) == 0) {
1036: incomment = NO_COMMENT;
1037: cp += 2;
1038: } else
1039: cp += 1;
1040: continue;
1041: case STARTING_COMMENT:
1042: if (*cp == '*') {
1043: incomment = C_COMMENT;
1044: cp += 1;
1045: } else if (*cp == '/') {
1.3 deraadt 1046: incomment = CXX_COMMENT;
1.8 deraadt 1047: cp += 1;
1048: } else {
1049: incomment = NO_COMMENT;
1050: linestate = LS_DIRTY;
1.3 deraadt 1051: }
1.8 deraadt 1052: continue;
1053: case FINISHING_COMMENT:
1054: if (*cp == '/') {
1055: incomment = NO_COMMENT;
1056: cp += 1;
1057: } else
1058: incomment = C_COMMENT;
1059: continue;
1060: default:
1061: /* bug */
1062: abort();
1.3 deraadt 1063: }
1.8 deraadt 1064: return (cp);
1.1 deraadt 1065: }
1.7 deraadt 1066:
1067: /*
1068: * Skip over an identifier.
1069: */
/*
 * Return the position of the first character at or after cp for which
 * endsym() is true.
 */
static const char *
skipsym(const char *cp)
{
	const char *end;

	for (end = cp; !endsym(*end); end++)
		continue;
	return (end);
}
1077:
1.1 deraadt 1078: /*
1.7 deraadt 1079: * Look for the symbol in the symbol table. If it is found, we return
1.8 deraadt 1080: * the symbol table index, else we return -1.
1.1 deraadt 1081: */
1.8 deraadt 1082: static int
1.7 deraadt 1083: findsym(const char *str)
1.1 deraadt 1084: {
1.7 deraadt 1085: const char *cp;
1.3 deraadt 1086: int symind;
1087:
1.8 deraadt 1088: cp = skipsym(str);
1089: if (cp == str)
1090: return (-1);
1091: if (symlist)
1.7 deraadt 1092: printf("%.*s\n", (int)(cp-str), str);
1.8 deraadt 1093: for (symind = 0; symind < nsyms; ++symind) {
1094: if (strlcmp(symname[symind], str, cp-str) == 0) {
1.7 deraadt 1095: debug("findsym %s %s", symname[symind],
1096: value[symind] ? value[symind] : "");
1097: return (symind);
1.3 deraadt 1098: }
1.1 deraadt 1099: }
1.8 deraadt 1100: return (-1);
1.1 deraadt 1101: }
1.7 deraadt 1102:
1.1 deraadt 1103: /*
1.7 deraadt 1104: * Add a symbol to the symbol table.
1105: */
1.8 deraadt 1106: static void
1.7 deraadt 1107: addsym(bool ignorethis, bool definethis, char *sym)
1108: {
1109: int symind;
1110: char *val;
1111:
1112: symind = findsym(sym);
1.8 deraadt 1113: if (symind < 0) {
1.7 deraadt 1114: if (nsyms >= MAXSYMS)
1115: errx(2, "too many symbols");
1116: symind = nsyms++;
1117: }
1118: symname[symind] = sym;
1119: ignore[symind] = ignorethis;
1.8 deraadt 1120: val = sym + (skipsym(sym) - sym);
1.7 deraadt 1121: if (definethis) {
1122: if (*val == '=') {
1123: value[symind] = val+1;
1124: *val = '\0';
1125: } else if (*val == '\0')
1126: value[symind] = "";
1127: else
1128: usage();
1129: } else {
1130: if (*val != '\0')
1131: usage();
1132: value[symind] = NULL;
1133: }
1134: }
1135:
1136: /*
1.8 deraadt 1137: * Compare s with n characters of t.
1138: * The same as strncmp() except that it checks that s[n] == '\0'.
1.1 deraadt 1139: */
/*
 * Compare the string s with at most the first n characters of t.
 * Returns zero when s is exactly that prefix of t, i.e. the compared
 * characters are equal and s ends where the comparison stops; any
 * other result is non-zero.
 */
static int
strlcmp(const char *s, const char *t, size_t n)
{
	size_t i;

	for (i = 0; i < n && t[i] != '\0'; i++) {
		if (s[i] != t[i])
			return ((unsigned char)s[i] - (unsigned char)t[i]);
	}
	/* non-zero when s has characters left over */
	return ((unsigned char)s[i]);
}
1150:
1.7 deraadt 1151: /*
1.8 deraadt 1152: * Diagnostics.
1.7 deraadt 1153: */
1.8 deraadt 1154: static void
1.7 deraadt 1155: debug(const char *msg, ...)
1.1 deraadt 1156: {
1.7 deraadt 1157: va_list ap;
1158:
1159: if (debugging) {
1160: va_start(ap, msg);
1161: vwarnx(msg, ap);
1162: va_end(ap);
1163: }
1.1 deraadt 1164: }
1165:
1.8 deraadt 1166: static void
1167: error(const char *msg)
1.7 deraadt 1168: {
1.8 deraadt 1169: if (depth == 0)
1.9 deraadt 1170: warnx("%s: %d: %s", filename, linenum, msg);
1.7 deraadt 1171: else
1.9 deraadt 1172: warnx("%s: %d: %s (#if line %d depth %d)",
1.8 deraadt 1173: filename, linenum, msg, stifline[depth], depth);
1.9 deraadt 1174: errx(2, "output may be truncated");
1.1 deraadt 1175: }