Annotation of src/usr.bin/unifdef/unifdef.c, Revision 1.9
1.9 ! deraadt 1: /* $OpenBSD: unifdef.c,v 1.8 2003/01/18 23:42:51 deraadt Exp $ */
1.1 deraadt 2: /*
3: * Copyright (c) 1985, 1993
4: * The Regents of the University of California. All rights reserved.
5: *
6: * This code is derived from software contributed to Berkeley by
1.7 deraadt 7: * Dave Yost. Support for #if and #elif was added by Tony Finch.
1.1 deraadt 8: *
9: * Redistribution and use in source and binary forms, with or without
10: * modification, are permitted provided that the following conditions
11: * are met:
12: * 1. Redistributions of source code must retain the above copyright
13: * notice, this list of conditions and the following disclaimer.
14: * 2. Redistributions in binary form must reproduce the above copyright
15: * notice, this list of conditions and the following disclaimer in the
16: * documentation and/or other materials provided with the distribution.
17: * 3. All advertising materials mentioning features or use of this software
18: * must display the following acknowledgement:
19: * This product includes software developed by the University of
20: * California, Berkeley and its contributors.
21: * 4. Neither the name of the University nor the names of its contributors
22: * may be used to endorse or promote products derived from this software
23: * without specific prior written permission.
24: *
25: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35: * SUCH DAMAGE.
36: */
37:
#ifndef lint
/* Identification strings kept in the object file. */
static const char copyright[] =
	"@(#) Copyright (c) 1985, 1993\n"
	"The Regents of the University of California. All rights reserved.\n";

#if 0
static char sccsid[] = "@(#)unifdef.c 8.1 (Berkeley) 6/6/93";
#endif
static const char rcsid[] =
	"$OpenBSD: unifdef.c,v 1.8 2003/01/18 23:42:51 deraadt Exp $";
#endif
1.1 deraadt 48:
49: /*
50: * unifdef - remove ifdef'ed lines
51: *
52: * Wishlist:
53: * provide an option which will append the name of the
54: * appropriate symbol after #else's and #endif's
55: * provide an option which will check symbols after
56: * #else's and #endif's to see that they match their
57: * corresponding #ifdef or #ifndef
1.7 deraadt 58: * generate #line directives in place of deleted code
1.9 ! deraadt 59: *
! 60: * The first two items above require better buffer handling, which would
! 61: * also make it possible to handle all "dodgy" directives correctly.
1.1 deraadt 62: */
63:
1.7 deraadt 64: #include <ctype.h>
65: #include <err.h>
66: #include <stdarg.h>
67: #include <stdbool.h>
1.1 deraadt 68: #include <stdio.h>
1.7 deraadt 69: #include <stdlib.h>
70: #include <string.h>
71: #include <unistd.h>
1.1 deraadt 72:
/*
 * Classification of an input line.  The ten directive types come first
 * so that adding LT_DODGY to any of them yields the matching "dodgy"
 * (split across lines) variant.
 */
typedef enum {
	LT_TRUEI,		/* a true #if with ignore flag */
	LT_FALSEI,		/* a false #if with ignore flag */
	LT_IF,			/* an unknown #if */
	LT_TRUE,		/* a true #if */
	LT_FALSE,		/* a false #if */
	LT_ELIF,		/* an unknown #elif */
	LT_ELTRUE,		/* a true #elif */
	LT_ELFALSE,		/* a false #elif */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* flag: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,		/* ordinary line */
	LT_EOF,			/* end of file */
	LT_COUNT
} Linetype;

/* Printable names for debugging output, indexed by Linetype. */
static char const * const linetype_name[] = {
	[LT_TRUEI] = "TRUEI",
	[LT_FALSEI] = "FALSEI",
	[LT_IF] = "IF",
	[LT_TRUE] = "TRUE",
	[LT_FALSE] = "FALSE",
	[LT_ELIF] = "ELIF",
	[LT_ELTRUE] = "ELTRUE",
	[LT_ELFALSE] = "ELFALSE",
	[LT_ELSE] = "ELSE",
	[LT_ENDIF] = "ENDIF",
	[LT_DODGY + LT_TRUEI] = "DODGY TRUEI",
	[LT_DODGY + LT_FALSEI] = "DODGY FALSEI",
	[LT_DODGY + LT_IF] = "DODGY IF",
	[LT_DODGY + LT_TRUE] = "DODGY TRUE",
	[LT_DODGY + LT_FALSE] = "DODGY FALSE",
	[LT_DODGY + LT_ELIF] = "DODGY ELIF",
	[LT_DODGY + LT_ELTRUE] = "DODGY ELTRUE",
	[LT_DODGY + LT_ELFALSE] = "DODGY ELFALSE",
	[LT_DODGY + LT_ELSE] = "DODGY ELSE",
	[LT_DODGY + LT_ENDIF] = "DODGY ENDIF",
	[LT_PLAIN] = "PLAIN",
	[LT_EOF] = "EOF"
};
1.7 deraadt 101:
/* State of #if processing for one nesting level. */
typedef enum {
	IS_OUTSIDE,
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT
} Ifstate;

/* Printable names for debugging output, indexed by Ifstate. */
static char const * const ifstate_name[] = {
	[IS_OUTSIDE] = "OUTSIDE",
	[IS_FALSE_PREFIX] = "FALSE_PREFIX",
	[IS_TRUE_PREFIX] = "TRUE_PREFIX",
	[IS_PASS_MIDDLE] = "PASS_MIDDLE",
	[IS_FALSE_MIDDLE] = "FALSE_MIDDLE",
	[IS_TRUE_MIDDLE] = "TRUE_MIDDLE",
	[IS_PASS_ELSE] = "PASS_ELSE",
	[IS_FALSE_ELSE] = "FALSE_ELSE",
	[IS_TRUE_ELSE] = "TRUE_ELSE",
	[IS_FALSE_TRAILER] = "FALSE_TRAILER"
};
123:
/*
 * State of the comment parser.  NO_COMMENT is zero so the state can be
 * tested directly as a truth value.
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT	/* star-backslash-newline in a C comment */
} Comment_state;

/* Printable names for debugging output, indexed by Comment_state. */
static char const * const comment_name[] = {
	[NO_COMMENT] = "NO",
	[C_COMMENT] = "C",
	[CXX_COMMENT] = "CXX",
	[STARTING_COMMENT] = "STARTING",
	[FINISHING_COMMENT] = "FINISHING"
};
1.7 deraadt 136:
/* State of the preprocessor-line parser for the current logical line. */
typedef enum {
	LS_START,	/* only space and comments on this line */
	LS_HASH,	/* only space, comments, and a hash */
	LS_DIRTY	/* this line can't be a preprocessor line */
} Line_state;

/* Printable names for debugging output, indexed by Line_state. */
static char const * const linestate_name[] = {
	[LS_START] = "START",
	[LS_HASH] = "HASH",
	[LS_DIRTY] = "DIRTY"
};
1.7 deraadt 147:
/*
 * Fixed capacities.  The values follow the minimum translation limits
 * from ISO/IEC 9899:1999 5.2.4.1.
 */
#define	MAXDEPTH	64	/* maximum #if nesting */
#define	MAXLINE		4096	/* maximum length of line */
#define	MAXSYMS		4096	/* maximum number of symbols */

/*
 * Editing a keyword can make the line longer than it was, so the tline
 * buffer is declared with this many spare bytes at the end.
 */
#define	EDITSLOP	10
! 160:
! 161: /*
1.8 deraadt 162: * Globals.
1.7 deraadt 163: */
164:
1.8 deraadt 165: static bool complement; /* -c: do the complement */
166: static bool debugging; /* -d: debugging reports */
1.9 ! deraadt 167: static bool iocccok; /* -e: fewer IOCCC errors */
1.8 deraadt 168: static bool killconsts; /* -k: eval constant #ifs */
169: static bool lnblank; /* -l: blank deleted lines */
170: static bool symlist; /* -s: output symbol list */
171: static bool text; /* -t: this is a text file */
172:
173: static const char *symname[MAXSYMS]; /* symbol name */
174: static const char *value[MAXSYMS]; /* -Dsym=value */
175: static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */
176: static int nsyms; /* number of symbols */
177:
178: static FILE *input; /* input file pointer */
179: static const char *filename; /* input file name */
180: static int linenum; /* current line number */
181:
1.9 ! deraadt 182: static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */
1.8 deraadt 183: static char *keyword; /* used for editing #elif's */
184:
185: static Comment_state incomment; /* comment parser state */
186: static Line_state linestate; /* #if line parser state */
187: static Ifstate ifstate[MAXDEPTH]; /* #if processor state */
188: static bool ignoring[MAXDEPTH]; /* ignore comments state */
189: static int stifline[MAXDEPTH]; /* start of current #if */
190: static int depth; /* current #if nesting */
191: static bool keepthis; /* don't delete constant #if */
192:
193: static int exitstat; /* program exit status */
194:
195: static void addsym(bool, bool, char *);
196: static void debug(const char *, ...);
197: static void error(const char *);
198: static int findsym(const char *);
199: static void flushline(bool);
200: static Linetype getline(void);
201: static Linetype ifeval(const char **);
1.9 ! deraadt 202: static void ignoreoff(void);
! 203: static void ignoreon(void);
! 204: static void keywordedit(const char *);
1.8 deraadt 205: static void nest(void);
206: static void process(void);
207: static const char *skipcomment(const char *);
208: static const char *skipsym(const char *);
209: static void state(Ifstate);
210: static int strlcmp(const char *, const char *, size_t);
211: static void usage(void);
1.7 deraadt 212:
/*
 * True if c cannot be part of an identifier, i.e. it is not a letter,
 * a digit or an underscore.  The argument is parenthesized so that the
 * casts and the comparison apply to the whole argument expression; it
 * is still evaluated more than once, so it must have no side effects.
 */
#define	endsym(c)	(!isalpha((unsigned char)(c)) && \
			 !isdigit((unsigned char)(c)) && (c) != '_')
214:
1.8 deraadt 215: /*
216: * The main program.
217: */
1.7 deraadt 218: int
219: main(int argc, char *argv[])
220: {
221: int opt;
222:
1.9 ! deraadt 223: while ((opt = getopt(argc, argv, "i:D:U:cdeklst")) != -1)
1.7 deraadt 224: switch (opt) {
225: case 'i': /* treat stuff controlled by these symbols as text */
226: /*
227: * For strict backwards-compatibility the U or D
228: * should be immediately after the -i but it doesn't
229: * matter much if we relax that requirement.
230: */
231: opt = *optarg++;
232: if (opt == 'D')
233: addsym(true, true, optarg);
234: else if (opt == 'U')
235: addsym(true, false, optarg);
236: else
237: usage();
238: break;
239: case 'D': /* define a symbol */
240: addsym(false, true, optarg);
241: break;
242: case 'U': /* undef a symbol */
243: addsym(false, false, optarg);
244: break;
245: case 'c': /* treat -D as -U and vice versa */
246: complement = true;
247: break;
1.8 deraadt 248: case 'd':
249: debugging = true;
250: break;
1.9 ! deraadt 251: case 'e': /* fewer errors from dodgy lines */
! 252: iocccok = true;
! 253: break;
1.7 deraadt 254: case 'k': /* process constant #ifs */
255: killconsts = true;
256: break;
257: case 'l': /* blank deleted lines instead of omitting them */
258: lnblank = true;
259: break;
260: case 's': /* only output list of symbols that control #ifs */
261: symlist = true;
262: break;
1.8 deraadt 263: case 't': /* don't parse C comments */
1.7 deraadt 264: text = true;
265: break;
266: default:
267: usage();
268: }
269: argc -= optind;
270: argv += optind;
1.8 deraadt 271: if (nsyms == 0 && !symlist) {
1.7 deraadt 272: warnx("must -D or -U at least one symbol");
273: usage();
274: }
275: if (argc > 1) {
276: errx(2, "can only do one file");
277: } else if (argc == 1 && strcmp(*argv, "-") != 0) {
278: filename = *argv;
279: if ((input = fopen(filename, "r")) != NULL) {
1.8 deraadt 280: process();
1.7 deraadt 281: (void) fclose(input);
282: } else
283: err(2, "can't open %s", *argv);
284: } else {
285: filename = "[stdin]";
286: input = stdin;
1.8 deraadt 287: process();
1.7 deraadt 288: }
289:
1.8 deraadt 290: exit(exitstat);
1.7 deraadt 291: }
1.1 deraadt 292:
/* Print the usage summary on stderr and exit with status 2. */
static void
usage(void)
{
	fputs("usage: unifdef [-cdeklst]"
	    " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n",
	    stderr);
	exit(2);
}
300:
301: /*
302: * A state transition function alters the global #if processing state
303: * in a particular way. The table below is indexed by the current
304: * processing state and the type of the current line. A NULL entry
1.9 ! deraadt 305: * indicates that processing is complete.
1.8 deraadt 306: *
307: * Nesting is handled by keeping a stack of states; some transition
1.9 ! deraadt 308: * functions increase or decrease the depth. They also maintain the
1.8 deraadt 309: * ignore state on a stack. In some complicated cases they have to
310: * alter the preprocessor directive, as follows.
311: *
312: * When we have processed a group that starts off with a known-false
313: * #if/#elif sequence (which has therefore been deleted) followed by a
1.9 ! deraadt 314: * #elif that we don't understand and therefore must keep, we edit the
1.8 deraadt 315: * latter into a #if to keep the nesting correct.
316: *
317: * When we find a true #elif in a group, the following block will
318: * always be kept and the rest of the sequence after the next #elif or
1.9 ! deraadt 319: * #else will be discarded. We edit the #elif into a #else and the
1.8 deraadt 320: * following directive to #endif since this has the desired behaviour.
1.9 ! deraadt 321: *
! 322: * "Dodgy" directives are split across multiple lines, the most common
! 323: * example being a multi-line comment hanging off the right of the
! 324: * directive. We can handle them correctly only if there is no change
! 325: * from printing to dropping (or vice versa) caused by that directive.
! 326: * If the directive is the first of a group we have a choice between
! 327: * failing with an error, or passing it through unchanged instead of
! 328: * evaluating it. The latter is not the default to avoid questions from
! 329: * users about unifdef unexpectedly leaving behind preprocessor directives.
1.8 deraadt 330: */
/* Type of a state transition function. */
typedef void state_fn(void);

/*
 * Error transitions: each one reports a specific problem via error().
 */

/* #elif where none is allowed */
static void
Eelif(void)
{
	error("Inappropriate #elif");
}

/* #else where none is allowed */
static void
Eelse(void)
{
	error("Inappropriate #else");
}

/* #endif where none is allowed */
static void
Eendif(void)
{
	error("Inappropriate #endif");
}

/* input ended inside a conditional group */
static void
Eeof(void)
{
	error("Premature EOF");
}

/* a directive laid out in a way that cannot be handled */
static void
Eioccc(void)
{
	error("Obfuscated preprocessor control line");
}
363:
/*
 * Plain line handling: hand the current line to flushline(), asking it
 * either to keep the line or to discard it.
 */
static void
print(void)
{
	flushline(true);	/* keep */
}

static void
drop(void)
{
	flushline(false);	/* discard */
}
376:
377: /* output lacks group's start line */
378: static void
379: Strue(void)
380: {
381: drop();
1.9 ! deraadt 382: ignoreoff();
1.8 deraadt 383: state(IS_TRUE_PREFIX);
384: }
385:
386: static void
387: Sfalse(void)
388: {
389: drop();
1.9 ! deraadt 390: ignoreoff();
1.8 deraadt 391: state(IS_FALSE_PREFIX);
392: }
393:
394: static void
395: Selse(void)
396: {
397: drop();
398: state(IS_TRUE_ELSE);
399: }
400:
401: /* print/pass this block */
402: static void
403: Pelif(void)
404: {
405: print();
1.9 ! deraadt 406: ignoreoff();
1.8 deraadt 407: state(IS_PASS_MIDDLE);
408: }
409:
410: static void
411: Pelse(void)
412: {
413: print();
414: state(IS_PASS_ELSE);
415: }
416:
417: static void
418: Pendif(void)
419: {
420: print();
421: --depth;
422: }
423:
424: /* discard this block */
425: static void
426: Dfalse(void)
427: {
428: drop();
1.9 ! deraadt 429: ignoreoff();
1.8 deraadt 430: state(IS_FALSE_TRAILER);
431: }
432:
433: static void
434: Delif(void)
435: {
436: drop();
1.9 ! deraadt 437: ignoreoff();
1.8 deraadt 438: state(IS_FALSE_MIDDLE);
439: }
440:
441: static void
442: Delse(void)
443: {
444: drop();
445: state(IS_FALSE_ELSE);
446: }
447:
448: static void
449: Dendif(void)
450: {
451: drop();
452: --depth;
453: }
454:
/*
 * First line of a group: push a new nesting level with nest(), then
 * perform the corresponding transition at the new level.
 */
static void
Fdrop(void)
{
	nest();
	Dfalse();
}

static void
Fpass(void)
{
	nest();
	Pelif();
}

static void
Ftrue(void)
{
	nest();
	Strue();
}

static void
Ffalse(void)
{
	nest();
	Sfalse();
}
483:
1.9 ! deraadt 484: /* variable pedantry for obfuscated lines */
! 485: static void
! 486: Oiffy(void)
! 487: {
! 488: if (iocccok)
! 489: Fpass();
! 490: else
! 491: Eioccc();
! 492: ignoreon();
! 493: }
! 494:
! 495: static void
! 496: Oif(void)
! 497: {
! 498: if (iocccok)
! 499: Fpass();
! 500: else
! 501: Eioccc();
! 502: }
! 503:
! 504: static void
! 505: Oelif(void)
! 506: {
! 507: if (iocccok)
! 508: Pelif();
! 509: else
! 510: Eioccc();
! 511: }
! 512:
/*
 * Ignore comments in this block: same as the F* transitions, followed
 * by ignoreon() for the newly opened nesting level.
 */
static void
Idrop(void)
{
	Fdrop();
	ignoreon();
}

static void
Itrue(void)
{
	Ftrue();
	ignoreon();
}

static void
Ifalse(void)
{
	Ffalse();
	ignoreon();
}
534:
1.9 ! deraadt 535: /* edit this line */
1.8 deraadt 536: static void
537: Mpass (void)
538: {
539: strncpy(keyword, "if ", 4);
540: Pelif();
541: }
542:
543: static void
544: Mtrue (void)
545: {
1.9 ! deraadt 546: keywordedit("else\n");
1.8 deraadt 547: state(IS_TRUE_MIDDLE);
548: }
549:
550: static void
551: Melif (void)
552: {
1.9 ! deraadt 553: keywordedit("endif\n");
1.8 deraadt 554: state(IS_FALSE_TRAILER);
555: }
556:
557: static void
558: Melse (void)
559: {
1.9 ! deraadt 560: keywordedit("endif\n");
1.8 deraadt 561: state(IS_FALSE_ELSE);
562: }
563:
564: static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
565: /* IS_OUTSIDE */
1.9 ! deraadt 566: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
! 567: Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif,
! 568: print, NULL },
1.8 deraadt 569: /* IS_FALSE_PREFIX */
1.9 ! deraadt 570: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
! 571: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
! 572: drop, Eeof },
1.8 deraadt 573: /* IS_TRUE_PREFIX */
1.9 ! deraadt 574: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
! 575: Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
! 576: print, Eeof },
1.8 deraadt 577: /* IS_PASS_MIDDLE */
1.9 ! deraadt 578: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
! 579: Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif,
! 580: print, Eeof },
1.8 deraadt 581: /* IS_FALSE_MIDDLE */
1.9 ! deraadt 582: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
! 583: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
! 584: drop, Eeof },
1.8 deraadt 585: /* IS_TRUE_MIDDLE */
1.9 ! deraadt 586: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
! 587: Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
! 588: print, Eeof },
1.8 deraadt 589: /* IS_PASS_ELSE */
1.9 ! deraadt 590: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
! 591: Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif,
! 592: print, Eeof },
1.8 deraadt 593: /* IS_FALSE_ELSE */
1.9 ! deraadt 594: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
! 595: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
! 596: drop, Eeof },
1.8 deraadt 597: /* IS_TRUE_ELSE */
1.9 ! deraadt 598: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
! 599: Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc,
! 600: print, Eeof },
1.8 deraadt 601: /* IS_FALSE_TRAILER */
1.9 ! deraadt 602: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
! 603: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
! 604: drop, Eeof }
! 605: /*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF
! 606: TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY)
! 607: PLAIN EOF */
1.8 deraadt 608: };
609:
610: /*
611: * State machine utility functions
612: */
613: static void
1.9 ! deraadt 614: ignoreoff(void)
! 615: {
! 616: ignoring[depth] = ignoring[depth-1];
! 617: }
! 618:
! 619: static void
! 620: ignoreon(void)
! 621: {
! 622: ignoring[depth] = true;
! 623: }
! 624:
! 625: static void
! 626: keywordedit(const char *replacement)
! 627: {
! 628: strlcpy(keyword, replacement, tline + sizeof(tline) - keyword);
! 629: print();
! 630: }
! 631:
! 632: static void
1.8 deraadt 633: nest(void)
634: {
635: depth += 1;
636: if (depth >= MAXDEPTH)
637: error("Too many levels of nesting");
638: stifline[depth] = linenum;
639: }
640:
641: static void
642: state(Ifstate is)
643: {
644: ifstate[depth] = is;
645: }
646:
1.7 deraadt 647: /*
1.8 deraadt 648: * Write a line to the output or not, according to command line options.
649: */
650: static void
651: flushline(bool keep)
652: {
653: if (symlist)
654: return;
655: if (keep ^ complement)
656: fputs(tline, stdout);
657: else {
658: if (lnblank)
659: putc('\n', stdout);
660: exitstat = 1;
1.7 deraadt 661: }
662: }
1.3 deraadt 663:
1.7 deraadt 664: /*
1.8 deraadt 665: * The driver for the state machine.
1.7 deraadt 666: */
1.8 deraadt 667: static void
668: process(void)
1.7 deraadt 669: {
670: Linetype lineval;
1.8 deraadt 671: state_fn *trans;
1.7 deraadt 672:
673: for (;;) {
674: linenum++;
1.8 deraadt 675: lineval = getline();
676: trans = trans_table[ifstate[depth]][lineval];
677: if (trans == NULL)
678: break;
679: trans();
680: debug("process %s -> %s depth %d",
681: linetype_name[lineval],
682: ifstate_name[ifstate[depth]], depth);
1.1 deraadt 683: }
1.8 deraadt 684: if (incomment)
685: error("EOF in comment");
1.1 deraadt 686: }
687:
1.7 deraadt 688: /*
1.8 deraadt 689: * Parse a line and determine its type. We keep the preprocessor line
690: * parser state between calls in a global variable.
1.7 deraadt 691: */
1.8 deraadt 692: static Linetype
693: getline(void)
1.3 deraadt 694: {
1.7 deraadt 695: const char *cp;
1.8 deraadt 696: int cursym;
697: int kwlen;
1.3 deraadt 698: Linetype retval;
1.8 deraadt 699: Comment_state wascomment;
1.3 deraadt 700:
1.8 deraadt 701: if (fgets(tline, MAXLINE, input) == NULL)
702: return (LT_EOF);
1.3 deraadt 703: retval = LT_PLAIN;
1.8 deraadt 704: wascomment = incomment;
1.7 deraadt 705: cp = skipcomment(tline);
1.8 deraadt 706: if (linestate == LS_START) {
707: if (*cp == '#') {
708: linestate = LS_HASH;
709: cp = skipcomment(cp + 1);
710: } else if (*cp != '\0')
711: linestate = LS_DIRTY;
712: }
713: if (!incomment && linestate == LS_HASH) {
714: keyword = tline + (cp - tline);
715: cp = skipsym(cp);
716: kwlen = cp - keyword;
1.9 ! deraadt 717: /* no way can we deal with a continuation inside a keyword */
1.8 deraadt 718: if (strncmp(cp, "\\\n", 2) == 0)
719: Eioccc();
720: if (strlcmp("ifdef", keyword, kwlen) == 0 ||
721: strlcmp("ifndef", keyword, kwlen) == 0) {
722: cp = skipcomment(cp);
723: if ((cursym = findsym(cp)) < 0)
724: retval = LT_IF;
725: else {
726: retval = (keyword[2] == 'n')
727: ? LT_FALSE : LT_TRUE;
728: if (value[cursym] == NULL)
729: retval = (retval == LT_TRUE)
730: ? LT_FALSE : LT_TRUE;
731: if (ignore[cursym])
732: retval = (retval == LT_TRUE)
733: ? LT_TRUEI : LT_FALSEI;
734: }
735: cp = skipsym(cp);
736: } else if (strlcmp("if", keyword, kwlen) == 0)
737: retval = ifeval(&cp);
738: else if (strlcmp("elif", keyword, kwlen) == 0)
739: retval = ifeval(&cp) - LT_IF + LT_ELIF;
740: else if (strlcmp("else", keyword, kwlen) == 0)
741: retval = LT_ELSE;
742: else if (strlcmp("endif", keyword, kwlen) == 0)
743: retval = LT_ENDIF;
744: else {
745: linestate = LS_DIRTY;
1.7 deraadt 746: retval = LT_PLAIN;
747: }
748: cp = skipcomment(cp);
1.8 deraadt 749: if (*cp != '\0') {
750: linestate = LS_DIRTY;
751: if (retval == LT_TRUE || retval == LT_FALSE ||
752: retval == LT_TRUEI || retval == LT_FALSEI)
753: retval = LT_IF;
754: if (retval == LT_ELTRUE || retval == LT_ELFALSE)
755: retval = LT_ELIF;
1.3 deraadt 756: }
1.9 ! deraadt 757: if (retval != LT_PLAIN && (wascomment || incomment)) {
! 758: retval += LT_DODGY;
! 759: if (incomment)
! 760: linestate = LS_DIRTY;
! 761: }
! 762: /* skipcomment should have changed the state */
1.8 deraadt 763: if (linestate == LS_HASH)
764: abort(); /* bug */
1.7 deraadt 765: }
1.8 deraadt 766: if (linestate == LS_DIRTY) {
767: while (*cp != '\0')
768: cp = skipcomment(cp + 1);
769: }
770: debug("parser %s comment %s line",
771: comment_name[incomment], linestate_name[linestate]);
1.7 deraadt 772: return (retval);
773: }
774:
/*
 * These are the operators that are supported by the expression
 * evaluator.  Each returns 1 when the relation holds and 0 otherwise.
 * Note that if support for division is added then we also need
 * short-circuiting booleans because of divide-by-zero.
 */
static int
op_lt(int a, int b)
{
	return a < b;
}

static int
op_gt(int a, int b)
{
	return a > b;
}

static int
op_le(int a, int b)
{
	return a <= b;
}

static int
op_ge(int a, int b)
{
	return a >= b;
}

static int
op_eq(int a, int b)
{
	return a == b;
}

static int
op_ne(int a, int b)
{
	return a != b;
}

/* logical or; both operands have already been evaluated */
static int
op_or(int a, int b)
{
	return a != 0 || b != 0;
}

/* logical and; both operands have already been evaluated */
static int
op_and(int a, int b)
{
	return a != 0 && b != 0;
}
827:
828: /*
1.8 deraadt 829: * An evaluation function takes three arguments, as follows: (1) a pointer to
830: * an element of the precedence table which lists the operators at the current
831: * level of precedence; (2) a pointer to an integer which will receive the
832: * value of the expression; and (3) a pointer to a char* that points to the
833: * expression to be evaluated and that is updated to the end of the expression
834: * when evaluation is complete. The function returns LT_FALSE if the value of
835: * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the
836: * expression could not be evaluated.
1.7 deraadt 837: */
1.8 deraadt 838: struct ops;
839:
840: typedef Linetype eval_fn(const struct ops *, int *, const char **);
841:
842: static eval_fn eval_table, eval_unary;
843:
844: /*
845: * The precedence table. Expressions involving binary operators are evaluated
846: * in a table-driven way by eval_table. When it evaluates a subexpression it
847: * calls the inner function with its first argument pointing to the next
848: * element of the table. Innermost expressions have special non-table-driven
849: * handling.
850: */
851: static const struct ops {
852: eval_fn *inner;
853: struct op {
854: const char *str;
855: int (*fn)(int, int);
856: } op[5];
857: } eval_ops[] = {
858: { eval_table, { { "||", op_or } } },
859: { eval_table, { { "&&", op_and } } },
860: { eval_table, { { "==", op_eq },
861: { "!=", op_ne } } },
862: { eval_unary, { { "<=", op_le },
863: { ">=", op_ge },
864: { "<", op_lt },
865: { ">", op_gt } } }
866: };
1.7 deraadt 867:
868: /*
869: * Function for evaluating the innermost parts of expressions,
870: * viz. !expr (expr) defined(symbol) symbol number
871: * We reset the keepthis flag when we find a non-constant subexpression.
872: */
1.8 deraadt 873: static Linetype
874: eval_unary(const struct ops *ops, int *valp, const char **cpp)
1.7 deraadt 875: {
876: const char *cp;
877: char *ep;
878: int sym;
879:
880: cp = skipcomment(*cpp);
1.8 deraadt 881: if (*cp == '!') {
1.7 deraadt 882: debug("eval%d !", ops - eval_ops);
883: cp++;
884: if (eval_unary(ops, valp, &cp) == LT_IF)
885: return (LT_IF);
886: *valp = !*valp;
887: } else if (*cp == '(') {
888: cp++;
889: debug("eval%d (", ops - eval_ops);
890: if (eval_table(eval_ops, valp, &cp) == LT_IF)
891: return (LT_IF);
892: cp = skipcomment(cp);
893: if (*cp++ != ')')
894: return (LT_IF);
895: } else if (isdigit((unsigned char)*cp)) {
896: debug("eval%d number", ops - eval_ops);
897: *valp = strtol(cp, &ep, 0);
898: cp = skipsym(cp);
899: } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) {
900: cp = skipcomment(cp+7);
901: debug("eval%d defined", ops - eval_ops);
902: if (*cp++ != '(')
903: return (LT_IF);
904: cp = skipcomment(cp);
905: sym = findsym(cp);
1.8 deraadt 906: if (sym < 0 && !symlist)
1.7 deraadt 907: return (LT_IF);
908: *valp = (value[sym] != NULL);
909: cp = skipsym(cp);
910: cp = skipcomment(cp);
911: if (*cp++ != ')')
912: return (LT_IF);
913: keepthis = false;
914: } else if (!endsym(*cp)) {
915: debug("eval%d symbol", ops - eval_ops);
916: sym = findsym(cp);
1.8 deraadt 917: if (sym < 0 && !symlist)
1.7 deraadt 918: return (LT_IF);
919: if (value[sym] == NULL)
920: *valp = 0;
921: else {
922: *valp = strtol(value[sym], &ep, 0);
923: if (*ep != '\0' || ep == value[sym])
924: return (LT_IF);
925: }
926: cp = skipsym(cp);
927: keepthis = false;
928: } else
929: return (LT_IF);
930:
931: *cpp = cp;
932: debug("eval%d = %d", ops - eval_ops, *valp);
933: return (*valp ? LT_TRUE : LT_FALSE);
934: }
935:
936: /*
937: * Table-driven evaluation of binary operators.
938: */
1.8 deraadt 939: static Linetype
940: eval_table(const struct ops *ops, int *valp, const char **cpp)
1.7 deraadt 941: {
1.8 deraadt 942: const struct op *op;
1.7 deraadt 943: const char *cp;
944: int val;
945:
946: debug("eval%d", ops - eval_ops);
947: cp = *cpp;
948: if (ops->inner(ops+1, valp, &cp) == LT_IF)
949: return (LT_IF);
950: for (;;) {
951: cp = skipcomment(cp);
952: for (op = ops->op; op->str != NULL; op++)
953: if (strncmp(cp, op->str, strlen(op->str)) == 0)
954: break;
955: if (op->str == NULL)
956: break;
957: cp += strlen(op->str);
958: debug("eval%d %s", ops - eval_ops, op->str);
959: if (ops->inner(ops+1, &val, &cp) == LT_IF)
1.8 deraadt 960: return (LT_IF);
1.7 deraadt 961: *valp = op->fn(*valp, val);
962: }
963:
964: *cpp = cp;
965: debug("eval%d = %d", ops - eval_ops, *valp);
966: return (*valp ? LT_TRUE : LT_FALSE);
1.1 deraadt 967: }
1.7 deraadt 968:
1.1 deraadt 969: /*
1.7 deraadt 970: * Evaluate the expression on a #if or #elif line. If we can work out
971: * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
1.8 deraadt 972: * return just a generic LT_IF.
1.1 deraadt 973: */
1.8 deraadt 974: static Linetype
1.7 deraadt 975: ifeval(const char **cpp)
976: {
1.8 deraadt 977: int ret;
1.7 deraadt 978: int val;
979:
980: debug("eval %s", *cpp);
981: keepthis = killconsts ? false : true;
1.8 deraadt 982: ret = eval_table(eval_ops, &val, cpp);
983: return (keepthis ? LT_IF : ret);
1.7 deraadt 984: }
985:
986: /*
987: * Skip over comments and stop at the next character position that is
1.8 deraadt 988: * not whitespace. Between calls we keep the comment state in a global
989: * variable, and we also make a note when we get a proper end-of-line.
990: * XXX: doesn't cope with the buffer splitting inside a state transition.
1.7 deraadt 991: */
1.8 deraadt 992: static const char *
1.7 deraadt 993: skipcomment(const char *cp)
1.3 deraadt 994: {
1.8 deraadt 995: if (text || ignoring[depth]) {
996: while (isspace((unsigned char)*cp))
997: cp += 1;
998: return (cp);
999: }
1000: while (*cp != '\0')
1001: if (strncmp(cp, "\\\n", 2) == 0)
1002: cp += 2;
1003: else switch (incomment) {
1004: case NO_COMMENT:
1005: if (strncmp(cp, "/\\\n", 3) == 0) {
1006: incomment = STARTING_COMMENT;
1007: cp += 3;
1008: } else if (strncmp(cp, "/*", 2) == 0) {
1.3 deraadt 1009: incomment = C_COMMENT;
1.8 deraadt 1010: cp += 2;
1011: } else if (strncmp(cp, "//", 2) == 0) {
1012: incomment = CXX_COMMENT;
1013: cp += 2;
1014: } else if (strncmp(cp, "\n", 1) == 0) {
1015: linestate = LS_START;
1016: cp += 1;
1017: } else if (strchr(" \t", *cp) != NULL) {
1018: cp += 1;
1019: } else
1020: return (cp);
1021: continue;
1022: case CXX_COMMENT:
1023: if (strncmp(cp, "\n", 1) == 0) {
1024: incomment = NO_COMMENT;
1025: linestate = LS_START;
1.3 deraadt 1026: }
1.8 deraadt 1027: cp += 1;
1028: continue;
1029: case C_COMMENT:
1030: if (strncmp(cp, "*\\\n", 3) == 0) {
1031: incomment = FINISHING_COMMENT;
1032: cp += 3;
1033: } else if (strncmp(cp, "*/", 2) == 0) {
1034: incomment = NO_COMMENT;
1035: cp += 2;
1036: } else
1037: cp += 1;
1038: continue;
1039: case STARTING_COMMENT:
1040: if (*cp == '*') {
1041: incomment = C_COMMENT;
1042: cp += 1;
1043: } else if (*cp == '/') {
1.3 deraadt 1044: incomment = CXX_COMMENT;
1.8 deraadt 1045: cp += 1;
1046: } else {
1047: incomment = NO_COMMENT;
1048: linestate = LS_DIRTY;
1.3 deraadt 1049: }
1.8 deraadt 1050: continue;
1051: case FINISHING_COMMENT:
1052: if (*cp == '/') {
1053: incomment = NO_COMMENT;
1054: cp += 1;
1055: } else
1056: incomment = C_COMMENT;
1057: continue;
1058: default:
1059: /* bug */
1060: abort();
1.3 deraadt 1061: }
1.8 deraadt 1062: return (cp);
1.1 deraadt 1063: }
1.7 deraadt 1064:
/*
 * Skip over an identifier: return a pointer to the first character at
 * or after cp for which endsym() is true.
 */
static const char *
skipsym(const char *cp)
{
	const char *p;

	for (p = cp; !endsym(*p); p++)
		continue;
	return (p);
}
1075:
1.1 deraadt 1076: /*
1.7 deraadt 1077: * Look for the symbol in the symbol table. If is is found, we return
1.8 deraadt 1078: * the symbol table index, else we return -1.
1.1 deraadt 1079: */
1.8 deraadt 1080: static int
1.7 deraadt 1081: findsym(const char *str)
1.1 deraadt 1082: {
1.7 deraadt 1083: const char *cp;
1.3 deraadt 1084: int symind;
1085:
1.8 deraadt 1086: cp = skipsym(str);
1087: if (cp == str)
1088: return (-1);
1089: if (symlist)
1.7 deraadt 1090: printf("%.*s\n", (int)(cp-str), str);
1.8 deraadt 1091: for (symind = 0; symind < nsyms; ++symind) {
1092: if (strlcmp(symname[symind], str, cp-str) == 0) {
1.7 deraadt 1093: debug("findsym %s %s", symname[symind],
1094: value[symind] ? value[symind] : "");
1095: return (symind);
1.3 deraadt 1096: }
1.1 deraadt 1097: }
1.8 deraadt 1098: return (-1);
1.1 deraadt 1099: }
1.7 deraadt 1100:
1.1 deraadt 1101: /*
1.7 deraadt 1102: * Add a symbol to the symbol table.
1103: */
1.8 deraadt 1104: static void
1.7 deraadt 1105: addsym(bool ignorethis, bool definethis, char *sym)
1106: {
1107: int symind;
1108: char *val;
1109:
1110: symind = findsym(sym);
1.8 deraadt 1111: if (symind < 0) {
1.7 deraadt 1112: if (nsyms >= MAXSYMS)
1113: errx(2, "too many symbols");
1114: symind = nsyms++;
1115: }
1116: symname[symind] = sym;
1117: ignore[symind] = ignorethis;
1.8 deraadt 1118: val = sym + (skipsym(sym) - sym);
1.7 deraadt 1119: if (definethis) {
1120: if (*val == '=') {
1121: value[symind] = val+1;
1122: *val = '\0';
1123: } else if (*val == '\0')
1124: value[symind] = "";
1125: else
1126: usage();
1127: } else {
1128: if (*val != '\0')
1129: usage();
1130: value[symind] = NULL;
1131: }
1132: }
1133:
/*
 * Compare s with n characters of t.
 * The same as strncmp() except that it checks that s[n] == '\0'.
 * Returns zero only when s is exactly the first n characters of t.
 */
static int
strlcmp(const char *s, const char *t, size_t n)
{
	for (; n > 0 && *t != '\0'; n--, s++, t++) {
		if (*s != *t)
			return ((unsigned char)*s - (unsigned char)*t);
	}
	/* non-zero if s has characters left over */
	return ((unsigned char)*s);
}
1148:
1.7 deraadt 1149: /*
1.8 deraadt 1150: * Diagnostics.
1.7 deraadt 1151: */
1.8 deraadt 1152: static void
1.7 deraadt 1153: debug(const char *msg, ...)
1.1 deraadt 1154: {
1.7 deraadt 1155: va_list ap;
1156:
1157: if (debugging) {
1158: va_start(ap, msg);
1159: vwarnx(msg, ap);
1160: va_end(ap);
1161: }
1.1 deraadt 1162: }
1163:
1.8 deraadt 1164: static void
1165: error(const char *msg)
1.7 deraadt 1166: {
1.8 deraadt 1167: if (depth == 0)
1.9 ! deraadt 1168: warnx("%s: %d: %s", filename, linenum, msg);
1.7 deraadt 1169: else
1.9 ! deraadt 1170: warnx("%s: %d: %s (#if line %d depth %d)",
1.8 deraadt 1171: filename, linenum, msg, stifline[depth], depth);
1.9 ! deraadt 1172: errx(2, "output may be truncated");
1.1 deraadt 1173: }