Annotation of src/usr.bin/unifdef/unifdef.c, Revision 1.13
1.13 ! jmc 1: /* $OpenBSD: unifdef.c,v 1.12 2007/04/02 14:12:51 jmc Exp $ */
1.1 deraadt 2: /*
1.11 avsm 3: * Copyright (c) 2002, 2003 Tony Finch <dot@dotat.at>
1.1 deraadt 4: * Copyright (c) 1985, 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
1.7 deraadt 8: * Dave Yost. Support for #if and #elif was added by Tony Finch.
1.1 deraadt 9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
1.10 millert 18: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 19: * may be used to endorse or promote products derived from this software
20: * without specific prior written permission.
21: *
22: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32: * SUCH DAMAGE.
33: */
34:
35: #ifndef lint
1.7 deraadt 36: static const char copyright[] =
1.1 deraadt 37: "@(#) Copyright (c) 1985, 1993\n\
38: The Regents of the University of California. All rights reserved.\n";
39:
40: #if 0
41: static char sccsid[] = "@(#)unifdef.c 8.1 (Berkeley) 6/6/93";
42: #endif
1.13 ! jmc 43: static const char rcsid[] = "$OpenBSD: unifdef.c,v 1.12 2007/04/02 14:12:51 jmc Exp $";
1.7 deraadt 44: #endif
1.1 deraadt 45:
46: /*
47: * unifdef - remove ifdef'ed lines
48: *
49: * Wishlist:
50: * provide an option which will append the name of the
51: * appropriate symbol after #else's and #endif's
52: * provide an option which will check symbols after
53: * #else's and #endif's to see that they match their
54: * corresponding #ifdef or #ifndef
1.7 deraadt 55: * generate #line directives in place of deleted code
1.9 deraadt 56: *
57: * The first two items above require better buffer handling, which would
58: * also make it possible to handle all "dodgy" directives correctly.
1.1 deraadt 59: */
60:
1.7 deraadt 61: #include <ctype.h>
62: #include <err.h>
63: #include <stdarg.h>
64: #include <stdbool.h>
1.1 deraadt 65: #include <stdio.h>
1.7 deraadt 66: #include <stdlib.h>
67: #include <string.h>
68: #include <unistd.h>
1.1 deraadt 69:
/*
 * Types of input lines.
 *
 * The ten directive types are listed first, so that adding LT_DODGY to
 * one of them gives the matching "dodgy" variant (a directive that is
 * not on one line); LT_DODGY_LAST is the highest such variant.
 */
typedef enum {
	LT_TRUEI = 0,		/* a true #if with ignore flag */
	LT_FALSEI,		/* a false #if with ignore flag */
	LT_IF,			/* an unknown #if */
	LT_TRUE,		/* a true #if */
	LT_FALSE,		/* a false #if */
	LT_ELIF,		/* an unknown #elif */
	LT_ELTRUE,		/* a true #elif */
	LT_ELFALSE,		/* a false #elif */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* flag: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,		/* ordinary line */
	LT_EOF,			/* end of file */
	LT_COUNT		/* number of line types */
} Linetype;
88:
/* Printable line type names, in the same order as the Linetype enum. */
static char const * const linetype_name[] = {
	/* directives that sit on a single line */
	"TRUEI",
	"FALSEI",
	"IF",
	"TRUE",
	"FALSE",
	"ELIF",
	"ELTRUE",
	"ELFALSE",
	"ELSE",
	"ENDIF",
	/* the same directives when spread over several lines */
	"DODGY TRUEI",
	"DODGY FALSEI",
	"DODGY IF",
	"DODGY TRUE",
	"DODGY FALSE",
	"DODGY ELIF",
	"DODGY ELTRUE",
	"DODGY ELFALSE",
	"DODGY ELSE",
	"DODGY ENDIF",
	/* everything else */
	"PLAIN",
	"EOF"
};
1.7 deraadt 98:
/*
 * State of #if processing.  One of these is kept for every nesting
 * level in the ifstate[] stack.
 */
typedef enum {
	IS_OUTSIDE = 0,
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT		/* number of states */
} Ifstate;
113:
/* Printable #if state names, in the same order as the Ifstate enum. */
static char const * const ifstate_name[] = {
	"OUTSIDE",
	"FALSE_PREFIX",
	"TRUE_PREFIX",
	"PASS_MIDDLE",
	"FALSE_MIDDLE",
	"TRUE_MIDDLE",
	"PASS_ELSE",
	"FALSE_ELSE",
	"TRUE_ELSE",
	"FALSE_TRAILER"
};
120:
/*
 * State of the comment parser.  NO_COMMENT is deliberately the false
 * value so that the state can be tested as a boolean.
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT	/* star-backslash-newline in a C comment */
} Comment_state;
129:
/* Printable comment state names, in the same order as Comment_state. */
static char const * const comment_name[] = {
	"NO",
	"C",
	"CXX",
	"STARTING",
	"FINISHING"
};
1.7 deraadt 133:
/* State of the preprocessor line parser. */
typedef enum {
	LS_START = 0,		/* only space and comments on this line */
	LS_HASH,		/* only space, comments, and a hash */
	LS_DIRTY		/* this line can't be a preprocessor line */
} Line_state;
1.7 deraadt 140:
/* Printable line parser state names, in the same order as Line_state. */
static char const * const linestate_name[] = {
	"START",
	"HASH",
	"DIRTY"
};
1.7 deraadt 144:
/*
 * Fixed limits.  The values are derived from the minimum translation
 * limits in ISO/IEC 9899:1999 5.2.4.1.
 */
#define	MAXDEPTH	64	/* maximum #if nesting */
#define	MAXLINE		4096	/* maximum length of line */
#define	MAXSYMS		4096	/* maximum number of symbols */

/*
 * Extra room at the end of the tline buffer: when a keyword is edited
 * the replacement text is sometimes longer than the original.
 */
#define	EDITSLOP	10
157:
158: /*
1.8 deraadt 159: * Globals.
1.7 deraadt 160: */
161:
1.8 deraadt 162: static bool complement; /* -c: do the complement */
163: static bool debugging; /* -d: debugging reports */
1.9 deraadt 164: static bool iocccok; /* -e: fewer IOCCC errors */
1.8 deraadt 165: static bool killconsts; /* -k: eval constant #ifs */
166: static bool lnblank; /* -l: blank deleted lines */
167: static bool symlist; /* -s: output symbol list */
168: static bool text; /* -t: this is a text file */
169:
170: static const char *symname[MAXSYMS]; /* symbol name */
171: static const char *value[MAXSYMS]; /* -Dsym=value */
172: static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */
173: static int nsyms; /* number of symbols */
174:
175: static FILE *input; /* input file pointer */
176: static const char *filename; /* input file name */
177: static int linenum; /* current line number */
178:
1.9 deraadt 179: static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */
1.8 deraadt 180: static char *keyword; /* used for editing #elif's */
181:
182: static Comment_state incomment; /* comment parser state */
183: static Line_state linestate; /* #if line parser state */
184: static Ifstate ifstate[MAXDEPTH]; /* #if processor state */
185: static bool ignoring[MAXDEPTH]; /* ignore comments state */
186: static int stifline[MAXDEPTH]; /* start of current #if */
187: static int depth; /* current #if nesting */
188: static bool keepthis; /* don't delete constant #if */
189:
190: static int exitstat; /* program exit status */
191:
192: static void addsym(bool, bool, char *);
193: static void debug(const char *, ...);
194: static void error(const char *);
195: static int findsym(const char *);
196: static void flushline(bool);
197: static Linetype getline(void);
198: static Linetype ifeval(const char **);
1.9 deraadt 199: static void ignoreoff(void);
200: static void ignoreon(void);
201: static void keywordedit(const char *);
1.8 deraadt 202: static void nest(void);
203: static void process(void);
204: static const char *skipcomment(const char *);
205: static const char *skipsym(const char *);
206: static void state(Ifstate);
207: static int strlcmp(const char *, const char *, size_t);
208: static void usage(void);
1.7 deraadt 209:
/*
 * True when c cannot be part of an identifier, i.e. it is neither a
 * letter, a digit, nor an underscore.  The argument is parenthesized so
 * that any expression can be passed safely, but it is still evaluated
 * more than once: do not pass an expression with side effects.
 */
#define endsym(c) (!isalnum((unsigned char)(c)) && (c) != '_')
211:
1.8 deraadt 212: /*
213: * The main program.
214: */
1.7 deraadt 215: int
216: main(int argc, char *argv[])
217: {
218: int opt;
219:
1.11 avsm 220: while ((opt = getopt(argc, argv, "i:D:U:I:cdeklst")) != -1)
1.7 deraadt 221: switch (opt) {
222: case 'i': /* treat stuff controlled by these symbols as text */
223: /*
224: * For strict backwards-compatibility the U or D
225: * should be immediately after the -i but it doesn't
226: * matter much if we relax that requirement.
227: */
228: opt = *optarg++;
229: if (opt == 'D')
230: addsym(true, true, optarg);
231: else if (opt == 'U')
232: addsym(true, false, optarg);
233: else
234: usage();
235: break;
236: case 'D': /* define a symbol */
237: addsym(false, true, optarg);
238: break;
239: case 'U': /* undef a symbol */
240: addsym(false, false, optarg);
241: break;
1.11 avsm 242: case 'I':
243: /* no-op for compatibility with cpp */
244: break;
1.7 deraadt 245: case 'c': /* treat -D as -U and vice versa */
246: complement = true;
247: break;
1.8 deraadt 248: case 'd':
249: debugging = true;
250: break;
1.9 deraadt 251: case 'e': /* fewer errors from dodgy lines */
252: iocccok = true;
253: break;
1.7 deraadt 254: case 'k': /* process constant #ifs */
255: killconsts = true;
256: break;
257: case 'l': /* blank deleted lines instead of omitting them */
258: lnblank = true;
259: break;
260: case 's': /* only output list of symbols that control #ifs */
261: symlist = true;
262: break;
1.8 deraadt 263: case 't': /* don't parse C comments */
1.7 deraadt 264: text = true;
265: break;
266: default:
267: usage();
268: }
269: argc -= optind;
270: argv += optind;
1.8 deraadt 271: if (nsyms == 0 && !symlist) {
1.7 deraadt 272: warnx("must -D or -U at least one symbol");
273: usage();
274: }
275: if (argc > 1) {
276: errx(2, "can only do one file");
277: } else if (argc == 1 && strcmp(*argv, "-") != 0) {
278: filename = *argv;
279: if ((input = fopen(filename, "r")) != NULL) {
1.8 deraadt 280: process();
1.7 deraadt 281: (void) fclose(input);
282: } else
283: err(2, "can't open %s", *argv);
284: } else {
285: filename = "[stdin]";
286: input = stdin;
1.8 deraadt 287: process();
1.7 deraadt 288: }
289:
1.8 deraadt 290: exit(exitstat);
1.7 deraadt 291: }
1.1 deraadt 292:
/*
 * Print the synopsis on the standard error output and exit with
 * status 2.  This function does not return.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: unifdef [-ceklst] [-Dsym[=val]] [-Ipath] [-iDsym[=val]] "
	    "[-iUsym] [-Usym]\n"
	    "\t[file]\n";

	fputs(synopsis, stderr);
	exit(2);
}
302:
303: /*
304: * A state transition function alters the global #if processing state
305: * in a particular way. The table below is indexed by the current
306: * processing state and the type of the current line. A NULL entry
1.9 deraadt 307: * indicates that processing is complete.
1.8 deraadt 308: *
309: * Nesting is handled by keeping a stack of states; some transition
1.9 deraadt 310: * functions increase or decrease the depth. They also maintain the
1.8 deraadt 311: * ignore state on a stack. In some complicated cases they have to
312: * alter the preprocessor directive, as follows.
313: *
314: * When we have processed a group that starts off with a known-false
315: * #if/#elif sequence (which has therefore been deleted) followed by a
1.9 deraadt 316: * #elif that we don't understand and therefore must keep, we edit the
1.8 deraadt 317: * latter into a #if to keep the nesting correct.
318: *
319: * When we find a true #elif in a group, the following block will
320: * always be kept and the rest of the sequence after the next #elif or
1.9 deraadt 321: * #else will be discarded. We edit the #elif into a #else and the
1.8 deraadt 322: * following directive to #endif since this has the desired behaviour.
1.9 deraadt 323: *
324: * "Dodgy" directives are split across multiple lines, the most common
325: * example being a multi-line comment hanging off the right of the
326: * directive. We can handle them correctly only if there is no change
327: * from printing to dropping (or vice versa) caused by that directive.
328: * If the directive is the first of a group we have a choice between
329: * failing with an error, or passing it through unchanged instead of
330: * evaluating it. The latter is not the default to avoid questions from
331: * users about unifdef unexpectedly leaving behind preprocessor directives.
1.8 deraadt 332: */
/* The type of every entry in the state transition table. */
typedef void state_fn(void);

/*
 * Error transitions.  Each one hands a fixed message to error().
 */

/* an #elif where none is allowed */
static void
Eelif(void)
{
	error("Inappropriate #elif");
}

/* an #else where none is allowed */
static void
Eelse(void)
{
	error("Inappropriate #else");
}

/* an #endif where none is allowed */
static void
Eendif(void)
{
	error("Inappropriate #endif");
}

/* the input ended inside an #if group */
static void
Eeof(void)
{
	error("Premature EOF");
}

/* a directive that is laid out in a way we refuse to handle */
static void
Eioccc(void)
{
	error("Obfuscated preprocessor control line");
}
365:
/*
 * Plain line handling: pass the current line to flushline() either as
 * a line to keep or as a line to discard.
 */

/* keep the current line */
static void
print(void)
{
	flushline(true);
}

/* discard the current line */
static void
drop(void)
{
	flushline(false);
}
378:
379: /* output lacks group's start line */
380: static void
381: Strue(void)
382: {
383: drop();
1.9 deraadt 384: ignoreoff();
1.8 deraadt 385: state(IS_TRUE_PREFIX);
386: }
387:
388: static void
389: Sfalse(void)
390: {
391: drop();
1.9 deraadt 392: ignoreoff();
1.8 deraadt 393: state(IS_FALSE_PREFIX);
394: }
395:
396: static void
397: Selse(void)
398: {
399: drop();
400: state(IS_TRUE_ELSE);
401: }
402:
403: /* print/pass this block */
404: static void
405: Pelif(void)
406: {
407: print();
1.9 deraadt 408: ignoreoff();
1.8 deraadt 409: state(IS_PASS_MIDDLE);
410: }
411:
412: static void
413: Pelse(void)
414: {
415: print();
416: state(IS_PASS_ELSE);
417: }
418:
419: static void
420: Pendif(void)
421: {
422: print();
423: --depth;
424: }
425:
426: /* discard this block */
427: static void
428: Dfalse(void)
429: {
430: drop();
1.9 deraadt 431: ignoreoff();
1.8 deraadt 432: state(IS_FALSE_TRAILER);
433: }
434:
435: static void
436: Delif(void)
437: {
438: drop();
1.9 deraadt 439: ignoreoff();
1.8 deraadt 440: state(IS_FALSE_MIDDLE);
441: }
442:
443: static void
444: Delse(void)
445: {
446: drop();
447: state(IS_FALSE_ELSE);
448: }
449:
450: static void
451: Dendif(void)
452: {
453: drop();
454: --depth;
455: }
456:
/*
 * Transitions for the first line of a group: each one opens a new
 * nesting level with nest() and then behaves like the corresponding
 * transition inside a group.
 */

/* new group that is dropped */
static void
Fdrop(void)
{
	nest();
	Dfalse();
}

/* new group that is passed through */
static void
Fpass(void)
{
	nest();
	Pelif();
}

/* new group that starts with a true #if */
static void
Ftrue(void)
{
	nest();
	Strue();
}

/* new group that starts with a false #if */
static void
Ffalse(void)
{
	nest();
	Sfalse();
}
485:
1.9 deraadt 486: /* variable pedantry for obfuscated lines */
487: static void
488: Oiffy(void)
489: {
490: if (iocccok)
491: Fpass();
492: else
493: Eioccc();
494: ignoreon();
495: }
496:
497: static void
498: Oif(void)
499: {
500: if (iocccok)
501: Fpass();
502: else
503: Eioccc();
504: }
505:
506: static void
507: Oelif(void)
508: {
509: if (iocccok)
510: Pelif();
511: else
512: Eioccc();
513: }
514:
/*
 * First-line transitions that additionally switch on the "ignore
 * comments" flag for the nesting level that was just opened.
 */

/* as Fdrop, then ignore comments in this block */
static void
Idrop(void)
{
	Fdrop();
	ignoreon();
}

/* as Ftrue, then ignore comments in this block */
static void
Itrue(void)
{
	Ftrue();
	ignoreon();
}

/* as Ffalse, then ignore comments in this block */
static void
Ifalse(void)
{
	Ffalse();
	ignoreon();
}
536:
1.9 deraadt 537: /* edit this line */
1.8 deraadt 538: static void
539: Mpass (void)
540: {
541: strncpy(keyword, "if ", 4);
542: Pelif();
543: }
544:
545: static void
546: Mtrue (void)
547: {
1.9 deraadt 548: keywordedit("else\n");
1.8 deraadt 549: state(IS_TRUE_MIDDLE);
550: }
551:
552: static void
553: Melif (void)
554: {
1.9 deraadt 555: keywordedit("endif\n");
1.8 deraadt 556: state(IS_FALSE_TRAILER);
557: }
558:
559: static void
560: Melse (void)
561: {
1.9 deraadt 562: keywordedit("endif\n");
1.8 deraadt 563: state(IS_FALSE_ELSE);
564: }
565:
566: static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
567: /* IS_OUTSIDE */
1.9 deraadt 568: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
569: Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif,
570: print, NULL },
1.8 deraadt 571: /* IS_FALSE_PREFIX */
1.9 deraadt 572: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
573: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
574: drop, Eeof },
1.8 deraadt 575: /* IS_TRUE_PREFIX */
1.9 deraadt 576: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
577: Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
578: print, Eeof },
1.8 deraadt 579: /* IS_PASS_MIDDLE */
1.9 deraadt 580: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
581: Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif,
582: print, Eeof },
1.8 deraadt 583: /* IS_FALSE_MIDDLE */
1.9 deraadt 584: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
585: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
586: drop, Eeof },
1.8 deraadt 587: /* IS_TRUE_MIDDLE */
1.9 deraadt 588: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
589: Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
590: print, Eeof },
1.8 deraadt 591: /* IS_PASS_ELSE */
1.9 deraadt 592: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
593: Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif,
594: print, Eeof },
1.8 deraadt 595: /* IS_FALSE_ELSE */
1.9 deraadt 596: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
597: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
598: drop, Eeof },
1.8 deraadt 599: /* IS_TRUE_ELSE */
1.9 deraadt 600: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
601: Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc,
602: print, Eeof },
1.8 deraadt 603: /* IS_FALSE_TRAILER */
1.9 deraadt 604: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
605: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
606: drop, Eeof }
607: /*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF
608: TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY)
609: PLAIN EOF */
1.8 deraadt 610: };
611:
612: /*
613: * State machine utility functions
614: */
615: static void
1.9 deraadt 616: ignoreoff(void)
617: {
618: ignoring[depth] = ignoring[depth-1];
619: }
620:
621: static void
622: ignoreon(void)
623: {
624: ignoring[depth] = true;
625: }
626:
627: static void
628: keywordedit(const char *replacement)
629: {
630: strlcpy(keyword, replacement, tline + sizeof(tline) - keyword);
631: print();
632: }
633:
634: static void
1.8 deraadt 635: nest(void)
636: {
637: depth += 1;
638: if (depth >= MAXDEPTH)
639: error("Too many levels of nesting");
640: stifline[depth] = linenum;
641: }
642:
643: static void
644: state(Ifstate is)
645: {
646: ifstate[depth] = is;
647: }
648:
1.7 deraadt 649: /*
1.8 deraadt 650: * Write a line to the output or not, according to command line options.
651: */
652: static void
653: flushline(bool keep)
654: {
655: if (symlist)
656: return;
657: if (keep ^ complement)
658: fputs(tline, stdout);
659: else {
660: if (lnblank)
661: putc('\n', stdout);
662: exitstat = 1;
1.7 deraadt 663: }
664: }
1.3 deraadt 665:
1.7 deraadt 666: /*
1.8 deraadt 667: * The driver for the state machine.
1.7 deraadt 668: */
1.8 deraadt 669: static void
670: process(void)
1.7 deraadt 671: {
672: Linetype lineval;
1.8 deraadt 673: state_fn *trans;
1.7 deraadt 674:
675: for (;;) {
676: linenum++;
1.8 deraadt 677: lineval = getline();
678: trans = trans_table[ifstate[depth]][lineval];
679: if (trans == NULL)
680: break;
681: trans();
682: debug("process %s -> %s depth %d",
683: linetype_name[lineval],
684: ifstate_name[ifstate[depth]], depth);
1.1 deraadt 685: }
1.8 deraadt 686: if (incomment)
687: error("EOF in comment");
1.1 deraadt 688: }
689:
1.7 deraadt 690: /*
1.8 deraadt 691: * Parse a line and determine its type. We keep the preprocessor line
692: * parser state between calls in a global variable.
1.7 deraadt 693: */
1.8 deraadt 694: static Linetype
695: getline(void)
1.3 deraadt 696: {
1.7 deraadt 697: const char *cp;
1.8 deraadt 698: int cursym;
699: int kwlen;
1.3 deraadt 700: Linetype retval;
1.8 deraadt 701: Comment_state wascomment;
1.3 deraadt 702:
1.8 deraadt 703: if (fgets(tline, MAXLINE, input) == NULL)
704: return (LT_EOF);
1.3 deraadt 705: retval = LT_PLAIN;
1.8 deraadt 706: wascomment = incomment;
1.7 deraadt 707: cp = skipcomment(tline);
1.8 deraadt 708: if (linestate == LS_START) {
709: if (*cp == '#') {
710: linestate = LS_HASH;
711: cp = skipcomment(cp + 1);
712: } else if (*cp != '\0')
713: linestate = LS_DIRTY;
714: }
715: if (!incomment && linestate == LS_HASH) {
716: keyword = tline + (cp - tline);
717: cp = skipsym(cp);
718: kwlen = cp - keyword;
1.9 deraadt 719: /* no way can we deal with a continuation inside a keyword */
1.8 deraadt 720: if (strncmp(cp, "\\\n", 2) == 0)
721: Eioccc();
722: if (strlcmp("ifdef", keyword, kwlen) == 0 ||
723: strlcmp("ifndef", keyword, kwlen) == 0) {
724: cp = skipcomment(cp);
725: if ((cursym = findsym(cp)) < 0)
726: retval = LT_IF;
727: else {
728: retval = (keyword[2] == 'n')
729: ? LT_FALSE : LT_TRUE;
730: if (value[cursym] == NULL)
731: retval = (retval == LT_TRUE)
732: ? LT_FALSE : LT_TRUE;
733: if (ignore[cursym])
734: retval = (retval == LT_TRUE)
735: ? LT_TRUEI : LT_FALSEI;
736: }
737: cp = skipsym(cp);
738: } else if (strlcmp("if", keyword, kwlen) == 0)
739: retval = ifeval(&cp);
740: else if (strlcmp("elif", keyword, kwlen) == 0)
741: retval = ifeval(&cp) - LT_IF + LT_ELIF;
742: else if (strlcmp("else", keyword, kwlen) == 0)
743: retval = LT_ELSE;
744: else if (strlcmp("endif", keyword, kwlen) == 0)
745: retval = LT_ENDIF;
746: else {
747: linestate = LS_DIRTY;
1.7 deraadt 748: retval = LT_PLAIN;
749: }
750: cp = skipcomment(cp);
1.8 deraadt 751: if (*cp != '\0') {
752: linestate = LS_DIRTY;
753: if (retval == LT_TRUE || retval == LT_FALSE ||
754: retval == LT_TRUEI || retval == LT_FALSEI)
755: retval = LT_IF;
756: if (retval == LT_ELTRUE || retval == LT_ELFALSE)
757: retval = LT_ELIF;
1.3 deraadt 758: }
1.9 deraadt 759: if (retval != LT_PLAIN && (wascomment || incomment)) {
760: retval += LT_DODGY;
761: if (incomment)
762: linestate = LS_DIRTY;
763: }
764: /* skipcomment should have changed the state */
1.8 deraadt 765: if (linestate == LS_HASH)
766: abort(); /* bug */
1.7 deraadt 767: }
1.8 deraadt 768: if (linestate == LS_DIRTY) {
769: while (*cp != '\0')
770: cp = skipcomment(cp + 1);
771: }
772: debug("parser %s comment %s line",
773: comment_name[incomment], linestate_name[linestate]);
1.7 deraadt 774: return (retval);
775: }
776:
/*
 * The binary operators supported by the expression evaluator.  Each
 * one returns 1 when the relation holds and 0 when it does not.
 *
 * Note that if support for division is added then we also need
 * short-circuiting booleans because of divide-by-zero.
 */

/* a < b */
static int
op_lt(int a, int b)
{
	return (b > a);
}

/* a > b */
static int
op_gt(int a, int b)
{
	return (b < a);
}

/* a <= b */
static int
op_le(int a, int b)
{
	return (!(a > b));
}

/* a >= b */
static int
op_ge(int a, int b)
{
	return (!(a < b));
}

/* a == b */
static int
op_eq(int a, int b)
{
	return (!(a != b));
}

/* a != b */
static int
op_ne(int a, int b)
{
	return (!(a == b));
}

/* a || b */
static int
op_or(int a, int b)
{
	return (a != 0 || b != 0);
}

/* a && b */
static int
op_and(int a, int b)
{
	return (a != 0 && b != 0);
}
829:
830: /*
1.8 deraadt 831: * An evaluation function takes three arguments, as follows: (1) a pointer to
832: * an element of the precedence table which lists the operators at the current
833: * level of precedence; (2) a pointer to an integer which will receive the
834: * value of the expression; and (3) a pointer to a char* that points to the
835: * expression to be evaluated and that is updated to the end of the expression
836: * when evaluation is complete. The function returns LT_FALSE if the value of
837: * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the
838: * expression could not be evaluated.
1.7 deraadt 839: */
1.8 deraadt 840: struct ops;
841:
842: typedef Linetype eval_fn(const struct ops *, int *, const char **);
843:
844: static eval_fn eval_table, eval_unary;
845:
846: /*
847: * The precedence table. Expressions involving binary operators are evaluated
848: * in a table-driven way by eval_table. When it evaluates a subexpression it
849: * calls the inner function with its first argument pointing to the next
850: * element of the table. Innermost expressions have special non-table-driven
851: * handling.
852: */
853: static const struct ops {
854: eval_fn *inner;
855: struct op {
856: const char *str;
857: int (*fn)(int, int);
858: } op[5];
859: } eval_ops[] = {
860: { eval_table, { { "||", op_or } } },
861: { eval_table, { { "&&", op_and } } },
862: { eval_table, { { "==", op_eq },
863: { "!=", op_ne } } },
864: { eval_unary, { { "<=", op_le },
865: { ">=", op_ge },
866: { "<", op_lt },
867: { ">", op_gt } } }
868: };
1.7 deraadt 869:
870: /*
871: * Function for evaluating the innermost parts of expressions,
872: * viz. !expr (expr) defined(symbol) symbol number
873: * We reset the keepthis flag when we find a non-constant subexpression.
874: */
1.8 deraadt 875: static Linetype
876: eval_unary(const struct ops *ops, int *valp, const char **cpp)
1.7 deraadt 877: {
878: const char *cp;
879: char *ep;
880: int sym;
881:
882: cp = skipcomment(*cpp);
1.8 deraadt 883: if (*cp == '!') {
1.7 deraadt 884: debug("eval%d !", ops - eval_ops);
885: cp++;
886: if (eval_unary(ops, valp, &cp) == LT_IF)
887: return (LT_IF);
888: *valp = !*valp;
889: } else if (*cp == '(') {
890: cp++;
891: debug("eval%d (", ops - eval_ops);
892: if (eval_table(eval_ops, valp, &cp) == LT_IF)
893: return (LT_IF);
894: cp = skipcomment(cp);
895: if (*cp++ != ')')
896: return (LT_IF);
897: } else if (isdigit((unsigned char)*cp)) {
898: debug("eval%d number", ops - eval_ops);
899: *valp = strtol(cp, &ep, 0);
900: cp = skipsym(cp);
901: } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) {
902: cp = skipcomment(cp+7);
903: debug("eval%d defined", ops - eval_ops);
904: if (*cp++ != '(')
905: return (LT_IF);
906: cp = skipcomment(cp);
907: sym = findsym(cp);
1.8 deraadt 908: if (sym < 0 && !symlist)
1.7 deraadt 909: return (LT_IF);
910: *valp = (value[sym] != NULL);
911: cp = skipsym(cp);
912: cp = skipcomment(cp);
913: if (*cp++ != ')')
914: return (LT_IF);
915: keepthis = false;
916: } else if (!endsym(*cp)) {
917: debug("eval%d symbol", ops - eval_ops);
918: sym = findsym(cp);
1.8 deraadt 919: if (sym < 0 && !symlist)
1.7 deraadt 920: return (LT_IF);
921: if (value[sym] == NULL)
922: *valp = 0;
923: else {
924: *valp = strtol(value[sym], &ep, 0);
925: if (*ep != '\0' || ep == value[sym])
926: return (LT_IF);
927: }
928: cp = skipsym(cp);
929: keepthis = false;
930: } else
931: return (LT_IF);
932:
933: *cpp = cp;
934: debug("eval%d = %d", ops - eval_ops, *valp);
935: return (*valp ? LT_TRUE : LT_FALSE);
936: }
937:
938: /*
939: * Table-driven evaluation of binary operators.
940: */
1.8 deraadt 941: static Linetype
942: eval_table(const struct ops *ops, int *valp, const char **cpp)
1.7 deraadt 943: {
1.8 deraadt 944: const struct op *op;
1.7 deraadt 945: const char *cp;
946: int val;
947:
948: debug("eval%d", ops - eval_ops);
949: cp = *cpp;
950: if (ops->inner(ops+1, valp, &cp) == LT_IF)
951: return (LT_IF);
952: for (;;) {
953: cp = skipcomment(cp);
954: for (op = ops->op; op->str != NULL; op++)
955: if (strncmp(cp, op->str, strlen(op->str)) == 0)
956: break;
957: if (op->str == NULL)
958: break;
959: cp += strlen(op->str);
960: debug("eval%d %s", ops - eval_ops, op->str);
961: if (ops->inner(ops+1, &val, &cp) == LT_IF)
1.8 deraadt 962: return (LT_IF);
1.7 deraadt 963: *valp = op->fn(*valp, val);
964: }
965:
966: *cpp = cp;
967: debug("eval%d = %d", ops - eval_ops, *valp);
968: return (*valp ? LT_TRUE : LT_FALSE);
1.1 deraadt 969: }
1.7 deraadt 970:
1.1 deraadt 971: /*
1.7 deraadt 972: * Evaluate the expression on a #if or #elif line. If we can work out
973: * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
1.8 deraadt 974: * return just a generic LT_IF.
1.1 deraadt 975: */
1.8 deraadt 976: static Linetype
1.7 deraadt 977: ifeval(const char **cpp)
978: {
1.8 deraadt 979: int ret;
1.7 deraadt 980: int val;
981:
982: debug("eval %s", *cpp);
983: keepthis = killconsts ? false : true;
1.8 deraadt 984: ret = eval_table(eval_ops, &val, cpp);
985: return (keepthis ? LT_IF : ret);
1.7 deraadt 986: }
987:
988: /*
989: * Skip over comments and stop at the next character position that is
1.11 avsm 990: * not whitespace. Between calls we keep the comment state in the
991: * global variable incomment, and we also adjust the global variable
992: * linestate when we see a newline.
1.8 deraadt 993: * XXX: doesn't cope with the buffer splitting inside a state transition.
1.7 deraadt 994: */
1.8 deraadt 995: static const char *
1.7 deraadt 996: skipcomment(const char *cp)
1.3 deraadt 997: {
1.8 deraadt 998: if (text || ignoring[depth]) {
1.11 avsm 999: for (; isspace((unsigned char)*cp); cp++)
1000: if (*cp == '\n')
1001: linestate = LS_START;
1.8 deraadt 1002: return (cp);
1003: }
1004: while (*cp != '\0')
1005: if (strncmp(cp, "\\\n", 2) == 0)
1006: cp += 2;
1007: else switch (incomment) {
1008: case NO_COMMENT:
1009: if (strncmp(cp, "/\\\n", 3) == 0) {
1010: incomment = STARTING_COMMENT;
1011: cp += 3;
1012: } else if (strncmp(cp, "/*", 2) == 0) {
1.3 deraadt 1013: incomment = C_COMMENT;
1.8 deraadt 1014: cp += 2;
1015: } else if (strncmp(cp, "//", 2) == 0) {
1016: incomment = CXX_COMMENT;
1017: cp += 2;
1018: } else if (strncmp(cp, "\n", 1) == 0) {
1019: linestate = LS_START;
1020: cp += 1;
1021: } else if (strchr(" \t", *cp) != NULL) {
1022: cp += 1;
1023: } else
1024: return (cp);
1025: continue;
1026: case CXX_COMMENT:
1027: if (strncmp(cp, "\n", 1) == 0) {
1028: incomment = NO_COMMENT;
1029: linestate = LS_START;
1.3 deraadt 1030: }
1.8 deraadt 1031: cp += 1;
1032: continue;
1033: case C_COMMENT:
1034: if (strncmp(cp, "*\\\n", 3) == 0) {
1035: incomment = FINISHING_COMMENT;
1036: cp += 3;
1037: } else if (strncmp(cp, "*/", 2) == 0) {
1038: incomment = NO_COMMENT;
1039: cp += 2;
1040: } else
1041: cp += 1;
1042: continue;
1043: case STARTING_COMMENT:
1044: if (*cp == '*') {
1045: incomment = C_COMMENT;
1046: cp += 1;
1047: } else if (*cp == '/') {
1.3 deraadt 1048: incomment = CXX_COMMENT;
1.8 deraadt 1049: cp += 1;
1050: } else {
1051: incomment = NO_COMMENT;
1052: linestate = LS_DIRTY;
1.3 deraadt 1053: }
1.8 deraadt 1054: continue;
1055: case FINISHING_COMMENT:
1056: if (*cp == '/') {
1057: incomment = NO_COMMENT;
1058: cp += 1;
1059: } else
1060: incomment = C_COMMENT;
1061: continue;
1062: default:
1063: /* bug */
1064: abort();
1.3 deraadt 1065: }
1.8 deraadt 1066: return (cp);
1.1 deraadt 1067: }
1.7 deraadt 1068:
/*
 * Skip over an identifier: return a pointer to the first character at
 * or after cp that cannot be part of one.
 */
static const char *
skipsym(const char *cp)
{
	for (; !endsym(*cp); cp++)
		continue;
	return (cp);
}
1079:
1.1 deraadt 1080: /*
1.13 ! jmc 1081: * Look for the symbol in the symbol table. If it is found, we return
1.8 deraadt 1082: * the symbol table index, else we return -1.
1.1 deraadt 1083: */
1.8 deraadt 1084: static int
1.7 deraadt 1085: findsym(const char *str)
1.1 deraadt 1086: {
1.7 deraadt 1087: const char *cp;
1.3 deraadt 1088: int symind;
1089:
1.8 deraadt 1090: cp = skipsym(str);
1091: if (cp == str)
1092: return (-1);
1093: if (symlist)
1.7 deraadt 1094: printf("%.*s\n", (int)(cp-str), str);
1.8 deraadt 1095: for (symind = 0; symind < nsyms; ++symind) {
1096: if (strlcmp(symname[symind], str, cp-str) == 0) {
1.7 deraadt 1097: debug("findsym %s %s", symname[symind],
1098: value[symind] ? value[symind] : "");
1099: return (symind);
1.3 deraadt 1100: }
1.1 deraadt 1101: }
1.8 deraadt 1102: return (-1);
1.1 deraadt 1103: }
1.7 deraadt 1104:
1.1 deraadt 1105: /*
1.7 deraadt 1106: * Add a symbol to the symbol table.
1107: */
1.8 deraadt 1108: static void
1.7 deraadt 1109: addsym(bool ignorethis, bool definethis, char *sym)
1110: {
1111: int symind;
1112: char *val;
1113:
1114: symind = findsym(sym);
1.8 deraadt 1115: if (symind < 0) {
1.7 deraadt 1116: if (nsyms >= MAXSYMS)
1117: errx(2, "too many symbols");
1118: symind = nsyms++;
1119: }
1120: symname[symind] = sym;
1121: ignore[symind] = ignorethis;
1.8 deraadt 1122: val = sym + (skipsym(sym) - sym);
1.7 deraadt 1123: if (definethis) {
1124: if (*val == '=') {
1125: value[symind] = val+1;
1126: *val = '\0';
1127: } else if (*val == '\0')
1128: value[symind] = "";
1129: else
1130: usage();
1131: } else {
1132: if (*val != '\0')
1133: usage();
1134: value[symind] = NULL;
1135: }
1136: }
1137:
/*
 * Compare s with n characters of t.
 * The same as strncmp() except that it checks that s[n] == '\0'.
 *
 * Returns zero only when s is exactly equal to the first n characters
 * of t (or to all of t, if t is shorter than n).
 */
static int
strlcmp(const char *s, const char *t, size_t n)
{
	for (; n != 0 && *t != '\0'; n--, s++, t++) {
		if (*s != *t)
			return ((unsigned char)*s - (unsigned char)*t);
	}
	/* s must end here too, otherwise it is the longer string */
	return ((unsigned char)*s);
}
1152:
1.7 deraadt 1153: /*
1.8 deraadt 1154: * Diagnostics.
1.7 deraadt 1155: */
1.8 deraadt 1156: static void
1.7 deraadt 1157: debug(const char *msg, ...)
1.1 deraadt 1158: {
1.7 deraadt 1159: va_list ap;
1160:
1161: if (debugging) {
1162: va_start(ap, msg);
1163: vwarnx(msg, ap);
1164: va_end(ap);
1165: }
1.1 deraadt 1166: }
1167:
1.8 deraadt 1168: static void
1169: error(const char *msg)
1.7 deraadt 1170: {
1.8 deraadt 1171: if (depth == 0)
1.9 deraadt 1172: warnx("%s: %d: %s", filename, linenum, msg);
1.7 deraadt 1173: else
1.9 deraadt 1174: warnx("%s: %d: %s (#if line %d depth %d)",
1.8 deraadt 1175: filename, linenum, msg, stifline[depth], depth);
1.9 deraadt 1176: errx(2, "output may be truncated");
1.1 deraadt 1177: }