Annotation of src/usr.bin/unifdef/unifdef.c, Revision 1.10
1.10 ! millert 1: /* $OpenBSD: unifdef.c,v 1.9 2003/01/22 18:26:15 deraadt Exp $ */
1.1 deraadt 2: /*
3: * Copyright (c) 1985, 1993
4: * The Regents of the University of California. All rights reserved.
5: *
6: * This code is derived from software contributed to Berkeley by
1.7 deraadt 7: * Dave Yost. Support for #if and #elif was added by Tony Finch.
1.1 deraadt 8: *
9: * Redistribution and use in source and binary forms, with or without
10: * modification, are permitted provided that the following conditions
11: * are met:
12: * 1. Redistributions of source code must retain the above copyright
13: * notice, this list of conditions and the following disclaimer.
14: * 2. Redistributions in binary form must reproduce the above copyright
15: * notice, this list of conditions and the following disclaimer in the
16: * documentation and/or other materials provided with the distribution.
1.10 ! millert 17: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 18: * may be used to endorse or promote products derived from this software
19: * without specific prior written permission.
20: *
21: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31: * SUCH DAMAGE.
32: */
33:
34: #ifndef lint
1.7 deraadt 35: static const char copyright[] =
1.1 deraadt 36: "@(#) Copyright (c) 1985, 1993\n\
37: The Regents of the University of California. All rights reserved.\n";
38:
39: #if 0
40: static char sccsid[] = "@(#)unifdef.c 8.1 (Berkeley) 6/6/93";
41: #endif
1.10 ! millert 42: static const char rcsid[] = "$OpenBSD: unifdef.c,v 1.9 2003/01/22 18:26:15 deraadt Exp $";
1.7 deraadt 43: #endif
1.1 deraadt 44:
45: /*
46: * unifdef - remove ifdef'ed lines
47: *
48: * Wishlist:
49: * provide an option which will append the name of the
50: * appropriate symbol after #else's and #endif's
51: * provide an option which will check symbols after
52: * #else's and #endif's to see that they match their
53: * corresponding #ifdef or #ifndef
1.7 deraadt 54: * generate #line directives in place of deleted code
1.9 deraadt 55: *
56: * The first two items above require better buffer handling, which would
57: * also make it possible to handle all "dodgy" directives correctly.
1.1 deraadt 58: */
59:
1.7 deraadt 60: #include <ctype.h>
61: #include <err.h>
62: #include <stdarg.h>
63: #include <stdbool.h>
1.1 deraadt 64: #include <stdio.h>
1.7 deraadt 65: #include <stdlib.h>
66: #include <string.h>
67: #include <unistd.h>
1.1 deraadt 68:
/*
 * Classification of an input line.
 *
 * The order of the enumerators matters: the line parser turns an #if
 * result into the matching #elif result with "- LT_IF + LT_ELIF", and
 * marks a directive as dodgy by adding LT_DODGY, so IF/TRUE/FALSE must
 * stay parallel to ELIF/ELTRUE/ELFALSE and the first ten values must
 * stay contiguous.
 */
typedef enum {
	LT_TRUEI = 0,		/* a true #if with ignore flag */
	LT_FALSEI,		/* a false #if with ignore flag */
	LT_IF,			/* an unknown #if */
	LT_TRUE,		/* a true #if */
	LT_FALSE,		/* a false #if */
	LT_ELIF,		/* an unknown #elif */
	LT_ELTRUE,		/* a true #elif */
	LT_ELFALSE,		/* a false #elif */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* flag: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,		/* ordinary line */
	LT_EOF,			/* end of file */
	LT_COUNT		/* number of line types (table width) */
} Linetype;
87:
1.8 deraadt 88: static char const * const linetype_name[] = {
1.9 deraadt 89: "TRUEI", "FALSEI", "IF", "TRUE", "FALSE",
90: "ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF",
91: "DODGY TRUEI", "DODGY FALSEI",
92: "DODGY IF", "DODGY TRUE", "DODGY FALSE",
93: "DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE",
94: "DODGY ELSE", "DODGY ENDIF",
95: "PLAIN", "EOF"
1.8 deraadt 96: };
1.7 deraadt 97:
/*
 * State of #if processing for one nesting level.  The value is used as
 * the row index of the transition table.
 */
typedef enum {
	IS_OUTSIDE = 0,		/* not inside any #if group */
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT		/* number of states (table height) */
} Ifstate;
112:
113: static char const * const ifstate_name[] = {
114: "OUTSIDE", "FALSE_PREFIX", "TRUE_PREFIX",
115: "PASS_MIDDLE", "FALSE_MIDDLE", "TRUE_MIDDLE",
116: "PASS_ELSE", "FALSE_ELSE", "TRUE_ELSE",
117: "FALSE_TRAILER"
118: };
119:
/*
 * State of the comment parser.  NO_COMMENT is deliberately zero so the
 * state can be tested as a boolean ("are we inside a comment?").
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT	/* star-backslash-newline in a C comment */
} Comment_state;
128:
1.8 deraadt 129: static char const * const comment_name[] = {
130: "NO", "C", "CXX", "STARTING", "FINISHING"
1.1 deraadt 131: };
1.7 deraadt 132:
/*
 * State of the preprocessor line parser: what has been seen so far on
 * the current logical line.
 */
typedef enum {
	LS_START = 0,		/* only space and comments on this line */
	LS_HASH,		/* only space, comments, and a hash */
	LS_DIRTY		/* this line can't be a preprocessor line */
} Line_state;
1.7 deraadt 139:
1.8 deraadt 140: static char const * const linestate_name[] = {
141: "START", "HASH", "DIRTY"
142: };
1.7 deraadt 143:
/*
 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
 *
 * These bound the fixed-size tables below: the #if state stacks are
 * MAXDEPTH entries deep, one input read is at most MAXLINE bytes, and
 * the symbol table holds at most MAXSYMS entries.
 */
#define MAXDEPTH 64 /* maximum #if nesting */
#define MAXLINE 4096 /* maximum length of line */
#define MAXSYMS 4096 /* maximum number of symbols */

/*
 * Sometimes when editing a keyword the replacement text is longer, so
 * we leave some space at the end of the tline buffer to accommodate this.
 * (Lines are read with a limit of MAXLINE, so the extra EDITSLOP bytes
 * are never filled by input.)
 */
#define EDITSLOP 10
156:
157: /*
1.8 deraadt 158: * Globals.
1.7 deraadt 159: */
160:
/* Command line options. */
static bool complement; /* -c: do the complement */
static bool debugging; /* -d: debugging reports */
static bool iocccok; /* -e: fewer IOCCC errors */
static bool killconsts; /* -k: eval constant #ifs */
static bool lnblank; /* -l: blank deleted lines */
static bool symlist; /* -s: output symbol list */
static bool text; /* -t: this is a text file */

/* Symbol table: three parallel arrays indexed by symbol number. */
static const char *symname[MAXSYMS]; /* symbol name */
static const char *value[MAXSYMS]; /* -Dsym=value; NULL means undefined (-U) */
static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */
static int nsyms; /* number of symbols */

/* Input being processed. */
static FILE *input; /* input file pointer */
static const char *filename; /* input file name */
static int linenum; /* current line number */

/* The current line and the directive keyword found in it. */
static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */
static char *keyword; /* used for editing #elif's */

/* Parser and state machine state; the arrays are stacks indexed by depth. */
static Comment_state incomment; /* comment parser state */
static Line_state linestate; /* #if line parser state */
static Ifstate ifstate[MAXDEPTH]; /* #if processor state */
static bool ignoring[MAXDEPTH]; /* ignore comments state */
static int stifline[MAXDEPTH]; /* start of current #if */
static int depth; /* current #if nesting */
static bool keepthis; /* don't delete constant #if */

static int exitstat; /* program exit status */
190:
191: static void addsym(bool, bool, char *);
192: static void debug(const char *, ...);
193: static void error(const char *);
194: static int findsym(const char *);
195: static void flushline(bool);
196: static Linetype getline(void);
197: static Linetype ifeval(const char **);
1.9 deraadt 198: static void ignoreoff(void);
199: static void ignoreon(void);
200: static void keywordedit(const char *);
1.8 deraadt 201: static void nest(void);
202: static void process(void);
203: static const char *skipcomment(const char *);
204: static const char *skipsym(const char *);
205: static void state(Ifstate);
206: static int strlcmp(const char *, const char *, size_t);
207: static void usage(void);
1.7 deraadt 208:
/*
 * True if c cannot be part of an identifier (not a letter, digit or
 * underscore).  The argument is parenthesized so that any expression can
 * be passed; note that it is evaluated more than once.
 */
#define endsym(c) (!isalpha((unsigned char)(c)) && \
    !isdigit((unsigned char)(c)) && (c) != '_')
210:
1.8 deraadt 211: /*
212: * The main program.
213: */
1.7 deraadt 214: int
215: main(int argc, char *argv[])
216: {
217: int opt;
218:
1.9 deraadt 219: while ((opt = getopt(argc, argv, "i:D:U:cdeklst")) != -1)
1.7 deraadt 220: switch (opt) {
221: case 'i': /* treat stuff controlled by these symbols as text */
222: /*
223: * For strict backwards-compatibility the U or D
224: * should be immediately after the -i but it doesn't
225: * matter much if we relax that requirement.
226: */
227: opt = *optarg++;
228: if (opt == 'D')
229: addsym(true, true, optarg);
230: else if (opt == 'U')
231: addsym(true, false, optarg);
232: else
233: usage();
234: break;
235: case 'D': /* define a symbol */
236: addsym(false, true, optarg);
237: break;
238: case 'U': /* undef a symbol */
239: addsym(false, false, optarg);
240: break;
241: case 'c': /* treat -D as -U and vice versa */
242: complement = true;
243: break;
1.8 deraadt 244: case 'd':
245: debugging = true;
246: break;
1.9 deraadt 247: case 'e': /* fewer errors from dodgy lines */
248: iocccok = true;
249: break;
1.7 deraadt 250: case 'k': /* process constant #ifs */
251: killconsts = true;
252: break;
253: case 'l': /* blank deleted lines instead of omitting them */
254: lnblank = true;
255: break;
256: case 's': /* only output list of symbols that control #ifs */
257: symlist = true;
258: break;
1.8 deraadt 259: case 't': /* don't parse C comments */
1.7 deraadt 260: text = true;
261: break;
262: default:
263: usage();
264: }
265: argc -= optind;
266: argv += optind;
1.8 deraadt 267: if (nsyms == 0 && !symlist) {
1.7 deraadt 268: warnx("must -D or -U at least one symbol");
269: usage();
270: }
271: if (argc > 1) {
272: errx(2, "can only do one file");
273: } else if (argc == 1 && strcmp(*argv, "-") != 0) {
274: filename = *argv;
275: if ((input = fopen(filename, "r")) != NULL) {
1.8 deraadt 276: process();
1.7 deraadt 277: (void) fclose(input);
278: } else
279: err(2, "can't open %s", *argv);
280: } else {
281: filename = "[stdin]";
282: input = stdin;
1.8 deraadt 283: process();
1.7 deraadt 284: }
285:
1.8 deraadt 286: exit(exitstat);
1.7 deraadt 287: }
1.1 deraadt 288:
/* Print the usage message on stderr and exit with status 2. */
static void
usage(void)
{
	fputs("usage: unifdef [-cdeklst]"
	    " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n",
	    stderr);
	exit(2);
}
296:
297: /*
298: * A state transition function alters the global #if processing state
299: * in a particular way. The table below is indexed by the current
300: * processing state and the type of the current line. A NULL entry
1.9 deraadt 301: * indicates that processing is complete.
1.8 deraadt 302: *
303: * Nesting is handled by keeping a stack of states; some transition
1.9 deraadt 304: * functions increase or decrease the depth. They also maintain the
1.8 deraadt 305: * ignore state on a stack. In some complicated cases they have to
306: * alter the preprocessor directive, as follows.
307: *
308: * When we have processed a group that starts off with a known-false
309: * #if/#elif sequence (which has therefore been deleted) followed by a
1.9 deraadt 310: * #elif that we don't understand and therefore must keep, we edit the
1.8 deraadt 311: * latter into a #if to keep the nesting correct.
312: *
313: * When we find a true #elif in a group, the following block will
314: * always be kept and the rest of the sequence after the next #elif or
1.9 deraadt 315: * #else will be discarded. We edit the #elif into a #else and the
1.8 deraadt 316: * following directive to #endif since this has the desired behaviour.
1.9 deraadt 317: *
318: * "Dodgy" directives are split across multiple lines, the most common
319: * example being a multi-line comment hanging off the right of the
320: * directive. We can handle them correctly only if there is no change
321: * from printing to dropping (or vice versa) caused by that directive.
322: * If the directive is the first of a group we have a choice between
323: * failing with an error, or passing it through unchanged instead of
324: * evaluating it. The latter is not the default to avoid questions from
325: * users about unifdef unexpectedly leaving behind preprocessor directives.
1.8 deraadt 326: */
/* Type of a state transition function (an entry of the transition table). */
typedef void state_fn(void);

/*
 * Transitions that report an error.  Each one passes a fixed message to
 * error(), which also terminates the program.
 */

/* An #elif where none is allowed. */
static void
Eelif(void)
{
	error("Inappropriate #elif");
}

/* An #else where none is allowed. */
static void
Eelse(void)
{
	error("Inappropriate #else");
}

/* An #endif where none is allowed. */
static void
Eendif(void)
{
	error("Inappropriate #endif");
}

/* End of file reached inside an #if group. */
static void
Eeof(void)
{
	error("Premature EOF");
}

/* A directive split across lines in a way we cannot handle. */
static void
Eioccc(void)
{
	error("Obfuscated preprocessor control line");
}
359:
/*
 * Plain line handling: hand the current line to flushline(), asking it
 * either to keep the line or to delete it.
 */

/* Keep the current line. */
static void
print(void)
{
	flushline(true);
}

/* Delete the current line. */
static void
drop(void)
{
	flushline(false);
}
372:
373: /* output lacks group's start line */
374: static void
375: Strue(void)
376: {
377: drop();
1.9 deraadt 378: ignoreoff();
1.8 deraadt 379: state(IS_TRUE_PREFIX);
380: }
381:
382: static void
383: Sfalse(void)
384: {
385: drop();
1.9 deraadt 386: ignoreoff();
1.8 deraadt 387: state(IS_FALSE_PREFIX);
388: }
389:
390: static void
391: Selse(void)
392: {
393: drop();
394: state(IS_TRUE_ELSE);
395: }
396:
397: /* print/pass this block */
398: static void
399: Pelif(void)
400: {
401: print();
1.9 deraadt 402: ignoreoff();
1.8 deraadt 403: state(IS_PASS_MIDDLE);
404: }
405:
406: static void
407: Pelse(void)
408: {
409: print();
410: state(IS_PASS_ELSE);
411: }
412:
413: static void
414: Pendif(void)
415: {
416: print();
417: --depth;
418: }
419:
420: /* discard this block */
421: static void
422: Dfalse(void)
423: {
424: drop();
1.9 deraadt 425: ignoreoff();
1.8 deraadt 426: state(IS_FALSE_TRAILER);
427: }
428:
429: static void
430: Delif(void)
431: {
432: drop();
1.9 deraadt 433: ignoreoff();
1.8 deraadt 434: state(IS_FALSE_MIDDLE);
435: }
436:
437: static void
438: Delse(void)
439: {
440: drop();
441: state(IS_FALSE_ELSE);
442: }
443:
444: static void
445: Dendif(void)
446: {
447: drop();
448: --depth;
449: }
450:
/*
 * Transitions for the first line of a group (an #if, #ifdef or #ifndef):
 * push a new nesting level with nest(), then behave like the matching
 * non-nesting transition.
 */

/* Nested group inside discarded text: discard all of it. */
static void
Fdrop(void)
{
	nest();
	Dfalse();
}

/* Group with an unknown condition: pass the directive through. */
static void
Fpass(void)
{
	nest();
	Pelif();
}

/* Group with a true condition. */
static void
Ftrue(void)
{
	nest();
	Strue();
}

/* Group with a false condition. */
static void
Ffalse(void)
{
	nest();
	Sfalse();
}
479:
1.9 deraadt 480: /* variable pedantry for obfuscated lines */
481: static void
482: Oiffy(void)
483: {
484: if (iocccok)
485: Fpass();
486: else
487: Eioccc();
488: ignoreon();
489: }
490:
491: static void
492: Oif(void)
493: {
494: if (iocccok)
495: Fpass();
496: else
497: Eioccc();
498: }
499:
500: static void
501: Oelif(void)
502: {
503: if (iocccok)
504: Pelif();
505: else
506: Eioccc();
507: }
508:
/*
 * Transitions for groups controlled by an "ignore" symbol (-iD / -iU):
 * do the normal first-line transition, then switch comment parsing off
 * for the new nesting level.
 */

/* Ignore-flagged group inside discarded text. */
static void
Idrop(void)
{
	Fdrop();
	ignoreon();
}

/* Ignore-flagged group with a true condition. */
static void
Itrue(void)
{
	Ftrue();
	ignoreon();
}

/* Ignore-flagged group with a false condition. */
static void
Ifalse(void)
{
	Ffalse();
	ignoreon();
}
530:
1.9 deraadt 531: /* edit this line */
1.8 deraadt 532: static void
533: Mpass (void)
534: {
535: strncpy(keyword, "if ", 4);
536: Pelif();
537: }
538:
539: static void
540: Mtrue (void)
541: {
1.9 deraadt 542: keywordedit("else\n");
1.8 deraadt 543: state(IS_TRUE_MIDDLE);
544: }
545:
546: static void
547: Melif (void)
548: {
1.9 deraadt 549: keywordedit("endif\n");
1.8 deraadt 550: state(IS_FALSE_TRAILER);
551: }
552:
553: static void
554: Melse (void)
555: {
1.9 deraadt 556: keywordedit("endif\n");
1.8 deraadt 557: state(IS_FALSE_ELSE);
558: }
559:
/*
 * The state transition table.  Rows are indexed by the Ifstate of the
 * current nesting level, columns by the Linetype of the current line;
 * the column order is given in the comment at the bottom of the table.
 * Each row holds the ten ordinary directive types, then the same ten
 * types for dodgy directives, then PLAIN and EOF.  The only NULL entry
 * (EOF while outside any group) ends processing normally.
 */
static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
/* IS_OUTSIDE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
  Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif,
  print, NULL },
/* IS_FALSE_PREFIX */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
  drop, Eeof },
/* IS_TRUE_PREFIX */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
  Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
  print, Eeof },
/* IS_PASS_MIDDLE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
  Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif,
  print, Eeof },
/* IS_FALSE_MIDDLE */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
  drop, Eeof },
/* IS_TRUE_MIDDLE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
  Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
  print, Eeof },
/* IS_PASS_ELSE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
  Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif,
  print, Eeof },
/* IS_FALSE_ELSE */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
  drop, Eeof },
/* IS_TRUE_ELSE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
  Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc,
  print, Eeof },
/* IS_FALSE_TRAILER */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
  drop, Eeof }
/*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF
  TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY)
  PLAIN EOF */
};
605:
606: /*
607: * State machine utility functions
608: */
609: static void
1.9 deraadt 610: ignoreoff(void)
611: {
612: ignoring[depth] = ignoring[depth-1];
613: }
614:
615: static void
616: ignoreon(void)
617: {
618: ignoring[depth] = true;
619: }
620:
621: static void
622: keywordedit(const char *replacement)
623: {
624: strlcpy(keyword, replacement, tline + sizeof(tline) - keyword);
625: print();
626: }
627:
628: static void
1.8 deraadt 629: nest(void)
630: {
631: depth += 1;
632: if (depth >= MAXDEPTH)
633: error("Too many levels of nesting");
634: stifline[depth] = linenum;
635: }
636:
637: static void
638: state(Ifstate is)
639: {
640: ifstate[depth] = is;
641: }
642:
1.7 deraadt 643: /*
1.8 deraadt 644: * Write a line to the output or not, according to command line options.
645: */
646: static void
647: flushline(bool keep)
648: {
649: if (symlist)
650: return;
651: if (keep ^ complement)
652: fputs(tline, stdout);
653: else {
654: if (lnblank)
655: putc('\n', stdout);
656: exitstat = 1;
1.7 deraadt 657: }
658: }
1.3 deraadt 659:
1.7 deraadt 660: /*
1.8 deraadt 661: * The driver for the state machine.
1.7 deraadt 662: */
1.8 deraadt 663: static void
664: process(void)
1.7 deraadt 665: {
666: Linetype lineval;
1.8 deraadt 667: state_fn *trans;
1.7 deraadt 668:
669: for (;;) {
670: linenum++;
1.8 deraadt 671: lineval = getline();
672: trans = trans_table[ifstate[depth]][lineval];
673: if (trans == NULL)
674: break;
675: trans();
676: debug("process %s -> %s depth %d",
677: linetype_name[lineval],
678: ifstate_name[ifstate[depth]], depth);
1.1 deraadt 679: }
1.8 deraadt 680: if (incomment)
681: error("EOF in comment");
1.1 deraadt 682: }
683:
1.7 deraadt 684: /*
1.8 deraadt 685: * Parse a line and determine its type. We keep the preprocessor line
686: * parser state between calls in a global variable.
1.7 deraadt 687: */
1.8 deraadt 688: static Linetype
689: getline(void)
1.3 deraadt 690: {
1.7 deraadt 691: const char *cp;
1.8 deraadt 692: int cursym;
693: int kwlen;
1.3 deraadt 694: Linetype retval;
1.8 deraadt 695: Comment_state wascomment;
1.3 deraadt 696:
1.8 deraadt 697: if (fgets(tline, MAXLINE, input) == NULL)
698: return (LT_EOF);
1.3 deraadt 699: retval = LT_PLAIN;
1.8 deraadt 700: wascomment = incomment;
1.7 deraadt 701: cp = skipcomment(tline);
1.8 deraadt 702: if (linestate == LS_START) {
703: if (*cp == '#') {
704: linestate = LS_HASH;
705: cp = skipcomment(cp + 1);
706: } else if (*cp != '\0')
707: linestate = LS_DIRTY;
708: }
709: if (!incomment && linestate == LS_HASH) {
710: keyword = tline + (cp - tline);
711: cp = skipsym(cp);
712: kwlen = cp - keyword;
1.9 deraadt 713: /* no way can we deal with a continuation inside a keyword */
1.8 deraadt 714: if (strncmp(cp, "\\\n", 2) == 0)
715: Eioccc();
716: if (strlcmp("ifdef", keyword, kwlen) == 0 ||
717: strlcmp("ifndef", keyword, kwlen) == 0) {
718: cp = skipcomment(cp);
719: if ((cursym = findsym(cp)) < 0)
720: retval = LT_IF;
721: else {
722: retval = (keyword[2] == 'n')
723: ? LT_FALSE : LT_TRUE;
724: if (value[cursym] == NULL)
725: retval = (retval == LT_TRUE)
726: ? LT_FALSE : LT_TRUE;
727: if (ignore[cursym])
728: retval = (retval == LT_TRUE)
729: ? LT_TRUEI : LT_FALSEI;
730: }
731: cp = skipsym(cp);
732: } else if (strlcmp("if", keyword, kwlen) == 0)
733: retval = ifeval(&cp);
734: else if (strlcmp("elif", keyword, kwlen) == 0)
735: retval = ifeval(&cp) - LT_IF + LT_ELIF;
736: else if (strlcmp("else", keyword, kwlen) == 0)
737: retval = LT_ELSE;
738: else if (strlcmp("endif", keyword, kwlen) == 0)
739: retval = LT_ENDIF;
740: else {
741: linestate = LS_DIRTY;
1.7 deraadt 742: retval = LT_PLAIN;
743: }
744: cp = skipcomment(cp);
1.8 deraadt 745: if (*cp != '\0') {
746: linestate = LS_DIRTY;
747: if (retval == LT_TRUE || retval == LT_FALSE ||
748: retval == LT_TRUEI || retval == LT_FALSEI)
749: retval = LT_IF;
750: if (retval == LT_ELTRUE || retval == LT_ELFALSE)
751: retval = LT_ELIF;
1.3 deraadt 752: }
1.9 deraadt 753: if (retval != LT_PLAIN && (wascomment || incomment)) {
754: retval += LT_DODGY;
755: if (incomment)
756: linestate = LS_DIRTY;
757: }
758: /* skipcomment should have changed the state */
1.8 deraadt 759: if (linestate == LS_HASH)
760: abort(); /* bug */
1.7 deraadt 761: }
1.8 deraadt 762: if (linestate == LS_DIRTY) {
763: while (*cp != '\0')
764: cp = skipcomment(cp + 1);
765: }
766: debug("parser %s comment %s line",
767: comment_name[incomment], linestate_name[linestate]);
1.7 deraadt 768: return (retval);
769: }
770:
/*
 * These are the operators that are supported by the expression
 * evaluator.  Each takes both operands already evaluated and returns 1
 * or 0.  Note that if support for division is added then we also need
 * short-circuiting booleans because of divide-by-zero.
 */

/* a < b */
static int
op_lt(int a, int b)
{
	return a < b;
}

/* a > b */
static int
op_gt(int a, int b)
{
	return a > b;
}

/* a <= b */
static int
op_le(int a, int b)
{
	return a <= b;
}

/* a >= b */
static int
op_ge(int a, int b)
{
	return a >= b;
}

/* a == b */
static int
op_eq(int a, int b)
{
	return a == b;
}

/* a != b */
static int
op_ne(int a, int b)
{
	return a != b;
}

/* a || b (both operands are always evaluated by the caller) */
static int
op_or(int a, int b)
{
	return a || b;
}

/* a && b (both operands are always evaluated by the caller) */
static int
op_and(int a, int b)
{
	return a && b;
}
823:
824: /*
1.8 deraadt 825: * An evaluation function takes three arguments, as follows: (1) a pointer to
826: * an element of the precedence table which lists the operators at the current
827: * level of precedence; (2) a pointer to an integer which will receive the
828: * value of the expression; and (3) a pointer to a char* that points to the
829: * expression to be evaluated and that is updated to the end of the expression
830: * when evaluation is complete. The function returns LT_FALSE if the value of
831: * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the
832: * expression could not be evaluated.
1.7 deraadt 833: */
1.8 deraadt 834: struct ops;
835:
836: typedef Linetype eval_fn(const struct ops *, int *, const char **);
837:
838: static eval_fn eval_table, eval_unary;
839:
840: /*
841: * The precedence table. Expressions involving binary operators are evaluated
842: * in a table-driven way by eval_table. When it evaluates a subexpression it
843: * calls the inner function with its first argument pointing to the next
844: * element of the table. Innermost expressions have special non-table-driven
845: * handling.
846: */
847: static const struct ops {
848: eval_fn *inner;
849: struct op {
850: const char *str;
851: int (*fn)(int, int);
852: } op[5];
853: } eval_ops[] = {
854: { eval_table, { { "||", op_or } } },
855: { eval_table, { { "&&", op_and } } },
856: { eval_table, { { "==", op_eq },
857: { "!=", op_ne } } },
858: { eval_unary, { { "<=", op_le },
859: { ">=", op_ge },
860: { "<", op_lt },
861: { ">", op_gt } } }
862: };
1.7 deraadt 863:
864: /*
865: * Function for evaluating the innermost parts of expressions,
866: * viz. !expr (expr) defined(symbol) symbol number
867: * We reset the keepthis flag when we find a non-constant subexpression.
868: */
1.8 deraadt 869: static Linetype
870: eval_unary(const struct ops *ops, int *valp, const char **cpp)
1.7 deraadt 871: {
872: const char *cp;
873: char *ep;
874: int sym;
875:
876: cp = skipcomment(*cpp);
1.8 deraadt 877: if (*cp == '!') {
1.7 deraadt 878: debug("eval%d !", ops - eval_ops);
879: cp++;
880: if (eval_unary(ops, valp, &cp) == LT_IF)
881: return (LT_IF);
882: *valp = !*valp;
883: } else if (*cp == '(') {
884: cp++;
885: debug("eval%d (", ops - eval_ops);
886: if (eval_table(eval_ops, valp, &cp) == LT_IF)
887: return (LT_IF);
888: cp = skipcomment(cp);
889: if (*cp++ != ')')
890: return (LT_IF);
891: } else if (isdigit((unsigned char)*cp)) {
892: debug("eval%d number", ops - eval_ops);
893: *valp = strtol(cp, &ep, 0);
894: cp = skipsym(cp);
895: } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) {
896: cp = skipcomment(cp+7);
897: debug("eval%d defined", ops - eval_ops);
898: if (*cp++ != '(')
899: return (LT_IF);
900: cp = skipcomment(cp);
901: sym = findsym(cp);
1.8 deraadt 902: if (sym < 0 && !symlist)
1.7 deraadt 903: return (LT_IF);
904: *valp = (value[sym] != NULL);
905: cp = skipsym(cp);
906: cp = skipcomment(cp);
907: if (*cp++ != ')')
908: return (LT_IF);
909: keepthis = false;
910: } else if (!endsym(*cp)) {
911: debug("eval%d symbol", ops - eval_ops);
912: sym = findsym(cp);
1.8 deraadt 913: if (sym < 0 && !symlist)
1.7 deraadt 914: return (LT_IF);
915: if (value[sym] == NULL)
916: *valp = 0;
917: else {
918: *valp = strtol(value[sym], &ep, 0);
919: if (*ep != '\0' || ep == value[sym])
920: return (LT_IF);
921: }
922: cp = skipsym(cp);
923: keepthis = false;
924: } else
925: return (LT_IF);
926:
927: *cpp = cp;
928: debug("eval%d = %d", ops - eval_ops, *valp);
929: return (*valp ? LT_TRUE : LT_FALSE);
930: }
931:
932: /*
933: * Table-driven evaluation of binary operators.
934: */
1.8 deraadt 935: static Linetype
936: eval_table(const struct ops *ops, int *valp, const char **cpp)
1.7 deraadt 937: {
1.8 deraadt 938: const struct op *op;
1.7 deraadt 939: const char *cp;
940: int val;
941:
942: debug("eval%d", ops - eval_ops);
943: cp = *cpp;
944: if (ops->inner(ops+1, valp, &cp) == LT_IF)
945: return (LT_IF);
946: for (;;) {
947: cp = skipcomment(cp);
948: for (op = ops->op; op->str != NULL; op++)
949: if (strncmp(cp, op->str, strlen(op->str)) == 0)
950: break;
951: if (op->str == NULL)
952: break;
953: cp += strlen(op->str);
954: debug("eval%d %s", ops - eval_ops, op->str);
955: if (ops->inner(ops+1, &val, &cp) == LT_IF)
1.8 deraadt 956: return (LT_IF);
1.7 deraadt 957: *valp = op->fn(*valp, val);
958: }
959:
960: *cpp = cp;
961: debug("eval%d = %d", ops - eval_ops, *valp);
962: return (*valp ? LT_TRUE : LT_FALSE);
1.1 deraadt 963: }
1.7 deraadt 964:
1.1 deraadt 965: /*
1.7 deraadt 966: * Evaluate the expression on a #if or #elif line. If we can work out
967: * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
1.8 deraadt 968: * return just a generic LT_IF.
1.1 deraadt 969: */
1.8 deraadt 970: static Linetype
1.7 deraadt 971: ifeval(const char **cpp)
972: {
1.8 deraadt 973: int ret;
1.7 deraadt 974: int val;
975:
976: debug("eval %s", *cpp);
977: keepthis = killconsts ? false : true;
1.8 deraadt 978: ret = eval_table(eval_ops, &val, cpp);
979: return (keepthis ? LT_IF : ret);
1.7 deraadt 980: }
981:
982: /*
983: * Skip over comments and stop at the next character position that is
1.8 deraadt 984: * not whitespace. Between calls we keep the comment state in a global
985: * variable, and we also make a note when we get a proper end-of-line.
986: * XXX: doesn't cope with the buffer splitting inside a state transition.
1.7 deraadt 987: */
1.8 deraadt 988: static const char *
1.7 deraadt 989: skipcomment(const char *cp)
1.3 deraadt 990: {
1.8 deraadt 991: if (text || ignoring[depth]) {
992: while (isspace((unsigned char)*cp))
993: cp += 1;
994: return (cp);
995: }
996: while (*cp != '\0')
997: if (strncmp(cp, "\\\n", 2) == 0)
998: cp += 2;
999: else switch (incomment) {
1000: case NO_COMMENT:
1001: if (strncmp(cp, "/\\\n", 3) == 0) {
1002: incomment = STARTING_COMMENT;
1003: cp += 3;
1004: } else if (strncmp(cp, "/*", 2) == 0) {
1.3 deraadt 1005: incomment = C_COMMENT;
1.8 deraadt 1006: cp += 2;
1007: } else if (strncmp(cp, "//", 2) == 0) {
1008: incomment = CXX_COMMENT;
1009: cp += 2;
1010: } else if (strncmp(cp, "\n", 1) == 0) {
1011: linestate = LS_START;
1012: cp += 1;
1013: } else if (strchr(" \t", *cp) != NULL) {
1014: cp += 1;
1015: } else
1016: return (cp);
1017: continue;
1018: case CXX_COMMENT:
1019: if (strncmp(cp, "\n", 1) == 0) {
1020: incomment = NO_COMMENT;
1021: linestate = LS_START;
1.3 deraadt 1022: }
1.8 deraadt 1023: cp += 1;
1024: continue;
1025: case C_COMMENT:
1026: if (strncmp(cp, "*\\\n", 3) == 0) {
1027: incomment = FINISHING_COMMENT;
1028: cp += 3;
1029: } else if (strncmp(cp, "*/", 2) == 0) {
1030: incomment = NO_COMMENT;
1031: cp += 2;
1032: } else
1033: cp += 1;
1034: continue;
1035: case STARTING_COMMENT:
1036: if (*cp == '*') {
1037: incomment = C_COMMENT;
1038: cp += 1;
1039: } else if (*cp == '/') {
1.3 deraadt 1040: incomment = CXX_COMMENT;
1.8 deraadt 1041: cp += 1;
1042: } else {
1043: incomment = NO_COMMENT;
1044: linestate = LS_DIRTY;
1.3 deraadt 1045: }
1.8 deraadt 1046: continue;
1047: case FINISHING_COMMENT:
1048: if (*cp == '/') {
1049: incomment = NO_COMMENT;
1050: cp += 1;
1051: } else
1052: incomment = C_COMMENT;
1053: continue;
1054: default:
1055: /* bug */
1056: abort();
1.3 deraadt 1057: }
1.8 deraadt 1058: return (cp);
1.1 deraadt 1059: }
1.7 deraadt 1060:
/*
 * Skip over an identifier: return a pointer to the first character at or
 * after cp that cannot be part of a symbol name.
 */
static const char *
skipsym(const char *cp)
{
	for (; !endsym(*cp); cp++)
		continue;
	return (cp);
}
1071:
1.1 deraadt 1072: /*
1.7 deraadt 1073: * Look for the symbol in the symbol table. If is is found, we return
1.8 deraadt 1074: * the symbol table index, else we return -1.
1.1 deraadt 1075: */
1.8 deraadt 1076: static int
1.7 deraadt 1077: findsym(const char *str)
1.1 deraadt 1078: {
1.7 deraadt 1079: const char *cp;
1.3 deraadt 1080: int symind;
1081:
1.8 deraadt 1082: cp = skipsym(str);
1083: if (cp == str)
1084: return (-1);
1085: if (symlist)
1.7 deraadt 1086: printf("%.*s\n", (int)(cp-str), str);
1.8 deraadt 1087: for (symind = 0; symind < nsyms; ++symind) {
1088: if (strlcmp(symname[symind], str, cp-str) == 0) {
1.7 deraadt 1089: debug("findsym %s %s", symname[symind],
1090: value[symind] ? value[symind] : "");
1091: return (symind);
1.3 deraadt 1092: }
1.1 deraadt 1093: }
1.8 deraadt 1094: return (-1);
1.1 deraadt 1095: }
1.7 deraadt 1096:
1.1 deraadt 1097: /*
1.7 deraadt 1098: * Add a symbol to the symbol table.
1099: */
1.8 deraadt 1100: static void
1.7 deraadt 1101: addsym(bool ignorethis, bool definethis, char *sym)
1102: {
1103: int symind;
1104: char *val;
1105:
1106: symind = findsym(sym);
1.8 deraadt 1107: if (symind < 0) {
1.7 deraadt 1108: if (nsyms >= MAXSYMS)
1109: errx(2, "too many symbols");
1110: symind = nsyms++;
1111: }
1112: symname[symind] = sym;
1113: ignore[symind] = ignorethis;
1.8 deraadt 1114: val = sym + (skipsym(sym) - sym);
1.7 deraadt 1115: if (definethis) {
1116: if (*val == '=') {
1117: value[symind] = val+1;
1118: *val = '\0';
1119: } else if (*val == '\0')
1120: value[symind] = "";
1121: else
1122: usage();
1123: } else {
1124: if (*val != '\0')
1125: usage();
1126: value[symind] = NULL;
1127: }
1128: }
1129:
/*
 * Compare s with n characters of t.
 * The same as strncmp() except that it checks that s[n] == '\0', i.e.
 * the result is zero only when s is exactly the compared prefix of t.
 */
static int
strlcmp(const char *s, const char *t, size_t n)
{
	for (; n > 0 && *t != '\0'; n--, s++, t++) {
		if (*s != *t)
			return ((unsigned char)*s - (unsigned char)*t);
	}
	/* s must end here too, otherwise it is longer than the prefix */
	return ((unsigned char)*s);
}
1144:
1.7 deraadt 1145: /*
1.8 deraadt 1146: * Diagnostics.
1.7 deraadt 1147: */
1.8 deraadt 1148: static void
1.7 deraadt 1149: debug(const char *msg, ...)
1.1 deraadt 1150: {
1.7 deraadt 1151: va_list ap;
1152:
1153: if (debugging) {
1154: va_start(ap, msg);
1155: vwarnx(msg, ap);
1156: va_end(ap);
1157: }
1.1 deraadt 1158: }
1159:
1.8 deraadt 1160: static void
1161: error(const char *msg)
1.7 deraadt 1162: {
1.8 deraadt 1163: if (depth == 0)
1.9 deraadt 1164: warnx("%s: %d: %s", filename, linenum, msg);
1.7 deraadt 1165: else
1.9 deraadt 1166: warnx("%s: %d: %s (#if line %d depth %d)",
1.8 deraadt 1167: filename, linenum, msg, stifline[depth], depth);
1.9 deraadt 1168: errx(2, "output may be truncated");
1.1 deraadt 1169: }