Annotation of src/usr.bin/unifdef/unifdef.c, Revision 1.14
1.14 ! deraadt 1: /* $OpenBSD: unifdef.c,v 1.13 2007/06/25 15:57:28 jmc Exp $ */
1.1 deraadt 2: /*
1.11 avsm 3: * Copyright (c) 2002, 2003 Tony Finch <dot@dotat.at>
1.1 deraadt 4: * Copyright (c) 1985, 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
1.7 deraadt 8: * Dave Yost. Support for #if and #elif was added by Tony Finch.
1.1 deraadt 9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
1.10 millert 18: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 19: * may be used to endorse or promote products derived from this software
20: * without specific prior written permission.
21: *
22: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32: * SUCH DAMAGE.
33: */
34:
35: /*
36: * unifdef - remove ifdef'ed lines
37: *
38: * Wishlist:
39: * provide an option which will append the name of the
40: * appropriate symbol after #else's and #endif's
41: * provide an option which will check symbols after
42: * #else's and #endif's to see that they match their
43: * corresponding #ifdef or #ifndef
1.7 deraadt 44: * generate #line directives in place of deleted code
1.9 deraadt 45: *
46: * The first two items above require better buffer handling, which would
47: * also make it possible to handle all "dodgy" directives correctly.
1.1 deraadt 48: */
49:
1.7 deraadt 50: #include <ctype.h>
51: #include <err.h>
52: #include <stdarg.h>
53: #include <stdbool.h>
1.1 deraadt 54: #include <stdio.h>
1.7 deraadt 55: #include <stdlib.h>
56: #include <string.h>
57: #include <unistd.h>
1.1 deraadt 58:
/*
 * Classification of one input line, as reported by the line parser.
 *
 * The numeric order is significant: the #elif values sit at a fixed
 * offset from the matching #if values (LT_ELIF - LT_IF), and adding
 * LT_DODGY to a directive value yields its "dodgy" variant (a directive
 * that is not contained on one line).
 */
typedef enum {
	LT_TRUEI,	/* a true #if with ignore flag */
	LT_FALSEI,	/* a false #if with ignore flag */
	LT_IF,		/* an unknown #if */
	LT_TRUE,	/* a true #if */
	LT_FALSE,	/* a false #if */
	LT_ELIF,	/* an unknown #elif */
	LT_ELTRUE,	/* a true #elif */
	LT_ELFALSE,	/* a false #elif */
	LT_ELSE,	/* #else */
	LT_ENDIF,	/* #endif */
	LT_DODGY,	/* flag: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,	/* last dodgy value */
	LT_PLAIN,	/* ordinary line */
	LT_EOF,		/* end of file */
	LT_COUNT	/* number of line types (table dimension) */
} Linetype;
77:
/* Printable names for debugging output, indexed by Linetype value. */
static char const * const linetype_name[] = {
	/* directives that sit on one line */
	"TRUEI",
	"FALSEI",
	"IF",
	"TRUE",
	"FALSE",
	"ELIF",
	"ELTRUE",
	"ELFALSE",
	"ELSE",
	"ENDIF",
	/* the same directives with the dodgy flag added */
	"DODGY TRUEI",
	"DODGY FALSEI",
	"DODGY IF",
	"DODGY TRUE",
	"DODGY FALSE",
	"DODGY ELIF",
	"DODGY ELTRUE",
	"DODGY ELFALSE",
	"DODGY ELSE",
	"DODGY ENDIF",
	/* non-directives */
	"PLAIN",
	"EOF"
};
1.7 deraadt 87:
/*
 * State of #if processing for one nesting level.  Used as the first
 * index of the transition table, so the order must match its rows.
 */
typedef enum {
	IS_OUTSIDE,		/* not inside any #if group */
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT		/* number of states (table dimension) */
} Ifstate;
102:
/* Printable names for debugging output, indexed by Ifstate value. */
static char const * const ifstate_name[] = {
	"OUTSIDE",
	"FALSE_PREFIX",
	"TRUE_PREFIX",
	"PASS_MIDDLE",
	"FALSE_MIDDLE",
	"TRUE_MIDDLE",
	"PASS_ELSE",
	"FALSE_ELSE",
	"TRUE_ELSE",
	"FALSE_TRAILER"
};
109:
/*
 * State of the comment parser.  NO_COMMENT is deliberately zero so the
 * state can be tested as a boolean ("are we inside a comment?").
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT	/* star-backslash-newline in a C comment */
} Comment_state;
118:
/* Printable names for debugging output, indexed by Comment_state value. */
static char const * const comment_name[] = {
	"NO",
	"C",
	"CXX",
	"STARTING",
	"FINISHING"
};
1.7 deraadt 122:
/*
 * State of the preprocessor line parser: tracks whether the current
 * logical line can still turn out to be a preprocessor directive.
 */
typedef enum {
	LS_START,	/* only space and comments on this line */
	LS_HASH,	/* only space, comments, and a hash */
	LS_DIRTY	/* this line can't be a preprocessor line */
} Line_state;
1.7 deraadt 129:
/* Printable names for debugging output, indexed by Line_state value. */
static char const * const linestate_name[] = {
	"START",
	"HASH",
	"DIRTY"
};
1.7 deraadt 133:
/*
 * Fixed capacities, chosen with reference to the minimum translation
 * limits in ISO/IEC 9899:1999 5.2.4.1.
 */
#define MAXDEPTH 64	/* maximum #if nesting */
#define MAXLINE 4096	/* maximum length of line */
#define MAXSYMS 4096	/* maximum number of symbols */

/*
 * Editing a keyword in place can make the line longer, so the input
 * buffer is declared with this many spare bytes after MAXLINE.
 */
#define EDITSLOP 10
146:
147: /*
1.8 deraadt 148: * Globals.
1.7 deraadt 149: */
150:
1.8 deraadt 151: static bool complement; /* -c: do the complement */
152: static bool debugging; /* -d: debugging reports */
1.9 deraadt 153: static bool iocccok; /* -e: fewer IOCCC errors */
1.8 deraadt 154: static bool killconsts; /* -k: eval constant #ifs */
155: static bool lnblank; /* -l: blank deleted lines */
156: static bool symlist; /* -s: output symbol list */
157: static bool text; /* -t: this is a text file */
158:
159: static const char *symname[MAXSYMS]; /* symbol name */
160: static const char *value[MAXSYMS]; /* -Dsym=value */
161: static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */
162: static int nsyms; /* number of symbols */
163:
164: static FILE *input; /* input file pointer */
165: static const char *filename; /* input file name */
166: static int linenum; /* current line number */
167:
1.9 deraadt 168: static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */
1.8 deraadt 169: static char *keyword; /* used for editing #elif's */
170:
171: static Comment_state incomment; /* comment parser state */
172: static Line_state linestate; /* #if line parser state */
173: static Ifstate ifstate[MAXDEPTH]; /* #if processor state */
174: static bool ignoring[MAXDEPTH]; /* ignore comments state */
175: static int stifline[MAXDEPTH]; /* start of current #if */
176: static int depth; /* current #if nesting */
177: static bool keepthis; /* don't delete constant #if */
178:
179: static int exitstat; /* program exit status */
180:
181: static void addsym(bool, bool, char *);
182: static void debug(const char *, ...);
183: static void error(const char *);
184: static int findsym(const char *);
185: static void flushline(bool);
186: static Linetype getline(void);
187: static Linetype ifeval(const char **);
1.9 deraadt 188: static void ignoreoff(void);
189: static void ignoreon(void);
190: static void keywordedit(const char *);
1.8 deraadt 191: static void nest(void);
192: static void process(void);
193: static const char *skipcomment(const char *);
194: static const char *skipsym(const char *);
195: static void state(Ifstate);
196: static int strlcmp(const char *, const char *, size_t);
197: static void usage(void);
1.7 deraadt 198:
/*
 * True if c cannot be part of an identifier, i.e. it is neither a
 * letter, a digit, nor an underscore.  The argument is parenthesized
 * so that any expression may be passed; note that it is still
 * evaluated more than once, so it must not have side effects.
 */
#define endsym(c) (!isalnum((unsigned char)(c)) && (c) != '_')
200:
1.8 deraadt 201: /*
202: * The main program.
203: */
1.7 deraadt 204: int
205: main(int argc, char *argv[])
206: {
207: int opt;
208:
1.11 avsm 209: while ((opt = getopt(argc, argv, "i:D:U:I:cdeklst")) != -1)
1.7 deraadt 210: switch (opt) {
211: case 'i': /* treat stuff controlled by these symbols as text */
212: /*
213: * For strict backwards-compatibility the U or D
214: * should be immediately after the -i but it doesn't
215: * matter much if we relax that requirement.
216: */
217: opt = *optarg++;
218: if (opt == 'D')
219: addsym(true, true, optarg);
220: else if (opt == 'U')
221: addsym(true, false, optarg);
222: else
223: usage();
224: break;
225: case 'D': /* define a symbol */
226: addsym(false, true, optarg);
227: break;
228: case 'U': /* undef a symbol */
229: addsym(false, false, optarg);
230: break;
1.11 avsm 231: case 'I':
232: /* no-op for compatibility with cpp */
233: break;
1.7 deraadt 234: case 'c': /* treat -D as -U and vice versa */
235: complement = true;
236: break;
1.8 deraadt 237: case 'd':
238: debugging = true;
239: break;
1.9 deraadt 240: case 'e': /* fewer errors from dodgy lines */
241: iocccok = true;
242: break;
1.7 deraadt 243: case 'k': /* process constant #ifs */
244: killconsts = true;
245: break;
246: case 'l': /* blank deleted lines instead of omitting them */
247: lnblank = true;
248: break;
249: case 's': /* only output list of symbols that control #ifs */
250: symlist = true;
251: break;
1.8 deraadt 252: case 't': /* don't parse C comments */
1.7 deraadt 253: text = true;
254: break;
255: default:
256: usage();
257: }
258: argc -= optind;
259: argv += optind;
1.8 deraadt 260: if (nsyms == 0 && !symlist) {
1.7 deraadt 261: warnx("must -D or -U at least one symbol");
262: usage();
263: }
264: if (argc > 1) {
265: errx(2, "can only do one file");
266: } else if (argc == 1 && strcmp(*argv, "-") != 0) {
267: filename = *argv;
268: if ((input = fopen(filename, "r")) != NULL) {
1.8 deraadt 269: process();
1.7 deraadt 270: (void) fclose(input);
271: } else
272: err(2, "can't open %s", *argv);
273: } else {
274: filename = "[stdin]";
275: input = stdin;
1.8 deraadt 276: process();
1.7 deraadt 277: }
278:
1.8 deraadt 279: exit(exitstat);
1.7 deraadt 280: }
1.1 deraadt 281:
/* Print the usage synopsis on stderr and exit with status 2. */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: unifdef [-ceklst] [-Dsym[=val]] [-Ipath] [-iDsym[=val]] "
	    "[-iUsym] [-Usym]\n"
	    "\t[file]\n";

	fputs(synopsis, stderr);
	exit(2);
}
291:
292: /*
293: * A state transition function alters the global #if processing state
294: * in a particular way. The table below is indexed by the current
295: * processing state and the type of the current line. A NULL entry
1.9 deraadt 296: * indicates that processing is complete.
1.8 deraadt 297: *
298: * Nesting is handled by keeping a stack of states; some transition
1.9 deraadt 299: * functions increase or decrease the depth. They also maintain the
1.8 deraadt 300: * ignore state on a stack. In some complicated cases they have to
301: * alter the preprocessor directive, as follows.
302: *
303: * When we have processed a group that starts off with a known-false
304: * #if/#elif sequence (which has therefore been deleted) followed by a
1.9 deraadt 305: * #elif that we don't understand and therefore must keep, we edit the
1.8 deraadt 306: * latter into a #if to keep the nesting correct.
307: *
308: * When we find a true #elif in a group, the following block will
309: * always be kept and the rest of the sequence after the next #elif or
1.9 deraadt 310: * #else will be discarded. We edit the #elif into a #else and the
1.8 deraadt 311: * following directive to #endif since this has the desired behaviour.
1.9 deraadt 312: *
313: * "Dodgy" directives are split across multiple lines, the most common
314: * example being a multi-line comment hanging off the right of the
315: * directive. We can handle them correctly only if there is no change
316: * from printing to dropping (or vice versa) caused by that directive.
317: * If the directive is the first of a group we have a choice between
318: * failing with an error, or passing it through unchanged instead of
319: * evaluating it. The latter is not the default to avoid questions from
320: * users about unifdef unexpectedly leaving behind preprocessor directives.
1.8 deraadt 321: */
/* Type of a state transition function: no arguments, no result. */
typedef void state_fn(void);
323:
324: /* report an error */
/* Transition: an #elif appeared where none is allowed; fatal. */
static void
Eelif(void)
{
	static const char msg[] = "Inappropriate #elif";

	error(msg);
}
330:
/* Transition: an #else appeared where none is allowed; fatal. */
static void
Eelse(void)
{
	static const char msg[] = "Inappropriate #else";

	error(msg);
}
336:
/* Transition: an #endif appeared where none is allowed; fatal. */
static void
Eendif(void)
{
	static const char msg[] = "Inappropriate #endif";

	error(msg);
}
342:
/* Transition: end of file reached inside an #if group; fatal. */
static void
Eeof(void)
{
	static const char msg[] = "Premature EOF";

	error(msg);
}
348:
/* Transition: a directive too contorted to handle safely; fatal. */
static void
Eioccc(void)
{
	static const char msg[] = "Obfuscated preprocessor control line";

	error(msg);
}
354:
355: /* plain line handling */
/* Transition: keep the current line (subject to flushline's options). */
static void
print(void)
{
	const bool keep = true;

	flushline(keep);
}
361:
/* Transition: discard the current line (subject to flushline's options). */
static void
drop(void)
{
	const bool keep = false;

	flushline(keep);
}
367:
368: /* output lacks group's start line */
369: static void
370: Strue(void)
371: {
372: drop();
1.9 deraadt 373: ignoreoff();
1.8 deraadt 374: state(IS_TRUE_PREFIX);
375: }
376:
377: static void
378: Sfalse(void)
379: {
380: drop();
1.9 deraadt 381: ignoreoff();
1.8 deraadt 382: state(IS_FALSE_PREFIX);
383: }
384:
385: static void
386: Selse(void)
387: {
388: drop();
389: state(IS_TRUE_ELSE);
390: }
391:
392: /* print/pass this block */
393: static void
394: Pelif(void)
395: {
396: print();
1.9 deraadt 397: ignoreoff();
1.8 deraadt 398: state(IS_PASS_MIDDLE);
399: }
400:
401: static void
402: Pelse(void)
403: {
404: print();
405: state(IS_PASS_ELSE);
406: }
407:
408: static void
409: Pendif(void)
410: {
411: print();
412: --depth;
413: }
414:
415: /* discard this block */
416: static void
417: Dfalse(void)
418: {
419: drop();
1.9 deraadt 420: ignoreoff();
1.8 deraadt 421: state(IS_FALSE_TRAILER);
422: }
423:
424: static void
425: Delif(void)
426: {
427: drop();
1.9 deraadt 428: ignoreoff();
1.8 deraadt 429: state(IS_FALSE_MIDDLE);
430: }
431:
432: static void
433: Delse(void)
434: {
435: drop();
436: state(IS_FALSE_ELSE);
437: }
438:
439: static void
440: Dendif(void)
441: {
442: drop();
443: --depth;
444: }
445:
446: /* first line of group */
/*
 * Transition for the first line of a group: open a new nesting level,
 * then behave like Dfalse (drop the line, enter IS_FALSE_TRAILER).
 */
static void
Fdrop(void)
{
	nest();		/* must come first: Dfalse acts on the new level */
	Dfalse();
}
453:
/*
 * Transition for the first line of a group: open a new nesting level,
 * then behave like Pelif (keep the line, enter IS_PASS_MIDDLE).
 */
static void
Fpass(void)
{
	nest();		/* must come first: Pelif acts on the new level */
	Pelif();
}
460:
/*
 * Transition for the first line of a group: open a new nesting level,
 * then behave like Strue (drop the line, enter IS_TRUE_PREFIX).
 */
static void
Ftrue(void)
{
	nest();		/* must come first: Strue acts on the new level */
	Strue();
}
467:
/*
 * Transition for the first line of a group: open a new nesting level,
 * then behave like Sfalse (drop the line, enter IS_FALSE_PREFIX).
 */
static void
Ffalse(void)
{
	nest();		/* must come first: Sfalse acts on the new level */
	Sfalse();
}
474:
1.9 deraadt 475: /* variable pedantry for obfuscated lines */
476: static void
477: Oiffy(void)
478: {
479: if (iocccok)
480: Fpass();
481: else
482: Eioccc();
483: ignoreon();
484: }
485:
486: static void
487: Oif(void)
488: {
489: if (iocccok)
490: Fpass();
491: else
492: Eioccc();
493: }
494:
495: static void
496: Oelif(void)
497: {
498: if (iocccok)
499: Pelif();
500: else
501: Eioccc();
502: }
503:
1.8 deraadt 504: /* ignore comments in this block */
/* Transition: as Fdrop, then ignore comments at the new level. */
static void
Idrop(void)
{
	Fdrop();
	ignoreon();	/* after Fdrop: it resets the flag via ignoreoff() */
}
511:
/* Transition: as Ftrue, then ignore comments at the new level. */
static void
Itrue(void)
{
	Ftrue();
	ignoreon();	/* after Ftrue: it resets the flag via ignoreoff() */
}
518:
/* Transition: as Ffalse, then ignore comments at the new level. */
static void
Ifalse(void)
{
	Ffalse();
	ignoreon();	/* after Ffalse: it resets the flag via ignoreoff() */
}
525:
1.9 deraadt 526: /* edit this line */
1.8 deraadt 527: static void
528: Mpass (void)
529: {
530: strncpy(keyword, "if ", 4);
531: Pelif();
532: }
533:
534: static void
535: Mtrue (void)
536: {
1.9 deraadt 537: keywordedit("else\n");
1.8 deraadt 538: state(IS_TRUE_MIDDLE);
539: }
540:
541: static void
542: Melif (void)
543: {
1.9 deraadt 544: keywordedit("endif\n");
1.8 deraadt 545: state(IS_FALSE_TRAILER);
546: }
547:
548: static void
549: Melse (void)
550: {
1.9 deraadt 551: keywordedit("endif\n");
1.8 deraadt 552: state(IS_FALSE_ELSE);
553: }
554:
555: static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
556: /* IS_OUTSIDE */
1.9 deraadt 557: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
558: Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif,
559: print, NULL },
1.8 deraadt 560: /* IS_FALSE_PREFIX */
1.9 deraadt 561: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
562: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
563: drop, Eeof },
1.8 deraadt 564: /* IS_TRUE_PREFIX */
1.9 deraadt 565: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
566: Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
567: print, Eeof },
1.8 deraadt 568: /* IS_PASS_MIDDLE */
1.9 deraadt 569: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
570: Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif,
571: print, Eeof },
1.8 deraadt 572: /* IS_FALSE_MIDDLE */
1.9 deraadt 573: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
574: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
575: drop, Eeof },
1.8 deraadt 576: /* IS_TRUE_MIDDLE */
1.9 deraadt 577: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
578: Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
579: print, Eeof },
1.8 deraadt 580: /* IS_PASS_ELSE */
1.9 deraadt 581: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
582: Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif,
583: print, Eeof },
1.8 deraadt 584: /* IS_FALSE_ELSE */
1.9 deraadt 585: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
586: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
587: drop, Eeof },
1.8 deraadt 588: /* IS_TRUE_ELSE */
1.9 deraadt 589: { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
590: Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc,
591: print, Eeof },
1.8 deraadt 592: /* IS_FALSE_TRAILER */
1.9 deraadt 593: { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
594: Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
595: drop, Eeof }
596: /*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF
597: TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY)
598: PLAIN EOF */
1.8 deraadt 599: };
600:
601: /*
602: * State machine utility functions
603: */
604: static void
1.9 deraadt 605: ignoreoff(void)
606: {
607: ignoring[depth] = ignoring[depth-1];
608: }
609:
610: static void
611: ignoreon(void)
612: {
613: ignoring[depth] = true;
614: }
615:
616: static void
617: keywordedit(const char *replacement)
618: {
619: strlcpy(keyword, replacement, tline + sizeof(tline) - keyword);
620: print();
621: }
622:
623: static void
1.8 deraadt 624: nest(void)
625: {
626: depth += 1;
627: if (depth >= MAXDEPTH)
628: error("Too many levels of nesting");
629: stifline[depth] = linenum;
630: }
631:
632: static void
633: state(Ifstate is)
634: {
635: ifstate[depth] = is;
636: }
637:
1.7 deraadt 638: /*
1.8 deraadt 639: * Write a line to the output or not, according to command line options.
640: */
641: static void
642: flushline(bool keep)
643: {
644: if (symlist)
645: return;
646: if (keep ^ complement)
647: fputs(tline, stdout);
648: else {
649: if (lnblank)
650: putc('\n', stdout);
651: exitstat = 1;
1.7 deraadt 652: }
653: }
1.3 deraadt 654:
1.7 deraadt 655: /*
1.8 deraadt 656: * The driver for the state machine.
1.7 deraadt 657: */
1.8 deraadt 658: static void
659: process(void)
1.7 deraadt 660: {
661: Linetype lineval;
1.8 deraadt 662: state_fn *trans;
1.7 deraadt 663:
664: for (;;) {
665: linenum++;
1.8 deraadt 666: lineval = getline();
667: trans = trans_table[ifstate[depth]][lineval];
668: if (trans == NULL)
669: break;
670: trans();
671: debug("process %s -> %s depth %d",
672: linetype_name[lineval],
673: ifstate_name[ifstate[depth]], depth);
1.1 deraadt 674: }
1.8 deraadt 675: if (incomment)
676: error("EOF in comment");
1.1 deraadt 677: }
678:
1.7 deraadt 679: /*
1.8 deraadt 680: * Parse a line and determine its type. We keep the preprocessor line
681: * parser state between calls in a global variable.
1.7 deraadt 682: */
1.8 deraadt 683: static Linetype
684: getline(void)
1.3 deraadt 685: {
1.7 deraadt 686: const char *cp;
1.8 deraadt 687: int cursym;
688: int kwlen;
1.3 deraadt 689: Linetype retval;
1.8 deraadt 690: Comment_state wascomment;
1.3 deraadt 691:
1.8 deraadt 692: if (fgets(tline, MAXLINE, input) == NULL)
693: return (LT_EOF);
1.3 deraadt 694: retval = LT_PLAIN;
1.8 deraadt 695: wascomment = incomment;
1.7 deraadt 696: cp = skipcomment(tline);
1.8 deraadt 697: if (linestate == LS_START) {
698: if (*cp == '#') {
699: linestate = LS_HASH;
700: cp = skipcomment(cp + 1);
701: } else if (*cp != '\0')
702: linestate = LS_DIRTY;
703: }
704: if (!incomment && linestate == LS_HASH) {
705: keyword = tline + (cp - tline);
706: cp = skipsym(cp);
707: kwlen = cp - keyword;
1.9 deraadt 708: /* no way can we deal with a continuation inside a keyword */
1.8 deraadt 709: if (strncmp(cp, "\\\n", 2) == 0)
710: Eioccc();
711: if (strlcmp("ifdef", keyword, kwlen) == 0 ||
712: strlcmp("ifndef", keyword, kwlen) == 0) {
713: cp = skipcomment(cp);
714: if ((cursym = findsym(cp)) < 0)
715: retval = LT_IF;
716: else {
717: retval = (keyword[2] == 'n')
718: ? LT_FALSE : LT_TRUE;
719: if (value[cursym] == NULL)
720: retval = (retval == LT_TRUE)
721: ? LT_FALSE : LT_TRUE;
722: if (ignore[cursym])
723: retval = (retval == LT_TRUE)
724: ? LT_TRUEI : LT_FALSEI;
725: }
726: cp = skipsym(cp);
727: } else if (strlcmp("if", keyword, kwlen) == 0)
728: retval = ifeval(&cp);
729: else if (strlcmp("elif", keyword, kwlen) == 0)
730: retval = ifeval(&cp) - LT_IF + LT_ELIF;
731: else if (strlcmp("else", keyword, kwlen) == 0)
732: retval = LT_ELSE;
733: else if (strlcmp("endif", keyword, kwlen) == 0)
734: retval = LT_ENDIF;
735: else {
736: linestate = LS_DIRTY;
1.7 deraadt 737: retval = LT_PLAIN;
738: }
739: cp = skipcomment(cp);
1.8 deraadt 740: if (*cp != '\0') {
741: linestate = LS_DIRTY;
742: if (retval == LT_TRUE || retval == LT_FALSE ||
743: retval == LT_TRUEI || retval == LT_FALSEI)
744: retval = LT_IF;
745: if (retval == LT_ELTRUE || retval == LT_ELFALSE)
746: retval = LT_ELIF;
1.3 deraadt 747: }
1.9 deraadt 748: if (retval != LT_PLAIN && (wascomment || incomment)) {
749: retval += LT_DODGY;
750: if (incomment)
751: linestate = LS_DIRTY;
752: }
753: /* skipcomment should have changed the state */
1.8 deraadt 754: if (linestate == LS_HASH)
755: abort(); /* bug */
1.7 deraadt 756: }
1.8 deraadt 757: if (linestate == LS_DIRTY) {
758: while (*cp != '\0')
759: cp = skipcomment(cp + 1);
760: }
761: debug("parser %s comment %s line",
762: comment_name[incomment], linestate_name[linestate]);
1.7 deraadt 763: return (retval);
764: }
765:
766: /*
1.9 deraadt 767: * These are the operators that are supported by the expression
768: * evaluator. Note that if support for division is added then we also
769: * need short-circuiting booleans because of divide-by-zero.
1.7 deraadt 770: */
/* Binary operator "<": 1 if a is less than b, else 0. */
static int
op_lt(int a, int b)
{
	return a < b ? 1 : 0;
}
776:
/* Binary operator ">": 1 if a is greater than b, else 0. */
static int
op_gt(int a, int b)
{
	return a > b ? 1 : 0;
}
782:
/* Binary operator "<=": 1 if a is less than or equal to b, else 0. */
static int
op_le(int a, int b)
{
	return a <= b ? 1 : 0;
}
788:
/* Binary operator ">=": 1 if a is greater than or equal to b, else 0. */
static int
op_ge(int a, int b)
{
	return a >= b ? 1 : 0;
}
794:
/* Binary operator "==": 1 if a equals b, else 0. */
static int
op_eq(int a, int b)
{
	return a == b ? 1 : 0;
}
800:
/* Binary operator "!=": 1 if a differs from b, else 0. */
static int
op_ne(int a, int b)
{
	return a != b ? 1 : 0;
}
806:
/*
 * Binary operator "||": 1 if either operand is non-zero, else 0.
 * Both operands arrive already evaluated, so nothing is short-circuited
 * at the expression level.
 */
static int
op_or(int a, int b)
{
	return (a != 0 || b != 0) ? 1 : 0;
}
812:
/*
 * Binary operator "&&": 1 if both operands are non-zero, else 0.
 * Both operands arrive already evaluated, so nothing is short-circuited
 * at the expression level.
 */
static int
op_and(int a, int b)
{
	return (a != 0 && b != 0) ? 1 : 0;
}
818:
819: /*
1.8 deraadt 820: * An evaluation function takes three arguments, as follows: (1) a pointer to
821: * an element of the precedence table which lists the operators at the current
822: * level of precedence; (2) a pointer to an integer which will receive the
823: * value of the expression; and (3) a pointer to a char* that points to the
824: * expression to be evaluated and that is updated to the end of the expression
825: * when evaluation is complete. The function returns LT_FALSE if the value of
826: * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the
827: * expression could not be evaluated.
1.7 deraadt 828: */
1.8 deraadt 829: struct ops;
830:
831: typedef Linetype eval_fn(const struct ops *, int *, const char **);
832:
833: static eval_fn eval_table, eval_unary;
834:
835: /*
836: * The precedence table. Expressions involving binary operators are evaluated
837: * in a table-driven way by eval_table. When it evaluates a subexpression it
838: * calls the inner function with its first argument pointing to the next
839: * element of the table. Innermost expressions have special non-table-driven
840: * handling.
841: */
842: static const struct ops {
843: eval_fn *inner;
844: struct op {
845: const char *str;
846: int (*fn)(int, int);
847: } op[5];
848: } eval_ops[] = {
849: { eval_table, { { "||", op_or } } },
850: { eval_table, { { "&&", op_and } } },
851: { eval_table, { { "==", op_eq },
852: { "!=", op_ne } } },
853: { eval_unary, { { "<=", op_le },
854: { ">=", op_ge },
855: { "<", op_lt },
856: { ">", op_gt } } }
857: };
1.7 deraadt 858:
859: /*
860: * Function for evaluating the innermost parts of expressions,
861: * viz. !expr (expr) defined(symbol) symbol number
862: * We reset the keepthis flag when we find a non-constant subexpression.
863: */
1.8 deraadt 864: static Linetype
865: eval_unary(const struct ops *ops, int *valp, const char **cpp)
1.7 deraadt 866: {
867: const char *cp;
868: char *ep;
869: int sym;
870:
871: cp = skipcomment(*cpp);
1.8 deraadt 872: if (*cp == '!') {
1.7 deraadt 873: debug("eval%d !", ops - eval_ops);
874: cp++;
875: if (eval_unary(ops, valp, &cp) == LT_IF)
876: return (LT_IF);
877: *valp = !*valp;
878: } else if (*cp == '(') {
879: cp++;
880: debug("eval%d (", ops - eval_ops);
881: if (eval_table(eval_ops, valp, &cp) == LT_IF)
882: return (LT_IF);
883: cp = skipcomment(cp);
884: if (*cp++ != ')')
885: return (LT_IF);
886: } else if (isdigit((unsigned char)*cp)) {
887: debug("eval%d number", ops - eval_ops);
888: *valp = strtol(cp, &ep, 0);
889: cp = skipsym(cp);
890: } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) {
891: cp = skipcomment(cp+7);
892: debug("eval%d defined", ops - eval_ops);
893: if (*cp++ != '(')
894: return (LT_IF);
895: cp = skipcomment(cp);
896: sym = findsym(cp);
1.8 deraadt 897: if (sym < 0 && !symlist)
1.7 deraadt 898: return (LT_IF);
899: *valp = (value[sym] != NULL);
900: cp = skipsym(cp);
901: cp = skipcomment(cp);
902: if (*cp++ != ')')
903: return (LT_IF);
904: keepthis = false;
905: } else if (!endsym(*cp)) {
906: debug("eval%d symbol", ops - eval_ops);
907: sym = findsym(cp);
1.8 deraadt 908: if (sym < 0 && !symlist)
1.7 deraadt 909: return (LT_IF);
910: if (value[sym] == NULL)
911: *valp = 0;
912: else {
913: *valp = strtol(value[sym], &ep, 0);
914: if (*ep != '\0' || ep == value[sym])
915: return (LT_IF);
916: }
917: cp = skipsym(cp);
918: keepthis = false;
919: } else
920: return (LT_IF);
921:
922: *cpp = cp;
923: debug("eval%d = %d", ops - eval_ops, *valp);
924: return (*valp ? LT_TRUE : LT_FALSE);
925: }
926:
927: /*
928: * Table-driven evaluation of binary operators.
929: */
1.8 deraadt 930: static Linetype
931: eval_table(const struct ops *ops, int *valp, const char **cpp)
1.7 deraadt 932: {
1.8 deraadt 933: const struct op *op;
1.7 deraadt 934: const char *cp;
935: int val;
936:
937: debug("eval%d", ops - eval_ops);
938: cp = *cpp;
939: if (ops->inner(ops+1, valp, &cp) == LT_IF)
940: return (LT_IF);
941: for (;;) {
942: cp = skipcomment(cp);
943: for (op = ops->op; op->str != NULL; op++)
944: if (strncmp(cp, op->str, strlen(op->str)) == 0)
945: break;
946: if (op->str == NULL)
947: break;
948: cp += strlen(op->str);
949: debug("eval%d %s", ops - eval_ops, op->str);
950: if (ops->inner(ops+1, &val, &cp) == LT_IF)
1.8 deraadt 951: return (LT_IF);
1.7 deraadt 952: *valp = op->fn(*valp, val);
953: }
954:
955: *cpp = cp;
956: debug("eval%d = %d", ops - eval_ops, *valp);
957: return (*valp ? LT_TRUE : LT_FALSE);
1.1 deraadt 958: }
1.7 deraadt 959:
1.1 deraadt 960: /*
1.7 deraadt 961: * Evaluate the expression on a #if or #elif line. If we can work out
962: * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
1.8 deraadt 963: * return just a generic LT_IF.
1.1 deraadt 964: */
1.8 deraadt 965: static Linetype
1.7 deraadt 966: ifeval(const char **cpp)
967: {
1.8 deraadt 968: int ret;
1.7 deraadt 969: int val;
970:
971: debug("eval %s", *cpp);
972: keepthis = killconsts ? false : true;
1.8 deraadt 973: ret = eval_table(eval_ops, &val, cpp);
974: return (keepthis ? LT_IF : ret);
1.7 deraadt 975: }
976:
977: /*
978: * Skip over comments and stop at the next character position that is
1.11 avsm 979: * not whitespace. Between calls we keep the comment state in the
980: * global variable incomment, and we also adjust the global variable
981: * linestate when we see a newline.
1.8 deraadt 982: * XXX: doesn't cope with the buffer splitting inside a state transition.
1.7 deraadt 983: */
1.8 deraadt 984: static const char *
1.7 deraadt 985: skipcomment(const char *cp)
1.3 deraadt 986: {
1.8 deraadt 987: if (text || ignoring[depth]) {
1.11 avsm 988: for (; isspace((unsigned char)*cp); cp++)
989: if (*cp == '\n')
990: linestate = LS_START;
1.8 deraadt 991: return (cp);
992: }
993: while (*cp != '\0')
994: if (strncmp(cp, "\\\n", 2) == 0)
995: cp += 2;
996: else switch (incomment) {
997: case NO_COMMENT:
998: if (strncmp(cp, "/\\\n", 3) == 0) {
999: incomment = STARTING_COMMENT;
1000: cp += 3;
1001: } else if (strncmp(cp, "/*", 2) == 0) {
1.3 deraadt 1002: incomment = C_COMMENT;
1.8 deraadt 1003: cp += 2;
1004: } else if (strncmp(cp, "//", 2) == 0) {
1005: incomment = CXX_COMMENT;
1006: cp += 2;
1007: } else if (strncmp(cp, "\n", 1) == 0) {
1008: linestate = LS_START;
1009: cp += 1;
1010: } else if (strchr(" \t", *cp) != NULL) {
1011: cp += 1;
1012: } else
1013: return (cp);
1014: continue;
1015: case CXX_COMMENT:
1016: if (strncmp(cp, "\n", 1) == 0) {
1017: incomment = NO_COMMENT;
1018: linestate = LS_START;
1.3 deraadt 1019: }
1.8 deraadt 1020: cp += 1;
1021: continue;
1022: case C_COMMENT:
1023: if (strncmp(cp, "*\\\n", 3) == 0) {
1024: incomment = FINISHING_COMMENT;
1025: cp += 3;
1026: } else if (strncmp(cp, "*/", 2) == 0) {
1027: incomment = NO_COMMENT;
1028: cp += 2;
1029: } else
1030: cp += 1;
1031: continue;
1032: case STARTING_COMMENT:
1033: if (*cp == '*') {
1034: incomment = C_COMMENT;
1035: cp += 1;
1036: } else if (*cp == '/') {
1.3 deraadt 1037: incomment = CXX_COMMENT;
1.8 deraadt 1038: cp += 1;
1039: } else {
1040: incomment = NO_COMMENT;
1041: linestate = LS_DIRTY;
1.3 deraadt 1042: }
1.8 deraadt 1043: continue;
1044: case FINISHING_COMMENT:
1045: if (*cp == '/') {
1046: incomment = NO_COMMENT;
1047: cp += 1;
1048: } else
1049: incomment = C_COMMENT;
1050: continue;
1051: default:
1052: /* bug */
1053: abort();
1.3 deraadt 1054: }
1.8 deraadt 1055: return (cp);
1.1 deraadt 1056: }
1.7 deraadt 1057:
/*
 * Skip over an identifier: return a pointer to the first character at
 * or after cp that is not a letter, a digit, or an underscore.
 */
static const char *
skipsym(const char *cp)
{
	for (;;) {
		unsigned char ch = (unsigned char)*cp;

		if (!isalnum(ch) && ch != '_')
			return cp;
		cp++;
	}
}
1068:
1.1 deraadt 1069: /*
1.13 jmc 1070: * Look for the symbol in the symbol table. If it is found, we return
1.8 deraadt 1071: * the symbol table index, else we return -1.
1.1 deraadt 1072: */
1.8 deraadt 1073: static int
1.7 deraadt 1074: findsym(const char *str)
1.1 deraadt 1075: {
1.7 deraadt 1076: const char *cp;
1.3 deraadt 1077: int symind;
1078:
1.8 deraadt 1079: cp = skipsym(str);
1080: if (cp == str)
1081: return (-1);
1082: if (symlist)
1.7 deraadt 1083: printf("%.*s\n", (int)(cp-str), str);
1.8 deraadt 1084: for (symind = 0; symind < nsyms; ++symind) {
1085: if (strlcmp(symname[symind], str, cp-str) == 0) {
1.7 deraadt 1086: debug("findsym %s %s", symname[symind],
1087: value[symind] ? value[symind] : "");
1088: return (symind);
1.3 deraadt 1089: }
1.1 deraadt 1090: }
1.8 deraadt 1091: return (-1);
1.1 deraadt 1092: }
1.7 deraadt 1093:
1.1 deraadt 1094: /*
1.7 deraadt 1095: * Add a symbol to the symbol table.
1096: */
1.8 deraadt 1097: static void
1.7 deraadt 1098: addsym(bool ignorethis, bool definethis, char *sym)
1099: {
1100: int symind;
1101: char *val;
1102:
1103: symind = findsym(sym);
1.8 deraadt 1104: if (symind < 0) {
1.7 deraadt 1105: if (nsyms >= MAXSYMS)
1106: errx(2, "too many symbols");
1107: symind = nsyms++;
1108: }
1109: symname[symind] = sym;
1110: ignore[symind] = ignorethis;
1.8 deraadt 1111: val = sym + (skipsym(sym) - sym);
1.7 deraadt 1112: if (definethis) {
1113: if (*val == '=') {
1114: value[symind] = val+1;
1115: *val = '\0';
1116: } else if (*val == '\0')
1117: value[symind] = "";
1118: else
1119: usage();
1120: } else {
1121: if (*val != '\0')
1122: usage();
1123: value[symind] = NULL;
1124: }
1125: }
1126:
/*
 * Compare the whole of s with the first n characters of t.
 * Like strncmp(), except that a match also requires s to end there,
 * i.e. s[n] == '\0'.  Returns zero on a match and non-zero otherwise.
 */
static int
strlcmp(const char *s, const char *t, size_t n)
{
	size_t i;

	for (i = 0; i < n && t[i] != '\0'; i++) {
		if (s[i] != t[i])
			return (unsigned char)s[i] - (unsigned char)t[i];
	}
	/* the prefix matched: equal only if s has nothing left over */
	return (unsigned char)s[i];
}
1141:
1.7 deraadt 1142: /*
1.8 deraadt 1143: * Diagnostics.
1.7 deraadt 1144: */
1.8 deraadt 1145: static void
1.7 deraadt 1146: debug(const char *msg, ...)
1.1 deraadt 1147: {
1.7 deraadt 1148: va_list ap;
1149:
1150: if (debugging) {
1151: va_start(ap, msg);
1152: vwarnx(msg, ap);
1153: va_end(ap);
1154: }
1.1 deraadt 1155: }
1156:
1.8 deraadt 1157: static void
1158: error(const char *msg)
1.7 deraadt 1159: {
1.8 deraadt 1160: if (depth == 0)
1.9 deraadt 1161: warnx("%s: %d: %s", filename, linenum, msg);
1.7 deraadt 1162: else
1.9 deraadt 1163: warnx("%s: %d: %s (#if line %d depth %d)",
1.8 deraadt 1164: filename, linenum, msg, stifline[depth], depth);
1.9 deraadt 1165: errx(2, "output may be truncated");
1.1 deraadt 1166: }