Annotation of src/usr.bin/ctags/C.c, Revision 1.9
1.9 ! millert 1: /* $OpenBSD: C.c,v 1.8 2003/05/12 20:41:39 pjanzen Exp $ */
1.1 deraadt 2: /* $NetBSD: C.c,v 1.3 1995/03/26 20:14:02 glass Exp $ */
3:
4: /*
5: * Copyright (c) 1987, 1993, 1994
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
1.9 ! millert 16: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: */
32:
33: #ifndef lint
34: #if 0
35: static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94";
36: #else
1.9 ! millert 37: static char rcsid[] = "$OpenBSD: C.c,v 1.8 2003/05/12 20:41:39 pjanzen Exp $";
1.1 deraadt 38: #endif
39: #endif /* not lint */
40:
41: #include <limits.h>
42: #include <stdio.h>
43: #include <string.h>
44:
45: #include "ctags.h"
46:
/* Helpers used only within this file. */
static int	func_entry(void);	/* look for a function definition */
static void	hash_entry(void);	/* handle a line starting with '#' */
static int	str_entry(int c);	/* handle struct/union/enum names */
static void	skip_string(int key);	/* skip to the closing quote */
1.1 deraadt 51:
52: /*
53: * c_entries --
54: * read .c and .h files and call appropriate routines
55: */
56: void
57: c_entries()
58: {
59: int c; /* current character */
60: int level; /* brace level */
61: int token; /* if reading a token */
62: int t_def; /* if reading a typedef */
63: int t_level; /* typedef's brace level */
64: char *sp; /* buffer pointer */
65: char tok[MAXTOKEN]; /* token buffer */
66:
67: lineftell = ftell(inf);
68: sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
69: while (GETC(!=, EOF)) {
70: switch (c) {
71: /*
72: * Here's where it DOESN'T handle: {
73: * foo(a)
74: * {
75: * #ifdef notdef
76: * }
77: * #endif
78: * if (a)
79: * puts("hello, world");
80: * }
81: */
82: case '{':
83: ++level;
84: goto endtok;
85: case '}':
86: /*
87: * if level goes below zero, try and fix
88: * it, even though we've already messed up
89: */
90: if (--level < 0)
91: level = 0;
92: goto endtok;
93:
94: case '\n':
95: SETLINE;
96: /*
97: * the above 3 cases are similar in that they
98: * are special characters that also end tokens.
99: */
1.5 deraadt 100: endtok: if (sp > tok) {
1.1 deraadt 101: *sp = EOS;
102: token = YES;
103: sp = tok;
104: }
105: else
106: token = NO;
107: continue;
108:
109: /*
110: * We ignore quoted strings and character constants
111: * completely.
112: */
113: case '"':
114: case '\'':
115: (void)skip_string(c);
116: break;
117:
118: /*
119: * comments can be fun; note the state is unchanged after
120: * return, in case we found:
121: * "foo() XX comment XX { int bar; }"
122: */
123: case '/':
124: if (GETC(==, '*')) {
1.8 pjanzen 125: skip_comment(c);
126: continue;
127: } else if (c == '/') {
128: skip_comment(c);
1.1 deraadt 129: continue;
130: }
131: (void)ungetc(c, inf);
132: c = '/';
133: goto storec;
134:
135: /* hash marks flag #define's. */
136: case '#':
137: if (sp == tok) {
138: hash_entry();
139: break;
140: }
141: goto storec;
142:
143: /*
144: * if we have a current token, parenthesis on
145: * level zero indicates a function.
146: */
147: case '(':
1.8 pjanzen 148: do {
149: c = getc(inf);
150: } while (iswhite(c));
151: if (c == '*')
152: break;
153: else
154: ungetc(c, inf);
1.1 deraadt 155: if (!level && token) {
156: int curline;
157:
158: if (sp != tok)
159: *sp = EOS;
160: /*
161: * grab the line immediately, we may
162: * already be wrong, for example,
163: * foo\n
164: * (arg1,
165: */
166: getline();
167: curline = lineno;
168: if (func_entry()) {
169: ++level;
170: pfnote(tok, curline);
171: }
172: break;
173: }
174: goto storec;
175:
176: /*
177: * semi-colons indicate the end of a typedef; if we find a
178: * typedef we search for the next semi-colon of the same
179: * level as the typedef. Ignoring "structs", they are
180: * tricky, since you can find:
181: *
1.3 deraadt 182: * "typedef int time_t;"
1.1 deraadt 183: * "typedef unsigned int u_int;"
184: * "typedef unsigned int u_int [10];"
185: *
186: * If looking at a typedef, we save a copy of the last token
187: * found. Then, when we find the ';' we take the current
188: * token if it starts with a valid token name, else we take
189: * the one we saved. There's probably some reasonable
190: * alternative to this...
191: */
192: case ';':
193: if (t_def && level == t_level) {
194: t_def = NO;
195: getline();
196: if (sp != tok)
197: *sp = EOS;
198: pfnote(tok, lineno);
199: break;
200: }
201: goto storec;
202:
203: /*
204: * store characters until one that can't be part of a token
205: * comes along; check the current token against certain
206: * reserved words.
207: */
208: default:
1.4 deraadt 209: /*
210: * to treat following function.
211: * func (arg) {
212: * ....
213: * }
214: */
215: if (c == ' ' || c == '\t') {
216: int save = c;
217: while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
218: ;
219: if (c == EOF)
220: return;
221: (void)ungetc(c, inf);
222: c = save;
223: }
1.1 deraadt 224: storec: if (!intoken(c)) {
225: if (sp == tok)
226: break;
227: *sp = EOS;
228: if (tflag) {
229: /* no typedefs inside typedefs */
230: if (!t_def &&
231: !memcmp(tok, "typedef",8)) {
232: t_def = YES;
233: t_level = level;
234: break;
235: }
236: /* catch "typedef struct" */
237: if ((!t_def || t_level < level)
238: && (!memcmp(tok, "struct", 7)
239: || !memcmp(tok, "union", 6)
240: || !memcmp(tok, "enum", 5))) {
241: /*
242: * get line immediately;
243: * may change before '{'
244: */
245: getline();
246: if (str_entry(c))
247: ++level;
248: break;
249: /* } */
250: }
251: }
252: sp = tok;
253: }
254: else if (sp != tok || begtoken(c)) {
1.6 espie 255: /* hell... truncate it */
256: if (sp == tok + sizeof tok - 1)
257: *sp = EOS;
258: else
1.5 deraadt 259: *sp++ = c;
1.1 deraadt 260: token = YES;
261: }
262: continue;
263: }
264:
265: sp = tok;
266: token = NO;
267: }
268: }
269:
270: /*
271: * func_entry --
272: * handle a function reference
273: */
274: static int
275: func_entry()
276: {
277: int c; /* current character */
278: int level = 0; /* for matching '()' */
1.8 pjanzen 279: static char attribute[] = "__attribute__";
280: char maybe_attribute[sizeof attribute + 1];
281: char *anext;
1.1 deraadt 282:
283: /*
284: * Find the end of the assumed function declaration.
285: * Note that ANSI C functions can have type definitions so keep
286: * track of the parentheses nesting level.
287: */
288: while (GETC(!=, EOF)) {
289: switch (c) {
290: case '\'':
291: case '"':
292: /* skip strings and character constants */
293: skip_string(c);
294: break;
295: case '/':
296: /* skip comments */
297: if (GETC(==, '*'))
1.8 pjanzen 298: skip_comment(c);
299: else if (c == '/')
300: skip_comment(c);
1.1 deraadt 301: break;
302: case '(':
303: level++;
304: break;
305: case ')':
306: if (level == 0)
307: goto fnd;
308: level--;
309: break;
310: case '\n':
311: SETLINE;
312: }
313: }
314: return (NO);
315: fnd:
316: /*
317: * we assume that the character after a function's right paren
318: * is a token character if it's a function and a non-token
319: * character if it's a declaration. Comments don't count...
320: */
1.8 pjanzen 321: for (anext = maybe_attribute;;) {
1.1 deraadt 322: while (GETC(!=, EOF) && iswhite(c))
323: if (c == '\n')
324: SETLINE;
1.8 pjanzen 325: if (c == EOF)
326: return NO;
327: /*
328: * Recognize the GNU __attribute__ extension, which would
329: * otherwise make the heuristic test DTWT
330: */
331: if (anext == maybe_attribute) {
332: if (intoken(c)) {
333: *anext++ = c;
334: continue;
335: }
336: } else {
337: if (intoken(c)) {
338: if (anext - maybe_attribute < (int)(sizeof attribute - 1))
339: *anext++ = c;
340: else
341: break;
342: continue;
343: } else {
344: *anext++ = '\0';
345: if (strcmp(maybe_attribute, attribute) == 0) {
346: (void)ungetc(c, inf);
347: return NO;
348: }
349: break;
350: }
351: }
1.1 deraadt 352: if (intoken(c) || c == '{')
353: break;
354: if (c == '/' && GETC(==, '*'))
1.8 pjanzen 355: skip_comment(c);
356: else if (c == '/')
357: skip_comment(c);
1.1 deraadt 358: else { /* don't ever "read" '/' */
359: (void)ungetc(c, inf);
360: return (NO);
361: }
362: }
363: if (c != '{')
364: (void)skip_key('{');
365: return (YES);
366: }
367:
368: /*
369: * hash_entry --
370: * handle a line starting with a '#'
371: */
372: static void
373: hash_entry()
374: {
375: int c; /* character read */
376: int curline; /* line started on */
377: char *sp; /* buffer pointer */
378: char tok[MAXTOKEN]; /* storage buffer */
1.4 deraadt 379:
380: /*
381: * to treat following macro.
382: * # macro(arg) ....
383: */
384: while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
385: ;
386: (void)ungetc(c, inf);
1.1 deraadt 387:
388: curline = lineno;
389: for (sp = tok;;) { /* get next token */
390: if (GETC(==, EOF))
391: return;
392: if (iswhite(c))
393: break;
1.6 espie 394: /* hell... truncate it */
395: if (sp == tok + sizeof tok - 1)
396: *sp = EOS;
397: else
1.5 deraadt 398: *sp++ = c;
1.1 deraadt 399: }
400: *sp = EOS;
401: if (memcmp(tok, "define", 6)) /* only interested in #define's */
402: goto skip;
403: for (;;) { /* this doesn't handle "#define \n" */
404: if (GETC(==, EOF))
405: return;
406: if (!iswhite(c))
407: break;
408: }
409: for (sp = tok;;) { /* get next token */
1.6 espie 410: /* hell... truncate it */
411: if (sp == tok + sizeof tok - 1)
412: *sp = EOS;
413: else
1.5 deraadt 414: *sp++ = c;
1.1 deraadt 415: if (GETC(==, EOF))
416: return;
417: /*
418: * this is where it DOESN'T handle
419: * "#define \n"
420: */
421: if (!intoken(c))
422: break;
423: }
424: *sp = EOS;
425: if (dflag || c == '(') { /* only want macros */
426: getline();
427: pfnote(tok, curline);
428: }
429: skip: if (c == '\n') { /* get rid of rest of define */
430: SETLINE
431: if (*(sp - 1) != '\\')
432: return;
433: }
434: (void)skip_key('\n');
435: }
436:
437: /*
438: * str_entry --
439: * handle a struct, union or enum entry
440: */
441: static int
442: str_entry(c)
443: int c; /* current character */
444: {
445: int curline; /* line started on */
446: char *sp; /* buffer pointer */
447: char tok[LINE_MAX]; /* storage buffer */
448:
449: curline = lineno;
450: while (iswhite(c))
451: if (GETC(==, EOF))
452: return (NO);
453: if (c == '{') /* it was "struct {" */
454: return (YES);
455: for (sp = tok;;) { /* get next token */
1.6 espie 456: /* hell... truncate it */
457: if (sp == tok + sizeof tok - 1)
458: *sp = EOS;
459: else
1.5 deraadt 460: *sp++ = c;
1.1 deraadt 461: if (GETC(==, EOF))
462: return (NO);
463: if (!intoken(c))
464: break;
465: }
466: switch (c) {
467: case '{': /* it was "struct foo{" */
468: --sp;
469: break;
470: case '\n': /* it was "struct foo\n" */
471: SETLINE;
472: /*FALLTHROUGH*/
473: default: /* probably "struct foo " */
474: while (GETC(!=, EOF))
475: if (!iswhite(c))
476: break;
477: if (c != '{') {
478: (void)ungetc(c, inf);
479: return (NO);
480: }
481: }
482: *sp = EOS;
483: pfnote(tok, curline);
484: return (YES);
485: }
486:
487: /*
488: * skip_comment --
489: * skip over comment
490: */
491: void
1.8 pjanzen 492: skip_comment(int commenttype)
1.1 deraadt 493: {
494: int c; /* character read */
495: int star; /* '*' flag */
496:
497: for (star = 0; GETC(!=, EOF);)
498: switch(c) {
499: /* comments don't nest, nor can they be escaped. */
500: case '*':
501: star = YES;
502: break;
503: case '/':
1.8 pjanzen 504: if (commenttype == '*' && star)
1.1 deraadt 505: return;
506: break;
507: case '\n':
1.8 pjanzen 508: if (commenttype == '/') {
509: /* We don't really parse C, so sometimes it
510: * is necessary to see the newline
511: */
512: ungetc(c, inf);
513: return;
514: }
1.1 deraadt 515: SETLINE;
516: /*FALLTHROUGH*/
517: default:
518: star = NO;
519: break;
520: }
521: }
522:
523: /*
524: * skip_string --
525: * skip to the end of a string or character constant.
526: */
527: void
528: skip_string(key)
529: int key;
530: {
531: int c,
532: skip;
533:
534: for (skip = NO; GETC(!=, EOF); )
535: switch (c) {
536: case '\\': /* a backslash escapes anything */
537: skip = !skip; /* we toggle in case it's "\\" */
538: break;
539: case '\n':
540: SETLINE;
541: /*FALLTHROUGH*/
542: default:
543: if (c == key && !skip)
544: return;
545: skip = NO;
546: }
547: }
548:
549: /*
550: * skip_key --
551: * skip to next char "key"
552: */
553: int
554: skip_key(key)
555: int key;
556: {
557: int c,
558: skip,
559: retval;
560:
561: for (skip = retval = NO; GETC(!=, EOF);)
562: switch(c) {
563: case '\\': /* a backslash escapes anything */
564: skip = !skip; /* we toggle in case it's "\\" */
565: break;
566: case ';': /* special case for yacc; if one */
567: case '|': /* of these chars occurs, we may */
568: retval = YES; /* have moved out of the rule */
569: break; /* not used by C */
570: case '\'':
571: case '"':
572: /* skip strings and character constants */
573: skip_string(c);
574: break;
575: case '/':
576: /* skip comments */
577: if (GETC(==, '*')) {
1.8 pjanzen 578: skip_comment(c);
579: break;
580: } else if (c == '/') {
581: skip_comment(c);
1.1 deraadt 582: break;
583: }
584: (void)ungetc(c, inf);
585: c = '/';
586: goto norm;
587: case '\n':
588: SETLINE;
589: /*FALLTHROUGH*/
590: default:
591: norm:
592: if (c == key && !skip)
593: return (retval);
594: skip = NO;
595: }
596: return (retval);
597: }