Annotation of src/usr.bin/ctags/C.c, Revision 1.5
1.5 ! deraadt 1: /* $OpenBSD: C.c,v 1.4 1997/07/21 23:18:45 deraadt Exp $ */
1.1 deraadt 2: /* $NetBSD: C.c,v 1.3 1995/03/26 20:14:02 glass Exp $ */
3:
4: /*
5: * Copyright (c) 1987, 1993, 1994
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. All advertising materials mentioning features or use of this software
17: * must display the following acknowledgement:
18: * This product includes software developed by the University of
19: * California, Berkeley and its contributors.
20: * 4. Neither the name of the University nor the names of its contributors
21: * may be used to endorse or promote products derived from this software
22: * without specific prior written permission.
23: *
24: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34: * SUCH DAMAGE.
35: */
36:
37: #ifndef lint
38: #if 0
39: static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94";
40: #else
1.5 ! deraadt 41: static char rcsid[] = "$OpenBSD: C.c,v 1.4 1997/07/21 23:18:45 deraadt Exp $";
1.1 deraadt 42: #endif
43: #endif /* not lint */
44:
45: #include <limits.h>
46: #include <stdio.h>
47: #include <string.h>
48:
49: #include "ctags.h"
50:
51: static int func_entry __P((void));
52: static void hash_entry __P((void));
53: static void skip_string __P((int));
54: static int str_entry __P((int));
55:
56: /*
57: * c_entries --
58: * read .c and .h files and call appropriate routines
59: */
60: void
61: c_entries()
62: {
63: int c; /* current character */
64: int level; /* brace level */
65: int token; /* if reading a token */
66: int t_def; /* if reading a typedef */
67: int t_level; /* typedef's brace level */
68: char *sp; /* buffer pointer */
69: char tok[MAXTOKEN]; /* token buffer */
70:
71: lineftell = ftell(inf);
72: sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
73: while (GETC(!=, EOF)) {
74: switch (c) {
75: /*
76: * Here's where it DOESN'T handle: {
77: * foo(a)
78: * {
79: * #ifdef notdef
80: * }
81: * #endif
82: * if (a)
83: * puts("hello, world");
84: * }
85: */
86: case '{':
87: ++level;
88: goto endtok;
89: case '}':
90: /*
91: * if level goes below zero, try and fix
92: * it, even though we've already messed up
93: */
94: if (--level < 0)
95: level = 0;
96: goto endtok;
97:
98: case '\n':
99: SETLINE;
100: /*
101: * the above 3 cases are similar in that they
102: * are special characters that also end tokens.
103: */
1.5 ! deraadt 104: endtok: if (sp > tok) {
1.1 deraadt 105: *sp = EOS;
106: token = YES;
107: sp = tok;
108: }
109: else
110: token = NO;
111: continue;
112:
113: /*
114: * We ignore quoted strings and character constants
115: * completely.
116: */
117: case '"':
118: case '\'':
119: (void)skip_string(c);
120: break;
121:
122: /*
123: * comments can be fun; note the state is unchanged after
124: * return, in case we found:
125: * "foo() XX comment XX { int bar; }"
126: */
127: case '/':
128: if (GETC(==, '*')) {
129: skip_comment();
130: continue;
131: }
132: (void)ungetc(c, inf);
133: c = '/';
134: goto storec;
135:
136: /* hash marks flag #define's. */
137: case '#':
138: if (sp == tok) {
139: hash_entry();
140: break;
141: }
142: goto storec;
143:
144: /*
145: * if we have a current token, parenthesis on
146: * level zero indicates a function.
147: */
148: case '(':
149: if (!level && token) {
150: int curline;
151:
152: if (sp != tok)
153: *sp = EOS;
154: /*
155: * grab the line immediately, we may
156: * already be wrong, for example,
157: * foo\n
158: * (arg1,
159: */
160: getline();
161: curline = lineno;
162: if (func_entry()) {
163: ++level;
164: pfnote(tok, curline);
165: }
166: break;
167: }
168: goto storec;
169:
170: /*
171: * semi-colons indicate the end of a typedef; if we find a
172: * typedef we search for the next semi-colon of the same
173: * level as the typedef. Ignoring "structs", they are
174: * tricky, since you can find:
175: *
1.3 deraadt 176: * "typedef int time_t;"
1.1 deraadt 177: * "typedef unsigned int u_int;"
178: * "typedef unsigned int u_int [10];"
179: *
180: * If looking at a typedef, we save a copy of the last token
181: * found. Then, when we find the ';' we take the current
182: * token if it starts with a valid token name, else we take
183: * the one we saved. There's probably some reasonable
184: * alternative to this...
185: */
186: case ';':
187: if (t_def && level == t_level) {
188: t_def = NO;
189: getline();
190: if (sp != tok)
191: *sp = EOS;
192: pfnote(tok, lineno);
193: break;
194: }
195: goto storec;
196:
197: /*
198: * store characters until one that can't be part of a token
199: * comes along; check the current token against certain
200: * reserved words.
201: */
202: default:
1.4 deraadt 203: /*
204: * to treat following function.
205: * func (arg) {
206: * ....
207: * }
208: */
209: if (c == ' ' || c == '\t') {
210: int save = c;
211: while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
212: ;
213: if (c == EOF)
214: return;
215: (void)ungetc(c, inf);
216: c = save;
217: }
1.1 deraadt 218: storec: if (!intoken(c)) {
219: if (sp == tok)
220: break;
221: *sp = EOS;
222: if (tflag) {
223: /* no typedefs inside typedefs */
224: if (!t_def &&
225: !memcmp(tok, "typedef",8)) {
226: t_def = YES;
227: t_level = level;
228: break;
229: }
230: /* catch "typedef struct" */
231: if ((!t_def || t_level < level)
232: && (!memcmp(tok, "struct", 7)
233: || !memcmp(tok, "union", 6)
234: || !memcmp(tok, "enum", 5))) {
235: /*
236: * get line immediately;
237: * may change before '{'
238: */
239: getline();
240: if (str_entry(c))
241: ++level;
242: break;
243: /* } */
244: }
245: }
246: sp = tok;
247: }
248: else if (sp != tok || begtoken(c)) {
1.5 ! deraadt 249: if (sp >= tok + sizeof tok) {
! 250: /* hell... truncate it */
! 251: if (sp == tok + sizeof(tok))
! 252: *sp = '\0';
! 253: sp++;
! 254: } else
! 255: *sp++ = c;
1.1 deraadt 256: token = YES;
257: }
258: continue;
259: }
260:
261: sp = tok;
262: token = NO;
263: }
264: }
265:
266: /*
267: * func_entry --
268: * handle a function reference
269: */
270: static int
271: func_entry()
272: {
273: int c; /* current character */
274: int level = 0; /* for matching '()' */
275:
276: /*
277: * Find the end of the assumed function declaration.
278: * Note that ANSI C functions can have type definitions so keep
279: * track of the parentheses nesting level.
280: */
281: while (GETC(!=, EOF)) {
282: switch (c) {
283: case '\'':
284: case '"':
285: /* skip strings and character constants */
286: skip_string(c);
287: break;
288: case '/':
289: /* skip comments */
290: if (GETC(==, '*'))
291: skip_comment();
292: break;
293: case '(':
294: level++;
295: break;
296: case ')':
297: if (level == 0)
298: goto fnd;
299: level--;
300: break;
301: case '\n':
302: SETLINE;
303: }
304: }
305: return (NO);
306: fnd:
307: /*
308: * we assume that the character after a function's right paren
309: * is a token character if it's a function and a non-token
310: * character if it's a declaration. Comments don't count...
311: */
312: for (;;) {
313: while (GETC(!=, EOF) && iswhite(c))
314: if (c == '\n')
315: SETLINE;
316: if (intoken(c) || c == '{')
317: break;
318: if (c == '/' && GETC(==, '*'))
319: skip_comment();
320: else { /* don't ever "read" '/' */
321: (void)ungetc(c, inf);
322: return (NO);
323: }
324: }
325: if (c != '{')
326: (void)skip_key('{');
327: return (YES);
328: }
329:
330: /*
331: * hash_entry --
332: * handle a line starting with a '#'
333: */
334: static void
335: hash_entry()
336: {
337: int c; /* character read */
338: int curline; /* line started on */
339: char *sp; /* buffer pointer */
340: char tok[MAXTOKEN]; /* storage buffer */
1.4 deraadt 341:
342: /*
343: * to treat following macro.
344: * # macro(arg) ....
345: */
346: while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
347: ;
348: (void)ungetc(c, inf);
1.1 deraadt 349:
350: curline = lineno;
351: for (sp = tok;;) { /* get next token */
352: if (GETC(==, EOF))
353: return;
354: if (iswhite(c))
355: break;
1.5 ! deraadt 356: if (sp >= tok + sizeof(tok)) {
! 357: /* hell... truncate it */
! 358: if (sp == tok + sizeof(tok))
! 359: *sp = '\0';
! 360: sp++;
! 361: } else
! 362: *sp++ = c;
1.1 deraadt 363: }
364: *sp = EOS;
365: if (memcmp(tok, "define", 6)) /* only interested in #define's */
366: goto skip;
367: for (;;) { /* this doesn't handle "#define \n" */
368: if (GETC(==, EOF))
369: return;
370: if (!iswhite(c))
371: break;
372: }
373: for (sp = tok;;) { /* get next token */
1.5 ! deraadt 374: if (sp >= tok + sizeof tok) {
! 375: /* hell... truncate it */
! 376: if (sp == tok + sizeof(tok))
! 377: *sp = '\0';
! 378: sp++;
! 379: } else
! 380: *sp++ = c;
1.1 deraadt 381: if (GETC(==, EOF))
382: return;
383: /*
384: * this is where it DOESN'T handle
385: * "#define \n"
386: */
387: if (!intoken(c))
388: break;
389: }
390: *sp = EOS;
391: if (dflag || c == '(') { /* only want macros */
392: getline();
393: pfnote(tok, curline);
394: }
395: skip: if (c == '\n') { /* get rid of rest of define */
396: SETLINE
397: if (*(sp - 1) != '\\')
398: return;
399: }
400: (void)skip_key('\n');
401: }
402:
403: /*
404: * str_entry --
405: * handle a struct, union or enum entry
406: */
407: static int
408: str_entry(c)
409: int c; /* current character */
410: {
411: int curline; /* line started on */
412: char *sp; /* buffer pointer */
413: char tok[LINE_MAX]; /* storage buffer */
414:
415: curline = lineno;
416: while (iswhite(c))
417: if (GETC(==, EOF))
418: return (NO);
419: if (c == '{') /* it was "struct {" */
420: return (YES);
421: for (sp = tok;;) { /* get next token */
1.5 ! deraadt 422: if (sp >= tok + sizeof tok) {
! 423: /* hell... truncate it */
! 424: if (sp == tok + sizeof(tok))
! 425: *sp = '\0';
! 426: sp++;
! 427: } else
! 428: *sp++ = c;
1.1 deraadt 429: if (GETC(==, EOF))
430: return (NO);
431: if (!intoken(c))
432: break;
433: }
434: switch (c) {
435: case '{': /* it was "struct foo{" */
436: --sp;
437: break;
438: case '\n': /* it was "struct foo\n" */
439: SETLINE;
440: /*FALLTHROUGH*/
441: default: /* probably "struct foo " */
442: while (GETC(!=, EOF))
443: if (!iswhite(c))
444: break;
445: if (c != '{') {
446: (void)ungetc(c, inf);
447: return (NO);
448: }
449: }
450: *sp = EOS;
451: pfnote(tok, curline);
452: return (YES);
453: }
454:
455: /*
456: * skip_comment --
457: * skip over comment
458: */
459: void
460: skip_comment()
461: {
462: int c; /* character read */
463: int star; /* '*' flag */
464:
465: for (star = 0; GETC(!=, EOF);)
466: switch(c) {
467: /* comments don't nest, nor can they be escaped. */
468: case '*':
469: star = YES;
470: break;
471: case '/':
472: if (star)
473: return;
474: break;
475: case '\n':
476: SETLINE;
477: /*FALLTHROUGH*/
478: default:
479: star = NO;
480: break;
481: }
482: }
483:
484: /*
485: * skip_string --
486: * skip to the end of a string or character constant.
487: */
488: void
489: skip_string(key)
490: int key;
491: {
492: int c,
493: skip;
494:
495: for (skip = NO; GETC(!=, EOF); )
496: switch (c) {
497: case '\\': /* a backslash escapes anything */
498: skip = !skip; /* we toggle in case it's "\\" */
499: break;
500: case '\n':
501: SETLINE;
502: /*FALLTHROUGH*/
503: default:
504: if (c == key && !skip)
505: return;
506: skip = NO;
507: }
508: }
509:
510: /*
511: * skip_key --
512: * skip to next char "key"
513: */
514: int
515: skip_key(key)
516: int key;
517: {
518: int c,
519: skip,
520: retval;
521:
522: for (skip = retval = NO; GETC(!=, EOF);)
523: switch(c) {
524: case '\\': /* a backslash escapes anything */
525: skip = !skip; /* we toggle in case it's "\\" */
526: break;
527: case ';': /* special case for yacc; if one */
528: case '|': /* of these chars occurs, we may */
529: retval = YES; /* have moved out of the rule */
530: break; /* not used by C */
531: case '\'':
532: case '"':
533: /* skip strings and character constants */
534: skip_string(c);
535: break;
536: case '/':
537: /* skip comments */
538: if (GETC(==, '*')) {
539: skip_comment();
540: break;
541: }
542: (void)ungetc(c, inf);
543: c = '/';
544: goto norm;
545: case '\n':
546: SETLINE;
547: /*FALLTHROUGH*/
548: default:
549: norm:
550: if (c == key && !skip)
551: return (retval);
552: skip = NO;
553: }
554: return (retval);
555: }