Annotation of src/usr.bin/ctags/C.c, Revision 1.4
1.4 ! deraadt 1: /* $OpenBSD: C.c,v 1.3 1997/06/30 06:26:33 deraadt Exp $ */
1.1 deraadt 2: /* $NetBSD: C.c,v 1.3 1995/03/26 20:14:02 glass Exp $ */
3:
4: /*
5: * Copyright (c) 1987, 1993, 1994
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. All advertising materials mentioning features or use of this software
17: * must display the following acknowledgement:
18: * This product includes software developed by the University of
19: * California, Berkeley and its contributors.
20: * 4. Neither the name of the University nor the names of its contributors
21: * may be used to endorse or promote products derived from this software
22: * without specific prior written permission.
23: *
24: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34: * SUCH DAMAGE.
35: */
36:
/*
 * Version identification strings; nothing here is compiled when "lint"
 * is defined.  The Berkeley SCCS id sits under "#if 0" and is therefore
 * never built; the RCS id is the one that ends up in the object file.
 */
#if !defined(lint)
#if 0
static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94";
#else
static char rcsid[] = "$OpenBSD: C.c,v 1.3 1997/06/30 06:26:33 deraadt Exp $";
#endif
#endif /* !lint */
44:
45: #include <limits.h>
46: #include <stdio.h>
47: #include <string.h>
48:
49: #include "ctags.h"
50:
51: static int func_entry __P((void));
52: static void hash_entry __P((void));
53: static void skip_string __P((int));
54: static int str_entry __P((int));
55:
56: /*
57: * c_entries --
58: * read .c and .h files and call appropriate routines
59: */
60: void
61: c_entries()
62: {
63: int c; /* current character */
64: int level; /* brace level */
65: int token; /* if reading a token */
66: int t_def; /* if reading a typedef */
67: int t_level; /* typedef's brace level */
68: char *sp; /* buffer pointer */
69: char tok[MAXTOKEN]; /* token buffer */
70:
71: lineftell = ftell(inf);
72: sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
73: while (GETC(!=, EOF)) {
74: switch (c) {
75: /*
76: * Here's where it DOESN'T handle: {
77: * foo(a)
78: * {
79: * #ifdef notdef
80: * }
81: * #endif
82: * if (a)
83: * puts("hello, world");
84: * }
85: */
86: case '{':
87: ++level;
88: goto endtok;
89: case '}':
90: /*
91: * if level goes below zero, try and fix
92: * it, even though we've already messed up
93: */
94: if (--level < 0)
95: level = 0;
96: goto endtok;
97:
98: case '\n':
99: SETLINE;
100: /*
101: * the above 3 cases are similar in that they
102: * are special characters that also end tokens.
103: */
104: endtok: if (sp > tok) {
105: *sp = EOS;
106: token = YES;
107: sp = tok;
108: }
109: else
110: token = NO;
111: continue;
112:
113: /*
114: * We ignore quoted strings and character constants
115: * completely.
116: */
117: case '"':
118: case '\'':
119: (void)skip_string(c);
120: break;
121:
122: /*
123: * comments can be fun; note the state is unchanged after
124: * return, in case we found:
125: * "foo() XX comment XX { int bar; }"
126: */
127: case '/':
128: if (GETC(==, '*')) {
129: skip_comment();
130: continue;
131: }
132: (void)ungetc(c, inf);
133: c = '/';
134: goto storec;
135:
136: /* hash marks flag #define's. */
137: case '#':
138: if (sp == tok) {
139: hash_entry();
140: break;
141: }
142: goto storec;
143:
144: /*
145: * if we have a current token, parenthesis on
146: * level zero indicates a function.
147: */
148: case '(':
149: if (!level && token) {
150: int curline;
151:
152: if (sp != tok)
153: *sp = EOS;
154: /*
155: * grab the line immediately, we may
156: * already be wrong, for example,
157: * foo\n
158: * (arg1,
159: */
160: getline();
161: curline = lineno;
162: if (func_entry()) {
163: ++level;
164: pfnote(tok, curline);
165: }
166: break;
167: }
168: goto storec;
169:
170: /*
171: * semi-colons indicate the end of a typedef; if we find a
172: * typedef we search for the next semi-colon of the same
173: * level as the typedef. Ignoring "structs", they are
174: * tricky, since you can find:
175: *
1.3 deraadt 176: * "typedef int time_t;"
1.1 deraadt 177: * "typedef unsigned int u_int;"
178: * "typedef unsigned int u_int [10];"
179: *
180: * If looking at a typedef, we save a copy of the last token
181: * found. Then, when we find the ';' we take the current
182: * token if it starts with a valid token name, else we take
183: * the one we saved. There's probably some reasonable
184: * alternative to this...
185: */
186: case ';':
187: if (t_def && level == t_level) {
188: t_def = NO;
189: getline();
190: if (sp != tok)
191: *sp = EOS;
192: pfnote(tok, lineno);
193: break;
194: }
195: goto storec;
196:
197: /*
198: * store characters until one that can't be part of a token
199: * comes along; check the current token against certain
200: * reserved words.
201: */
202: default:
1.4 ! deraadt 203: /*
! 204: * to treat following function.
! 205: * func (arg) {
! 206: * ....
! 207: * }
! 208: */
! 209: if (c == ' ' || c == '\t') {
! 210: int save = c;
! 211: while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
! 212: ;
! 213: if (c == EOF)
! 214: return;
! 215: (void)ungetc(c, inf);
! 216: c = save;
! 217: }
1.1 deraadt 218: storec: if (!intoken(c)) {
219: if (sp == tok)
220: break;
221: *sp = EOS;
222: if (tflag) {
223: /* no typedefs inside typedefs */
224: if (!t_def &&
225: !memcmp(tok, "typedef",8)) {
226: t_def = YES;
227: t_level = level;
228: break;
229: }
230: /* catch "typedef struct" */
231: if ((!t_def || t_level < level)
232: && (!memcmp(tok, "struct", 7)
233: || !memcmp(tok, "union", 6)
234: || !memcmp(tok, "enum", 5))) {
235: /*
236: * get line immediately;
237: * may change before '{'
238: */
239: getline();
240: if (str_entry(c))
241: ++level;
242: break;
243: /* } */
244: }
245: }
246: sp = tok;
247: }
248: else if (sp != tok || begtoken(c)) {
249: *sp++ = c;
250: token = YES;
251: }
252: continue;
253: }
254:
255: sp = tok;
256: token = NO;
257: }
258: }
259:
260: /*
261: * func_entry --
262: * handle a function reference
263: */
264: static int
265: func_entry()
266: {
267: int c; /* current character */
268: int level = 0; /* for matching '()' */
269:
270: /*
271: * Find the end of the assumed function declaration.
272: * Note that ANSI C functions can have type definitions so keep
273: * track of the parentheses nesting level.
274: */
275: while (GETC(!=, EOF)) {
276: switch (c) {
277: case '\'':
278: case '"':
279: /* skip strings and character constants */
280: skip_string(c);
281: break;
282: case '/':
283: /* skip comments */
284: if (GETC(==, '*'))
285: skip_comment();
286: break;
287: case '(':
288: level++;
289: break;
290: case ')':
291: if (level == 0)
292: goto fnd;
293: level--;
294: break;
295: case '\n':
296: SETLINE;
297: }
298: }
299: return (NO);
300: fnd:
301: /*
302: * we assume that the character after a function's right paren
303: * is a token character if it's a function and a non-token
304: * character if it's a declaration. Comments don't count...
305: */
306: for (;;) {
307: while (GETC(!=, EOF) && iswhite(c))
308: if (c == '\n')
309: SETLINE;
310: if (intoken(c) || c == '{')
311: break;
312: if (c == '/' && GETC(==, '*'))
313: skip_comment();
314: else { /* don't ever "read" '/' */
315: (void)ungetc(c, inf);
316: return (NO);
317: }
318: }
319: if (c != '{')
320: (void)skip_key('{');
321: return (YES);
322: }
323:
324: /*
325: * hash_entry --
326: * handle a line starting with a '#'
327: */
328: static void
329: hash_entry()
330: {
331: int c; /* character read */
332: int curline; /* line started on */
333: char *sp; /* buffer pointer */
334: char tok[MAXTOKEN]; /* storage buffer */
1.4 ! deraadt 335:
! 336: /*
! 337: * to treat following macro.
! 338: * # macro(arg) ....
! 339: */
! 340: while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
! 341: ;
! 342: (void)ungetc(c, inf);
1.1 deraadt 343:
344: curline = lineno;
345: for (sp = tok;;) { /* get next token */
346: if (GETC(==, EOF))
347: return;
348: if (iswhite(c))
349: break;
350: *sp++ = c;
351: }
352: *sp = EOS;
353: if (memcmp(tok, "define", 6)) /* only interested in #define's */
354: goto skip;
355: for (;;) { /* this doesn't handle "#define \n" */
356: if (GETC(==, EOF))
357: return;
358: if (!iswhite(c))
359: break;
360: }
361: for (sp = tok;;) { /* get next token */
362: *sp++ = c;
363: if (GETC(==, EOF))
364: return;
365: /*
366: * this is where it DOESN'T handle
367: * "#define \n"
368: */
369: if (!intoken(c))
370: break;
371: }
372: *sp = EOS;
373: if (dflag || c == '(') { /* only want macros */
374: getline();
375: pfnote(tok, curline);
376: }
377: skip: if (c == '\n') { /* get rid of rest of define */
378: SETLINE
379: if (*(sp - 1) != '\\')
380: return;
381: }
382: (void)skip_key('\n');
383: }
384:
385: /*
386: * str_entry --
387: * handle a struct, union or enum entry
388: */
389: static int
390: str_entry(c)
391: int c; /* current character */
392: {
393: int curline; /* line started on */
394: char *sp; /* buffer pointer */
395: char tok[LINE_MAX]; /* storage buffer */
396:
397: curline = lineno;
398: while (iswhite(c))
399: if (GETC(==, EOF))
400: return (NO);
401: if (c == '{') /* it was "struct {" */
402: return (YES);
403: for (sp = tok;;) { /* get next token */
404: *sp++ = c;
405: if (GETC(==, EOF))
406: return (NO);
407: if (!intoken(c))
408: break;
409: }
410: switch (c) {
411: case '{': /* it was "struct foo{" */
412: --sp;
413: break;
414: case '\n': /* it was "struct foo\n" */
415: SETLINE;
416: /*FALLTHROUGH*/
417: default: /* probably "struct foo " */
418: while (GETC(!=, EOF))
419: if (!iswhite(c))
420: break;
421: if (c != '{') {
422: (void)ungetc(c, inf);
423: return (NO);
424: }
425: }
426: *sp = EOS;
427: pfnote(tok, curline);
428: return (YES);
429: }
430:
431: /*
432: * skip_comment --
433: * skip over comment
434: */
435: void
436: skip_comment()
437: {
438: int c; /* character read */
439: int star; /* '*' flag */
440:
441: for (star = 0; GETC(!=, EOF);)
442: switch(c) {
443: /* comments don't nest, nor can they be escaped. */
444: case '*':
445: star = YES;
446: break;
447: case '/':
448: if (star)
449: return;
450: break;
451: case '\n':
452: SETLINE;
453: /*FALLTHROUGH*/
454: default:
455: star = NO;
456: break;
457: }
458: }
459:
460: /*
461: * skip_string --
462: * skip to the end of a string or character constant.
463: */
464: void
465: skip_string(key)
466: int key;
467: {
468: int c,
469: skip;
470:
471: for (skip = NO; GETC(!=, EOF); )
472: switch (c) {
473: case '\\': /* a backslash escapes anything */
474: skip = !skip; /* we toggle in case it's "\\" */
475: break;
476: case '\n':
477: SETLINE;
478: /*FALLTHROUGH*/
479: default:
480: if (c == key && !skip)
481: return;
482: skip = NO;
483: }
484: }
485:
486: /*
487: * skip_key --
488: * skip to next char "key"
489: */
490: int
491: skip_key(key)
492: int key;
493: {
494: int c,
495: skip,
496: retval;
497:
498: for (skip = retval = NO; GETC(!=, EOF);)
499: switch(c) {
500: case '\\': /* a backslash escapes anything */
501: skip = !skip; /* we toggle in case it's "\\" */
502: break;
503: case ';': /* special case for yacc; if one */
504: case '|': /* of these chars occurs, we may */
505: retval = YES; /* have moved out of the rule */
506: break; /* not used by C */
507: case '\'':
508: case '"':
509: /* skip strings and character constants */
510: skip_string(c);
511: break;
512: case '/':
513: /* skip comments */
514: if (GETC(==, '*')) {
515: skip_comment();
516: break;
517: }
518: (void)ungetc(c, inf);
519: c = '/';
520: goto norm;
521: case '\n':
522: SETLINE;
523: /*FALLTHROUGH*/
524: default:
525: norm:
526: if (c == key && !skip)
527: return (retval);
528: skip = NO;
529: }
530: return (retval);
531: }