Annotation of src/usr.bin/ctags/C.c, Revision 1.6
1.6 ! espie 1: /* $OpenBSD: C.c,v 1.5 2000/07/25 19:28:30 deraadt Exp $ */
1.1 deraadt 2: /* $NetBSD: C.c,v 1.3 1995/03/26 20:14:02 glass Exp $ */
3:
4: /*
5: * Copyright (c) 1987, 1993, 1994
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. All advertising materials mentioning features or use of this software
17: * must display the following acknowledgement:
18: * This product includes software developed by the University of
19: * California, Berkeley and its contributors.
20: * 4. Neither the name of the University nor the names of its contributors
21: * may be used to endorse or promote products derived from this software
22: * without specific prior written permission.
23: *
24: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34: * SUCH DAMAGE.
35: */
36:
/*
 * Version identification strings.  Nothing is defined when `lint' is
 * defined; otherwise the "#if 0" disables the Berkeley sccsid and only
 * the rcsid array is compiled.
 */
37: #ifndef lint
38: #if 0
39: static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94";
40: #else
1.6 ! espie 41: static char rcsid[] = "$OpenBSD: C.c,v 1.5 2000/07/25 19:28:30 deraadt Exp $";
1.1 deraadt 42: #endif
43: #endif /* not lint */
44:
45: #include <limits.h>
46: #include <stdio.h>
47: #include <string.h>
48:
49: #include "ctags.h"
50:
/*
 * Forward declarations of the helpers defined later in this file.
 * func_entry() and str_entry() return YES or NO; c_entries() bumps its
 * brace level when they return YES.  skip_string() is declared static
 * here although its definition further down omits the keyword.
 */
51: static int func_entry __P((void));
52: static void hash_entry __P((void));
53: static void skip_string __P((int));
54: static int str_entry __P((int));
55:
56: /*
57: * c_entries --
58: * read .c and .h files and call appropriate routines
59: */
60: void
61: c_entries()
62: {
63: int c; /* current character */
64: int level; /* brace level */
65: int token; /* if reading a token */
66: int t_def; /* if reading a typedef */
67: int t_level; /* typedef's brace level */
68: char *sp; /* buffer pointer */
69: char tok[MAXTOKEN]; /* token buffer */
70:
71: lineftell = ftell(inf);
72: sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
73: while (GETC(!=, EOF)) {
74: switch (c) {
75: /*
76: * Here's where it DOESN'T handle: {
77: * foo(a)
78: * {
79: * #ifdef notdef
80: * }
81: * #endif
82: * if (a)
83: * puts("hello, world");
84: * }
85: */
86: case '{':
87: ++level;
88: goto endtok;
89: case '}':
90: /*
91: * if level goes below zero, try and fix
92: * it, even though we've already messed up
93: */
94: if (--level < 0)
95: level = 0;
96: goto endtok;
97:
98: case '\n':
99: SETLINE;
100: /*
101: * the above 3 cases are similar in that they
102: * are special characters that also end tokens.
103: */
1.5 deraadt 104: endtok: if (sp > tok) {
1.1 deraadt 105: *sp = EOS;
106: token = YES;
107: sp = tok;
108: }
109: else
110: token = NO;
111: continue;
112:
113: /*
114: * We ignore quoted strings and character constants
115: * completely.
116: */
117: case '"':
118: case '\'':
119: (void)skip_string(c);
120: break;
121:
122: /*
123: * comments can be fun; note the state is unchanged after
124: * return, in case we found:
125: * "foo() XX comment XX { int bar; }"
126: */
127: case '/':
128: if (GETC(==, '*')) {
129: skip_comment();
130: continue;
131: }
132: (void)ungetc(c, inf);
133: c = '/';
134: goto storec;
135:
136: /* hash marks flag #define's. */
137: case '#':
138: if (sp == tok) {
139: hash_entry();
140: break;
141: }
142: goto storec;
143:
144: /*
145: * if we have a current token, parenthesis on
146: * level zero indicates a function.
147: */
148: case '(':
149: if (!level && token) {
150: int curline;
151:
152: if (sp != tok)
153: *sp = EOS;
154: /*
155: * grab the line immediately, we may
156: * already be wrong, for example,
157: * foo\n
158: * (arg1,
159: */
160: getline();
161: curline = lineno;
162: if (func_entry()) {
163: ++level;
164: pfnote(tok, curline);
165: }
166: break;
167: }
168: goto storec;
169:
170: /*
171: * semi-colons indicate the end of a typedef; if we find a
172: * typedef we search for the next semi-colon of the same
173: * level as the typedef. Ignoring "structs", they are
174: * tricky, since you can find:
175: *
1.3 deraadt 176: * "typedef int time_t;"
1.1 deraadt 177: * "typedef unsigned int u_int;"
178: * "typedef unsigned int u_int [10];"
179: *
180: * If looking at a typedef, we save a copy of the last token
181: * found. Then, when we find the ';' we take the current
182: * token if it starts with a valid token name, else we take
183: * the one we saved. There's probably some reasonable
184: * alternative to this...
185: */
186: case ';':
187: if (t_def && level == t_level) {
188: t_def = NO;
189: getline();
190: if (sp != tok)
191: *sp = EOS;
192: pfnote(tok, lineno);
193: break;
194: }
195: goto storec;
196:
197: /*
198: * store characters until one that can't be part of a token
199: * comes along; check the current token against certain
200: * reserved words.
201: */
202: default:
1.4 deraadt 203: /*
204: * to treat following function.
205: * func (arg) {
206: * ....
207: * }
208: */
209: if (c == ' ' || c == '\t') {
210: int save = c;
211: while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
212: ;
213: if (c == EOF)
214: return;
215: (void)ungetc(c, inf);
216: c = save;
217: }
1.1 deraadt 218: storec: if (!intoken(c)) {
219: if (sp == tok)
220: break;
221: *sp = EOS;
222: if (tflag) {
223: /* no typedefs inside typedefs */
224: if (!t_def &&
225: !memcmp(tok, "typedef",8)) {
226: t_def = YES;
227: t_level = level;
228: break;
229: }
230: /* catch "typedef struct" */
231: if ((!t_def || t_level < level)
232: && (!memcmp(tok, "struct", 7)
233: || !memcmp(tok, "union", 6)
234: || !memcmp(tok, "enum", 5))) {
235: /*
236: * get line immediately;
237: * may change before '{'
238: */
239: getline();
240: if (str_entry(c))
241: ++level;
242: break;
243: /* } */
244: }
245: }
246: sp = tok;
247: }
248: else if (sp != tok || begtoken(c)) {
1.6 ! espie 249: /* hell... truncate it */
! 250: if (sp == tok + sizeof tok - 1)
! 251: *sp = EOS;
! 252: else
1.5 deraadt 253: *sp++ = c;
1.1 deraadt 254: token = YES;
255: }
256: continue;
257: }
258:
259: sp = tok;
260: token = NO;
261: }
262: }
263:
264: /*
265: * func_entry --
266: * handle a function reference
267: */
268: static int
269: func_entry()
270: {
271: int c; /* current character */
272: int level = 0; /* for matching '()' */
273:
274: /*
275: * Find the end of the assumed function declaration.
276: * Note that ANSI C functions can have type definitions so keep
277: * track of the parentheses nesting level.
278: */
279: while (GETC(!=, EOF)) {
280: switch (c) {
281: case '\'':
282: case '"':
283: /* skip strings and character constants */
284: skip_string(c);
285: break;
286: case '/':
287: /* skip comments */
288: if (GETC(==, '*'))
289: skip_comment();
290: break;
291: case '(':
292: level++;
293: break;
294: case ')':
295: if (level == 0)
296: goto fnd;
297: level--;
298: break;
299: case '\n':
300: SETLINE;
301: }
302: }
303: return (NO);
304: fnd:
305: /*
306: * we assume that the character after a function's right paren
307: * is a token character if it's a function and a non-token
308: * character if it's a declaration. Comments don't count...
309: */
310: for (;;) {
311: while (GETC(!=, EOF) && iswhite(c))
312: if (c == '\n')
313: SETLINE;
314: if (intoken(c) || c == '{')
315: break;
316: if (c == '/' && GETC(==, '*'))
317: skip_comment();
318: else { /* don't ever "read" '/' */
319: (void)ungetc(c, inf);
320: return (NO);
321: }
322: }
323: if (c != '{')
324: (void)skip_key('{');
325: return (YES);
326: }
327:
328: /*
329: * hash_entry --
330: * handle a line starting with a '#'
331: */
332: static void
333: hash_entry()
334: {
335: int c; /* character read */
336: int curline; /* line started on */
337: char *sp; /* buffer pointer */
338: char tok[MAXTOKEN]; /* storage buffer */
1.4 deraadt 339:
340: /*
341: * to treat following macro.
342: * # macro(arg) ....
343: */
344: while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
345: ;
346: (void)ungetc(c, inf);
1.1 deraadt 347:
348: curline = lineno;
349: for (sp = tok;;) { /* get next token */
350: if (GETC(==, EOF))
351: return;
352: if (iswhite(c))
353: break;
1.6 ! espie 354: /* hell... truncate it */
! 355: if (sp == tok + sizeof tok - 1)
! 356: *sp = EOS;
! 357: else
1.5 deraadt 358: *sp++ = c;
1.1 deraadt 359: }
360: *sp = EOS;
361: if (memcmp(tok, "define", 6)) /* only interested in #define's */
362: goto skip;
363: for (;;) { /* this doesn't handle "#define \n" */
364: if (GETC(==, EOF))
365: return;
366: if (!iswhite(c))
367: break;
368: }
369: for (sp = tok;;) { /* get next token */
1.6 ! espie 370: /* hell... truncate it */
! 371: if (sp == tok + sizeof tok - 1)
! 372: *sp = EOS;
! 373: else
1.5 deraadt 374: *sp++ = c;
1.1 deraadt 375: if (GETC(==, EOF))
376: return;
377: /*
378: * this is where it DOESN'T handle
379: * "#define \n"
380: */
381: if (!intoken(c))
382: break;
383: }
384: *sp = EOS;
385: if (dflag || c == '(') { /* only want macros */
386: getline();
387: pfnote(tok, curline);
388: }
389: skip: if (c == '\n') { /* get rid of rest of define */
390: SETLINE
391: if (*(sp - 1) != '\\')
392: return;
393: }
394: (void)skip_key('\n');
395: }
396:
397: /*
398: * str_entry --
399: * handle a struct, union or enum entry
400: */
401: static int
402: str_entry(c)
403: int c; /* current character */
404: {
405: int curline; /* line started on */
406: char *sp; /* buffer pointer */
407: char tok[LINE_MAX]; /* storage buffer */
408:
409: curline = lineno;
410: while (iswhite(c))
411: if (GETC(==, EOF))
412: return (NO);
413: if (c == '{') /* it was "struct {" */
414: return (YES);
415: for (sp = tok;;) { /* get next token */
1.6 ! espie 416: /* hell... truncate it */
! 417: if (sp == tok + sizeof tok - 1)
! 418: *sp = EOS;
! 419: else
1.5 deraadt 420: *sp++ = c;
1.1 deraadt 421: if (GETC(==, EOF))
422: return (NO);
423: if (!intoken(c))
424: break;
425: }
426: switch (c) {
427: case '{': /* it was "struct foo{" */
428: --sp;
429: break;
430: case '\n': /* it was "struct foo\n" */
431: SETLINE;
432: /*FALLTHROUGH*/
433: default: /* probably "struct foo " */
434: while (GETC(!=, EOF))
435: if (!iswhite(c))
436: break;
437: if (c != '{') {
438: (void)ungetc(c, inf);
439: return (NO);
440: }
441: }
442: *sp = EOS;
443: pfnote(tok, curline);
444: return (YES);
445: }
446:
447: /*
448: * skip_comment --
449: * skip over comment
450: */
451: void
452: skip_comment()
453: {
454: int c; /* character read */
455: int star; /* '*' flag */
456:
457: for (star = 0; GETC(!=, EOF);)
458: switch(c) {
459: /* comments don't nest, nor can they be escaped. */
460: case '*':
461: star = YES;
462: break;
463: case '/':
464: if (star)
465: return;
466: break;
467: case '\n':
468: SETLINE;
469: /*FALLTHROUGH*/
470: default:
471: star = NO;
472: break;
473: }
474: }
475:
476: /*
477: * skip_string --
478: * skip to the end of a string or character constant.
479: */
480: void
481: skip_string(key)
482: int key;
483: {
484: int c,
485: skip;
486:
487: for (skip = NO; GETC(!=, EOF); )
488: switch (c) {
489: case '\\': /* a backslash escapes anything */
490: skip = !skip; /* we toggle in case it's "\\" */
491: break;
492: case '\n':
493: SETLINE;
494: /*FALLTHROUGH*/
495: default:
496: if (c == key && !skip)
497: return;
498: skip = NO;
499: }
500: }
501:
502: /*
503: * skip_key --
504: * skip to next char "key"
505: */
506: int
507: skip_key(key)
508: int key;
509: {
510: int c,
511: skip,
512: retval;
513:
514: for (skip = retval = NO; GETC(!=, EOF);)
515: switch(c) {
516: case '\\': /* a backslash escapes anything */
517: skip = !skip; /* we toggle in case it's "\\" */
518: break;
519: case ';': /* special case for yacc; if one */
520: case '|': /* of these chars occurs, we may */
521: retval = YES; /* have moved out of the rule */
522: break; /* not used by C */
523: case '\'':
524: case '"':
525: /* skip strings and character constants */
526: skip_string(c);
527: break;
528: case '/':
529: /* skip comments */
530: if (GETC(==, '*')) {
531: skip_comment();
532: break;
533: }
534: (void)ungetc(c, inf);
535: c = '/';
536: goto norm;
537: case '\n':
538: SETLINE;
539: /*FALLTHROUGH*/
540: default:
541: norm:
542: if (c == key && !skip)
543: return (retval);
544: skip = NO;
545: }
546: return (retval);
547: }