Annotation of src/usr.bin/ctags/C.c, Revision 1.3
1.3 ! deraadt 1: /* $OpenBSD: C.c,v 1.2 1996/06/26 05:32:26 deraadt Exp $ */
1.1 deraadt 2: /* $NetBSD: C.c,v 1.3 1995/03/26 20:14:02 glass Exp $ */
3:
4: /*
5: * Copyright (c) 1987, 1993, 1994
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. All advertising materials mentioning features or use of this software
17: * must display the following acknowledgement:
18: * This product includes software developed by the University of
19: * California, Berkeley and its contributors.
20: * 4. Neither the name of the University nor the names of its contributors
21: * may be used to endorse or promote products derived from this software
22: * without specific prior written permission.
23: *
24: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34: * SUCH DAMAGE.
35: */
36:
37: #ifndef lint
38: #if 0
39: static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94";
40: #else
1.3 ! deraadt 41: static char rcsid[] = "$OpenBSD: C.c,v 1.2 1996/06/26 05:32:26 deraadt Exp $";
1.1 deraadt 42: #endif
43: #endif /* not lint */
44:
45: #include <limits.h>
46: #include <stdio.h>
47: #include <string.h>
48:
49: #include "ctags.h"
50:
51: static int func_entry __P((void));
52: static void hash_entry __P((void));
53: static void skip_string __P((int));
54: static int str_entry __P((int));
55:
56: /*
57: * c_entries --
58: * read .c and .h files and call appropriate routines
59: */
60: void
61: c_entries()
62: {
63: int c; /* current character */
64: int level; /* brace level */
65: int token; /* if reading a token */
66: int t_def; /* if reading a typedef */
67: int t_level; /* typedef's brace level */
68: char *sp; /* buffer pointer */
69: char tok[MAXTOKEN]; /* token buffer */
70:
71: lineftell = ftell(inf);
72: sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
73: while (GETC(!=, EOF)) {
74: switch (c) {
75: /*
76: * Here's where it DOESN'T handle: {
77: * foo(a)
78: * {
79: * #ifdef notdef
80: * }
81: * #endif
82: * if (a)
83: * puts("hello, world");
84: * }
85: */
86: case '{':
87: ++level;
88: goto endtok;
89: case '}':
90: /*
91: * if level goes below zero, try and fix
92: * it, even though we've already messed up
93: */
94: if (--level < 0)
95: level = 0;
96: goto endtok;
97:
98: case '\n':
99: SETLINE;
100: /*
101: * the above 3 cases are similar in that they
102: * are special characters that also end tokens.
103: */
104: endtok: if (sp > tok) {
105: *sp = EOS;
106: token = YES;
107: sp = tok;
108: }
109: else
110: token = NO;
111: continue;
112:
113: /*
114: * We ignore quoted strings and character constants
115: * completely.
116: */
117: case '"':
118: case '\'':
119: (void)skip_string(c);
120: break;
121:
122: /*
123: * comments can be fun; note the state is unchanged after
124: * return, in case we found:
125: * "foo() XX comment XX { int bar; }"
126: */
127: case '/':
128: if (GETC(==, '*')) {
129: skip_comment();
130: continue;
131: }
132: (void)ungetc(c, inf);
133: c = '/';
134: goto storec;
135:
136: /* hash marks flag #define's. */
137: case '#':
138: if (sp == tok) {
139: hash_entry();
140: break;
141: }
142: goto storec;
143:
144: /*
145: * if we have a current token, parenthesis on
146: * level zero indicates a function.
147: */
148: case '(':
149: if (!level && token) {
150: int curline;
151:
152: if (sp != tok)
153: *sp = EOS;
154: /*
155: * grab the line immediately, we may
156: * already be wrong, for example,
157: * foo\n
158: * (arg1,
159: */
160: getline();
161: curline = lineno;
162: if (func_entry()) {
163: ++level;
164: pfnote(tok, curline);
165: }
166: break;
167: }
168: goto storec;
169:
170: /*
171: * semi-colons indicate the end of a typedef; if we find a
172: * typedef we search for the next semi-colon of the same
173: * level as the typedef. Ignoring "structs", they are
174: * tricky, since you can find:
175: *
1.3 ! deraadt 176: * "typedef int time_t;"
1.1 deraadt 177: * "typedef unsigned int u_int;"
178: * "typedef unsigned int u_int [10];"
179: *
180: * If looking at a typedef, we save a copy of the last token
181: * found. Then, when we find the ';' we take the current
182: * token if it starts with a valid token name, else we take
183: * the one we saved. There's probably some reasonable
184: * alternative to this...
185: */
186: case ';':
187: if (t_def && level == t_level) {
188: t_def = NO;
189: getline();
190: if (sp != tok)
191: *sp = EOS;
192: pfnote(tok, lineno);
193: break;
194: }
195: goto storec;
196:
197: /*
198: * store characters until one that can't be part of a token
199: * comes along; check the current token against certain
200: * reserved words.
201: */
202: default:
203: storec: if (!intoken(c)) {
204: if (sp == tok)
205: break;
206: *sp = EOS;
207: if (tflag) {
208: /* no typedefs inside typedefs */
209: if (!t_def &&
210: !memcmp(tok, "typedef",8)) {
211: t_def = YES;
212: t_level = level;
213: break;
214: }
215: /* catch "typedef struct" */
216: if ((!t_def || t_level < level)
217: && (!memcmp(tok, "struct", 7)
218: || !memcmp(tok, "union", 6)
219: || !memcmp(tok, "enum", 5))) {
220: /*
221: * get line immediately;
222: * may change before '{'
223: */
224: getline();
225: if (str_entry(c))
226: ++level;
227: break;
228: /* } */
229: }
230: }
231: sp = tok;
232: }
233: else if (sp != tok || begtoken(c)) {
234: *sp++ = c;
235: token = YES;
236: }
237: continue;
238: }
239:
240: sp = tok;
241: token = NO;
242: }
243: }
244:
245: /*
246: * func_entry --
247: * handle a function reference
248: */
249: static int
250: func_entry()
251: {
252: int c; /* current character */
253: int level = 0; /* for matching '()' */
254:
255: /*
256: * Find the end of the assumed function declaration.
257: * Note that ANSI C functions can have type definitions so keep
258: * track of the parentheses nesting level.
259: */
260: while (GETC(!=, EOF)) {
261: switch (c) {
262: case '\'':
263: case '"':
264: /* skip strings and character constants */
265: skip_string(c);
266: break;
267: case '/':
268: /* skip comments */
269: if (GETC(==, '*'))
270: skip_comment();
271: break;
272: case '(':
273: level++;
274: break;
275: case ')':
276: if (level == 0)
277: goto fnd;
278: level--;
279: break;
280: case '\n':
281: SETLINE;
282: }
283: }
284: return (NO);
285: fnd:
286: /*
287: * we assume that the character after a function's right paren
288: * is a token character if it's a function and a non-token
289: * character if it's a declaration. Comments don't count...
290: */
291: for (;;) {
292: while (GETC(!=, EOF) && iswhite(c))
293: if (c == '\n')
294: SETLINE;
295: if (intoken(c) || c == '{')
296: break;
297: if (c == '/' && GETC(==, '*'))
298: skip_comment();
299: else { /* don't ever "read" '/' */
300: (void)ungetc(c, inf);
301: return (NO);
302: }
303: }
304: if (c != '{')
305: (void)skip_key('{');
306: return (YES);
307: }
308:
309: /*
310: * hash_entry --
311: * handle a line starting with a '#'
312: */
313: static void
314: hash_entry()
315: {
316: int c; /* character read */
317: int curline; /* line started on */
318: char *sp; /* buffer pointer */
319: char tok[MAXTOKEN]; /* storage buffer */
320:
321: curline = lineno;
322: for (sp = tok;;) { /* get next token */
323: if (GETC(==, EOF))
324: return;
325: if (iswhite(c))
326: break;
327: *sp++ = c;
328: }
329: *sp = EOS;
330: if (memcmp(tok, "define", 6)) /* only interested in #define's */
331: goto skip;
332: for (;;) { /* this doesn't handle "#define \n" */
333: if (GETC(==, EOF))
334: return;
335: if (!iswhite(c))
336: break;
337: }
338: for (sp = tok;;) { /* get next token */
339: *sp++ = c;
340: if (GETC(==, EOF))
341: return;
342: /*
343: * this is where it DOESN'T handle
344: * "#define \n"
345: */
346: if (!intoken(c))
347: break;
348: }
349: *sp = EOS;
350: if (dflag || c == '(') { /* only want macros */
351: getline();
352: pfnote(tok, curline);
353: }
354: skip: if (c == '\n') { /* get rid of rest of define */
355: SETLINE
356: if (*(sp - 1) != '\\')
357: return;
358: }
359: (void)skip_key('\n');
360: }
361:
362: /*
363: * str_entry --
364: * handle a struct, union or enum entry
365: */
366: static int
367: str_entry(c)
368: int c; /* current character */
369: {
370: int curline; /* line started on */
371: char *sp; /* buffer pointer */
372: char tok[LINE_MAX]; /* storage buffer */
373:
374: curline = lineno;
375: while (iswhite(c))
376: if (GETC(==, EOF))
377: return (NO);
378: if (c == '{') /* it was "struct {" */
379: return (YES);
380: for (sp = tok;;) { /* get next token */
381: *sp++ = c;
382: if (GETC(==, EOF))
383: return (NO);
384: if (!intoken(c))
385: break;
386: }
387: switch (c) {
388: case '{': /* it was "struct foo{" */
389: --sp;
390: break;
391: case '\n': /* it was "struct foo\n" */
392: SETLINE;
393: /*FALLTHROUGH*/
394: default: /* probably "struct foo " */
395: while (GETC(!=, EOF))
396: if (!iswhite(c))
397: break;
398: if (c != '{') {
399: (void)ungetc(c, inf);
400: return (NO);
401: }
402: }
403: *sp = EOS;
404: pfnote(tok, curline);
405: return (YES);
406: }
407:
408: /*
409: * skip_comment --
410: * skip over comment
411: */
412: void
413: skip_comment()
414: {
415: int c; /* character read */
416: int star; /* '*' flag */
417:
418: for (star = 0; GETC(!=, EOF);)
419: switch(c) {
420: /* comments don't nest, nor can they be escaped. */
421: case '*':
422: star = YES;
423: break;
424: case '/':
425: if (star)
426: return;
427: break;
428: case '\n':
429: SETLINE;
430: /*FALLTHROUGH*/
431: default:
432: star = NO;
433: break;
434: }
435: }
436:
437: /*
438: * skip_string --
439: * skip to the end of a string or character constant.
440: */
441: void
442: skip_string(key)
443: int key;
444: {
445: int c,
446: skip;
447:
448: for (skip = NO; GETC(!=, EOF); )
449: switch (c) {
450: case '\\': /* a backslash escapes anything */
451: skip = !skip; /* we toggle in case it's "\\" */
452: break;
453: case '\n':
454: SETLINE;
455: /*FALLTHROUGH*/
456: default:
457: if (c == key && !skip)
458: return;
459: skip = NO;
460: }
461: }
462:
463: /*
464: * skip_key --
465: * skip to next char "key"
466: */
467: int
468: skip_key(key)
469: int key;
470: {
471: int c,
472: skip,
473: retval;
474:
475: for (skip = retval = NO; GETC(!=, EOF);)
476: switch(c) {
477: case '\\': /* a backslash escapes anything */
478: skip = !skip; /* we toggle in case it's "\\" */
479: break;
480: case ';': /* special case for yacc; if one */
481: case '|': /* of these chars occurs, we may */
482: retval = YES; /* have moved out of the rule */
483: break; /* not used by C */
484: case '\'':
485: case '"':
486: /* skip strings and character constants */
487: skip_string(c);
488: break;
489: case '/':
490: /* skip comments */
491: if (GETC(==, '*')) {
492: skip_comment();
493: break;
494: }
495: (void)ungetc(c, inf);
496: c = '/';
497: goto norm;
498: case '\n':
499: SETLINE;
500: /*FALLTHROUGH*/
501: default:
502: norm:
503: if (c == key && !skip)
504: return (retval);
505: skip = NO;
506: }
507: return (retval);
508: }