Annotation of src/usr.bin/ctags/C.c, Revision 1.12
1.12 ! deraadt 1: /* $OpenBSD: C.c,v 1.11 2003/10/28 13:23:59 avsm Exp $ */
1.1 deraadt 2: /* $NetBSD: C.c,v 1.3 1995/03/26 20:14:02 glass Exp $ */
3:
4: /*
5: * Copyright (c) 1987, 1993, 1994
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
1.9 millert 16: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: */
32:
33: #include <limits.h>
34: #include <stdio.h>
35: #include <string.h>
36:
37: #include "ctags.h"
38:
1.7 millert 39: static int func_entry(void);
40: static void hash_entry(void);
41: static void skip_string(int);
42: static int str_entry(int);
1.1 deraadt 43:
44: /*
45: * c_entries --
46: * read .c and .h files and call appropriate routines
47: */
48: void
1.10 deraadt 49: c_entries(void)
1.1 deraadt 50: {
51: int c; /* current character */
52: int level; /* brace level */
53: int token; /* if reading a token */
54: int t_def; /* if reading a typedef */
55: int t_level; /* typedef's brace level */
56: char *sp; /* buffer pointer */
57: char tok[MAXTOKEN]; /* token buffer */
58:
59: lineftell = ftell(inf);
60: sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
61: while (GETC(!=, EOF)) {
62: switch (c) {
63: /*
64: * Here's where it DOESN'T handle: {
65: * foo(a)
66: * {
67: * #ifdef notdef
68: * }
69: * #endif
70: * if (a)
71: * puts("hello, world");
72: * }
73: */
74: case '{':
75: ++level;
76: goto endtok;
77: case '}':
78: /*
79: * if level goes below zero, try and fix
80: * it, even though we've already messed up
81: */
82: if (--level < 0)
83: level = 0;
84: goto endtok;
85:
86: case '\n':
87: SETLINE;
88: /*
89: * the above 3 cases are similar in that they
90: * are special characters that also end tokens.
91: */
1.5 deraadt 92: endtok: if (sp > tok) {
1.1 deraadt 93: *sp = EOS;
94: token = YES;
95: sp = tok;
96: }
97: else
98: token = NO;
99: continue;
100:
101: /*
102: * We ignore quoted strings and character constants
103: * completely.
104: */
105: case '"':
106: case '\'':
107: (void)skip_string(c);
108: break;
109:
110: /*
111: * comments can be fun; note the state is unchanged after
112: * return, in case we found:
113: * "foo() XX comment XX { int bar; }"
114: */
115: case '/':
116: if (GETC(==, '*')) {
1.8 pjanzen 117: skip_comment(c);
118: continue;
119: } else if (c == '/') {
120: skip_comment(c);
1.1 deraadt 121: continue;
122: }
123: (void)ungetc(c, inf);
124: c = '/';
125: goto storec;
126:
127: /* hash marks flag #define's. */
128: case '#':
129: if (sp == tok) {
130: hash_entry();
131: break;
132: }
133: goto storec;
134:
135: /*
136: * if we have a current token, parenthesis on
137: * level zero indicates a function.
138: */
139: case '(':
1.8 pjanzen 140: do {
141: c = getc(inf);
142: } while (iswhite(c));
143: if (c == '*')
144: break;
145: else
146: ungetc(c, inf);
1.1 deraadt 147: if (!level && token) {
148: int curline;
149:
150: if (sp != tok)
151: *sp = EOS;
152: /*
153: * grab the line immediately, we may
154: * already be wrong, for example,
155: * foo\n
156: * (arg1,
157: */
158: getline();
159: curline = lineno;
160: if (func_entry()) {
161: ++level;
162: pfnote(tok, curline);
163: }
164: break;
165: }
166: goto storec;
167:
168: /*
169: * semi-colons indicate the end of a typedef; if we find a
170: * typedef we search for the next semi-colon of the same
171: * level as the typedef. Ignoring "structs", they are
172: * tricky, since you can find:
173: *
1.3 deraadt 174: * "typedef int time_t;"
1.1 deraadt 175: * "typedef unsigned int u_int;"
176: * "typedef unsigned int u_int [10];"
177: *
178: * If looking at a typedef, we save a copy of the last token
179: * found. Then, when we find the ';' we take the current
180: * token if it starts with a valid token name, else we take
181: * the one we saved. There's probably some reasonable
182: * alternative to this...
183: */
184: case ';':
185: if (t_def && level == t_level) {
186: t_def = NO;
187: getline();
188: if (sp != tok)
189: *sp = EOS;
190: pfnote(tok, lineno);
191: break;
192: }
193: goto storec;
194:
195: /*
196: * store characters until one that can't be part of a token
197: * comes along; check the current token against certain
198: * reserved words.
199: */
200: default:
1.4 deraadt 201: /*
202: * to treat following function.
203: * func (arg) {
204: * ....
205: * }
206: */
207: if (c == ' ' || c == '\t') {
208: int save = c;
209: while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
210: ;
211: if (c == EOF)
212: return;
213: (void)ungetc(c, inf);
214: c = save;
215: }
1.1 deraadt 216: storec: if (!intoken(c)) {
217: if (sp == tok)
218: break;
219: *sp = EOS;
220: if (tflag) {
221: /* no typedefs inside typedefs */
222: if (!t_def &&
223: !memcmp(tok, "typedef",8)) {
224: t_def = YES;
225: t_level = level;
226: break;
227: }
228: /* catch "typedef struct" */
229: if ((!t_def || t_level < level)
230: && (!memcmp(tok, "struct", 7)
231: || !memcmp(tok, "union", 6)
232: || !memcmp(tok, "enum", 5))) {
233: /*
234: * get line immediately;
235: * may change before '{'
236: */
237: getline();
238: if (str_entry(c))
239: ++level;
240: break;
241: /* } */
242: }
243: }
244: sp = tok;
245: }
246: else if (sp != tok || begtoken(c)) {
1.6 espie 247: /* hell... truncate it */
248: if (sp == tok + sizeof tok - 1)
249: *sp = EOS;
250: else
1.5 deraadt 251: *sp++ = c;
1.1 deraadt 252: token = YES;
253: }
254: continue;
255: }
256:
257: sp = tok;
258: token = NO;
259: }
260: }
261:
262: /*
263: * func_entry --
264: * handle a function reference
265: */
266: static int
1.10 deraadt 267: func_entry(void)
1.1 deraadt 268: {
269: int c; /* current character */
270: int level = 0; /* for matching '()' */
1.8 pjanzen 271: static char attribute[] = "__attribute__";
272: char maybe_attribute[sizeof attribute + 1];
273: char *anext;
1.1 deraadt 274:
275: /*
276: * Find the end of the assumed function declaration.
277: * Note that ANSI C functions can have type definitions so keep
278: * track of the parentheses nesting level.
279: */
280: while (GETC(!=, EOF)) {
281: switch (c) {
282: case '\'':
283: case '"':
284: /* skip strings and character constants */
285: skip_string(c);
286: break;
287: case '/':
288: /* skip comments */
289: if (GETC(==, '*'))
1.8 pjanzen 290: skip_comment(c);
291: else if (c == '/')
292: skip_comment(c);
1.1 deraadt 293: break;
294: case '(':
295: level++;
296: break;
297: case ')':
298: if (level == 0)
299: goto fnd;
300: level--;
301: break;
302: case '\n':
303: SETLINE;
304: }
305: }
306: return (NO);
307: fnd:
308: /*
309: * we assume that the character after a function's right paren
310: * is a token character if it's a function and a non-token
311: * character if it's a declaration. Comments don't count...
312: */
1.8 pjanzen 313: for (anext = maybe_attribute;;) {
1.1 deraadt 314: while (GETC(!=, EOF) && iswhite(c))
315: if (c == '\n')
316: SETLINE;
1.8 pjanzen 317: if (c == EOF)
318: return NO;
319: /*
320: * Recognize the GNU __attribute__ extension, which would
321: * otherwise make the heuristic test DTWT
322: */
323: if (anext == maybe_attribute) {
324: if (intoken(c)) {
325: *anext++ = c;
326: continue;
327: }
328: } else {
329: if (intoken(c)) {
330: if (anext - maybe_attribute < (int)(sizeof attribute - 1))
331: *anext++ = c;
332: else
333: break;
334: continue;
335: } else {
336: *anext++ = '\0';
337: if (strcmp(maybe_attribute, attribute) == 0) {
338: (void)ungetc(c, inf);
339: return NO;
340: }
341: break;
342: }
343: }
1.1 deraadt 344: if (intoken(c) || c == '{')
345: break;
346: if (c == '/' && GETC(==, '*'))
1.8 pjanzen 347: skip_comment(c);
348: else if (c == '/')
349: skip_comment(c);
1.1 deraadt 350: else { /* don't ever "read" '/' */
351: (void)ungetc(c, inf);
352: return (NO);
353: }
354: }
355: if (c != '{')
356: (void)skip_key('{');
357: return (YES);
358: }
359:
360: /*
361: * hash_entry --
362: * handle a line starting with a '#'
363: */
364: static void
1.10 deraadt 365: hash_entry(void)
1.1 deraadt 366: {
367: int c; /* character read */
368: int curline; /* line started on */
369: char *sp; /* buffer pointer */
370: char tok[MAXTOKEN]; /* storage buffer */
1.4 deraadt 371:
372: /*
373: * to treat following macro.
374: * # macro(arg) ....
375: */
376: while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
377: ;
378: (void)ungetc(c, inf);
1.1 deraadt 379:
380: curline = lineno;
381: for (sp = tok;;) { /* get next token */
382: if (GETC(==, EOF))
383: return;
384: if (iswhite(c))
385: break;
1.6 espie 386: /* hell... truncate it */
387: if (sp == tok + sizeof tok - 1)
388: *sp = EOS;
389: else
1.5 deraadt 390: *sp++ = c;
1.1 deraadt 391: }
392: *sp = EOS;
393: if (memcmp(tok, "define", 6)) /* only interested in #define's */
394: goto skip;
395: for (;;) { /* this doesn't handle "#define \n" */
396: if (GETC(==, EOF))
397: return;
398: if (!iswhite(c))
399: break;
400: }
401: for (sp = tok;;) { /* get next token */
1.6 espie 402: /* hell... truncate it */
403: if (sp == tok + sizeof tok - 1)
404: *sp = EOS;
405: else
1.5 deraadt 406: *sp++ = c;
1.1 deraadt 407: if (GETC(==, EOF))
408: return;
409: /*
410: * this is where it DOESN'T handle
411: * "#define \n"
412: */
413: if (!intoken(c))
414: break;
415: }
416: *sp = EOS;
417: if (dflag || c == '(') { /* only want macros */
418: getline();
419: pfnote(tok, curline);
420: }
421: skip: if (c == '\n') { /* get rid of rest of define */
422: SETLINE
423: if (*(sp - 1) != '\\')
424: return;
425: }
426: (void)skip_key('\n');
427: }
428:
429: /*
430: * str_entry --
431: * handle a struct, union or enum entry
432: */
433: static int
1.10 deraadt 434: str_entry(int c)
1.1 deraadt 435: {
436: int curline; /* line started on */
437: char *sp; /* buffer pointer */
438: char tok[LINE_MAX]; /* storage buffer */
439:
440: curline = lineno;
441: while (iswhite(c))
442: if (GETC(==, EOF))
443: return (NO);
444: if (c == '{') /* it was "struct {" */
445: return (YES);
446: for (sp = tok;;) { /* get next token */
1.6 espie 447: /* hell... truncate it */
448: if (sp == tok + sizeof tok - 1)
449: *sp = EOS;
450: else
1.5 deraadt 451: *sp++ = c;
1.1 deraadt 452: if (GETC(==, EOF))
453: return (NO);
454: if (!intoken(c))
455: break;
456: }
457: switch (c) {
458: case '{': /* it was "struct foo{" */
459: --sp;
460: break;
461: case '\n': /* it was "struct foo\n" */
462: SETLINE;
463: /*FALLTHROUGH*/
464: default: /* probably "struct foo " */
465: while (GETC(!=, EOF))
466: if (!iswhite(c))
467: break;
468: if (c != '{') {
469: (void)ungetc(c, inf);
470: return (NO);
471: }
472: }
473: *sp = EOS;
474: pfnote(tok, curline);
475: return (YES);
476: }
477:
478: /*
479: * skip_comment --
480: * skip over comment
481: */
482: void
1.8 pjanzen 483: skip_comment(int commenttype)
1.1 deraadt 484: {
485: int c; /* character read */
486: int star; /* '*' flag */
487:
488: for (star = 0; GETC(!=, EOF);)
489: switch(c) {
490: /* comments don't nest, nor can they be escaped. */
491: case '*':
492: star = YES;
493: break;
494: case '/':
1.8 pjanzen 495: if (commenttype == '*' && star)
1.1 deraadt 496: return;
497: break;
498: case '\n':
1.8 pjanzen 499: if (commenttype == '/') {
500: /* We don't really parse C, so sometimes it
501: * is necessary to see the newline
502: */
503: ungetc(c, inf);
504: return;
505: }
1.1 deraadt 506: SETLINE;
507: /*FALLTHROUGH*/
508: default:
509: star = NO;
510: break;
511: }
512: }
513:
514: /*
515: * skip_string --
516: * skip to the end of a string or character constant.
517: */
1.11 avsm 518: static void
1.10 deraadt 519: skip_string(int key)
1.1 deraadt 520: {
521: int c,
522: skip;
523:
524: for (skip = NO; GETC(!=, EOF); )
525: switch (c) {
526: case '\\': /* a backslash escapes anything */
527: skip = !skip; /* we toggle in case it's "\\" */
528: break;
529: case '\n':
530: SETLINE;
531: /*FALLTHROUGH*/
532: default:
533: if (c == key && !skip)
534: return;
535: skip = NO;
536: }
537: }
538:
539: /*
540: * skip_key --
541: * skip to next char "key"
542: */
543: int
1.10 deraadt 544: skip_key(int key)
1.1 deraadt 545: {
546: int c,
547: skip,
548: retval;
549:
550: for (skip = retval = NO; GETC(!=, EOF);)
551: switch(c) {
552: case '\\': /* a backslash escapes anything */
553: skip = !skip; /* we toggle in case it's "\\" */
554: break;
555: case ';': /* special case for yacc; if one */
556: case '|': /* of these chars occurs, we may */
557: retval = YES; /* have moved out of the rule */
558: break; /* not used by C */
559: case '\'':
560: case '"':
561: /* skip strings and character constants */
562: skip_string(c);
563: break;
564: case '/':
565: /* skip comments */
566: if (GETC(==, '*')) {
1.8 pjanzen 567: skip_comment(c);
568: break;
569: } else if (c == '/') {
570: skip_comment(c);
1.1 deraadt 571: break;
572: }
573: (void)ungetc(c, inf);
574: c = '/';
575: goto norm;
576: case '\n':
577: SETLINE;
578: /*FALLTHROUGH*/
579: default:
580: norm:
581: if (c == key && !skip)
582: return (retval);
583: skip = NO;
584: }
585: return (retval);
586: }