Annotation of src/usr.bin/ctags/C.c, Revision 1.13
1.13 ! millert 1: /* $OpenBSD: C.c,v 1.12 2009/10/27 23:59:37 deraadt Exp $ */
1.1 deraadt 2: /* $NetBSD: C.c,v 1.3 1995/03/26 20:14:02 glass Exp $ */
3:
4: /*
5: * Copyright (c) 1987, 1993, 1994
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
1.9 millert 16: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: */
32:
33: #include <limits.h>
34: #include <stdio.h>
35: #include <string.h>
36:
37: #include "ctags.h"
38:
1.7 millert 39: static int func_entry(void);
40: static void hash_entry(void);
41: static void skip_string(int);
42: static int str_entry(int);
1.1 deraadt 43:
44: /*
45: * c_entries --
46: * read .c and .h files and call appropriate routines
47: */
48: void
1.10 deraadt 49: c_entries(void)
1.1 deraadt 50: {
51: int c; /* current character */
52: int level; /* brace level */
53: int token; /* if reading a token */
54: int t_def; /* if reading a typedef */
55: int t_level; /* typedef's brace level */
56: char *sp; /* buffer pointer */
57: char tok[MAXTOKEN]; /* token buffer */
58:
59: lineftell = ftell(inf);
60: sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
61: while (GETC(!=, EOF)) {
62: switch (c) {
63: /*
64: * Here's where it DOESN'T handle: {
65: * foo(a)
66: * {
67: * #ifdef notdef
68: * }
69: * #endif
70: * if (a)
71: * puts("hello, world");
72: * }
73: */
74: case '{':
75: ++level;
76: goto endtok;
77: case '}':
78: /*
79: * if level goes below zero, try and fix
80: * it, even though we've already messed up
81: */
82: if (--level < 0)
83: level = 0;
84: goto endtok;
85:
86: case '\n':
87: SETLINE;
88: /*
89: * the above 3 cases are similar in that they
90: * are special characters that also end tokens.
91: */
1.5 deraadt 92: endtok: if (sp > tok) {
1.1 deraadt 93: *sp = EOS;
94: token = YES;
95: sp = tok;
96: }
97: else
98: token = NO;
99: continue;
100:
101: /*
102: * We ignore quoted strings and character constants
103: * completely.
104: */
105: case '"':
106: case '\'':
107: (void)skip_string(c);
108: break;
109:
110: /*
111: * comments can be fun; note the state is unchanged after
112: * return, in case we found:
113: * "foo() XX comment XX { int bar; }"
114: */
115: case '/':
116: if (GETC(==, '*')) {
1.8 pjanzen 117: skip_comment(c);
118: continue;
119: } else if (c == '/') {
120: skip_comment(c);
1.1 deraadt 121: continue;
122: }
123: (void)ungetc(c, inf);
124: c = '/';
125: goto storec;
126:
127: /* hash marks flag #define's. */
128: case '#':
129: if (sp == tok) {
130: hash_entry();
131: break;
132: }
133: goto storec;
134:
135: /*
136: * if we have a current token, parenthesis on
137: * level zero indicates a function.
138: */
139: case '(':
1.8 pjanzen 140: do {
141: c = getc(inf);
142: } while (iswhite(c));
143: if (c == '*')
144: break;
145: else
146: ungetc(c, inf);
1.1 deraadt 147: if (!level && token) {
148: int curline;
149:
150: if (sp != tok)
151: *sp = EOS;
152: /*
153: * grab the line immediately, we may
154: * already be wrong, for example,
155: * foo\n
156: * (arg1,
157: */
158: getline();
159: curline = lineno;
160: if (func_entry()) {
161: ++level;
162: pfnote(tok, curline);
163: }
164: break;
165: }
166: goto storec;
167:
168: /*
169: * semi-colons indicate the end of a typedef; if we find a
170: * typedef we search for the next semi-colon of the same
171: * level as the typedef. Ignoring "structs", they are
172: * tricky, since you can find:
173: *
1.3 deraadt 174: * "typedef int time_t;"
1.1 deraadt 175: * "typedef unsigned int u_int;"
176: * "typedef unsigned int u_int [10];"
177: *
178: * If looking at a typedef, we save a copy of the last token
179: * found. Then, when we find the ';' we take the current
180: * token if it starts with a valid token name, else we take
181: * the one we saved. There's probably some reasonable
182: * alternative to this...
183: */
184: case ';':
185: if (t_def && level == t_level) {
186: t_def = NO;
187: getline();
188: if (sp != tok)
189: *sp = EOS;
190: pfnote(tok, lineno);
191: break;
192: }
193: goto storec;
194:
195: /*
196: * store characters until one that can't be part of a token
197: * comes along; check the current token against certain
198: * reserved words.
199: */
200: default:
1.4 deraadt 201: /*
202: * to treat following function.
203: * func (arg) {
204: * ....
205: * }
206: */
207: if (c == ' ' || c == '\t') {
208: int save = c;
209: while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
210: ;
211: if (c == EOF)
212: return;
213: (void)ungetc(c, inf);
214: c = save;
215: }
1.1 deraadt 216: storec: if (!intoken(c)) {
217: if (sp == tok)
218: break;
219: *sp = EOS;
1.13 ! millert 220: /* no typedefs inside typedefs */
! 221: if (!t_def &&
! 222: !memcmp(tok, "typedef",8)) {
! 223: t_def = YES;
! 224: t_level = level;
! 225: break;
! 226: }
! 227: /* catch "typedef struct" */
! 228: if ((!t_def || t_level < level)
! 229: && (!memcmp(tok, "struct", 7)
! 230: || !memcmp(tok, "union", 6)
! 231: || !memcmp(tok, "enum", 5))) {
! 232: /*
! 233: * get line immediately;
! 234: * may change before '{'
! 235: */
! 236: getline();
! 237: if (str_entry(c))
! 238: ++level;
! 239: break;
! 240: /* } */
1.1 deraadt 241: }
242: sp = tok;
243: }
244: else if (sp != tok || begtoken(c)) {
1.6 espie 245: /* hell... truncate it */
246: if (sp == tok + sizeof tok - 1)
247: *sp = EOS;
248: else
1.5 deraadt 249: *sp++ = c;
1.1 deraadt 250: token = YES;
251: }
252: continue;
253: }
254:
255: sp = tok;
256: token = NO;
257: }
258: }
259:
260: /*
261: * func_entry --
262: * handle a function reference
263: */
264: static int
1.10 deraadt 265: func_entry(void)
1.1 deraadt 266: {
267: int c; /* current character */
268: int level = 0; /* for matching '()' */
1.8 pjanzen 269: static char attribute[] = "__attribute__";
270: char maybe_attribute[sizeof attribute + 1];
271: char *anext;
1.1 deraadt 272:
273: /*
274: * Find the end of the assumed function declaration.
275: * Note that ANSI C functions can have type definitions so keep
276: * track of the parentheses nesting level.
277: */
278: while (GETC(!=, EOF)) {
279: switch (c) {
280: case '\'':
281: case '"':
282: /* skip strings and character constants */
283: skip_string(c);
284: break;
285: case '/':
286: /* skip comments */
287: if (GETC(==, '*'))
1.8 pjanzen 288: skip_comment(c);
289: else if (c == '/')
290: skip_comment(c);
1.1 deraadt 291: break;
292: case '(':
293: level++;
294: break;
295: case ')':
296: if (level == 0)
297: goto fnd;
298: level--;
299: break;
300: case '\n':
301: SETLINE;
302: }
303: }
304: return (NO);
305: fnd:
306: /*
307: * we assume that the character after a function's right paren
308: * is a token character if it's a function and a non-token
309: * character if it's a declaration. Comments don't count...
310: */
1.8 pjanzen 311: for (anext = maybe_attribute;;) {
1.1 deraadt 312: while (GETC(!=, EOF) && iswhite(c))
313: if (c == '\n')
314: SETLINE;
1.8 pjanzen 315: if (c == EOF)
316: return NO;
317: /*
318: * Recognize the GNU __attribute__ extension, which would
319: * otherwise make the heuristic test DTWT
320: */
321: if (anext == maybe_attribute) {
322: if (intoken(c)) {
323: *anext++ = c;
324: continue;
325: }
326: } else {
327: if (intoken(c)) {
328: if (anext - maybe_attribute < (int)(sizeof attribute - 1))
329: *anext++ = c;
330: else
331: break;
332: continue;
333: } else {
334: *anext++ = '\0';
335: if (strcmp(maybe_attribute, attribute) == 0) {
336: (void)ungetc(c, inf);
337: return NO;
338: }
339: break;
340: }
341: }
1.1 deraadt 342: if (intoken(c) || c == '{')
343: break;
344: if (c == '/' && GETC(==, '*'))
1.8 pjanzen 345: skip_comment(c);
346: else if (c == '/')
347: skip_comment(c);
1.1 deraadt 348: else { /* don't ever "read" '/' */
349: (void)ungetc(c, inf);
350: return (NO);
351: }
352: }
353: if (c != '{')
354: (void)skip_key('{');
355: return (YES);
356: }
357:
358: /*
359: * hash_entry --
360: * handle a line starting with a '#'
361: */
362: static void
1.10 deraadt 363: hash_entry(void)
1.1 deraadt 364: {
365: int c; /* character read */
366: int curline; /* line started on */
367: char *sp; /* buffer pointer */
368: char tok[MAXTOKEN]; /* storage buffer */
1.4 deraadt 369:
370: /*
371: * to treat following macro.
372: * # macro(arg) ....
373: */
374: while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
375: ;
376: (void)ungetc(c, inf);
1.1 deraadt 377:
378: curline = lineno;
379: for (sp = tok;;) { /* get next token */
380: if (GETC(==, EOF))
381: return;
382: if (iswhite(c))
383: break;
1.6 espie 384: /* hell... truncate it */
385: if (sp == tok + sizeof tok - 1)
386: *sp = EOS;
387: else
1.5 deraadt 388: *sp++ = c;
1.1 deraadt 389: }
390: *sp = EOS;
391: if (memcmp(tok, "define", 6)) /* only interested in #define's */
392: goto skip;
393: for (;;) { /* this doesn't handle "#define \n" */
394: if (GETC(==, EOF))
395: return;
396: if (!iswhite(c))
397: break;
398: }
399: for (sp = tok;;) { /* get next token */
1.6 espie 400: /* hell... truncate it */
401: if (sp == tok + sizeof tok - 1)
402: *sp = EOS;
403: else
1.5 deraadt 404: *sp++ = c;
1.1 deraadt 405: if (GETC(==, EOF))
406: return;
407: /*
408: * this is where it DOESN'T handle
409: * "#define \n"
410: */
411: if (!intoken(c))
412: break;
413: }
414: *sp = EOS;
415: if (dflag || c == '(') { /* only want macros */
416: getline();
417: pfnote(tok, curline);
418: }
419: skip: if (c == '\n') { /* get rid of rest of define */
420: SETLINE
421: if (*(sp - 1) != '\\')
422: return;
423: }
424: (void)skip_key('\n');
425: }
426:
427: /*
428: * str_entry --
429: * handle a struct, union or enum entry
430: */
431: static int
1.10 deraadt 432: str_entry(int c)
1.1 deraadt 433: {
434: int curline; /* line started on */
435: char *sp; /* buffer pointer */
436: char tok[LINE_MAX]; /* storage buffer */
437:
438: curline = lineno;
439: while (iswhite(c))
440: if (GETC(==, EOF))
441: return (NO);
442: if (c == '{') /* it was "struct {" */
443: return (YES);
444: for (sp = tok;;) { /* get next token */
1.6 espie 445: /* hell... truncate it */
446: if (sp == tok + sizeof tok - 1)
447: *sp = EOS;
448: else
1.5 deraadt 449: *sp++ = c;
1.1 deraadt 450: if (GETC(==, EOF))
451: return (NO);
452: if (!intoken(c))
453: break;
454: }
455: switch (c) {
456: case '{': /* it was "struct foo{" */
457: --sp;
458: break;
459: case '\n': /* it was "struct foo\n" */
460: SETLINE;
461: /*FALLTHROUGH*/
462: default: /* probably "struct foo " */
463: while (GETC(!=, EOF))
464: if (!iswhite(c))
465: break;
466: if (c != '{') {
467: (void)ungetc(c, inf);
468: return (NO);
469: }
470: }
471: *sp = EOS;
472: pfnote(tok, curline);
473: return (YES);
474: }
475:
476: /*
477: * skip_comment --
478: * skip over comment
479: */
480: void
1.8 pjanzen 481: skip_comment(int commenttype)
1.1 deraadt 482: {
483: int c; /* character read */
484: int star; /* '*' flag */
485:
486: for (star = 0; GETC(!=, EOF);)
487: switch(c) {
488: /* comments don't nest, nor can they be escaped. */
489: case '*':
490: star = YES;
491: break;
492: case '/':
1.8 pjanzen 493: if (commenttype == '*' && star)
1.1 deraadt 494: return;
495: break;
496: case '\n':
1.8 pjanzen 497: if (commenttype == '/') {
498: /* We don't really parse C, so sometimes it
499: * is necessary to see the newline
500: */
501: ungetc(c, inf);
502: return;
503: }
1.1 deraadt 504: SETLINE;
505: /*FALLTHROUGH*/
506: default:
507: star = NO;
508: break;
509: }
510: }
511:
512: /*
513: * skip_string --
514: * skip to the end of a string or character constant.
515: */
1.11 avsm 516: static void
1.10 deraadt 517: skip_string(int key)
1.1 deraadt 518: {
519: int c,
520: skip;
521:
522: for (skip = NO; GETC(!=, EOF); )
523: switch (c) {
524: case '\\': /* a backslash escapes anything */
525: skip = !skip; /* we toggle in case it's "\\" */
526: break;
527: case '\n':
528: SETLINE;
529: /*FALLTHROUGH*/
530: default:
531: if (c == key && !skip)
532: return;
533: skip = NO;
534: }
535: }
536:
537: /*
538: * skip_key --
539: * skip to next char "key"
540: */
541: int
1.10 deraadt 542: skip_key(int key)
1.1 deraadt 543: {
544: int c,
545: skip,
546: retval;
547:
548: for (skip = retval = NO; GETC(!=, EOF);)
549: switch(c) {
550: case '\\': /* a backslash escapes anything */
551: skip = !skip; /* we toggle in case it's "\\" */
552: break;
553: case ';': /* special case for yacc; if one */
554: case '|': /* of these chars occurs, we may */
555: retval = YES; /* have moved out of the rule */
556: break; /* not used by C */
557: case '\'':
558: case '"':
559: /* skip strings and character constants */
560: skip_string(c);
561: break;
562: case '/':
563: /* skip comments */
564: if (GETC(==, '*')) {
1.8 pjanzen 565: skip_comment(c);
566: break;
567: } else if (c == '/') {
568: skip_comment(c);
1.1 deraadt 569: break;
570: }
571: (void)ungetc(c, inf);
572: c = '/';
573: goto norm;
574: case '\n':
575: SETLINE;
576: /*FALLTHROUGH*/
577: default:
578: norm:
579: if (c == key && !skip)
580: return (retval);
581: skip = NO;
582: }
583: return (retval);
584: }