Annotation of src/usr.bin/indent/lexi.c, Revision 1.6
1.6 ! deraadt 1: /* $OpenBSD: lexi.c,v 1.5 1997/09/10 07:06:37 deraadt Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /*
4: * Copyright (c) 1985 Sun Microsystems, Inc.
5: * Copyright (c) 1980 The Regents of the University of California.
6: * Copyright (c) 1976 Board of Trustees of the University of Illinois.
7: * All rights reserved.
8: *
9: * Redistribution and use in source and binary forms, with or without
10: * modification, are permitted provided that the following conditions
11: * are met:
12: * 1. Redistributions of source code must retain the above copyright
13: * notice, this list of conditions and the following disclaimer.
14: * 2. Redistributions in binary form must reproduce the above copyright
15: * notice, this list of conditions and the following disclaimer in the
16: * documentation and/or other materials provided with the distribution.
17: * 3. All advertising materials mentioning features or use of this software
18: * must display the following acknowledgement:
19: * This product includes software developed by the University of
20: * California, Berkeley and its contributors.
21: * 4. Neither the name of the University nor the names of its contributors
22: * may be used to endorse or promote products derived from this software
23: * without specific prior written permission.
24: *
25: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35: * SUCH DAMAGE.
36: */
37:
38: #ifndef lint
39: /*static char sccsid[] = "from: @(#)lexi.c 5.16 (Berkeley) 2/26/91";*/
1.6 ! deraadt 40: static char rcsid[] = "$OpenBSD: lexi.c,v 1.5 1997/09/10 07:06:37 deraadt Exp $";
1.1 deraadt 41: #endif /* not lint */
42:
43: /*
44: * Here we have the token scanner for indent. It scans off one token and puts
45: * it in the global variable "token". It returns a code, indicating the type
46: * of token scanned.
47: */
48:
49: #include <stdio.h>
50: #include <ctype.h>
51: #include <stdlib.h>
52: #include <string.h>
53: #include "indent_globs.h"
54: #include "indent_codes.h"
55:
/* Character classes stored in the chartype[] table. */
#define alphanum	1	/* letters, digits, '_' and '$' */
#define opchar		3	/* characters that can form an operator */
58:
/*
 * One entry of the reserved-word table: the spelling of a word and the
 * class code that lexi() switches on once the word has been scanned.
 */
struct templ {
    char       *rwd;		/* spelling of the reserved word */
    int         rwcode;		/* class code, see the list below */
};

/*
 * Built-in reserved words.  lexi() interprets the class codes as follows:
 *   0  no special handling; returned as an ordinary identifier
 *   1  switch
 *   2  case / default
 *   3  struct / union / enum
 *   4  declaration keyword (types and storage classes)
 *   5  keyword followed by a parenthesised expression (if, while, for)
 *   6  keyword not followed by parentheses (else, do)
 *   7  sizeof
 *
 * The table is searched linearly, so the order of the entries does not
 * affect which code a word gets.
 */
struct templ specialsinit[] = {
    { "switch",   1 },
    { "case",     2 },
    { "break",    0 },
    { "struct",   3 },
    { "union",    3 },
    { "enum",     3 },
    { "default",  2 },
    { "int",      4 },
    { "char",     4 },
    { "float",    4 },
    { "double",   4 },
    { "long",     4 },
    { "short",    4 },
    { "typedef",  4 },		/* was misspelt "typdef", so never matched */
    { "unsigned", 4 },
    { "register", 4 },
    { "static",   4 },
    { "global",   4 },
    { "extern",   4 },
    { "void",     4 },
    { "goto",     0 },
    { "return",   0 },
    { "if",       5 },
    { "while",    5 },
    { "for",      5 },
    { "else",     6 },
    { "do",       6 },
    { "sizeof",   7 },
};
94:
1.4 deraadt 95: struct templ *specials = specialsinit;
96: int nspecials = sizeof(specialsinit) / sizeof(specialsinit[0]);
97: int maxspecials;
98:
/*
 * Character classification table, indexed by 7-bit ASCII code.  An entry is
 * 1 for characters that may appear in an identifier or number, 3 for
 * characters that may form an operator, and 0 for everything else.
 */
char chartype[128] = {
    /* 0x00 - 0x07 : control      */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x08 - 0x0f : control      */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x17 : control      */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x18 - 0x1f : control      */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* sp ! " # $ % & '           */ 0, 3, 0, 0, 1, 3, 3, 0,
    /* (  ) * + , - . /           */ 0, 0, 3, 3, 0, 3, 0, 3,
    /* 0  1 2 3 4 5 6 7           */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* 8  9 : ; < = > ?           */ 1, 1, 0, 0, 3, 3, 3, 3,
    /* @  A B C D E F G           */ 0, 1, 1, 1, 1, 1, 1, 1,
    /* H  I J K L M N O           */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* P  Q R S T U V W           */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* X  Y Z [ \ ] ^ _           */ 1, 1, 1, 0, 0, 0, 3, 1,
    /* `  a b c d e f g           */ 0, 1, 1, 1, 1, 1, 1, 1,
    /* h  i j k l m n o           */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* p  q r s t u v w           */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* x  y z { | } ~ DEL         */ 1, 1, 1, 0, 3, 0, 3, 0
};
120:
121:
122:
123:
/*
 * lexi -- scan one token from the input buffer.
 *
 * Skips blanks and tabs at buf_ptr, copies the text of the next token into
 * the token buffer starting at s_token (NUL-terminated) and advances
 * buf_ptr past it, calling fill_buffer() at the points where it finds that
 * buf_ptr has reached buf_end.
 *
 * Returns a code classifying the token.  The codes produced here are
 * ident, decl, swstmt, casestmt, sp_paren, sp_nparen, newline, lparen,
 * rparen, preesc, question, colon, semicolon, lbrace, rbrace, form_feed,
 * comma, period, unary_op, binary_op, postop and comment; a newline seen
 * while had_eof is set yields 0 instead of newline.
 *
 * Besides the return value, the scanner records state in ps (col_1,
 * last_nl, last_u_d, its_a_keyword, sizeof_keyword, cast_mask, block_init,
 * procname, in_parameter_declaration, want_blank) and remembers the
 * previous token in the statics last_code and l_struct.
 *
 * NOTE(review): "token" is used interchangeably with s_token below;
 * presumably it is an alias declared in indent_globs.h -- confirm there.
 */
124: int
125: lexi()
126: {
127: int unary_delim; /* this is set to 1 if the current token
128: *
129: * forces a following operator to be unary */
130: static int last_code; /* the last token type returned */
131: static int l_struct; /* set to 1 if the last token was 'struct' */
132: int code; /* internal code to be returned */
133: char qchar; /* the delimiter character for a string */
1.4 deraadt 134: int i; /* index into the specials[] keyword table */
1.1 deraadt 135:
136: e_token = s_token; /* point to start of place to save token */
137: unary_delim = false;
138: ps.col_1 = ps.last_nl; /* tell world that this token started in
139: * column 1 iff the last thing scanned was nl */
140: ps.last_nl = false;
141:
142: while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
143: ps.col_1 = false; /* leading blanks imply token is not in column
144: * 1 */
145: if (++buf_ptr >= buf_end)
146: fill_buffer();
147: }
148:
149: /* Scan an alphanumeric token */
1.3 mickey 150: if (chartype[*buf_ptr] == alphanum ||
151: (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
1.1 deraadt 152: /*
153: * we have a character or number
154: */
155: register char *j; /* used for searching thru list of
156: *
157: * reserved words */
1.3 mickey 158: if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
/* numeric constant: seendot/seenexp record that a '.' or an exponent has
 * been consumed; seensfx is a bit mask of suffixes seen (1 = U, 2 = L) */
1.1 deraadt 159: int seendot = 0,
1.6 ! deraadt 160: seenexp = 0,
! 161: seensfx = 0;
1.1 deraadt 162: if (*buf_ptr == '0' &&
163: (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
/* hexadecimal constant: copy the "0x" prefix, then the hex digits */
164: *e_token++ = *buf_ptr++;
165: *e_token++ = *buf_ptr++;
166: while (isxdigit(*buf_ptr)) {
167: CHECK_SIZE_TOKEN;
168: *e_token++ = *buf_ptr++;
169: }
170: }
171: else
/* decimal or floating constant: digits with at most one '.' and at most
 * one exponent, which may carry a sign */
172: while (1) {
173: if (*buf_ptr == '.')
174: if (seendot)
175: break;
176: else
177: seendot++;
178: CHECK_SIZE_TOKEN;
179: *e_token++ = *buf_ptr++;
180: if (!isdigit(*buf_ptr) && *buf_ptr != '.')
181: if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
182: break;
183: else {
184: seenexp++;
185: seendot++;
186: CHECK_SIZE_TOKEN;
187: *e_token++ = *buf_ptr++;
188: if (*buf_ptr == '+' || *buf_ptr == '-')
189: *e_token++ = *buf_ptr++;
190: }
191: }
/* suffixes: accept one U/u and one L/l (or a doubled LL/ll), in either
 * order */
1.6 ! deraadt 192: while (1) {
! 193: if (!(seensfx & 1) &&
! 194: (*buf_ptr == 'U' || *buf_ptr == 'u')) {
! 195: CHECK_SIZE_TOKEN;
! 196: *e_token++ = *buf_ptr++;
! 197: seensfx |= 1;
! 198: continue;
! 199: }
! 200: if (!(seensfx & 2) &&
! 201: (*buf_ptr == 'L' || *buf_ptr == 'l')) {
! 202: CHECK_SIZE_TOKEN;
! 203: if (buf_ptr[1] == buf_ptr[0])
! 204: *e_token++ = *buf_ptr++;
! 205: *e_token++ = *buf_ptr++;
! 206: seensfx |= 2;
! 207: continue;
! 208: }
! 209: break;
! 210: }
1.1 deraadt 211: }
212: else
213: while (chartype[*buf_ptr] == alphanum) { /* copy it over */
214: CHECK_SIZE_TOKEN;
215: *e_token++ = *buf_ptr++;
216: if (buf_ptr >= buf_end)
217: fill_buffer();
218: }
219: *e_token++ = '\0';
220: while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
221: if (++buf_ptr >= buf_end)
222: fill_buffer();
223: }
224: ps.its_a_keyword = false;
225: ps.sizeof_keyword = false;
226: if (l_struct) { /* if last token was 'struct', then this token
227: * should be treated as a declaration */
228: l_struct = false;
229: last_code = ident;
230: ps.last_u_d = true;
231: return (decl);
232: }
233: ps.last_u_d = false; /* Operator after identifier is binary */
234: last_code = ident; /* Remember that this is the code we will
235: * return */
236:
237: /*
238: * This loop will check if the token is a keyword.
239: */
1.4 deraadt 240: for (i = 0; i < nspecials; i++) {
1.1 deraadt 241: register char *p = s_token; /* point at scanned token */
1.4 deraadt 242: j = specials[i].rwd;
1.1 deraadt 243: if (*j++ != *p++ || *j++ != *p++)
244: continue; /* This test depends on the fact that
245: * identifiers are always at least 1 character
246: * long (ie. the first two bytes of the
247: * identifier are always meaningful) */
248: if (p[-1] == 0)
249: break; /* If its a one-character identifier */
250: while (*p++ == *j)
251: if (*j++ == 0)
252: goto found_keyword; /* I wish that C had a multi-level
253: * break... */
254: }
1.4 deraadt 255: if (i < nspecials) { /* we have a keyword */
1.1 deraadt 256: found_keyword:
257: ps.its_a_keyword = true;
258: ps.last_u_d = true;
1.4 deraadt 259: switch (specials[i].rwcode) {
1.1 deraadt 260: case 1: /* it is a switch */
261: return (swstmt);
262: case 2: /* a case or default */
263: return (casestmt);
264:
265: case 3: /* a "struct" */
266: if (ps.p_l_follow)
267: break; /* inside parens: cast */
268: l_struct = true;
269:
270: /*
271: * Next time around, we will want to know that we have had a
272: * 'struct'
273: */
/* FALLTHROUGH */
274: case 4: /* one of the declaration keywords */
275: if (ps.p_l_follow) {
276: ps.cast_mask |= 1 << ps.p_l_follow;
277: break; /* inside parens: cast */
278: }
279: last_code = decl;
280: return (decl);
281:
282: case 5: /* if, while, for */
283: return (sp_paren);
284:
285: case 6: /* do, else */
286: return (sp_nparen);
287:
288: case 7:
289: ps.sizeof_keyword = true;
/* FALLTHROUGH */
290: default: /* all others are treated like any other
291: * identifier */
292: return (ident);
293: } /* end of switch */
294: } /* end of if (found_it) */
/* An identifier followed by '(' at the outermost level may start a
 * function definition.  Look ahead in the current buffer for a ')' that is
 * immediately followed by ';' or ','; if there is none, record the name in
 * ps.procname and note that parameter declarations follow. */
295: if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
296: register char *tp = buf_ptr;
297: while (tp < buf_end)
298: if (*tp++ == ')' && (*tp == ';' || *tp == ','))
299: goto not_proc;
300: strncpy(ps.procname, token, sizeof ps.procname - 1);
301: ps.in_parameter_declaration = 1;
302: rparen_count = 1;
303: not_proc:;
304: }
305: /*
306: * The following hack attempts to guess whether or not the current
307: * token is in fact a declaration keyword -- one that has been
308: * typedefd
309: */
310: if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
311: && !ps.p_l_follow
312: && !ps.block_init
313: && (ps.last_token == rparen || ps.last_token == semicolon ||
314: ps.last_token == decl ||
315: ps.last_token == lbrace || ps.last_token == rbrace)) {
316: ps.its_a_keyword = true;
317: ps.last_u_d = true;
318: last_code = decl;
319: return decl;
320: }
321: if (last_code == decl) /* if this is a declared variable, then
322: * following sign is unary */
323: ps.last_u_d = true; /* will make "int a -1" work */
324: last_code = ident;
325: return (ident); /* the ident is not in the list */
326: } /* end of processing for alphanum character */
327:
328: /* Scan a non-alphanumeric token */
329:
330: *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
331: * moved here */
332: *e_token = '\0';
333: if (++buf_ptr >= buf_end)
334: fill_buffer();
335:
336: switch (*token) {
337: case '\n':
338: unary_delim = ps.last_u_d;
339: ps.last_nl = true; /* remember that we just had a newline */
340: code = (had_eof ? 0 : newline);
341:
342: /*
343: * if data has been exhausted, the newline is a dummy, and we should
344: * return code to stop
345: */
346: break;
347:
348: case '\'': /* start of quoted character */
349: case '"': /* start of string */
350: qchar = *token;
351: if (troff) {
352: e_token[-1] = '`';
353: if (qchar == '"')
354: *e_token++ = '`';
355: e_token = chfont(&bodyf, &stringf, e_token);
356: }
357: do { /* copy the string */
358: while (1) { /* move one character or [\<char>]<char> */
359: if (*buf_ptr == '\n') {
360: printf("%d: Unterminated literal\n", line_no);
361: goto stop_lit;
362: }
363: CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
364: * since CHECK_SIZE guarantees that there
365: * are at least 5 entries left */
366: *e_token = *buf_ptr++;
367: if (buf_ptr >= buf_end)
368: fill_buffer();
369: if (*e_token == BACKSLASH) { /* if escape, copy extra char */
370: if (*buf_ptr == '\n') /* check for escaped newline */
371: ++line_no;
372: if (troff) {
373: *++e_token = BACKSLASH;
374: if (*buf_ptr == BACKSLASH)
375: *++e_token = BACKSLASH;
376: }
377: *++e_token = *buf_ptr++;
378: ++e_token; /* we must increment this again because we
379: * copied two chars */
380: if (buf_ptr >= buf_end)
381: fill_buffer();
382: }
383: else
384: break; /* we copied one character */
385: } /* end of while (1) */
386: } while (*e_token++ != qchar);
387: if (troff) {
388: e_token = chfont(&stringf, &bodyf, e_token - 1);
389: if (qchar == '"')
390: *e_token++ = '\'';
391: }
392: stop_lit:
393: code = ident;
394: break;
395:
396: case ('('):
397: case ('['):
398: unary_delim = true;
399: code = lparen;
400: break;
401:
402: case (')'):
403: case (']'):
404: code = rparen;
405: break;
406:
407: case '#':
408: unary_delim = ps.last_u_d;
409: code = preesc;
410: break;
411:
412: case '?':
413: unary_delim = true;
414: code = question;
415: break;
416:
417: case (':'):
418: code = colon;
419: unary_delim = true;
420: break;
421:
422: case (';'):
423: unary_delim = true;
424: code = semicolon;
425: break;
426:
427: case ('{'):
428: unary_delim = true;
429:
430: /*
431: * if (ps.in_or_st) ps.block_init = 1;
432: */
433: /* ? code = ps.block_init ? lparen : lbrace; */
434: code = lbrace;
435: break;
436:
437: case ('}'):
438: unary_delim = true;
439: /* ? code = ps.block_init ? rparen : rbrace; */
440: code = rbrace;
441: break;
442:
443: case 014: /* a form feed */
444: unary_delim = ps.last_u_d;
445: ps.last_nl = true; /* remember this so we can set 'ps.col_1'
446: * right */
447: code = form_feed;
448: break;
449:
450: case (','):
451: unary_delim = true;
452: code = comma;
453: break;
454:
455: case '.':
456: unary_delim = false;
457: code = period;
458: break;
459:
460: case '-':
461: case '+': /* check for -, +, --, ++ */
462: code = (ps.last_u_d ? unary_op : binary_op);
463: unary_delim = true;
464:
465: if (*buf_ptr == token[0]) {
466: /* check for doubled character */
467: *e_token++ = *buf_ptr++;
468: /* buffer overflow will be checked at end of loop */
469: if (last_code == ident || last_code == rparen) {
470: code = (ps.last_u_d ? unary_op : postop);
471: /* check for following ++ or -- */
472: unary_delim = false;
473: }
474: }
475: else if (*buf_ptr == '=')
476: /* check for operator += */
477: *e_token++ = *buf_ptr++;
478: else if (*buf_ptr == '>') {
479: /* check for operator -> */
480: *e_token++ = *buf_ptr++;
481: if (!pointer_as_binop) {
482: unary_delim = false;
483: code = unary_op;
484: ps.want_blank = false;
485: }
486: }
487: break; /* buffer overflow will be checked at end of
488: * switch */
489:
490: case '=':
491: if (ps.in_or_st)
492: ps.block_init = 1;
493: #ifdef undef
494: if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
495: e_token[-1] = *buf_ptr++;
496: if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
497: *e_token++ = *buf_ptr++;
498: *e_token++ = '='; /* Flip =+ to += */
499: *e_token = 0;
500: }
501: #else
502: if (*buf_ptr == '=') {/* == */
503: *e_token++ = '='; /* append the second '=' of "==" */
504: buf_ptr++;
505: *e_token = 0;
506: }
507: #endif
508: code = binary_op;
509: unary_delim = true;
510: break;
511: /* the unconditional break above means '=' does not fall through */
512:
513: case '>':
514: case '<':
515: case '!': /* ops like <, <<, <=, !=, etc */
516: if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
517: *e_token++ = *buf_ptr;
518: if (++buf_ptr >= buf_end)
519: fill_buffer();
520: }
521: if (*buf_ptr == '=')
522: *e_token++ = *buf_ptr++;
523: code = (ps.last_u_d ? unary_op : binary_op);
524: unary_delim = true;
525: break;
526:
527: default:
528: if (token[0] == '/' && *buf_ptr == '*') {
529: /* it is start of comment */
530: *e_token++ = '*';
531:
532: if (++buf_ptr >= buf_end)
533: fill_buffer();
534:
535: code = comment;
536: unary_delim = ps.last_u_d;
537: break;
538: }
539: while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
540: /*
541: * handle ||, &&, etc, and also things as in int *****i
542: */
543: *e_token++ = *buf_ptr;
544: if (++buf_ptr >= buf_end)
545: fill_buffer();
546: }
547: code = (ps.last_u_d ? unary_op : binary_op);
548: unary_delim = true;
549:
550:
551: } /* end of switch */
/* a newline does not disturb the "previous token" state */
552: if (code != newline) {
553: l_struct = false;
554: last_code = code;
555: }
556: if (buf_ptr >= buf_end) /* check for input buffer empty */
557: fill_buffer();
558: ps.last_u_d = unary_delim;
559: *e_token = '\0'; /* null terminate the token */
560: return (code);
561: }
562:
563: /*
564: * Add the given keyword to the keyword table, using val as the keyword type
565: */
1.3 mickey 566: void
1.1 deraadt 567: addkey(key, val)
568: char *key;
1.4 deraadt 569: int val;
1.1 deraadt 570: {
1.4 deraadt 571: register struct templ *p;
572: int i = 0;
573:
574: while (i < nspecials) {
575: p = &specials[i];
1.1 deraadt 576: if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
577: return;
578: else
1.4 deraadt 579: i++;
580: }
581:
582: if (specials == specialsinit) {
583: /*
584: * Whoa. Must reallocate special table.
585: */
586: nspecials = sizeof (specialsinit) / sizeof (specialsinit[0]);
587: maxspecials = nspecials;
588: maxspecials += maxspecials >> 2;
589: specials = (struct templ *)malloc(maxspecials * sizeof specials[0]);
590: if (specials == NULL)
591: errx(1, "indent: out of memory");
592: memmove(specials, specialsinit, sizeof specialsinit);
593: } else if (nspecials >= maxspecials) {
594: maxspecials += maxspecials >> 2;
595: specials = realloc(specials, maxspecials * sizeof specials[0]);
596: if (specials == NULL)
597: errx(1, "indent: out of memory");
598: }
599:
600: p = &specials[i];
1.1 deraadt 601: p->rwd = key;
602: p->rwcode = val;
1.4 deraadt 603: nspecials++;
1.1 deraadt 604: return;
605: }