Annotation of src/usr.bin/indent/lexi.c, Revision 1.4
1.4 ! deraadt 1: /* $OpenBSD: lexi.c,v 1.3 1997/07/25 22:00:46 mickey Exp $ */
1.2 deraadt 2:
1.1 deraadt 3: /*
4: * Copyright (c) 1985 Sun Microsystems, Inc.
5: * Copyright (c) 1980 The Regents of the University of California.
6: * Copyright (c) 1976 Board of Trustees of the University of Illinois.
7: * All rights reserved.
8: *
9: * Redistribution and use in source and binary forms, with or without
10: * modification, are permitted provided that the following conditions
11: * are met:
12: * 1. Redistributions of source code must retain the above copyright
13: * notice, this list of conditions and the following disclaimer.
14: * 2. Redistributions in binary form must reproduce the above copyright
15: * notice, this list of conditions and the following disclaimer in the
16: * documentation and/or other materials provided with the distribution.
17: * 3. All advertising materials mentioning features or use of this software
18: * must display the following acknowledgement:
19: * This product includes software developed by the University of
20: * California, Berkeley and its contributors.
21: * 4. Neither the name of the University nor the names of its contributors
22: * may be used to endorse or promote products derived from this software
23: * without specific prior written permission.
24: *
25: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35: * SUCH DAMAGE.
36: */
37:
38: #ifndef lint
39: /*static char sccsid[] = "from: @(#)lexi.c 5.16 (Berkeley) 2/26/91";*/
1.4 ! deraadt 40: static char rcsid[] = "$OpenBSD: lexi.c,v 1.3 1997/07/25 22:00:46 mickey Exp $";
1.1 deraadt 41: #endif /* not lint */
42:
43: /*
44: * Here we have the token scanner for indent. It scans off one token and puts
45: * it in the global variable "token". It returns a code, indicating the type
46: * of token scanned.
47: */
48:
#include <ctype.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "indent_globs.h"
#include "indent_codes.h"
55:
/*
 * Character classes stored in chartype[]: alphanum marks characters that
 * may appear in an identifier or number, opchar marks operator characters.
 */
enum {
	alphanum = 1,
	opchar = 3
};
58:
/*
 * One entry of the reserved-word table.
 *
 * rwd    - spelling of the keyword
 * rwcode - keyword class, dispatched on by the keyword switch in lexi():
 *          1 switch; 2 case/default; 3 struct/union/enum;
 *          4 declaration keywords; 5 if/while/for; 6 else/do; 7 sizeof;
 *          any other value is treated like an ordinary identifier.
 */
struct templ {
	char *rwd;
	int rwcode;
};

/*
 * Built-in keywords.  This is the initial contents of the keyword table;
 * the entry for "typedef" was previously misspelled "typdef", so the real
 * keyword was never matched.
 */
struct templ specialsinit[] = {
	{ "switch", 1 },
	{ "case", 2 },
	{ "break", 0 },
	{ "struct", 3 },
	{ "union", 3 },
	{ "enum", 3 },
	{ "default", 2 },
	{ "int", 4 },
	{ "char", 4 },
	{ "float", 4 },
	{ "double", 4 },
	{ "long", 4 },
	{ "short", 4 },
	{ "typedef", 4 },
	{ "unsigned", 4 },
	{ "register", 4 },
	{ "static", 4 },
	{ "global", 4 },
	{ "extern", 4 },
	{ "void", 4 },
	{ "goto", 0 },
	{ "return", 0 },
	{ "if", 5 },
	{ "while", 5 },
	{ "for", 5 },
	{ "else", 6 },
	{ "do", 6 },
	{ "sizeof", 7 },
};
94:
1.4 ! deraadt 95: struct templ *specials = specialsinit;
! 96: int nspecials = sizeof(specialsinit) / sizeof(specialsinit[0]);
! 97: int maxspecials;
! 98:
/*
 * Classification of each 7-bit character code, used to decide what kind of
 * token a character can start or continue:
 *   1  alphanumeric (letters, digits, '_' and '$')
 *   3  operator character
 *   0  anything else
 */
char chartype[128] = {
	/* 0x00 - 0x07 */	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x08 - 0x0f */	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 - 0x17 */	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x18 - 0x1f */	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 - 0x27 */	0, 3, 0, 0, 1, 3, 3, 0,
	/* 0x28 - 0x2f */	0, 0, 3, 3, 0, 3, 0, 3,
	/* 0x30 - 0x37 */	1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x38 - 0x3f */	1, 1, 0, 0, 3, 3, 3, 3,
	/* 0x40 - 0x47 */	0, 1, 1, 1, 1, 1, 1, 1,
	/* 0x48 - 0x4f */	1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 - 0x57 */	1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x58 - 0x5f */	1, 1, 1, 0, 0, 0, 3, 1,
	/* 0x60 - 0x67 */	0, 1, 1, 1, 1, 1, 1, 1,
	/* 0x68 - 0x6f */	1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 - 0x77 */	1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x78 - 0x7f */	1, 1, 1, 0, 3, 0, 3, 0
};
120:
121:
122:
123:
124: int
125: lexi()
126: {
127: int unary_delim; /* this is set to 1 if the current token
128: *
129: * forces a following operator to be unary */
130: static int last_code; /* the last token type returned */
131: static int l_struct; /* set to 1 if the last token was 'struct' */
132: int code; /* internal code to be returned */
133: char qchar; /* the delimiter character for a string */
1.4 ! deraadt 134: int i;
1.1 deraadt 135:
136: e_token = s_token; /* point to start of place to save token */
137: unary_delim = false;
138: ps.col_1 = ps.last_nl; /* tell world that this token started in
139: * column 1 iff the last thing scanned was nl */
140: ps.last_nl = false;
141:
142: while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
143: ps.col_1 = false; /* leading blanks imply token is not in column
144: * 1 */
145: if (++buf_ptr >= buf_end)
146: fill_buffer();
147: }
148:
149: /* Scan an alphanumeric token */
1.3 mickey 150: if (chartype[*buf_ptr] == alphanum ||
151: (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
1.1 deraadt 152: /*
153: * we have a character or number
154: */
155: register char *j; /* used for searching thru list of
156: *
157: * reserved words */
1.3 mickey 158: if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
1.1 deraadt 159: int seendot = 0,
160: seenexp = 0;
161: if (*buf_ptr == '0' &&
162: (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
163: *e_token++ = *buf_ptr++;
164: *e_token++ = *buf_ptr++;
165: while (isxdigit(*buf_ptr)) {
166: CHECK_SIZE_TOKEN;
167: *e_token++ = *buf_ptr++;
168: }
169: }
170: else
171: while (1) {
172: if (*buf_ptr == '.')
173: if (seendot)
174: break;
175: else
176: seendot++;
177: CHECK_SIZE_TOKEN;
178: *e_token++ = *buf_ptr++;
179: if (!isdigit(*buf_ptr) && *buf_ptr != '.')
180: if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
181: break;
182: else {
183: seenexp++;
184: seendot++;
185: CHECK_SIZE_TOKEN;
186: *e_token++ = *buf_ptr++;
187: if (*buf_ptr == '+' || *buf_ptr == '-')
188: *e_token++ = *buf_ptr++;
189: }
190: }
191: if (*buf_ptr == 'L' || *buf_ptr == 'l')
192: *e_token++ = *buf_ptr++;
193: }
194: else
195: while (chartype[*buf_ptr] == alphanum) { /* copy it over */
196: CHECK_SIZE_TOKEN;
197: *e_token++ = *buf_ptr++;
198: if (buf_ptr >= buf_end)
199: fill_buffer();
200: }
201: *e_token++ = '\0';
202: while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
203: if (++buf_ptr >= buf_end)
204: fill_buffer();
205: }
206: ps.its_a_keyword = false;
207: ps.sizeof_keyword = false;
208: if (l_struct) { /* if last token was 'struct', then this token
209: * should be treated as a declaration */
210: l_struct = false;
211: last_code = ident;
212: ps.last_u_d = true;
213: return (decl);
214: }
215: ps.last_u_d = false; /* Operator after indentifier is binary */
216: last_code = ident; /* Remember that this is the code we will
217: * return */
218:
219: /*
220: * This loop will check if the token is a keyword.
221: */
1.4 ! deraadt 222: for (i = 0; i < nspecials; i++) {
1.1 deraadt 223: register char *p = s_token; /* point at scanned token */
1.4 ! deraadt 224: j = specials[i].rwd;
1.1 deraadt 225: if (*j++ != *p++ || *j++ != *p++)
226: continue; /* This test depends on the fact that
227: * identifiers are always at least 1 character
228: * long (ie. the first two bytes of the
229: * identifier are always meaningful) */
230: if (p[-1] == 0)
231: break; /* If its a one-character identifier */
232: while (*p++ == *j)
233: if (*j++ == 0)
234: goto found_keyword; /* I wish that C had a multi-level
235: * break... */
236: }
1.4 ! deraadt 237: if (i < nspecials) { /* we have a keyword */
1.1 deraadt 238: found_keyword:
239: ps.its_a_keyword = true;
240: ps.last_u_d = true;
1.4 ! deraadt 241: switch (specials[i].rwcode) {
1.1 deraadt 242: case 1: /* it is a switch */
243: return (swstmt);
244: case 2: /* a case or default */
245: return (casestmt);
246:
247: case 3: /* a "struct" */
248: if (ps.p_l_follow)
249: break; /* inside parens: cast */
250: l_struct = true;
251:
252: /*
253: * Next time around, we will want to know that we have had a
254: * 'struct'
255: */
256: case 4: /* one of the declaration keywords */
257: if (ps.p_l_follow) {
258: ps.cast_mask |= 1 << ps.p_l_follow;
259: break; /* inside parens: cast */
260: }
261: last_code = decl;
262: return (decl);
263:
264: case 5: /* if, while, for */
265: return (sp_paren);
266:
267: case 6: /* do, else */
268: return (sp_nparen);
269:
270: case 7:
271: ps.sizeof_keyword = true;
272: default: /* all others are treated like any other
273: * identifier */
274: return (ident);
275: } /* end of switch */
276: } /* end of if (found_it) */
277: if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
278: register char *tp = buf_ptr;
279: while (tp < buf_end)
280: if (*tp++ == ')' && (*tp == ';' || *tp == ','))
281: goto not_proc;
282: strncpy(ps.procname, token, sizeof ps.procname - 1);
283: ps.in_parameter_declaration = 1;
284: rparen_count = 1;
285: not_proc:;
286: }
287: /*
288: * The following hack attempts to guess whether or not the current
289: * token is in fact a declaration keyword -- one that has been
290: * typedefd
291: */
292: if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
293: && !ps.p_l_follow
294: && !ps.block_init
295: && (ps.last_token == rparen || ps.last_token == semicolon ||
296: ps.last_token == decl ||
297: ps.last_token == lbrace || ps.last_token == rbrace)) {
298: ps.its_a_keyword = true;
299: ps.last_u_d = true;
300: last_code = decl;
301: return decl;
302: }
303: if (last_code == decl) /* if this is a declared variable, then
304: * following sign is unary */
305: ps.last_u_d = true; /* will make "int a -1" work */
306: last_code = ident;
307: return (ident); /* the ident is not in the list */
308: } /* end of procesing for alpanum character */
309:
310: /* Scan a non-alphanumeric token */
311:
312: *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
313: * moved here */
314: *e_token = '\0';
315: if (++buf_ptr >= buf_end)
316: fill_buffer();
317:
318: switch (*token) {
319: case '\n':
320: unary_delim = ps.last_u_d;
321: ps.last_nl = true; /* remember that we just had a newline */
322: code = (had_eof ? 0 : newline);
323:
324: /*
325: * if data has been exausted, the newline is a dummy, and we should
326: * return code to stop
327: */
328: break;
329:
330: case '\'': /* start of quoted character */
331: case '"': /* start of string */
332: qchar = *token;
333: if (troff) {
334: e_token[-1] = '`';
335: if (qchar == '"')
336: *e_token++ = '`';
337: e_token = chfont(&bodyf, &stringf, e_token);
338: }
339: do { /* copy the string */
340: while (1) { /* move one character or [/<char>]<char> */
341: if (*buf_ptr == '\n') {
342: printf("%d: Unterminated literal\n", line_no);
343: goto stop_lit;
344: }
345: CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
346: * since CHECK_SIZE guarantees that there
347: * are at least 5 entries left */
348: *e_token = *buf_ptr++;
349: if (buf_ptr >= buf_end)
350: fill_buffer();
351: if (*e_token == BACKSLASH) { /* if escape, copy extra char */
352: if (*buf_ptr == '\n') /* check for escaped newline */
353: ++line_no;
354: if (troff) {
355: *++e_token = BACKSLASH;
356: if (*buf_ptr == BACKSLASH)
357: *++e_token = BACKSLASH;
358: }
359: *++e_token = *buf_ptr++;
360: ++e_token; /* we must increment this again because we
361: * copied two chars */
362: if (buf_ptr >= buf_end)
363: fill_buffer();
364: }
365: else
366: break; /* we copied one character */
367: } /* end of while (1) */
368: } while (*e_token++ != qchar);
369: if (troff) {
370: e_token = chfont(&stringf, &bodyf, e_token - 1);
371: if (qchar == '"')
372: *e_token++ = '\'';
373: }
374: stop_lit:
375: code = ident;
376: break;
377:
378: case ('('):
379: case ('['):
380: unary_delim = true;
381: code = lparen;
382: break;
383:
384: case (')'):
385: case (']'):
386: code = rparen;
387: break;
388:
389: case '#':
390: unary_delim = ps.last_u_d;
391: code = preesc;
392: break;
393:
394: case '?':
395: unary_delim = true;
396: code = question;
397: break;
398:
399: case (':'):
400: code = colon;
401: unary_delim = true;
402: break;
403:
404: case (';'):
405: unary_delim = true;
406: code = semicolon;
407: break;
408:
409: case ('{'):
410: unary_delim = true;
411:
412: /*
413: * if (ps.in_or_st) ps.block_init = 1;
414: */
415: /* ? code = ps.block_init ? lparen : lbrace; */
416: code = lbrace;
417: break;
418:
419: case ('}'):
420: unary_delim = true;
421: /* ? code = ps.block_init ? rparen : rbrace; */
422: code = rbrace;
423: break;
424:
425: case 014: /* a form feed */
426: unary_delim = ps.last_u_d;
427: ps.last_nl = true; /* remember this so we can set 'ps.col_1'
428: * right */
429: code = form_feed;
430: break;
431:
432: case (','):
433: unary_delim = true;
434: code = comma;
435: break;
436:
437: case '.':
438: unary_delim = false;
439: code = period;
440: break;
441:
442: case '-':
443: case '+': /* check for -, +, --, ++ */
444: code = (ps.last_u_d ? unary_op : binary_op);
445: unary_delim = true;
446:
447: if (*buf_ptr == token[0]) {
448: /* check for doubled character */
449: *e_token++ = *buf_ptr++;
450: /* buffer overflow will be checked at end of loop */
451: if (last_code == ident || last_code == rparen) {
452: code = (ps.last_u_d ? unary_op : postop);
453: /* check for following ++ or -- */
454: unary_delim = false;
455: }
456: }
457: else if (*buf_ptr == '=')
458: /* check for operator += */
459: *e_token++ = *buf_ptr++;
460: else if (*buf_ptr == '>') {
461: /* check for operator -> */
462: *e_token++ = *buf_ptr++;
463: if (!pointer_as_binop) {
464: unary_delim = false;
465: code = unary_op;
466: ps.want_blank = false;
467: }
468: }
469: break; /* buffer overflow will be checked at end of
470: * switch */
471:
472: case '=':
473: if (ps.in_or_st)
474: ps.block_init = 1;
475: #ifdef undef
476: if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
477: e_token[-1] = *buf_ptr++;
478: if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
479: *e_token++ = *buf_ptr++;
480: *e_token++ = '='; /* Flip =+ to += */
481: *e_token = 0;
482: }
483: #else
484: if (*buf_ptr == '=') {/* == */
485: *e_token++ = '='; /* Flip =+ to += */
486: buf_ptr++;
487: *e_token = 0;
488: }
489: #endif
490: code = binary_op;
491: unary_delim = true;
492: break;
493: /* can drop thru!!! */
494:
495: case '>':
496: case '<':
497: case '!': /* ops like <, <<, <=, !=, etc */
498: if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
499: *e_token++ = *buf_ptr;
500: if (++buf_ptr >= buf_end)
501: fill_buffer();
502: }
503: if (*buf_ptr == '=')
504: *e_token++ = *buf_ptr++;
505: code = (ps.last_u_d ? unary_op : binary_op);
506: unary_delim = true;
507: break;
508:
509: default:
510: if (token[0] == '/' && *buf_ptr == '*') {
511: /* it is start of comment */
512: *e_token++ = '*';
513:
514: if (++buf_ptr >= buf_end)
515: fill_buffer();
516:
517: code = comment;
518: unary_delim = ps.last_u_d;
519: break;
520: }
521: while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
522: /*
523: * handle ||, &&, etc, and also things as in int *****i
524: */
525: *e_token++ = *buf_ptr;
526: if (++buf_ptr >= buf_end)
527: fill_buffer();
528: }
529: code = (ps.last_u_d ? unary_op : binary_op);
530: unary_delim = true;
531:
532:
533: } /* end of switch */
534: if (code != newline) {
535: l_struct = false;
536: last_code = code;
537: }
538: if (buf_ptr >= buf_end) /* check for input buffer empty */
539: fill_buffer();
540: ps.last_u_d = unary_delim;
541: *e_token = '\0'; /* null terminate the token */
542: return (code);
543: }
544:
545: /*
546: * Add the given keyword to the keyword table, using val as the keyword type
547: */
1.3 mickey 548: void
1.1 deraadt 549: addkey(key, val)
550: char *key;
1.4 ! deraadt 551: int val;
1.1 deraadt 552: {
1.4 ! deraadt 553: register struct templ *p;
! 554: int i = 0;
! 555:
! 556: while (i < nspecials) {
! 557: p = &specials[i];
1.1 deraadt 558: if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
559: return;
560: else
1.4 ! deraadt 561: i++;
! 562: }
! 563:
! 564: if (specials == specialsinit) {
! 565: /*
! 566: * Whoa. Must reallocate special table.
! 567: */
! 568: printf("alloc\n");
! 569: nspecials = sizeof (specialsinit) / sizeof (specialsinit[0]);
! 570: maxspecials = nspecials;
! 571: maxspecials += maxspecials >> 2;
! 572: specials = (struct templ *)malloc(maxspecials * sizeof specials[0]);
! 573: if (specials == NULL)
! 574: errx(1, "indent: out of memory");
! 575: memmove(specials, specialsinit, sizeof specialsinit);
! 576: } else if (nspecials >= maxspecials) {
! 577: printf("realloc\n");
! 578: maxspecials += maxspecials >> 2;
! 579: specials = realloc(specials, maxspecials * sizeof specials[0]);
! 580: if (specials == NULL)
! 581: errx(1, "indent: out of memory");
! 582: }
! 583:
! 584: p = &specials[i];
1.1 deraadt 585: p->rwd = key;
586: p->rwcode = val;
1.4 ! deraadt 587: nspecials++;
1.1 deraadt 588: return;
589: }