src/usr.bin/indent/lexi.c - annotate

Return to lexi.c CVS log
Up to [local] / src / usr.bin / indent
Annotation of src/usr.bin/indent/lexi.c, Revision 1.1.1.1

1.1       deraadt     1: /*
                      2:  * Copyright (c) 1985 Sun Microsystems, Inc.
                      3:  * Copyright (c) 1980 The Regents of the University of California.
                      4:  * Copyright (c) 1976 Board of Trustees of the University of Illinois.
                      5:  * All rights reserved.
                      6:  *
                      7:  * Redistribution and use in source and binary forms, with or without
                      8:  * modification, are permitted provided that the following conditions
                      9:  * are met:
                     10:  * 1. Redistributions of source code must retain the above copyright
                     11:  *    notice, this list of conditions and the following disclaimer.
                     12:  * 2. Redistributions in binary form must reproduce the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer in the
                     14:  *    documentation and/or other materials provided with the distribution.
                     15:  * 3. All advertising materials mentioning features or use of this software
                     16:  *    must display the following acknowledgement:
                     17:  *     This product includes software developed by the University of
                     18:  *     California, Berkeley and its contributors.
                     19:  * 4. Neither the name of the University nor the names of its contributors
                     20:  *    may be used to endorse or promote products derived from this software
                     21:  *    without specific prior written permission.
                     22:  *
                     23:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     24:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     25:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     26:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     27:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     28:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     29:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     30:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     31:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     32:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     33:  * SUCH DAMAGE.
                     34:  */
                     35:
                     36: #ifndef lint
                     37: /*static char sccsid[] = "from: @(#)lexi.c     5.16 (Berkeley) 2/26/91";*/
                     38: static char rcsid[] = "$Id: lexi.c,v 1.2 1993/08/01 18:14:31 mycroft Exp $";
                     39: #endif /* not lint */
                     40:
                     41: /*
                     42:  * Here we have the token scanner for indent.  It scans off one token and puts
                     43:  * it in the global variable "token".  It returns a code, indicating the type
                     44:  * of token scanned.
                     45:  */
                     46:
                     47: #include <stdio.h>
                     48: #include <ctype.h>
                     49: #include <stdlib.h>
                     50: #include <string.h>
                     51: #include "indent_globs.h"
                     52: #include "indent_codes.h"
                     53:
                     54: #define alphanum 1        /* chartype[] value for characters that lexi() scans as part of a word or number */
                     55: #define opchar 3          /* chartype[] value for operator characters */
                     56:
                     /*
                      * One entry of the reserved-word table: the spelling of the word and the
                      * class code that lexi() switches on when a scanned token matches it.
                      */
                     struct templ {
                         char       *rwd;       /* keyword text; a null pointer ends the table */
                         int         rwcode;    /* keyword class, see the list below */
                     };

                     /*
                      * Reserved words known to the scanner.  The used part of the table is
                      * terminated by an entry whose rwd is null; the remaining slots (the
                      * array has room for 100 entries) are available to addkey().
                      *
                      * Class codes, as dispatched in lexi():
                      *   0  no special handling (returned as ident)
                      *   1  switch
                      *   2  case, default
                      *   3  struct, union, enum
                      *   4  declaration keywords
                      *   5  if, while, for
                      *   6  else, do
                      *   7  sizeof
                      */
                     struct templ specials[100] =
                     {
                         {"switch", 1},
                         {"case", 2},
                         {"break", 0},
                         {"struct", 3},
                         {"union", 3},
                         {"enum", 3},
                         {"default", 2},
                         {"int", 4},
                         {"char", 4},
                         {"float", 4},
                         {"double", 4},
                         {"long", 4},
                         {"short", 4},
                         {"typedef", 4},         /* was misspelt "typdef", so it never matched */
                         {"unsigned", 4},
                         {"register", 4},
                         {"static", 4},
                         {"global", 4},
                         {"extern", 4},
                         {"void", 4},
                         {"goto", 0},
                         {"return", 0},
                         {"if", 5},
                         {"while", 5},
                         {"for", 5},
                         {"else", 6},
                         {"do", 6},
                         {"sizeof", 7},
                         {0, 0}
                     };
                     94:
                     95: char        chartype[128] =
                     96: {                              /* this is used to facilitate the decision of
                     97:                                 * what type (alphanumeric, operator) each
                     98:                                 * character is */
                                                    /* entry values: 0 = neither class, 1 = alphanum, 3 = opchar.
                                                     * Each row below covers eight consecutive character codes;
                                                     * the character names in the row comments assume ASCII. */
                     99:     0, 0, 0, 0, 0, 0, 0, 0,    /* codes 0-7 */
                    100:     0, 0, 0, 0, 0, 0, 0, 0,    /* codes 8-15 */
                    101:     0, 0, 0, 0, 0, 0, 0, 0,    /* codes 16-23 */
                    102:     0, 0, 0, 0, 0, 0, 0, 0,    /* codes 24-31 */
                    103:     0, 3, 0, 0, 1, 3, 3, 0,    /* codes 32-39: '!' '%' '&' are opchar, '$' is alphanum */
                    104:     0, 0, 3, 3, 0, 3, 0, 3,    /* codes 40-47: star, plus, minus and slash are opchar */
                    105:     1, 1, 1, 1, 1, 1, 1, 1,    /* codes 48-55: digits 0-7 */
                    106:     1, 1, 0, 0, 3, 3, 3, 3,    /* codes 56-63: digits 8-9; '<' '=' '>' '?' are opchar */
                    107:     0, 1, 1, 1, 1, 1, 1, 1,    /* codes 64-71: '@' is neither; A-G */
                    108:     1, 1, 1, 1, 1, 1, 1, 1,    /* codes 72-79: H-O */
                    109:     1, 1, 1, 1, 1, 1, 1, 1,    /* codes 80-87: P-W */
                    110:     1, 1, 1, 0, 0, 0, 3, 1,    /* codes 88-95: X-Z; '^' is opchar; '_' is alphanum */
                    111:     0, 1, 1, 1, 1, 1, 1, 1,    /* codes 96-103: backquote is neither; a-g */
                    112:     1, 1, 1, 1, 1, 1, 1, 1,    /* codes 104-111: h-o */
                    113:     1, 1, 1, 1, 1, 1, 1, 1,    /* codes 112-119: p-w */
                    114:     1, 1, 1, 0, 3, 0, 3, 0     /* codes 120-127: x-z; '|' and '~' are opchar */
                    115: };
                    116:
                    117:
                    118:
                    119:
                    /*
                     * lexi -- scan the next token out of the input buffer.
                     *
                     * Skips leading blanks and tabs, copies the text of the token into the
                     * buffer that starts at s_token (e_token tracks the end of the copied
                     * text) and returns a code for the kind of token found (ident, decl,
                     * newline, binary_op, ...).
                     *
                     * Side effects visible in this function: ps.col_1, ps.last_nl and
                     * ps.last_u_d are set on every call; word tokens additionally set
                     * ps.its_a_keyword and ps.sizeof_keyword.  The statics last_code and
                     * l_struct carry context from one call to the next.
                     */
                    120: int
                    121: lexi()
                    122: {
                    123:     int         unary_delim;   /* this is set to 1 if the current token
                    124:                                 *
                    125:                                 * forces a following operator to be unary */
                    126:     static int  last_code;     /* the last token type returned */
                    127:     static int  l_struct;      /* set to 1 if the last token was 'struct' */
                    128:     int         code;          /* internal code to be returned */
                    129:     char        qchar;         /* the delimiter character for a string */
                    130:
                    131:     e_token = s_token;         /* point to start of place to save token */
                    132:     unary_delim = false;
                    133:     ps.col_1 = ps.last_nl;     /* tell world that this token started in
                    134:                                 * column 1 iff the last thing scanned was nl */
                    135:     ps.last_nl = false;
                    136:
                    137:     while (*buf_ptr == ' ' || *buf_ptr == '\t') {      /* get rid of blanks */
                    138:        ps.col_1 = false;       /* leading blanks imply token is not in column
                    139:                                 * 1 */
                    140:        if (++buf_ptr >= buf_end)
                    141:            fill_buffer();
                    142:     }
                    143:
                    144:     /* Scan an alphanumeric token */
                             /* NOTE(review): chartype[] has 128 entries and is indexed here with
                              * *buf_ptr directly; presumably the input is 7-bit, otherwise the
                              * index falls outside the table -- confirm */
                    145:     if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
                    146:        /*
                    147:         * we have a character or number
                    148:         */
                    149:        register char *j;       /* used for searching thru list of
                    150:                                 *
                    151:                                 * reserved words */
                    152:        register struct templ *p;
                    153:
                                /* numeric constant: either hex ("0x"/"0X" followed by hex digits) or
                                 * decimal/floating with at most one '.' (none once an exponent has
                                 * been seen) and at most one exponent */
                    154:        if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
                    155:            int         seendot = 0,
                    156:                        seenexp = 0;
                    157:            if (*buf_ptr == '0' &&
                    158:                    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
                                        /* NOTE(review): the two stores below are not preceded by
                                         * CHECK_SIZE_TOKEN; presumably the macro always leaves
                                         * some slack -- confirm */
                    159:                *e_token++ = *buf_ptr++;
                    160:                *e_token++ = *buf_ptr++;
                    161:                while (isxdigit(*buf_ptr)) {
                    162:                    CHECK_SIZE_TOKEN;
                    163:                    *e_token++ = *buf_ptr++;
                    164:                }
                    165:            }
                    166:            else
                    167:                while (1) {
                    168:                    if (*buf_ptr == '.')
                    169:                        if (seendot)
                    170:                            break;
                    171:                        else
                    172:                            seendot++;
                    173:                    CHECK_SIZE_TOKEN;
                    174:                    *e_token++ = *buf_ptr++;
                    175:                    if (!isdigit(*buf_ptr) && *buf_ptr != '.')
                    176:                        if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
                    177:                            break;
                    178:                        else {
                    179:                            seenexp++;
                    180:                            seendot++;
                    181:                            CHECK_SIZE_TOKEN;
                    182:                            *e_token++ = *buf_ptr++;
                    183:                            if (*buf_ptr == '+' || *buf_ptr == '-')
                    184:                                *e_token++ = *buf_ptr++;
                    185:                        }
                    186:                }
                    187:            if (*buf_ptr == 'L' || *buf_ptr == 'l')    /* optional single L/l suffix */
                    188:                *e_token++ = *buf_ptr++;
                    189:        }
                    190:        else
                    191:            while (chartype[*buf_ptr] == alphanum) {    /* copy it over */
                    192:                CHECK_SIZE_TOKEN;
                    193:                *e_token++ = *buf_ptr++;
                    194:                if (buf_ptr >= buf_end)
                    195:                    fill_buffer();
                    196:            }
                    197:        *e_token++ = '\0';
                    198:        while (*buf_ptr == ' ' || *buf_ptr == '\t') {   /* get rid of blanks */
                    199:            if (++buf_ptr >= buf_end)
                    200:                fill_buffer();
                    201:        }
                    202:        ps.its_a_keyword = false;
                    203:        ps.sizeof_keyword = false;
                    204:        if (l_struct) {         /* if last token was 'struct', then this token
                    205:                                 * should be treated as a declaration */
                    206:            l_struct = false;
                    207:            last_code = ident;
                    208:            ps.last_u_d = true;
                    209:            return (decl);
                    210:        }
                    211:        ps.last_u_d = false;    /* Operator after identifier is binary */
                    212:        last_code = ident;      /* Remember that this is the code we will
                    213:                                 * return */
                    214:
                    215:        /*
                    216:         * This loop will check if the token is a keyword.
                    217:         */
                    218:        for (p = specials; (j = p->rwd) != 0; p++) {
                                    /* this inner p (a char *) hides the table pointer p for the
                                     * rest of the loop body; the break and the goto below both
                                     * leave the body, where p is the table pointer again */
                    219:            register char *p = s_token; /* point at scanned token */
                    220:            if (*j++ != *p++ || *j++ != *p++)
                    221:                continue;       /* This test depends on the fact that
                    222:                                 * identifiers are always at least 1 character
                    223:                                 * long (ie. the first two bytes of the
                    224:                                 * identifier are always meaningful) */
                    225:            if (p[-1] == 0)
                    226:                break;          /* If its a one-character identifier */
                    227:            while (*p++ == *j)
                    228:                if (*j++ == 0)
                    229:                    goto found_keyword; /* I wish that C had a multi-level
                    230:                                         * break... */
                    231:        }
                    232:        if (p->rwd) {           /* we have a keyword */
                    233:     found_keyword:
                    234:            ps.its_a_keyword = true;
                    235:            ps.last_u_d = true;
                    236:            switch (p->rwcode) {
                    237:            case 1:             /* it is a switch */
                    238:                return (swstmt);
                    239:            case 2:             /* a case or default */
                    240:                return (casestmt);
                    241:
                    242:            case 3:             /* a "struct" */
                    243:                if (ps.p_l_follow)
                    244:                    break;      /* inside parens: cast */
                    245:                l_struct = true;
                    246:
                    247:                /*
                    248:                 * Next time around, we will want to know that we have had a
                    249:                 * 'struct'
                    250:                 */
                                        /* FALLTHROUGH */
                    251:            case 4:             /* one of the declaration keywords */
                    252:                if (ps.p_l_follow) {
                    253:                    ps.cast_mask |= 1 << ps.p_l_follow;
                    254:                    break;      /* inside parens: cast */
                    255:                }
                    256:                last_code = decl;
                    257:                return (decl);
                    258:
                    259:            case 5:             /* if, while, for */
                    260:                return (sp_paren);
                    261:
                    262:            case 6:             /* do, else */
                    263:                return (sp_nparen);
                    264:
                    265:            case 7:
                    266:                ps.sizeof_keyword = true;       /* FALLTHROUGH */
                    267:            default:            /* all others are treated like any other
                    268:                                 * identifier */
                    269:                return (ident);
                    270:            }                   /* end of switch */
                    271:        }                       /* end of if (found_it) */
                                /* A word followed by '(' while ps.tos <= 1 and ps.ind_level == 0 is
                                 * recorded as a procedure name, unless a ')' directly followed by
                                 * ';' or ',' is found in the rest of the current buffer.
                                 * NOTE(review): *tp is read after the increment, so the last
                                 * iteration may look at *buf_end -- confirm that byte is readable.
                                 * NOTE(review): strncpy writes no terminator when token fills the
                                 * copied length; presumably the last byte of ps.procname is
                                 * already 0 -- confirm. */
                    272:        if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
                    273:            register char *tp = buf_ptr;
                    274:            while (tp < buf_end)
                    275:                if (*tp++ == ')' && (*tp == ';' || *tp == ','))
                    276:                    goto not_proc;
                    277:            strncpy(ps.procname, token, sizeof ps.procname - 1);
                    278:            ps.in_parameter_declaration = 1;
                    279:            rparen_count = 1;
                    280:     not_proc:;
                    281:        }
                    282:        /*
                    283:         * The following hack attempts to guess whether or not the current
                    284:         * token is in fact a declaration keyword -- one that has been
                    285:         * typedefd
                    286:         */
                    287:        if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
                    288:                && !ps.p_l_follow
                    289:                && !ps.block_init
                    290:                && (ps.last_token == rparen || ps.last_token == semicolon ||
                    291:                    ps.last_token == decl ||
                    292:                    ps.last_token == lbrace || ps.last_token == rbrace)) {
                    293:            ps.its_a_keyword = true;
                    294:            ps.last_u_d = true;
                    295:            last_code = decl;
                    296:            return decl;
                    297:        }
                    298:        if (last_code == decl)  /* if this is a declared variable, then
                    299:                                 * following sign is unary */
                    300:            ps.last_u_d = true; /* will make "int a -1" work */
                    301:        last_code = ident;
                    302:        return (ident);         /* the ident is not in the list */
                    303:     }                          /* end of processing for alphanum character */
                    304:
                    305:     /* Scan a non-alphanumeric token */
                    306:
                    307:     *e_token++ = *buf_ptr;             /* if it is only a one-character token, it is
                    308:                                 * moved here */
                    309:     *e_token = '\0';
                    310:     if (++buf_ptr >= buf_end)
                    311:        fill_buffer();
                    312:
                    313:     switch (*token) {
                    314:     case '\n':
                    315:        unary_delim = ps.last_u_d;
                    316:        ps.last_nl = true;      /* remember that we just had a newline */
                    317:        code = (had_eof ? 0 : newline);
                    318:
                    319:        /*
                    320:         * if data has been exhausted, the newline is a dummy, and we should
                    321:         * return code to stop
                    322:         */
                    323:        break;
                    324:
                    325:     case '\'':                 /* start of quoted character */
                    326:     case '"':                  /* start of string */
                    327:        qchar = *token;
                    328:        if (troff) {
                    329:            e_token[-1] = '`';
                    330:            if (qchar == '"')
                    331:                *e_token++ = '`';
                    332:            e_token = chfont(&bodyf, &stringf, e_token);
                    333:        }
                    334:        do {                    /* copy the string */
                    335:            while (1) {         /* move one character or [/<char>]<char> */
                    336:                if (*buf_ptr == '\n') {
                    337:                    printf("%d: Unterminated literal\n", line_no);
                    338:                    goto stop_lit;
                    339:                }
                    340:                CHECK_SIZE_TOKEN;       /* Only have to do this once in this loop,
                    341:                                         * since CHECK_SIZE guarantees that there
                    342:                                         * are at least 5 entries left */
                    343:                *e_token = *buf_ptr++;
                    344:                if (buf_ptr >= buf_end)
                    345:                    fill_buffer();
                    346:                if (*e_token == BACKSLASH) {    /* if escape, copy extra char */
                    347:                    if (*buf_ptr == '\n')       /* check for escaped newline */
                    348:                        ++line_no;
                    349:                    if (troff) {
                    350:                        *++e_token = BACKSLASH;
                    351:                        if (*buf_ptr == BACKSLASH)
                    352:                            *++e_token = BACKSLASH;
                    353:                    }
                    354:                    *++e_token = *buf_ptr++;
                    355:                    ++e_token;  /* we must increment this again because we
                    356:                                 * copied two chars */
                    357:                    if (buf_ptr >= buf_end)
                    358:                        fill_buffer();
                    359:                }
                    360:                else
                    361:                    break;      /* we copied one character */
                    362:            }                   /* end of while (1) */
                    363:        } while (*e_token++ != qchar); /* done once the unescaped closing quote has been copied */
                    364:        if (troff) {
                    365:            e_token = chfont(&stringf, &bodyf, e_token - 1);
                    366:            if (qchar == '"')
                    367:                *e_token++ = '\'';
                    368:        }
                    369: stop_lit:
                    370:        code = ident;
                    371:        break;
                    372:
                    373:     case ('('):
                    374:     case ('['):
                    375:        unary_delim = true;
                    376:        code = lparen;
                    377:        break;
                    378:
                    379:     case (')'):
                    380:     case (']'):
                    381:        code = rparen;
                    382:        break;
                    383:
                    384:     case '#':
                    385:        unary_delim = ps.last_u_d;
                    386:        code = preesc;
                    387:        break;
                    388:
                    389:     case '?':
                    390:        unary_delim = true;
                    391:        code = question;
                    392:        break;
                    393:
                    394:     case (':'):
                    395:        code = colon;
                    396:        unary_delim = true;
                    397:        break;
                    398:
                    399:     case (';'):
                    400:        unary_delim = true;
                    401:        code = semicolon;
                    402:        break;
                    403:
                    404:     case ('{'):
                    405:        unary_delim = true;
                    406:
                    407:        /*
                    408:         * if (ps.in_or_st) ps.block_init = 1;
                    409:         */
                    410:        /* ?    code = ps.block_init ? lparen : lbrace; */
                    411:        code = lbrace;
                    412:        break;
                    413:
                    414:     case ('}'):
                    415:        unary_delim = true;
                    416:        /* ?    code = ps.block_init ? rparen : rbrace; */
                    417:        code = rbrace;
                    418:        break;
                    419:
                    420:     case 014:                  /* a form feed */
                    421:        unary_delim = ps.last_u_d;
                    422:        ps.last_nl = true;      /* remember this so we can set 'ps.col_1'
                    423:                                 * right */
                    424:        code = form_feed;
                    425:        break;
                    426:
                    427:     case (','):
                    428:        unary_delim = true;
                    429:        code = comma;
                    430:        break;
                    431:
                    432:     case '.':
                    433:        unary_delim = false;
                    434:        code = period;
                    435:        break;
                    436:
                    437:     case '-':
                    438:     case '+':                  /* check for -, +, --, ++ */
                    439:        code = (ps.last_u_d ? unary_op : binary_op);
                    440:        unary_delim = true;
                    441:
                    442:        if (*buf_ptr == token[0]) {
                    443:            /* check for doubled character */
                    444:            *e_token++ = *buf_ptr++;
                    445:            /* buffer overflow will be checked at end of loop */
                    446:            if (last_code == ident || last_code == rparen) {
                    447:                code = (ps.last_u_d ? unary_op : postop);
                    448:                /* check for following ++ or -- */
                    449:                unary_delim = false;
                    450:            }
                    451:        }
                    452:        else if (*buf_ptr == '=')
                    453:            /* check for operator += */
                    454:            *e_token++ = *buf_ptr++;
                    455:        else if (*buf_ptr == '>') {
                    456:            /* check for operator -> */
                    457:            *e_token++ = *buf_ptr++;
                    458:            if (!pointer_as_binop) {
                    459:                unary_delim = false;
                    460:                code = unary_op;
                    461:                ps.want_blank = false;
                    462:            }
                    463:        }
                    464:        break;                  /* buffer overflow will be checked at end of
                    465:                                 * switch */
                    466:
                    467:     case '=':
                    468:        if (ps.in_or_st)
                    469:            ps.block_init = 1;
                    470: #ifdef undef
                    471:        if (chartype[*buf_ptr] == opchar) {     /* we have two char assignment */
                    472:            e_token[-1] = *buf_ptr++;
                    473:            if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
                    474:                *e_token++ = *buf_ptr++;
                    475:            *e_token++ = '=';   /* Flip =+ to += */
                    476:            *e_token = 0;
                    477:        }
                    478: #else
                    479:        if (*buf_ptr == '=') {/* == */
                    480:            *e_token++ = '=';   /* append the second '=': the token is now "==" */
                    481:            buf_ptr++;
                    482:            *e_token = 0;
                    483:        }
                    484: #endif
                    485:        code = binary_op;
                    486:        unary_delim = true;
                    487:        break;
                    488:        /* stale note: because of the break above, control never drops through */
                    489:
                    490:     case '>':
                    491:     case '<':
                    492:     case '!':                  /* ops like <, <<, <=, !=, etc */
                    493:        if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
                    494:            *e_token++ = *buf_ptr;
                    495:            if (++buf_ptr >= buf_end)
                    496:                fill_buffer();
                    497:        }
                    498:        if (*buf_ptr == '=')
                    499:            *e_token++ = *buf_ptr++;
                    500:        code = (ps.last_u_d ? unary_op : binary_op);
                    501:        unary_delim = true;
                    502:        break;
                    503:
                    504:     default:
                    505:        if (token[0] == '/' && *buf_ptr == '*') {
                    506:            /* it is start of comment */
                    507:            *e_token++ = '*';
                    508:
                    509:            if (++buf_ptr >= buf_end)
                    510:                fill_buffer();
                    511:
                    512:            code = comment;
                    513:            unary_delim = ps.last_u_d;
                    514:            break;
                    515:        }
                    516:        while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
                    517:            /*
                    518:             * handle ||, &&, etc, and also things as in int *****i
                    519:             */
                    520:            *e_token++ = *buf_ptr;
                    521:            if (++buf_ptr >= buf_end)
                    522:                fill_buffer();
                    523:        }
                    524:        code = (ps.last_u_d ? unary_op : binary_op);
                    525:        unary_delim = true;
                    526:
                    527:
                    528:     }                          /* end of switch */
                    529:     if (code != newline) {     /* a newline leaves l_struct and last_code as they were */
                    530:        l_struct = false;
                    531:        last_code = code;
                    532:     }
                    533:     if (buf_ptr >= buf_end)    /* check for input buffer empty */
                    534:        fill_buffer();
                    535:     ps.last_u_d = unary_delim;
                    536:     *e_token = '\0';           /* null terminate the token */
                    537:     return (code);
                    538: }
                    539:
                    540: /*
                    541:  * Add the given keyword to the keyword table, using val as the keyword type
                    542:  */
                    543: addkey(key, val)
                    544:     char       *key;
                    545: {
                    546:     register struct templ *p = specials;
                    547:     while (p->rwd)
                    548:        if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
                    549:            return;
                    550:        else
                    551:            p++;
                    552:     if (p >= specials + sizeof specials / sizeof specials[0])
                    553:        return;                 /* For now, table overflows are silently
                    554:                                 * ignored */
                    555:     p->rwd = key;
                    556:     p->rwcode = val;
                    557:     p[1].rwd = 0;
                    558:     p[1].rwcode = 0;
                    559:     return;
                    560: }