src/usr.bin/indent/lexi.c - annotate

Return to lexi.c CVS log
Up to [local] / src / usr.bin / indent
Annotation of src/usr.bin/indent/lexi.c, Revision 1.3

1.3     ! mickey      1: /*     $OpenBSD: lexi.c,v 1.2 1996/06/26 05:34:31 deraadt Exp $        */
1.2       deraadt     2:
1.1       deraadt     3: /*
                      4:  * Copyright (c) 1985 Sun Microsystems, Inc.
                      5:  * Copyright (c) 1980 The Regents of the University of California.
                      6:  * Copyright (c) 1976 Board of Trustees of the University of Illinois.
                      7:  * All rights reserved.
                      8:  *
                      9:  * Redistribution and use in source and binary forms, with or without
                     10:  * modification, are permitted provided that the following conditions
                     11:  * are met:
                     12:  * 1. Redistributions of source code must retain the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer.
                     14:  * 2. Redistributions in binary form must reproduce the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer in the
                     16:  *    documentation and/or other materials provided with the distribution.
                     17:  * 3. All advertising materials mentioning features or use of this software
                     18:  *    must display the following acknowledgement:
                     19:  *     This product includes software developed by the University of
                     20:  *     California, Berkeley and its contributors.
                     21:  * 4. Neither the name of the University nor the names of its contributors
                     22:  *    may be used to endorse or promote products derived from this software
                     23:  *    without specific prior written permission.
                     24:  *
                     25:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     26:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     27:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     28:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     29:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     30:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     31:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     32:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     33:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     34:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     35:  * SUCH DAMAGE.
                     36:  */
                     37:
                     38: #ifndef lint
                     39: /*static char sccsid[] = "from: @(#)lexi.c     5.16 (Berkeley) 2/26/91";*/
1.3     ! mickey     40: static char rcsid[] = "$OpenBSD: lexi.c,v 1.2 1996/06/26 05:34:31 deraadt Exp $";
1.1       deraadt    41: #endif /* not lint */
                     42:
                     43: /*
                     44:  * Here we have the token scanner for indent.  It scans off one token and puts
                     45:  * it in the global variable "token".  It returns a code, indicating the type
                     46:  * of token scanned.
                     47:  */
                     48:
                     49: #include <stdio.h>
                     50: #include <ctype.h>
                     51: #include <stdlib.h>
                     52: #include <string.h>
                     53: #include "indent_globs.h"
                     54: #include "indent_codes.h"
                     55:
                     56: #define alphanum 1
                     57: #define opchar 3
                     58:
                     59: struct templ {
                     60:     char       *rwd;
                     61:     int         rwcode;
                     62: };
                     63:
                     64: struct templ specials[100] =
                     65: {
1.3     ! mickey     66:        { "switch", 1 },
        !            67:        { "case", 2 },
        !            68:        { "break", 0 },
        !            69:        { "struct", 3 },
        !            70:        { "union", 3 },
        !            71:        { "enum", 3 },
        !            72:        { "default", 2 },
        !            73:        { "int", 4 },
        !            74:        { "char", 4 },
        !            75:        { "float", 4 },
        !            76:        { "double", 4 },
        !            77:        { "long", 4 },
        !            78:        { "short", 4 },
        !            79:        { "typdef", 4 },
        !            80:        { "unsigned", 4 },
        !            81:        { "register", 4 },
        !            82:        { "static", 4 },
        !            83:        { "global", 4 },
        !            84:        { "extern", 4 },
        !            85:        { "void", 4 },
        !            86:        { "goto", 0 },
        !            87:        { "return", 0 },
        !            88:        { "if", 5 },
        !            89:        { "while", 5 },
        !            90:        { "for", 5 },
        !            91:        { "else", 6 },
        !            92:        { "do", 6 },
        !            93:        { "sizeof", 7 },
        !            94:        { 0, 0 }
1.1       deraadt    95: };
                     96:
                     97: char        chartype[128] =
                     98: {                              /* this is used to facilitate the decision of
                     99:                                 * what type (alphanumeric, operator) each
                    100:                                 * character is */
                    101:     0, 0, 0, 0, 0, 0, 0, 0,
                    102:     0, 0, 0, 0, 0, 0, 0, 0,
                    103:     0, 0, 0, 0, 0, 0, 0, 0,
                    104:     0, 0, 0, 0, 0, 0, 0, 0,
                    105:     0, 3, 0, 0, 1, 3, 3, 0,
                    106:     0, 0, 3, 3, 0, 3, 0, 3,
                    107:     1, 1, 1, 1, 1, 1, 1, 1,
                    108:     1, 1, 0, 0, 3, 3, 3, 3,
                    109:     0, 1, 1, 1, 1, 1, 1, 1,
                    110:     1, 1, 1, 1, 1, 1, 1, 1,
                    111:     1, 1, 1, 1, 1, 1, 1, 1,
                    112:     1, 1, 1, 0, 0, 0, 3, 1,
                    113:     0, 1, 1, 1, 1, 1, 1, 1,
                    114:     1, 1, 1, 1, 1, 1, 1, 1,
                    115:     1, 1, 1, 1, 1, 1, 1, 1,
                    116:     1, 1, 1, 0, 3, 0, 3, 0
                    117: };
                    118:
                    119:
                    120:
                    121:
                    122: int
                    123: lexi()
                    124: {
                    125:     int         unary_delim;   /* this is set to 1 if the current token
                    126:                                 *
                    127:                                 * forces a following operator to be unary */
                    128:     static int  last_code;     /* the last token type returned */
                    129:     static int  l_struct;      /* set to 1 if the last token was 'struct' */
                    130:     int         code;          /* internal code to be returned */
                    131:     char        qchar;         /* the delimiter character for a string */
                    132:
                    133:     e_token = s_token;         /* point to start of place to save token */
                    134:     unary_delim = false;
                    135:     ps.col_1 = ps.last_nl;     /* tell world that this token started in
                    136:                                 * column 1 iff the last thing scanned was nl */
                    137:     ps.last_nl = false;
                    138:
                    139:     while (*buf_ptr == ' ' || *buf_ptr == '\t') {      /* get rid of blanks */
                    140:        ps.col_1 = false;       /* leading blanks imply token is not in column
                    141:                                 * 1 */
                    142:        if (++buf_ptr >= buf_end)
                    143:            fill_buffer();
                    144:     }
                    145:
                    146:     /* Scan an alphanumeric token */
1.3     ! mickey    147:     if (chartype[*buf_ptr] == alphanum ||
        !           148:        (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
1.1       deraadt   149:        /*
                    150:         * we have a character or number
                    151:         */
                    152:        register char *j;       /* used for searching thru list of
                    153:                                 *
                    154:                                 * reserved words */
                    155:        register struct templ *p;
                    156:
1.3     ! mickey    157:        if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
1.1       deraadt   158:            int         seendot = 0,
                    159:                        seenexp = 0;
                    160:            if (*buf_ptr == '0' &&
                    161:                    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
                    162:                *e_token++ = *buf_ptr++;
                    163:                *e_token++ = *buf_ptr++;
                    164:                while (isxdigit(*buf_ptr)) {
                    165:                    CHECK_SIZE_TOKEN;
                    166:                    *e_token++ = *buf_ptr++;
                    167:                }
                    168:            }
                    169:            else
                    170:                while (1) {
                    171:                    if (*buf_ptr == '.')
                    172:                        if (seendot)
                    173:                            break;
                    174:                        else
                    175:                            seendot++;
                    176:                    CHECK_SIZE_TOKEN;
                    177:                    *e_token++ = *buf_ptr++;
                    178:                    if (!isdigit(*buf_ptr) && *buf_ptr != '.')
                    179:                        if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
                    180:                            break;
                    181:                        else {
                    182:                            seenexp++;
                    183:                            seendot++;
                    184:                            CHECK_SIZE_TOKEN;
                    185:                            *e_token++ = *buf_ptr++;
                    186:                            if (*buf_ptr == '+' || *buf_ptr == '-')
                    187:                                *e_token++ = *buf_ptr++;
                    188:                        }
                    189:                }
                    190:            if (*buf_ptr == 'L' || *buf_ptr == 'l')
                    191:                *e_token++ = *buf_ptr++;
                    192:        }
                    193:        else
                    194:            while (chartype[*buf_ptr] == alphanum) {    /* copy it over */
                    195:                CHECK_SIZE_TOKEN;
                    196:                *e_token++ = *buf_ptr++;
                    197:                if (buf_ptr >= buf_end)
                    198:                    fill_buffer();
                    199:            }
                    200:        *e_token++ = '\0';
                    201:        while (*buf_ptr == ' ' || *buf_ptr == '\t') {   /* get rid of blanks */
                    202:            if (++buf_ptr >= buf_end)
                    203:                fill_buffer();
                    204:        }
                    205:        ps.its_a_keyword = false;
                    206:        ps.sizeof_keyword = false;
                    207:        if (l_struct) {         /* if last token was 'struct', then this token
                    208:                                 * should be treated as a declaration */
                    209:            l_struct = false;
                    210:            last_code = ident;
                    211:            ps.last_u_d = true;
                    212:            return (decl);
                    213:        }
                    214:        ps.last_u_d = false;    /* Operator after indentifier is binary */
                    215:        last_code = ident;      /* Remember that this is the code we will
                    216:                                 * return */
                    217:
                    218:        /*
                    219:         * This loop will check if the token is a keyword.
                    220:         */
                    221:        for (p = specials; (j = p->rwd) != 0; p++) {
                    222:            register char *p = s_token; /* point at scanned token */
                    223:            if (*j++ != *p++ || *j++ != *p++)
                    224:                continue;       /* This test depends on the fact that
                    225:                                 * identifiers are always at least 1 character
                    226:                                 * long (ie. the first two bytes of the
                    227:                                 * identifier are always meaningful) */
                    228:            if (p[-1] == 0)
                    229:                break;          /* If its a one-character identifier */
                    230:            while (*p++ == *j)
                    231:                if (*j++ == 0)
                    232:                    goto found_keyword; /* I wish that C had a multi-level
                    233:                                         * break... */
                    234:        }
                    235:        if (p->rwd) {           /* we have a keyword */
                    236:     found_keyword:
                    237:            ps.its_a_keyword = true;
                    238:            ps.last_u_d = true;
                    239:            switch (p->rwcode) {
                    240:            case 1:             /* it is a switch */
                    241:                return (swstmt);
                    242:            case 2:             /* a case or default */
                    243:                return (casestmt);
                    244:
                    245:            case 3:             /* a "struct" */
                    246:                if (ps.p_l_follow)
                    247:                    break;      /* inside parens: cast */
                    248:                l_struct = true;
                    249:
                    250:                /*
                    251:                 * Next time around, we will want to know that we have had a
                    252:                 * 'struct'
                    253:                 */
                    254:            case 4:             /* one of the declaration keywords */
                    255:                if (ps.p_l_follow) {
                    256:                    ps.cast_mask |= 1 << ps.p_l_follow;
                    257:                    break;      /* inside parens: cast */
                    258:                }
                    259:                last_code = decl;
                    260:                return (decl);
                    261:
                    262:            case 5:             /* if, while, for */
                    263:                return (sp_paren);
                    264:
                    265:            case 6:             /* do, else */
                    266:                return (sp_nparen);
                    267:
                    268:            case 7:
                    269:                ps.sizeof_keyword = true;
                    270:            default:            /* all others are treated like any other
                    271:                                 * identifier */
                    272:                return (ident);
                    273:            }                   /* end of switch */
                    274:        }                       /* end of if (found_it) */
                    275:        if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
                    276:            register char *tp = buf_ptr;
                    277:            while (tp < buf_end)
                    278:                if (*tp++ == ')' && (*tp == ';' || *tp == ','))
                    279:                    goto not_proc;
                    280:            strncpy(ps.procname, token, sizeof ps.procname - 1);
                    281:            ps.in_parameter_declaration = 1;
                    282:            rparen_count = 1;
                    283:     not_proc:;
                    284:        }
                    285:        /*
                    286:         * The following hack attempts to guess whether or not the current
                    287:         * token is in fact a declaration keyword -- one that has been
                    288:         * typedefd
                    289:         */
                    290:        if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
                    291:                && !ps.p_l_follow
                    292:                && !ps.block_init
                    293:                && (ps.last_token == rparen || ps.last_token == semicolon ||
                    294:                    ps.last_token == decl ||
                    295:                    ps.last_token == lbrace || ps.last_token == rbrace)) {
                    296:            ps.its_a_keyword = true;
                    297:            ps.last_u_d = true;
                    298:            last_code = decl;
                    299:            return decl;
                    300:        }
                    301:        if (last_code == decl)  /* if this is a declared variable, then
                    302:                                 * following sign is unary */
                    303:            ps.last_u_d = true; /* will make "int a -1" work */
                    304:        last_code = ident;
                    305:        return (ident);         /* the ident is not in the list */
                    306:     }                          /* end of procesing for alpanum character */
                    307:
                    308:     /* Scan a non-alphanumeric token */
                    309:
                    310:     *e_token++ = *buf_ptr;             /* if it is only a one-character token, it is
                    311:                                 * moved here */
                    312:     *e_token = '\0';
                    313:     if (++buf_ptr >= buf_end)
                    314:        fill_buffer();
                    315:
                    316:     switch (*token) {
                    317:     case '\n':
                    318:        unary_delim = ps.last_u_d;
                    319:        ps.last_nl = true;      /* remember that we just had a newline */
                    320:        code = (had_eof ? 0 : newline);
                    321:
                    322:        /*
                    323:         * if data has been exausted, the newline is a dummy, and we should
                    324:         * return code to stop
                    325:         */
                    326:        break;
                    327:
                    328:     case '\'':                 /* start of quoted character */
                    329:     case '"':                  /* start of string */
                    330:        qchar = *token;
                    331:        if (troff) {
                    332:            e_token[-1] = '`';
                    333:            if (qchar == '"')
                    334:                *e_token++ = '`';
                    335:            e_token = chfont(&bodyf, &stringf, e_token);
                    336:        }
                    337:        do {                    /* copy the string */
                    338:            while (1) {         /* move one character or [/<char>]<char> */
                    339:                if (*buf_ptr == '\n') {
                    340:                    printf("%d: Unterminated literal\n", line_no);
                    341:                    goto stop_lit;
                    342:                }
                    343:                CHECK_SIZE_TOKEN;       /* Only have to do this once in this loop,
                    344:                                         * since CHECK_SIZE guarantees that there
                    345:                                         * are at least 5 entries left */
                    346:                *e_token = *buf_ptr++;
                    347:                if (buf_ptr >= buf_end)
                    348:                    fill_buffer();
                    349:                if (*e_token == BACKSLASH) {    /* if escape, copy extra char */
                    350:                    if (*buf_ptr == '\n')       /* check for escaped newline */
                    351:                        ++line_no;
                    352:                    if (troff) {
                    353:                        *++e_token = BACKSLASH;
                    354:                        if (*buf_ptr == BACKSLASH)
                    355:                            *++e_token = BACKSLASH;
                    356:                    }
                    357:                    *++e_token = *buf_ptr++;
                    358:                    ++e_token;  /* we must increment this again because we
                    359:                                 * copied two chars */
                    360:                    if (buf_ptr >= buf_end)
                    361:                        fill_buffer();
                    362:                }
                    363:                else
                    364:                    break;      /* we copied one character */
                    365:            }                   /* end of while (1) */
                    366:        } while (*e_token++ != qchar);
                    367:        if (troff) {
                    368:            e_token = chfont(&stringf, &bodyf, e_token - 1);
                    369:            if (qchar == '"')
                    370:                *e_token++ = '\'';
                    371:        }
                    372: stop_lit:
                    373:        code = ident;
                    374:        break;
                    375:
                    376:     case ('('):
                    377:     case ('['):
                    378:        unary_delim = true;
                    379:        code = lparen;
                    380:        break;
                    381:
                    382:     case (')'):
                    383:     case (']'):
                    384:        code = rparen;
                    385:        break;
                    386:
                    387:     case '#':
                    388:        unary_delim = ps.last_u_d;
                    389:        code = preesc;
                    390:        break;
                    391:
                    392:     case '?':
                    393:        unary_delim = true;
                    394:        code = question;
                    395:        break;
                    396:
                    397:     case (':'):
                    398:        code = colon;
                    399:        unary_delim = true;
                    400:        break;
                    401:
                    402:     case (';'):
                    403:        unary_delim = true;
                    404:        code = semicolon;
                    405:        break;
                    406:
                    407:     case ('{'):
                    408:        unary_delim = true;
                    409:
                    410:        /*
                    411:         * if (ps.in_or_st) ps.block_init = 1;
                    412:         */
                    413:        /* ?    code = ps.block_init ? lparen : lbrace; */
                    414:        code = lbrace;
                    415:        break;
                    416:
                    417:     case ('}'):
                    418:        unary_delim = true;
                    419:        /* ?    code = ps.block_init ? rparen : rbrace; */
                    420:        code = rbrace;
                    421:        break;
                    422:
                    423:     case 014:                  /* a form feed */
                    424:        unary_delim = ps.last_u_d;
                    425:        ps.last_nl = true;      /* remember this so we can set 'ps.col_1'
                    426:                                 * right */
                    427:        code = form_feed;
                    428:        break;
                    429:
                    430:     case (','):
                    431:        unary_delim = true;
                    432:        code = comma;
                    433:        break;
                    434:
                    435:     case '.':
                    436:        unary_delim = false;
                    437:        code = period;
                    438:        break;
                    439:
                    440:     case '-':
                    441:     case '+':                  /* check for -, +, --, ++ */
                    442:        code = (ps.last_u_d ? unary_op : binary_op);
                    443:        unary_delim = true;
                    444:
                    445:        if (*buf_ptr == token[0]) {
                    446:            /* check for doubled character */
                    447:            *e_token++ = *buf_ptr++;
                    448:            /* buffer overflow will be checked at end of loop */
                    449:            if (last_code == ident || last_code == rparen) {
                    450:                code = (ps.last_u_d ? unary_op : postop);
                    451:                /* check for following ++ or -- */
                    452:                unary_delim = false;
                    453:            }
                    454:        }
                    455:        else if (*buf_ptr == '=')
                    456:            /* check for operator += */
                    457:            *e_token++ = *buf_ptr++;
                    458:        else if (*buf_ptr == '>') {
                    459:            /* check for operator -> */
                    460:            *e_token++ = *buf_ptr++;
                    461:            if (!pointer_as_binop) {
                    462:                unary_delim = false;
                    463:                code = unary_op;
                    464:                ps.want_blank = false;
                    465:            }
                    466:        }
                    467:        break;                  /* buffer overflow will be checked at end of
                    468:                                 * switch */
                    469:
                    470:     case '=':
                    471:        if (ps.in_or_st)
                    472:            ps.block_init = 1;
                    473: #ifdef undef
                    474:        if (chartype[*buf_ptr] == opchar) {     /* we have two char assignment */
                    475:            e_token[-1] = *buf_ptr++;
                    476:            if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
                    477:                *e_token++ = *buf_ptr++;
                    478:            *e_token++ = '=';   /* Flip =+ to += */
                    479:            *e_token = 0;
                    480:        }
                    481: #else
                    482:        if (*buf_ptr == '=') {/* == */
                    483:            *e_token++ = '=';   /* Flip =+ to += */
                    484:            buf_ptr++;
                    485:            *e_token = 0;
                    486:        }
                    487: #endif
                    488:        code = binary_op;
                    489:        unary_delim = true;
                    490:        break;
                    491:        /* can drop thru!!! */
                    492:
                    493:     case '>':
                    494:     case '<':
                    495:     case '!':                  /* ops like <, <<, <=, !=, etc */
                    496:        if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
                    497:            *e_token++ = *buf_ptr;
                    498:            if (++buf_ptr >= buf_end)
                    499:                fill_buffer();
                    500:        }
                    501:        if (*buf_ptr == '=')
                    502:            *e_token++ = *buf_ptr++;
                    503:        code = (ps.last_u_d ? unary_op : binary_op);
                    504:        unary_delim = true;
                    505:        break;
                    506:
                    507:     default:
                    508:        if (token[0] == '/' && *buf_ptr == '*') {
                    509:            /* it is start of comment */
                    510:            *e_token++ = '*';
                    511:
                    512:            if (++buf_ptr >= buf_end)
                    513:                fill_buffer();
                    514:
                    515:            code = comment;
                    516:            unary_delim = ps.last_u_d;
                    517:            break;
                    518:        }
                    519:        while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
                    520:            /*
                    521:             * handle ||, &&, etc, and also things as in int *****i
                    522:             */
                    523:            *e_token++ = *buf_ptr;
                    524:            if (++buf_ptr >= buf_end)
                    525:                fill_buffer();
                    526:        }
                    527:        code = (ps.last_u_d ? unary_op : binary_op);
                    528:        unary_delim = true;
                    529:
                    530:
                    531:     }                          /* end of switch */
                    532:     if (code != newline) {
                    533:        l_struct = false;
                    534:        last_code = code;
                    535:     }
                    536:     if (buf_ptr >= buf_end)    /* check for input buffer empty */
                    537:        fill_buffer();
                    538:     ps.last_u_d = unary_delim;
                    539:     *e_token = '\0';           /* null terminate the token */
                    540:     return (code);
                    541: }
                    542:
                    543: /*
                    544:  * Add the given keyword to the keyword table, using val as the keyword type
                    545:  */
1.3     ! mickey    546: void
1.1       deraadt   547: addkey(key, val)
                    548:     char       *key;
                    549: {
                    550:     register struct templ *p = specials;
                    551:     while (p->rwd)
                    552:        if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
                    553:            return;
                    554:        else
                    555:            p++;
                    556:     if (p >= specials + sizeof specials / sizeof specials[0])
                    557:        return;                 /* For now, table overflows are silently
                    558:                                 * ignored */
                    559:     p->rwd = key;
                    560:     p->rwcode = val;
                    561:     p[1].rwd = 0;
                    562:     p[1].rwcode = 0;
                    563:     return;
                    564: }