[BACK]Return to mdoc.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / mandoc

Annotation of src/usr.bin/mandoc/mdoc.c, Revision 1.159

1.159   ! schwarze    1: /*     $OpenBSD: mdoc.c,v 1.158 2018/08/17 20:31:52 schwarze Exp $ */
1.1       kristaps    2: /*
1.77      schwarze    3:  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.158     schwarze    4:  * Copyright (c) 2010, 2012-2018 Ingo Schwarze <schwarze@openbsd.org>
1.1       kristaps    5:  *
                      6:  * Permission to use, copy, modify, and distribute this software for any
1.3       schwarze    7:  * purpose with or without fee is hereby granted, provided that the above
                      8:  * copyright notice and this permission notice appear in all copies.
1.1       kristaps    9:  *
1.130     schwarze   10:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.3       schwarze   11:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.130     schwarze   12:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.3       schwarze   13:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     15:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     16:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1       kristaps   17:  */
1.28      schwarze   18: #include <sys/types.h>
                     19:
1.1       kristaps   20: #include <assert.h>
1.101     schwarze   21: #include <ctype.h>
1.1       kristaps   22: #include <stdarg.h>
                     23: #include <stdio.h>
                     24: #include <stdlib.h>
                     25: #include <string.h>
1.39      schwarze   26: #include <time.h>
1.1       kristaps   27:
1.128     schwarze   28: #include "mandoc_aux.h"
                     29: #include "mandoc.h"
                     30: #include "roff.h"
1.83      schwarze   31: #include "mdoc.h"
1.128     schwarze   32: #include "libmandoc.h"
1.136     schwarze   33: #include "roff_int.h"
1.1       kristaps   34: #include "libmdoc.h"
                     35:
1.103     schwarze   36: const  char *const __mdoc_argnames[MDOC_ARG_MAX] = {
1.1       kristaps   37:        "split",                "nosplit",              "ragged",
1.103     schwarze   38:        "unfilled",             "literal",              "file",
                     39:        "offset",               "bullet",               "dash",
                     40:        "hyphen",               "item",                 "enum",
                     41:        "tag",                  "diag",                 "hang",
                     42:        "ohang",                "inset",                "column",
                     43:        "width",                "compact",              "std",
1.1       kristaps   44:        "filled",               "words",                "emphasis",
1.30      schwarze   45:        "symbolic",             "nested",               "centered"
1.151     schwarze   46: };
1.1       kristaps   47: const  char * const *mdoc_argnames = __mdoc_argnames;
                     48:
1.131     schwarze   49: static int               mdoc_ptext(struct roff_man *, int, char *, int);
                     50: static int               mdoc_pmacro(struct roff_man *, int, char *, int);
1.1       kristaps   51:
1.81      schwarze   52:
1.1       kristaps   53: /*
                     54:  * Main parse routine.  Parses a single line -- really just hands off to
1.45      schwarze   55:  * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
1.1       kristaps   56:  */
                     57: int
1.131     schwarze   58: mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs)
1.1       kristaps   59: {
                     60:
1.128     schwarze   61:        if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line)
1.117     schwarze   62:                mdoc->flags |= MDOC_NEWLINE;
1.60      schwarze   63:
                     64:        /*
                     65:         * Let the roff nS register switch SYNOPSIS mode early,
                     66:         * such that the parser knows at all times
                     67:         * whether this mode is on or off.
                     68:         * Note that this mode is also switched by the Sh macro.
                     69:         */
1.94      schwarze   70:        if (roff_getreg(mdoc->roff, "nS"))
                     71:                mdoc->flags |= MDOC_SYNOPSIS;
                     72:        else
                     73:                mdoc->flags &= ~MDOC_SYNOPSIS;
1.60      schwarze   74:
1.142     schwarze   75:        return roff_getcontrol(mdoc->roff, buf, &offs) ?
1.103     schwarze   76:            mdoc_pmacro(mdoc, ln, buf, offs) :
1.142     schwarze   77:            mdoc_ptext(mdoc, ln, buf, offs);
1.1       kristaps   78: }
                     79:
1.121     schwarze   80: void
1.151     schwarze   81: mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, enum roff_tok tok)
1.1       kristaps   82: {
1.129     schwarze   83:        struct roff_node *p;
1.1       kristaps   84:
1.136     schwarze   85:        p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok);
                     86:        roff_node_append(mdoc, p);
1.131     schwarze   87:        mdoc->next = ROFF_NEXT_CHILD;
1.59      schwarze   88: }
                     89:
1.129     schwarze   90: struct roff_node *
1.151     schwarze   91: mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos,
                     92:     enum roff_tok tok, struct roff_node *body)
1.59      schwarze   93: {
1.129     schwarze   94:        struct roff_node *p;
1.59      schwarze   95:
1.147     schwarze   96:        body->flags |= NODE_ENDED;
                     97:        body->parent->flags |= NODE_ENDED;
1.136     schwarze   98:        p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok);
1.126     schwarze   99:        p->body = body;
1.92      schwarze  100:        p->norm = body->norm;
1.149     schwarze  101:        p->end = ENDBODY_SPACE;
1.136     schwarze  102:        roff_node_append(mdoc, p);
1.131     schwarze  103:        mdoc->next = ROFF_NEXT_SIBLING;
1.142     schwarze  104:        return p;
1.1       kristaps  105: }
                    106:
1.129     schwarze  107: struct roff_node *
1.131     schwarze  108: mdoc_block_alloc(struct roff_man *mdoc, int line, int pos,
1.151     schwarze  109:     enum roff_tok tok, struct mdoc_arg *args)
1.1       kristaps  110: {
1.129     schwarze  111:        struct roff_node *p;
1.1       kristaps  112:
1.136     schwarze  113:        p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok);
1.4       schwarze  114:        p->args = args;
                    115:        if (p->args)
1.1       kristaps  116:                (args->refcnt)++;
1.75      schwarze  117:
                    118:        switch (tok) {
1.103     schwarze  119:        case MDOC_Bd:
                    120:        case MDOC_Bf:
                    121:        case MDOC_Bl:
1.107     schwarze  122:        case MDOC_En:
1.103     schwarze  123:        case MDOC_Rs:
1.75      schwarze  124:                p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
                    125:                break;
                    126:        default:
                    127:                break;
                    128:        }
1.136     schwarze  129:        roff_node_append(mdoc, p);
1.131     schwarze  130:        mdoc->next = ROFF_NEXT_CHILD;
1.142     schwarze  131:        return p;
1.1       kristaps  132: }
                    133:
1.120     schwarze  134: void
1.131     schwarze  135: mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos,
1.151     schwarze  136:      enum roff_tok tok, struct mdoc_arg *args)
1.1       kristaps  137: {
1.129     schwarze  138:        struct roff_node *p;
1.1       kristaps  139:
1.136     schwarze  140:        p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok);
1.4       schwarze  141:        p->args = args;
                    142:        if (p->args)
1.1       kristaps  143:                (args->refcnt)++;
1.75      schwarze  144:
                    145:        switch (tok) {
1.103     schwarze  146:        case MDOC_An:
1.75      schwarze  147:                p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
                    148:                break;
                    149:        default:
                    150:                break;
                    151:        }
1.136     schwarze  152:        roff_node_append(mdoc, p);
1.131     schwarze  153:        mdoc->next = ROFF_NEXT_CHILD;
1.1       kristaps  154: }
                    155:
                    156: /*
                    157:  * Parse free-form text, that is, a line that does not begin with the
                    158:  * control character.
                    159:  */
                    160: static int
1.131     schwarze  161: mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs)
1.1       kristaps  162: {
1.129     schwarze  163:        struct roff_node *n;
1.156     schwarze  164:        const char       *cp, *sp;
1.56      schwarze  165:        char             *c, *ws, *end;
1.46      schwarze  166:
1.93      schwarze  167:        n = mdoc->last;
1.56      schwarze  168:
                    169:        /*
1.146     schwarze  170:         * If a column list contains plain text, assume an implicit item
                    171:         * macro.  This can happen one or more times at the beginning
                    172:         * of such a list, intermixed with non-It mdoc macros and with
                    173:         * nodes generated on the roff level, for example by tbl.
1.56      schwarze  174:         */
                    175:
1.146     schwarze  176:        if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
                    177:             n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
                    178:            (n->parent != NULL && n->parent->tok == MDOC_Bl &&
                    179:             n->parent->norm->Bl.type == LIST_column)) {
1.93      schwarze  180:                mdoc->flags |= MDOC_FREECOL;
1.158     schwarze  181:                (*mdoc_macro(MDOC_It)->fp)(mdoc, MDOC_It,
                    182:                    line, offs, &offs, buf);
1.142     schwarze  183:                return 1;
1.56      schwarze  184:        }
                    185:
1.52      schwarze  186:        /*
                    187:         * Search for the beginning of unescaped trailing whitespace (ws)
                    188:         * and for the first character not to be output (end).
                    189:         */
1.56      schwarze  190:
                    191:        /* FIXME: replace with strcspn(). */
1.52      schwarze  192:        ws = NULL;
1.53      schwarze  193:        for (c = end = buf + offs; *c; c++) {
1.52      schwarze  194:                switch (*c) {
                    195:                case ' ':
                    196:                        if (NULL == ws)
                    197:                                ws = c;
                    198:                        continue;
                    199:                case '\t':
                    200:                        /*
                    201:                         * Always warn about trailing tabs,
                    202:                         * even outside literal context,
                    203:                         * where they should be put on the next line.
                    204:                         */
                    205:                        if (NULL == ws)
                    206:                                ws = c;
                    207:                        /*
                    208:                         * Strip trailing tabs in literal context only;
                    209:                         * outside, they affect the next line.
                    210:                         */
1.93      schwarze  211:                        if (MDOC_LITERAL & mdoc->flags)
1.52      schwarze  212:                                continue;
                    213:                        break;
                    214:                case '\\':
                    215:                        /* Skip the escaped character, too, if any. */
                    216:                        if (c[1])
                    217:                                c++;
                    218:                        /* FALLTHROUGH */
                    219:                default:
                    220:                        ws = NULL;
                    221:                        break;
                    222:                }
                    223:                end = c + 1;
                    224:        }
                    225:        *end = '\0';
1.19      schwarze  226:
1.52      schwarze  227:        if (ws)
1.108     schwarze  228:                mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
                    229:                    line, (int)(ws-buf), NULL);
1.34      schwarze  230:
1.156     schwarze  231:        /*
                    232:         * Blank lines are allowed in no-fill mode
                    233:         * and cancel preceding \c,
                    234:         * but add a single vertical space elsewhere.
                    235:         */
                    236:
1.120     schwarze  237:        if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) {
1.156     schwarze  238:                switch (mdoc->last->type) {
                    239:                case ROFFT_TEXT:
                    240:                        sp = mdoc->last->string;
                    241:                        cp = end = strchr(sp, '\0') - 2;
                    242:                        if (cp < sp || cp[0] != '\\' || cp[1] != 'c')
                    243:                                break;
                    244:                        while (cp > sp && cp[-1] == '\\')
                    245:                                cp--;
                    246:                        if ((end - cp) % 2)
                    247:                                break;
                    248:                        *end = '\0';
                    249:                        return 1;
                    250:                default:
                    251:                        break;
                    252:                }
1.108     schwarze  253:                mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse,
                    254:                    line, (int)(c - buf), NULL);
1.153     schwarze  255:                roff_elem_alloc(mdoc, line, offs, ROFF_sp);
1.147     schwarze  256:                mdoc->last->flags |= NODE_VALID | NODE_ENDED;
1.131     schwarze  257:                mdoc->next = ROFF_NEXT_SIBLING;
1.142     schwarze  258:                return 1;
1.38      schwarze  259:        }
1.1       kristaps  260:
1.138     schwarze  261:        roff_word_alloc(mdoc, line, offs, buf+offs);
1.1       kristaps  262:
1.120     schwarze  263:        if (mdoc->flags & MDOC_LITERAL)
1.142     schwarze  264:                return 1;
1.52      schwarze  265:
1.35      schwarze  266:        /*
1.48      schwarze  267:         * End-of-sentence check.  If the last character is an unescaped
                    268:         * EOS character, then flag the node as being the end of a
                    269:         * sentence.  The front-end will know how to interpret this.
1.35      schwarze  270:         */
                    271:
1.52      schwarze  272:        assert(buf < end);
1.48      schwarze  273:
1.97      schwarze  274:        if (mandoc_eos(buf+offs, (size_t)(end-buf-offs)))
1.147     schwarze  275:                mdoc->last->flags |= NODE_EOS;
1.148     schwarze  276:
                    277:        for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) {
                    278:                if (c - buf < offs + 2)
                    279:                        continue;
1.154     schwarze  280:                if (end - c < 3)
1.148     schwarze  281:                        break;
1.155     schwarze  282:                if (c[1] != ' ' ||
1.157     schwarze  283:                    isalnum((unsigned char)c[-2]) == 0 ||
                    284:                    isalnum((unsigned char)c[-1]) == 0 ||
1.155     schwarze  285:                    (c[-2] == 'n' && c[-1] == 'c') ||
                    286:                    (c[-2] == 'v' && c[-1] == 's'))
                    287:                        continue;
                    288:                c += 2;
                    289:                if (*c == ' ')
                    290:                        c++;
                    291:                if (*c == ' ')
                    292:                        c++;
                    293:                if (isupper((unsigned char)(*c)))
1.148     schwarze  294:                        mandoc_msg(MANDOCERR_EOS, mdoc->parse,
                    295:                            line, (int)(c - buf), NULL);
                    296:        }
                    297:
1.142     schwarze  298:        return 1;
1.1       kristaps  299: }
1.19      schwarze  300:
1.1       kristaps  301: /*
                    302:  * Parse a macro line, that is, a line beginning with the control
                    303:  * character.
                    304:  */
1.61      schwarze  305: static int
1.131     schwarze  306: mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs)
1.1       kristaps  307: {
1.129     schwarze  308:        struct roff_node *n;
1.118     schwarze  309:        const char       *cp;
1.152     schwarze  310:        size_t            sz;
1.151     schwarze  311:        enum roff_tok     tok;
1.152     schwarze  312:        int               sv;
                    313:
                    314:        /* Determine the line macro. */
1.1       kristaps  315:
1.83      schwarze  316:        sv = offs;
1.152     schwarze  317:        tok = TOKEN_NONE;
                    318:        for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++)
                    319:                offs++;
                    320:        if (sz == 2 || sz == 3)
                    321:                tok = roffhash_find(mdoc->mdocmac, buf + sv, sz);
1.137     schwarze  322:        if (tok == TOKEN_NONE) {
1.112     schwarze  323:                mandoc_msg(MANDOCERR_MACRO, mdoc->parse,
                    324:                    ln, sv, buf + sv - 1);
1.142     schwarze  325:                return 1;
1.1       kristaps  326:        }
                    327:
1.118     schwarze  328:        /* Skip a leading escape sequence or tab. */
1.63      schwarze  329:
1.118     schwarze  330:        switch (buf[offs]) {
                    331:        case '\\':
                    332:                cp = buf + offs + 1;
                    333:                mandoc_escape(&cp, NULL, NULL);
                    334:                offs = cp - buf;
                    335:                break;
                    336:        case '\t':
1.83      schwarze  337:                offs++;
1.118     schwarze  338:                break;
                    339:        default:
                    340:                break;
                    341:        }
1.63      schwarze  342:
                    343:        /* Jump to the next non-whitespace word. */
1.1       kristaps  344:
1.150     schwarze  345:        while (buf[offs] == ' ')
1.83      schwarze  346:                offs++;
1.34      schwarze  347:
1.103     schwarze  348:        /*
1.46      schwarze  349:         * Trailing whitespace.  Note that tabs are allowed to be passed
                    350:         * into the parser as "text", so we only warn about spaces here.
                    351:         */
1.34      schwarze  352:
1.83      schwarze  353:        if ('\0' == buf[offs] && ' ' == buf[offs - 1])
1.108     schwarze  354:                mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
                    355:                    ln, offs - 1, NULL);
1.1       kristaps  356:
1.56      schwarze  357:        /*
                    358:         * If an initial macro or a list invocation, divert directly
                    359:         * into macro processing.
                    360:         */
                    361:
1.146     schwarze  362:        n = mdoc->last;
                    363:        if (n == NULL || tok == MDOC_It || tok == MDOC_El) {
1.158     schwarze  364:                (*mdoc_macro(tok)->fp)(mdoc, tok, ln, sv, &offs, buf);
1.142     schwarze  365:                return 1;
1.121     schwarze  366:        }
1.56      schwarze  367:
                    368:        /*
1.146     schwarze  369:         * If a column list contains a non-It macro, assume an implicit
                    370:         * item macro.  This can happen one or more times at the
                    371:         * beginning of such a list, intermixed with text lines and
                    372:         * with nodes generated on the roff level, for example by tbl.
1.24      schwarze  373:         */
1.56      schwarze  374:
1.146     schwarze  375:        if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
                    376:             n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
                    377:            (n->parent != NULL && n->parent->tok == MDOC_Bl &&
                    378:             n->parent->norm->Bl.type == LIST_column)) {
1.93      schwarze  379:                mdoc->flags |= MDOC_FREECOL;
1.158     schwarze  380:                (*mdoc_macro(MDOC_It)->fp)(mdoc, MDOC_It, ln, sv, &sv, buf);
1.142     schwarze  381:                return 1;
1.56      schwarze  382:        }
                    383:
                    384:        /* Normal processing of a macro. */
                    385:
1.158     schwarze  386:        (*mdoc_macro(tok)->fp)(mdoc, tok, ln, sv, &offs, buf);
1.98      schwarze  387:
                    388:        /* In quick mode (for mandocdb), abort after the NAME section. */
                    389:
                    390:        if (mdoc->quick && MDOC_Sh == tok &&
                    391:            SEC_NAME != mdoc->last->sec)
1.142     schwarze  392:                return 2;
1.1       kristaps  393:
1.142     schwarze  394:        return 1;
1.1       kristaps  395: }
1.24      schwarze  396:
1.83      schwarze  397: enum mdelim
                    398: mdoc_isdelim(const char *p)
                    399: {
                    400:
                    401:        if ('\0' == p[0])
1.142     schwarze  402:                return DELIM_NONE;
1.83      schwarze  403:
                    404:        if ('\0' == p[1])
                    405:                switch (p[0]) {
1.103     schwarze  406:                case '(':
                    407:                case '[':
1.142     schwarze  408:                        return DELIM_OPEN;
1.103     schwarze  409:                case '|':
1.142     schwarze  410:                        return DELIM_MIDDLE;
1.103     schwarze  411:                case '.':
                    412:                case ',':
                    413:                case ';':
                    414:                case ':':
                    415:                case '?':
                    416:                case '!':
                    417:                case ')':
                    418:                case ']':
1.142     schwarze  419:                        return DELIM_CLOSE;
1.83      schwarze  420:                default:
1.142     schwarze  421:                        return DELIM_NONE;
1.83      schwarze  422:                }
                    423:
                    424:        if ('\\' != p[0])
1.142     schwarze  425:                return DELIM_NONE;
1.24      schwarze  426:
1.83      schwarze  427:        if (0 == strcmp(p + 1, "."))
1.142     schwarze  428:                return DELIM_CLOSE;
1.90      schwarze  429:        if (0 == strcmp(p + 1, "fR|\\fP"))
1.142     schwarze  430:                return DELIM_MIDDLE;
1.83      schwarze  431:
1.142     schwarze  432:        return DELIM_NONE;
1.144     schwarze  433: }
                    434:
                    435: void
                    436: mdoc_validate(struct roff_man *mdoc)
                    437: {
                    438:
                    439:        mdoc->last = mdoc->first;
                    440:        mdoc_node_validate(mdoc);
                    441:        mdoc_state_reset(mdoc);
1.83      schwarze  442: }