[BACK]Return to mdoc.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / mandoc

Annotation of src/usr.bin/mandoc/mdoc.c, Revision 1.156

1.156   ! schwarze    1: /*     $OpenBSD: mdoc.c,v 1.155 2017/06/07 20:58:36 schwarze Exp $ */
1.1       kristaps    2: /*
1.77      schwarze    3:  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.148     schwarze    4:  * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org>
1.1       kristaps    5:  *
                      6:  * Permission to use, copy, modify, and distribute this software for any
1.3       schwarze    7:  * purpose with or without fee is hereby granted, provided that the above
                      8:  * copyright notice and this permission notice appear in all copies.
1.1       kristaps    9:  *
1.130     schwarze   10:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.3       schwarze   11:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.130     schwarze   12:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.3       schwarze   13:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     15:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     16:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1       kristaps   17:  */
1.28      schwarze   18: #include <sys/types.h>
                     19:
1.1       kristaps   20: #include <assert.h>
1.101     schwarze   21: #include <ctype.h>
1.1       kristaps   22: #include <stdarg.h>
                     23: #include <stdio.h>
                     24: #include <stdlib.h>
                     25: #include <string.h>
1.39      schwarze   26: #include <time.h>
1.1       kristaps   27:
1.128     schwarze   28: #include "mandoc_aux.h"
                     29: #include "mandoc.h"
                     30: #include "roff.h"
1.83      schwarze   31: #include "mdoc.h"
1.128     schwarze   32: #include "libmandoc.h"
1.136     schwarze   33: #include "roff_int.h"
1.1       kristaps   34: #include "libmdoc.h"
                     35:
1.103     schwarze   36: const  char *const __mdoc_argnames[MDOC_ARG_MAX] = {
1.1       kristaps   37:        "split",                "nosplit",              "ragged",
1.103     schwarze   38:        "unfilled",             "literal",              "file",
                     39:        "offset",               "bullet",               "dash",
                     40:        "hyphen",               "item",                 "enum",
                     41:        "tag",                  "diag",                 "hang",
                     42:        "ohang",                "inset",                "column",
                     43:        "width",                "compact",              "std",
1.1       kristaps   44:        "filled",               "words",                "emphasis",
1.30      schwarze   45:        "symbolic",             "nested",               "centered"
1.151     schwarze   46: };
1.1       kristaps   47: const  char * const *mdoc_argnames = __mdoc_argnames;
                     48:
1.131     schwarze   49: static int               mdoc_ptext(struct roff_man *, int, char *, int);
                     50: static int               mdoc_pmacro(struct roff_man *, int, char *, int);
1.1       kristaps   51:
1.81      schwarze   52:
1.1       kristaps   53: /*
                     54:  * Main parse routine.  Parses a single line -- really just hands off to
1.45      schwarze   55:  * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
1.1       kristaps   56:  */
                     57: int
1.131     schwarze   58: mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs)
1.1       kristaps   59: {
                     60:
1.128     schwarze   61:        if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line)
1.117     schwarze   62:                mdoc->flags |= MDOC_NEWLINE;
1.60      schwarze   63:
                     64:        /*
                     65:         * Let the roff nS register switch SYNOPSIS mode early,
                     66:         * such that the parser knows at all times
                     67:         * whether this mode is on or off.
                     68:         * Note that this mode is also switched by the Sh macro.
                     69:         */
1.94      schwarze   70:        if (roff_getreg(mdoc->roff, "nS"))
                     71:                mdoc->flags |= MDOC_SYNOPSIS;
                     72:        else
                     73:                mdoc->flags &= ~MDOC_SYNOPSIS;
1.60      schwarze   74:
1.142     schwarze   75:        return roff_getcontrol(mdoc->roff, buf, &offs) ?
1.103     schwarze   76:            mdoc_pmacro(mdoc, ln, buf, offs) :
1.142     schwarze   77:            mdoc_ptext(mdoc, ln, buf, offs);
1.1       kristaps   78: }
                     79:
1.121     schwarze   80: void
1.58      schwarze   81: mdoc_macro(MACRO_PROT_ARGS)
1.1       kristaps   82: {
1.151     schwarze   83:        assert(tok >= MDOC_Dd && tok < MDOC_MAX);
1.121     schwarze   84:        (*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf);
1.1       kristaps   85: }
                     86:
1.120     schwarze   87: void
1.151     schwarze   88: mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, enum roff_tok tok)
1.1       kristaps   89: {
1.129     schwarze   90:        struct roff_node *p;
1.1       kristaps   91:
1.136     schwarze   92:        p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok);
                     93:        roff_node_append(mdoc, p);
1.131     schwarze   94:        mdoc->next = ROFF_NEXT_CHILD;
1.59      schwarze   95: }
                     96:
1.129     schwarze   97: struct roff_node *
1.151     schwarze   98: mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos,
                     99:     enum roff_tok tok, struct roff_node *body)
1.59      schwarze  100: {
1.129     schwarze  101:        struct roff_node *p;
1.59      schwarze  102:
1.147     schwarze  103:        body->flags |= NODE_ENDED;
                    104:        body->parent->flags |= NODE_ENDED;
1.136     schwarze  105:        p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok);
1.126     schwarze  106:        p->body = body;
1.92      schwarze  107:        p->norm = body->norm;
1.149     schwarze  108:        p->end = ENDBODY_SPACE;
1.136     schwarze  109:        roff_node_append(mdoc, p);
1.131     schwarze  110:        mdoc->next = ROFF_NEXT_SIBLING;
1.142     schwarze  111:        return p;
1.1       kristaps  112: }
                    113:
1.129     schwarze  114: struct roff_node *
1.131     schwarze  115: mdoc_block_alloc(struct roff_man *mdoc, int line, int pos,
1.151     schwarze  116:     enum roff_tok tok, struct mdoc_arg *args)
1.1       kristaps  117: {
1.129     schwarze  118:        struct roff_node *p;
1.1       kristaps  119:
1.136     schwarze  120:        p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok);
1.4       schwarze  121:        p->args = args;
                    122:        if (p->args)
1.1       kristaps  123:                (args->refcnt)++;
1.75      schwarze  124:
                    125:        switch (tok) {
1.103     schwarze  126:        case MDOC_Bd:
                    127:        case MDOC_Bf:
                    128:        case MDOC_Bl:
1.107     schwarze  129:        case MDOC_En:
1.103     schwarze  130:        case MDOC_Rs:
1.75      schwarze  131:                p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
                    132:                break;
                    133:        default:
                    134:                break;
                    135:        }
1.136     schwarze  136:        roff_node_append(mdoc, p);
1.131     schwarze  137:        mdoc->next = ROFF_NEXT_CHILD;
1.142     schwarze  138:        return p;
1.1       kristaps  139: }
                    140:
1.120     schwarze  141: void
1.131     schwarze  142: mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos,
1.151     schwarze  143:      enum roff_tok tok, struct mdoc_arg *args)
1.1       kristaps  144: {
1.129     schwarze  145:        struct roff_node *p;
1.1       kristaps  146:
1.136     schwarze  147:        p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok);
1.4       schwarze  148:        p->args = args;
                    149:        if (p->args)
1.1       kristaps  150:                (args->refcnt)++;
1.75      schwarze  151:
                    152:        switch (tok) {
1.103     schwarze  153:        case MDOC_An:
1.75      schwarze  154:                p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
                    155:                break;
                    156:        default:
                    157:                break;
                    158:        }
1.136     schwarze  159:        roff_node_append(mdoc, p);
1.131     schwarze  160:        mdoc->next = ROFF_NEXT_CHILD;
1.1       kristaps  161: }
                    162:
1.120     schwarze  163: void
1.131     schwarze  164: mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p)
1.91      schwarze  165: {
                    166:
1.136     schwarze  167:        roff_node_unlink(mdoc, p);
1.145     schwarze  168:        p->prev = p->next = NULL;
1.136     schwarze  169:        roff_node_append(mdoc, p);
1.1       kristaps  170: }
                    171:
                    172: /*
                    173:  * Parse free-form text, that is, a line that does not begin with the
                    174:  * control character.
                    175:  */
                    176: static int
1.131     schwarze  177: mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs)
1.1       kristaps  178: {
1.129     schwarze  179:        struct roff_node *n;
1.156   ! schwarze  180:        const char       *cp, *sp;
1.56      schwarze  181:        char             *c, *ws, *end;
1.46      schwarze  182:
1.93      schwarze  183:        n = mdoc->last;
1.56      schwarze  184:
                    185:        /*
1.146     schwarze  186:         * If a column list contains plain text, assume an implicit item
                    187:         * macro.  This can happen one or more times at the beginning
                    188:         * of such a list, intermixed with non-It mdoc macros and with
                    189:         * nodes generated on the roff level, for example by tbl.
1.56      schwarze  190:         */
                    191:
1.146     schwarze  192:        if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
                    193:             n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
                    194:            (n->parent != NULL && n->parent->tok == MDOC_Bl &&
                    195:             n->parent->norm->Bl.type == LIST_column)) {
1.93      schwarze  196:                mdoc->flags |= MDOC_FREECOL;
1.121     schwarze  197:                mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf);
1.142     schwarze  198:                return 1;
1.56      schwarze  199:        }
                    200:
1.52      schwarze  201:        /*
                    202:         * Search for the beginning of unescaped trailing whitespace (ws)
                    203:         * and for the first character not to be output (end).
                    204:         */
1.56      schwarze  205:
                    206:        /* FIXME: replace with strcspn(). */
1.52      schwarze  207:        ws = NULL;
1.53      schwarze  208:        for (c = end = buf + offs; *c; c++) {
1.52      schwarze  209:                switch (*c) {
                    210:                case ' ':
                    211:                        if (NULL == ws)
                    212:                                ws = c;
                    213:                        continue;
                    214:                case '\t':
                    215:                        /*
                    216:                         * Always warn about trailing tabs,
                    217:                         * even outside literal context,
                    218:                         * where they should be put on the next line.
                    219:                         */
                    220:                        if (NULL == ws)
                    221:                                ws = c;
                    222:                        /*
                    223:                         * Strip trailing tabs in literal context only;
                    224:                         * outside, they affect the next line.
                    225:                         */
1.93      schwarze  226:                        if (MDOC_LITERAL & mdoc->flags)
1.52      schwarze  227:                                continue;
                    228:                        break;
                    229:                case '\\':
                    230:                        /* Skip the escaped character, too, if any. */
                    231:                        if (c[1])
                    232:                                c++;
                    233:                        /* FALLTHROUGH */
                    234:                default:
                    235:                        ws = NULL;
                    236:                        break;
                    237:                }
                    238:                end = c + 1;
                    239:        }
                    240:        *end = '\0';
1.19      schwarze  241:
1.52      schwarze  242:        if (ws)
1.108     schwarze  243:                mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
                    244:                    line, (int)(ws-buf), NULL);
1.34      schwarze  245:
1.156   ! schwarze  246:        /*
        !           247:         * Blank lines are allowed in no-fill mode
        !           248:         * and cancel preceding \c,
        !           249:         * but add a single vertical space elsewhere.
        !           250:         */
        !           251:
1.120     schwarze  252:        if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) {
1.156   ! schwarze  253:                switch (mdoc->last->type) {
        !           254:                case ROFFT_TEXT:
        !           255:                        sp = mdoc->last->string;
        !           256:                        cp = end = strchr(sp, '\0') - 2;
        !           257:                        if (cp < sp || cp[0] != '\\' || cp[1] != 'c')
        !           258:                                break;
        !           259:                        while (cp > sp && cp[-1] == '\\')
        !           260:                                cp--;
        !           261:                        if ((end - cp) % 2)
        !           262:                                break;
        !           263:                        *end = '\0';
        !           264:                        return 1;
        !           265:                default:
        !           266:                        break;
        !           267:                }
1.108     schwarze  268:                mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse,
                    269:                    line, (int)(c - buf), NULL);
1.153     schwarze  270:                roff_elem_alloc(mdoc, line, offs, ROFF_sp);
1.147     schwarze  271:                mdoc->last->flags |= NODE_VALID | NODE_ENDED;
1.131     schwarze  272:                mdoc->next = ROFF_NEXT_SIBLING;
1.142     schwarze  273:                return 1;
1.38      schwarze  274:        }
1.1       kristaps  275:
1.138     schwarze  276:        roff_word_alloc(mdoc, line, offs, buf+offs);
1.1       kristaps  277:
1.120     schwarze  278:        if (mdoc->flags & MDOC_LITERAL)
1.142     schwarze  279:                return 1;
1.52      schwarze  280:
1.35      schwarze  281:        /*
1.48      schwarze  282:         * End-of-sentence check.  If the last character is an unescaped
                    283:         * EOS character, then flag the node as being the end of a
                    284:         * sentence.  The front-end will know how to interpret this.
1.35      schwarze  285:         */
                    286:
1.52      schwarze  287:        assert(buf < end);
1.48      schwarze  288:
1.97      schwarze  289:        if (mandoc_eos(buf+offs, (size_t)(end-buf-offs)))
1.147     schwarze  290:                mdoc->last->flags |= NODE_EOS;
1.148     schwarze  291:
                    292:        for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) {
                    293:                if (c - buf < offs + 2)
                    294:                        continue;
1.154     schwarze  295:                if (end - c < 3)
1.148     schwarze  296:                        break;
1.155     schwarze  297:                if (c[1] != ' ' ||
                    298:                    isalpha((unsigned char)c[-2]) == 0 ||
                    299:                    isalpha((unsigned char)c[-1]) == 0 ||
                    300:                    (c[-2] == 'n' && c[-1] == 'c') ||
                    301:                    (c[-2] == 'v' && c[-1] == 's'))
                    302:                        continue;
                    303:                c += 2;
                    304:                if (*c == ' ')
                    305:                        c++;
                    306:                if (*c == ' ')
                    307:                        c++;
                    308:                if (isupper((unsigned char)(*c)))
1.148     schwarze  309:                        mandoc_msg(MANDOCERR_EOS, mdoc->parse,
                    310:                            line, (int)(c - buf), NULL);
                    311:        }
                    312:
1.142     schwarze  313:        return 1;
1.1       kristaps  314: }
1.19      schwarze  315:
1.1       kristaps  316: /*
                    317:  * Parse a macro line, that is, a line beginning with the control
                    318:  * character.
                    319:  */
1.61      schwarze  320: static int
1.131     schwarze  321: mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs)
1.1       kristaps  322: {
1.129     schwarze  323:        struct roff_node *n;
1.118     schwarze  324:        const char       *cp;
1.152     schwarze  325:        size_t            sz;
1.151     schwarze  326:        enum roff_tok     tok;
1.152     schwarze  327:        int               sv;
                    328:
                    329:        /* Determine the line macro. */
1.1       kristaps  330:
1.83      schwarze  331:        sv = offs;
1.152     schwarze  332:        tok = TOKEN_NONE;
                    333:        for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++)
                    334:                offs++;
                    335:        if (sz == 2 || sz == 3)
                    336:                tok = roffhash_find(mdoc->mdocmac, buf + sv, sz);
1.137     schwarze  337:        if (tok == TOKEN_NONE) {
1.112     schwarze  338:                mandoc_msg(MANDOCERR_MACRO, mdoc->parse,
                    339:                    ln, sv, buf + sv - 1);
1.142     schwarze  340:                return 1;
1.1       kristaps  341:        }
                    342:
1.118     schwarze  343:        /* Skip a leading escape sequence or tab. */
1.63      schwarze  344:
1.118     schwarze  345:        switch (buf[offs]) {
                    346:        case '\\':
                    347:                cp = buf + offs + 1;
                    348:                mandoc_escape(&cp, NULL, NULL);
                    349:                offs = cp - buf;
                    350:                break;
                    351:        case '\t':
1.83      schwarze  352:                offs++;
1.118     schwarze  353:                break;
                    354:        default:
                    355:                break;
                    356:        }
1.63      schwarze  357:
                    358:        /* Jump to the next non-whitespace word. */
1.1       kristaps  359:
1.150     schwarze  360:        while (buf[offs] == ' ')
1.83      schwarze  361:                offs++;
1.34      schwarze  362:
1.103     schwarze  363:        /*
1.46      schwarze  364:         * Trailing whitespace.  Note that tabs are allowed to be passed
                    365:         * into the parser as "text", so we only warn about spaces here.
                    366:         */
1.34      schwarze  367:
1.83      schwarze  368:        if ('\0' == buf[offs] && ' ' == buf[offs - 1])
1.108     schwarze  369:                mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
                    370:                    ln, offs - 1, NULL);
1.1       kristaps  371:
1.56      schwarze  372:        /*
                    373:         * If an initial macro or a list invocation, divert directly
                    374:         * into macro processing.
                    375:         */
                    376:
1.146     schwarze  377:        n = mdoc->last;
                    378:        if (n == NULL || tok == MDOC_It || tok == MDOC_El) {
1.121     schwarze  379:                mdoc_macro(mdoc, tok, ln, sv, &offs, buf);
1.142     schwarze  380:                return 1;
1.121     schwarze  381:        }
1.56      schwarze  382:
                    383:        /*
1.146     schwarze  384:         * If a column list contains a non-It macro, assume an implicit
                    385:         * item macro.  This can happen one or more times at the
                    386:         * beginning of such a list, intermixed with text lines and
                    387:         * with nodes generated on the roff level, for example by tbl.
1.24      schwarze  388:         */
1.56      schwarze  389:
1.146     schwarze  390:        if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
                    391:             n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
                    392:            (n->parent != NULL && n->parent->tok == MDOC_Bl &&
                    393:             n->parent->norm->Bl.type == LIST_column)) {
1.93      schwarze  394:                mdoc->flags |= MDOC_FREECOL;
1.121     schwarze  395:                mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf);
1.142     schwarze  396:                return 1;
1.56      schwarze  397:        }
                    398:
                    399:        /* Normal processing of a macro. */
                    400:
1.121     schwarze  401:        mdoc_macro(mdoc, tok, ln, sv, &offs, buf);
1.98      schwarze  402:
                    403:        /* In quick mode (for mandocdb), abort after the NAME section. */
                    404:
                    405:        if (mdoc->quick && MDOC_Sh == tok &&
                    406:            SEC_NAME != mdoc->last->sec)
1.142     schwarze  407:                return 2;
1.1       kristaps  408:
1.142     schwarze  409:        return 1;
1.1       kristaps  410: }
1.24      schwarze  411:
1.83      schwarze  412: enum mdelim
                    413: mdoc_isdelim(const char *p)
                    414: {
                    415:
                    416:        if ('\0' == p[0])
1.142     schwarze  417:                return DELIM_NONE;
1.83      schwarze  418:
                    419:        if ('\0' == p[1])
                    420:                switch (p[0]) {
1.103     schwarze  421:                case '(':
                    422:                case '[':
1.142     schwarze  423:                        return DELIM_OPEN;
1.103     schwarze  424:                case '|':
1.142     schwarze  425:                        return DELIM_MIDDLE;
1.103     schwarze  426:                case '.':
                    427:                case ',':
                    428:                case ';':
                    429:                case ':':
                    430:                case '?':
                    431:                case '!':
                    432:                case ')':
                    433:                case ']':
1.142     schwarze  434:                        return DELIM_CLOSE;
1.83      schwarze  435:                default:
1.142     schwarze  436:                        return DELIM_NONE;
1.83      schwarze  437:                }
                    438:
                    439:        if ('\\' != p[0])
1.142     schwarze  440:                return DELIM_NONE;
1.24      schwarze  441:
1.83      schwarze  442:        if (0 == strcmp(p + 1, "."))
1.142     schwarze  443:                return DELIM_CLOSE;
1.90      schwarze  444:        if (0 == strcmp(p + 1, "fR|\\fP"))
1.142     schwarze  445:                return DELIM_MIDDLE;
1.83      schwarze  446:
1.142     schwarze  447:        return DELIM_NONE;
1.144     schwarze  448: }
                    449:
                    450: void
                    451: mdoc_validate(struct roff_man *mdoc)
                    452: {
                    453:
                    454:        mdoc->last = mdoc->first;
                    455:        mdoc_node_validate(mdoc);
                    456:        mdoc_state_reset(mdoc);
1.83      schwarze  457: }