[BACK]Return to mdoc.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / mandoc

Annotation of src/usr.bin/mandoc/mdoc.c, Revision 1.154

1.154   ! schwarze    1: /*     $OpenBSD: mdoc.c,v 1.153 2017/05/05 15:16:25 schwarze Exp $ */
1.1       kristaps    2: /*
1.77      schwarze    3:  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.148     schwarze    4:  * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org>
1.1       kristaps    5:  *
                      6:  * Permission to use, copy, modify, and distribute this software for any
1.3       schwarze    7:  * purpose with or without fee is hereby granted, provided that the above
                      8:  * copyright notice and this permission notice appear in all copies.
1.1       kristaps    9:  *
1.130     schwarze   10:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.3       schwarze   11:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.130     schwarze   12:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.3       schwarze   13:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     15:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     16:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1       kristaps   17:  */
1.28      schwarze   18: #include <sys/types.h>
                     19:
1.1       kristaps   20: #include <assert.h>
1.101     schwarze   21: #include <ctype.h>
1.1       kristaps   22: #include <stdarg.h>
                     23: #include <stdio.h>
                     24: #include <stdlib.h>
                     25: #include <string.h>
1.39      schwarze   26: #include <time.h>
1.1       kristaps   27:
1.128     schwarze   28: #include "mandoc_aux.h"
                     29: #include "mandoc.h"
                     30: #include "roff.h"
1.83      schwarze   31: #include "mdoc.h"
1.128     schwarze   32: #include "libmandoc.h"
1.136     schwarze   33: #include "roff_int.h"
1.1       kristaps   34: #include "libmdoc.h"
                     35:
1.103     schwarze   36: const  char *const __mdoc_argnames[MDOC_ARG_MAX] = {
1.1       kristaps   37:        "split",                "nosplit",              "ragged",
1.103     schwarze   38:        "unfilled",             "literal",              "file",
                     39:        "offset",               "bullet",               "dash",
                     40:        "hyphen",               "item",                 "enum",
                     41:        "tag",                  "diag",                 "hang",
                     42:        "ohang",                "inset",                "column",
                     43:        "width",                "compact",              "std",
1.1       kristaps   44:        "filled",               "words",                "emphasis",
1.30      schwarze   45:        "symbolic",             "nested",               "centered"
1.151     schwarze   46: };
1.1       kristaps   47: const  char * const *mdoc_argnames = __mdoc_argnames;
                     48:
/* Line parsers local to this file: control (macro) lines and text lines. */
static int		 mdoc_pmacro(struct roff_man *, int, char *, int);
static int		 mdoc_ptext(struct roff_man *, int, char *, int);
1.1       kristaps   51:
1.81      schwarze   52:
1.1       kristaps   53: /*
                     54:  * Main parse routine.  Parses a single line -- really just hands off to
1.45      schwarze   55:  * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
1.1       kristaps   56:  */
                     57: int
1.131     schwarze   58: mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs)
1.1       kristaps   59: {
                     60:
1.128     schwarze   61:        if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line)
1.117     schwarze   62:                mdoc->flags |= MDOC_NEWLINE;
1.60      schwarze   63:
                     64:        /*
                     65:         * Let the roff nS register switch SYNOPSIS mode early,
                     66:         * such that the parser knows at all times
                     67:         * whether this mode is on or off.
                     68:         * Note that this mode is also switched by the Sh macro.
                     69:         */
1.94      schwarze   70:        if (roff_getreg(mdoc->roff, "nS"))
                     71:                mdoc->flags |= MDOC_SYNOPSIS;
                     72:        else
                     73:                mdoc->flags &= ~MDOC_SYNOPSIS;
1.60      schwarze   74:
1.142     schwarze   75:        return roff_getcontrol(mdoc->roff, buf, &offs) ?
1.103     schwarze   76:            mdoc_pmacro(mdoc, ln, buf, offs) :
1.142     schwarze   77:            mdoc_ptext(mdoc, ln, buf, offs);
1.1       kristaps   78: }
                     79:
1.121     schwarze   80: void
1.58      schwarze   81: mdoc_macro(MACRO_PROT_ARGS)
1.1       kristaps   82: {
1.151     schwarze   83:        assert(tok >= MDOC_Dd && tok < MDOC_MAX);
1.121     schwarze   84:        (*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf);
1.1       kristaps   85: }
                     86:
1.120     schwarze   87: void
1.151     schwarze   88: mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, enum roff_tok tok)
1.1       kristaps   89: {
1.129     schwarze   90:        struct roff_node *p;
1.1       kristaps   91:
1.136     schwarze   92:        p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok);
                     93:        roff_node_append(mdoc, p);
1.131     schwarze   94:        mdoc->next = ROFF_NEXT_CHILD;
1.59      schwarze   95: }
                     96:
1.129     schwarze   97: struct roff_node *
1.151     schwarze   98: mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos,
                     99:     enum roff_tok tok, struct roff_node *body)
1.59      schwarze  100: {
1.129     schwarze  101:        struct roff_node *p;
1.59      schwarze  102:
1.147     schwarze  103:        body->flags |= NODE_ENDED;
                    104:        body->parent->flags |= NODE_ENDED;
1.136     schwarze  105:        p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok);
1.126     schwarze  106:        p->body = body;
1.92      schwarze  107:        p->norm = body->norm;
1.149     schwarze  108:        p->end = ENDBODY_SPACE;
1.136     schwarze  109:        roff_node_append(mdoc, p);
1.131     schwarze  110:        mdoc->next = ROFF_NEXT_SIBLING;
1.142     schwarze  111:        return p;
1.1       kristaps  112: }
                    113:
1.129     schwarze  114: struct roff_node *
1.131     schwarze  115: mdoc_block_alloc(struct roff_man *mdoc, int line, int pos,
1.151     schwarze  116:     enum roff_tok tok, struct mdoc_arg *args)
1.1       kristaps  117: {
1.129     schwarze  118:        struct roff_node *p;
1.1       kristaps  119:
1.136     schwarze  120:        p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok);
1.4       schwarze  121:        p->args = args;
                    122:        if (p->args)
1.1       kristaps  123:                (args->refcnt)++;
1.75      schwarze  124:
                    125:        switch (tok) {
1.103     schwarze  126:        case MDOC_Bd:
                    127:        case MDOC_Bf:
                    128:        case MDOC_Bl:
1.107     schwarze  129:        case MDOC_En:
1.103     schwarze  130:        case MDOC_Rs:
1.75      schwarze  131:                p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
                    132:                break;
                    133:        default:
                    134:                break;
                    135:        }
1.136     schwarze  136:        roff_node_append(mdoc, p);
1.131     schwarze  137:        mdoc->next = ROFF_NEXT_CHILD;
1.142     schwarze  138:        return p;
1.1       kristaps  139: }
                    140:
1.120     schwarze  141: void
1.131     schwarze  142: mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos,
1.151     schwarze  143:      enum roff_tok tok, struct mdoc_arg *args)
1.1       kristaps  144: {
1.129     schwarze  145:        struct roff_node *p;
1.1       kristaps  146:
1.136     schwarze  147:        p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok);
1.4       schwarze  148:        p->args = args;
                    149:        if (p->args)
1.1       kristaps  150:                (args->refcnt)++;
1.75      schwarze  151:
                    152:        switch (tok) {
1.103     schwarze  153:        case MDOC_An:
1.75      schwarze  154:                p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
                    155:                break;
                    156:        default:
                    157:                break;
                    158:        }
1.136     schwarze  159:        roff_node_append(mdoc, p);
1.131     schwarze  160:        mdoc->next = ROFF_NEXT_CHILD;
1.1       kristaps  161: }
                    162:
1.120     schwarze  163: void
1.131     schwarze  164: mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p)
1.91      schwarze  165: {
                    166:
1.136     schwarze  167:        roff_node_unlink(mdoc, p);
1.145     schwarze  168:        p->prev = p->next = NULL;
1.136     schwarze  169:        roff_node_append(mdoc, p);
1.1       kristaps  170: }
                    171:
                    172: /*
                    173:  * Parse free-form text, that is, a line that does not begin with the
                    174:  * control character.
                    175:  */
                    176: static int
1.131     schwarze  177: mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs)
1.1       kristaps  178: {
1.129     schwarze  179:        struct roff_node *n;
1.56      schwarze  180:        char             *c, *ws, *end;
1.46      schwarze  181:
1.93      schwarze  182:        n = mdoc->last;
1.56      schwarze  183:
                    184:        /*
1.146     schwarze  185:         * If a column list contains plain text, assume an implicit item
                    186:         * macro.  This can happen one or more times at the beginning
                    187:         * of such a list, intermixed with non-It mdoc macros and with
                    188:         * nodes generated on the roff level, for example by tbl.
1.56      schwarze  189:         */
                    190:
1.146     schwarze  191:        if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
                    192:             n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
                    193:            (n->parent != NULL && n->parent->tok == MDOC_Bl &&
                    194:             n->parent->norm->Bl.type == LIST_column)) {
1.93      schwarze  195:                mdoc->flags |= MDOC_FREECOL;
1.121     schwarze  196:                mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf);
1.142     schwarze  197:                return 1;
1.56      schwarze  198:        }
                    199:
1.52      schwarze  200:        /*
                    201:         * Search for the beginning of unescaped trailing whitespace (ws)
                    202:         * and for the first character not to be output (end).
                    203:         */
1.56      schwarze  204:
                    205:        /* FIXME: replace with strcspn(). */
1.52      schwarze  206:        ws = NULL;
1.53      schwarze  207:        for (c = end = buf + offs; *c; c++) {
1.52      schwarze  208:                switch (*c) {
                    209:                case ' ':
                    210:                        if (NULL == ws)
                    211:                                ws = c;
                    212:                        continue;
                    213:                case '\t':
                    214:                        /*
                    215:                         * Always warn about trailing tabs,
                    216:                         * even outside literal context,
                    217:                         * where they should be put on the next line.
                    218:                         */
                    219:                        if (NULL == ws)
                    220:                                ws = c;
                    221:                        /*
                    222:                         * Strip trailing tabs in literal context only;
                    223:                         * outside, they affect the next line.
                    224:                         */
1.93      schwarze  225:                        if (MDOC_LITERAL & mdoc->flags)
1.52      schwarze  226:                                continue;
                    227:                        break;
                    228:                case '\\':
                    229:                        /* Skip the escaped character, too, if any. */
                    230:                        if (c[1])
                    231:                                c++;
                    232:                        /* FALLTHROUGH */
                    233:                default:
                    234:                        ws = NULL;
                    235:                        break;
                    236:                }
                    237:                end = c + 1;
                    238:        }
                    239:        *end = '\0';
1.19      schwarze  240:
1.52      schwarze  241:        if (ws)
1.108     schwarze  242:                mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
                    243:                    line, (int)(ws-buf), NULL);
1.34      schwarze  244:
1.120     schwarze  245:        if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) {
1.108     schwarze  246:                mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse,
                    247:                    line, (int)(c - buf), NULL);
1.46      schwarze  248:
1.40      schwarze  249:                /*
1.66      schwarze  250:                 * Insert a `sp' in the case of a blank line.  Technically,
1.46      schwarze  251:                 * blank lines aren't allowed, but enough manuals assume this
                    252:                 * behaviour that we want to work around it.
1.40      schwarze  253:                 */
1.153     schwarze  254:                roff_elem_alloc(mdoc, line, offs, ROFF_sp);
1.147     schwarze  255:                mdoc->last->flags |= NODE_VALID | NODE_ENDED;
1.131     schwarze  256:                mdoc->next = ROFF_NEXT_SIBLING;
1.142     schwarze  257:                return 1;
1.38      schwarze  258:        }
1.1       kristaps  259:
1.138     schwarze  260:        roff_word_alloc(mdoc, line, offs, buf+offs);
1.1       kristaps  261:
1.120     schwarze  262:        if (mdoc->flags & MDOC_LITERAL)
1.142     schwarze  263:                return 1;
1.52      schwarze  264:
1.35      schwarze  265:        /*
1.48      schwarze  266:         * End-of-sentence check.  If the last character is an unescaped
                    267:         * EOS character, then flag the node as being the end of a
                    268:         * sentence.  The front-end will know how to interpret this.
1.35      schwarze  269:         */
                    270:
1.52      schwarze  271:        assert(buf < end);
1.48      schwarze  272:
1.97      schwarze  273:        if (mandoc_eos(buf+offs, (size_t)(end-buf-offs)))
1.147     schwarze  274:                mdoc->last->flags |= NODE_EOS;
1.148     schwarze  275:
                    276:        for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) {
                    277:                if (c - buf < offs + 2)
                    278:                        continue;
1.154   ! schwarze  279:                if (end - c < 3)
1.148     schwarze  280:                        break;
                    281:                if (isalpha((unsigned char)c[-2]) &&
                    282:                    isalpha((unsigned char)c[-1]) &&
                    283:                    c[1] == ' ' &&
                    284:                    isupper((unsigned char)(c[2] == ' ' ? c[3] : c[2])) &&
                    285:                    (c[-2] != 'n' || c[-1] != 'c') &&
                    286:                    (c[-2] != 'v' || c[-1] != 's'))
                    287:                        mandoc_msg(MANDOCERR_EOS, mdoc->parse,
                    288:                            line, (int)(c - buf), NULL);
                    289:        }
                    290:
1.142     schwarze  291:        return 1;
1.1       kristaps  292: }
1.19      schwarze  293:
1.1       kristaps  294: /*
                    295:  * Parse a macro line, that is, a line beginning with the control
                    296:  * character.
                    297:  */
1.61      schwarze  298: static int
1.131     schwarze  299: mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs)
1.1       kristaps  300: {
1.129     schwarze  301:        struct roff_node *n;
1.118     schwarze  302:        const char       *cp;
1.152     schwarze  303:        size_t            sz;
1.151     schwarze  304:        enum roff_tok     tok;
1.152     schwarze  305:        int               sv;
                    306:
                    307:        /* Determine the line macro. */
1.1       kristaps  308:
1.83      schwarze  309:        sv = offs;
1.152     schwarze  310:        tok = TOKEN_NONE;
                    311:        for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++)
                    312:                offs++;
                    313:        if (sz == 2 || sz == 3)
                    314:                tok = roffhash_find(mdoc->mdocmac, buf + sv, sz);
1.137     schwarze  315:        if (tok == TOKEN_NONE) {
1.112     schwarze  316:                mandoc_msg(MANDOCERR_MACRO, mdoc->parse,
                    317:                    ln, sv, buf + sv - 1);
1.142     schwarze  318:                return 1;
1.1       kristaps  319:        }
                    320:
1.118     schwarze  321:        /* Skip a leading escape sequence or tab. */
1.63      schwarze  322:
1.118     schwarze  323:        switch (buf[offs]) {
                    324:        case '\\':
                    325:                cp = buf + offs + 1;
                    326:                mandoc_escape(&cp, NULL, NULL);
                    327:                offs = cp - buf;
                    328:                break;
                    329:        case '\t':
1.83      schwarze  330:                offs++;
1.118     schwarze  331:                break;
                    332:        default:
                    333:                break;
                    334:        }
1.63      schwarze  335:
                    336:        /* Jump to the next non-whitespace word. */
1.1       kristaps  337:
1.150     schwarze  338:        while (buf[offs] == ' ')
1.83      schwarze  339:                offs++;
1.34      schwarze  340:
1.103     schwarze  341:        /*
1.46      schwarze  342:         * Trailing whitespace.  Note that tabs are allowed to be passed
                    343:         * into the parser as "text", so we only warn about spaces here.
                    344:         */
1.34      schwarze  345:
1.83      schwarze  346:        if ('\0' == buf[offs] && ' ' == buf[offs - 1])
1.108     schwarze  347:                mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
                    348:                    ln, offs - 1, NULL);
1.1       kristaps  349:
1.56      schwarze  350:        /*
                    351:         * If an initial macro or a list invocation, divert directly
                    352:         * into macro processing.
                    353:         */
                    354:
1.146     schwarze  355:        n = mdoc->last;
                    356:        if (n == NULL || tok == MDOC_It || tok == MDOC_El) {
1.121     schwarze  357:                mdoc_macro(mdoc, tok, ln, sv, &offs, buf);
1.142     schwarze  358:                return 1;
1.121     schwarze  359:        }
1.56      schwarze  360:
                    361:        /*
1.146     schwarze  362:         * If a column list contains a non-It macro, assume an implicit
                    363:         * item macro.  This can happen one or more times at the
                    364:         * beginning of such a list, intermixed with text lines and
                    365:         * with nodes generated on the roff level, for example by tbl.
1.24      schwarze  366:         */
1.56      schwarze  367:
1.146     schwarze  368:        if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
                    369:             n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
                    370:            (n->parent != NULL && n->parent->tok == MDOC_Bl &&
                    371:             n->parent->norm->Bl.type == LIST_column)) {
1.93      schwarze  372:                mdoc->flags |= MDOC_FREECOL;
1.121     schwarze  373:                mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf);
1.142     schwarze  374:                return 1;
1.56      schwarze  375:        }
                    376:
                    377:        /* Normal processing of a macro. */
                    378:
1.121     schwarze  379:        mdoc_macro(mdoc, tok, ln, sv, &offs, buf);
1.98      schwarze  380:
                    381:        /* In quick mode (for mandocdb), abort after the NAME section. */
                    382:
                    383:        if (mdoc->quick && MDOC_Sh == tok &&
                    384:            SEC_NAME != mdoc->last->sec)
1.142     schwarze  385:                return 2;
1.1       kristaps  386:
1.142     schwarze  387:        return 1;
1.1       kristaps  388: }
1.24      schwarze  389:
1.83      schwarze  390: enum mdelim
                    391: mdoc_isdelim(const char *p)
                    392: {
                    393:
                    394:        if ('\0' == p[0])
1.142     schwarze  395:                return DELIM_NONE;
1.83      schwarze  396:
                    397:        if ('\0' == p[1])
                    398:                switch (p[0]) {
1.103     schwarze  399:                case '(':
                    400:                case '[':
1.142     schwarze  401:                        return DELIM_OPEN;
1.103     schwarze  402:                case '|':
1.142     schwarze  403:                        return DELIM_MIDDLE;
1.103     schwarze  404:                case '.':
                    405:                case ',':
                    406:                case ';':
                    407:                case ':':
                    408:                case '?':
                    409:                case '!':
                    410:                case ')':
                    411:                case ']':
1.142     schwarze  412:                        return DELIM_CLOSE;
1.83      schwarze  413:                default:
1.142     schwarze  414:                        return DELIM_NONE;
1.83      schwarze  415:                }
                    416:
                    417:        if ('\\' != p[0])
1.142     schwarze  418:                return DELIM_NONE;
1.24      schwarze  419:
1.83      schwarze  420:        if (0 == strcmp(p + 1, "."))
1.142     schwarze  421:                return DELIM_CLOSE;
1.90      schwarze  422:        if (0 == strcmp(p + 1, "fR|\\fP"))
1.142     schwarze  423:                return DELIM_MIDDLE;
1.83      schwarze  424:
1.142     schwarze  425:        return DELIM_NONE;
1.144     schwarze  426: }
                    427:
                    428: void
                    429: mdoc_validate(struct roff_man *mdoc)
                    430: {
                    431:
                    432:        mdoc->last = mdoc->first;
                    433:        mdoc_node_validate(mdoc);
                    434:        mdoc_state_reset(mdoc);
1.83      schwarze  435: }