[BACK]Return to mdoc.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / mandoc

Annotation of src/usr.bin/mandoc/mdoc.c, Revision 1.160

1.160   ! schwarze    1: /*     $OpenBSD: mdoc.c,v 1.159 2018/12/04 02:53:45 schwarze Exp $ */
1.1       kristaps    2: /*
1.77      schwarze    3:  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.158     schwarze    4:  * Copyright (c) 2010, 2012-2018 Ingo Schwarze <schwarze@openbsd.org>
1.1       kristaps    5:  *
                      6:  * Permission to use, copy, modify, and distribute this software for any
1.3       schwarze    7:  * purpose with or without fee is hereby granted, provided that the above
                      8:  * copyright notice and this permission notice appear in all copies.
1.1       kristaps    9:  *
1.130     schwarze   10:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.3       schwarze   11:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.130     schwarze   12:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.3       schwarze   13:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     15:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     16:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1       kristaps   17:  */
1.28      schwarze   18: #include <sys/types.h>
                     19:
1.1       kristaps   20: #include <assert.h>
1.101     schwarze   21: #include <ctype.h>
1.1       kristaps   22: #include <stdarg.h>
                     23: #include <stdio.h>
                     24: #include <stdlib.h>
                     25: #include <string.h>
1.39      schwarze   26: #include <time.h>
1.1       kristaps   27:
1.128     schwarze   28: #include "mandoc_aux.h"
                     29: #include "mandoc.h"
                     30: #include "roff.h"
1.83      schwarze   31: #include "mdoc.h"
1.128     schwarze   32: #include "libmandoc.h"
1.136     schwarze   33: #include "roff_int.h"
1.1       kristaps   34: #include "libmdoc.h"
                     35:
1.103     schwarze   36: const  char *const __mdoc_argnames[MDOC_ARG_MAX] = {
1.1       kristaps   37:        "split",                "nosplit",              "ragged",
1.103     schwarze   38:        "unfilled",             "literal",              "file",
                     39:        "offset",               "bullet",               "dash",
                     40:        "hyphen",               "item",                 "enum",
                     41:        "tag",                  "diag",                 "hang",
                     42:        "ohang",                "inset",                "column",
                     43:        "width",                "compact",              "std",
1.1       kristaps   44:        "filled",               "words",                "emphasis",
1.30      schwarze   45:        "symbolic",             "nested",               "centered"
1.151     schwarze   46: };
1.1       kristaps   47: const  char * const *mdoc_argnames = __mdoc_argnames;
                     48:
/* Per-line parsers private to this file: macro lines and text lines. */
static int	mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs);
static int	mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs);
1.1       kristaps   51:
1.81      schwarze   52:
1.1       kristaps   53: /*
                     54:  * Main parse routine.  Parses a single line -- really just hands off to
1.45      schwarze   55:  * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
1.1       kristaps   56:  */
                     57: int
1.131     schwarze   58: mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs)
1.1       kristaps   59: {
                     60:
1.128     schwarze   61:        if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line)
1.117     schwarze   62:                mdoc->flags |= MDOC_NEWLINE;
1.60      schwarze   63:
                     64:        /*
                     65:         * Let the roff nS register switch SYNOPSIS mode early,
                     66:         * such that the parser knows at all times
                     67:         * whether this mode is on or off.
                     68:         * Note that this mode is also switched by the Sh macro.
                     69:         */
1.94      schwarze   70:        if (roff_getreg(mdoc->roff, "nS"))
                     71:                mdoc->flags |= MDOC_SYNOPSIS;
                     72:        else
                     73:                mdoc->flags &= ~MDOC_SYNOPSIS;
1.60      schwarze   74:
1.142     schwarze   75:        return roff_getcontrol(mdoc->roff, buf, &offs) ?
1.103     schwarze   76:            mdoc_pmacro(mdoc, ln, buf, offs) :
1.142     schwarze   77:            mdoc_ptext(mdoc, ln, buf, offs);
1.1       kristaps   78: }
                     79:
1.121     schwarze   80: void
1.151     schwarze   81: mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, enum roff_tok tok)
1.1       kristaps   82: {
1.129     schwarze   83:        struct roff_node *p;
1.1       kristaps   84:
1.136     schwarze   85:        p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok);
                     86:        roff_node_append(mdoc, p);
1.131     schwarze   87:        mdoc->next = ROFF_NEXT_CHILD;
1.59      schwarze   88: }
                     89:
1.129     schwarze   90: struct roff_node *
1.151     schwarze   91: mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos,
                     92:     enum roff_tok tok, struct roff_node *body)
1.59      schwarze   93: {
1.129     schwarze   94:        struct roff_node *p;
1.59      schwarze   95:
1.147     schwarze   96:        body->flags |= NODE_ENDED;
                     97:        body->parent->flags |= NODE_ENDED;
1.136     schwarze   98:        p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok);
1.126     schwarze   99:        p->body = body;
1.92      schwarze  100:        p->norm = body->norm;
1.149     schwarze  101:        p->end = ENDBODY_SPACE;
1.136     schwarze  102:        roff_node_append(mdoc, p);
1.131     schwarze  103:        mdoc->next = ROFF_NEXT_SIBLING;
1.142     schwarze  104:        return p;
1.1       kristaps  105: }
                    106:
1.129     schwarze  107: struct roff_node *
1.131     schwarze  108: mdoc_block_alloc(struct roff_man *mdoc, int line, int pos,
1.151     schwarze  109:     enum roff_tok tok, struct mdoc_arg *args)
1.1       kristaps  110: {
1.129     schwarze  111:        struct roff_node *p;
1.1       kristaps  112:
1.136     schwarze  113:        p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok);
1.4       schwarze  114:        p->args = args;
                    115:        if (p->args)
1.1       kristaps  116:                (args->refcnt)++;
1.75      schwarze  117:
                    118:        switch (tok) {
1.103     schwarze  119:        case MDOC_Bd:
                    120:        case MDOC_Bf:
                    121:        case MDOC_Bl:
1.107     schwarze  122:        case MDOC_En:
1.103     schwarze  123:        case MDOC_Rs:
1.75      schwarze  124:                p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
                    125:                break;
                    126:        default:
                    127:                break;
                    128:        }
1.136     schwarze  129:        roff_node_append(mdoc, p);
1.131     schwarze  130:        mdoc->next = ROFF_NEXT_CHILD;
1.142     schwarze  131:        return p;
1.1       kristaps  132: }
                    133:
1.120     schwarze  134: void
1.131     schwarze  135: mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos,
1.151     schwarze  136:      enum roff_tok tok, struct mdoc_arg *args)
1.1       kristaps  137: {
1.129     schwarze  138:        struct roff_node *p;
1.1       kristaps  139:
1.136     schwarze  140:        p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok);
1.4       schwarze  141:        p->args = args;
                    142:        if (p->args)
1.1       kristaps  143:                (args->refcnt)++;
1.75      schwarze  144:
                    145:        switch (tok) {
1.103     schwarze  146:        case MDOC_An:
1.75      schwarze  147:                p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
                    148:                break;
                    149:        default:
                    150:                break;
                    151:        }
1.136     schwarze  152:        roff_node_append(mdoc, p);
1.131     schwarze  153:        mdoc->next = ROFF_NEXT_CHILD;
1.1       kristaps  154: }
                    155:
                    156: /*
                    157:  * Parse free-form text, that is, a line that does not begin with the
                    158:  * control character.
                    159:  */
                    160: static int
1.131     schwarze  161: mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs)
1.1       kristaps  162: {
1.129     schwarze  163:        struct roff_node *n;
1.156     schwarze  164:        const char       *cp, *sp;
1.56      schwarze  165:        char             *c, *ws, *end;
1.46      schwarze  166:
1.93      schwarze  167:        n = mdoc->last;
1.56      schwarze  168:
                    169:        /*
1.146     schwarze  170:         * If a column list contains plain text, assume an implicit item
                    171:         * macro.  This can happen one or more times at the beginning
                    172:         * of such a list, intermixed with non-It mdoc macros and with
                    173:         * nodes generated on the roff level, for example by tbl.
1.56      schwarze  174:         */
                    175:
1.146     schwarze  176:        if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
                    177:             n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
                    178:            (n->parent != NULL && n->parent->tok == MDOC_Bl &&
                    179:             n->parent->norm->Bl.type == LIST_column)) {
1.93      schwarze  180:                mdoc->flags |= MDOC_FREECOL;
1.158     schwarze  181:                (*mdoc_macro(MDOC_It)->fp)(mdoc, MDOC_It,
                    182:                    line, offs, &offs, buf);
1.142     schwarze  183:                return 1;
1.56      schwarze  184:        }
                    185:
1.52      schwarze  186:        /*
                    187:         * Search for the beginning of unescaped trailing whitespace (ws)
                    188:         * and for the first character not to be output (end).
                    189:         */
1.56      schwarze  190:
                    191:        /* FIXME: replace with strcspn(). */
1.52      schwarze  192:        ws = NULL;
1.53      schwarze  193:        for (c = end = buf + offs; *c; c++) {
1.52      schwarze  194:                switch (*c) {
                    195:                case ' ':
                    196:                        if (NULL == ws)
                    197:                                ws = c;
                    198:                        continue;
                    199:                case '\t':
                    200:                        /*
                    201:                         * Always warn about trailing tabs,
                    202:                         * even outside literal context,
                    203:                         * where they should be put on the next line.
                    204:                         */
                    205:                        if (NULL == ws)
                    206:                                ws = c;
                    207:                        /*
                    208:                         * Strip trailing tabs in literal context only;
                    209:                         * outside, they affect the next line.
                    210:                         */
1.93      schwarze  211:                        if (MDOC_LITERAL & mdoc->flags)
1.52      schwarze  212:                                continue;
                    213:                        break;
                    214:                case '\\':
                    215:                        /* Skip the escaped character, too, if any. */
                    216:                        if (c[1])
                    217:                                c++;
                    218:                        /* FALLTHROUGH */
                    219:                default:
                    220:                        ws = NULL;
                    221:                        break;
                    222:                }
                    223:                end = c + 1;
                    224:        }
                    225:        *end = '\0';
1.19      schwarze  226:
1.52      schwarze  227:        if (ws)
1.160   ! schwarze  228:                mandoc_msg(MANDOCERR_SPACE_EOL, line, (int)(ws - buf), NULL);
1.34      schwarze  229:
1.156     schwarze  230:        /*
                    231:         * Blank lines are allowed in no-fill mode
                    232:         * and cancel preceding \c,
                    233:         * but add a single vertical space elsewhere.
                    234:         */
                    235:
1.120     schwarze  236:        if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) {
1.156     schwarze  237:                switch (mdoc->last->type) {
                    238:                case ROFFT_TEXT:
                    239:                        sp = mdoc->last->string;
                    240:                        cp = end = strchr(sp, '\0') - 2;
                    241:                        if (cp < sp || cp[0] != '\\' || cp[1] != 'c')
                    242:                                break;
                    243:                        while (cp > sp && cp[-1] == '\\')
                    244:                                cp--;
                    245:                        if ((end - cp) % 2)
                    246:                                break;
                    247:                        *end = '\0';
                    248:                        return 1;
                    249:                default:
                    250:                        break;
                    251:                }
1.160   ! schwarze  252:                mandoc_msg(MANDOCERR_FI_BLANK, line, (int)(c - buf), NULL);
1.153     schwarze  253:                roff_elem_alloc(mdoc, line, offs, ROFF_sp);
1.147     schwarze  254:                mdoc->last->flags |= NODE_VALID | NODE_ENDED;
1.131     schwarze  255:                mdoc->next = ROFF_NEXT_SIBLING;
1.142     schwarze  256:                return 1;
1.38      schwarze  257:        }
1.1       kristaps  258:
1.138     schwarze  259:        roff_word_alloc(mdoc, line, offs, buf+offs);
1.1       kristaps  260:
1.120     schwarze  261:        if (mdoc->flags & MDOC_LITERAL)
1.142     schwarze  262:                return 1;
1.52      schwarze  263:
1.35      schwarze  264:        /*
1.48      schwarze  265:         * End-of-sentence check.  If the last character is an unescaped
                    266:         * EOS character, then flag the node as being the end of a
                    267:         * sentence.  The front-end will know how to interpret this.
1.35      schwarze  268:         */
                    269:
1.52      schwarze  270:        assert(buf < end);
1.48      schwarze  271:
1.97      schwarze  272:        if (mandoc_eos(buf+offs, (size_t)(end-buf-offs)))
1.147     schwarze  273:                mdoc->last->flags |= NODE_EOS;
1.148     schwarze  274:
                    275:        for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) {
                    276:                if (c - buf < offs + 2)
                    277:                        continue;
1.154     schwarze  278:                if (end - c < 3)
1.148     schwarze  279:                        break;
1.155     schwarze  280:                if (c[1] != ' ' ||
1.157     schwarze  281:                    isalnum((unsigned char)c[-2]) == 0 ||
                    282:                    isalnum((unsigned char)c[-1]) == 0 ||
1.155     schwarze  283:                    (c[-2] == 'n' && c[-1] == 'c') ||
                    284:                    (c[-2] == 'v' && c[-1] == 's'))
                    285:                        continue;
                    286:                c += 2;
                    287:                if (*c == ' ')
                    288:                        c++;
                    289:                if (*c == ' ')
                    290:                        c++;
                    291:                if (isupper((unsigned char)(*c)))
1.160   ! schwarze  292:                        mandoc_msg(MANDOCERR_EOS, line, (int)(c - buf), NULL);
1.148     schwarze  293:        }
                    294:
1.142     schwarze  295:        return 1;
1.1       kristaps  296: }
1.19      schwarze  297:
1.1       kristaps  298: /*
                    299:  * Parse a macro line, that is, a line beginning with the control
                    300:  * character.
                    301:  */
1.61      schwarze  302: static int
1.131     schwarze  303: mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs)
1.1       kristaps  304: {
1.129     schwarze  305:        struct roff_node *n;
1.118     schwarze  306:        const char       *cp;
1.152     schwarze  307:        size_t            sz;
1.151     schwarze  308:        enum roff_tok     tok;
1.152     schwarze  309:        int               sv;
                    310:
                    311:        /* Determine the line macro. */
1.1       kristaps  312:
1.83      schwarze  313:        sv = offs;
1.152     schwarze  314:        tok = TOKEN_NONE;
                    315:        for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++)
                    316:                offs++;
                    317:        if (sz == 2 || sz == 3)
                    318:                tok = roffhash_find(mdoc->mdocmac, buf + sv, sz);
1.137     schwarze  319:        if (tok == TOKEN_NONE) {
1.160   ! schwarze  320:                mandoc_msg(MANDOCERR_MACRO, ln, sv, "%s", buf + sv - 1);
1.142     schwarze  321:                return 1;
1.1       kristaps  322:        }
                    323:
1.118     schwarze  324:        /* Skip a leading escape sequence or tab. */
1.63      schwarze  325:
1.118     schwarze  326:        switch (buf[offs]) {
                    327:        case '\\':
                    328:                cp = buf + offs + 1;
                    329:                mandoc_escape(&cp, NULL, NULL);
                    330:                offs = cp - buf;
                    331:                break;
                    332:        case '\t':
1.83      schwarze  333:                offs++;
1.118     schwarze  334:                break;
                    335:        default:
                    336:                break;
                    337:        }
1.63      schwarze  338:
                    339:        /* Jump to the next non-whitespace word. */
1.1       kristaps  340:
1.150     schwarze  341:        while (buf[offs] == ' ')
1.83      schwarze  342:                offs++;
1.34      schwarze  343:
1.103     schwarze  344:        /*
1.46      schwarze  345:         * Trailing whitespace.  Note that tabs are allowed to be passed
                    346:         * into the parser as "text", so we only warn about spaces here.
                    347:         */
1.34      schwarze  348:
1.83      schwarze  349:        if ('\0' == buf[offs] && ' ' == buf[offs - 1])
1.160   ! schwarze  350:                mandoc_msg(MANDOCERR_SPACE_EOL, ln, offs - 1, NULL);
1.1       kristaps  351:
1.56      schwarze  352:        /*
                    353:         * If an initial macro or a list invocation, divert directly
                    354:         * into macro processing.
                    355:         */
                    356:
1.146     schwarze  357:        n = mdoc->last;
                    358:        if (n == NULL || tok == MDOC_It || tok == MDOC_El) {
1.158     schwarze  359:                (*mdoc_macro(tok)->fp)(mdoc, tok, ln, sv, &offs, buf);
1.142     schwarze  360:                return 1;
1.121     schwarze  361:        }
1.56      schwarze  362:
                    363:        /*
1.146     schwarze  364:         * If a column list contains a non-It macro, assume an implicit
                    365:         * item macro.  This can happen one or more times at the
                    366:         * beginning of such a list, intermixed with text lines and
                    367:         * with nodes generated on the roff level, for example by tbl.
1.24      schwarze  368:         */
1.56      schwarze  369:
1.146     schwarze  370:        if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
                    371:             n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
                    372:            (n->parent != NULL && n->parent->tok == MDOC_Bl &&
                    373:             n->parent->norm->Bl.type == LIST_column)) {
1.93      schwarze  374:                mdoc->flags |= MDOC_FREECOL;
1.158     schwarze  375:                (*mdoc_macro(MDOC_It)->fp)(mdoc, MDOC_It, ln, sv, &sv, buf);
1.142     schwarze  376:                return 1;
1.56      schwarze  377:        }
                    378:
                    379:        /* Normal processing of a macro. */
                    380:
1.158     schwarze  381:        (*mdoc_macro(tok)->fp)(mdoc, tok, ln, sv, &offs, buf);
1.98      schwarze  382:
                    383:        /* In quick mode (for mandocdb), abort after the NAME section. */
                    384:
                    385:        if (mdoc->quick && MDOC_Sh == tok &&
                    386:            SEC_NAME != mdoc->last->sec)
1.142     schwarze  387:                return 2;
1.1       kristaps  388:
1.142     schwarze  389:        return 1;
1.1       kristaps  390: }
1.24      schwarze  391:
1.83      schwarze  392: enum mdelim
                    393: mdoc_isdelim(const char *p)
                    394: {
                    395:
                    396:        if ('\0' == p[0])
1.142     schwarze  397:                return DELIM_NONE;
1.83      schwarze  398:
                    399:        if ('\0' == p[1])
                    400:                switch (p[0]) {
1.103     schwarze  401:                case '(':
                    402:                case '[':
1.142     schwarze  403:                        return DELIM_OPEN;
1.103     schwarze  404:                case '|':
1.142     schwarze  405:                        return DELIM_MIDDLE;
1.103     schwarze  406:                case '.':
                    407:                case ',':
                    408:                case ';':
                    409:                case ':':
                    410:                case '?':
                    411:                case '!':
                    412:                case ')':
                    413:                case ']':
1.142     schwarze  414:                        return DELIM_CLOSE;
1.83      schwarze  415:                default:
1.142     schwarze  416:                        return DELIM_NONE;
1.83      schwarze  417:                }
                    418:
                    419:        if ('\\' != p[0])
1.142     schwarze  420:                return DELIM_NONE;
1.24      schwarze  421:
1.83      schwarze  422:        if (0 == strcmp(p + 1, "."))
1.142     schwarze  423:                return DELIM_CLOSE;
1.90      schwarze  424:        if (0 == strcmp(p + 1, "fR|\\fP"))
1.142     schwarze  425:                return DELIM_MIDDLE;
1.83      schwarze  426:
1.142     schwarze  427:        return DELIM_NONE;
1.144     schwarze  428: }
                    429:
                    430: void
                    431: mdoc_validate(struct roff_man *mdoc)
                    432: {
                    433:
                    434:        mdoc->last = mdoc->first;
                    435:        mdoc_node_validate(mdoc);
                    436:        mdoc_state_reset(mdoc);
1.83      schwarze  437: }