src/usr.bin/mandoc/mandoc.c - annotate

Return to mandoc.c CVS log
Up to [local] / src / usr.bin / mandoc
Annotation of src/usr.bin/mandoc/mandoc.c, Revision 1.32

1.32    ! schwarze    1: /*     $Id: mandoc.c,v 1.31 2011/11/17 11:58:11 schwarze Exp $ */
1.1       schwarze    2: /*
1.24      schwarze    3:  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.32    ! schwarze    4:  * Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org>
1.1       schwarze    5:  *
                      6:  * Permission to use, copy, modify, and distribute this software for any
                      7:  * purpose with or without fee is hereby granted, provided that the above
                      8:  * copyright notice and this permission notice appear in all copies.
                      9:  *
1.21      schwarze   10:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1       schwarze   11:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.21      schwarze   12:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1       schwarze   13:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     15:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     16:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     17:  */
1.2       schwarze   18: #include <sys/types.h>
                     19:
1.1       schwarze   20: #include <assert.h>
                     21: #include <ctype.h>
1.26      schwarze   22: #include <errno.h>
                     23: #include <limits.h>
1.1       schwarze   24: #include <stdlib.h>
1.4       schwarze   25: #include <stdio.h>
                     26: #include <string.h>
1.5       schwarze   27: #include <time.h>
1.1       schwarze   28:
1.14      schwarze   29: #include "mandoc.h"
1.1       schwarze   30: #include "libmandoc.h"
                     31:
1.22      schwarze   32: #define DATESIZE 32
                     33:
1.14      schwarze   34: static int      a2time(time_t *, const char *, const char *);
1.22      schwarze   35: static char    *time2a(time_t);
1.26      schwarze   36: static int      numescape(const char *);
1.5       schwarze   37:
1.26      schwarze   38: /*
                     39:  * Pass over recursive numerical expressions.  This context of this
                     40:  * function is important: it's only called within character-terminating
                     41:  * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial
                     42:  * recursion: we don't care about what's in these blocks.
                     43:  * This returns the number of characters skipped or -1 if an error
                     44:  * occurs (the caller should bail).
                     45:  */
                     46: static int
                     47: numescape(const char *start)
1.1       schwarze   48: {
1.26      schwarze   49:        int              i;
                     50:        size_t           sz;
                     51:        const char      *cp;
                     52:
                     53:        i = 0;
                     54:
                     55:        /* The expression consists of a subexpression. */
                     56:
                     57:        if ('\\' == start[i]) {
                     58:                cp = &start[++i];
                     59:                /*
                     60:                 * Read past the end of the subexpression.
                     61:                 * Bail immediately on errors.
                     62:                 */
                     63:                if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
                     64:                        return(-1);
                     65:                return(i + cp - &start[i]);
                     66:        }
                     67:
                     68:        if ('(' != start[i++])
                     69:                return(0);
1.14      schwarze   70:
1.26      schwarze   71:        /*
                     72:         * A parenthesised subexpression.  Read until the closing
                     73:         * parenthesis, making sure to handle any nested subexpressions
                     74:         * that might ruin our parse.
                     75:         */
                     76:
                     77:        while (')' != start[i]) {
                     78:                sz = strcspn(&start[i], ")\\");
                     79:                i += (int)sz;
                     80:
                     81:                if ('\0' == start[i])
                     82:                        return(-1);
                     83:                else if ('\\' != start[i])
                     84:                        continue;
                     85:
                     86:                cp = &start[++i];
                     87:                if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
                     88:                        return(-1);
                     89:                i += cp - &start[i];
                     90:        }
                     91:
                     92:        /* Read past the terminating ')'. */
                     93:        return(++i);
                     94: }
                     95:
                     96: enum mandoc_esc
                     97: mandoc_escape(const char **end, const char **start, int *sz)
                     98: {
                     99:        char             c, term, numeric;
                    100:        int              i, lim, ssz, rlim;
                    101:        const char      *cp, *rstart;
                    102:        enum mandoc_esc  gly;
                    103:
                    104:        cp = *end;
                    105:        rstart = cp;
                    106:        if (start)
                    107:                *start = rstart;
                    108:        i = lim = 0;
                    109:        gly = ESCAPE_ERROR;
                    110:        term = numeric = '\0';
                    111:
                    112:        switch ((c = cp[i++])) {
                    113:        /*
                    114:         * First the glyphs.  There are several different forms of
                    115:         * these, but each eventually returns a substring of the glyph
                    116:         * name.
                    117:         */
                    118:        case ('('):
                    119:                gly = ESCAPE_SPECIAL;
                    120:                lim = 2;
                    121:                break;
                    122:        case ('['):
                    123:                gly = ESCAPE_SPECIAL;
                    124:                /*
                    125:                 * Unicode escapes are defined in groff as \[uXXXX] to
                    126:                 * \[u10FFFF], where the contained value must be a valid
                    127:                 * Unicode codepoint.  Here, however, only check whether
                    128:                 * it's not a zero-width escape.
                    129:                 */
                    130:                if ('u' == cp[i] && ']' != cp[i + 1])
                    131:                        gly = ESCAPE_UNICODE;
                    132:                term = ']';
                    133:                break;
                    134:        case ('C'):
                    135:                if ('\'' != cp[i])
                    136:                        return(ESCAPE_ERROR);
                    137:                gly = ESCAPE_SPECIAL;
                    138:                term = '\'';
                    139:                break;
1.32    ! schwarze  140:
        !           141:        /*
        !           142:         * The \z escape is supposed to output the following
        !           143:         * character without advancing the cursor position.
        !           144:         * Since we are mostly dealing with terminal mode,
        !           145:         * let us just skip the next character.
        !           146:         */
        !           147:        case ('z'):
        !           148:                (*end)++;
        !           149:                return(ESCAPE_SKIPCHAR);
1.1       schwarze  150:
1.26      schwarze  151:        /*
                    152:         * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
                    153:         * 'X' is the trigger.  These have opaque sub-strings.
                    154:         */
                    155:        case ('F'):
1.16      schwarze  156:                /* FALLTHROUGH */
1.26      schwarze  157:        case ('g'):
1.16      schwarze  158:                /* FALLTHROUGH */
1.26      schwarze  159:        case ('k'):
1.1       schwarze  160:                /* FALLTHROUGH */
1.26      schwarze  161:        case ('M'):
1.14      schwarze  162:                /* FALLTHROUGH */
1.26      schwarze  163:        case ('m'):
1.1       schwarze  164:                /* FALLTHROUGH */
1.26      schwarze  165:        case ('n'):
1.1       schwarze  166:                /* FALLTHROUGH */
1.26      schwarze  167:        case ('V'):
1.1       schwarze  168:                /* FALLTHROUGH */
1.26      schwarze  169:        case ('Y'):
1.29      schwarze  170:                gly = ESCAPE_IGNORE;
1.1       schwarze  171:                /* FALLTHROUGH */
1.26      schwarze  172:        case ('f'):
                    173:                if (ESCAPE_ERROR == gly)
                    174:                        gly = ESCAPE_FONT;
                    175:
                    176:                rstart= &cp[i];
                    177:                if (start)
                    178:                        *start = rstart;
                    179:
                    180:                switch (cp[i++]) {
                    181:                case ('('):
                    182:                        lim = 2;
                    183:                        break;
                    184:                case ('['):
                    185:                        term = ']';
                    186:                        break;
                    187:                default:
                    188:                        lim = 1;
                    189:                        i--;
                    190:                        break;
                    191:                }
                    192:                break;
                    193:
                    194:        /*
                    195:         * These escapes are of the form \X'Y', where 'X' is the trigger
                    196:         * and 'Y' is any string.  These have opaque sub-strings.
                    197:         */
                    198:        case ('A'):
1.13      schwarze  199:                /* FALLTHROUGH */
1.26      schwarze  200:        case ('b'):
1.1       schwarze  201:                /* FALLTHROUGH */
1.16      schwarze  202:        case ('D'):
1.1       schwarze  203:                /* FALLTHROUGH */
1.26      schwarze  204:        case ('o'):
1.1       schwarze  205:                /* FALLTHROUGH */
1.26      schwarze  206:        case ('R'):
1.1       schwarze  207:                /* FALLTHROUGH */
1.26      schwarze  208:        case ('X'):
1.1       schwarze  209:                /* FALLTHROUGH */
1.26      schwarze  210:        case ('Z'):
                    211:                if ('\'' != cp[i++])
                    212:                        return(ESCAPE_ERROR);
                    213:                gly = ESCAPE_IGNORE;
1.16      schwarze  214:                term = '\'';
                    215:                break;
1.26      schwarze  216:
                    217:        /*
                    218:         * These escapes are of the form \X'N', where 'X' is the trigger
                    219:         * and 'N' resolves to a numerical expression.
                    220:         */
                    221:        case ('B'):
                    222:                /* FALLTHROUGH */
1.17      schwarze  223:        case ('h'):
                    224:                /* FALLTHROUGH */
1.26      schwarze  225:        case ('H'):
                    226:                /* FALLTHROUGH */
                    227:        case ('L'):
                    228:                /* FALLTHROUGH */
                    229:        case ('l'):
1.29      schwarze  230:                gly = ESCAPE_NUMBERED;
1.26      schwarze  231:                /* FALLTHROUGH */
                    232:        case ('S'):
                    233:                /* FALLTHROUGH */
1.17      schwarze  234:        case ('v'):
                    235:                /* FALLTHROUGH */
1.26      schwarze  236:        case ('w'):
                    237:                /* FALLTHROUGH */
                    238:        case ('x'):
                    239:                if (ESCAPE_ERROR == gly)
                    240:                        gly = ESCAPE_IGNORE;
                    241:                if ('\'' != cp[i++])
                    242:                        return(ESCAPE_ERROR);
                    243:                term = numeric = '\'';
                    244:                break;
1.29      schwarze  245:
                    246:        /*
                    247:         * Special handling for the numbered character escape.
                    248:         * XXX Do any other escapes need similar handling?
                    249:         */
                    250:        case ('N'):
                    251:                if ('\0' == cp[i])
                    252:                        return(ESCAPE_ERROR);
                    253:                *end = &cp[++i];
                    254:                if (isdigit((unsigned char)cp[i-1]))
                    255:                        return(ESCAPE_IGNORE);
                    256:                while (isdigit((unsigned char)**end))
                    257:                        (*end)++;
                    258:                if (start)
                    259:                        *start = &cp[i];
                    260:                if (sz)
                    261:                        *sz = *end - &cp[i];
                    262:                if ('\0' != **end)
                    263:                        (*end)++;
                    264:                return(ESCAPE_NUMBERED);
1.26      schwarze  265:
                    266:        /*
                    267:         * Sizes get a special category of their own.
                    268:         */
1.6       schwarze  269:        case ('s'):
1.26      schwarze  270:                gly = ESCAPE_IGNORE;
1.17      schwarze  271:
1.26      schwarze  272:                rstart = &cp[i];
                    273:                if (start)
                    274:                        *start = rstart;
                    275:
                    276:                /* See +/- counts as a sign. */
                    277:                c = cp[i];
                    278:                if ('+' == c || '-' == c || ASCII_HYPH == c)
                    279:                        ++i;
1.6       schwarze  280:
1.26      schwarze  281:                switch (cp[i++]) {
1.16      schwarze  282:                case ('('):
1.26      schwarze  283:                        lim = 2;
1.16      schwarze  284:                        break;
                    285:                case ('['):
1.26      schwarze  286:                        term = numeric = ']';
1.16      schwarze  287:                        break;
                    288:                case ('\''):
1.26      schwarze  289:                        term = numeric = '\'';
1.16      schwarze  290:                        break;
                    291:                default:
1.26      schwarze  292:                        lim = 1;
                    293:                        i--;
1.16      schwarze  294:                        break;
1.6       schwarze  295:                }
                    296:
1.26      schwarze  297:                /* See +/- counts as a sign. */
                    298:                c = cp[i];
                    299:                if ('+' == c || '-' == c || ASCII_HYPH == c)
                    300:                        ++i;
                    301:
                    302:                break;
                    303:
                    304:        /*
                    305:         * Anything else is assumed to be a glyph.
                    306:         */
                    307:        default:
                    308:                gly = ESCAPE_SPECIAL;
                    309:                lim = 1;
                    310:                i--;
                    311:                break;
                    312:        }
                    313:
                    314:        assert(ESCAPE_ERROR != gly);
                    315:
                    316:        rstart = &cp[i];
                    317:        if (start)
                    318:                *start = rstart;
                    319:
                    320:        /*
                    321:         * If a terminating block has been specified, we need to
                    322:         * handle the case of recursion, which could have their
                    323:         * own terminating blocks that mess up our parse.  This, by the
                    324:         * way, means that the "start" and "size" values will be
                    325:         * effectively meaningless.
                    326:         */
                    327:
                    328:        ssz = 0;
                    329:        if (numeric && -1 == (ssz = numescape(&cp[i])))
                    330:                return(ESCAPE_ERROR);
                    331:
                    332:        i += ssz;
                    333:        rlim = -1;
                    334:
                    335:        /*
                    336:         * We have a character terminator.  Try to read up to that
                    337:         * character.  If we can't (i.e., we hit the nil), then return
                    338:         * an error; if we can, calculate our length, read past the
                    339:         * terminating character, and exit.
                    340:         */
                    341:
                    342:        if ('\0' != term) {
                    343:                *end = strchr(&cp[i], term);
                    344:                if ('\0' == *end)
                    345:                        return(ESCAPE_ERROR);
                    346:
                    347:                rlim = *end - &cp[i];
                    348:                if (sz)
                    349:                        *sz = rlim;
                    350:                (*end)++;
                    351:                goto out;
                    352:        }
                    353:
                    354:        assert(lim > 0);
                    355:
                    356:        /*
                    357:         * We have a numeric limit.  If the string is shorter than that,
                    358:         * stop and return an error.  Else adjust our endpoint, length,
                    359:         * and return the current glyph.
                    360:         */
                    361:
                    362:        if ((size_t)lim > strlen(&cp[i]))
                    363:                return(ESCAPE_ERROR);
                    364:
                    365:        rlim = lim;
                    366:        if (sz)
                    367:                *sz = rlim;
1.19      schwarze  368:
1.26      schwarze  369:        *end = &cp[i] + lim;
                    370:
                    371: out:
                    372:        assert(rlim >= 0 && rstart);
1.19      schwarze  373:
1.26      schwarze  374:        /* Run post-processors. */
1.19      schwarze  375:
1.26      schwarze  376:        switch (gly) {
                    377:        case (ESCAPE_FONT):
1.30      schwarze  378:                /*
                    379:                 * Pretend that the constant-width font modes are the
                    380:                 * same as the regular font modes.
                    381:                 */
                    382:                if (2 == rlim && 'C' == *rstart)
                    383:                        rstart++;
                    384:                else if (1 != rlim)
1.26      schwarze  385:                        break;
1.30      schwarze  386:
1.26      schwarze  387:                switch (*rstart) {
                    388:                case ('3'):
                    389:                        /* FALLTHROUGH */
                    390:                case ('B'):
                    391:                        gly = ESCAPE_FONTBOLD;
                    392:                        break;
                    393:                case ('2'):
                    394:                        /* FALLTHROUGH */
                    395:                case ('I'):
                    396:                        gly = ESCAPE_FONTITALIC;
1.16      schwarze  397:                        break;
1.26      schwarze  398:                case ('P'):
                    399:                        gly = ESCAPE_FONTPREV;
1.16      schwarze  400:                        break;
1.26      schwarze  401:                case ('1'):
                    402:                        /* FALLTHROUGH */
                    403:                case ('R'):
                    404:                        gly = ESCAPE_FONTROMAN;
1.1       schwarze  405:                        break;
                    406:                }
1.16      schwarze  407:                break;
1.26      schwarze  408:        case (ESCAPE_SPECIAL):
                    409:                if (1 != rlim)
                    410:                        break;
                    411:                if ('c' == *rstart)
                    412:                        gly = ESCAPE_NOSPACE;
1.16      schwarze  413:                break;
1.1       schwarze  414:        default:
1.16      schwarze  415:                break;
1.1       schwarze  416:        }
                    417:
1.26      schwarze  418:        return(gly);
1.1       schwarze  419: }
                    420:
1.4       schwarze  421: void *
                    422: mandoc_calloc(size_t num, size_t size)
                    423: {
                    424:        void            *ptr;
                    425:
                    426:        ptr = calloc(num, size);
                    427:        if (NULL == ptr) {
                    428:                perror(NULL);
1.20      schwarze  429:                exit((int)MANDOCLEVEL_SYSERR);
1.4       schwarze  430:        }
                    431:
                    432:        return(ptr);
                    433: }
                    434:
                    435:
                    436: void *
                    437: mandoc_malloc(size_t size)
                    438: {
                    439:        void            *ptr;
                    440:
                    441:        ptr = malloc(size);
                    442:        if (NULL == ptr) {
                    443:                perror(NULL);
1.20      schwarze  444:                exit((int)MANDOCLEVEL_SYSERR);
1.4       schwarze  445:        }
                    446:
                    447:        return(ptr);
                    448: }
                    449:
                    450:
                    451: void *
                    452: mandoc_realloc(void *ptr, size_t size)
                    453: {
                    454:
                    455:        ptr = realloc(ptr, size);
                    456:        if (NULL == ptr) {
                    457:                perror(NULL);
1.20      schwarze  458:                exit((int)MANDOCLEVEL_SYSERR);
1.4       schwarze  459:        }
                    460:
                    461:        return(ptr);
                    462: }
                    463:
/*
 * Return a freshly allocated, NUL-terminated copy of the first sz
 * bytes at ptr.  Exactly sz bytes are copied, so the caller must make
 * sure that many bytes are readable.  Allocation goes through
 * mandoc_malloc(); the caller owns the result.
 */
char *
mandoc_strndup(const char *ptr, size_t sz)
{
	char		*p;

	p = mandoc_malloc(sz + 1);
	memcpy(p, ptr, sz);
	/* Index with the size_t itself; narrowing to int could wrap. */
	p[sz] = '\0';
	return(p);
}
1.4       schwarze  474:
                    475: char *
                    476: mandoc_strdup(const char *ptr)
                    477: {
                    478:        char            *p;
                    479:
                    480:        p = strdup(ptr);
                    481:        if (NULL == p) {
                    482:                perror(NULL);
1.20      schwarze  483:                exit((int)MANDOCLEVEL_SYSERR);
1.4       schwarze  484:        }
                    485:
                    486:        return(p);
1.21      schwarze  487: }
                    488:
                    489: /*
                    490:  * Parse a quoted or unquoted roff-style request or macro argument.
                    491:  * Return a pointer to the parsed argument, which is either the original
                    492:  * pointer or advanced by one byte in case the argument is quoted.
                    493:  * Null-terminate the argument in place.
                    494:  * Collapse pairs of quotes inside quoted arguments.
                    495:  * Advance the argument pointer to the next argument,
                    496:  * or to the null byte terminating the argument line.
                    497:  */
                    498: char *
1.25      schwarze  499: mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
1.21      schwarze  500: {
                    501:        char     *start, *cp;
                    502:        int       quoted, pairs, white;
                    503:
                    504:        /* Quoting can only start with a new word. */
                    505:        start = *cpp;
1.26      schwarze  506:        quoted = 0;
1.21      schwarze  507:        if ('"' == *start) {
                    508:                quoted = 1;
                    509:                start++;
1.26      schwarze  510:        }
1.21      schwarze  511:
                    512:        pairs = 0;
                    513:        white = 0;
                    514:        for (cp = start; '\0' != *cp; cp++) {
                    515:                /* Move left after quoted quotes and escaped backslashes. */
                    516:                if (pairs)
                    517:                        cp[-pairs] = cp[0];
                    518:                if ('\\' == cp[0]) {
                    519:                        if ('\\' == cp[1]) {
                    520:                                /* Poor man's copy mode. */
                    521:                                pairs++;
                    522:                                cp++;
                    523:                        } else if (0 == quoted && ' ' == cp[1])
                    524:                                /* Skip escaped blanks. */
                    525:                                cp++;
                    526:                } else if (0 == quoted) {
                    527:                        if (' ' == cp[0]) {
                    528:                                /* Unescaped blanks end unquoted args. */
                    529:                                white = 1;
                    530:                                break;
                    531:                        }
                    532:                } else if ('"' == cp[0]) {
                    533:                        if ('"' == cp[1]) {
                    534:                                /* Quoted quotes collapse. */
                    535:                                pairs++;
                    536:                                cp++;
                    537:                        } else {
                    538:                                /* Unquoted quotes end quoted args. */
                    539:                                quoted = 2;
                    540:                                break;
                    541:                        }
                    542:                }
                    543:        }
                    544:
                    545:        /* Quoted argument without a closing quote. */
1.25      schwarze  546:        if (1 == quoted)
                    547:                mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL);
1.21      schwarze  548:
                    549:        /* Null-terminate this argument and move to the next one. */
                    550:        if (pairs)
                    551:                cp[-pairs] = '\0';
                    552:        if ('\0' != *cp) {
                    553:                *cp++ = '\0';
                    554:                while (' ' == *cp)
                    555:                        cp++;
                    556:        }
1.24      schwarze  557:        *pos += (int)(cp - start) + (quoted ? 1 : 0);
1.21      schwarze  558:        *cpp = cp;
                    559:
1.25      schwarze  560:        if ('\0' == *cp && (white || ' ' == cp[-1]))
                    561:                mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL);
1.21      schwarze  562:
                    563:        return(start);
1.4       schwarze  564: }
1.5       schwarze  565:
                    566: static int
                    567: a2time(time_t *t, const char *fmt, const char *p)
                    568: {
                    569:        struct tm        tm;
                    570:        char            *pp;
                    571:
                    572:        memset(&tm, 0, sizeof(struct tm));
                    573:
                    574:        pp = strptime(p, fmt, &tm);
                    575:        if (NULL != pp && '\0' == *pp) {
                    576:                *t = mktime(&tm);
                    577:                return(1);
                    578:        }
                    579:
                    580:        return(0);
                    581: }
                    582:
/*
 * Format the time t, taken as local time, as "Month day, year",
 * for example "November 17, 2011".
 * Returns a buffer obtained from mandoc_malloc() that the caller must
 * free, or NULL if the time cannot be broken down or any part of the
 * date does not fit into the space reserved for it.
 */
static char *
time2a(time_t t)
{
	struct tm	*tm;
	char		*buf, *p;
	size_t		 ssz;
	int		 isz;

	/* localtime(3) returns NULL if t cannot be represented. */
	tm = localtime(&t);
	if (NULL == tm)
		return(NULL);

	/*
	 * Reserve space:
	 * up to 9 characters for the month (September) + blank
	 * up to 2 characters for the day + comma + blank
	 * 4 characters for the year and a terminating '\0'
	 */
	p = buf = mandoc_malloc(10 + 4 + 4 + 1);

	if (0 == (ssz = strftime(p, 10 + 1, "%B ", tm)))
		goto fail;
	p += (int)ssz;

	/*
	 * Treat truncation like an error: advancing by the untruncated
	 * length would move p past what was actually written.
	 */
	isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday);
	if (isz < 0 || isz >= 4 + 1)
		goto fail;
	p += isz;

	if (0 == strftime(p, 4 + 1, "%Y", tm))
		goto fail;
	return(buf);

fail:
	free(buf);
	return(NULL);
}
                    617:
                    618: char *
1.25      schwarze  619: mandoc_normdate(struct mparse *parse, char *in, int ln, int pos)
1.5       schwarze  620: {
1.22      schwarze  621:        char            *out;
1.5       schwarze  622:        time_t           t;
                    623:
1.22      schwarze  624:        if (NULL == in || '\0' == *in ||
                    625:            0 == strcmp(in, "$" "Mdocdate$")) {
1.25      schwarze  626:                mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL);
1.22      schwarze  627:                time(&t);
                    628:        }
1.31      schwarze  629:        else if (a2time(&t, "%Y-%m-%d", in))
                    630:                t = 0;
1.22      schwarze  631:        else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) &&
1.31      schwarze  632:            !a2time(&t, "%b %d, %Y", in)) {
1.25      schwarze  633:                mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL);
1.22      schwarze  634:                t = 0;
1.5       schwarze  635:        }
1.22      schwarze  636:        out = t ? time2a(t) : NULL;
1.23      schwarze  637:        return(out ? out : mandoc_strdup(in));
1.5       schwarze  638: }
                    639:
/*
 * Decide whether the sz bytes starting at p end a sentence.
 * The buffer is scanned backwards from its last byte.  "enclosed"
 * is the caller's initial notion of whether the text sits inside
 * closing delimiters.  Returns 1 for end-of-sentence, 0 otherwise.
 */
int
mandoc_eos(const char *p, size_t sz, int enclosed)
{
	size_t		 i;
	int		 found;

	if (0 == sz)
		return(0);

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 */

	found = 0;

	/*
	 * Walk by index rather than by decrementing a pointer, so that
	 * no pointer before the start of the buffer is ever formed.
	 */
	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case ('\"'):
			/* FALLTHROUGH */
		case ('\''):
			/* FALLTHROUGH */
		case (']'):
			/* FALLTHROUGH */
		case (')'):
			/* A closer to the right of the punctuation. */
			if (0 == found)
				enclosed = 1;
			break;
		case ('.'):
			/* FALLTHROUGH */
		case ('!'):
			/* FALLTHROUGH */
		case ('?'):
			found = 1;
			break;
		default:
			/*
			 * First other byte: enclosed punctuation only
			 * counts when it directly follows a letter or
			 * digit.
			 */
			return(found && (!enclosed ||
			    isalnum((unsigned char)p[i - 1])));
		}
	}

	/* Nothing but punctuation and closing delimiters. */
	return(found && !enclosed);
}
                    682:
                    683: /*
1.25      schwarze  684:  * Find out whether a line is a macro line or not.  If it is, adjust the
                    685:  * current position and return one; if it isn't, return zero and don't
                    686:  * change the current position.
1.24      schwarze  687:  */
int
mandoc_getcontrol(const char *cp, int *ppos)
{
	const char	*s;

	s = cp + *ppos;

	/* Recognize the control sequence: `\.', `.' or `''. */
	switch (*s) {
	case ('\\'):
		if ('.' != s[1])
			return(0);
		s += 2;
		break;
	case ('.'):
		/* FALLTHROUGH */
	case ('\''):
		s++;
		break;
	default:
		return(0);
	}

	/* Skip blanks and tabs following the control sequence. */
	while (' ' == *s || '\t' == *s)
		s++;

	/* Only a recognized macro line moves the caller's position. */
	*ppos = (int)(s - cp);
	return(1);
}
/*
 * Convert the first sz bytes of p to an int using the given base.
 * Returns -1 if sz exceeds 31, if there is nothing to convert, or if
 * the bytes are not entirely consumed by strtol(3).  Otherwise the
 * value is returned, clamped to the range INT_MIN to INT_MAX.
 * Note that a valid input of "-1" is indistinguishable from failure.
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char		 numbuf[32];
	char		*end;
	long		 val;

	/* Leave room for the terminating '\0'. */
	if (sz >= sizeof(numbuf))
		return(-1);

	/* strtol(3) needs a terminated string; p need not be one. */
	memcpy(numbuf, p, sz);
	numbuf[sz] = '\0';

	/*
	 * errno is reset but not inspected afterwards: on overflow,
	 * strtol(3) returns LONG_MAX or LONG_MIN, which the clamping
	 * below already covers.
	 */
	errno = 0;
	val = strtol(numbuf, &end, base);

	if ('\0' == numbuf[0] || '\0' != *end)
		return(-1);

	if (val > INT_MAX)
		return(INT_MAX);
	if (val < INT_MIN)
		return(INT_MIN);

	return((int)val);
}