Annotation of src/usr.bin/mandoc/mandoc.c, Revision 1.46
1.46 ! schwarze 1: /* $Id: mandoc.c,v 1.45 2014/03/21 22:17:01 schwarze Exp $ */
1.1 schwarze 2: /*
1.24 schwarze 3: * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.37 schwarze 4: * Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.21 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 schwarze 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.21 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 schwarze 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
1.2 schwarze 18: #include <sys/types.h>
19:
1.1 schwarze 20: #include <assert.h>
21: #include <ctype.h>
1.26 schwarze 22: #include <errno.h>
23: #include <limits.h>
1.1 schwarze 24: #include <stdlib.h>
1.4 schwarze 25: #include <stdio.h>
26: #include <string.h>
1.5 schwarze 27: #include <time.h>
1.1 schwarze 28:
1.14 schwarze 29: #include "mandoc.h"
1.45 schwarze 30: #include "mandoc_aux.h"
1.1 schwarze 31: #include "libmandoc.h"
32:
1.22 schwarze 33: #define DATESIZE 32
34:
1.14 schwarze 35: static int a2time(time_t *, const char *, const char *);
1.22 schwarze 36: static char *time2a(time_t);
1.5 schwarze 37:
1.26 schwarze 38:
39: enum mandoc_esc
1.44 schwarze 40: mandoc_escape(const char **end, const char **start, int *sz)
1.26 schwarze 41: {
1.34 schwarze 42: const char *local_start;
43: int local_sz;
44: char term;
1.26 schwarze 45: enum mandoc_esc gly;
46:
1.34 schwarze 47: /*
48: * When the caller doesn't provide return storage,
49: * use local storage.
50: */
51:
52: if (NULL == start)
53: start = &local_start;
54: if (NULL == sz)
55: sz = &local_sz;
56:
57: /*
58: * Beyond the backslash, at least one input character
59: * is part of the escape sequence. With one exception
60: * (see below), that character won't be returned.
61: */
62:
1.26 schwarze 63: gly = ESCAPE_ERROR;
1.34 schwarze 64: *start = ++*end;
65: *sz = 0;
1.33 schwarze 66: term = '\0';
1.26 schwarze 67:
1.34 schwarze 68: switch ((*start)[-1]) {
1.26 schwarze 69: /*
70: * First the glyphs. There are several different forms of
71: * these, but each eventually returns a substring of the glyph
72: * name.
73: */
74: case ('('):
75: gly = ESCAPE_SPECIAL;
1.34 schwarze 76: *sz = 2;
1.26 schwarze 77: break;
78: case ('['):
79: gly = ESCAPE_SPECIAL;
80: /*
81: * Unicode escapes are defined in groff as \[uXXXX] to
82: * \[u10FFFF], where the contained value must be a valid
83: * Unicode codepoint. Here, however, only check whether
84: * it's not a zero-width escape.
85: */
1.34 schwarze 86: if ('u' == (*start)[0] && ']' != (*start)[1])
1.26 schwarze 87: gly = ESCAPE_UNICODE;
88: term = ']';
89: break;
90: case ('C'):
1.34 schwarze 91: if ('\'' != **start)
1.26 schwarze 92: return(ESCAPE_ERROR);
1.34 schwarze 93: *start = ++*end;
1.39 schwarze 94: if ('u' == (*start)[0] && '\'' != (*start)[1])
95: gly = ESCAPE_UNICODE;
96: else
97: gly = ESCAPE_SPECIAL;
1.26 schwarze 98: term = '\'';
99: break;
1.41 schwarze 100:
101: /*
102: * Escapes taking no arguments at all.
103: */
104: case ('d'):
105: /* FALLTHROUGH */
106: case ('u'):
107: return(ESCAPE_IGNORE);
1.32 schwarze 108:
109: /*
110: * The \z escape is supposed to output the following
111: * character without advancing the cursor position.
112: * Since we are mostly dealing with terminal mode,
113: * let us just skip the next character.
114: */
115: case ('z'):
116: return(ESCAPE_SKIPCHAR);
1.1 schwarze 117:
1.26 schwarze 118: /*
119: * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
120: * 'X' is the trigger. These have opaque sub-strings.
121: */
122: case ('F'):
1.16 schwarze 123: /* FALLTHROUGH */
1.26 schwarze 124: case ('g'):
1.16 schwarze 125: /* FALLTHROUGH */
1.26 schwarze 126: case ('k'):
1.1 schwarze 127: /* FALLTHROUGH */
1.26 schwarze 128: case ('M'):
1.14 schwarze 129: /* FALLTHROUGH */
1.26 schwarze 130: case ('m'):
1.1 schwarze 131: /* FALLTHROUGH */
1.26 schwarze 132: case ('n'):
1.1 schwarze 133: /* FALLTHROUGH */
1.26 schwarze 134: case ('V'):
1.1 schwarze 135: /* FALLTHROUGH */
1.26 schwarze 136: case ('Y'):
1.29 schwarze 137: gly = ESCAPE_IGNORE;
1.1 schwarze 138: /* FALLTHROUGH */
1.26 schwarze 139: case ('f'):
140: if (ESCAPE_ERROR == gly)
141: gly = ESCAPE_FONT;
1.34 schwarze 142: switch (**start) {
1.26 schwarze 143: case ('('):
1.34 schwarze 144: *start = ++*end;
145: *sz = 2;
1.26 schwarze 146: break;
147: case ('['):
1.34 schwarze 148: *start = ++*end;
1.26 schwarze 149: term = ']';
150: break;
151: default:
1.34 schwarze 152: *sz = 1;
1.26 schwarze 153: break;
154: }
155: break;
156:
157: /*
158: * These escapes are of the form \X'Y', where 'X' is the trigger
159: * and 'Y' is any string. These have opaque sub-strings.
160: */
161: case ('A'):
1.13 schwarze 162: /* FALLTHROUGH */
1.26 schwarze 163: case ('b'):
1.1 schwarze 164: /* FALLTHROUGH */
1.42 schwarze 165: case ('B'):
166: /* FALLTHROUGH */
1.16 schwarze 167: case ('D'):
1.1 schwarze 168: /* FALLTHROUGH */
1.26 schwarze 169: case ('o'):
1.1 schwarze 170: /* FALLTHROUGH */
1.26 schwarze 171: case ('R'):
1.1 schwarze 172: /* FALLTHROUGH */
1.42 schwarze 173: case ('w'):
174: /* FALLTHROUGH */
1.26 schwarze 175: case ('X'):
1.1 schwarze 176: /* FALLTHROUGH */
1.26 schwarze 177: case ('Z'):
1.46 ! schwarze 178: if ('\0' == **start)
1.26 schwarze 179: return(ESCAPE_ERROR);
180: gly = ESCAPE_IGNORE;
1.46 ! schwarze 181: term = **start;
1.34 schwarze 182: *start = ++*end;
1.16 schwarze 183: break;
1.26 schwarze 184:
185: /*
186: * These escapes are of the form \X'N', where 'X' is the trigger
187: * and 'N' resolves to a numerical expression.
188: */
1.17 schwarze 189: case ('h'):
190: /* FALLTHROUGH */
1.26 schwarze 191: case ('H'):
192: /* FALLTHROUGH */
193: case ('L'):
194: /* FALLTHROUGH */
195: case ('l'):
196: /* FALLTHROUGH */
197: case ('S'):
198: /* FALLTHROUGH */
1.17 schwarze 199: case ('v'):
200: /* FALLTHROUGH */
1.26 schwarze 201: case ('x'):
1.46 ! schwarze 202: if (strchr("\0 %&()*+-./0123456789:<=>", **start))
1.34 schwarze 203: return(ESCAPE_ERROR);
1.42 schwarze 204: gly = ESCAPE_IGNORE;
1.46 ! schwarze 205: term = **start;
1.34 schwarze 206: *start = ++*end;
1.26 schwarze 207: break;
1.29 schwarze 208:
209: /*
210: * Special handling for the numbered character escape.
211: * XXX Do any other escapes need similar handling?
212: */
213: case ('N'):
1.34 schwarze 214: if ('\0' == **start)
1.29 schwarze 215: return(ESCAPE_ERROR);
1.34 schwarze 216: (*end)++;
217: if (isdigit((unsigned char)**start)) {
218: *sz = 1;
1.29 schwarze 219: return(ESCAPE_IGNORE);
1.34 schwarze 220: }
221: (*start)++;
1.29 schwarze 222: while (isdigit((unsigned char)**end))
223: (*end)++;
1.34 schwarze 224: *sz = *end - *start;
1.29 schwarze 225: if ('\0' != **end)
226: (*end)++;
227: return(ESCAPE_NUMBERED);
1.26 schwarze 228:
229: /*
230: * Sizes get a special category of their own.
231: */
1.6 schwarze 232: case ('s'):
1.26 schwarze 233: gly = ESCAPE_IGNORE;
1.17 schwarze 234:
1.26 schwarze 235: /* See +/- counts as a sign. */
1.34 schwarze 236: if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
237: (*end)++;
1.6 schwarze 238:
1.34 schwarze 239: switch (**end) {
1.16 schwarze 240: case ('('):
1.34 schwarze 241: *start = ++*end;
242: *sz = 2;
1.16 schwarze 243: break;
244: case ('['):
1.34 schwarze 245: *start = ++*end;
1.33 schwarze 246: term = ']';
1.16 schwarze 247: break;
248: case ('\''):
1.34 schwarze 249: *start = ++*end;
1.33 schwarze 250: term = '\'';
1.16 schwarze 251: break;
252: default:
1.34 schwarze 253: *sz = 1;
1.16 schwarze 254: break;
1.6 schwarze 255: }
256:
1.26 schwarze 257: break;
258:
259: /*
260: * Anything else is assumed to be a glyph.
1.34 schwarze 261: * In this case, pass back the character after the backslash.
1.26 schwarze 262: */
263: default:
264: gly = ESCAPE_SPECIAL;
1.34 schwarze 265: *start = --*end;
266: *sz = 1;
1.26 schwarze 267: break;
268: }
269:
270: assert(ESCAPE_ERROR != gly);
271:
272: /*
1.33 schwarze 273: * Read up to the terminating character,
274: * paying attention to nested escapes.
1.26 schwarze 275: */
276:
277: if ('\0' != term) {
1.33 schwarze 278: while (**end != term) {
279: switch (**end) {
280: case ('\0'):
281: return(ESCAPE_ERROR);
282: case ('\\'):
283: (*end)++;
284: if (ESCAPE_ERROR ==
285: mandoc_escape(end, NULL, NULL))
286: return(ESCAPE_ERROR);
287: break;
288: default:
289: (*end)++;
290: break;
291: }
292: }
1.34 schwarze 293: *sz = (*end)++ - *start;
1.33 schwarze 294: } else {
1.34 schwarze 295: assert(*sz > 0);
296: if ((size_t)*sz > strlen(*start))
1.26 schwarze 297: return(ESCAPE_ERROR);
1.34 schwarze 298: *end += *sz;
1.26 schwarze 299: }
1.19 schwarze 300:
1.26 schwarze 301: /* Run post-processors. */
1.19 schwarze 302:
1.26 schwarze 303: switch (gly) {
304: case (ESCAPE_FONT):
1.37 schwarze 305: if (2 == *sz) {
306: if ('C' == **start) {
307: /*
308: * Treat constant-width font modes
309: * just like regular font modes.
310: */
311: (*start)++;
312: (*sz)--;
313: } else {
314: if ('B' == (*start)[0] && 'I' == (*start)[1])
315: gly = ESCAPE_FONTBI;
316: break;
317: }
1.34 schwarze 318: } else if (1 != *sz)
1.26 schwarze 319: break;
1.30 schwarze 320:
1.34 schwarze 321: switch (**start) {
1.26 schwarze 322: case ('3'):
323: /* FALLTHROUGH */
324: case ('B'):
325: gly = ESCAPE_FONTBOLD;
326: break;
327: case ('2'):
328: /* FALLTHROUGH */
329: case ('I'):
330: gly = ESCAPE_FONTITALIC;
1.16 schwarze 331: break;
1.26 schwarze 332: case ('P'):
333: gly = ESCAPE_FONTPREV;
1.16 schwarze 334: break;
1.26 schwarze 335: case ('1'):
336: /* FALLTHROUGH */
337: case ('R'):
338: gly = ESCAPE_FONTROMAN;
1.1 schwarze 339: break;
340: }
1.16 schwarze 341: break;
1.26 schwarze 342: case (ESCAPE_SPECIAL):
1.34 schwarze 343: if (1 == *sz && 'c' == **start)
1.26 schwarze 344: gly = ESCAPE_NOSPACE;
1.16 schwarze 345: break;
1.1 schwarze 346: default:
1.16 schwarze 347: break;
1.1 schwarze 348: }
349:
1.26 schwarze 350: return(gly);
1.21 schwarze 351: }
352:
353: /*
354: * Parse a quoted or unquoted roff-style request or macro argument.
355: * Return a pointer to the parsed argument, which is either the original
356: * pointer or advanced by one byte in case the argument is quoted.
1.40 schwarze 357: * NUL-terminate the argument in place.
1.21 schwarze 358: * Collapse pairs of quotes inside quoted arguments.
359: * Advance the argument pointer to the next argument,
1.40 schwarze 360: * or to the NUL byte terminating the argument line.
1.21 schwarze 361: */
362: char *
1.25 schwarze 363: mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
1.21 schwarze 364: {
365: char *start, *cp;
366: int quoted, pairs, white;
367:
368: /* Quoting can only start with a new word. */
369: start = *cpp;
1.26 schwarze 370: quoted = 0;
1.21 schwarze 371: if ('"' == *start) {
372: quoted = 1;
373: start++;
1.26 schwarze 374: }
1.21 schwarze 375:
376: pairs = 0;
377: white = 0;
378: for (cp = start; '\0' != *cp; cp++) {
1.36 schwarze 379:
380: /*
381: * Move the following text left
382: * after quoted quotes and after "\\" and "\t".
383: */
1.21 schwarze 384: if (pairs)
385: cp[-pairs] = cp[0];
1.36 schwarze 386:
1.21 schwarze 387: if ('\\' == cp[0]) {
1.36 schwarze 388: /*
389: * In copy mode, translate double to single
390: * backslashes and backslash-t to literal tabs.
391: */
392: switch (cp[1]) {
393: case ('t'):
394: cp[0] = '\t';
395: /* FALLTHROUGH */
396: case ('\\'):
1.21 schwarze 397: pairs++;
398: cp++;
1.36 schwarze 399: break;
400: case (' '):
1.21 schwarze 401: /* Skip escaped blanks. */
1.36 schwarze 402: if (0 == quoted)
403: cp++;
404: break;
405: default:
406: break;
407: }
1.21 schwarze 408: } else if (0 == quoted) {
409: if (' ' == cp[0]) {
410: /* Unescaped blanks end unquoted args. */
411: white = 1;
412: break;
413: }
414: } else if ('"' == cp[0]) {
415: if ('"' == cp[1]) {
416: /* Quoted quotes collapse. */
417: pairs++;
418: cp++;
419: } else {
420: /* Unquoted quotes end quoted args. */
421: quoted = 2;
422: break;
423: }
424: }
425: }
426:
427: /* Quoted argument without a closing quote. */
1.25 schwarze 428: if (1 == quoted)
429: mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL);
1.21 schwarze 430:
1.40 schwarze 431: /* NUL-terminate this argument and move to the next one. */
1.21 schwarze 432: if (pairs)
433: cp[-pairs] = '\0';
434: if ('\0' != *cp) {
435: *cp++ = '\0';
436: while (' ' == *cp)
437: cp++;
438: }
1.24 schwarze 439: *pos += (int)(cp - start) + (quoted ? 1 : 0);
1.21 schwarze 440: *cpp = cp;
441:
1.25 schwarze 442: if ('\0' == *cp && (white || ' ' == cp[-1]))
443: mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL);
1.21 schwarze 444:
445: return(start);
1.4 schwarze 446: }
1.5 schwarze 447:
/*
 * Parse the string p according to the strptime(3) format fmt.
 * The format has to consume the complete string.
 * On success, store the result of mktime(3) in *t and return 1.
 * Otherwise, return 0 without touching *t.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	const char	*rest;

	/* Start from all zeros; strptime(3) only sets parsed fields. */
	memset(&parsed, 0, sizeof(parsed));

	rest = strptime(p, fmt, &parsed);

	/* Reject both parse failures and trailing garbage. */
	if (NULL == rest || '\0' != *rest)
		return(0);

	*t = mktime(&parsed);
	return(1);
}
464:
/*
 * Format the time t in the local time zone as "Month day, year",
 * using the full month name.
 * Return a buffer obtained from mandoc_malloc(), to be released by
 * the caller with free(3), or NULL if the time cannot be converted
 * or does not fit into the buffer.
 */
static char *
time2a(time_t t)
{
	struct tm	*tm;
	char		*buf, *p;
	size_t		 ssz;
	int		 isz;

	/* localtime(3) fails for times that cannot be represented. */
	tm = localtime(&t);
	if (NULL == tm)
		return(NULL);

	/*
	 * Reserve space:
	 * up to 9 characters for the month (September) + blank
	 * up to 2 characters for the day + comma + blank
	 * 4 characters for the year and a terminating '\0'
	 */
	p = buf = mandoc_malloc(10 + 4 + 4 + 1);

	if (0 == (ssz = strftime(p, 10 + 1, "%B ", tm)))
		goto fail;
	p += (int)ssz;

	/*
	 * snprintf(3) returns the length it wanted to write,
	 * so also treat truncation as a failure rather than
	 * advancing beyond the bytes actually written.
	 */
	isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday);
	if (isz < 0 || isz > 4)
		goto fail;
	p += isz;

	if (0 == strftime(p, 4 + 1, "%Y", tm))
		goto fail;
	return(buf);

fail:
	free(buf);
	return(NULL);
}
499:
500: char *
1.25 schwarze 501: mandoc_normdate(struct mparse *parse, char *in, int ln, int pos)
1.5 schwarze 502: {
1.22 schwarze 503: char *out;
1.5 schwarze 504: time_t t;
505:
1.22 schwarze 506: if (NULL == in || '\0' == *in ||
507: 0 == strcmp(in, "$" "Mdocdate$")) {
1.25 schwarze 508: mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL);
1.22 schwarze 509: time(&t);
510: }
1.31 schwarze 511: else if (a2time(&t, "%Y-%m-%d", in))
512: t = 0;
1.22 schwarze 513: else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) &&
1.31 schwarze 514: !a2time(&t, "%b %d, %Y", in)) {
1.25 schwarze 515: mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL);
1.22 schwarze 516: t = 0;
1.5 schwarze 517: }
1.22 schwarze 518: out = t ? time2a(t) : NULL;
1.23 schwarze 519: return(out ? out : mandoc_strdup(in));
1.5 schwarze 520: }
521:
/*
 * Decide whether the sz bytes starting at p end a sentence.
 *
 * The text is scanned backwards.  Closing delimiters (quotes,
 * brackets, parentheses) and end-of-sentence punctuation (period,
 * exclamation mark, question mark) are skipped until some other
 * character or the beginning of the text is reached.
 *
 * Returns 1 if punctuation was found and either no closing delimiter
 * follows it or the character preceding the punctuation is
 * alphanumeric; returns 0 otherwise, including for empty input.
 */
int
mandoc_eos(const char *p, size_t sz)
{
	size_t		 i;
	int		 enclosed, found;

	if (0 == sz)
		return(0);

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 */

	enclosed = found = 0;

	/*
	 * Count an index down to zero instead of walking a pointer
	 * backwards, such that no pointer before the start of the
	 * buffer is ever formed and large sizes are not truncated.
	 */
	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case ('\"'):
			/* FALLTHROUGH */
		case ('\''):
			/* FALLTHROUGH */
		case (']'):
			/* FALLTHROUGH */
		case (')'):
			/* Only delimiters after the punctuation count. */
			if (0 == found)
				enclosed = 1;
			break;
		case ('.'):
			/* FALLTHROUGH */
		case ('!'):
			/* FALLTHROUGH */
		case ('?'):
			found = 1;
			break;
		default:
			return(found && (!enclosed ||
			    isalnum((unsigned char)p[i - 1])));
		}
	}

	/* Nothing but punctuation and closing delimiters. */
	return(found && !enclosed);
}
1.26 schwarze 564:
/*
 * Convert the first sz bytes of p to an int with strtol(3),
 * using the given base.
 * Return -1 if sz exceeds 31 bytes, if the string is empty,
 * or if strtol(3) does not consume the complete string.
 * Results outside the range of int are clamped to INT_MIN or INT_MAX.
 * Valid negative numbers are returned as they are, so callers that
 * only accept non-negative numbers have to treat every negative
 * return value as a failure.
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char	 numbuf[32];
	char	*endp;
	long	 val;

	/* Leave room for the terminating NUL byte. */
	if (sz >= sizeof(numbuf))
		return(-1);

	/* The input need not be NUL-terminated, so work on a copy. */
	memcpy(numbuf, p, sz);
	numbuf[sz] = '\0';

	errno = 0;
	val = strtol(numbuf, &endp, base);

	if ('\0' == numbuf[0] || '\0' != *endp)
		return(-1);

	if (val > INT_MAX)
		return(INT_MAX);
	if (val < INT_MIN)
		return(INT_MIN);

	return((int)val);
}