Annotation of src/usr.bin/mandoc/mandoc.c, Revision 1.21
1.21 ! schwarze 1: /* $Id: mandoc.c,v 1.20 2010/09/27 21:25:28 schwarze Exp $ */
1.1 schwarze 2: /*
1.15 schwarze 3: * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
1.21 ! schwarze 4: * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.21 ! schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 schwarze 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.21 ! schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 schwarze 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
1.2 schwarze 18: #include <sys/types.h>
19:
1.1 schwarze 20: #include <assert.h>
21: #include <ctype.h>
22: #include <stdlib.h>
1.4 schwarze 23: #include <stdio.h>
24: #include <string.h>
1.5 schwarze 25: #include <time.h>
1.1 schwarze 26:
1.14 schwarze 27: #include "mandoc.h"
1.1 schwarze 28: #include "libmandoc.h"
29:
1.14 schwarze 30: static int a2time(time_t *, const char *, const char *);
1.5 schwarze 31:
32:
/*
 * Validate the roff escape sequence starting at p, which must point
 * at the introducing backslash (this is asserted).
 * The byte following the backslash selects how the remainder is
 * delimited: either a fixed number of bytes (len) or everything up
 * to a terminating byte (term).
 * Returns the offset from the backslash to the position where scanning
 * stopped -- just past the last byte in fixed-length mode, at the
 * terminating byte itself in terminator mode -- or 0 if the string
 * ends before the sequence is complete or the sequence is rejected.
 * Side effect: ASCII_HYPH bytes encountered while scanning are
 * rewritten to '-' in the caller's buffer.
 * The function calls itself for escapes nested inside an escape.
 */
1.1 schwarze 33: int
1.14 schwarze 34: mandoc_special(char *p)
1.1 schwarze 35: {
1.16 schwarze 36: int len, i;
37: char term;
1.14 schwarze 38: char *sv;
1.1 schwarze 39:
/* len: bytes still to consume; term: terminating byte, '\0' if none. */
1.16 schwarze 40: len = 0;
41: term = '\0';
/* Remember the start so that the result can be returned as an offset. */
1.14 schwarze 42: sv = p;
43:
1.16 schwarze 44: assert('\\' == *p);
45: p++;
1.1 schwarze 46:
/* Dispatch on the byte after the backslash; p then points past it. */
1.16 schwarze 47: switch (*p++) {
/* Compiled out: escapes that would require a '...' delimited argument. */
48: #if 0
49: case ('Z'):
50: /* FALLTHROUGH */
51: case ('X'):
52: /* FALLTHROUGH */
53: case ('x'):
1.1 schwarze 54: /* FALLTHROUGH */
1.16 schwarze 55: case ('S'):
1.14 schwarze 56: /* FALLTHROUGH */
1.16 schwarze 57: case ('R'):
1.1 schwarze 58: /* FALLTHROUGH */
1.16 schwarze 59: case ('N'):
1.1 schwarze 60: /* FALLTHROUGH */
1.16 schwarze 61: case ('l'):
1.1 schwarze 62: /* FALLTHROUGH */
1.16 schwarze 63: case ('L'):
1.1 schwarze 64: /* FALLTHROUGH */
1.16 schwarze 65: case ('H'):
1.13 schwarze 66: /* FALLTHROUGH */
1.16 schwarze 67: case ('h'):
1.1 schwarze 68: /* FALLTHROUGH */
1.16 schwarze 69: case ('D'):
1.1 schwarze 70: /* FALLTHROUGH */
1.16 schwarze 71: case ('C'):
1.1 schwarze 72: /* FALLTHROUGH */
1.16 schwarze 73: case ('b'):
1.1 schwarze 74: /* FALLTHROUGH */
1.16 schwarze 75: case ('B'):
1.1 schwarze 76: /* FALLTHROUGH */
1.16 schwarze 77: case ('a'):
1.14 schwarze 78: /* FALLTHROUGH */
1.16 schwarze 79: case ('A'):
80: if (*p++ != '\'')
81: return(0);
82: term = '\'';
83: break;
84: #endif
/*
 * \h, \v and \s: the argument may carry a sign either before or
 * after the length introducer, but not in both places.
 */
1.17 schwarze 85: case ('h'):
86: /* FALLTHROUGH */
87: case ('v'):
88: /* FALLTHROUGH */
1.6 schwarze 89: case ('s'):
1.16 schwarze 90: if (ASCII_HYPH == *p)
91: *p = '-';
1.17 schwarze 92:
/* i becomes 1 once a later sign must be rejected. */
93: i = 0;
94: if ('+' == *p || '-' == *p) {
1.16 schwarze 95: p++;
1.17 schwarze 96: i = 1;
97: }
1.6 schwarze 98:
/* Length introducer: '(' two bytes, '[' or '\'' delimited, else one byte. */
1.16 schwarze 99: switch (*p++) {
100: case ('('):
101: len = 2;
102: break;
103: case ('['):
104: term = ']';
105: break;
106: case ('\''):
107: term = '\'';
108: break;
109: case ('0'):
1.17 schwarze 110: i = 1;
1.16 schwarze 111: /* FALLTHROUGH */
112: default:
113: len = 1;
/* The byte just read is the argument itself; step back onto it. */
114: p--;
115: break;
1.6 schwarze 116: }
117:
1.16 schwarze 118: if (ASCII_HYPH == *p)
119: *p = '-';
120: if ('+' == *p || '-' == *p) {
1.17 schwarze 121: if (i)
1.16 schwarze 122: return(0);
123: p++;
124: }
125:
1.19 schwarze 126: /* Handle embedded numerical subexp or escape. */
127:
128: if ('(' == *p) {
/* Scan up to the closing parenthesis, validating nested escapes. */
129: while (*p && ')' != *p)
130: if ('\\' == *p++) {
131: i = mandoc_special(--p);
132: if (0 == i)
133: return(0);
134: p += i;
135: }
136:
/*
 * On ')' leave the outer switch and continue with the len/term
 * scan below; reaching the end of the string instead is an error.
 */
137: if (')' == *p++)
138: break;
139:
140: return(0);
141: } else if ('\\' == *p) {
142: if (0 == (i = mandoc_special(p)))
143: return(0);
144: p += i;
145: }
146:
1.16 schwarze 147: break;
/* Only the four case labels inside this #if 0 are compiled out. */
148: #if 0
149: case ('Y'):
150: /* FALLTHROUGH */
151: case ('V'):
152: /* FALLTHROUGH */
153: case ('$'):
154: /* FALLTHROUGH */
155: case ('n'):
156: /* FALLTHROUGH */
1.19 schwarze 157: #endif
/*
 * Escapes followed by a name: '(' plus two bytes, '[' up to ']',
 * or a single byte.
 */
1.16 schwarze 158: case ('k'):
159: /* FALLTHROUGH */
160: case ('M'):
161: /* FALLTHROUGH */
162: case ('m'):
163: /* FALLTHROUGH */
1.8 schwarze 164: case ('f'):
165: /* FALLTHROUGH */
166: case ('F'):
167: /* FALLTHROUGH */
1.1 schwarze 168: case ('*'):
1.16 schwarze 169: switch (*p++) {
1.1 schwarze 170: case ('('):
1.16 schwarze 171: len = 2;
172: break;
1.1 schwarze 173: case ('['):
1.16 schwarze 174: term = ']';
175: break;
1.1 schwarze 176: default:
1.16 schwarze 177: len = 1;
178: p--;
1.1 schwarze 179: break;
180: }
1.16 schwarze 181: break;
/* Backslash directly followed by '(': two more bytes follow. */
1.1 schwarze 182: case ('('):
1.16 schwarze 183: len = 2;
184: break;
/* Backslash directly followed by '[': scan up to the closing ']'. */
1.1 schwarze 185: case ('['):
1.16 schwarze 186: term = ']';
1.1 schwarze 187: break;
/*
 * \z takes one byte, or one complete nested escape.  In the latter
 * case the result is returned right away, and a nested escape that
 * is the very last thing in the string is rejected.
 */
1.19 schwarze 188: case ('z'):
189: len = 1;
190: if ('\\' == *p) {
191: if (0 == (i = mandoc_special(p)))
192: return(0);
193: p += i;
194: return(*p ? (int)(p - sv) : 0);
195: }
196: break;
/*
 * \o and \w: a following quote starts a quote-terminated argument.
 * Otherwise fall through; the default case undoes the p++ done by
 * the quote test and consumes a single byte.
 */
197: case ('o'):
198: /* FALLTHROUGH */
199: case ('w'):
200: if ('\'' == *p++) {
201: term = '\'';
202: break;
203: }
204: /* FALLTHROUGH */
1.1 schwarze 205: default:
1.16 schwarze 206: len = 1;
207: p--;
208: break;
1.1 schwarze 209: }
210:
/* Terminator mode: advance to term; fail if the string ends first. */
1.16 schwarze 211: if (term) {
212: for ( ; *p && term != *p; p++)
213: if (ASCII_HYPH == *p)
214: *p = '-';
215: return(*p ? (int)(p - sv) : 0);
216: }
1.1 schwarze 217:
/* Fixed-length mode: consume exactly len bytes; fail on a short string. */
1.16 schwarze 218: for (i = 0; *p && i < len; i++, p++)
219: if (ASCII_HYPH == *p)
220: *p = '-';
221: return(i == len ? (int)(p - sv) : 0);
1.1 schwarze 222: }
223:
1.4 schwarze 224:
225: void *
226: mandoc_calloc(size_t num, size_t size)
227: {
228: void *ptr;
229:
230: ptr = calloc(num, size);
231: if (NULL == ptr) {
232: perror(NULL);
1.20 schwarze 233: exit((int)MANDOCLEVEL_SYSERR);
1.4 schwarze 234: }
235:
236: return(ptr);
237: }
238:
239:
240: void *
241: mandoc_malloc(size_t size)
242: {
243: void *ptr;
244:
245: ptr = malloc(size);
246: if (NULL == ptr) {
247: perror(NULL);
1.20 schwarze 248: exit((int)MANDOCLEVEL_SYSERR);
1.4 schwarze 249: }
250:
251: return(ptr);
252: }
253:
254:
255: void *
256: mandoc_realloc(void *ptr, size_t size)
257: {
258:
259: ptr = realloc(ptr, size);
260: if (NULL == ptr) {
261: perror(NULL);
1.20 schwarze 262: exit((int)MANDOCLEVEL_SYSERR);
1.4 schwarze 263: }
264:
265: return(ptr);
266: }
267:
268:
269: char *
270: mandoc_strdup(const char *ptr)
271: {
272: char *p;
273:
274: p = strdup(ptr);
275: if (NULL == p) {
276: perror(NULL);
1.20 schwarze 277: exit((int)MANDOCLEVEL_SYSERR);
1.4 schwarze 278: }
279:
280: return(p);
1.21 ! schwarze 281: }
! 282:
! 283: /*
! 284: * Parse a quoted or unquoted roff-style request or macro argument.
! 285: * Return a pointer to the parsed argument, which is either the original
! 286: * pointer or advanced by one byte in case the argument is quoted.
! 287: * Null-terminate the argument in place.
! 288: * Collapse pairs of quotes inside quoted arguments.
! 289: * Advance the argument pointer to the next argument,
! 290: * or to the null byte terminating the argument line.
! 291: */
! 292: char *
! 293: mandoc_getarg(char **cpp, mandocmsg msg, void *data, int ln, int *pos)
! 294: {
! 295: char *start, *cp;
! 296: int quoted, pairs, white;
! 297:
! 298: /* Quoting can only start with a new word. */
! 299: start = *cpp;
! 300: if ('"' == *start) {
! 301: quoted = 1;
! 302: start++;
! 303: } else
! 304: quoted = 0;
! 305:
! 306: pairs = 0;
! 307: white = 0;
! 308: for (cp = start; '\0' != *cp; cp++) {
! 309: /* Move left after quoted quotes and escaped backslashes. */
! 310: if (pairs)
! 311: cp[-pairs] = cp[0];
! 312: if ('\\' == cp[0]) {
! 313: if ('\\' == cp[1]) {
! 314: /* Poor man's copy mode. */
! 315: pairs++;
! 316: cp++;
! 317: } else if (0 == quoted && ' ' == cp[1])
! 318: /* Skip escaped blanks. */
! 319: cp++;
! 320: } else if (0 == quoted) {
! 321: if (' ' == cp[0]) {
! 322: /* Unescaped blanks end unquoted args. */
! 323: white = 1;
! 324: break;
! 325: }
! 326: } else if ('"' == cp[0]) {
! 327: if ('"' == cp[1]) {
! 328: /* Quoted quotes collapse. */
! 329: pairs++;
! 330: cp++;
! 331: } else {
! 332: /* Unquoted quotes end quoted args. */
! 333: quoted = 2;
! 334: break;
! 335: }
! 336: }
! 337: }
! 338:
! 339: /* Quoted argument without a closing quote. */
! 340: if (1 == quoted && msg)
! 341: (*msg)(MANDOCERR_BADQUOTE, data, ln, *pos, NULL);
! 342:
! 343: /* Null-terminate this argument and move to the next one. */
! 344: if (pairs)
! 345: cp[-pairs] = '\0';
! 346: if ('\0' != *cp) {
! 347: *cp++ = '\0';
! 348: while (' ' == *cp)
! 349: cp++;
! 350: }
! 351: *pos += (cp - start) + (quoted ? 1 : 0);
! 352: *cpp = cp;
! 353:
! 354: if ('\0' == *cp && msg && (white || ' ' == cp[-1]))
! 355: (*msg)(MANDOCERR_EOLNSPACE, data, ln, *pos, NULL);
! 356:
! 357: return(start);
1.4 schwarze 358: }
1.5 schwarze 359:
360:
/*
 * Parse the string p with strptime(3) using the format fmt.
 * Succeeds only if the format consumes the whole string; in that case
 * the mktime(3) result is stored in *t and 1 is returned.
 * Otherwise *t is left untouched and 0 is returned.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 broken;
	const char	*rest;

	/* Fields the format does not set start out as zero. */
	memset(&broken, 0, sizeof(broken));

	rest = strptime(p, fmt, &broken);
	if (rest == NULL || *rest != '\0')
		return(0);

	*t = mktime(&broken);
	return(1);
}
377:
378:
379: /*
380: * Convert from a manual date string (see mdoc(7) and man(7)) into a
381: * date according to the stipulated date type.
382: */
383: time_t
384: mandoc_a2time(int flags, const char *p)
385: {
386: time_t t;
387:
388: if (MTIME_MDOCDATE & flags) {
389: if (0 == strcmp(p, "$" "Mdocdate$"))
390: return(time(NULL));
391: if (a2time(&t, "$" "Mdocdate: %b %d %Y $", p))
392: return(t);
393: }
394:
395: if (MTIME_CANONICAL & flags || MTIME_REDUCED & flags)
396: if (a2time(&t, "%b %d, %Y", p))
397: return(t);
398:
399: if (MTIME_ISO_8601 & flags)
400: if (a2time(&t, "%Y-%m-%d", p))
401: return(t);
402:
403: if (MTIME_REDUCED & flags) {
404: if (a2time(&t, "%d, %Y", p))
405: return(t);
406: if (a2time(&t, "%Y", p))
407: return(t);
408: }
409:
410: return(0);
411: }
412:
1.9 schwarze 413:
/*
 * Decide whether the sz bytes at p end a sentence.
 *
 * The buffer is scanned backwards from its last byte.  Closing
 * delimiters (double quote, single quote, ']' and ')') are skipped;
 * if one is seen before any end-of-sentence punctuation, the text is
 * treated as enclosed.  '.', '!' and '?' count as end-of-sentence
 * punctuation.  The scan stops at the first other byte.
 *
 * Returns 1 if punctuation was found and either the text is not
 * enclosed or the byte preceding the punctuation is alphanumeric,
 * 0 otherwise.  An empty buffer yields 0.  A non-zero enclosed
 * argument marks the text as enclosed from the outset.
 */
int
mandoc_eos(const char *p, size_t sz, int enclosed)
{
	size_t	 i;
	int	 found;

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 */

	/*
	 * Walk backwards by index rather than by pointer, so that no
	 * pointer before the start of the buffer is ever formed and
	 * sz is never narrowed to int.  Nothing happens for sz == 0.
	 */
	found = 0;
	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case ('\"'):
			/* FALLTHROUGH */
		case ('\''):
			/* FALLTHROUGH */
		case (']'):
			/* FALLTHROUGH */
		case (')'):
			if (0 == found)
				enclosed = 1;
			break;
		case ('.'):
			/* FALLTHROUGH */
		case ('!'):
			/* FALLTHROUGH */
		case ('?'):
			found = 1;
			break;
		default:
			return(found && (!enclosed ||
			    isalnum((unsigned char)p[i - 1])));
		}
	}

	/* Nothing but punctuation and closing delimiters. */
	return(found && !enclosed);
}
456:
457:
/*
 * Decide whether a line may be broken at the hyphen c points to,
 * where start is the beginning of the buffer containing it.
 * Breaking is only allowed when the hyphen is free-standing within
 * a word.  Returns 1 if a break is acceptable, 0 otherwise.
 */
int
mandoc_hyph(const char *start, const char *c)
{
	char	 before, after;

	/* Never break at the first byte of the buffer ... */
	if (start == c)
		return(0);

	/* ... nor at the last one. */
	after = c[1];
	if (after == '\0')
		return(0);

	/* Safe to look back now that c is known not to be the start. */
	before = c[-1];

	/* The hyphen must not be the first or last character of a word. */
	if (after == '\t' || before == '\t' || after == ' ' || before == ' ')
		return(0);

	/* No break next to another hyphen. */
	if (after == '-' || before == '-')
		return(0);

	/* No break if the hyphen is part of an escape. */
	if (before == '\\')
		return(0);

	return(1);
}