Annotation of src/usr.bin/mandoc/mandoc.c, Revision 1.25
1.25 ! schwarze 1: /* $Id: mandoc.c,v 1.24 2011/04/21 22:59:54 schwarze Exp $ */
1.1 schwarze 2: /*
1.24 schwarze 3: * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.21 schwarze 4: * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
1.21 schwarze 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1.1 schwarze 11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1.21 schwarze 12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1.1 schwarze 13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
1.2 schwarze 18: #include <sys/types.h>
19:
1.1 schwarze 20: #include <assert.h>
21: #include <ctype.h>
22: #include <stdlib.h>
1.4 schwarze 23: #include <stdio.h>
24: #include <string.h>
1.5 schwarze 25: #include <time.h>
1.1 schwarze 26:
1.14 schwarze 27: #include "mandoc.h"
1.1 schwarze 28: #include "libmandoc.h"
29:
1.22 schwarze 30: #define DATESIZE 32
31:
1.14 schwarze 32: static int a2time(time_t *, const char *, const char *);
1.22 schwarze 33: static char *time2a(time_t);
1.5 schwarze 34:
/*
 * Validate the escape sequence that starts at the backslash p[0] and
 * measure it.  The first byte must be a backslash (asserted).
 *
 * Two shapes of argument are recognised after the escape name:
 *  - a fixed number of bytes (len), or
 *  - a run of bytes closed by a delimiter (term), either ']' or '\''.
 * Some escapes (\h, \v, \s, \z) may additionally contain a nested
 * escape or a parenthesised subexpression, handled by recursion.
 *
 * Any ASCII_HYPH byte found inside the sequence is rewritten to '-'
 * in place, which is why p is not const.
 *
 * Returns 0 if the sequence is malformed or the string ends too early.
 * Otherwise returns an offset from the leading backslash: for
 * delimited forms this is the offset of the closing delimiter itself,
 * for fixed-length forms it is the offset just past the last byte
 * consumed.
 */
1.1 schwarze 35: int
1.14 schwarze 36: mandoc_special(char *p)
1.1 schwarze 37: {
1.16 schwarze 38: int len, i;
39: char term;
1.14 schwarze 40: char *sv;
1.1 schwarze 41:
/* len: fixed byte count still to consume; term: closing delimiter. */
1.16 schwarze 42: len = 0;
43: term = '\0';
/* Remember the backslash so that offsets can be computed on return. */
1.14 schwarze 44: sv = p;
45:
1.16 schwarze 46: assert('\\' == *p);
47: p++;
1.1 schwarze 48:
/* Dispatch on the escape name; p then points just past the name. */
1.16 schwarze 49: switch (*p++) {
50: #if 0
51: case ('Z'):
52: /* FALLTHROUGH */
53: case ('X'):
54: /* FALLTHROUGH */
55: case ('x'):
1.1 schwarze 56: /* FALLTHROUGH */
1.16 schwarze 57: case ('S'):
1.14 schwarze 58: /* FALLTHROUGH */
1.16 schwarze 59: case ('R'):
1.1 schwarze 60: /* FALLTHROUGH */
1.16 schwarze 61: case ('N'):
1.1 schwarze 62: /* FALLTHROUGH */
1.16 schwarze 63: case ('l'):
1.1 schwarze 64: /* FALLTHROUGH */
1.16 schwarze 65: case ('L'):
1.1 schwarze 66: /* FALLTHROUGH */
1.16 schwarze 67: case ('H'):
1.13 schwarze 68: /* FALLTHROUGH */
1.16 schwarze 69: case ('h'):
1.1 schwarze 70: /* FALLTHROUGH */
1.16 schwarze 71: case ('D'):
1.1 schwarze 72: /* FALLTHROUGH */
1.16 schwarze 73: case ('C'):
1.1 schwarze 74: /* FALLTHROUGH */
1.16 schwarze 75: case ('b'):
1.1 schwarze 76: /* FALLTHROUGH */
1.16 schwarze 77: case ('B'):
1.1 schwarze 78: /* FALLTHROUGH */
1.16 schwarze 79: case ('a'):
1.14 schwarze 80: /* FALLTHROUGH */
1.16 schwarze 81: case ('A'):
82: if (*p++ != '\'')
83: return(0);
84: term = '\'';
85: break;
86: #endif
/*
 * \h, \v, \s: an optional sign may come either before or after the
 * opening delimiter, but not in both places; i records that a sign
 * (or a leading '0') has already been seen.
 */
1.17 schwarze 87: case ('h'):
88: /* FALLTHROUGH */
89: case ('v'):
90: /* FALLTHROUGH */
1.6 schwarze 91: case ('s'):
1.16 schwarze 92: if (ASCII_HYPH == *p)
93: *p = '-';
1.17 schwarze 94:
95: i = 0;
96: if ('+' == *p || '-' == *p) {
1.16 schwarze 97: p++;
1.17 schwarze 98: i = 1;
99: }
1.6 schwarze 100:
/* Opening delimiter, or none: then back up and take one byte. */
1.16 schwarze 101: switch (*p++) {
102: case ('('):
103: len = 2;
104: break;
105: case ('['):
106: term = ']';
107: break;
108: case ('\''):
109: term = '\'';
110: break;
111: case ('0'):
1.17 schwarze 112: i = 1;
1.16 schwarze 113: /* FALLTHROUGH */
114: default:
115: len = 1;
116: p--;
117: break;
1.6 schwarze 118: }
119:
1.16 schwarze 120: if (ASCII_HYPH == *p)
121: *p = '-';
122: if ('+' == *p || '-' == *p) {
1.17 schwarze 123: if (i)
1.16 schwarze 124: return(0);
125: p++;
126: }
127:
1.19 schwarze 128: /* Handle embedded numerical subexp or escape. */
129:
130: if ('(' == *p) {
/* Walk to the closing ')', recursing over any nested escapes. */
131: while (*p && ')' != *p)
132: if ('\\' == *p++) {
133: i = mandoc_special(--p);
134: if (0 == i)
135: return(0);
136: p += i;
137: }
138:
/* Reaching the end of the string without ')' is an error. */
139: if (')' == *p++)
140: break;
141:
142: return(0);
143: } else if ('\\' == *p) {
144: if (0 == (i = mandoc_special(p)))
145: return(0);
146: p += i;
147: }
148:
1.16 schwarze 149: break;
150: #if 0
151: case ('Y'):
152: /* FALLTHROUGH */
153: case ('V'):
154: /* FALLTHROUGH */
155: case ('$'):
156: /* FALLTHROUGH */
157: case ('n'):
158: /* FALLTHROUGH */
1.19 schwarze 159: #endif
/*
 * Escapes whose argument is one byte, two bytes introduced by '(',
 * or a run of bytes enclosed in '[' ... ']'.
 */
1.16 schwarze 160: case ('k'):
161: /* FALLTHROUGH */
162: case ('M'):
163: /* FALLTHROUGH */
164: case ('m'):
165: /* FALLTHROUGH */
1.8 schwarze 166: case ('f'):
167: /* FALLTHROUGH */
168: case ('F'):
169: /* FALLTHROUGH */
1.1 schwarze 170: case ('*'):
1.16 schwarze 171: switch (*p++) {
1.1 schwarze 172: case ('('):
1.16 schwarze 173: len = 2;
174: break;
1.1 schwarze 175: case ('['):
1.16 schwarze 176: term = ']';
177: break;
1.1 schwarze 178: default:
1.16 schwarze 179: len = 1;
180: p--;
1.1 schwarze 181: break;
182: }
1.16 schwarze 183: break;
/* Bare "\(xx": exactly two bytes follow the parenthesis. */
1.1 schwarze 184: case ('('):
1.16 schwarze 185: len = 2;
186: break;
/* Bare "\[...]": everything up to the closing bracket. */
1.1 schwarze 187: case ('['):
1.16 schwarze 188: term = ']';
1.1 schwarze 189: break;
/*
 * \z: if a nested escape follows, measure it recursively and return
 * at once; otherwise exactly one more byte is consumed below.
 */
1.19 schwarze 190: case ('z'):
191: len = 1;
192: if ('\\' == *p) {
193: if (0 == (i = mandoc_special(p)))
194: return(0);
195: p += i;
196: return(*p ? (int)(p - sv) : 0);
197: }
198: break;
/*
 * \o, \w: quote-delimited argument.
 * NOTE(review): without the quote, the test below has already advanced
 * p, so the default case backs up only to the byte after the name and
 * then consumes that byte - unlike other single-letter escapes, which
 * consume just the name.  Confirm whether that is intended.
 */
199: case ('o'):
200: /* FALLTHROUGH */
201: case ('w'):
202: if ('\'' == *p++) {
203: term = '\'';
204: break;
205: }
206: /* FALLTHROUGH */
/* Anything else: the escape name itself is the single byte consumed. */
1.1 schwarze 207: default:
1.16 schwarze 208: len = 1;
209: p--;
210: break;
1.1 schwarze 211: }
212:
/*
 * Delimited form: scan up to the terminator, which is not stepped
 * over.  Hitting the end of the string first makes the escape invalid.
 */
1.16 schwarze 213: if (term) {
214: for ( ; *p && term != *p; p++)
215: if (ASCII_HYPH == *p)
216: *p = '-';
217: return(*p ? (int)(p - sv) : 0);
218: }
1.1 schwarze 219:
/* Fixed-length form: all len bytes must be present before the NUL. */
1.16 schwarze 220: for (i = 0; *p && i < len; i++, p++)
221: if (ASCII_HYPH == *p)
222: *p = '-';
223: return(i == len ? (int)(p - sv) : 0);
1.1 schwarze 224: }
225:
1.4 schwarze 226:
227: void *
228: mandoc_calloc(size_t num, size_t size)
229: {
230: void *ptr;
231:
232: ptr = calloc(num, size);
233: if (NULL == ptr) {
234: perror(NULL);
1.20 schwarze 235: exit((int)MANDOCLEVEL_SYSERR);
1.4 schwarze 236: }
237:
238: return(ptr);
239: }
240:
241:
242: void *
243: mandoc_malloc(size_t size)
244: {
245: void *ptr;
246:
247: ptr = malloc(size);
248: if (NULL == ptr) {
249: perror(NULL);
1.20 schwarze 250: exit((int)MANDOCLEVEL_SYSERR);
1.4 schwarze 251: }
252:
253: return(ptr);
254: }
255:
256:
257: void *
258: mandoc_realloc(void *ptr, size_t size)
259: {
260:
261: ptr = realloc(ptr, size);
262: if (NULL == ptr) {
263: perror(NULL);
1.20 schwarze 264: exit((int)MANDOCLEVEL_SYSERR);
1.4 schwarze 265: }
266:
267: return(ptr);
268: }
269:
270:
271: char *
272: mandoc_strdup(const char *ptr)
273: {
274: char *p;
275:
276: p = strdup(ptr);
277: if (NULL == p) {
278: perror(NULL);
1.20 schwarze 279: exit((int)MANDOCLEVEL_SYSERR);
1.4 schwarze 280: }
281:
282: return(p);
1.21 schwarze 283: }
284:
285: /*
286: * Parse a quoted or unquoted roff-style request or macro argument.
287: * Return a pointer to the parsed argument, which is either the original
288: * pointer or advanced by one byte in case the argument is quoted.
289: * Null-terminate the argument in place.
290: * Collapse pairs of quotes inside quoted arguments.
291: * Advance the argument pointer to the next argument,
292: * or to the null byte terminating the argument line.
293: */
294: char *
1.25 ! schwarze 295: mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
1.21 schwarze 296: {
297: char *start, *cp;
298: int quoted, pairs, white;
299:
300: /* Quoting can only start with a new word. */
301: start = *cpp;
302: if ('"' == *start) {
303: quoted = 1;
304: start++;
305: } else
306: quoted = 0;
307:
308: pairs = 0;
309: white = 0;
310: for (cp = start; '\0' != *cp; cp++) {
311: /* Move left after quoted quotes and escaped backslashes. */
312: if (pairs)
313: cp[-pairs] = cp[0];
314: if ('\\' == cp[0]) {
315: if ('\\' == cp[1]) {
316: /* Poor man's copy mode. */
317: pairs++;
318: cp++;
319: } else if (0 == quoted && ' ' == cp[1])
320: /* Skip escaped blanks. */
321: cp++;
322: } else if (0 == quoted) {
323: if (' ' == cp[0]) {
324: /* Unescaped blanks end unquoted args. */
325: white = 1;
326: break;
327: }
328: } else if ('"' == cp[0]) {
329: if ('"' == cp[1]) {
330: /* Quoted quotes collapse. */
331: pairs++;
332: cp++;
333: } else {
334: /* Unquoted quotes end quoted args. */
335: quoted = 2;
336: break;
337: }
338: }
339: }
340:
341: /* Quoted argument without a closing quote. */
1.25 ! schwarze 342: if (1 == quoted)
! 343: mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL);
1.21 schwarze 344:
345: /* Null-terminate this argument and move to the next one. */
346: if (pairs)
347: cp[-pairs] = '\0';
348: if ('\0' != *cp) {
349: *cp++ = '\0';
350: while (' ' == *cp)
351: cp++;
352: }
1.24 schwarze 353: *pos += (int)(cp - start) + (quoted ? 1 : 0);
1.21 schwarze 354: *cpp = cp;
355:
1.25 ! schwarze 356: if ('\0' == *cp && (white || ' ' == cp[-1]))
! 357: mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL);
1.21 schwarze 358:
359: return(start);
1.4 schwarze 360: }
1.5 schwarze 361:
/*
 * Parse the string p according to the strptime(3) format fmt.
 * The whole string must be consumed by the format.
 * On success, store the mktime(3) result in *t and return 1;
 * otherwise return 0 and leave *t untouched.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 tm;
	const char	*end;

	memset(&tm, 0, sizeof(tm));

	end = strptime(p, fmt, &tm);

	/* Reject parse failures and trailing garbage alike. */
	if (NULL == end || '\0' != *end)
		return(0);

	*t = mktime(&tm);
	return(1);
}
378:
/*
 * Format the time t, converted to local time, as the full month name,
 * the day of the month without padding, a comma, and the year,
 * for example "April 21, 2011".
 * Return a buffer obtained from mandoc_malloc(), or NULL if the time
 * cannot be converted or any component does not fit into the space
 * reserved for it; the buffer is released here in that case.
 */
static char *
time2a(time_t t)
{
	struct tm	 tm;
	char		*buf, *p;
	size_t		 ssz;
	int		 isz;

	/* Without a valid broken-down time, tm must not be read. */
	if (NULL == localtime_r(&t, &tm))
		return(NULL);

	/*
	 * Reserve space:
	 * up to 9 characters for the month (September) + blank
	 * up to 2 characters for the day + comma + blank
	 * 4 characters for the year and a terminating '\0'
	 */
	p = buf = mandoc_malloc(10 + 4 + 4 + 1);

	if (0 == (ssz = strftime(p, 10 + 1, "%B ", &tm)))
		goto fail;
	p += (int)ssz;

	/*
	 * snprintf(3) returns the length it wanted to write; anything
	 * above 4 was truncated and would push p past the space
	 * reserved for the day.
	 */
	isz = snprintf(p, 4 + 1, "%d, ", tm.tm_mday);
	if (isz < 0 || isz > 4)
		goto fail;
	p += isz;

	if (0 == strftime(p, 4 + 1, "%Y", &tm))
		goto fail;
	return(buf);

fail:
	free(buf);
	return(NULL);
}
413:
414: char *
1.25 ! schwarze 415: mandoc_normdate(struct mparse *parse, char *in, int ln, int pos)
1.5 schwarze 416: {
1.22 schwarze 417: char *out;
1.5 schwarze 418: time_t t;
419:
1.22 schwarze 420: if (NULL == in || '\0' == *in ||
421: 0 == strcmp(in, "$" "Mdocdate$")) {
1.25 ! schwarze 422: mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL);
1.22 schwarze 423: time(&t);
424: }
425: else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) &&
426: !a2time(&t, "%b %d, %Y", in) &&
427: !a2time(&t, "%Y-%m-%d", in)) {
1.25 ! schwarze 428: mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL);
1.22 schwarze 429: t = 0;
1.5 schwarze 430: }
1.22 schwarze 431: out = t ? time2a(t) : NULL;
1.23 schwarze 432: return(out ? out : mandoc_strdup(in));
1.5 schwarze 433: }
434:
/*
 * Decide whether the sz bytes at p end a sentence.
 *
 * The buffer is scanned backwards.  Trailing closing delimiters
 * (double quote, single quote, ']', ')') are skipped; as long as no
 * end-of-sentence punctuation has been seen yet, they mark the
 * punctuation as enclosed.  The characters '.', '!' and '?' count as
 * end-of-sentence punctuation.  The scan stops at the first other
 * character.
 *
 * enclosed: non-zero if the caller already considers the text
 * enclosed.
 *
 * Return 1 if punctuation was found and it is either not enclosed or
 * directly preceded by an alphanumeric character, 0 otherwise.
 * If the buffer consists of nothing but delimiters and punctuation,
 * return 1 only if punctuation was found and it is not enclosed.
 */
int
mandoc_eos(const char *p, size_t sz, int enclosed)
{
	size_t		 i;
	int		 found;

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 */

	found = 0;

	/*
	 * Count down with an index rather than a pointer, such that
	 * no pointer before the start of the buffer is ever formed.
	 * An empty buffer falls straight through and yields 0.
	 */
	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case ('\"'):
			/* FALLTHROUGH */
		case ('\''):
			/* FALLTHROUGH */
		case (']'):
			/* FALLTHROUGH */
		case (')'):
			if (0 == found)
				enclosed = 1;
			break;
		case ('.'):
			/* FALLTHROUGH */
		case ('!'):
			/* FALLTHROUGH */
		case ('?'):
			found = 1;
			break;
		default:
			return(found && (!enclosed ||
			    isalnum((unsigned char)p[i - 1])));
		}
	}

	return(found && !enclosed);
}
477:
/*
 * Choose whether to break at the hyphen character c points to inside
 * the buffer beginning at start.  A break is only allowed if the
 * hyphen is free-standing within a word.
 * Return 1 if a break is allowed here, 0 otherwise.
 */
int
mandoc_hyph(const char *start, const char *c)
{

	/* The first character of the buffer has no predecessor. */
	if (start == c)
		return(0);

	/*
	 * The successor must not end the buffer, end the word,
	 * or be another hyphen.
	 */
	switch (c[1]) {
	case ('\0'):
		/* FALLTHROUGH */
	case ('\t'):
		/* FALLTHROUGH */
	case (' '):
		/* FALLTHROUGH */
	case ('-'):
		return(0);
	default:
		break;
	}

	/*
	 * The predecessor must not start the word, be another hyphen,
	 * or be the backslash of an escape.
	 */
	switch (c[-1]) {
	case ('\t'):
		/* FALLTHROUGH */
	case (' '):
		/* FALLTHROUGH */
	case ('-'):
		/* FALLTHROUGH */
	case ('\\'):
		return(0);
	default:
		break;
	}

	return(1);
}
504:
/*
 * Find out whether the line cp, looked at from offset *ppos, is a
 * macro line.  A macro line starts with a dot, with an apostrophe, or
 * with a backslash followed by a dot.
 * If it is one, move *ppos past that introducer and any blanks and
 * tabs following it and return one; if it isn't, return zero and
 * leave *ppos alone.
 */
int
mandoc_getcontrol(const char *cp, int *ppos)
{
	const char	*s;

	s = cp + *ppos;

	if ('\\' == s[0] && '.' == s[1])
		s += 2;
	else if ('.' == s[0] || '\'' == s[0])
		s += 1;
	else
		return(0);

	/* Whitespace between the introducer and the name is ignored. */
	for ( ; ' ' == *s || '\t' == *s; s++)
		continue;

	*ppos = (int)(s - cp);
	return(1);
}