Annotation of src/usr.bin/mandoc/mandoc.c, Revision 1.15
1.15 ! schwarze 1: /* $Id: mandoc.c,v 1.14 2010/06/26 17:56:43 schwarze Exp $ */
1.1 schwarze 2: /*
1.15 ! schwarze 3: * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
1.1 schwarze 4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
1.2 schwarze 17: #include <sys/types.h>
18:
1.1 schwarze 19: #include <assert.h>
20: #include <ctype.h>
21: #include <stdlib.h>
1.4 schwarze 22: #include <stdio.h>
23: #include <string.h>
1.5 schwarze 24: #include <time.h>
1.1 schwarze 25:
1.14 schwarze 26: #include "mandoc.h"
1.1 schwarze 27: #include "libmandoc.h"
28:
1.14 schwarze 29: static int a2time(time_t *, const char *, const char *);
30: static int spec_norm(char *, int);
31:
32:
33: /*
34: * "Normalise" a special string by converting its ASCII_HYPH entries
35: * into actual hyphens.
36: */
37: static int
38: spec_norm(char *p, int sz)
39: {
40: int i;
41:
42: for (i = 0; i < sz; i++)
43: if (ASCII_HYPH == p[i])
44: p[i] = '-';
45:
46: return(sz);
47: }
1.5 schwarze 48:
49:
1.1 schwarze 50: int
1.14 schwarze 51: mandoc_special(char *p)
1.1 schwarze 52: {
1.6 schwarze 53: int terminator; /* Terminator for \s. */
54: int lim; /* Limit for N in \s. */
55: int c, i;
1.14 schwarze 56: char *sv;
1.1 schwarze 57:
1.14 schwarze 58: sv = p;
59:
1.1 schwarze 60: if ('\\' != *p++)
1.14 schwarze 61: return(spec_norm(sv, 0));
1.1 schwarze 62:
63: switch (*p) {
64: case ('\''):
65: /* FALLTHROUGH */
66: case ('`'):
67: /* FALLTHROUGH */
68: case ('q'):
69: /* FALLTHROUGH */
1.14 schwarze 70: case (ASCII_HYPH):
71: /* FALLTHROUGH */
1.1 schwarze 72: case ('-'):
73: /* FALLTHROUGH */
74: case ('~'):
75: /* FALLTHROUGH */
76: case ('^'):
77: /* FALLTHROUGH */
78: case ('%'):
79: /* FALLTHROUGH */
80: case ('0'):
81: /* FALLTHROUGH */
82: case (' '):
1.13 schwarze 83: /* FALLTHROUGH */
84: case ('}'):
1.1 schwarze 85: /* FALLTHROUGH */
86: case ('|'):
87: /* FALLTHROUGH */
88: case ('&'):
89: /* FALLTHROUGH */
90: case ('.'):
91: /* FALLTHROUGH */
92: case (':'):
93: /* FALLTHROUGH */
1.3 schwarze 94: case ('c'):
1.14 schwarze 95: /* FALLTHROUGH */
1.1 schwarze 96: case ('e'):
1.14 schwarze 97: return(spec_norm(sv, 2));
1.6 schwarze 98: case ('s'):
99: if ('\0' == *++p)
1.14 schwarze 100: return(spec_norm(sv, 2));
1.6 schwarze 101:
102: c = 2;
103: terminator = 0;
104: lim = 1;
105:
106: if (*p == '\'') {
107: lim = 0;
108: terminator = 1;
109: ++p;
110: ++c;
111: } else if (*p == '[') {
112: lim = 0;
113: terminator = 2;
114: ++p;
115: ++c;
116: } else if (*p == '(') {
117: lim = 2;
118: terminator = 3;
119: ++p;
120: ++c;
121: }
122:
123: if (*p == '+' || *p == '-') {
124: ++p;
125: ++c;
126: }
127:
128: if (*p == '\'') {
129: if (terminator)
1.14 schwarze 130: return(spec_norm(sv, 0));
1.6 schwarze 131: lim = 0;
132: terminator = 1;
133: ++p;
134: ++c;
135: } else if (*p == '[') {
136: if (terminator)
1.14 schwarze 137: return(spec_norm(sv, 0));
1.6 schwarze 138: lim = 0;
139: terminator = 2;
140: ++p;
141: ++c;
142: } else if (*p == '(') {
143: if (terminator)
1.14 schwarze 144: return(spec_norm(sv, 0));
1.6 schwarze 145: lim = 2;
146: terminator = 3;
147: ++p;
148: ++c;
149: }
150:
151: /* TODO: needs to handle floating point. */
152:
153: if ( ! isdigit((u_char)*p))
1.14 schwarze 154: return(spec_norm(sv, 0));
1.6 schwarze 155:
156: for (i = 0; isdigit((u_char)*p); i++) {
157: if (lim && i >= lim)
158: break;
159: ++p;
160: ++c;
161: }
162:
163: if (terminator && terminator < 3) {
164: if (1 == terminator && *p != '\'')
1.14 schwarze 165: return(spec_norm(sv, 0));
1.6 schwarze 166: if (2 == terminator && *p != ']')
1.14 schwarze 167: return(spec_norm(sv, 0));
1.6 schwarze 168: ++p;
169: ++c;
170: }
171:
1.14 schwarze 172: return(spec_norm(sv, c));
1.8 schwarze 173: case ('f'):
174: /* FALLTHROUGH */
175: case ('F'):
176: /* FALLTHROUGH */
1.1 schwarze 177: case ('*'):
1.14 schwarze 178: if ('\0' == *++p || isspace((u_char)*p))
179: return(spec_norm(sv, 0));
1.1 schwarze 180: switch (*p) {
181: case ('('):
1.14 schwarze 182: if ('\0' == *++p || isspace((u_char)*p))
183: return(spec_norm(sv, 0));
184: return(spec_norm(sv, 4));
1.1 schwarze 185: case ('['):
186: for (c = 3, p++; *p && ']' != *p; p++, c++)
1.14 schwarze 187: if (isspace((u_char)*p))
1.1 schwarze 188: break;
1.14 schwarze 189: return(spec_norm(sv, *p == ']' ? c : 0));
1.1 schwarze 190: default:
191: break;
192: }
1.14 schwarze 193: return(spec_norm(sv, 3));
1.1 schwarze 194: case ('('):
1.14 schwarze 195: if ('\0' == *++p || isspace((u_char)*p))
196: return(spec_norm(sv, 0));
197: if ('\0' == *++p || isspace((u_char)*p))
198: return(spec_norm(sv, 0));
199: return(spec_norm(sv, 4));
1.1 schwarze 200: case ('['):
201: break;
202: default:
1.14 schwarze 203: return(spec_norm(sv, 0));
1.1 schwarze 204: }
205:
206: for (c = 3, p++; *p && ']' != *p; p++, c++)
1.14 schwarze 207: if (isspace((u_char)*p))
1.1 schwarze 208: break;
209:
1.14 schwarze 210: return(spec_norm(sv, *p == ']' ? c : 0));
1.1 schwarze 211: }
212:
1.4 schwarze 213:
/*
 * calloc(3) wrapper that never returns NULL: if the allocation fails,
 * the error is reported with perror(3) and the process exits with
 * EXIT_FAILURE.
 */
void *
mandoc_calloc(size_t num, size_t size)
{
	void	*mem;

	if ((mem = calloc(num, size)) != NULL)
		return(mem);

	perror(NULL);
	exit(EXIT_FAILURE);
}
227:
228:
/*
 * malloc(3) wrapper that never returns NULL: if the allocation fails,
 * the error is reported with perror(3) and the process exits with
 * EXIT_FAILURE.
 */
void *
mandoc_malloc(size_t size)
{
	void	*mem;

	if ((mem = malloc(size)) != NULL)
		return(mem);

	perror(NULL);
	exit(EXIT_FAILURE);
}
242:
243:
/*
 * realloc(3) wrapper that never returns NULL: if realloc() yields
 * NULL, the error is reported with perror(3) and the process exits
 * with EXIT_FAILURE.  Otherwise the (possibly moved) block is
 * returned.
 */
void *
mandoc_realloc(void *ptr, size_t size)
{
	void	*grown;

	grown = realloc(ptr, size);
	if (grown == NULL) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	return(grown);
}
256:
257:
/*
 * strdup(3) wrapper that never returns NULL: if the copy cannot be
 * allocated, the error is reported with perror(3) and the process
 * exits with EXIT_FAILURE.
 */
char *
mandoc_strdup(const char *ptr)
{
	char	*copy;

	if ((copy = strdup(ptr)) != NULL)
		return(copy);

	perror(NULL);
	exit(EXIT_FAILURE);
}
1.5 schwarze 271:
272:
/*
 * Parse p with the strptime(3) format fmt.  The parse only counts as
 * a success when strptime() consumes the entire string; in that case
 * the broken-down time is converted with mktime(3), stored in *t, and
 * 1 is returned.  Otherwise 0 is returned and *t is left untouched.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	const char	*rest;

	memset(&parsed, 0, sizeof(parsed));

	rest = strptime(p, fmt, &parsed);
	if (rest == NULL || *rest != '\0')
		return(0);

	*t = mktime(&parsed);
	return(1);
}
289:
290:
291: /*
292: * Convert from a manual date string (see mdoc(7) and man(7)) into a
293: * date according to the stipulated date type.
294: */
295: time_t
296: mandoc_a2time(int flags, const char *p)
297: {
298: time_t t;
299:
300: if (MTIME_MDOCDATE & flags) {
301: if (0 == strcmp(p, "$" "Mdocdate$"))
302: return(time(NULL));
303: if (a2time(&t, "$" "Mdocdate: %b %d %Y $", p))
304: return(t);
305: }
306:
307: if (MTIME_CANONICAL & flags || MTIME_REDUCED & flags)
308: if (a2time(&t, "%b %d, %Y", p))
309: return(t);
310:
311: if (MTIME_ISO_8601 & flags)
312: if (a2time(&t, "%Y-%m-%d", p))
313: return(t);
314:
315: if (MTIME_REDUCED & flags) {
316: if (a2time(&t, "%d, %Y", p))
317: return(t);
318: if (a2time(&t, "%Y", p))
319: return(t);
320: }
321:
322: return(0);
323: }
324:
1.9 schwarze 325:
/*
 * Decide whether the sz bytes starting at p end a sentence.
 *
 * The buffer is scanned backwards from its last byte.  Closing
 * delimiters (double quote, single quote, `]', `)') seen before any
 * sentence punctuation mark the punctuation as enclosed; `.', `!' and
 * `?' mark it as found.  The scan stops at the first byte that is
 * neither, and the result is non-zero when punctuation was found and
 * either it is not enclosed or that stopping byte is alphanumeric.
 * If the whole buffer consists of delimiters and punctuation, the
 * result is non-zero only for unenclosed punctuation.
 *
 * A non-zero enclosed argument treats the text as already enclosed.
 * An empty buffer never ends a sentence.
 */
int
mandoc_eos(const char *p, size_t sz, int enclosed)
{
	size_t		 i;
	int		 found = 0;

	if (0 == sz)
		return(0);

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 */

	/*
	 * Count down by index rather than by pointer so that no
	 * pointer before the start of the buffer is ever formed.
	 */
	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case ('\"'):
			/* FALLTHROUGH */
		case ('\''):
			/* FALLTHROUGH */
		case (']'):
			/* FALLTHROUGH */
		case (')'):
			if (0 == found)
				enclosed = 1;
			break;
		case ('.'):
			/* FALLTHROUGH */
		case ('!'):
			/* FALLTHROUGH */
		case ('?'):
			found = 1;
			break;
		default:
			/* ctype functions need an unsigned char value. */
			return(found && (!enclosed ||
			    isalnum((unsigned char)p[i - 1])));
		}
	}

	return(found && !enclosed);
}
367:
368:
/*
 * Choose whether a line may be broken at the hyphen c points to,
 * where start is the beginning of the buffer containing it.  Returns
 * 1 only for a hyphen standing free inside a word, otherwise 0.
 */
int
mandoc_hyph(const char *start, const char *c)
{
	char	 before, after;

	/* The first byte of the buffer has no predecessor to inspect. */
	if (c == start)
		return(0);

	/* Nor may the hyphen be the last byte of the buffer. */
	after = c[1];
	if (after == '\0')
		return(0);

	before = c[-1];

	/* Reject word boundaries (tab, blank) and doubled hyphens. */
	switch (after) {
	case '\t':
	case ' ':
	case '-':
		return(0);
	default:
		break;
	}

	/* Same on the left, plus a preceding backslash (an escape). */
	switch (before) {
	case '\t':
	case ' ':
	case '-':
	case '\\':
		return(0);
	default:
		break;
	}

	return(1);
}