Annotation of src/usr.bin/mandoc/mandoc.c, Revision 1.14
1.14 ! schwarze 1: /* $Id: mandoc.c,v 1.13 2010/06/06 20:30:08 schwarze Exp $ */
1.1 schwarze 2: /*
1.14 ! schwarze 3: * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@bsd.lv>
1.1 schwarze 4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
1.2 schwarze 17: #include <sys/types.h>
18:
1.1 schwarze 19: #include <assert.h>
20: #include <ctype.h>
21: #include <stdlib.h>
1.4 schwarze 22: #include <stdio.h>
23: #include <string.h>
1.5 schwarze 24: #include <time.h>
1.1 schwarze 25:
1.14 ! schwarze 26: #include "mandoc.h"
1.1 schwarze 27: #include "libmandoc.h"
28:
/* File-local helpers; definitions follow below. */
static	int	  a2time(time_t *t, const char *fmt, const char *p);
static	int	  spec_norm(char *p, int sz);
! 31:
! 32:
! 33: /*
! 34: * "Normalise" a special string by converting its ASCII_HYPH entries
! 35: * into actual hyphens.
! 36: */
! 37: static int
! 38: spec_norm(char *p, int sz)
! 39: {
! 40: int i;
! 41:
! 42: for (i = 0; i < sz; i++)
! 43: if (ASCII_HYPH == p[i])
! 44: p[i] = '-';
! 45:
! 46: return(sz);
! 47: }
1.5 schwarze 48:
49:
1.1 schwarze 50: int
1.14 ! schwarze 51: mandoc_special(char *p)
1.1 schwarze 52: {
1.6 schwarze 53: int terminator; /* Terminator for \s. */
54: int lim; /* Limit for N in \s. */
55: int c, i;
1.14 ! schwarze 56: char *sv;
1.1 schwarze 57:
1.14 ! schwarze 58: sv = p;
! 59:
1.1 schwarze 60: if ('\\' != *p++)
1.14 ! schwarze 61: return(spec_norm(sv, 0));
1.1 schwarze 62:
63: switch (*p) {
64: case ('\''):
65: /* FALLTHROUGH */
66: case ('`'):
67: /* FALLTHROUGH */
68: case ('q'):
69: /* FALLTHROUGH */
1.14 ! schwarze 70: case (ASCII_HYPH):
! 71: /* FALLTHROUGH */
1.1 schwarze 72: case ('-'):
73: /* FALLTHROUGH */
74: case ('~'):
75: /* FALLTHROUGH */
76: case ('^'):
77: /* FALLTHROUGH */
78: case ('%'):
79: /* FALLTHROUGH */
80: case ('0'):
81: /* FALLTHROUGH */
82: case (' '):
1.13 schwarze 83: /* FALLTHROUGH */
84: case ('}'):
1.1 schwarze 85: /* FALLTHROUGH */
86: case ('|'):
87: /* FALLTHROUGH */
88: case ('&'):
89: /* FALLTHROUGH */
90: case ('.'):
91: /* FALLTHROUGH */
92: case (':'):
93: /* FALLTHROUGH */
1.3 schwarze 94: case ('c'):
1.14 ! schwarze 95: /* FALLTHROUGH */
1.1 schwarze 96: case ('e'):
1.14 ! schwarze 97: return(spec_norm(sv, 2));
1.6 schwarze 98: case ('s'):
99: if ('\0' == *++p)
1.14 ! schwarze 100: return(spec_norm(sv, 2));
1.6 schwarze 101:
102: c = 2;
103: terminator = 0;
104: lim = 1;
105:
106: if (*p == '\'') {
107: lim = 0;
108: terminator = 1;
109: ++p;
110: ++c;
111: } else if (*p == '[') {
112: lim = 0;
113: terminator = 2;
114: ++p;
115: ++c;
116: } else if (*p == '(') {
117: lim = 2;
118: terminator = 3;
119: ++p;
120: ++c;
121: }
122:
123: if (*p == '+' || *p == '-') {
124: ++p;
125: ++c;
126: }
127:
128: if (*p == '\'') {
129: if (terminator)
1.14 ! schwarze 130: return(spec_norm(sv, 0));
1.6 schwarze 131: lim = 0;
132: terminator = 1;
133: ++p;
134: ++c;
135: } else if (*p == '[') {
136: if (terminator)
1.14 ! schwarze 137: return(spec_norm(sv, 0));
1.6 schwarze 138: lim = 0;
139: terminator = 2;
140: ++p;
141: ++c;
142: } else if (*p == '(') {
143: if (terminator)
1.14 ! schwarze 144: return(spec_norm(sv, 0));
1.6 schwarze 145: lim = 2;
146: terminator = 3;
147: ++p;
148: ++c;
149: }
150:
151: /* TODO: needs to handle floating point. */
152:
153: if ( ! isdigit((u_char)*p))
1.14 ! schwarze 154: return(spec_norm(sv, 0));
1.6 schwarze 155:
156: for (i = 0; isdigit((u_char)*p); i++) {
157: if (lim && i >= lim)
158: break;
159: ++p;
160: ++c;
161: }
162:
163: if (terminator && terminator < 3) {
164: if (1 == terminator && *p != '\'')
1.14 ! schwarze 165: return(spec_norm(sv, 0));
1.6 schwarze 166: if (2 == terminator && *p != ']')
1.14 ! schwarze 167: return(spec_norm(sv, 0));
1.6 schwarze 168: ++p;
169: ++c;
170: }
171:
1.14 ! schwarze 172: return(spec_norm(sv, c));
1.8 schwarze 173: case ('f'):
174: /* FALLTHROUGH */
175: case ('F'):
176: /* FALLTHROUGH */
1.1 schwarze 177: case ('*'):
1.14 ! schwarze 178: if ('\0' == *++p || isspace((u_char)*p))
! 179: return(spec_norm(sv, 0));
1.1 schwarze 180: switch (*p) {
181: case ('('):
1.14 ! schwarze 182: if ('\0' == *++p || isspace((u_char)*p))
! 183: return(spec_norm(sv, 0));
! 184: return(spec_norm(sv, 4));
1.1 schwarze 185: case ('['):
186: for (c = 3, p++; *p && ']' != *p; p++, c++)
1.14 ! schwarze 187: if (isspace((u_char)*p))
1.1 schwarze 188: break;
1.14 ! schwarze 189: return(spec_norm(sv, *p == ']' ? c : 0));
1.1 schwarze 190: default:
191: break;
192: }
1.14 ! schwarze 193: return(spec_norm(sv, 3));
1.1 schwarze 194: case ('('):
1.14 ! schwarze 195: if ('\0' == *++p || isspace((u_char)*p))
! 196: return(spec_norm(sv, 0));
! 197: if ('\0' == *++p || isspace((u_char)*p))
! 198: return(spec_norm(sv, 0));
! 199: return(spec_norm(sv, 4));
1.1 schwarze 200: case ('['):
201: break;
202: default:
1.14 ! schwarze 203: return(spec_norm(sv, 0));
1.1 schwarze 204: }
205:
206: for (c = 3, p++; *p && ']' != *p; p++, c++)
1.14 ! schwarze 207: if (isspace((u_char)*p))
1.1 schwarze 208: break;
209:
1.14 ! schwarze 210: return(spec_norm(sv, *p == ']' ? c : 0));
1.1 schwarze 211: }
212:
1.4 schwarze 213:
/*
 * calloc(3) wrapper that never hands NULL back to the caller: when
 * calloc returns NULL the error is reported with perror(3) and the
 * process exits with EXIT_FAILURE.
 */
void *
mandoc_calloc(size_t num, size_t size)
{
	void	*mem = calloc(num, size);

	if (mem != NULL)
		return(mem);

	perror(NULL);
	exit(EXIT_FAILURE);
}
227:
228:
/*
 * malloc(3) wrapper that never hands NULL back to the caller: when
 * malloc returns NULL the error is reported with perror(3) and the
 * process exits with EXIT_FAILURE.
 */
void *
mandoc_malloc(size_t size)
{
	void	*mem = malloc(size);

	if (mem != NULL)
		return(mem);

	perror(NULL);
	exit(EXIT_FAILURE);
}
242:
243:
/*
 * realloc(3) wrapper that never hands NULL back to the caller: when
 * realloc returns NULL the error is reported with perror(3) and the
 * process exits with EXIT_FAILURE.
 */
void *
mandoc_realloc(void *ptr, size_t size)
{
	void	*grown = realloc(ptr, size);

	if (grown != NULL)
		return(grown);

	perror(NULL);
	exit(EXIT_FAILURE);
}
256:
257:
/*
 * strdup(3) wrapper that never hands NULL back to the caller: when
 * strdup returns NULL the error is reported with perror(3) and the
 * process exits with EXIT_FAILURE.
 */
char *
mandoc_strdup(const char *ptr)
{
	char	*copy = strdup(ptr);

	if (copy != NULL)
		return(copy);

	perror(NULL);
	exit(EXIT_FAILURE);
}
1.5 schwarze 271:
272:
/*
 * Parse the whole of p according to the strptime(3) format fmt.
 * On success the broken-down time is converted with mktime(3), stored
 * in *t, and 1 is returned.  If strptime fails or leaves unparsed
 * input behind, *t is left untouched and 0 is returned.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 tm;
	const char	*end;

	memset(&tm, 0, sizeof(tm));

	end = strptime(p, fmt, &tm);
	if (NULL == end || '\0' != *end)
		return(0);

	*t = mktime(&tm);
	return(1);
}
289:
290:
291: /*
292: * Convert from a manual date string (see mdoc(7) and man(7)) into a
293: * date according to the stipulated date type.
294: */
295: time_t
296: mandoc_a2time(int flags, const char *p)
297: {
298: time_t t;
299:
300: if (MTIME_MDOCDATE & flags) {
301: if (0 == strcmp(p, "$" "Mdocdate$"))
302: return(time(NULL));
303: if (a2time(&t, "$" "Mdocdate: %b %d %Y $", p))
304: return(t);
305: }
306:
307: if (MTIME_CANONICAL & flags || MTIME_REDUCED & flags)
308: if (a2time(&t, "%b %d, %Y", p))
309: return(t);
310:
311: if (MTIME_ISO_8601 & flags)
312: if (a2time(&t, "%Y-%m-%d", p))
313: return(t);
314:
315: if (MTIME_REDUCED & flags) {
316: if (a2time(&t, "%d, %Y", p))
317: return(t);
318: if (a2time(&t, "%Y", p))
319: return(t);
320: }
321:
322: return(0);
323: }
324:
1.9 schwarze 325:
/*
 * Decide whether the sz bytes starting at p end a sentence.
 *
 * Trailing closing delimiters (double quote, single quote, ']' and ')')
 * are skipped.  Returns 1 if the byte before them is '!', '?' or a '.'
 * that is not preceded by a backslash; returns 0 otherwise, including
 * when sz is 0 or the buffer holds nothing but closing delimiters.
 */
int
mandoc_eos(const char *p, size_t sz)
{

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 */

	/* Index with size_t directly so long buffers cannot wrap. */
	for ( ; sz > 0; sz--) {
		switch (p[sz - 1]) {
		case ('\"'):
			/* FALLTHROUGH */
		case ('\''):
			/* FALLTHROUGH */
		case (']'):
			/* FALLTHROUGH */
		case (')'):
			break;
		case ('.'):
			/* Escaped periods. */
			if (sz > 1 && '\\' == p[sz - 2])
				return(0);
			/* FALLTHROUGH */
		case ('!'):
			/* FALLTHROUGH */
		case ('?'):
			return(1);
		default:
			return(0);
		}
	}

	return(0);
}
365:
366:
/*
 * Decide whether the hyphen at c, inside the buffer beginning at start,
 * is a permissible break point.  A break is allowed only when the
 * hyphen is free-standing within a word.  Returns 1 if so, 0 if not.
 */
int
mandoc_hyph(const char *start, const char *c)
{
	char	 prev, next;

	/* The first byte of the buffer has no left neighbour. */
	if (c == start)
		return(0);

	/* Neither does the last byte have a right neighbour. */
	next = c[1];
	if ('\0' == next)
		return(0);

	prev = c[-1];

	/* Hyphen at the edge of a word. */
	if ('\t' == next || '\t' == prev || ' ' == next || ' ' == prev)
		return(0);

	/* Part of a run of hyphens. */
	if ('-' == next || '-' == prev)
		return(0);

	/* Escaped hyphen; otherwise the break is allowed. */
	return('\\' != prev);
}