Annotation of src/usr.bin/mandoc/chars.c, Revision 1.12
1.12 ! schwarze 1: /* $Id: chars.c,v 1.11 2010/08/18 02:46:37 schwarze Exp $ */
1.1 schwarze 2: /*
1.9 schwarze 3: * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
1.1 schwarze 4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <assert.h>
1.3 schwarze 18: #include <stdio.h>
1.1 schwarze 19: #include <stdlib.h>
20: #include <string.h>
21:
1.7 schwarze 22: #include "mandoc.h"
1.1 schwarze 23: #include "chars.h"
24:
/*
 * Inclusive range of first bytes accepted by the lookup code; a name
 * whose first byte falls outside it is never found.
 */
#define	PRINT_LO	 32	/* ASCII space */
#define	PRINT_HI	 126	/* ASCII tilde */
27:
/* Flag bits for ln.type: which kind(s) of lookup an entry answers. */
#define	CHARS_CHAR	 (1 << 0)
#define	CHARS_STRING	 (1 << 1)
#define	CHARS_BOTH	 (CHARS_CHAR | CHARS_STRING)

/*
 * One entry of the character table.  The member order must not change:
 * the table entries are written as positional initialisers.
 */
struct	ln {
	struct ln	 *next;		/* next entry in the same hash chain */
	const char	 *code;		/* input name that is looked up */
	const char	 *ascii;	/* string handed back by the *2str calls */
	int		  unicode;	/* value handed back by the *2cp calls */
	int		  type;		/* CHARS_* flag bits */
};
38:
1.11 schwarze 39: #define LINES_MAX 369
1.1 schwarze 40:
1.10 schwarze 41: #define CHAR(in, ch, code) \
42: { NULL, (in), (ch), (code), CHARS_CHAR },
43: #define STRING(in, ch, code) \
44: { NULL, (in), (ch), (code), CHARS_STRING },
45: #define BOTH(in, ch, code) \
46: { NULL, (in), (ch), (code), CHARS_BOTH },
1.1 schwarze 47:
1.4 schwarze 48: #define CHAR_TBL_START static struct ln lines[LINES_MAX] = {
49: #define CHAR_TBL_END };
50:
1.1 schwarze 51: #include "chars.in"
52:
53: struct tbl {
54: enum chars type;
55: struct ln **htab;
56: };
57:
/* File-local helpers, defined at the end of this file. */
static	const struct ln	 *find(struct tbl *tab, const char *p,
				size_t sz, int type);
static	inline int	  match(const struct ln *ln, const char *p,
				size_t sz, int type);
1.1 schwarze 61:
62:
63: void
64: chars_free(void *arg)
65: {
66: struct tbl *tab;
67:
68: tab = (struct tbl *)arg;
69:
70: free(tab->htab);
71: free(tab);
72: }
73:
74:
75: void *
76: chars_init(enum chars type)
77: {
78: struct tbl *tab;
79: struct ln **htab;
80: struct ln *pp;
81: int i, hash;
82:
83: /*
84: * Constructs a very basic chaining hashtable. The hash routine
85: * is simply the integral value of the first character.
86: * Subsequent entries are chained in the order they're processed
87: * (they're in-line re-ordered during lookup).
88: */
89:
1.3 schwarze 90: tab = malloc(sizeof(struct tbl));
91: if (NULL == tab) {
92: perror(NULL);
1.12 ! schwarze 93: exit(MANDOCLEVEL_SYSERR);
1.3 schwarze 94: }
1.1 schwarze 95:
96: htab = calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **));
1.3 schwarze 97: if (NULL == htab) {
98: perror(NULL);
1.12 ! schwarze 99: exit(MANDOCLEVEL_SYSERR);
1.3 schwarze 100: }
1.1 schwarze 101:
102: for (i = 0; i < LINES_MAX; i++) {
103: hash = (int)lines[i].code[0] - PRINT_LO;
104:
105: if (NULL == (pp = htab[hash])) {
106: htab[hash] = &lines[i];
107: continue;
108: }
109:
110: for ( ; pp->next; pp = pp->next)
111: /* Scan ahead. */ ;
112: pp->next = &lines[i];
113: }
114:
115: tab->htab = htab;
1.3 schwarze 116: tab->type = type;
1.1 schwarze 117: return(tab);
118: }
119:
120:
1.9 schwarze 121: /*
122: * Special character to Unicode codepoint.
123: */
124: int
125: chars_spec2cp(void *arg, const char *p, size_t sz)
126: {
127: const struct ln *ln;
128:
129: ln = find((struct tbl *)arg, p, sz, CHARS_CHAR);
130: if (NULL == ln)
131: return(-1);
132: return(ln->unicode);
133: }
134:
135:
136: /*
137: * Reserved word to Unicode codepoint.
138: */
139: int
140: chars_res2cp(void *arg, const char *p, size_t sz)
141: {
142: const struct ln *ln;
143:
144: ln = find((struct tbl *)arg, p, sz, CHARS_STRING);
145: if (NULL == ln)
146: return(-1);
147: return(ln->unicode);
148: }
149:
150:
151: /*
152: * Special character to string array.
153: */
1.1 schwarze 154: const char *
1.9 schwarze 155: chars_spec2str(void *arg, const char *p, size_t sz, size_t *rsz)
1.1 schwarze 156: {
1.9 schwarze 157: const struct ln *ln;
158:
159: ln = find((struct tbl *)arg, p, sz, CHARS_CHAR);
160: if (NULL == ln)
161: return(NULL);
1.1 schwarze 162:
1.10 schwarze 163: *rsz = strlen(ln->ascii);
1.9 schwarze 164: return(ln->ascii);
1.1 schwarze 165: }
166:
167:
1.9 schwarze 168: /*
169: * Reserved word to string array.
170: */
1.1 schwarze 171: const char *
1.9 schwarze 172: chars_res2str(void *arg, const char *p, size_t sz, size_t *rsz)
1.1 schwarze 173: {
1.9 schwarze 174: const struct ln *ln;
175:
176: ln = find((struct tbl *)arg, p, sz, CHARS_STRING);
177: if (NULL == ln)
178: return(NULL);
1.1 schwarze 179:
1.10 schwarze 180: *rsz = strlen(ln->ascii);
1.9 schwarze 181: return(ln->ascii);
1.1 schwarze 182: }
183:
184:
1.9 schwarze 185: static const struct ln *
186: find(struct tbl *tab, const char *p, size_t sz, int type)
1.1 schwarze 187: {
188: struct ln *pp, *prev;
189: struct ln **htab;
190: int hash;
191:
192: assert(p);
1.9 schwarze 193: if (0 == sz)
194: return(NULL);
1.1 schwarze 195:
196: if (p[0] < PRINT_LO || p[0] > PRINT_HI)
197: return(NULL);
198:
199: /*
200: * Lookup the symbol in the symbol hash. See ascii2htab for the
201: * hashtable specs. This dynamically re-orders the hash chain
202: * to optimise for repeat hits.
203: */
204:
205: hash = (int)p[0] - PRINT_LO;
206: htab = tab->htab;
207:
208: if (NULL == (pp = htab[hash]))
209: return(NULL);
210:
211: for (prev = NULL; pp; pp = pp->next) {
212: if ( ! match(pp, p, sz, type)) {
213: prev = pp;
214: continue;
215: }
216:
217: if (prev) {
218: prev->next = pp->next;
219: pp->next = htab[hash];
220: htab[hash] = pp;
221: }
222:
1.9 schwarze 223: return(pp);
1.1 schwarze 224: }
225:
226: return(NULL);
227: }
228:
229:
230: static inline int
231: match(const struct ln *ln, const char *p, size_t sz, int type)
232: {
233:
234: if ( ! (ln->type & type))
235: return(0);
1.9 schwarze 236: if (strncmp(ln->code, p, sz))
1.1 schwarze 237: return(0);
1.9 schwarze 238: return('\0' == ln->code[(int)sz]);
1.1 schwarze 239: }