Annotation of src/usr.bin/mandoc/chars.c, Revision 1.7
1.7 ! schwarze 1: /* $Id: chars.c,v 1.6 2010/03/26 01:22:05 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <assert.h>
1.3 schwarze 18: #include <stdio.h>
1.1 schwarze 19: #include <stdlib.h>
20: #include <string.h>
21:
1.7 ! schwarze 22: #include "mandoc.h"
1.1 schwarze 23: #include "chars.h"
24:
25: #define PRINT_HI 126
26: #define PRINT_LO 32
27:
/*
 * One symbol-table entry: an input code together with its ASCII and
 * HTML renderings.  Entries are initialised positionally by the
 * CHAR(), STRING() and BOTH() macros, so the member order below must
 * not change.
 */
struct ln {
	struct ln	 *next;		/* next entry in the same hash chain */
	const char	 *code;		/* input code; lookup key, hashed on code[0] */
	const char	 *ascii;	/* rendering returned for non-HTML tables */
	const char	 *html;		/* rendering returned for CHARS_HTML tables */
	size_t		  codesz;	/* length compared against the lookup key */
	size_t		  asciisz;	/* length reported together with ascii */
	size_t		  htmlsz;	/* length reported together with html */
	int		  type;		/* bitmask of the CHARS_* flags below */
};

/* Entry kinds; match() tests these against the kind requested by the caller. */
#define	CHARS_CHAR	(1 << 0)	/* matched by chars_a2ascii() */
#define	CHARS_STRING	(1 << 1)	/* matched by chars_a2res() */
#define	CHARS_BOTH	(CHARS_CHAR | CHARS_STRING)
41:
/* Number of elements in the lines[] table. */
#define	LINES_MAX	369

/*
 * Build one struct ln initialiser (followed by a comma).  The arguments
 * arrive as string/length pairs and are shuffled into the struct's
 * member order: next, code, ascii, html, codesz, asciisz, htmlsz, type.
 */
#define	LN_ENTRY(code, codesz, ascii, asciisz, html, htmlsz, kind) \
	{ NULL, (code), (ascii), (html), \
	  (codesz), (asciisz), (htmlsz), (kind) },

/* Table-entry macros; they differ only in the kind flag they record. */
#define	CHAR(code, codesz, ascii, asciisz, html, htmlsz) \
	LN_ENTRY(code, codesz, ascii, asciisz, html, htmlsz, CHARS_CHAR)
#define	STRING(code, codesz, ascii, asciisz, html, htmlsz) \
	LN_ENTRY(code, codesz, ascii, asciisz, html, htmlsz, CHARS_STRING)
#define	BOTH(code, codesz, ascii, asciisz, html, htmlsz) \
	LN_ENTRY(code, codesz, ascii, asciisz, html, htmlsz, CHARS_BOTH)

/* Opening and closing of the lines[] definition around the entry list. */
#define	CHAR_TBL_START	static struct ln lines[LINES_MAX] = {
#define	CHAR_TBL_END	};
53:
1.1 schwarze 54: #include "chars.in"
55:
56: struct tbl {
57: enum chars type;
58: struct ln **htab;
59: };
60:
/* Does entry `ln' have kind `type' and the code p[0..sz-1]? */
static	inline int	  match(const struct ln *ln,
				const char *p, size_t sz, int type);
/* Look up p[0..sz-1] in `tab'; the result length is stored in *rsz. */
static	const char	 *find(struct tbl *tab, const char *p,
				size_t sz, size_t *rsz, int type);
65:
66:
67: void
68: chars_free(void *arg)
69: {
70: struct tbl *tab;
71:
72: tab = (struct tbl *)arg;
73:
74: free(tab->htab);
75: free(tab);
76: }
77:
78:
79: void *
80: chars_init(enum chars type)
81: {
82: struct tbl *tab;
83: struct ln **htab;
84: struct ln *pp;
85: int i, hash;
86:
87: /*
88: * Constructs a very basic chaining hashtable. The hash routine
89: * is simply the integral value of the first character.
90: * Subsequent entries are chained in the order they're processed
91: * (they're in-line re-ordered during lookup).
92: */
93:
1.3 schwarze 94: tab = malloc(sizeof(struct tbl));
95: if (NULL == tab) {
96: perror(NULL);
97: exit(EXIT_FAILURE);
98: }
1.1 schwarze 99:
100: htab = calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **));
1.3 schwarze 101: if (NULL == htab) {
102: perror(NULL);
103: exit(EXIT_FAILURE);
104: }
1.1 schwarze 105:
106: for (i = 0; i < LINES_MAX; i++) {
107: hash = (int)lines[i].code[0] - PRINT_LO;
108:
109: if (NULL == (pp = htab[hash])) {
110: htab[hash] = &lines[i];
111: continue;
112: }
113:
114: for ( ; pp->next; pp = pp->next)
115: /* Scan ahead. */ ;
116: pp->next = &lines[i];
117: }
118:
119: tab->htab = htab;
1.3 schwarze 120: tab->type = type;
1.1 schwarze 121: return(tab);
122: }
123:
124:
125: const char *
126: chars_a2ascii(void *arg, const char *p, size_t sz, size_t *rsz)
127: {
128:
129: return(find((struct tbl *)arg, p, sz, rsz, CHARS_CHAR));
130: }
131:
132:
133: const char *
134: chars_a2res(void *arg, const char *p, size_t sz, size_t *rsz)
135: {
136:
137: return(find((struct tbl *)arg, p, sz, rsz, CHARS_STRING));
138: }
139:
140:
141: static const char *
142: find(struct tbl *tab, const char *p, size_t sz, size_t *rsz, int type)
143: {
144: struct ln *pp, *prev;
145: struct ln **htab;
146: int hash;
147:
148: assert(p);
149: assert(sz > 0);
150:
151: if (p[0] < PRINT_LO || p[0] > PRINT_HI)
152: return(NULL);
153:
154: /*
155: * Lookup the symbol in the symbol hash. See ascii2htab for the
156: * hashtable specs. This dynamically re-orders the hash chain
157: * to optimise for repeat hits.
158: */
159:
160: hash = (int)p[0] - PRINT_LO;
161: htab = tab->htab;
162:
163: if (NULL == (pp = htab[hash]))
164: return(NULL);
165:
166: for (prev = NULL; pp; pp = pp->next) {
167: if ( ! match(pp, p, sz, type)) {
168: prev = pp;
169: continue;
170: }
171:
172: if (prev) {
173: prev->next = pp->next;
174: pp->next = htab[hash];
175: htab[hash] = pp;
176: }
177:
178: if (CHARS_HTML == tab->type) {
179: *rsz = pp->htmlsz;
180: return(pp->html);
181: }
182: *rsz = pp->asciisz;
183: return(pp->ascii);
184: }
185:
186: return(NULL);
187: }
188:
189:
190: static inline int
191: match(const struct ln *ln, const char *p, size_t sz, int type)
192: {
193:
194: if ( ! (ln->type & type))
195: return(0);
196: if (ln->codesz != sz)
197: return(0);
198: return(0 == strncmp(ln->code, p, sz));
199: }