[BACK]Return to chars.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / mandoc

Annotation of src/usr.bin/mandoc/chars.c, Revision 1.19

1.19    ! schwarze    1: /*     $Id: chars.c,v 1.18 2011/04/24 16:22:02 schwarze Exp $ */
1.1       schwarze    2: /*
1.19    ! schwarze    3:  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
1.16      schwarze    4:  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
1.1       schwarze    5:  *
                      6:  * Permission to use, copy, modify, and distribute this software for any
                      7:  * purpose with or without fee is hereby granted, provided that the above
                      8:  * copyright notice and this permission notice appear in all copies.
                      9:  *
                     10:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     11:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     12:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     13:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     15:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     16:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     17:  */
                     18: #include <assert.h>
1.19    ! schwarze   19: #include <ctype.h>
1.3       schwarze   20: #include <stdio.h>
1.1       schwarze   21: #include <stdlib.h>
                     22: #include <string.h>
                     23:
1.7       schwarze   24: #include "mandoc.h"
1.19    ! schwarze   25: #include "libmandoc.h"
1.1       schwarze   26:
                     27: #define        PRINT_HI         126
                     28: #define        PRINT_LO         32
                     29:
                     30: struct ln {
                     31:        struct ln        *next;
                     32:        const char       *code;
                     33:        const char       *ascii;
1.9       schwarze   34:        int               unicode;
1.1       schwarze   35: };
                     36:
1.19    ! schwarze   37: #define        LINES_MAX         325
1.1       schwarze   38:
1.10      schwarze   39: #define CHAR(in, ch, code) \
1.19    ! schwarze   40:        { NULL, (in), (ch), (code) },
1.1       schwarze   41:
1.4       schwarze   42: #define        CHAR_TBL_START    static struct ln lines[LINES_MAX] = {
                     43: #define        CHAR_TBL_END      };
                     44:
1.1       schwarze   45: #include "chars.in"
                     46:
/*
 * Handle returned by mchars_alloc(): it owns the bucket array of the
 * chained hash table.
 */
struct mchars {
	struct ln	**htab;	/* buckets, indexed by first byte - PRINT_LO */
};
                     50:
/* File-local helpers; both are defined later in this file. */
static const struct ln	*find(struct mchars *, const char *, size_t);
static inline int	 match(const struct ln *, const char *, size_t);
1.1       schwarze   53:
                     54: void
1.19    ! schwarze   55: mchars_free(struct mchars *arg)
1.1       schwarze   56: {
                     57:
1.19    ! schwarze   58:        free(arg->htab);
        !            59:        free(arg);
1.1       schwarze   60: }
                     61:
1.19    ! schwarze   62: struct mchars *
        !            63: mchars_alloc(void)
1.1       schwarze   64: {
1.19    ! schwarze   65:        struct mchars    *tab;
1.1       schwarze   66:        struct ln       **htab;
                     67:        struct ln        *pp;
                     68:        int               i, hash;
                     69:
                     70:        /*
                     71:         * Constructs a very basic chaining hashtable.  The hash routine
                     72:         * is simply the integral value of the first character.
                     73:         * Subsequent entries are chained in the order they're processed
                     74:         * (they're in-line re-ordered during lookup).
                     75:         */
                     76:
1.19    ! schwarze   77:        tab = mandoc_malloc(sizeof(struct mchars));
1.17      schwarze   78:        htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **));
1.1       schwarze   79:
                     80:        for (i = 0; i < LINES_MAX; i++) {
                     81:                hash = (int)lines[i].code[0] - PRINT_LO;
                     82:
                     83:                if (NULL == (pp = htab[hash])) {
                     84:                        htab[hash] = &lines[i];
                     85:                        continue;
                     86:                }
                     87:
                     88:                for ( ; pp->next; pp = pp->next)
                     89:                        /* Scan ahead. */ ;
                     90:                pp->next = &lines[i];
                     91:        }
                     92:
                     93:        tab->htab = htab;
                     94:        return(tab);
                     95: }
                     96:
                     97:
1.9       schwarze   98: /*
                     99:  * Special character to Unicode codepoint.
                    100:  */
                    101: int
1.19    ! schwarze  102: mchars_spec2cp(struct mchars *arg, const char *p, size_t sz)
1.9       schwarze  103: {
                    104:        const struct ln *ln;
                    105:
1.19    ! schwarze  106:        ln = find(arg, p, sz);
1.9       schwarze  107:        if (NULL == ln)
                    108:                return(-1);
                    109:        return(ln->unicode);
                    110: }
                    111:
/*
 * Numbered character string to ASCII codepoint.
 * The sz bytes at p are converted as a base-10 number.
 * This can only be a printable character (i.e., alnum, punct, space) so
 * prevent the character from ruining our state (backspace, newline, and
 * so on).
 * If the character is illegal, returns '\0'.
 */
char
mchars_num2char(const char *p, size_t sz)
{
	int		  i;

	i = mandoc_strntou(p, sz, 10);

	/*
	 * isprint() is only defined for arguments representable as
	 * unsigned char (or EOF), so reject everything else up front
	 * instead of passing an arbitrary number to it.
	 */
	if (i < 0 || i > 0xFF)
		return('\0');

	return(isprint(i) ? (char)i : '\0');
}
                    128:
                    129: /*
1.19    ! schwarze  130:  * Hex character string to Unicode codepoint.
        !           131:  * If the character is illegal, returns '\0'.
1.16      schwarze  132:  */
1.19    ! schwarze  133: int
        !           134: mchars_num2uc(const char *p, size_t sz)
1.16      schwarze  135: {
1.19    ! schwarze  136:        int               i;
1.16      schwarze  137:
1.19    ! schwarze  138:        if ((i = mandoc_strntou(p, sz, 16)) < 0)
        !           139:                return('\0');
        !           140:        /* FIXME: make sure we're not in a bogus range. */
        !           141:        return(i > 0x80 && i <= 0x10FFFF ? i : '\0');
1.9       schwarze  142: }
                    143:
                    144: /*
                    145:  * Special character to string array.
                    146:  */
1.1       schwarze  147: const char *
1.19    ! schwarze  148: mchars_spec2str(struct mchars *arg, const char *p, size_t sz, size_t *rsz)
1.1       schwarze  149: {
1.9       schwarze  150:        const struct ln *ln;
                    151:
1.19    ! schwarze  152:        ln = find(arg, p, sz);
1.9       schwarze  153:        if (NULL == ln)
                    154:                return(NULL);
1.1       schwarze  155:
1.10      schwarze  156:        *rsz = strlen(ln->ascii);
1.9       schwarze  157:        return(ln->ascii);
1.1       schwarze  158: }
                    159:
1.9       schwarze  160: static const struct ln *
1.19    ! schwarze  161: find(struct mchars *tab, const char *p, size_t sz)
1.1       schwarze  162: {
                    163:        struct ln        *pp, *prev;
                    164:        struct ln       **htab;
                    165:        int               hash;
                    166:
                    167:        assert(p);
1.9       schwarze  168:        if (0 == sz)
                    169:                return(NULL);
1.1       schwarze  170:
                    171:        if (p[0] < PRINT_LO || p[0] > PRINT_HI)
                    172:                return(NULL);
                    173:
                    174:        /*
                    175:         * Lookup the symbol in the symbol hash.  See ascii2htab for the
                    176:         * hashtable specs.  This dynamically re-orders the hash chain
                    177:         * to optimise for repeat hits.
                    178:         */
                    179:
                    180:        hash = (int)p[0] - PRINT_LO;
                    181:        htab = tab->htab;
                    182:
                    183:        if (NULL == (pp = htab[hash]))
                    184:                return(NULL);
                    185:
                    186:        for (prev = NULL; pp; pp = pp->next) {
1.19    ! schwarze  187:                if ( ! match(pp, p, sz)) {
1.1       schwarze  188:                        prev = pp;
                    189:                        continue;
                    190:                }
                    191:
                    192:                if (prev) {
                    193:                        prev->next = pp->next;
                    194:                        pp->next = htab[hash];
                    195:                        htab[hash] = pp;
                    196:                }
                    197:
1.9       schwarze  198:                return(pp);
1.1       schwarze  199:        }
                    200:
                    201:        return(NULL);
                    202: }
                    203:
                    204: static inline int
1.19    ! schwarze  205: match(const struct ln *ln, const char *p, size_t sz)
1.1       schwarze  206: {
                    207:
1.9       schwarze  208:        if (strncmp(ln->code, p, sz))
1.1       schwarze  209:                return(0);
1.9       schwarze  210:        return('\0' == ln->code[(int)sz]);
1.1       schwarze  211: }