[BACK]Return to regexp.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / vim

Annotation of src/usr.bin/vim/regexp.c, Revision 1.1

1.1     ! downsj      1: /* $OpenBSD$   */
        !             2: /* vi:set ts=4 sw=4:
        !             3:  * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
        !             4:  *
        !             5:  * This is NOT the original regular expression code as written by
        !             6:  * Henry Spencer. This code has been modified specifically for use
        !             7:  * with the VIM editor, and should not be used apart from compiling
        !             8:  * VIM. If you want a good regular expression library, get the
        !             9:  * original code. The copyright notice that follows is from the
        !            10:  * original.
        !            11:  *
        !            12:  * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
        !            13:  *
        !            14:  *
        !            15:  * vim_regcomp and vim_regexec -- vim_regsub and regerror are elsewhere
        !            16:  *
        !            17:  *     Copyright (c) 1986 by University of Toronto.
        !            18:  *     Written by Henry Spencer.  Not derived from licensed software.
        !            19:  *
        !            20:  *     Permission is granted to anyone to use this software for any
        !            21:  *     purpose on any computer system, and to redistribute it freely,
        !            22:  *     subject to the following restrictions:
        !            23:  *
        !            24:  *     1. The author is not responsible for the consequences of use of
        !            25:  *             this software, no matter how awful, even if they arise
        !            26:  *             from defects in it.
        !            27:  *
        !            28:  *     2. The origin of this software must not be misrepresented, either
        !            29:  *             by explicit claim or by omission.
        !            30:  *
        !            31:  *     3. Altered versions must be plainly marked as such, and must not
        !            32:  *             be misrepresented as being the original software.
        !            33:  *
        !            34:  * Beware that some of this code is subtly aware of the way operator
        !            35:  * precedence is structured in regular expressions.  Serious changes in
        !            36:  * regular-expression syntax might require a total rethink.
        !            37:  *
        !            38:  * $Log:       regexp.c,v $
        !            39:  * Revision 1.2  88/04/28  08:09:45  tony
        !            40:  * First modification of the regexp library. Added an external variable
        !            41:  * 'reg_ic' which can be set to indicate that case should be ignored.
        !            42:  * Added a new parameter to vim_regexec() to indicate that the given string
        !            43:  * comes from the beginning of a line and is thus eligible to match
        !            44:  * 'beginning-of-line'.
        !            45:  *
        !            46:  * Revisions by Olaf 'Rhialto' Seibert, rhialto@mbfys.kun.nl:
        !            47:  * Changes for vi: (the semantics of several things were rather different)
        !            48:  * - Added lexical analyzer, because in vi magicness of characters
        !            49:  *   is rather difficult, and may change over time.
        !            50:  * - Added support for \< \> \1-\9 and ~
        !            51:  * - Left some magic stuff in, but only backslashed: \| \+
        !            52:  * - * and \+ still work after \) even though they shouldn't.
        !            53:  */
        !            54: #include "vim.h"
        !            55: #include "globals.h"
        !            56: #include "proto.h"
        !            57: #undef DEBUG
        !            58:
        !            59: #include <stdio.h>
        !            60: #include "regexp.h"
        !            61: #include "option.h"
        !            62:
        !            63: /*
        !            64:  * The "internal use only" fields in regexp.h are present to pass info from
        !            65:  * compile to execute that permits the execute phase to run lots faster on
        !            66:  * simple cases.  They are:
        !            67:  *
        !            68:  * regstart    char that must begin a match; '\0' if none obvious
        !            69:  * reganch     is the match anchored (at beginning-of-line only)?
        !            70:  * regmust     string (pointer into program) that match must include, or NULL
        !            71:  * regmlen     length of regmust string
        !            72:  *
        !            73:  * Regstart and reganch permit very fast decisions on suitable starting points
        !            74:  * for a match, cutting down the work a lot.  Regmust permits fast rejection
        !            75:  * of lines that cannot possibly match.  The regmust tests are costly enough
        !            76:  * that vim_regcomp() supplies a regmust only if the r.e. contains something
        !            77:  * potentially expensive (at present, the only such thing detected is * or +
        !            78:  * at the start of the r.e., which can involve a lot of backup).  Regmlen is
        !            79:  * supplied because the test in vim_regexec() needs it and vim_regcomp() is
        !            80:  * computing it anyway.
        !            81:  */
        !            82:
        !            83: /*
        !            84:  * Structure for regexp "program". This is essentially a linear encoding
        !            85:  * of a nondeterministic finite-state machine (aka syntax charts or
        !            86:  * "railroad normal form" in parsing technology).  Each node is an opcode
        !            87:  * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
        !            88:  * all nodes except BRANCH implement concatenation; a "next" pointer with
        !            89:  * a BRANCH on both ends of it is connecting two alternatives. (Here we
        !            90:  * have one of the subtle syntax dependencies: an individual BRANCH (as
        !            91:  * opposed to a collection of them) is never concatenated with anything
        !            92:  * because of operator precedence.)  The operand of some types of node is
        !            93:  * a literal string; for others, it is a node leading into a sub-FSM.  In
        !            94:  * particular, the operand of a BRANCH node is the first node of the branch.
        !            95:  * (NB this is *not* a tree structure: the tail of the branch connects
        !            96:  * to the thing following the set of BRANCHes.)  The opcodes are:
        !            97:  */
        !            98:
        !            99: /* definition  number             opnd?    meaning */
        !           100: #define END    0               /* no   End of program. */
        !           101: #define BOL    1               /* no   Match "" at beginning of line. */
        !           102: #define EOL    2               /* no   Match "" at end of line. */
        !           103: #define ANY    3               /* no   Match any one character. */
        !           104: #define ANYOF  4               /* str  Match any character in this string. */
        !           105: #define ANYBUT 5               /* str  Match any character not in this
        !           106:                                 *      string. */
        !           107: #define BRANCH 6               /* node Match this alternative, or the
        !           108:                                 *      next... */
        !           109: #define BACK   7               /* no   Match "", "next" ptr points backward. */
        !           110: #define EXACTLY 8              /* str  Match this string. */
        !           111: #define NOTHING 9              /* no   Match empty string. */
        !           112: #define STAR   10              /* node Match this (simple) thing 0 or more
        !           113:                                 *      times. */
        !           114: #define PLUS   11              /* node Match this (simple) thing 1 or more
        !           115:                                 *      times. */
        !           116: #define BOW        12              /* no   Match "" after [^a-zA-Z0-9_] */
        !           117: #define EOW        13              /* no   Match "" at    [^a-zA-Z0-9_] */
        !           118: #define IDENT  14              /* no   Match identifier char */
        !           119: #define WORD   15              /* no   Match keyword char */
        !           120: #define FNAME   16             /* no   Match file name char */
        !           121: #define PRINT  17              /* no   Match printable char */
        !           122: #define SIDENT 18              /* no   Match identifier char but no digit */
        !           123: #define SWORD  19              /* no   Match word char but no digit */
        !           124: #define SFNAME 20              /* no   Match file name char but no digit */
        !           125: #define SPRINT 21              /* no   Match printable char but no digit */
        !           126: #define MOPEN  30              /* no   Mark this point in input as start of
        !           127:                                 *      #n. */
        !           128:  /* MOPEN+1 is number 1, etc. */
        !           129: #define MCLOSE 40              /* no   Analogous to MOPEN. */
        !           130: #define BACKREF    50              /* node Match same string again \1-\9 */
        !           131:
        !           132: #define Magic(x)   ((x)|('\\'<<8))
        !           133:
        !           134: /*
        !           135:  * Opcode notes:
        !           136:  *
        !           137:  * BRANCH      The set of branches constituting a single choice are hooked
        !           138:  *             together with their "next" pointers, since precedence prevents
        !           139:  *             anything being concatenated to any individual branch.  The
        !           140:  *             "next" pointer of the last BRANCH in a choice points to the
        !           141:  *             thing following the whole choice.  This is also where the
        !           142:  *             final "next" pointer of each individual branch points; each
        !           143:  *             branch starts with the operand node of a BRANCH node.
        !           144:  *
        !           145:  * BACK        Normal "next" pointers all implicitly point forward; BACK
        !           146:  *             exists to make loop structures possible.
        !           147:  *
        !           148:  * STAR,PLUS   '=', and complex '*' and '+', are implemented as circular
        !           149:  *             BRANCH structures using BACK.  Simple cases (one character
        !           150:  *             per match) are implemented with STAR and PLUS for speed
        !           151:  *             and to minimize recursive plunges.
        !           152:  *             Note: We would like to use "\?" instead of "\=", but a "\?"
        !           153:  *             can be part of a pattern to escape the special meaning of '?'
        !           154:  *             at the end of the pattern in "?pattern?e".
        !           155:  *
        !           156:  * MOPEN,MCLOSE    ...are numbered at compile time.
        !           157:  */
        !           158:
        !           159: /*
        !           160:  * A node is one char of opcode followed by two chars of "next" pointer.
        !           161:  * "Next" pointers are stored as two 8-bit pieces, high order first.  The
        !           162:  * value is a positive offset from the opcode of the node containing it.
        !           163:  * An operand, if any, simply follows the node.  (Note that much of the
        !           164:  * code generation knows about this implicit relationship.)
        !           165:  *
        !           166:  * Using two bytes for the "next" pointer is vast overkill for most things,
        !           167:  * but allows patterns to get big without disasters.
        !           168:  */
        !           169: #define OP(p)  (*(p))
        !           170: #define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
        !           171: #define OPERAND(p)     ((p) + 3)
        !           172:
        !           173: /*
        !           174:  * Utility definitions.
        !           175:  */
        !           176: #ifndef CHARBITS
        !           177: #define UCHARAT(p)     ((int)*(unsigned char *)(p))
        !           178: #else
        !           179: #define UCHARAT(p)     ((int)*(p)&CHARBITS)
        !           180: #endif
        !           181:
        !           182: #define EMSG_RETURN(m) { emsg(m); rc_did_emsg = TRUE; return NULL; }
        !           183:
        !           184: static int ismult __ARGS((int));
        !           185: static char_u *cstrchr __ARGS((char_u *, int));
        !           186:
        !           187:    static int
        !           188: ismult(c)
        !           189:    int c;
        !           190: {
        !           191:    return (c == Magic('*') || c == Magic('+') || c == Magic('='));
        !           192: }
        !           193:
        !           194: /*
        !           195:  * Flags to be passed up and down.
        !           196:  */
        !           197: #define HASWIDTH       01      /* Known never to match null string. */
        !           198: #define SIMPLE         02      /* Simple enough to be STAR/PLUS operand. */
        !           199: #define SPSTART        04      /* Starts with * or +. */
        !           200: #define WORST          0       /* Worst case. */
        !           201:
        !           202: /*
        !           203:  * The following allows empty REs, meaning "the same as the previous RE".
        !           204:  * per the ed(1) manual.
        !           205:  */
        !           206: /* #define EMPTY_RE */         /* this is done outside of regexp */
        !           207: #ifdef EMPTY_RE
        !           208: char_u        *reg_prev_re;
        !           209: #endif
        !           210:
        !           211: #define TILDE
        !           212: #ifdef TILDE
        !           213: char_u        *reg_prev_sub;
        !           214: #endif
        !           215:
        !           216: /*
        !           217:  * Global work variables for vim_regcomp().
        !           218:  */
        !           219:
        !           220: static char_u    *regparse;    /* Input-scan pointer. */
        !           221: static int     regnpar;        /* () count. */
        !           222: static char_u  regdummy;
        !           223: static char_u   *regcode;      /* Code-emit pointer; &regdummy = don't. */
        !           224: static long    regsize;        /* Code size. */
        !           225: static char_u   **regendp;     /* Ditto for endp. */
        !           226:
        !           227: /*
        !           228:  * META contains all characters that may be magic, except '^' and '$'.
        !           229:  * This depends on the configuration options TILDE, BACKREF.
        !           230:  * (could be done simpler for compilers that know string concatenation)
        !           231:  */
        !           232:
        !           233: #ifdef TILDE
        !           234: # ifdef BACKREF
        !           235:        static char_u META[] = ".[()|=+*<>iIkKfFpP~123456789";
        !           236: # else
        !           237:        static char_u META[] = ".[()|=+*<>iIkKfFpP~";
        !           238: # endif
        !           239: #else
        !           240: # ifdef BACKREF
        !           241:        static char_u META[] = ".[()|=+*<>iIkKfFpP123456789";
        !           242: # else
        !           243:        static char_u META[] = ".[()|=+*<>iIkKfFpP";
        !           244: # endif
        !           245: #endif
        !           246:
        !           247: /*
        !           248:  * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
        !           249:  * These are:
        !           250:  * \r  - New line (CR).
        !           251:  * \t  - Tab (TAB).
        !           252:  * \e  - Escape (ESC).
        !           253:  * \b  - Backspace (Ctrl('H')).
        !           254:  */
        !           255: static char_u REGEXP_ABBR[] = "rteb";
        !           256:
        !           257: /*
        !           258:  * Forward declarations for vim_regcomp()'s friends.
        !           259:  */
        !           260: static void        initchr __ARGS((char_u *));
        !           261: static int     getchr __ARGS((void));
        !           262: static int     peekchr __ARGS((void));
        !           263: #define PeekChr() curchr   /* shortcut only when last action was peekchr() */
        !           264: static int         curchr;
        !           265: static void        skipchr __ARGS((void));
        !           266: static void        ungetchr __ARGS((void));
        !           267: static char_u    *reg __ARGS((int, int *));
        !           268: static char_u    *regbranch __ARGS((int *));
        !           269: static char_u    *regpiece __ARGS((int *));
        !           270: static char_u    *regatom __ARGS((int *));
        !           271: static char_u    *regnode __ARGS((int));
        !           272: static char_u    *regnext __ARGS((char_u *));
        !           273: static void    regc __ARGS((int));
        !           274: static void    unregc __ARGS((void));
        !           275: static void    reginsert __ARGS((int, char_u *));
        !           276: static void    regtail __ARGS((char_u *, char_u *));
        !           277: static void    regoptail __ARGS((char_u *, char_u *));
        !           278:
        !           279: #ifndef HAVE_STRCSPN
        !           280: static size_t  strcspn __ARGS((const char_u *, const char_u *));
        !           281: #endif
        !           282:
        !           283: /*
        !           284:  * skip past regular expression
        !           285:  * stop at end of 'p' of where 'dirc' is found ('/', '?', etc)
        !           286:  * take care of characters with a backslash in front of it
        !           287:  * skip strings inside [ and ].
        !           288:  */
        !           289:    char_u *
        !           290: skip_regexp(p, dirc)
        !           291:    char_u  *p;
        !           292:    int     dirc;
        !           293: {
        !           294:    int     in_range = FALSE;
        !           295:
        !           296:    for (; p[0] != NUL; ++p)
        !           297:    {
        !           298:        if (p[0] == dirc && !in_range)      /* found end of regexp */
        !           299:            break;
        !           300:        if ((p[0] == '[' && p_magic) || (p[0] == '\\' && p[1] == '[' && !p_magic))
        !           301:        {
        !           302:            in_range = TRUE;
        !           303:            if (p[0] == '\\')
        !           304:                ++p;
        !           305:                                /* "[^]" and "[]" are not the end of a range */
        !           306:            if (p[1] == '^')
        !           307:                ++p;
        !           308:            if (p[1] == ']')
        !           309:                ++p;
        !           310:        }
        !           311:        else if (p[0] == '\\' && p[1] != NUL)
        !           312:            ++p;    /* skip next character */
        !           313:        else if (p[0] == ']')
        !           314:            in_range = FALSE;
        !           315:    }
        !           316:    return p;
        !           317: }
        !           318:
        !           319: /*
        !           320:  - vim_regcomp - compile a regular expression into internal code
        !           321:  *
        !           322:  * We can't allocate space until we know how big the compiled form will be,
        !           323:  * but we can't compile it (and thus know how big it is) until we've got a
        !           324:  * place to put the code.  So we cheat:  we compile it twice, once with code
        !           325:  * generation turned off and size counting turned on, and once "for real".
        !           326:  * This also means that we don't allocate space until we are sure that the
        !           327:  * thing really will compile successfully, and we never have to move the
        !           328:  * code and thus invalidate pointers into it.  (Note that it has to be in
        !           329:  * one piece because vim_free() must be able to free it all.)
        !           330:  *
        !           331:  * Beware that the optimization-preparation code in here knows about some
        !           332:  * of the structure of the compiled regexp.
        !           333:  */
        !           334:    regexp         *
        !           335: vim_regcomp(exp)
        !           336:    char_u         *exp;
        !           337: {
        !           338:    register regexp *r;
        !           339:    register char_u  *scan;
        !           340:    register char_u  *longest;
        !           341:    register int    len;
        !           342:    int             flags;
        !           343: /* extern char    *malloc();*/
        !           344:
        !           345:    if (exp == NULL)
        !           346:        EMSG_RETURN(e_null);
        !           347:
        !           348: #ifdef EMPTY_RE            /* this is done outside of regexp */
        !           349:    if (*exp == '\0')
        !           350:    {
        !           351:        if (reg_prev_re)
        !           352:            exp = reg_prev_re;
        !           353:        else
        !           354:            EMSG_RETURN(e_noprevre);
        !           355:    }
        !           356: #endif
        !           357:
        !           358:    /* First pass: determine size, legality. */
        !           359:    initchr((char_u *)exp);
        !           360:    regnpar = 1;
        !           361:    regsize = 0L;
        !           362:    regcode = &regdummy;
        !           363:    regendp = NULL;
        !           364:    regc(MAGIC);
        !           365:    if (reg(0, &flags) == NULL)
        !           366:        return NULL;
        !           367:
        !           368:    /* Small enough for pointer-storage convention? */
        !           369:    if (regsize >= 32767L)      /* Probably could be 65535L. */
        !           370:        EMSG_RETURN(e_toolong);
        !           371:
        !           372:    /* Allocate space. */
        !           373: /* r = (regexp *) malloc((unsigned) (sizeof(regexp) + regsize));*/
        !           374:    r = (regexp *) alloc((unsigned) (sizeof(regexp) + regsize));
        !           375:    if (r == NULL)
        !           376:        return NULL;
        !           377:
        !           378: #ifdef EMPTY_RE            /* this is done outside of regexp */
        !           379:    if (exp != reg_prev_re) {
        !           380:        vim_free(reg_prev_re);
        !           381:        if (reg_prev_re = alloc(STRLEN(exp) + 1))
        !           382:            STRCPY(reg_prev_re, exp);
        !           383:    }
        !           384: #endif
        !           385:
        !           386:    /* Second pass: emit code. */
        !           387:    initchr((char_u *)exp);
        !           388:    regnpar = 1;
        !           389:    regcode = r->program;
        !           390:    regendp = r->endp;
        !           391:    regc(MAGIC);
        !           392:    if (reg(0, &flags) == NULL) {
        !           393:        vim_free(r);
        !           394:        return NULL;
        !           395:    }
        !           396:
        !           397:    /* Dig out information for optimizations. */
        !           398:    r->regstart = '\0';         /* Worst-case defaults. */
        !           399:    r->reganch = 0;
        !           400:    r->regmust = NULL;
        !           401:    r->regmlen = 0;
        !           402:    scan = r->program + 1;      /* First BRANCH. */
        !           403:    if (OP(regnext(scan)) == END) {     /* Only one top-level choice. */
        !           404:        scan = OPERAND(scan);
        !           405:
        !           406:        /* Starting-point info. */
        !           407:        if (OP(scan) == EXACTLY)
        !           408:            r->regstart = *OPERAND(scan);
        !           409:        else if (OP(scan) == BOL)
        !           410:            r->reganch++;
        !           411:
        !           412:        /*
        !           413:         * If there's something expensive in the r.e., find the longest
        !           414:         * literal string that must appear and make it the regmust.  Resolve
        !           415:         * ties in favor of later strings, since the regstart check works
        !           416:         * with the beginning of the r.e. and avoiding duplication
        !           417:         * strengthens checking.  Not a strong reason, but sufficient in the
        !           418:         * absence of others.
        !           419:         */
        !           420:        /*
        !           421:         * When the r.e. starts with BOW, it is faster to look for a regmust
        !           422:         * first. Used a lot for "#" and "*" commands. (Added by mool).
        !           423:         */
        !           424:        if (flags & SPSTART || OP(scan) == BOW) {
        !           425:            longest = NULL;
        !           426:            len = 0;
        !           427:            for (; scan != NULL; scan = regnext(scan))
        !           428:                if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len) {
        !           429:                    longest = OPERAND(scan);
        !           430:                    len = STRLEN(OPERAND(scan));
        !           431:                }
        !           432:            r->regmust = longest;
        !           433:            r->regmlen = len;
        !           434:        }
        !           435:    }
        !           436: #ifdef DEBUG
        !           437:    regdump(r);
        !           438: #endif
        !           439:    return r;
        !           440: }
        !           441:
        !           442: /*
        !           443:  - reg - regular expression, i.e. main body or parenthesized thing
        !           444:  *
        !           445:  * Caller must absorb opening parenthesis.
        !           446:  *
        !           447:  * Combining parenthesis handling with the base level of regular expression
        !           448:  * is a trifle forced, but the need to tie the tails of the branches to what
        !           449:  * follows makes it hard to avoid.
        !           450:  */
        !           451:    static char_u *
        !           452: reg(paren, flagp)
        !           453:    int             paren;      /* Parenthesized? */
        !           454:    int            *flagp;
        !           455: {
        !           456:    register char_u  *ret;
        !           457:    register char_u  *br;
        !           458:    register char_u  *ender;
        !           459:    register int    parno = 0;
        !           460:    int             flags;
        !           461:
        !           462:    *flagp = HASWIDTH;          /* Tentatively. */
        !           463:
        !           464:    /* Make an MOPEN node, if parenthesized. */
        !           465:    if (paren) {
        !           466:        if (regnpar >= NSUBEXP)
        !           467:            EMSG_RETURN(e_toombra);
        !           468:        parno = regnpar;
        !           469:        regnpar++;
        !           470:        ret = regnode(MOPEN + parno);
        !           471:        if (regendp)
        !           472:            regendp[parno] = NULL;  /* haven't seen the close paren yet */
        !           473:    } else
        !           474:        ret = NULL;
        !           475:
        !           476:    /* Pick up the branches, linking them together. */
        !           477:    br = regbranch(&flags);
        !           478:    if (br == NULL)
        !           479:        return NULL;
        !           480:    if (ret != NULL)
        !           481:        regtail(ret, br);       /* MOPEN -> first. */
        !           482:    else
        !           483:        ret = br;
        !           484:    if (!(flags & HASWIDTH))
        !           485:        *flagp &= ~HASWIDTH;
        !           486:    *flagp |= flags & SPSTART;
        !           487:    while (peekchr() == Magic('|')) {
        !           488:        skipchr();
        !           489:        br = regbranch(&flags);
        !           490:        if (br == NULL)
        !           491:            return NULL;
        !           492:        regtail(ret, br);       /* BRANCH -> BRANCH. */
        !           493:        if (!(flags & HASWIDTH))
        !           494:            *flagp &= ~HASWIDTH;
        !           495:        *flagp |= flags & SPSTART;
        !           496:    }
        !           497:
        !           498:    /* Make a closing node, and hook it on the end. */
        !           499:    ender = regnode((paren) ? MCLOSE + parno : END);
        !           500:    regtail(ret, ender);
        !           501:
        !           502:    /* Hook the tails of the branches to the closing node. */
        !           503:    for (br = ret; br != NULL; br = regnext(br))
        !           504:        regoptail(br, ender);
        !           505:
        !           506:    /* Check for proper termination. */
        !           507:    if (paren && getchr() != Magic(')'))
        !           508:        EMSG_RETURN(e_toombra)
        !           509:    else if (!paren && peekchr() != '\0')
        !           510:    {
        !           511:        if (PeekChr() == Magic(')'))
        !           512:            EMSG_RETURN(e_toomket)
        !           513:        else
        !           514:            EMSG_RETURN(e_trailing)     /* "Can't happen". */
        !           515:        /* NOTREACHED */
        !           516:    }
        !           517:    /*
        !           518:     * Here we set the flag allowing back references to this set of
        !           519:     * parentheses.
        !           520:     */
        !           521:    if (paren && regendp)
        !           522:        regendp[parno] = ender; /* have seen the close paren */
        !           523:    return ret;
        !           524: }
        !           525:
        !           526: /*
        !           527:  - regbranch - one alternative of an | operator
        !           528:  *
        !           529:  * Implements the concatenation operator.
        !           530:  */
        !           531:    static char_u    *
        !           532: regbranch(flagp)
        !           533:    int            *flagp;
        !           534: {
        !           535:    register char_u  *ret;
        !           536:    register char_u  *chain;
        !           537:    register char_u  *latest;
        !           538:    int             flags;
        !           539:
        !           540:    *flagp = WORST;             /* Tentatively. */
        !           541:
        !           542:    ret = regnode(BRANCH);
        !           543:    chain = NULL;
        !           544:    while (peekchr() != '\0' && PeekChr() != Magic('|') && PeekChr() != Magic(')')) {
        !           545:        latest = regpiece(&flags);
        !           546:        if (latest == NULL)
        !           547:            return NULL;
        !           548:        *flagp |= flags & HASWIDTH;
        !           549:        if (chain == NULL)      /* First piece. */
        !           550:            *flagp |= flags & SPSTART;
        !           551:        else
        !           552:            regtail(chain, latest);
        !           553:        chain = latest;
        !           554:    }
        !           555:    if (chain == NULL)          /* Loop ran zero times. */
        !           556:        (void) regnode(NOTHING);
        !           557:
        !           558:    return ret;
        !           559: }
        !           560:
        !           561: /*
        !           562:  - regpiece - something followed by possible [*+=]
        !           563:  *
        !           564:  * Note that the branching code sequences used for = and the general cases
        !           565:  * of * and + are somewhat optimized:  they use the same NOTHING node as
        !           566:  * both the endmarker for their branch list and the body of the last branch.
        !           567:  * It might seem that this node could be dispensed with entirely, but the
        !           568:  * endmarker role is not redundant.
        !           569:  */
        !           570: static char_u    *
        !           571: regpiece(flagp)
        !           572:    int            *flagp;
        !           573: {
        !           574:    register char_u  *ret;
        !           575:    register int    op;
        !           576:    register char_u  *next;
        !           577:    int             flags;
        !           578:
        !           579:    ret = regatom(&flags);
        !           580:    if (ret == NULL)
        !           581:        return NULL;
        !           582:
        !           583:    op = peekchr();
        !           584:    if (!ismult(op)) {
        !           585:        *flagp = flags;
        !           586:        return ret;
        !           587:    }
        !           588:    if (!(flags & HASWIDTH) && op != Magic('='))
        !           589:        EMSG_RETURN((char_u *)"*+ operand could be empty");
        !           590:    *flagp = (op != Magic('+')) ? (WORST | SPSTART) : (WORST | HASWIDTH);
        !           591:
        !           592:    if (op == Magic('*') && (flags & SIMPLE))
        !           593:        reginsert(STAR, ret);
        !           594:    else if (op == Magic('*')) {
        !           595:        /* Emit x* as (x&|), where & means "self". */
        !           596:        reginsert(BRANCH, ret); /* Either x */
        !           597:        regoptail(ret, regnode(BACK));  /* and loop */
        !           598:        regoptail(ret, ret);    /* back */
        !           599:        regtail(ret, regnode(BRANCH));  /* or */
        !           600:        regtail(ret, regnode(NOTHING)); /* null. */
        !           601:    } else if (op == Magic('+') && (flags & SIMPLE))
        !           602:        reginsert(PLUS, ret);
        !           603:    else if (op == Magic('+')) {
        !           604:        /* Emit x+ as x(&|), where & means "self". */
        !           605:        next = regnode(BRANCH); /* Either */
        !           606:        regtail(ret, next);
        !           607:        regtail(regnode(BACK), ret);    /* loop back */
        !           608:        regtail(next, regnode(BRANCH)); /* or */
        !           609:        regtail(ret, regnode(NOTHING)); /* null. */
        !           610:    } else if (op == Magic('=')) {
        !           611:        /* Emit x= as (x|) */
        !           612:        reginsert(BRANCH, ret); /* Either x */
        !           613:        regtail(ret, regnode(BRANCH));  /* or */
        !           614:        next = regnode(NOTHING);/* null. */
        !           615:        regtail(ret, next);
        !           616:        regoptail(ret, next);
        !           617:    }
        !           618:    skipchr();
        !           619:    if (ismult(peekchr()))
        !           620:        EMSG_RETURN((char_u *)"Nested *=+");
        !           621:
        !           622:    return ret;
        !           623: }
        !           624:
        !           625: /*
        !           626:  - regatom - the lowest level
        !           627:  *
        !           628:  * Optimization:  gobbles an entire sequence of ordinary characters so that
        !           629:  * it can turn them into a single node, which is smaller to store and
        !           630:  * faster to run.
        !           631:  */
        !           632: static char_u    *
        !           633: regatom(flagp)
        !           634:    int            *flagp;
        !           635: {
        !           636:    register char_u  *ret;
        !           637:    int             flags;
        !           638:
        !           639:    *flagp = WORST;             /* Tentatively. */
        !           640:
        !           641:    switch (getchr()) {
        !           642:      case Magic('^'):
        !           643:        ret = regnode(BOL);
        !           644:        break;
        !           645:      case Magic('$'):
        !           646:        ret = regnode(EOL);
        !           647:        break;
        !           648:      case Magic('<'):
        !           649:        ret = regnode(BOW);
        !           650:        break;
        !           651:      case Magic('>'):
        !           652:        ret = regnode(EOW);
        !           653:        break;
        !           654:      case Magic('.'):
        !           655:        ret = regnode(ANY);
        !           656:        *flagp |= HASWIDTH | SIMPLE;
        !           657:        break;
        !           658:      case Magic('i'):
        !           659:        ret = regnode(IDENT);
        !           660:        *flagp |= HASWIDTH | SIMPLE;
        !           661:        break;
        !           662:      case Magic('k'):
        !           663:        ret = regnode(WORD);
        !           664:        *flagp |= HASWIDTH | SIMPLE;
        !           665:        break;
        !           666:      case Magic('I'):
        !           667:        ret = regnode(SIDENT);
        !           668:        *flagp |= HASWIDTH | SIMPLE;
        !           669:        break;
        !           670:      case Magic('K'):
        !           671:        ret = regnode(SWORD);
        !           672:        *flagp |= HASWIDTH | SIMPLE;
        !           673:        break;
        !           674:      case Magic('f'):
        !           675:        ret = regnode(FNAME);
        !           676:        *flagp |= HASWIDTH | SIMPLE;
        !           677:        break;
        !           678:      case Magic('F'):
        !           679:        ret = regnode(SFNAME);
        !           680:        *flagp |= HASWIDTH | SIMPLE;
        !           681:        break;
        !           682:      case Magic('p'):
        !           683:        ret = regnode(PRINT);
        !           684:        *flagp |= HASWIDTH | SIMPLE;
        !           685:        break;
        !           686:      case Magic('P'):
        !           687:        ret = regnode(SPRINT);
        !           688:        *flagp |= HASWIDTH | SIMPLE;
        !           689:        break;
        !           690:      case Magic('('):
        !           691:        ret = reg(1, &flags);
        !           692:        if (ret == NULL)
        !           693:            return NULL;
        !           694:        *flagp |= flags & (HASWIDTH | SPSTART);
        !           695:        break;
        !           696:      case '\0':
        !           697:      case Magic('|'):
        !           698:      case Magic(')'):
        !           699:        EMSG_RETURN(e_internal)     /* Supposed to be caught earlier. */
        !           700:        /* NOTREACHED */
        !           701:      case Magic('='):
        !           702:        EMSG_RETURN((char_u *)"\\= follows nothing")
        !           703:        /* NOTREACHED */
        !           704:      case Magic('+'):
        !           705:        EMSG_RETURN((char_u *)"\\+ follows nothing")
        !           706:        /* NOTREACHED */
        !           707:      case Magic('*'):
        !           708:        if (reg_magic)
        !           709:            EMSG_RETURN((char_u *)"* follows nothing")
        !           710:        else
        !           711:            EMSG_RETURN((char_u *)"\\* follows nothing")
        !           712:        /* break; Not Reached */
        !           713: #ifdef TILDE
        !           714:      case Magic('~'):          /* previous substitute pattern */
        !           715:            if (reg_prev_sub) {
        !           716:                register char_u *p;
        !           717:
        !           718:                ret = regnode(EXACTLY);
        !           719:                p = reg_prev_sub;
        !           720:                while (*p) {
        !           721:                    regc(*p++);
        !           722:                }
        !           723:                regc('\0');
        !           724:                if (p - reg_prev_sub) {
        !           725:                    *flagp |= HASWIDTH;
        !           726:                    if ((p - reg_prev_sub) == 1)
        !           727:                        *flagp |= SIMPLE;
        !           728:                }
        !           729:            } else
        !           730:                EMSG_RETURN(e_nopresub);
        !           731:            break;
        !           732: #endif
        !           733: #ifdef BACKREF
        !           734:      case Magic('1'):
        !           735:      case Magic('2'):
        !           736:      case Magic('3'):
        !           737:      case Magic('4'):
        !           738:      case Magic('5'):
        !           739:      case Magic('6'):
        !           740:      case Magic('7'):
        !           741:      case Magic('8'):
        !           742:      case Magic('9'): {
        !           743:            int             refnum;
        !           744:
        !           745:            ungetchr();
        !           746:            refnum = getchr() - Magic('0');
        !           747:            /*
        !           748:             * Check if the back reference is legal. We use the parentheses
        !           749:             * pointers to mark encountered close parentheses, but this
        !           750:             * is only available in the second pass. Checking opens is
        !           751:             * always possible.
        !           752:             * Should also check that we don't refer to something that
        !           753:             * is repeated (+*=): what instance of the repetition should
        !           754:             * we match? TODO.
        !           755:             */
        !           756:            if (refnum < regnpar &&
        !           757:                (regendp == NULL || regendp[refnum] != NULL))
        !           758:                ret = regnode(BACKREF + refnum);
        !           759:            else
        !           760:                EMSG_RETURN((char_u *)"Illegal back reference");
        !           761:        }
        !           762:        break;
        !           763: #endif
        !           764:      case Magic('['):
        !           765:        {
        !           766:            char_u      *p;
        !           767:
        !           768:            /*
        !           769:             * If there is no matching ']', we assume the '[' is a normal
        !           770:             * character. This makes ":help [" work.
        !           771:             */
        !           772:            p = regparse;
        !           773:            if (*p == '^')  /* Complement of range. */
        !           774:                ++p;
        !           775:            if (*p == ']' || *p == '-')
        !           776:                ++p;
        !           777:            while (*p != '\0' && *p != ']')
        !           778:            {
        !           779:                if (*p == '-')
        !           780:                {
        !           781:                    ++p;
        !           782:                    if (*p != ']' && *p != '\0')
        !           783:                        ++p;
        !           784:                }
        !           785:                else if (*p == '\\' && p[1] != '\0')
        !           786:                    p += 2;
        !           787:                else
        !           788:                    ++p;
        !           789:            }
        !           790:            if (*p == ']')      /* there is a matching ']' */
        !           791:            {
        !           792:                /*
        !           793:                 * In a character class, different parsing rules apply.
        !           794:                 * Not even \ is special anymore, nothing is.
        !           795:                 */
        !           796:                if (*regparse == '^') {     /* Complement of range. */
        !           797:                    ret = regnode(ANYBUT);
        !           798:                    regparse++;
        !           799:                } else
        !           800:                    ret = regnode(ANYOF);
        !           801:                if (*regparse == ']' || *regparse == '-')
        !           802:                    regc(*regparse++);
        !           803:                while (*regparse != '\0' && *regparse != ']') {
        !           804:                    if (*regparse == '-') {
        !           805:                        regparse++;
        !           806:                        if (*regparse == ']' || *regparse == '\0')
        !           807:                            regc('-');
        !           808:                        else {
        !           809:                            register int    cclass;
        !           810:                            register int    cclassend;
        !           811:
        !           812:                            cclass = UCHARAT(regparse - 2) + 1;
        !           813:                            cclassend = UCHARAT(regparse);
        !           814:                            if (cclass > cclassend + 1)
        !           815:                                EMSG_RETURN(e_invrange);
        !           816:                            for (; cclass <= cclassend; cclass++)
        !           817:                                regc(cclass);
        !           818:                            regparse++;
        !           819:                        }
        !           820:                    } else if (*regparse == '\\' && regparse[1]) {
        !           821:                        regparse++;
        !           822:                        regc(*regparse++);
        !           823:                    } else
        !           824:                        regc(*regparse++);
        !           825:                }
        !           826:                regc('\0');
        !           827:                if (*regparse != ']')
        !           828:                    EMSG_RETURN(e_toomsbra);
        !           829:                skipchr();          /* let's be friends with the lexer again */
        !           830:                *flagp |= HASWIDTH | SIMPLE;
        !           831:                break;
        !           832:            }
        !           833:        }
        !           834:        /* FALLTHROUGH */
        !           835:
        !           836:      default:
        !           837:        {
        !           838:            register int    len;
        !           839:            int             chr;
        !           840:
        !           841:            ungetchr();
        !           842:            len = 0;
        !           843:            ret = regnode(EXACTLY);
        !           844:            /*
        !           845:             * Always take at least one character, for '[' without matching
        !           846:             * ']'.
        !           847:             */
        !           848:            while ((chr = peekchr()) != '\0' && (chr < Magic(0) || len == 0))
        !           849:            {
        !           850:                regc(chr);
        !           851:                skipchr();
        !           852:                len++;
        !           853:            }
        !           854: #ifdef DEBUG
        !           855:            if (len == 0)
        !           856:                 EMSG_RETURN((char_u *)"Unexpected magic character; check META.");
        !           857: #endif
        !           858:            /*
        !           859:             * If there is a following *, \+ or \= we need the character
        !           860:             * in front of it as a single character operand
        !           861:             */
        !           862:            if (len > 1 && ismult(chr))
        !           863:            {
        !           864:                unregc();           /* Back off of *+= operand */
        !           865:                ungetchr();         /* and put it back for next time */
        !           866:                --len;
        !           867:            }
        !           868:            regc('\0');
        !           869:            *flagp |= HASWIDTH;
        !           870:            if (len == 1)
        !           871:                *flagp |= SIMPLE;
        !           872:        }
        !           873:        break;
        !           874:    }
        !           875:
        !           876:    return ret;
        !           877: }
        !           878:
        !           879: /*
        !           880:  - regnode - emit a node
        !           881:  */
        !           882: static char_u    *             /* Location. */
        !           883: regnode(op)
        !           884:    int         op;
        !           885: {
        !           886:    register char_u  *ret;
        !           887:    register char_u  *ptr;
        !           888:
        !           889:    ret = regcode;
        !           890:    if (ret == &regdummy) {
        !           891:        regsize += 3;
        !           892:        return ret;
        !           893:    }
        !           894:    ptr = ret;
        !           895:    *ptr++ = op;
        !           896:    *ptr++ = '\0';              /* Null "next" pointer. */
        !           897:    *ptr++ = '\0';
        !           898:    regcode = ptr;
        !           899:
        !           900:    return ret;
        !           901: }
        !           902:
        !           903: /*
        !           904:  - regc - emit (if appropriate) a byte of code
        !           905:  */
        !           906: static void
        !           907: regc(b)
        !           908:    int         b;
        !           909: {
        !           910:    if (regcode != &regdummy)
        !           911:        *regcode++ = b;
        !           912:    else
        !           913:        regsize++;
        !           914: }
        !           915:
        !           916: /*
        !           917:  - unregc - take back (if appropriate) a byte of code
        !           918:  */
        !           919: static void
        !           920: unregc()
        !           921: {
        !           922:    if (regcode != &regdummy)
        !           923:        regcode--;
        !           924:    else
        !           925:        regsize--;
        !           926: }
        !           927:
        !           928: /*
        !           929:  - reginsert - insert an operator in front of already-emitted operand
        !           930:  *
        !           931:  * Means relocating the operand.
        !           932:  */
        !           933: static void
        !           934: reginsert(op, opnd)
        !           935:    int         op;
        !           936:    char_u         *opnd;
        !           937: {
        !           938:    register char_u  *src;
        !           939:    register char_u  *dst;
        !           940:    register char_u  *place;
        !           941:
        !           942:    if (regcode == &regdummy) {
        !           943:        regsize += 3;
        !           944:        return;
        !           945:    }
        !           946:    src = regcode;
        !           947:    regcode += 3;
        !           948:    dst = regcode;
        !           949:    while (src > opnd)
        !           950:        *--dst = *--src;
        !           951:
        !           952:    place = opnd;               /* Op node, where operand used to be. */
        !           953:    *place++ = op;
        !           954:    *place++ = '\0';
        !           955:    *place = '\0';
        !           956: }
        !           957:
        !           958: /*
        !           959:  - regtail - set the next-pointer at the end of a node chain
        !           960:  */
        !           961: static void
        !           962: regtail(p, val)
        !           963:    char_u         *p;
        !           964:    char_u         *val;
        !           965: {
        !           966:    register char_u  *scan;
        !           967:    register char_u  *temp;
        !           968:    register int    offset;
        !           969:
        !           970:    if (p == &regdummy)
        !           971:        return;
        !           972:
        !           973:    /* Find last node. */
        !           974:    scan = p;
        !           975:    for (;;) {
        !           976:        temp = regnext(scan);
        !           977:        if (temp == NULL)
        !           978:            break;
        !           979:        scan = temp;
        !           980:    }
        !           981:
        !           982:    if (OP(scan) == BACK)
        !           983:        offset = (int)(scan - val);
        !           984:    else
        !           985:        offset = (int)(val - scan);
        !           986:    *(scan + 1) = (char_u) ((offset >> 8) & 0377);
        !           987:    *(scan + 2) = (char_u) (offset & 0377);
        !           988: }
        !           989:
        !           990: /*
        !           991:  - regoptail - regtail on operand of first argument; nop if operandless
        !           992:  */
        !           993: static void
        !           994: regoptail(p, val)
        !           995:    char_u         *p;
        !           996:    char_u         *val;
        !           997: {
        !           998:    /* "Operandless" and "op != BRANCH" are synonymous in practice. */
        !           999:    if (p == NULL || p == &regdummy || OP(p) != BRANCH)
        !          1000:        return;
        !          1001:    regtail(OPERAND(p), val);
        !          1002: }
        !          1003:
        !          1004: /*
        !          1005:  - getchr() - get the next character from the pattern. We know about
        !          1006:  * magic and such, so therefore we need a lexical analyzer.
        !          1007:  */
        !          1008:
        !          1009: /* static int      curchr; */
        !          1010: static int     prevchr;
        !          1011: static int     nextchr;    /* used for ungetchr() */
        !          1012: /*
        !          1013:  * Note: prevchr is sometimes -1 when we are not at the start,
        !          1014:  * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
        !          1015:  * taken to be magic -- webb
        !          1016:  */
        !          1017: static int     at_start;   /* True when we are on the first character */
        !          1018:
        !          1019: static void
        !          1020: initchr(str)
        !          1021: char_u *str;
        !          1022: {
        !          1023:    regparse = str;
        !          1024:    curchr = prevchr = nextchr = -1;
        !          1025:    at_start = TRUE;
        !          1026: }
        !          1027:
        !          1028: static int
        !          1029: peekchr()
        !          1030: {
        !          1031:    if (curchr < 0) {
        !          1032:        switch (curchr = regparse[0]) {
        !          1033:        case '.':
        !          1034:    /*  case '+':*/
        !          1035:    /*  case '=':*/
        !          1036:        case '[':
        !          1037:        case '~':
        !          1038:            if (reg_magic)
        !          1039:                curchr = Magic(curchr);
        !          1040:            break;
        !          1041:        case '*':
        !          1042:            /* * is not magic as the very first character, eg "?*ptr" */
        !          1043:            if (reg_magic && !at_start)
        !          1044:                curchr = Magic('*');
        !          1045:            break;
        !          1046:        case '^':
        !          1047:            /* ^ is only magic as the very first character */
        !          1048:            if (at_start)
        !          1049:                curchr = Magic('^');
        !          1050:            break;
        !          1051:        case '$':
        !          1052:            /* $ is only magic as the very last character and in front of '\|' */
        !          1053:            if (regparse[1] == NUL || (regparse[1] == '\\' && regparse[2] == '|'))
        !          1054:                curchr = Magic('$');
        !          1055:            break;
        !          1056:        case '\\':
        !          1057:            regparse++;
        !          1058:            if (regparse[0] == NUL)
        !          1059:                curchr = '\\';  /* trailing '\' */
        !          1060:            else if (vim_strchr(META, regparse[0]))
        !          1061:            {
        !          1062:                /*
        !          1063:                 * META contains everything that may be magic sometimes, except
        !          1064:                 * ^ and $ ("\^" and "\$" are never magic).
        !          1065:                 * We now fetch the next character and toggle its magicness.
        !          1066:                 * Therefore, \ is so meta-magic that it is not in META.
        !          1067:                 */
        !          1068:                curchr = -1;
        !          1069:                at_start = FALSE;           /* We still want to be able to say "/\*ptr" */
        !          1070:                peekchr();
        !          1071:                curchr ^= Magic(0);
        !          1072:            }
        !          1073:            else if (vim_strchr(REGEXP_ABBR, regparse[0]))
        !          1074:            {
        !          1075:                /*
        !          1076:                 * Handle abbreviations, like "\t" for TAB -- webb
        !          1077:                 */
        !          1078:                switch (regparse[0])
        !          1079:                {
        !          1080:                    case 'r':   curchr = CR;        break;
        !          1081:                    case 't':   curchr = TAB;       break;
        !          1082:                    case 'e':   curchr = ESC;       break;
        !          1083:                    case 'b':   curchr = Ctrl('H'); break;
        !          1084:                }
        !          1085:            }
        !          1086:            else
        !          1087:            {
        !          1088:                /*
        !          1089:                 * Next character can never be (made) magic?
        !          1090:                 * Then backslashing it won't do anything.
        !          1091:                 */
        !          1092:                curchr = regparse[0];
        !          1093:            }
        !          1094:            break;
        !          1095:        }
        !          1096:    }
        !          1097:
        !          1098:    return curchr;
        !          1099: }
        !          1100:
        !          1101: static void
        !          1102: skipchr()
        !          1103: {
        !          1104:    regparse++;
        !          1105:    at_start = FALSE;
        !          1106:    prevchr = curchr;
        !          1107:    curchr = nextchr;       /* use previously unget char, or -1 */
        !          1108:    nextchr = -1;
        !          1109: }
        !          1110:
        !          1111: static int
        !          1112: getchr()
        !          1113: {
        !          1114:    int chr;
        !          1115:
        !          1116:    chr = peekchr();
        !          1117:    skipchr();
        !          1118:
        !          1119:    return chr;
        !          1120: }
        !          1121:
        !          1122: /*
        !          1123:  * put character back. Works only once!
        !          1124:  */
        !          1125: static void
        !          1126: ungetchr()
        !          1127: {
        !          1128:    nextchr = curchr;
        !          1129:    curchr = prevchr;
        !          1130:    /*
        !          1131:     * Backup regparse as well; not because we will use what it points at,
        !          1132:     * but because skipchr() will bump it again.
        !          1133:     */
        !          1134:    regparse--;
        !          1135: }
        !          1136:
        !          1137: /*
        !          1138:  * vim_regexec and friends
        !          1139:  */
        !          1140:
        !          1141: /*
        !          1142:  * Global work variables for vim_regexec().
        !          1143:  */
        !          1144: static char_u    *reginput;        /* String-input pointer. */
        !          1145: static char_u    *regbol;      /* Beginning of input, for ^ check. */
        !          1146: static char_u   **regstartp;       /* Pointer to startp array. */
        !          1147: /* static char_u   **regendp;  */  /* Ditto for endp. */
        !          1148:
        !          1149: /*
        !          1150:  * Forwards.
        !          1151:  */
        !          1152: static int     regtry __ARGS((regexp *, char_u *));
        !          1153: static int     regmatch __ARGS((char_u *));
        !          1154: static int     regrepeat __ARGS((char_u *));
        !          1155:
        !          1156: #ifdef DEBUG
        !          1157: int            regnarrate = 1;
        !          1158: void           regdump __ARGS((regexp *));
        !          1159: static char_u    *regprop __ARGS((char_u *));
        !          1160: #endif
        !          1161:
        !          1162: /*
        !          1163:  * vim_regexec - match a regexp against a string
        !          1164:  * Return non-zero if there is a match.
        !          1165:  */
        !          1166: int
        !          1167: vim_regexec(prog, string, at_bol)
        !          1168:    register regexp *prog;
        !          1169:    register char_u  *string;
        !          1170:    int             at_bol;
        !          1171: {
        !          1172:    register char_u  *s;
        !          1173:
        !          1174:    /* Be paranoid... */
        !          1175:    if (prog == NULL || string == NULL) {
        !          1176:        emsg(e_null);
        !          1177:        rc_did_emsg = TRUE;
        !          1178:        return 0;
        !          1179:    }
        !          1180:    /* Check validity of program. */
        !          1181:    if (UCHARAT(prog->program) != MAGIC) {
        !          1182:        emsg(e_re_corr);
        !          1183:        rc_did_emsg = TRUE;
        !          1184:        return 0;
        !          1185:    }
        !          1186:    /* If there is a "must appear" string, look for it. */
        !          1187:    if (prog->regmust != NULL) {
        !          1188:        s = string;
        !          1189:        while ((s = cstrchr(s, prog->regmust[0])) != NULL) {
        !          1190:            if (cstrncmp(s, prog->regmust, prog->regmlen) == 0)
        !          1191:                break;          /* Found it. */
        !          1192:            s++;
        !          1193:        }
        !          1194:        if (s == NULL)          /* Not present. */
        !          1195:            return 0;
        !          1196:    }
        !          1197:    /* Mark beginning of line for ^ . */
        !          1198:    if (at_bol)
        !          1199:        regbol = string;        /* is possible to match bol */
        !          1200:    else
        !          1201:        regbol = NULL;          /* we aren't there, so don't match it */
        !          1202:
        !          1203:    /* Simplest case:  anchored match need be tried only once. */
        !          1204:    if (prog->reganch)
        !          1205:        return regtry(prog, string);
        !          1206:
        !          1207:    /* Messy cases:  unanchored match. */
        !          1208:    s = string;
        !          1209:    if (prog->regstart != '\0')
        !          1210:        /* We know what char it must start with. */
        !          1211:        while ((s = cstrchr(s, prog->regstart)) != NULL) {
        !          1212:            if (regtry(prog, s))
        !          1213:                return 1;
        !          1214:            s++;
        !          1215:        }
        !          1216:    else
        !          1217:        /* We don't -- general case. */
        !          1218:        do {
        !          1219:            if (regtry(prog, s))
        !          1220:                return 1;
        !          1221:        } while (*s++ != '\0');
        !          1222:
        !          1223:    /* Failure. */
        !          1224:    return 0;
        !          1225: }
        !          1226:
        !          1227: /*
        !          1228:  - regtry - try match at specific point
        !          1229:  */
        !          1230: static int                     /* 0 failure, 1 success */
        !          1231: regtry(prog, string)
        !          1232:    regexp         *prog;
        !          1233:    char_u         *string;
        !          1234: {
        !          1235:    register int    i;
        !          1236:    register char_u **sp;
        !          1237:    register char_u **ep;
        !          1238:
        !          1239:    reginput = string;
        !          1240:    regstartp = prog->startp;
        !          1241:    regendp = prog->endp;
        !          1242:
        !          1243:    sp = prog->startp;
        !          1244:    ep = prog->endp;
        !          1245:    for (i = NSUBEXP; i > 0; i--) {
        !          1246:        *sp++ = NULL;
        !          1247:        *ep++ = NULL;
        !          1248:    }
        !          1249:    if (regmatch(prog->program + 1)) {
        !          1250:        prog->startp[0] = string;
        !          1251:        prog->endp[0] = reginput;
        !          1252:        return 1;
        !          1253:    } else
        !          1254:        return 0;
        !          1255: }
        !          1256:
        !          1257: /*
        !          1258:  - regmatch - main matching routine
        !          1259:  *
        !          1260:  * Conceptually the strategy is simple:  check to see whether the current
        !          1261:  * node matches, call self recursively to see whether the rest matches,
        !          1262:  * and then act accordingly.  In practice we make some effort to avoid
        !          1263:  * recursion, in particular by going through "ordinary" nodes (that don't
        !          1264:  * need to know whether the rest of the match failed) by a loop instead of
        !          1265:  * by recursion.
        !          1266:  */
        !          1267: static int                     /* 0 failure, 1 success */
        !          1268: regmatch(prog)
        !          1269:    char_u         *prog;
        !          1270: {
        !          1271:    register char_u  *scan;     /* Current node. */
        !          1272:    char_u         *next;       /* Next node. */
        !          1273:
        !          1274:    scan = prog;
        !          1275: #ifdef DEBUG
        !          1276:    if (scan != NULL && regnarrate)
        !          1277:        fprintf(stderr, "%s(\n", regprop(scan));
        !          1278: #endif
        !          1279:    while (scan != NULL) {
        !          1280: #ifdef DEBUG
        !          1281:        if (regnarrate) {
        !          1282:            fprintf(stderr, "%s...\n", regprop(scan));
        !          1283:        }
        !          1284: #endif
        !          1285:        next = regnext(scan);
        !          1286:        switch (OP(scan)) {
        !          1287:          case BOL:
        !          1288:            if (reginput != regbol)
        !          1289:                return 0;
        !          1290:            break;
        !          1291:          case EOL:
        !          1292:            if (*reginput != '\0')
        !          1293:                return 0;
        !          1294:            break;
        !          1295:          case BOW:     /* \<word; reginput points to w */
        !          1296:            if (reginput != regbol && iswordchar(reginput[-1]))
        !          1297:                return 0;
        !          1298:            if (!reginput[0] || !iswordchar(reginput[0]))
        !          1299:                return 0;
        !          1300:            break;
        !          1301:          case EOW:     /* word\>; reginput points after d */
        !          1302:            if (reginput == regbol || !iswordchar(reginput[-1]))
        !          1303:                return 0;
        !          1304:            if (reginput[0] && iswordchar(reginput[0]))
        !          1305:                return 0;
        !          1306:            break;
        !          1307:          case ANY:
        !          1308:            if (*reginput == '\0')
        !          1309:                return 0;
        !          1310:            reginput++;
        !          1311:            break;
        !          1312:          case IDENT:
        !          1313:            if (!isidchar(*reginput))
        !          1314:                return 0;
        !          1315:            reginput++;
        !          1316:            break;
        !          1317:          case WORD:
        !          1318:            if (!iswordchar(*reginput))
        !          1319:                return 0;
        !          1320:            reginput++;
        !          1321:            break;
        !          1322:          case FNAME:
        !          1323:            if (!isfilechar(*reginput))
        !          1324:                return 0;
        !          1325:            reginput++;
        !          1326:            break;
        !          1327:          case PRINT:
        !          1328:            if (charsize(*reginput) != 1)
        !          1329:                return 0;
        !          1330:            reginput++;
        !          1331:            break;
        !          1332:          case SIDENT:
        !          1333:            if (isdigit(*reginput) || !isidchar(*reginput))
        !          1334:                return 0;
        !          1335:            reginput++;
        !          1336:            break;
        !          1337:          case SWORD:
        !          1338:            if (isdigit(*reginput) || !iswordchar(*reginput))
        !          1339:                return 0;
        !          1340:            reginput++;
        !          1341:            break;
        !          1342:          case SFNAME:
        !          1343:            if (isdigit(*reginput) || !isfilechar(*reginput))
        !          1344:                return 0;
        !          1345:            reginput++;
        !          1346:            break;
        !          1347:          case SPRINT:
        !          1348:            if (isdigit(*reginput) || charsize(*reginput) != 1)
        !          1349:                return 0;
        !          1350:            reginput++;
        !          1351:            break;
        !          1352:          case EXACTLY:{
        !          1353:                register int    len;
        !          1354:                register char_u  *opnd;
        !          1355:
        !          1356:                opnd = OPERAND(scan);
        !          1357:                /* Inline the first character, for speed. */
        !          1358:                if (*opnd != *reginput && (!reg_ic || TO_UPPER(*opnd) != TO_UPPER(*reginput)))
        !          1359:                    return 0;
        !          1360:                len = STRLEN(opnd);
        !          1361:                if (len > 1 && cstrncmp(opnd, reginput, len) != 0)
        !          1362:                    return 0;
        !          1363:                reginput += len;
        !          1364:            }
        !          1365:            break;
        !          1366:          case ANYOF:
        !          1367:            if (*reginput == '\0' || cstrchr(OPERAND(scan), *reginput) == NULL)
        !          1368:                return 0;
        !          1369:            reginput++;
        !          1370:            break;
        !          1371:          case ANYBUT:
        !          1372:            if (*reginput == '\0' || cstrchr(OPERAND(scan), *reginput) != NULL)
        !          1373:                return 0;
        !          1374:            reginput++;
        !          1375:            break;
        !          1376:          case NOTHING:
        !          1377:            break;
        !          1378:          case BACK:
        !          1379:            break;
        !          1380:          case MOPEN + 1:
        !          1381:          case MOPEN + 2:
        !          1382:          case MOPEN + 3:
        !          1383:          case MOPEN + 4:
        !          1384:          case MOPEN + 5:
        !          1385:          case MOPEN + 6:
        !          1386:          case MOPEN + 7:
        !          1387:          case MOPEN + 8:
        !          1388:          case MOPEN + 9:{
        !          1389:                register int    no;
        !          1390:                register char_u  *save;
        !          1391:
        !          1392:                no = OP(scan) - MOPEN;
        !          1393:                save = regstartp[no];
        !          1394:                regstartp[no] = reginput; /* Tentatively */
        !          1395: #ifdef DEBUG
        !          1396:                printf("MOPEN  %d pre  @'%s' ('%s' )'%s'\n",
        !          1397:                    no, save,
        !          1398:                    regstartp[no] ? regstartp[no] : "NULL",
        !          1399:                    regendp[no] ? regendp[no] : "NULL");
        !          1400: #endif
        !          1401:
        !          1402:                if (regmatch(next)) {
        !          1403: #ifdef DEBUG
        !          1404:                printf("MOPEN  %d post @'%s' ('%s' )'%s'\n",
        !          1405:                    no, save,
        !          1406:                    regstartp[no] ? regstartp[no] : "NULL",
        !          1407:                    regendp[no] ? regendp[no] : "NULL");
        !          1408: #endif
        !          1409:                    return 1;
        !          1410:                }
        !          1411:                regstartp[no] = save;       /* We were wrong... */
        !          1412:                return 0;
        !          1413:            }
        !          1414:            /* break; Not Reached */
        !          1415:          case MCLOSE + 1:
        !          1416:          case MCLOSE + 2:
        !          1417:          case MCLOSE + 3:
        !          1418:          case MCLOSE + 4:
        !          1419:          case MCLOSE + 5:
        !          1420:          case MCLOSE + 6:
        !          1421:          case MCLOSE + 7:
        !          1422:          case MCLOSE + 8:
        !          1423:          case MCLOSE + 9:{
        !          1424:                register int    no;
        !          1425:                register char_u  *save;
        !          1426:
        !          1427:                no = OP(scan) - MCLOSE;
        !          1428:                save = regendp[no];
        !          1429:                regendp[no] = reginput; /* Tentatively */
        !          1430: #ifdef DEBUG
        !          1431:                printf("MCLOSE %d pre  @'%s' ('%s' )'%s'\n",
        !          1432:                    no, save,
        !          1433:                    regstartp[no] ? regstartp[no] : "NULL",
        !          1434:                    regendp[no] ? regendp[no] : "NULL");
        !          1435: #endif
        !          1436:
        !          1437:                if (regmatch(next)) {
        !          1438: #ifdef DEBUG
        !          1439:                printf("MCLOSE %d post @'%s' ('%s' )'%s'\n",
        !          1440:                    no, save,
        !          1441:                    regstartp[no] ? regstartp[no] : "NULL",
        !          1442:                    regendp[no] ? regendp[no] : "NULL");
        !          1443: #endif
        !          1444:                    return 1;
        !          1445:                }
        !          1446:                regendp[no] = save;     /* We were wrong... */
        !          1447:                return 0;
        !          1448:            }
        !          1449:            /* break; Not Reached */
        !          1450: #ifdef BACKREF
        !          1451:          case BACKREF + 1:
        !          1452:          case BACKREF + 2:
        !          1453:          case BACKREF + 3:
        !          1454:          case BACKREF + 4:
        !          1455:          case BACKREF + 5:
        !          1456:          case BACKREF + 6:
        !          1457:          case BACKREF + 7:
        !          1458:          case BACKREF + 8:
        !          1459:          case BACKREF + 9:{
        !          1460:                register int    no;
        !          1461:                int             len;
        !          1462:
        !          1463:                no = OP(scan) - BACKREF;
        !          1464:                if (regendp[no] != NULL) {
        !          1465:                    len = (int)(regendp[no] - regstartp[no]);
        !          1466:                    if (cstrncmp(regstartp[no], reginput, len) != 0)
        !          1467:                        return 0;
        !          1468:                    reginput += len;
        !          1469:                } else {
        !          1470:                    /*emsg("backref to 0-repeat");*/
        !          1471:                    /*return 0;*/
        !          1472:                }
        !          1473:            }
        !          1474:            break;
        !          1475: #endif
        !          1476:          case BRANCH:{
        !          1477:                register char_u  *save;
        !          1478:
        !          1479:                if (OP(next) != BRANCH) /* No choice. */
        !          1480:                    next = OPERAND(scan);       /* Avoid recursion. */
        !          1481:                else {
        !          1482:                    do {
        !          1483:                        save = reginput;
        !          1484:                        if (regmatch(OPERAND(scan)))
        !          1485:                            return 1;
        !          1486:                        reginput = save;
        !          1487:                        scan = regnext(scan);
        !          1488:                    } while (scan != NULL && OP(scan) == BRANCH);
        !          1489:                    return 0;
        !          1490:                    /* NOTREACHED */
        !          1491:                }
        !          1492:            }
        !          1493:            break;
        !          1494:          case STAR:
        !          1495:          case PLUS:{
        !          1496:                register int    nextch;
        !          1497:                register int    no;
        !          1498:                register char_u  *save;
        !          1499:                register int    min;
        !          1500:
        !          1501:                /*
        !          1502:                 * Lookahead to avoid useless match attempts when we know
        !          1503:                 * what character comes next.
        !          1504:                 */
        !          1505:                nextch = '\0';
        !          1506:                if (OP(next) == EXACTLY)
        !          1507:                {
        !          1508:                    nextch = *OPERAND(next);
        !          1509:                    if (reg_ic)
        !          1510:                        nextch = TO_UPPER(nextch);
        !          1511:                }
        !          1512:                min = (OP(scan) == STAR) ? 0 : 1;
        !          1513:                save = reginput;
        !          1514:                no = regrepeat(OPERAND(scan));
        !          1515:                while (no >= min)
        !          1516:                {
        !          1517:                    /* If it could work, try it. */
        !          1518:                    if (nextch == '\0' || (*reginput == nextch ||
        !          1519:                                    (reg_ic && TO_UPPER(*reginput) == nextch)))
        !          1520:                        if (regmatch(next))
        !          1521:                            return 1;
        !          1522:                    /* Couldn't or didn't -- back up. */
        !          1523:                    no--;
        !          1524:                    reginput = save + no;
        !          1525:                }
        !          1526:                return 0;
        !          1527:            }
        !          1528:            /* break; Not Reached */
        !          1529:          case END:
        !          1530:            return 1;       /* Success! */
        !          1531:            /* break; Not Reached */
        !          1532:          default:
        !          1533:            emsg(e_re_corr);
        !          1534:            return 0;
        !          1535:            /* break; Not Reached */
        !          1536:        }
        !          1537:
        !          1538:        scan = next;
        !          1539:    }
        !          1540:
        !          1541:    /*
        !          1542:     * We get here only if there's trouble -- normally "case END" is the
        !          1543:     * terminating point.
        !          1544:     */
        !          1545:    emsg(e_re_corr);
        !          1546:    return 0;
        !          1547: }
        !          1548:
        !          1549: /*
        !          1550:  - regrepeat - repeatedly match something simple, report how many
        !          1551:  */
        !          1552: static int
        !          1553: regrepeat(p)
        !          1554:    char_u         *p;
        !          1555: {
        !          1556:    register int    count = 0;
        !          1557:    register char_u  *scan;
        !          1558:    register char_u  *opnd;
        !          1559:
        !          1560:    scan = reginput;
        !          1561:    opnd = OPERAND(p);
        !          1562:    switch (OP(p)) {
        !          1563:      case ANY:
        !          1564:        count = STRLEN(scan);
        !          1565:        scan += count;
        !          1566:        break;
        !          1567:      case IDENT:
        !          1568:        for (count = 0; isidchar(*scan); ++count)
        !          1569:            ++scan;
        !          1570:        break;
        !          1571:      case WORD:
        !          1572:        for (count = 0; iswordchar(*scan); ++count)
        !          1573:            ++scan;
        !          1574:        break;
        !          1575:      case FNAME:
        !          1576:        for (count = 0; isfilechar(*scan); ++count)
        !          1577:            ++scan;
        !          1578:        break;
        !          1579:      case PRINT:
        !          1580:        for (count = 0; charsize(*scan) == 1; ++count)
        !          1581:            ++scan;
        !          1582:        break;
        !          1583:      case SIDENT:
        !          1584:        for (count = 0; !isdigit(*scan) && isidchar(*scan); ++count)
        !          1585:            ++scan;
        !          1586:        break;
        !          1587:      case SWORD:
        !          1588:        for (count = 0; !isdigit(*scan) && iswordchar(*scan); ++count)
        !          1589:            ++scan;
        !          1590:        break;
        !          1591:      case SFNAME:
        !          1592:        for (count = 0; !isdigit(*scan) && isfilechar(*scan); ++count)
        !          1593:            ++scan;
        !          1594:        break;
        !          1595:      case SPRINT:
        !          1596:        for (count = 0; !isdigit(*scan) && charsize(*scan) == 1; ++count)
        !          1597:            ++scan;
        !          1598:        break;
        !          1599:      case EXACTLY:
        !          1600:        while (*opnd == *scan || (reg_ic && TO_UPPER(*opnd) == TO_UPPER(*scan)))
        !          1601:        {
        !          1602:            count++;
        !          1603:            scan++;
        !          1604:        }
        !          1605:        break;
        !          1606:      case ANYOF:
        !          1607:        while (*scan != '\0' && cstrchr(opnd, *scan) != NULL)
        !          1608:        {
        !          1609:            count++;
        !          1610:            scan++;
        !          1611:        }
        !          1612:        break;
        !          1613:      case ANYBUT:
        !          1614:        while (*scan != '\0' && cstrchr(opnd, *scan) == NULL) {
        !          1615:            count++;
        !          1616:            scan++;
        !          1617:        }
        !          1618:        break;
        !          1619:      default:                  /* Oh dear.  Called inappropriately. */
        !          1620:        emsg(e_re_corr);
        !          1621:        count = 0;              /* Best compromise. */
        !          1622:        break;
        !          1623:    }
        !          1624:    reginput = scan;
        !          1625:
        !          1626:    return count;
        !          1627: }
        !          1628:
        !          1629: /*
        !          1630:  - regnext - dig the "next" pointer out of a node
        !          1631:  */
        !          1632: static char_u    *
        !          1633: regnext(p)
        !          1634:    register char_u  *p;
        !          1635: {
        !          1636:    register int    offset;
        !          1637:
        !          1638:    if (p == &regdummy)
        !          1639:        return NULL;
        !          1640:
        !          1641:    offset = NEXT(p);
        !          1642:    if (offset == 0)
        !          1643:        return NULL;
        !          1644:
        !          1645:    if (OP(p) == BACK)
        !          1646:        return p - offset;
        !          1647:    else
        !          1648:        return p + offset;
        !          1649: }
        !          1650:
        !          1651: #ifdef DEBUG
        !          1652:
        !          1653: /*
        !          1654:  - regdump - dump a regexp onto stdout in vaguely comprehensible form
        !          1655:  */
        !          1656: void
        !          1657: regdump(r)
        !          1658:    regexp         *r;
        !          1659: {
        !          1660:    register char_u  *s;
        !          1661:    register int    op = EXACTLY;       /* Arbitrary non-END op. */
        !          1662:    register char_u  *next;
        !          1663:
        !          1664:
        !          1665:    s = r->program + 1;
        !          1666:    while (op != END) {         /* While that wasn't END last time... */
        !          1667:        op = OP(s);
        !          1668:        printf("%2d%s", s - r->program, regprop(s));    /* Where, what. */
        !          1669:        next = regnext(s);
        !          1670:        if (next == NULL)       /* Next ptr. */
        !          1671:            printf("(0)");
        !          1672:        else
        !          1673:            printf("(%d)", (s - r->program) + (next - s));
        !          1674:        s += 3;
        !          1675:        if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
        !          1676:            /* Literal string, where present. */
        !          1677:            while (*s != '\0') {
        !          1678:                putchar(*s);
        !          1679:                s++;
        !          1680:            }
        !          1681:            s++;
        !          1682:        }
        !          1683:        putchar('\n');
        !          1684:    }
        !          1685:
        !          1686:    /* Header fields of interest. */
        !          1687:    if (r->regstart != '\0')
        !          1688:        printf("start `%c' ", r->regstart);
        !          1689:    if (r->reganch)
        !          1690:        printf("anchored ");
        !          1691:    if (r->regmust != NULL)
        !          1692:        printf("must have \"%s\"", r->regmust);
        !          1693:    printf("\n");
        !          1694: }
        !          1695:
        !          1696: /*
        !          1697:  - regprop - printable representation of opcode
        !          1698:  */
        !          1699: static char_u    *
        !          1700: regprop(op)
        !          1701:    char_u         *op;
        !          1702: {
        !          1703:    register char_u  *p;
        !          1704:    static char_u   buf[50];
        !          1705:
        !          1706:    (void) strcpy(buf, ":");
        !          1707:
        !          1708:    switch (OP(op)) {
        !          1709:      case BOL:
        !          1710:        p = "BOL";
        !          1711:        break;
        !          1712:      case EOL:
        !          1713:        p = "EOL";
        !          1714:        break;
        !          1715:      case ANY:
        !          1716:        p = "ANY";
        !          1717:        break;
        !          1718:      case IDENT:
        !          1719:        p = "IDENT";
        !          1720:        break;
        !          1721:      case WORD:
        !          1722:        p = "WORD";
        !          1723:        break;
        !          1724:      case FNAME:
        !          1725:        p = "FNAME";
        !          1726:        break;
        !          1727:      case PRINT:
        !          1728:        p = "PRINT";
        !          1729:        break;
        !          1730:      case SIDENT:
        !          1731:        p = "SIDENT";
        !          1732:        break;
        !          1733:      case SWORD:
        !          1734:        p = "SWORD";
        !          1735:        break;
        !          1736:      case SFNAME:
        !          1737:        p = "SFNAME";
        !          1738:        break;
        !          1739:      case SPRINT:
        !          1740:        p = "SPRINT";
        !          1741:        break;
        !          1742:      case ANYOF:
        !          1743:        p = "ANYOF";
        !          1744:        break;
        !          1745:      case ANYBUT:
        !          1746:        p = "ANYBUT";
        !          1747:        break;
        !          1748:      case BRANCH:
        !          1749:        p = "BRANCH";
        !          1750:        break;
        !          1751:      case EXACTLY:
        !          1752:        p = "EXACTLY";
        !          1753:        break;
        !          1754:      case NOTHING:
        !          1755:        p = "NOTHING";
        !          1756:        break;
        !          1757:      case BACK:
        !          1758:        p = "BACK";
        !          1759:        break;
        !          1760:      case END:
        !          1761:        p = "END";
        !          1762:        break;
        !          1763:      case MOPEN + 1:
        !          1764:      case MOPEN + 2:
        !          1765:      case MOPEN + 3:
        !          1766:      case MOPEN + 4:
        !          1767:      case MOPEN + 5:
        !          1768:      case MOPEN + 6:
        !          1769:      case MOPEN + 7:
        !          1770:      case MOPEN + 8:
        !          1771:      case MOPEN + 9:
        !          1772:        sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
        !          1773:        p = NULL;
        !          1774:        break;
        !          1775:      case MCLOSE + 1:
        !          1776:      case MCLOSE + 2:
        !          1777:      case MCLOSE + 3:
        !          1778:      case MCLOSE + 4:
        !          1779:      case MCLOSE + 5:
        !          1780:      case MCLOSE + 6:
        !          1781:      case MCLOSE + 7:
        !          1782:      case MCLOSE + 8:
        !          1783:      case MCLOSE + 9:
        !          1784:        sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
        !          1785:        p = NULL;
        !          1786:        break;
        !          1787:      case BACKREF + 1:
        !          1788:      case BACKREF + 2:
        !          1789:      case BACKREF + 3:
        !          1790:      case BACKREF + 4:
        !          1791:      case BACKREF + 5:
        !          1792:      case BACKREF + 6:
        !          1793:      case BACKREF + 7:
        !          1794:      case BACKREF + 8:
        !          1795:      case BACKREF + 9:
        !          1796:        sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
        !          1797:        p = NULL;
        !          1798:        break;
        !          1799:      case STAR:
        !          1800:        p = "STAR";
        !          1801:        break;
        !          1802:      case PLUS:
        !          1803:        p = "PLUS";
        !          1804:        break;
        !          1805:      default:
        !          1806:        sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
        !          1807:        p = NULL;
        !          1808:        break;
        !          1809:    }
        !          1810:    if (p != NULL)
        !          1811:        (void) strcat(buf, p);
        !          1812:    return buf;
        !          1813: }
        !          1814: #endif
        !          1815:
        !          1816: /*
        !          1817:  * The following is provided for those people who do not have strcspn() in
        !          1818:  * their C libraries.  They should get off their butts and do something
        !          1819:  * about it; at least one public-domain implementation of those (highly
        !          1820:  * useful) string routines has been published on Usenet.
        !          1821:  */
        !          1822: #ifndef HAVE_STRCSPN
        !          1823: /*
        !          1824:  * strcspn - find length of initial segment of s1 consisting entirely
        !          1825:  * of characters not from s2
        !          1826:  */
        !          1827:
        !          1828:    static size_t
        !          1829: strcspn(s1, s2)
        !          1830:    const char_u           *s1;
        !          1831:    const char_u           *s2;
        !          1832: {
        !          1833:    register char_u  *scan1;
        !          1834:    register char_u  *scan2;
        !          1835:    register int    count;
        !          1836:
        !          1837:    count = 0;
        !          1838:    for (scan1 = s1; *scan1 != '\0'; scan1++) {
        !          1839:        for (scan2 = s2; *scan2 != '\0';)       /* ++ moved down. */
        !          1840:            if (*scan1 == *scan2++)
        !          1841:                return count;
        !          1842:        count++;
        !          1843:    }
        !          1844:    return count;
        !          1845: }
        !          1846: #endif
        !          1847:
        !          1848: /*
        !          1849:  * Compare two strings, ignore case if reg_ic set.
        !          1850:  * Return 0 if strings match, non-zero otherwise.
        !          1851:  */
        !          1852:    int
        !          1853: cstrncmp(s1, s2, n)
        !          1854:    char_u         *s1, *s2;
        !          1855:    int             n;
        !          1856: {
        !          1857:    if (!reg_ic)
        !          1858:        return STRNCMP(s1, s2, (size_t)n);
        !          1859:
        !          1860:    return vim_strnicmp(s1, s2, (size_t)n);
        !          1861: }
        !          1862:
        !          1863: /*
        !          1864:  * cstrchr: This function is used a lot for simple searches, keep it fast!
        !          1865:  */
        !          1866:    static char_u *
        !          1867: cstrchr(s, c)
        !          1868:    char_u         *s;
        !          1869:    register int    c;
        !          1870: {
        !          1871:    register char_u        *p;
        !          1872:
        !          1873:    if (!reg_ic)
        !          1874:        return vim_strchr(s, c);
        !          1875:
        !          1876:    c = TO_UPPER(c);
        !          1877:
        !          1878:    for (p = s; *p; p++)
        !          1879:    {
        !          1880:        if (TO_UPPER(*p) == c)
        !          1881:            return p;
        !          1882:    }
        !          1883:    return NULL;
        !          1884: }