[BACK]Return to regex.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / mg

Annotation of src/usr.bin/mg/regex.c, Revision 1.1

1.1     ! deraadt     1: /* Imagine Generic gnuemacs copyright notice here */
        !             2:
        !             3: /* modified by Robert Larson to make fewer assumptions about the compiler */
        !             4: /* (Making it useful to mg) (#if not supported by osk, enum not supported by many) */
        !             5:
        !             6: /* To test, compile with -Dtest.
        !             7:  This Dtestable feature turns this into a self-contained program
        !             8:  which reads a pattern, describes how it compiles,
        !             9:  then reads a string and searches for it.  */
        !            10:
        !            11:
        !            12: #ifdef REGEX
        !            13: #include       "def.h"         /* defines VOID etc. for mg */
        !            14:
        !            15: #ifdef emacs
        !            16:
        !            17: /* The `emacs' switch turns on certain special matching commands
        !            18:  that make sense only in emacs. */
        !            19:
        !            20: #include "config.h"
        !            21: #include "lisp.h"
        !            22: #include "buffer.h"
        !            23: #include "syntax.h"
        !            24:
        !            25: #else  /* not emacs */
        !            26:
        !            27: /*
        !            28:  * Define the syntax stuff, so we can do the \<...\> things.
        !            29:  */
        !            30:
        !            31: #ifndef Sword /* must be non-zero in some of the tests below... */
        !            32: #define Sword 1
        !            33: #endif
        !            34:
        !            35: #define SYNTAX(c) re_syntax_table[c]
        !            36:
        !            37: #ifdef SYNTAX_TABLE
        !            38:
        !            39: char *re_syntax_table;
        !            40:
        !            41: #else
        !            42:
        !            43: static char re_syntax_table[256];
        !            44:
        !            45: static VOID
        !            46: init_syntax_once ()
        !            47: {
        !            48:    register int c;
        !            49:    static int done = 0;
        !            50:
        !            51:    if (done)
        !            52:      return;
        !            53:
        !            54:    bzero (re_syntax_table, sizeof re_syntax_table);
        !            55:
        !            56:    for (c = 'a'; c <= 'z'; c++)
        !            57:      re_syntax_table[c] = Sword;
        !            58:
        !            59:    for (c = 'A'; c <= 'Z'; c++)
        !            60:      re_syntax_table[c] = Sword;
        !            61:
        !            62:    for (c = '0'; c <= '9'; c++)
        !            63:      re_syntax_table[c] = Sword;
        !            64:
        !            65:    done = 1;
        !            66: }
        !            67:
        !            68: #endif /* SYNTAX_TABLE */
        !            69: #endif /* not emacs */
        !            70:
        !            71: #include "regex.h"
        !            72:
        !            73: /* Number of failure points to allocate space for initially,
        !            74:  when matching.         If this number is exceeded, more space is allocated,
        !            75:  so it is not a hard limit.  */
        !            76:
        !            77: #ifndef NFAILURES
        !            78: #define NFAILURES 80
        !            79: #endif NFAILURES
        !            80:
        !            81: /* width of a byte in bits */
        !            82:
        !            83: #define BYTEWIDTH 8
        !            84:
        !            85: /* These are the command codes that appear in compiled regular expressions, one per byte.
        !            86:   Some command codes are followed by argument bytes.
        !            87:   A command code can specify any interpretation whatever for its arguments.
        !            88:   Zero-bytes may appear in the compiled regular expression. */
        !            89:
        !            90: typedef char regexpcode;
        !            91: #define unused 0
        !            92: #define exactn 1       /* followed by one byte giving n, and then by n literal bytes */
        !            93: #define begline 2      /* fails unless at beginning of line */
        !            94: #define endline 3      /* fails unless at end of line */
        !            95: #define jump 4         /* followed by two bytes giving relative address to jump to */
        !            96: #define on_failure_jump 5 /* followed by two bytes giving relative address of place */
        !            97:                        /* to resume at in case of failure. */
        !            98: #define finalize_jump 6 /* Throw away latest failure point and then jump to address. */
        !            99: #define maybe_finalize_jump 7 /* Like jump but finalize if safe to do so. */
        !           100:                        /*  This is used to jump back to the beginning
        !           101:                            of a repeat.  If the command that follows
        !           102:                            this jump is clearly incompatible with the
        !           103:                            one at the beginning of the repeat, such that
        !           104:                            we can be sure that there is no use backtracking
        !           105:                            out of repetitions already completed,
        !           106:                            then we finalize. */
        !           107: #define dummy_failure_jump 8  /* jump, and push a dummy failure point. */
        !           108:                        /*  This failure point will be thrown away
        !           109:                            if an attempt is made to use it for a failure.
        !           110:                            A + construct makes this before the first repeat.  */
        !           111: #define anychar 9      /* matches any one character */
        !           112: #define charset 10     /* matches any one char belonging to specified set. */
        !           113:                  /*  First following byte is # bitmap bytes.
        !           114:                    Then come bytes for a bit-map saying which chars are in.
        !           115:                    Bits in each byte are ordered low-bit-first.
        !           116:                    A character is in the set if its bit is 1.
        !           117:                    A character too large to have a bit in the map
        !           118:                    is automatically not in the set */
        !           119: #define charset_not 11 /* similar but match any character that is NOT one of those specified */
        !           120: #define start_memory 12 /* starts remembering the text that is matched */
        !           121:                  /*  and stores it in a memory register.
        !           122:                    followed by one byte containing the register number.
        !           123:                    Register numbers must be in the range 0 through NREGS. */
        !           124: #define stop_memory 13 /* stops remembering the text that is matched */
        !           125:                  /*  and stores it in a memory register.
        !           126:                    followed by one byte containing the register number.
        !           127:                    Register numbers must be in the range 0 through NREGS. */
        !           128: #define duplicate 14   /* match a duplicate of something remembered. */
        !           129:                  /*  Followed by one byte containing the index of the memory register. */
        !           130: #define before_dot 15  /* Succeeds if before dot */
        !           131: #define at_dot 16      /* Succeeds if at dot */
        !           132: #define after_dot 17   /* Succeeds if after dot */
        !           133: #define begbuf 18      /* Succeeds if at beginning of buffer */
        !           134: #define endbuf 19      /* Succeeds if at end of buffer */
        !           135: #define wordchar 20    /* Matches any word-constituent character */
        !           136: #define notwordchar 21 /* Matches any char that is not a word-constituent */
        !           137: #define wordbeg 22     /* Succeeds if at word beginning */
        !           138: #define wordend 23     /* Succeeds if at word end */
        !           139: #define wordbound 24   /* Succeeds if at a word boundary */
        !           140: #define notwordbound 25 /* Succeeds if not at a word boundary */
        !           141: #define syntaxspec 26  /* Matches any character whose syntax is specified. */
        !           142:                        /* followed by a byte which contains a syntax code, Sword or such like */
        !           143: #define notsyntaxspec 27 /* Matches any character whose syntax differs from the specified. */
        !           144:
        !           145:
        !           146: #ifndef SIGN_EXTEND_CHAR
        !           147: #define SIGN_EXTEND_CHAR(x) (x)
        !           148: #endif
        !           149: 
        !           150: /* compile_pattern takes a regular-expression descriptor string in the user's format
        !           151:   and converts it into a buffer full of byte commands for matching.
        !           152:
        !           153:   pattern   is the address of the pattern string
        !           154:   size     is the length of it.
        !           155:   bufp     is a  struct re_pattern_buffer *  which points to the info
        !           156:            on where to store the byte commands.
        !           157:            This structure contains a  char *  which points to the
        !           158:            actual space, which should have been obtained with malloc.
        !           159:            compile_pattern may use  realloc  to grow the buffer space.
        !           160:
        !           161:   The number of bytes of commands can be found out by looking in
        !           162:   the  struct re_pattern_buffer         that bufp pointed to,
        !           163:   after compile_pattern returns.
        !           164: */
        !           165:
        !           166: #define PATPUSH(ch) (*b++ = (char) (ch))
        !           167:
        !           168: #define PATFETCH(c) \
        !           169:  {if (p == pend) goto end_of_pattern; \
        !           170:   c = * (unsigned char *) p++; \
        !           171:   if (translate) c = translate[c]; }
        !           172:
        !           173: #define PATFETCH_RAW(c) \
        !           174:  {if (p == pend) goto end_of_pattern; \
        !           175:   c = * (unsigned char *) p++; }
        !           176:
        !           177: #define PATUNFETCH p--
        !           178:
        !           179: #define EXTEND_BUFFER \
        !           180:   { char *old_buffer = bufp->buffer; \
        !           181:     if (bufp->allocated == (1<<16)) goto too_big; \
        !           182:     bufp->allocated *= 2; \
        !           183:     if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
        !           184:     if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
        !           185:       goto memory_exhausted; \
        !           186:     c = bufp->buffer - old_buffer; \
        !           187:     b += c; \
        !           188:     if (fixup_jump) \
        !           189:       fixup_jump += c; \
        !           190:     if (laststart) \
        !           191:       laststart += c; \
        !           192:     begalt += c; \
        !           193:     if (pending_exact) \
        !           194:       pending_exact += c; \
        !           195:   }
        !           196:
        !           197: static int store_jump (), insert_jump ();
        !           198:
        !           199: char *
        !           200: re_compile_pattern (pattern, size, bufp)
        !           201:      char *pattern;
        !           202:      int size;
        !           203:      struct re_pattern_buffer *bufp;
        !           204: {
        !           205:   register char *b = bufp->buffer;
        !           206:   register char *p = pattern;
        !           207:   char *pend = pattern + size;
        !           208:   register unsigned c, c1;
        !           209:   char *p1;
        !           210:   unsigned char *translate = (unsigned char *) bufp->translate;
        !           211:
        !           212:   /* address of the count-byte of the most recently inserted "exactn" command.
        !           213:     This makes it possible to tell whether a new exact-match character
        !           214:     can be added to that command or requires a new "exactn" command. */
        !           215:
        !           216:   char *pending_exact = 0;
        !           217:
        !           218:   /* address of the place where a forward-jump should go
        !           219:     to the end of the containing expression.
        !           220:     Each alternative of an "or", except the last, ends with a forward-jump
        !           221:     of this sort. */
        !           222:
        !           223:   char *fixup_jump = 0;
        !           224:
        !           225:   /* address of start of the most recently finished expression.
        !           226:     This tells postfix * where to find the start of its operand. */
        !           227:
        !           228:   char *laststart = 0;
        !           229:
        !           230:   /* In processing a repeat, 1 means zero matches is allowed */
        !           231:
        !           232:   char zero_times_ok;
        !           233:
        !           234:   /* In processing a repeat, 1 means many matches is allowed */
        !           235:
        !           236:   char many_times_ok;
        !           237:
        !           238:   /* address of beginning of regexp, or inside of last \( */
        !           239:
        !           240:   char *begalt = b;
        !           241:
        !           242:   /* Stack of information saved by \( and restored by \).
        !           243:      Four stack elements are pushed by each \(:
        !           244:        First, the value of b.
        !           245:        Second, the value of fixup_jump.
        !           246:        Third, the value of regnum.
        !           247:        Fourth, the value of begalt.  */
        !           248:
        !           249:   int stackb[40];
        !           250:   int *stackp = stackb;
        !           251:   int *stacke = stackb + 40;
        !           252:   int *stackt;
        !           253:
        !           254:   /* Counts \('s as they are encountered.  Remembered for the matching \),
        !           255:      where it becomes the "register number" to put in the stop_memory command */
        !           256:
        !           257:   int regnum = 1;
        !           258:
        !           259:   bufp->fastmap_accurate = 0;
        !           260:
        !           261: #ifndef SYNTAX_TABLE
        !           262: #ifndef emacs
        !           263:   /*
        !           264:    * Initialize the syntax table.
        !           265:    */
        !           266:    init_syntax_once();
        !           267: #endif
        !           268: #endif
        !           269:
        !           270:   if (bufp->allocated == 0)
        !           271:     {
        !           272:       bufp->allocated = 28;
        !           273:       if (bufp->buffer)
        !           274:        /* EXTEND_BUFFER loses when bufp->allocated is 0 */
        !           275:        bufp->buffer = (char *) realloc (bufp->buffer, 28);
        !           276:       else
        !           277:        /* Caller did not allocate a buffer.  Do it for him */
        !           278:        bufp->buffer = (char *) malloc (28);
        !           279:       if (!bufp->buffer) goto memory_exhausted;
        !           280:       begalt = b = bufp->buffer;
        !           281:     }
        !           282:
        !           283:   while (p != pend)
        !           284:     {
        !           285:       if (b - bufp->buffer > bufp->allocated - 10)
        !           286:        /* Note that EXTEND_BUFFER clobbers c */
        !           287:        EXTEND_BUFFER;
        !           288:
        !           289:       PATFETCH (c);
        !           290:
        !           291:       switch (c)
        !           292:        {
        !           293:        case '$':
        !           294:          /* $ means succeed if at end of line, but only in special contexts.
        !           295:            If randonly in the middle of a pattern, it is a normal character. */
        !           296:          if (p == pend || (*p == '\\' && (p[1] == ')' || p[1] == '|')))
        !           297:            {
        !           298:              PATPUSH (endline);
        !           299:              break;
        !           300:            }
        !           301:          goto normal_char;
        !           302:
        !           303:        case '^':
        !           304:          /* ^ means succeed if at beg of line, but only if no preceding pattern. */
        !           305:          if (laststart) goto normal_char;
        !           306:          PATPUSH (begline);
        !           307:          break;
        !           308:
        !           309:        case '*':
        !           310:        case '+':
        !           311:        case '?':
        !           312:          /* If there is no previous pattern, char not special. */
        !           313:          if (!laststart)
        !           314:            goto normal_char;
        !           315:          /* If there is a sequence of repetition chars,
        !           316:             collapse it down to equivalent to just one.  */
        !           317:          zero_times_ok = 0;
        !           318:          many_times_ok = 0;
        !           319:          while (1)
        !           320:            {
        !           321:              zero_times_ok |= c != '+';
        !           322:              many_times_ok |= c != '?';
        !           323:              if (p == pend)
        !           324:                break;
        !           325:              PATFETCH (c);
        !           326:              if (!(c == '*' || c == '+' || c == '?'))
        !           327:                {
        !           328:                  PATUNFETCH;
        !           329:                  break;
        !           330:                }
        !           331:            }
        !           332:
        !           333:          /* Now we know whether 0 matches is allowed,
        !           334:             and whether 2 or more matches is allowed.  */
        !           335:          if (many_times_ok)
        !           336:            {
        !           337:              /* If more than one repetition is allowed,
        !           338:                 put in a backward jump at the end.  */
        !           339:              store_jump (b, maybe_finalize_jump, laststart - 3);
        !           340:              b += 3;
        !           341:            }
        !           342:          insert_jump (on_failure_jump, laststart, b + 3, b);
        !           343:          pending_exact = 0;
        !           344:          b += 3;
        !           345:          if (!zero_times_ok)
        !           346:            {
        !           347:              /* At least one repetition required: insert before the loop
        !           348:                 a skip over the initial on-failure-jump instruction */
        !           349:              insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
        !           350:              b += 3;
        !           351:            }
        !           352:          break;
        !           353:
        !           354:        case '.':
        !           355:          laststart = b;
        !           356:          PATPUSH (anychar);
        !           357:          break;
        !           358:
        !           359:        case '[':
        !           360:          if (b - bufp->buffer
        !           361:              > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
        !           362:            /* Note that EXTEND_BUFFER clobbers c */
        !           363:            EXTEND_BUFFER;
        !           364:
        !           365:          laststart = b;
        !           366:          if (*p == '^')
        !           367:            PATPUSH (charset_not), p++;
        !           368:          else
        !           369:            PATPUSH (charset);
        !           370:          p1 = p;
        !           371:
        !           372:          PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
        !           373:          /* Clear the whole map */
        !           374:          bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
        !           375:          /* Read in characters and ranges, setting map bits */
        !           376:          while (1)
        !           377:            {
        !           378:              PATFETCH (c);
        !           379:              if (c == ']' && p != p1 + 1) break;
        !           380:              if (*p == '-')
        !           381:                {
        !           382:                  PATFETCH (c1);
        !           383:                  PATFETCH (c1);
        !           384:                  while (c <= c1)
        !           385:                    b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
        !           386:                }
        !           387:              else
        !           388:                {
        !           389:                  b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
        !           390:                }
        !           391:            }
        !           392:          /* Discard any bitmap bytes that are all 0 at the end of the map.
        !           393:             Decrement the map-length byte too. */
        !           394:          while (b[-1] > 0 && b[b[-1] - 1] == 0)
        !           395:            b[-1]--;
        !           396:          b += b[-1];
        !           397:          break;
        !           398:
        !           399:        case '\\':
        !           400:          if (p == pend) goto invalid_pattern;
        !           401:          PATFETCH_RAW (c);
        !           402:          switch (c)
        !           403:            {
        !           404:            case '(':
        !           405:              if (stackp == stacke) goto nesting_too_deep;
        !           406:              if (regnum < RE_NREGS)
        !           407:                {
        !           408:                  PATPUSH (start_memory);
        !           409:                  PATPUSH (regnum);
        !           410:                }
        !           411:              *stackp++ = b - bufp->buffer;
        !           412:              *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
        !           413:              *stackp++ = regnum++;
        !           414:              *stackp++ = begalt - bufp->buffer;
        !           415:              fixup_jump = 0;
        !           416:              laststart = 0;
        !           417:              begalt = b;
        !           418:              break;
        !           419:
        !           420:            case ')':
        !           421:              if (stackp == stackb) goto unmatched_close;
        !           422:              begalt = *--stackp + bufp->buffer;
        !           423:              if (fixup_jump)
        !           424:                store_jump (fixup_jump, jump, b);
        !           425:              if (stackp[-1] < RE_NREGS)
        !           426:                {
        !           427:                  PATPUSH (stop_memory);
        !           428:                  PATPUSH (stackp[-1]);
        !           429:                }
        !           430:              stackp -= 2;
        !           431:              fixup_jump = 0;
        !           432:              if (*stackp)
        !           433:                fixup_jump = *stackp + bufp->buffer - 1;
        !           434:              laststart = *--stackp + bufp->buffer;
        !           435:              break;
        !           436:
        !           437:            case '|':
        !           438:              insert_jump (on_failure_jump, begalt, b + 6, b);
        !           439:              pending_exact = 0;
        !           440:              b += 3;
        !           441:              if (fixup_jump)
        !           442:                store_jump (fixup_jump, jump, b);
        !           443:              fixup_jump = b;
        !           444:              b += 3;
        !           445:              laststart = 0;
        !           446:              begalt = b;
        !           447:              break;
        !           448:
        !           449: #ifdef emacs
        !           450:            case '=':
        !           451:              PATPUSH (at_dot);
        !           452:              break;
        !           453:
        !           454:            case 's':
        !           455:              laststart = b;
        !           456:              PATPUSH (syntaxspec);
        !           457:              PATFETCH (c);
        !           458:              PATPUSH (syntax_spec_code[c]);
        !           459:              break;
        !           460:
        !           461:            case 'S':
        !           462:              laststart = b;
        !           463:              PATPUSH (notsyntaxspec);
        !           464:              PATFETCH (c);
        !           465:              PATPUSH (syntax_spec_code[c]);
        !           466:              break;
        !           467: #endif emacs
        !           468:
        !           469:            case 'w':
        !           470:              laststart = b;
        !           471:              PATPUSH (wordchar);
        !           472:              break;
        !           473:
        !           474:            case 'W':
        !           475:              laststart = b;
        !           476:              PATPUSH (notwordchar);
        !           477:              break;
        !           478:
        !           479:            case '<':
        !           480:              PATPUSH (wordbeg);
        !           481:              break;
        !           482:
        !           483:            case '>':
        !           484:              PATPUSH (wordend);
        !           485:              break;
        !           486:
        !           487:            case 'b':
        !           488:              PATPUSH (wordbound);
        !           489:              break;
        !           490:
        !           491:            case 'B':
        !           492:              PATPUSH (notwordbound);
        !           493:              break;
        !           494:
        !           495:            case '`':
        !           496:              PATPUSH (begbuf);
        !           497:              break;
        !           498:
        !           499:            case '\'':
        !           500:              PATPUSH (endbuf);
        !           501:              break;
        !           502:
        !           503:            case '1':
        !           504:            case '2':
        !           505:            case '3':
        !           506:            case '4':
        !           507:            case '5':
        !           508:            case '6':
        !           509:            case '7':
        !           510:            case '8':
        !           511:            case '9':
        !           512:              c1 = c - '0';
        !           513:              if (c1 >= regnum)
        !           514:                goto normal_char;
        !           515:              for (stackt = stackp - 2;  stackt > stackb;  stackt -= 4)
        !           516:                if (*stackt == c1)
        !           517:                  goto normal_char;
        !           518:              laststart = b;
        !           519:              PATPUSH (duplicate);
        !           520:              PATPUSH (c1);
        !           521:              break;
        !           522:            default:
        !           523:              goto normal_char;
        !           524:            }
        !           525:          break;
        !           526:
        !           527:        default:
        !           528:        normal_char:
        !           529:          if (!pending_exact || pending_exact + *pending_exact + 1 != b
        !           530:              || *pending_exact == 0177 || *p == '*' || *p == '^'
        !           531:              || *p == '+' || *p == '?')
        !           532:            {
        !           533:              laststart = b;
        !           534:              PATPUSH (exactn);
        !           535:              pending_exact = b;
        !           536:              PATPUSH (0);
        !           537:            }
        !           538:          PATPUSH (c);
        !           539:          (*pending_exact)++;
        !           540:        }
        !           541:     }
        !           542:
        !           543:   if (fixup_jump)
        !           544:     store_jump (fixup_jump, jump, b);
        !           545:
        !           546:   if (stackp != stackb) goto unmatched_open;
        !           547:
        !           548:   bufp->used = b - bufp->buffer;
        !           549:   return 0;
        !           550:
        !           551:  invalid_pattern:
        !           552:   return "Invalid regular expression";
        !           553:
        !           554:  unmatched_open:
        !           555:   return "Unmatched \\(";
        !           556:
        !           557:  unmatched_close:
        !           558:   return "Unmatched \\)";
        !           559:
        !           560:  end_of_pattern:
        !           561:   return "Premature end of regular expression";
        !           562:
        !           563:  nesting_too_deep:
        !           564:   return "Nesting too deep";
        !           565:
        !           566:  too_big:
        !           567:   return "Regular expression too big";
        !           568:
        !           569:  memory_exhausted:
        !           570:   return "Memory exhausted";
        !           571: }
        !           572:
        !           573: /* Store where `from' points a jump operation to jump to where `to' points.
        !           574:   `opcode' is the opcode to store. */
        !           575:
        !           576: static int
        !           577: store_jump (from, opcode, to)
        !           578:      char *from, *to;
        !           579:      char opcode;
        !           580: {
        !           581:   from[0] = opcode;
        !           582:   from[1] = (to - (from + 3)) & 0377;
        !           583:   from[2] = (to - (from + 3)) >> 8;
        !           584: }
        !           585:
        !           586: /* Open up space at char FROM, and insert there a jump to TO.
        !           587:    CURRENT_END gives te end of the storage no in use,
        !           588:    so we know how much data to copy up.
        !           589:    OP is the opcode of the jump to insert.
        !           590:
        !           591:    If you call this function, you must zero out pending_exact. */
        !           592:
        !           593: static int
        !           594: insert_jump (op, from, to, current_end)
        !           595:      char op;
        !           596:      char *from, *to, *current_end;
        !           597: {
        !           598:   register char *pto = current_end + 3;
        !           599:   register char *pfrom = current_end;
        !           600:   while (pfrom != from)
        !           601:     *--pto = *--pfrom;
        !           602:   store_jump (from, op, to);
        !           603: }
        !           604: 
        !           605: /* Given a pattern, compute a fastmap from it.
        !           606:  The fastmap records which of the (1 << BYTEWIDTH) possible characters
        !           607:  can start a string that matches the pattern.
        !           608:  This fastmap is used by re_search to skip quickly over totally implausible text.
        !           609:
        !           610:  The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
        !           611:  as bufp->fastmap.
        !           612:  The other components of bufp describe the pattern to be used. */
        !           613:
        !           614: VOID
        !           615: re_compile_fastmap (bufp)
        !           616:      struct re_pattern_buffer *bufp;
        !           617: {
        !           618:   unsigned char *pattern = (unsigned char *) bufp->buffer;
        !           619:   int size = bufp->used;
        !           620:   register char *fastmap = bufp->fastmap;
        !           621:   register unsigned char *p = pattern;
        !           622:   register unsigned char *pend = pattern + size;
        !           623:   register int j, k;
        !           624:   unsigned char *translate = (unsigned char *) bufp->translate;
        !           625:
        !           626:   unsigned char *stackb[NFAILURES];
        !           627:   unsigned char **stackp = stackb;
        !           628:
        !           629:   bzero (fastmap, (1 << BYTEWIDTH));
        !           630:   bufp->fastmap_accurate = 1;
        !           631:   bufp->can_be_null = 0;
        !           632:
        !           633:   while (p)
        !           634:     {
        !           635:       if (p == pend)
        !           636:        {
        !           637:          bufp->can_be_null = 1;
        !           638:          break;
        !           639:        }
        !           640:       switch ((regexpcode) *p++)
        !           641:        {
        !           642:        case exactn:
        !           643:          if (translate)
        !           644:            fastmap[translate[p[1]]] = 1;
        !           645:          else
        !           646:            fastmap[p[1]] = 1;
        !           647:          break;
        !           648:
        !           649:        case begline:
        !           650:        case before_dot:
        !           651:        case at_dot:
        !           652:        case after_dot:
        !           653:        case begbuf:
        !           654:        case endbuf:
        !           655:        case wordbound:
        !           656:        case notwordbound:
        !           657:        case wordbeg:
        !           658:        case wordend:
        !           659:          continue;
        !           660:
        !           661:        case endline:
        !           662:          if (translate)
        !           663:            fastmap[translate['\n']] = 1;
        !           664:          else
        !           665:            fastmap['\n'] = 1;
        !           666:          bufp->can_be_null = 1;
        !           667:          break;
        !           668:
        !           669:        case finalize_jump:
        !           670:        case maybe_finalize_jump:
        !           671:        case jump:
        !           672:        case dummy_failure_jump:
        !           673:          bufp->can_be_null = 1;
        !           674:          j = *p++ & 0377;
        !           675:          j += SIGN_EXTEND_CHAR (*(char *)p++) << 8;
        !           676:          p += j;
        !           677:          if (j > 0)
        !           678:            continue;
        !           679:          /* Jump backward reached implies we just went through
        !           680:             the body of a loop and matched nothing.
        !           681:             Opcode jumped to should be an on_failure_jump.
        !           682:             Just treat it like an ordinary jump.
        !           683:             For a * loop, it has pushed its failure point already;
        !           684:             if so, discard that as redundant.  */
        !           685:          if ((regexpcode) *p != on_failure_jump)
        !           686:            continue;
        !           687:          p++;
        !           688:          j = *p++ & 0377;
        !           689:          j += SIGN_EXTEND_CHAR (*(char *)p++) << 8;
        !           690:          p += j;
        !           691:          if (stackp != stackb && *stackp == p)
        !           692:            stackp--;
        !           693:          continue;
        !           694:
        !           695:        case on_failure_jump:
        !           696:          j = *p++ & 0377;
        !           697:          j += SIGN_EXTEND_CHAR (*(char *)p++) << 8;
        !           698:          *++stackp = p + j;
        !           699:          continue;
        !           700:
        !           701:        case start_memory:
        !           702:        case stop_memory:
        !           703:          p++;
        !           704:          continue;
        !           705:
        !           706:        case duplicate:
        !           707:          bufp->can_be_null = 1;
        !           708:          fastmap['\n'] = 1;
        !           709:        case anychar:
        !           710:          for (j = 0; j < (1 << BYTEWIDTH); j++)
        !           711:            if (j != '\n')
        !           712:              fastmap[j] = 1;
        !           713:          if (bufp->can_be_null)
        !           714:            return;
        !           715:          /* Don't return; check the alternative paths
        !           716:             so we can set can_be_null if appropriate.  */
        !           717:          break;
        !           718:
        !           719:        case wordchar:
        !           720:          for (j = 0; j < (1 << BYTEWIDTH); j++)
        !           721:            if (SYNTAX (j) == Sword)
        !           722:              fastmap[j] = 1;
        !           723:          break;
        !           724:
        !           725:        case notwordchar:
        !           726:          for (j = 0; j < (1 << BYTEWIDTH); j++)
        !           727:            if (SYNTAX (j) != Sword)
        !           728:              fastmap[j] = 1;
        !           729:          break;
        !           730:
        !           731: #ifdef emacs
        !           732:        case syntaxspec:
        !           733:          k = *p++;
        !           734:          for (j = 0; j < (1 << BYTEWIDTH); j++)
        !           735:            if (SYNTAX (j) == (enum syntaxcode) k)
        !           736:              fastmap[j] = 1;
        !           737:          break;
        !           738:
        !           739:        case notsyntaxspec:
        !           740:          for (j = 0; j < (1 << BYTEWIDTH); j++)
        !           741:            if (SYNTAX (j) != (enum syntaxcode) k)
        !           742:              fastmap[j] = 1;
        !           743:          break;
        !           744: #endif emacs
        !           745:
        !           746:        case charset:
        !           747:          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
        !           748:            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
        !           749:              {
        !           750:                if (translate)
        !           751:                  fastmap[translate[j]] = 1;
        !           752:                else
        !           753:                  fastmap[j] = 1;
        !           754:              }
        !           755:          break;
        !           756:
        !           757:        case charset_not:
        !           758:          /* Chars beyond end of map must be allowed */
        !           759:          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
        !           760:            if (translate)
        !           761:              fastmap[translate[j]] = 1;
        !           762:            else
        !           763:              fastmap[j] = 1;
        !           764:
        !           765:          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
        !           766:            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
        !           767:              {
        !           768:                if (translate)
        !           769:                  fastmap[translate[j]] = 1;
        !           770:                else
        !           771:                  fastmap[j] = 1;
        !           772:              }
        !           773:          break;
        !           774:        }
        !           775:
        !           776:       /* Get here means we have successfully found the possible starting characters
        !           777:         of one path of the pattern.  We need not follow this path any farther.
        !           778:         Instead, look at the next alternative remembered in the stack. */
        !           779:       if (stackp != stackb)
        !           780:        p = *stackp--;
        !           781:       else
        !           782:        break;
        !           783:     }
        !           784: }
        !           785: 
        !           786: /* Like re_search_2, below, but only one string is specified. */
        !           787:
        !           788: int
        !           789: re_search (pbufp, string, size, startpos, range, regs)
        !           790:      struct re_pattern_buffer *pbufp;
        !           791:      char *string;
        !           792:      int size, startpos, range;
        !           793:      struct re_registers *regs;
        !           794: {
        !           795:   return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
        !           796: }
        !           797:
        !           798: /* Like re_match_2 but tries first a match starting at index `startpos',
        !           799:  then at startpos + 1, and so on.
        !           800:  `range' is the number of places to try before giving up.
        !           801:  If `range' is negative, the starting positions tried are
        !           802:   startpos, startpos - 1, etc.
        !           803:  It is up to the caller to make sure that range is not so large
        !           804:   as to take the starting position outside of the input strings.
        !           805:
        !           806: The value returned is the position at which the match was found,
        !           807:  or -1 if no match was found. */
        !           808:
        !           809: int
        !           810: re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
        !           811:      struct re_pattern_buffer *pbufp;
        !           812:      char *string1, *string2;
        !           813:      int size1, size2;
        !           814:      int startpos;
        !           815:      register int range;
        !           816:      struct re_registers *regs;
        !           817:      int mstop;
        !           818: {
        !           819:   register char *fastmap = pbufp->fastmap;
        !           820:   register char *translate = pbufp->translate;
        !           821:   int total = size1 + size2;
        !           822:
        !           823:   /* Update the fastmap now if not correct already */
        !           824:   if (fastmap && !pbufp->fastmap_accurate)
        !           825:     re_compile_fastmap (pbufp);
        !           826:
        !           827:   while (1)
        !           828:     {
        !           829:       /* If a fastmap is supplied, skip quickly over characters
        !           830:         that cannot possibly be the start of a match.
        !           831:         Note, however, that if the pattern can possibly match
        !           832:         the null string, we must test it at each starting point
        !           833:         so that we take the first null string we get.  */
        !           834:
        !           835:       if (fastmap && startpos < total && !pbufp->can_be_null)
        !           836:        {
        !           837:          if (range > 0)
        !           838:            {
        !           839:              register int lim = 0;
        !           840:              register char *p;
        !           841:              int irange = range;
        !           842:              if (startpos < size1 && startpos + range >= size1)
        !           843:                lim = range - (size1 - startpos);
        !           844:
        !           845:              p = &(startpos >= size1 ? string2 - size1 : string1)[startpos];
        !           846:
        !           847:              if (translate)
        !           848:                {
        !           849:                  while (range > lim && !fastmap[translate[*p++]])
        !           850:                    range--;
        !           851:                }
        !           852:              else
        !           853:                {
        !           854:                  while (range > lim && !fastmap[*p++])
        !           855:                    range--;
        !           856:                }
        !           857:              startpos += irange - range;
        !           858:            }
        !           859:          else
        !           860:            {
        !           861:              register char c;
        !           862:              if (startpos >= size1) c = string2[startpos - size1];
        !           863:              else c = string1[startpos];
        !           864:              if (translate ? !fastmap[translate[c]] : !fastmap[c])
        !           865:                goto advance;
        !           866:            }
        !           867:        }
        !           868:
        !           869:       if (range >= 0 && startpos == total
        !           870:          && fastmap && !pbufp->can_be_null)
        !           871:        return -1;
        !           872:
        !           873:       if (0 <= re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop))
        !           874:        return startpos;
        !           875:
        !           876: #ifdef C_ALLOCA
        !           877:       alloca (0);
        !           878: #endif /* C_ALLOCA */
        !           879:
        !           880:     advance:
        !           881:       if (!range) break;
        !           882:       if (range > 0) range--, startpos++; else range++, startpos--;
        !           883:     }
        !           884:   return -1;
        !           885: }
        !           886: 
        !           887: #ifndef emacs  /* emacs never uses this */
        !           888: int
        !           889: re_match (pbufp, string, size, pos, regs)
        !           890:      struct re_pattern_buffer *pbufp;
        !           891:      char *string;
        !           892:      int size, pos;
        !           893:      struct re_registers *regs;
        !           894: {
        !           895:   return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
        !           896: }
        !           897: #endif /* emacs */
        !           898:
        !           899: /* Match the pattern described by `pbufp'
        !           900:   against data which is the virtual concatenation of `string1' and `string2'.
        !           901:   `size1' and `size2' are the sizes of the two data strings.
        !           902:   Start the match at position `pos'.
        !           903:   Do not consider matching past the position `mstop'.
        !           904:
        !           905:   If pbufp->fastmap is nonzero, then it had better be up to date.
        !           906:
        !           907:   The reason that the data to match is specified as two components
        !           908:   which are to be regarded as concatenated
        !           909:   is so that this function can be used directly on the contents of an Emacs buffer.
        !           910:
        !           911:   -1 is returned if there is no match. Otherwise the value is the length
        !           912:   of the substring which was matched.
        !           913: */
        !           914:
        !           915: int
        !           916: re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
        !           917:      struct re_pattern_buffer *pbufp;
        !           918:      char *string1, *string2;
        !           919:      int size1, size2;
        !           920:      int pos;
        !           921:      struct re_registers *regs;
        !           922:      int mstop;
        !           923: {
        !           924:   register char *p = pbufp->buffer;
        !           925:   register char *pend = p + pbufp->used;
        !           926:   /* End of first string */
        !           927:   char *end1;
        !           928:   /* End of second string */
        !           929:   char *end2;
        !           930:   /* Pointer just past last char to consider matching */
        !           931:   char *end_match_1, *end_match_2;
        !           932:   register char *d, *dend;
        !           933:   register int mcnt;
        !           934:   char *translate = pbufp->translate;
        !           935:
        !           936:  /* Failure point stack.  Each place that can handle a failure further down the line
        !           937:     pushes a failure point on this stack.  It consists of two char *'s.
        !           938:     The first one pushed is where to resume scanning the pattern;
        !           939:     the second pushed is where to resume scanning the strings.
        !           940:     If the latter is zero, the failure point is a "dummy".
        !           941:     If a failure happens and the innermost failure point is dormant,
        !           942:     it discards that failure point and tries the next one. */
        !           943:
        !           944:   char **stackb = (char **) alloca (2 * NFAILURES * sizeof (char *));
        !           945:   char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
        !           946:
        !           947:   /* Information on the "contents" of registers.
        !           948:      These are pointers into the input strings; they record
        !           949:      just what was matched (on this attempt) by some part of the pattern.
        !           950:      The start_memory command stores the start of a register's contents
        !           951:      and the stop_memory command stores the end.
        !           952:
        !           953:      At that point, regstart[regnum] points to the first character in the register,
        !           954:      regend[regnum] points to the first character beyond the end of the register,
        !           955:      and regstart_segend[regnum] is either the same as regend[regnum]
        !           956:      or else points to the end of the input string into which regstart[regnum] points.
        !           957:      The latter case happens when regstart[regnum] is in string1 and
        !           958:      regend[regnum] is in string2.  */
        !           959:
        !           960:   char *regstart[RE_NREGS];
        !           961:   char *regstart_segend[RE_NREGS];
        !           962:   char *regend[RE_NREGS];
        !           963:
        !           964:   /* Set up pointers to ends of strings.
        !           965:      Don't allow the second string to be empty unless both are empty.  */
        !           966:   if (!size2)
        !           967:     {
        !           968:       string2 = string1;
        !           969:       size2 = size1;
        !           970:       string1 = 0;
        !           971:       size1 = 0;
        !           972:     }
        !           973:   end1 = string1 + size1;
        !           974:   end2 = string2 + size2;
        !           975:
        !           976:   /* Compute where to stop matching, within the two strings */
        !           977:   if (mstop <= size1)
        !           978:     {
        !           979:       end_match_1 = string1 + mstop;
        !           980:       end_match_2 = string2;
        !           981:     }
        !           982:   else
        !           983:     {
        !           984:       end_match_1 = end1;
        !           985:       end_match_2 = string2 + mstop - size1;
        !           986:     }
        !           987:
        !           988:   /* Initialize \( and \) text positions to -1
        !           989:      to mark ones that no \( or \) has been seen for.  */
        !           990:
        !           991:   for (mcnt = 0; mcnt < sizeof (regstart) / sizeof (*regstart); mcnt++)
        !           992:     regstart[mcnt] = (char *) -1;
        !           993:
        !           994:   /* `p' scans through the pattern as `d' scans through the data.
        !           995:      `dend' is the end of the input string that `d' points within.
        !           996:      `d' is advanced into the following input string whenever necessary,
        !           997:      but this happens before fetching;
        !           998:      therefore, at the beginning of the loop,
        !           999:      `d' can be pointing at the end of a string,
        !          1000:      but it cannot equal string2.  */
        !          1001:
        !          1002:   if (pos <= size1)
        !          1003:     d = string1 + pos, dend = end_match_1;
        !          1004:   else
        !          1005:     d = string2 + pos - size1, dend = end_match_2;
        !          1006:
        !          1007: /* Write PREFETCH; just before fetching a character with *d.  */
        !          1008: #define PREFETCH \
        !          1009:  while (d == dend)                                                 \
        !          1010:   { if (dend == end_match_2) goto fail;         /* end of string2 => failure */   \
        !          1011:     d = string2;  /* end of string1 => advance to string2. */      \
        !          1012:     dend = end_match_2; }
        !          1013:
        !          1014:   /* This loop loops over pattern commands.
        !          1015:      It exits by returning from the function if match is complete,
        !          1016:      or it drops through if match fails at this starting point in the input data. */
        !          1017:
        !          1018:   while (1)
        !          1019:     {
        !          1020:       if (p == pend)
        !          1021:        /* End of pattern means we have succeeded! */
        !          1022:        {
        !          1023:          /* If caller wants register contents data back, convert it to indices */
        !          1024:          if (regs)
        !          1025:            {
        !          1026:              regend[0] = d;
        !          1027:              regstart[0] = string1;
        !          1028:              for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
        !          1029:                {
        !          1030:                  if ((mcnt != 0) && regstart[mcnt] == (char *) -1)
        !          1031:                    {
        !          1032:                      regs->start[mcnt] = -1;
        !          1033:                      regs->end[mcnt] = -1;
        !          1034:                      continue;
        !          1035:                    }
        !          1036:                  if (regstart[mcnt] - string1 < 0 ||
        !          1037:                      regstart[mcnt] - string1 > size1)
        !          1038:                    regs->start[mcnt] = regstart[mcnt] - string2 + size1;
        !          1039:                  else
        !          1040:                    regs->start[mcnt] = regstart[mcnt] - string1;
        !          1041:                  if (regend[mcnt] - string1 < 0 ||
        !          1042:                      regend[mcnt] - string1 > size1)
        !          1043:                    regs->end[mcnt] = regend[mcnt] - string2 + size1;
        !          1044:                  else
        !          1045:                    regs->end[mcnt] = regend[mcnt] - string1;
        !          1046:                }
        !          1047:              regs->start[0] = pos;
        !          1048:            }
        !          1049:          if (d - string1 >= 0 && d - string1 <= size1)
        !          1050:            return d - string1 - pos;
        !          1051:          else
        !          1052:            return d - string2 + size1 - pos;
        !          1053:        }
        !          1054:
        !          1055:       /* Otherwise match next pattern command */
        !          1056:       switch ((regexpcode) *p++)
        !          1057:        {
        !          1058:
        !          1059:        /* \( is represented by a start_memory, \) by a stop_memory.
        !          1060:            Both of those commands contain a "register number" argument.
        !          1061:            The text matched within the \( and \) is recorded under that number.
        !          1062:            Then, \<digit> turns into a `duplicate' command which
        !          1063:            is followed by the numeric value of <digit> as the register number. */
        !          1064:
        !          1065:        case start_memory:
        !          1066:          regstart[*p] = d;
        !          1067:          regstart_segend[*p++] = dend;
        !          1068:          break;
        !          1069:
        !          1070:        case stop_memory:
        !          1071:          regend[*p] = d;
        !          1072:          if (regstart_segend[*p] == dend)
        !          1073:            regstart_segend[*p] = d;
        !          1074:          p++;
        !          1075:          break;
        !          1076:
        !          1077:        case duplicate:
        !          1078:          {
        !          1079:            int regno = *p++;   /* Get which register to match against */
        !          1080:            register char *d2, *dend2;
        !          1081:
        !          1082:            d2 = regstart[regno];
        !          1083:            dend2 = regstart_segend[regno];
        !          1084:            while (1)
        !          1085:              {
        !          1086:                /* Advance to next segment in register contents, if necessary */
        !          1087:                while (d2 == dend2)
        !          1088:                  {
        !          1089:                    if (dend2 == end_match_2) break;
        !          1090:                    if (dend2 == regend[regno]) break;
        !          1091:                    d2 = string2, dend2 = regend[regno];  /* end of string1 => advance to string2. */
        !          1092:                  }
        !          1093:                /* At end of register contents => success */
        !          1094:                if (d2 == dend2) break;
        !          1095:
        !          1096:                /* Advance to next segment in data being matched, if necessary */
        !          1097:                PREFETCH;
        !          1098:
        !          1099:                /* mcnt gets # consecutive chars to compare */
        !          1100:                mcnt = dend - d;
        !          1101:                if (mcnt > dend2 - d2)
        !          1102:                  mcnt = dend2 - d2;
        !          1103:                /* Compare that many; failure if mismatch, else skip them. */
        !          1104:                if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
        !          1105:                  goto fail;
        !          1106:                d += mcnt, d2 += mcnt;
        !          1107:              }
        !          1108:          }
        !          1109:          break;
        !          1110:
        !          1111:        case anychar:
        !          1112:          /* fetch a data character */
        !          1113:          PREFETCH;
        !          1114:          /* Match anything but a newline.  */
        !          1115:          if ((translate ? translate[*d++] : *d++) == '\n')
        !          1116:            goto fail;
        !          1117:          break;
        !          1118:
        !          1119:        case charset:
        !          1120:        case charset_not:
        !          1121:          {
        !          1122:            /* Nonzero for charset_not */
        !          1123:            int not = 0;
        !          1124:            register int c;
        !          1125:            if (*(p - 1) == (char) charset_not)
        !          1126:              not = 1;
        !          1127:
        !          1128:            /* fetch a data character */
        !          1129:            PREFETCH;
        !          1130:
        !          1131:            if (translate)
        !          1132:              c = translate [*(unsigned char *)d];
        !          1133:            else
        !          1134:              c = *(unsigned char *)d;
        !          1135:
        !          1136:            if (c < *p * BYTEWIDTH
        !          1137:                && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
        !          1138:              not = !not;
        !          1139:
        !          1140:            p += 1 + *p;
        !          1141:
        !          1142:            if (!not) goto fail;
        !          1143:            d++;
        !          1144:            break;
        !          1145:          }
        !          1146:
        !          1147:        case begline:
        !          1148:          if (d == string1 || d[-1] == '\n')
        !          1149:            break;
        !          1150:          goto fail;
        !          1151:
        !          1152:        case endline:
        !          1153:          if (d == end2
        !          1154:              || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
        !          1155:            break;
        !          1156:          goto fail;
        !          1157:
        !          1158:        /* "or" constructs ("|") are handled by starting each alternative
        !          1159:            with an on_failure_jump that points to the start of the next alternative.
        !          1160:            Each alternative except the last ends with a jump to the joining point.
        !          1161:            (Actually, each jump except for the last one really jumps
        !          1162:             to the following jump, because tensioning the jumps is a hassle.) */
        !          1163:
        !          1164:        /* The start of a stupid repeat has an on_failure_jump that points
        !          1165:           past the end of the repeat text.
        !          1166:           This makes a failure point so that, on failure to match a repetition,
        !          1167:           matching restarts past as many repetitions have been found
        !          1168:           with no way to fail and look for another one.  */
        !          1169:
        !          1170:        /* A smart repeat is similar but loops back to the on_failure_jump
        !          1171:           so that each repetition makes another failure point. */
        !          1172:
        !          1173:        case on_failure_jump:
        !          1174:          if (stackp == stacke)
        !          1175:            {
        !          1176:              char **stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *));
        !          1177:              bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
        !          1178:              stackp += stackx - stackb;
        !          1179:              stacke = stackx + 2 * (stacke - stackb);
        !          1180:              stackb = stackx;
        !          1181:            }
        !          1182:          mcnt = *p++ & 0377;
        !          1183:          mcnt += SIGN_EXTEND_CHAR (*p++) << 8;
        !          1184:          *stackp++ = mcnt + p;
        !          1185:          *stackp++ = d;
        !          1186:          break;
        !          1187:
        !          1188:        /* The end of a smart repeat has an maybe_finalize_jump back.
        !          1189:           Change it either to a finalize_jump or an ordinary jump. */
        !          1190:
        !          1191:        case maybe_finalize_jump:
        !          1192:          mcnt = *p++ & 0377;
        !          1193:          mcnt += SIGN_EXTEND_CHAR (*p++) << 8;
        !          1194:          /* Compare what follows with the begining of the repeat.
        !          1195:             If we can establish that there is nothing that they would
        !          1196:             both match, we can change to finalize_jump */
        !          1197:          if (p == pend)
        !          1198:            p[-3] = (char) finalize_jump;
        !          1199:          else if (*p == (char) exactn || *p == (char) endline)
        !          1200:            {
        !          1201:              register int c = *p == (char) endline ? '\n' : p[2];
        !          1202:              register char *p1 = p + mcnt;
        !          1203:              /* p1[0] ... p1[2] are an on_failure_jump.
        !          1204:                 Examine what follows that */
        !          1205:              if (p1[3] == (char) exactn && p1[5] != c)
        !          1206:                p[-3] = (char) finalize_jump;
        !          1207:              else if (p1[3] == (char) charset || p1[3] == (char) charset_not)
        !          1208:                {
        !          1209:                  int not = p1[3] == (char) charset_not;
        !          1210:                  if (c < p1[4] * BYTEWIDTH
        !          1211:                      && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
        !          1212:                    not = !not;
        !          1213:                  /* not is 1 if c would match */
        !          1214:                  /* That means it is not safe to finalize */
        !          1215:                  if (!not)
        !          1216:                    p[-3] = (char) finalize_jump;
        !          1217:                }
        !          1218:            }
        !          1219:          p -= 2;
        !          1220:          if (p[-1] != (char) finalize_jump)
        !          1221:            {
        !          1222:              p[-1] = (char) jump;
        !          1223:              goto nofinalize;
        !          1224:            }
        !          1225:
        !          1226:        /* The end of a stupid repeat has a finalize-jump
        !          1227:           back to the start, where another failure point will be made
        !          1228:           which will point after all the repetitions found so far. */
        !          1229:
        !          1230:        case finalize_jump:
        !          1231:          stackp -= 2;
        !          1232:
        !          1233:        case jump:
        !          1234:        nofinalize:
        !          1235:          mcnt = *p++ & 0377;
        !          1236:          mcnt += SIGN_EXTEND_CHAR (*p++) << 8;
        !          1237:          p += mcnt;
        !          1238:          break;
        !          1239:
        !          1240:        case dummy_failure_jump:
        !          1241:          if (stackp == stacke)
        !          1242:            {
        !          1243:              char **stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *));
        !          1244:              bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
        !          1245:              stackp += stackx - stackb;
        !          1246:              stacke = stackx + 2 * (stacke - stackb);
        !          1247:              stackb = stackx;
        !          1248:            }
        !          1249:          *stackp++ = 0;
        !          1250:          *stackp++ = 0;
        !          1251:          goto nofinalize;
        !          1252:
        !          1253:        case wordbound:
        !          1254:          if (d == string1  /* Points to first char */
        !          1255:              || d == end2  /* Points to end */
        !          1256:              || (d == end1 && size2 == 0)) /* Points to end */
        !          1257:            break;
        !          1258:          if ((SYNTAX (((unsigned char *)d)[-1]) == Sword)
        !          1259:              != (SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) == Sword))
        !          1260:            break;
        !          1261:          goto fail;
        !          1262:
        !          1263:        case notwordbound:
        !          1264:          if (d == string1  /* Points to first char */
        !          1265:              || d == end2  /* Points to end */
        !          1266:              || (d == end1 && size2 == 0)) /* Points to end */
        !          1267:            goto fail;
        !          1268:          if ((SYNTAX (((unsigned char *)d)[-1]) == Sword)
        !          1269:              != (SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) == Sword))
        !          1270:            goto fail;
        !          1271:          break;
        !          1272:
        !          1273:        case wordbeg:
        !          1274:          if (d == end2  /* Points to end */
        !          1275:              || (d == end1 && size2 == 0) /* Points to end */
        !          1276:              || SYNTAX (*(unsigned char *) (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
        !          1277:            goto fail;
        !          1278:          if (d == string1  /* Points to first char */
        !          1279:              || SYNTAX (((unsigned char *)d)[-1]) != Sword)  /* prev char not letter */
        !          1280:            break;
        !          1281:          goto fail;
        !          1282:
        !          1283:        case wordend:
        !          1284:          if (d == string1  /* Points to first char */
        !          1285:              || SYNTAX (((unsigned char *)d)[-1]) != Sword)  /* prev char not letter */
        !          1286:            goto fail;
        !          1287:          if (d == end2  /* Points to end */
        !          1288:              || (d == end1 && size2 == 0) /* Points to end */
        !          1289:              || SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) != Sword) /* Next char not a letter */
        !          1290:            break;
        !          1291:          goto fail;
        !          1292:
        !          1293: #ifdef emacs
        !          1294:        case before_dot:
        !          1295:          if (((d - string2 <= (unsigned) size2)
        !          1296:               ? d - (char *) bf_p2 : d - (char *) bf_p1)
        !          1297:              <= point)
        !          1298:            goto fail;
        !          1299:          break;
        !          1300:
        !          1301:        case at_dot:
        !          1302:          if (((d - string2 <= (unsigned) size2)
        !          1303:               ? d - (char *) bf_p2 : d - (char *) bf_p1)
        !          1304:              == point)
        !          1305:            goto fail;
        !          1306:          break;
        !          1307:
        !          1308:        case after_dot:
        !          1309:          if (((d - string2 <= (unsigned) size2)
        !          1310:               ? d - (char *) bf_p2 : d - (char *) bf_p1)
        !          1311:              >= point)
        !          1312:            goto fail;
        !          1313:          break;
        !          1314:
        !          1315:        case wordchar:
        !          1316:          mcnt = (int) Sword;
        !          1317:          goto matchsyntax;
        !          1318:
        !          1319:        case syntaxspec:
        !          1320:          mcnt = *p++;
        !          1321:        matchsyntax:
        !          1322:          PREFETCH;
        !          1323:          if (SYNTAX (*(unsigned char *)d++) != (enum syntaxcode) mcnt) goto fail;
        !          1324:          break;
        !          1325:
        !          1326:        case notwordchar:
        !          1327:          mcnt = (int) Sword;
        !          1328:          goto matchnotsyntax;
        !          1329:
        !          1330:        case notsyntaxspec:
        !          1331:          mcnt = *p++;
        !          1332:        matchnotsyntax:
        !          1333:          PREFETCH;
        !          1334:          if (SYNTAX (*(unsigned char *)d++) == (enum syntaxcode) mcnt) goto fail;
        !          1335:          break;
        !          1336: #else
        !          1337:        case wordchar:
        !          1338:          PREFETCH;
        !          1339:          if (SYNTAX (*(unsigned char *)d++) == 0) goto fail;
        !          1340:          break;
        !          1341:
        !          1342:        case notwordchar:
        !          1343:          PREFETCH;
        !          1344:          if (SYNTAX (*(unsigned char *)d++) != 0) goto fail;
        !          1345:          break;
        !          1346: #endif not emacs
        !          1347:
        !          1348:        case begbuf:
        !          1349:          if (d == string1)     /* Note, d cannot equal string2 */
        !          1350:            break;              /* unless string1 == string2.  */
        !          1351:          goto fail;
        !          1352:
        !          1353:        case endbuf:
        !          1354:          if (d == end2 || (d == end1 && size2 == 0))
        !          1355:            break;
        !          1356:          goto fail;
        !          1357:
        !          1358:        case exactn:
        !          1359:          /* Match the next few pattern characters exactly.
        !          1360:             mcnt is how many characters to match. */
        !          1361:          mcnt = *p++;
        !          1362:          if (translate)
        !          1363:            {
        !          1364:              do
        !          1365:                {
        !          1366:                  PREFETCH;
        !          1367:                  if (translate[*(unsigned char *)d++] != *p++) goto fail;
        !          1368:                }
        !          1369:              while (--mcnt);
        !          1370:            }
        !          1371:          else
        !          1372:            {
        !          1373:              do
        !          1374:                {
        !          1375:                  PREFETCH;
        !          1376:                  if (*d++ != *p++) goto fail;
        !          1377:                }
        !          1378:              while (--mcnt);
        !          1379:            }
        !          1380:          break;
        !          1381:        }
        !          1382:       continue;           /* Successfully matched one pattern command; keep matching */
        !          1383:
        !          1384:       /* Jump here if any matching operation fails. */
        !          1385:     fail:
        !          1386:       if (stackp != stackb)
        !          1387:        /* A restart point is known.  Restart there and pop it. */
        !          1388:        {
        !          1389:          if (!stackp[-2])
        !          1390:            {   /* If innermost failure point is dormant, flush it and keep looking */
        !          1391:              stackp -= 2;
        !          1392:              goto fail;
        !          1393:            }
        !          1394:          d = *--stackp;
        !          1395:          p = *--stackp;
        !          1396:          if (d >= string1 && d <= end1)
        !          1397:            dend = end_match_1;
        !          1398:        }
        !          1399:       else break;   /* Matching at this starting point really fails! */
        !          1400:     }
        !          1401:   return -1;        /* Failure to match */
        !          1402: }
        !          1403:
        !          1404: static int
        !          1405: bcmp_translate (s1, s2, len, translate)
        !          1406:      char *s1, *s2;
        !          1407:      register int len;
        !          1408:      char *translate;
        !          1409: {
        !          1410:   register char *p1 = s1, *p2 = s2;
        !          1411:   while (len)
        !          1412:     {
        !          1413:       if (translate [*p1++] != translate [*p2++]) return 1;
        !          1414:       len--;
        !          1415:     }
        !          1416:   return 0;
        !          1417: }
        !          1418: 
        !          1419: /* Entry points compatible with bsd4.2 regex library */
        !          1420:
        !          1421: #ifndef emacs
        !          1422:
        !          1423: static struct re_pattern_buffer re_comp_buf;
        !          1424:
        !          1425: char *
        !          1426: re_comp (s)
        !          1427:      char *s;
        !          1428: {
        !          1429:   if (!s)
        !          1430:     {
        !          1431:       if (!re_comp_buf.buffer)
        !          1432:        return "No previous regular expression";
        !          1433:       return 0;
        !          1434:     }
        !          1435:
        !          1436:   if (!re_comp_buf.buffer)
        !          1437:     {
        !          1438:       if (!(re_comp_buf.buffer = (char *) malloc (200)))
        !          1439:        return "Memory exhausted";
        !          1440:       re_comp_buf.allocated = 200;
        !          1441:       if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
        !          1442:        return "Memory exhausted";
        !          1443:     }
        !          1444:   return re_compile_pattern (s, strlen (s), &re_comp_buf);
        !          1445: }
        !          1446:
        !          1447: int
        !          1448: re_exec (s)
        !          1449:      char *s;
        !          1450: {
        !          1451:   int len = strlen (s);
        !          1452:   return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
        !          1453: }
        !          1454:
        !          1455: #endif /* emacs */
        !          1456: 
        !          1457: #ifdef test
        !          1458:
        !          1459: #include <stdio.h>
        !          1460:
        !          1461: /* Indexed by a character, gives the upper case equivalent of the character */
        !          1462:
        !          1463: static char upcase[0400] =
        !          1464:   { 000, 001, 002, 003, 004, 005, 006, 007,
        !          1465:     010, 011, 012, 013, 014, 015, 016, 017,
        !          1466:     020, 021, 022, 023, 024, 025, 026, 027,
        !          1467:     030, 031, 032, 033, 034, 035, 036, 037,
        !          1468:     040, 041, 042, 043, 044, 045, 046, 047,
        !          1469:     050, 051, 052, 053, 054, 055, 056, 057,
        !          1470:     060, 061, 062, 063, 064, 065, 066, 067,
        !          1471:     070, 071, 072, 073, 074, 075, 076, 077,
        !          1472:     0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
        !          1473:     0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
        !          1474:     0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
        !          1475:     0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
        !          1476:     0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
        !          1477:     0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
        !          1478:     0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
        !          1479:     0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
        !          1480:     0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
        !          1481:     0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
        !          1482:     0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
        !          1483:     0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
        !          1484:     0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
        !          1485:     0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
        !          1486:     0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
        !          1487:     0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
        !          1488:     0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
        !          1489:     0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
        !          1490:     0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
        !          1491:     0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
        !          1492:     0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
        !          1493:     0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
        !          1494:     0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
        !          1495:     0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
        !          1496:   };
        !          1497:
        !          1498: main ()
        !          1499: {
        !          1500:   char pat[80];
        !          1501:   struct re_pattern_buffer buf;
        !          1502:   struct re_registers regs;
        !          1503:   int i;
        !          1504:   char c;
        !          1505:   char fastmap[(1 << BYTEWIDTH)];
        !          1506:
        !          1507:   buf.allocated = 40;
        !          1508:   buf.buffer = (char *) malloc (buf.allocated);
        !          1509:   buf.fastmap = fastmap;
        !          1510:   buf.translate = upcase;
        !          1511:
        !          1512:   while (1)
        !          1513:     {
        !          1514:       printf("Enter pattern\n");
        !          1515:       gets (pat);
        !          1516:
        !          1517:       if (*pat)
        !          1518:        {
        !          1519:          re_compile_pattern (pat, strlen(pat), &buf);
        !          1520:
        !          1521:          for (i = 0; i < buf.used; i++)
        !          1522:            printchar (buf.buffer[i]);
        !          1523:
        !          1524:          putchar ('\n');
        !          1525:
        !          1526:          printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
        !          1527:
        !          1528:          re_compile_fastmap (&buf);
        !          1529:          printf ("Allowed by fastmap: ");
        !          1530:          for (i = 0; i < (1 << BYTEWIDTH); i++)
        !          1531:            if (fastmap[i]) printchar (i);
        !          1532:          putchar ('\n');
        !          1533:        }
        !          1534:
        !          1535:       printf("enter string to search\n");
        !          1536:       gets (pat);      /* Now read the string to match against */
        !          1537:
        !          1538: /*    i = re_match (&buf, pat, strlen (pat), 0, 0); */
        !          1539:       i = re_search (&buf, pat, strlen (pat), 0, strlen (pat), &regs);
        !          1540:       printf ("Match value %d.\n", i);
        !          1541:       for (i=0; i < RE_NREGS; i++)
        !          1542:        printf("%2d start %2d  end %2d\n", i, regs.start[i], regs.end[i]);
        !          1543:     }
        !          1544: }
        !          1545:
        !          1546: #ifdef NOTDEF
        !          1547: print_buf (bufp)
        !          1548:      struct re_pattern_buffer *bufp;
        !          1549: {
        !          1550:   int i;
        !          1551:
        !          1552:   printf ("buf is :\n----------------\n");
        !          1553:   for (i = 0; i < bufp->used; i++)
        !          1554:     printchar (bufp->buffer[i]);
        !          1555:
        !          1556:   printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
        !          1557:
        !          1558:   printf ("Allowed by fastmap: ");
        !          1559:   for (i = 0; i < (1 << BYTEWIDTH); i++)
        !          1560:     if (bufp->fastmap[i])
        !          1561:       printchar (i);
        !          1562:   printf ("\nAllowed by translate: ");
        !          1563:   if (bufp->translate)
        !          1564:     for (i = 0; i < (1 << BYTEWIDTH); i++)
        !          1565:       if (bufp->translate[i])
        !          1566:        printchar (i);
        !          1567:   printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
        !          1568:   printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
        !          1569: }
        !          1570: #endif
        !          1571:
        !          1572: printchar (c)
        !          1573:      char c;
        !          1574: {
        !          1575:   if (c < 041 || c >= 0177)
        !          1576:     {
        !          1577:       putchar ('\\');
        !          1578:       putchar (((c >> 6) & 3) + '0');
        !          1579:       putchar (((c >> 3) & 7) + '0');
        !          1580:       putchar ((c & 7) + '0');
        !          1581:     }
        !          1582:   else
        !          1583:     putchar (c);
        !          1584: }
        !          1585:
        !          1586: error (string)
        !          1587:      char *string;
        !          1588: {
        !          1589:   puts (string);
        !          1590:   exit (1);
        !          1591: }
        !          1592:
        !          1593: #endif /* test */
        !          1594: #endif /* REGEX */