Annotation of src/usr.bin/mg/regex.c, Revision 1.1
1.1 ! deraadt 1: /* Imagine Generic gnuemacs copyright notice here */
! 2:
! 3: /* modified by Robert Larson to make fewer assumptions about the compiler */
! 4: /* (Making it useful to mg) (#if not supported by osk, enum not supported by many) */
! 5:
! 6: /* To test, compile with -Dtest.
! 7: This Dtestable feature turns this into a self-contained program
! 8: which reads a pattern, describes how it compiles,
! 9: then reads a string and searches for it. */
! 10:
! 11:
! 12: #ifdef REGEX
! 13: #include "def.h" /* defines VOID etc. for mg */
! 14:
! 15: #ifdef emacs
! 16:
! 17: /* The `emacs' switch turns on certain special matching commands
! 18: that make sense only in emacs. */
! 19:
! 20: #include "config.h"
! 21: #include "lisp.h"
! 22: #include "buffer.h"
! 23: #include "syntax.h"
! 24:
! 25: #else /* not emacs */
! 26:
! 27: /*
! 28: * Define the syntax stuff, so we can do the \<...\> things.
! 29: */
! 30:
! 31: #ifndef Sword /* must be non-zero in some of the tests below... */
! 32: #define Sword 1
! 33: #endif
! 34:
! 35: #define SYNTAX(c) re_syntax_table[c]
! 36:
! 37: #ifdef SYNTAX_TABLE
! 38:
! 39: char *re_syntax_table;
! 40:
! 41: #else
! 42:
! 43: static char re_syntax_table[256];
! 44:
! 45: static VOID
! 46: init_syntax_once ()
! 47: {
! 48: register int c;
! 49: static int done = 0;
! 50:
! 51: if (done)
! 52: return;
! 53:
! 54: bzero (re_syntax_table, sizeof re_syntax_table);
! 55:
! 56: for (c = 'a'; c <= 'z'; c++)
! 57: re_syntax_table[c] = Sword;
! 58:
! 59: for (c = 'A'; c <= 'Z'; c++)
! 60: re_syntax_table[c] = Sword;
! 61:
! 62: for (c = '0'; c <= '9'; c++)
! 63: re_syntax_table[c] = Sword;
! 64:
! 65: done = 1;
! 66: }
! 67:
! 68: #endif /* SYNTAX_TABLE */
! 69: #endif /* not emacs */
! 70:
! 71: #include "regex.h"
! 72:
! 73: /* Number of failure points to allocate space for initially,
! 74: when matching. If this number is exceeded, more space is allocated,
! 75: so it is not a hard limit. */
! 76:
! 77: #ifndef NFAILURES
! 78: #define NFAILURES 80
! 79: #endif NFAILURES
! 80:
! 81: /* width of a byte in bits */
! 82:
! 83: #define BYTEWIDTH 8
! 84:
! 85: /* These are the command codes that appear in compiled regular expressions, one per byte.
! 86: Some command codes are followed by argument bytes.
! 87: A command code can specify any interpretation whatever for its arguments.
! 88: Zero-bytes may appear in the compiled regular expression. */
! 89:
! 90: typedef char regexpcode;
! 91: #define unused 0
! 92: #define exactn 1 /* followed by one byte giving n, and then by n literal bytes */
! 93: #define begline 2 /* fails unless at beginning of line */
! 94: #define endline 3 /* fails unless at end of line */
! 95: #define jump 4 /* followed by two bytes giving relative address to jump to */
! 96: #define on_failure_jump 5 /* followed by two bytes giving relative address of place */
! 97: /* to resume at in case of failure. */
! 98: #define finalize_jump 6 /* Throw away latest failure point and then jump to address. */
! 99: #define maybe_finalize_jump 7 /* Like jump but finalize if safe to do so. */
! 100: /* This is used to jump back to the beginning
! 101: of a repeat. If the command that follows
! 102: this jump is clearly incompatible with the
! 103: one at the beginning of the repeat, such that
! 104: we can be sure that there is no use backtracking
! 105: out of repetitions already completed,
! 106: then we finalize. */
! 107: #define dummy_failure_jump 8 /* jump, and push a dummy failure point. */
! 108: /* This failure point will be thrown away
! 109: if an attempt is made to use it for a failure.
! 110: A + construct makes this before the first repeat. */
! 111: #define anychar 9 /* matches any one character */
! 112: #define charset 10 /* matches any one char belonging to specified set. */
! 113: /* First following byte is # bitmap bytes.
! 114: Then come bytes for a bit-map saying which chars are in.
! 115: Bits in each byte are ordered low-bit-first.
! 116: A character is in the set if its bit is 1.
! 117: A character too large to have a bit in the map
! 118: is automatically not in the set */
! 119: #define charset_not 11 /* similar but match any character that is NOT one of those specified */
! 120: #define start_memory 12 /* starts remembering the text that is matched */
! 121: /* and stores it in a memory register.
! 122: followed by one byte containing the register number.
! 123: Register numbers must be in the range 0 through NREGS. */
! 124: #define stop_memory 13 /* stops remembering the text that is matched */
! 125: /* and stores it in a memory register.
! 126: followed by one byte containing the register number.
! 127: Register numbers must be in the range 0 through NREGS. */
! 128: #define duplicate 14 /* match a duplicate of something remembered. */
! 129: /* Followed by one byte containing the index of the memory register. */
! 130: #define before_dot 15 /* Succeeds if before dot */
! 131: #define at_dot 16 /* Succeeds if at dot */
! 132: #define after_dot 17 /* Succeeds if after dot */
! 133: #define begbuf 18 /* Succeeds if at beginning of buffer */
! 134: #define endbuf 19 /* Succeeds if at end of buffer */
! 135: #define wordchar 20 /* Matches any word-constituent character */
! 136: #define notwordchar 21 /* Matches any char that is not a word-constituent */
! 137: #define wordbeg 22 /* Succeeds if at word beginning */
! 138: #define wordend 23 /* Succeeds if at word end */
! 139: #define wordbound 24 /* Succeeds if at a word boundary */
! 140: #define notwordbound 25 /* Succeeds if not at a word boundary */
! 141: #define syntaxspec 26 /* Matches any character whose syntax is specified. */
! 142: /* followed by a byte which contains a syntax code, Sword or such like */
! 143: #define notsyntaxspec 27 /* Matches any character whose syntax differs from the specified. */
! 144:
! 145:
! 146: #ifndef SIGN_EXTEND_CHAR
! 147: #define SIGN_EXTEND_CHAR(x) (x)
! 148: #endif
! 149:
! 150: /* compile_pattern takes a regular-expression descriptor string in the user's format
! 151: and converts it into a buffer full of byte commands for matching.
! 152:
! 153: pattern is the address of the pattern string
! 154: size is the length of it.
! 155: bufp is a struct re_pattern_buffer * which points to the info
! 156: on where to store the byte commands.
! 157: This structure contains a char * which points to the
! 158: actual space, which should have been obtained with malloc.
! 159: compile_pattern may use realloc to grow the buffer space.
! 160:
! 161: The number of bytes of commands can be found out by looking in
! 162: the struct re_pattern_buffer that bufp pointed to,
! 163: after compile_pattern returns.
! 164: */
! 165:
! 166: #define PATPUSH(ch) (*b++ = (char) (ch))
! 167:
! 168: #define PATFETCH(c) \
! 169: {if (p == pend) goto end_of_pattern; \
! 170: c = * (unsigned char *) p++; \
! 171: if (translate) c = translate[c]; }
! 172:
! 173: #define PATFETCH_RAW(c) \
! 174: {if (p == pend) goto end_of_pattern; \
! 175: c = * (unsigned char *) p++; }
! 176:
! 177: #define PATUNFETCH p--
! 178:
! 179: #define EXTEND_BUFFER \
! 180: { char *old_buffer = bufp->buffer; \
! 181: if (bufp->allocated == (1<<16)) goto too_big; \
! 182: bufp->allocated *= 2; \
! 183: if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
! 184: if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
! 185: goto memory_exhausted; \
! 186: c = bufp->buffer - old_buffer; \
! 187: b += c; \
! 188: if (fixup_jump) \
! 189: fixup_jump += c; \
! 190: if (laststart) \
! 191: laststart += c; \
! 192: begalt += c; \
! 193: if (pending_exact) \
! 194: pending_exact += c; \
! 195: }
! 196:
! 197: static int store_jump (), insert_jump ();
! 198:
! 199: char *
! 200: re_compile_pattern (pattern, size, bufp)
! 201: char *pattern;
! 202: int size;
! 203: struct re_pattern_buffer *bufp;
! 204: {
! 205: register char *b = bufp->buffer;
! 206: register char *p = pattern;
! 207: char *pend = pattern + size;
! 208: register unsigned c, c1;
! 209: char *p1;
! 210: unsigned char *translate = (unsigned char *) bufp->translate;
! 211:
! 212: /* address of the count-byte of the most recently inserted "exactn" command.
! 213: This makes it possible to tell whether a new exact-match character
! 214: can be added to that command or requires a new "exactn" command. */
! 215:
! 216: char *pending_exact = 0;
! 217:
! 218: /* address of the place where a forward-jump should go
! 219: to the end of the containing expression.
! 220: Each alternative of an "or", except the last, ends with a forward-jump
! 221: of this sort. */
! 222:
! 223: char *fixup_jump = 0;
! 224:
! 225: /* address of start of the most recently finished expression.
! 226: This tells postfix * where to find the start of its operand. */
! 227:
! 228: char *laststart = 0;
! 229:
! 230: /* In processing a repeat, 1 means zero matches is allowed */
! 231:
! 232: char zero_times_ok;
! 233:
! 234: /* In processing a repeat, 1 means many matches is allowed */
! 235:
! 236: char many_times_ok;
! 237:
! 238: /* address of beginning of regexp, or inside of last \( */
! 239:
! 240: char *begalt = b;
! 241:
! 242: /* Stack of information saved by \( and restored by \).
! 243: Four stack elements are pushed by each \(:
! 244: First, the value of b.
! 245: Second, the value of fixup_jump.
! 246: Third, the value of regnum.
! 247: Fourth, the value of begalt. */
! 248:
! 249: int stackb[40];
! 250: int *stackp = stackb;
! 251: int *stacke = stackb + 40;
! 252: int *stackt;
! 253:
! 254: /* Counts \('s as they are encountered. Remembered for the matching \),
! 255: where it becomes the "register number" to put in the stop_memory command */
! 256:
! 257: int regnum = 1;
! 258:
! 259: bufp->fastmap_accurate = 0;
! 260:
! 261: #ifndef SYNTAX_TABLE
! 262: #ifndef emacs
! 263: /*
! 264: * Initialize the syntax table.
! 265: */
! 266: init_syntax_once();
! 267: #endif
! 268: #endif
! 269:
! 270: if (bufp->allocated == 0)
! 271: {
! 272: bufp->allocated = 28;
! 273: if (bufp->buffer)
! 274: /* EXTEND_BUFFER loses when bufp->allocated is 0 */
! 275: bufp->buffer = (char *) realloc (bufp->buffer, 28);
! 276: else
! 277: /* Caller did not allocate a buffer. Do it for him */
! 278: bufp->buffer = (char *) malloc (28);
! 279: if (!bufp->buffer) goto memory_exhausted;
! 280: begalt = b = bufp->buffer;
! 281: }
! 282:
! 283: while (p != pend)
! 284: {
! 285: if (b - bufp->buffer > bufp->allocated - 10)
! 286: /* Note that EXTEND_BUFFER clobbers c */
! 287: EXTEND_BUFFER;
! 288:
! 289: PATFETCH (c);
! 290:
! 291: switch (c)
! 292: {
! 293: case '$':
! 294: /* $ means succeed if at end of line, but only in special contexts.
! 295: If randonly in the middle of a pattern, it is a normal character. */
! 296: if (p == pend || (*p == '\\' && (p[1] == ')' || p[1] == '|')))
! 297: {
! 298: PATPUSH (endline);
! 299: break;
! 300: }
! 301: goto normal_char;
! 302:
! 303: case '^':
! 304: /* ^ means succeed if at beg of line, but only if no preceding pattern. */
! 305: if (laststart) goto normal_char;
! 306: PATPUSH (begline);
! 307: break;
! 308:
! 309: case '*':
! 310: case '+':
! 311: case '?':
! 312: /* If there is no previous pattern, char not special. */
! 313: if (!laststart)
! 314: goto normal_char;
! 315: /* If there is a sequence of repetition chars,
! 316: collapse it down to equivalent to just one. */
! 317: zero_times_ok = 0;
! 318: many_times_ok = 0;
! 319: while (1)
! 320: {
! 321: zero_times_ok |= c != '+';
! 322: many_times_ok |= c != '?';
! 323: if (p == pend)
! 324: break;
! 325: PATFETCH (c);
! 326: if (!(c == '*' || c == '+' || c == '?'))
! 327: {
! 328: PATUNFETCH;
! 329: break;
! 330: }
! 331: }
! 332:
! 333: /* Now we know whether 0 matches is allowed,
! 334: and whether 2 or more matches is allowed. */
! 335: if (many_times_ok)
! 336: {
! 337: /* If more than one repetition is allowed,
! 338: put in a backward jump at the end. */
! 339: store_jump (b, maybe_finalize_jump, laststart - 3);
! 340: b += 3;
! 341: }
! 342: insert_jump (on_failure_jump, laststart, b + 3, b);
! 343: pending_exact = 0;
! 344: b += 3;
! 345: if (!zero_times_ok)
! 346: {
! 347: /* At least one repetition required: insert before the loop
! 348: a skip over the initial on-failure-jump instruction */
! 349: insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
! 350: b += 3;
! 351: }
! 352: break;
! 353:
! 354: case '.':
! 355: laststart = b;
! 356: PATPUSH (anychar);
! 357: break;
! 358:
! 359: case '[':
! 360: if (b - bufp->buffer
! 361: > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
! 362: /* Note that EXTEND_BUFFER clobbers c */
! 363: EXTEND_BUFFER;
! 364:
! 365: laststart = b;
! 366: if (*p == '^')
! 367: PATPUSH (charset_not), p++;
! 368: else
! 369: PATPUSH (charset);
! 370: p1 = p;
! 371:
! 372: PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
! 373: /* Clear the whole map */
! 374: bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
! 375: /* Read in characters and ranges, setting map bits */
! 376: while (1)
! 377: {
! 378: PATFETCH (c);
! 379: if (c == ']' && p != p1 + 1) break;
! 380: if (*p == '-')
! 381: {
! 382: PATFETCH (c1);
! 383: PATFETCH (c1);
! 384: while (c <= c1)
! 385: b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
! 386: }
! 387: else
! 388: {
! 389: b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
! 390: }
! 391: }
! 392: /* Discard any bitmap bytes that are all 0 at the end of the map.
! 393: Decrement the map-length byte too. */
! 394: while (b[-1] > 0 && b[b[-1] - 1] == 0)
! 395: b[-1]--;
! 396: b += b[-1];
! 397: break;
! 398:
! 399: case '\\':
! 400: if (p == pend) goto invalid_pattern;
! 401: PATFETCH_RAW (c);
! 402: switch (c)
! 403: {
! 404: case '(':
! 405: if (stackp == stacke) goto nesting_too_deep;
! 406: if (regnum < RE_NREGS)
! 407: {
! 408: PATPUSH (start_memory);
! 409: PATPUSH (regnum);
! 410: }
! 411: *stackp++ = b - bufp->buffer;
! 412: *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
! 413: *stackp++ = regnum++;
! 414: *stackp++ = begalt - bufp->buffer;
! 415: fixup_jump = 0;
! 416: laststart = 0;
! 417: begalt = b;
! 418: break;
! 419:
! 420: case ')':
! 421: if (stackp == stackb) goto unmatched_close;
! 422: begalt = *--stackp + bufp->buffer;
! 423: if (fixup_jump)
! 424: store_jump (fixup_jump, jump, b);
! 425: if (stackp[-1] < RE_NREGS)
! 426: {
! 427: PATPUSH (stop_memory);
! 428: PATPUSH (stackp[-1]);
! 429: }
! 430: stackp -= 2;
! 431: fixup_jump = 0;
! 432: if (*stackp)
! 433: fixup_jump = *stackp + bufp->buffer - 1;
! 434: laststart = *--stackp + bufp->buffer;
! 435: break;
! 436:
! 437: case '|':
! 438: insert_jump (on_failure_jump, begalt, b + 6, b);
! 439: pending_exact = 0;
! 440: b += 3;
! 441: if (fixup_jump)
! 442: store_jump (fixup_jump, jump, b);
! 443: fixup_jump = b;
! 444: b += 3;
! 445: laststart = 0;
! 446: begalt = b;
! 447: break;
! 448:
! 449: #ifdef emacs
! 450: case '=':
! 451: PATPUSH (at_dot);
! 452: break;
! 453:
! 454: case 's':
! 455: laststart = b;
! 456: PATPUSH (syntaxspec);
! 457: PATFETCH (c);
! 458: PATPUSH (syntax_spec_code[c]);
! 459: break;
! 460:
! 461: case 'S':
! 462: laststart = b;
! 463: PATPUSH (notsyntaxspec);
! 464: PATFETCH (c);
! 465: PATPUSH (syntax_spec_code[c]);
! 466: break;
! 467: #endif emacs
! 468:
! 469: case 'w':
! 470: laststart = b;
! 471: PATPUSH (wordchar);
! 472: break;
! 473:
! 474: case 'W':
! 475: laststart = b;
! 476: PATPUSH (notwordchar);
! 477: break;
! 478:
! 479: case '<':
! 480: PATPUSH (wordbeg);
! 481: break;
! 482:
! 483: case '>':
! 484: PATPUSH (wordend);
! 485: break;
! 486:
! 487: case 'b':
! 488: PATPUSH (wordbound);
! 489: break;
! 490:
! 491: case 'B':
! 492: PATPUSH (notwordbound);
! 493: break;
! 494:
! 495: case '`':
! 496: PATPUSH (begbuf);
! 497: break;
! 498:
! 499: case '\'':
! 500: PATPUSH (endbuf);
! 501: break;
! 502:
! 503: case '1':
! 504: case '2':
! 505: case '3':
! 506: case '4':
! 507: case '5':
! 508: case '6':
! 509: case '7':
! 510: case '8':
! 511: case '9':
! 512: c1 = c - '0';
! 513: if (c1 >= regnum)
! 514: goto normal_char;
! 515: for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
! 516: if (*stackt == c1)
! 517: goto normal_char;
! 518: laststart = b;
! 519: PATPUSH (duplicate);
! 520: PATPUSH (c1);
! 521: break;
! 522: default:
! 523: goto normal_char;
! 524: }
! 525: break;
! 526:
! 527: default:
! 528: normal_char:
! 529: if (!pending_exact || pending_exact + *pending_exact + 1 != b
! 530: || *pending_exact == 0177 || *p == '*' || *p == '^'
! 531: || *p == '+' || *p == '?')
! 532: {
! 533: laststart = b;
! 534: PATPUSH (exactn);
! 535: pending_exact = b;
! 536: PATPUSH (0);
! 537: }
! 538: PATPUSH (c);
! 539: (*pending_exact)++;
! 540: }
! 541: }
! 542:
! 543: if (fixup_jump)
! 544: store_jump (fixup_jump, jump, b);
! 545:
! 546: if (stackp != stackb) goto unmatched_open;
! 547:
! 548: bufp->used = b - bufp->buffer;
! 549: return 0;
! 550:
! 551: invalid_pattern:
! 552: return "Invalid regular expression";
! 553:
! 554: unmatched_open:
! 555: return "Unmatched \\(";
! 556:
! 557: unmatched_close:
! 558: return "Unmatched \\)";
! 559:
! 560: end_of_pattern:
! 561: return "Premature end of regular expression";
! 562:
! 563: nesting_too_deep:
! 564: return "Nesting too deep";
! 565:
! 566: too_big:
! 567: return "Regular expression too big";
! 568:
! 569: memory_exhausted:
! 570: return "Memory exhausted";
! 571: }
! 572:
! 573: /* Store where `from' points a jump operation to jump to where `to' points.
! 574: `opcode' is the opcode to store. */
! 575:
! 576: static int
! 577: store_jump (from, opcode, to)
! 578: char *from, *to;
! 579: char opcode;
! 580: {
! 581: from[0] = opcode;
! 582: from[1] = (to - (from + 3)) & 0377;
! 583: from[2] = (to - (from + 3)) >> 8;
! 584: }
! 585:
! 586: /* Open up space at char FROM, and insert there a jump to TO.
! 587: CURRENT_END gives te end of the storage no in use,
! 588: so we know how much data to copy up.
! 589: OP is the opcode of the jump to insert.
! 590:
! 591: If you call this function, you must zero out pending_exact. */
! 592:
! 593: static int
! 594: insert_jump (op, from, to, current_end)
! 595: char op;
! 596: char *from, *to, *current_end;
! 597: {
! 598: register char *pto = current_end + 3;
! 599: register char *pfrom = current_end;
! 600: while (pfrom != from)
! 601: *--pto = *--pfrom;
! 602: store_jump (from, op, to);
! 603: }
! 604:
! 605: /* Given a pattern, compute a fastmap from it.
! 606: The fastmap records which of the (1 << BYTEWIDTH) possible characters
! 607: can start a string that matches the pattern.
! 608: This fastmap is used by re_search to skip quickly over totally implausible text.
! 609:
! 610: The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
! 611: as bufp->fastmap.
! 612: The other components of bufp describe the pattern to be used. */
! 613:
! 614: VOID
! 615: re_compile_fastmap (bufp)
! 616: struct re_pattern_buffer *bufp;
! 617: {
! 618: unsigned char *pattern = (unsigned char *) bufp->buffer;
! 619: int size = bufp->used;
! 620: register char *fastmap = bufp->fastmap;
! 621: register unsigned char *p = pattern;
! 622: register unsigned char *pend = pattern + size;
! 623: register int j, k;
! 624: unsigned char *translate = (unsigned char *) bufp->translate;
! 625:
! 626: unsigned char *stackb[NFAILURES];
! 627: unsigned char **stackp = stackb;
! 628:
! 629: bzero (fastmap, (1 << BYTEWIDTH));
! 630: bufp->fastmap_accurate = 1;
! 631: bufp->can_be_null = 0;
! 632:
! 633: while (p)
! 634: {
! 635: if (p == pend)
! 636: {
! 637: bufp->can_be_null = 1;
! 638: break;
! 639: }
! 640: switch ((regexpcode) *p++)
! 641: {
! 642: case exactn:
! 643: if (translate)
! 644: fastmap[translate[p[1]]] = 1;
! 645: else
! 646: fastmap[p[1]] = 1;
! 647: break;
! 648:
! 649: case begline:
! 650: case before_dot:
! 651: case at_dot:
! 652: case after_dot:
! 653: case begbuf:
! 654: case endbuf:
! 655: case wordbound:
! 656: case notwordbound:
! 657: case wordbeg:
! 658: case wordend:
! 659: continue;
! 660:
! 661: case endline:
! 662: if (translate)
! 663: fastmap[translate['\n']] = 1;
! 664: else
! 665: fastmap['\n'] = 1;
! 666: bufp->can_be_null = 1;
! 667: break;
! 668:
! 669: case finalize_jump:
! 670: case maybe_finalize_jump:
! 671: case jump:
! 672: case dummy_failure_jump:
! 673: bufp->can_be_null = 1;
! 674: j = *p++ & 0377;
! 675: j += SIGN_EXTEND_CHAR (*(char *)p++) << 8;
! 676: p += j;
! 677: if (j > 0)
! 678: continue;
! 679: /* Jump backward reached implies we just went through
! 680: the body of a loop and matched nothing.
! 681: Opcode jumped to should be an on_failure_jump.
! 682: Just treat it like an ordinary jump.
! 683: For a * loop, it has pushed its failure point already;
! 684: if so, discard that as redundant. */
! 685: if ((regexpcode) *p != on_failure_jump)
! 686: continue;
! 687: p++;
! 688: j = *p++ & 0377;
! 689: j += SIGN_EXTEND_CHAR (*(char *)p++) << 8;
! 690: p += j;
! 691: if (stackp != stackb && *stackp == p)
! 692: stackp--;
! 693: continue;
! 694:
! 695: case on_failure_jump:
! 696: j = *p++ & 0377;
! 697: j += SIGN_EXTEND_CHAR (*(char *)p++) << 8;
! 698: *++stackp = p + j;
! 699: continue;
! 700:
! 701: case start_memory:
! 702: case stop_memory:
! 703: p++;
! 704: continue;
! 705:
! 706: case duplicate:
! 707: bufp->can_be_null = 1;
! 708: fastmap['\n'] = 1;
! 709: case anychar:
! 710: for (j = 0; j < (1 << BYTEWIDTH); j++)
! 711: if (j != '\n')
! 712: fastmap[j] = 1;
! 713: if (bufp->can_be_null)
! 714: return;
! 715: /* Don't return; check the alternative paths
! 716: so we can set can_be_null if appropriate. */
! 717: break;
! 718:
! 719: case wordchar:
! 720: for (j = 0; j < (1 << BYTEWIDTH); j++)
! 721: if (SYNTAX (j) == Sword)
! 722: fastmap[j] = 1;
! 723: break;
! 724:
! 725: case notwordchar:
! 726: for (j = 0; j < (1 << BYTEWIDTH); j++)
! 727: if (SYNTAX (j) != Sword)
! 728: fastmap[j] = 1;
! 729: break;
! 730:
! 731: #ifdef emacs
! 732: case syntaxspec:
! 733: k = *p++;
! 734: for (j = 0; j < (1 << BYTEWIDTH); j++)
! 735: if (SYNTAX (j) == (enum syntaxcode) k)
! 736: fastmap[j] = 1;
! 737: break;
! 738:
! 739: case notsyntaxspec:
! 740: for (j = 0; j < (1 << BYTEWIDTH); j++)
! 741: if (SYNTAX (j) != (enum syntaxcode) k)
! 742: fastmap[j] = 1;
! 743: break;
! 744: #endif emacs
! 745:
! 746: case charset:
! 747: for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
! 748: if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
! 749: {
! 750: if (translate)
! 751: fastmap[translate[j]] = 1;
! 752: else
! 753: fastmap[j] = 1;
! 754: }
! 755: break;
! 756:
! 757: case charset_not:
! 758: /* Chars beyond end of map must be allowed */
! 759: for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
! 760: if (translate)
! 761: fastmap[translate[j]] = 1;
! 762: else
! 763: fastmap[j] = 1;
! 764:
! 765: for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
! 766: if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
! 767: {
! 768: if (translate)
! 769: fastmap[translate[j]] = 1;
! 770: else
! 771: fastmap[j] = 1;
! 772: }
! 773: break;
! 774: }
! 775:
! 776: /* Get here means we have successfully found the possible starting characters
! 777: of one path of the pattern. We need not follow this path any farther.
! 778: Instead, look at the next alternative remembered in the stack. */
! 779: if (stackp != stackb)
! 780: p = *stackp--;
! 781: else
! 782: break;
! 783: }
! 784: }
! 785:
! 786: /* Like re_search_2, below, but only one string is specified. */
! 787:
! 788: int
! 789: re_search (pbufp, string, size, startpos, range, regs)
! 790: struct re_pattern_buffer *pbufp;
! 791: char *string;
! 792: int size, startpos, range;
! 793: struct re_registers *regs;
! 794: {
! 795: return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
! 796: }
! 797:
! 798: /* Like re_match_2 but tries first a match starting at index `startpos',
! 799: then at startpos + 1, and so on.
! 800: `range' is the number of places to try before giving up.
! 801: If `range' is negative, the starting positions tried are
! 802: startpos, startpos - 1, etc.
! 803: It is up to the caller to make sure that range is not so large
! 804: as to take the starting position outside of the input strings.
! 805:
! 806: The value returned is the position at which the match was found,
! 807: or -1 if no match was found. */
! 808:
! 809: int
! 810: re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
! 811: struct re_pattern_buffer *pbufp;
! 812: char *string1, *string2;
! 813: int size1, size2;
! 814: int startpos;
! 815: register int range;
! 816: struct re_registers *regs;
! 817: int mstop;
! 818: {
! 819: register char *fastmap = pbufp->fastmap;
! 820: register char *translate = pbufp->translate;
! 821: int total = size1 + size2;
! 822:
! 823: /* Update the fastmap now if not correct already */
! 824: if (fastmap && !pbufp->fastmap_accurate)
! 825: re_compile_fastmap (pbufp);
! 826:
! 827: while (1)
! 828: {
! 829: /* If a fastmap is supplied, skip quickly over characters
! 830: that cannot possibly be the start of a match.
! 831: Note, however, that if the pattern can possibly match
! 832: the null string, we must test it at each starting point
! 833: so that we take the first null string we get. */
! 834:
! 835: if (fastmap && startpos < total && !pbufp->can_be_null)
! 836: {
! 837: if (range > 0)
! 838: {
! 839: register int lim = 0;
! 840: register char *p;
! 841: int irange = range;
! 842: if (startpos < size1 && startpos + range >= size1)
! 843: lim = range - (size1 - startpos);
! 844:
! 845: p = &(startpos >= size1 ? string2 - size1 : string1)[startpos];
! 846:
! 847: if (translate)
! 848: {
! 849: while (range > lim && !fastmap[translate[*p++]])
! 850: range--;
! 851: }
! 852: else
! 853: {
! 854: while (range > lim && !fastmap[*p++])
! 855: range--;
! 856: }
! 857: startpos += irange - range;
! 858: }
! 859: else
! 860: {
! 861: register char c;
! 862: if (startpos >= size1) c = string2[startpos - size1];
! 863: else c = string1[startpos];
! 864: if (translate ? !fastmap[translate[c]] : !fastmap[c])
! 865: goto advance;
! 866: }
! 867: }
! 868:
! 869: if (range >= 0 && startpos == total
! 870: && fastmap && !pbufp->can_be_null)
! 871: return -1;
! 872:
! 873: if (0 <= re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop))
! 874: return startpos;
! 875:
! 876: #ifdef C_ALLOCA
! 877: alloca (0);
! 878: #endif /* C_ALLOCA */
! 879:
! 880: advance:
! 881: if (!range) break;
! 882: if (range > 0) range--, startpos++; else range++, startpos--;
! 883: }
! 884: return -1;
! 885: }
! 886:
! 887: #ifndef emacs /* emacs never uses this */
! 888: int
! 889: re_match (pbufp, string, size, pos, regs)
! 890: struct re_pattern_buffer *pbufp;
! 891: char *string;
! 892: int size, pos;
! 893: struct re_registers *regs;
! 894: {
! 895: return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
! 896: }
! 897: #endif /* emacs */
! 898:
! 899: /* Match the pattern described by `pbufp'
! 900: against data which is the virtual concatenation of `string1' and `string2'.
! 901: `size1' and `size2' are the sizes of the two data strings.
! 902: Start the match at position `pos'.
! 903: Do not consider matching past the position `mstop'.
! 904:
! 905: If pbufp->fastmap is nonzero, then it had better be up to date.
! 906:
! 907: The reason that the data to match is specified as two components
! 908: which are to be regarded as concatenated
! 909: is so that this function can be used directly on the contents of an Emacs buffer.
! 910:
! 911: -1 is returned if there is no match. Otherwise the value is the length
! 912: of the substring which was matched.
! 913: */
! 914:
! 915: int
! 916: re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
! 917: struct re_pattern_buffer *pbufp;
! 918: char *string1, *string2;
! 919: int size1, size2;
! 920: int pos;
! 921: struct re_registers *regs;
! 922: int mstop;
! 923: {
! 924: register char *p = pbufp->buffer;
! 925: register char *pend = p + pbufp->used;
! 926: /* End of first string */
! 927: char *end1;
! 928: /* End of second string */
! 929: char *end2;
! 930: /* Pointer just past last char to consider matching */
! 931: char *end_match_1, *end_match_2;
! 932: register char *d, *dend;
! 933: register int mcnt;
! 934: char *translate = pbufp->translate;
! 935:
! 936: /* Failure point stack. Each place that can handle a failure further down the line
! 937: pushes a failure point on this stack. It consists of two char *'s.
! 938: The first one pushed is where to resume scanning the pattern;
! 939: the second pushed is where to resume scanning the strings.
! 940: If the latter is zero, the failure point is a "dummy".
! 941: If a failure happens and the innermost failure point is dormant,
! 942: it discards that failure point and tries the next one. */
! 943:
! 944: char **stackb = (char **) alloca (2 * NFAILURES * sizeof (char *));
! 945: char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
! 946:
! 947: /* Information on the "contents" of registers.
! 948: These are pointers into the input strings; they record
! 949: just what was matched (on this attempt) by some part of the pattern.
! 950: The start_memory command stores the start of a register's contents
! 951: and the stop_memory command stores the end.
! 952:
! 953: At that point, regstart[regnum] points to the first character in the register,
! 954: regend[regnum] points to the first character beyond the end of the register,
! 955: and regstart_segend[regnum] is either the same as regend[regnum]
! 956: or else points to the end of the input string into which regstart[regnum] points.
! 957: The latter case happens when regstart[regnum] is in string1 and
! 958: regend[regnum] is in string2. */
! 959:
! 960: char *regstart[RE_NREGS];
! 961: char *regstart_segend[RE_NREGS];
! 962: char *regend[RE_NREGS];
! 963:
! 964: /* Set up pointers to ends of strings.
! 965: Don't allow the second string to be empty unless both are empty. */
! 966: if (!size2)
! 967: {
! 968: string2 = string1;
! 969: size2 = size1;
! 970: string1 = 0;
! 971: size1 = 0;
! 972: }
! 973: end1 = string1 + size1;
! 974: end2 = string2 + size2;
! 975:
! 976: /* Compute where to stop matching, within the two strings */
! 977: if (mstop <= size1)
! 978: {
! 979: end_match_1 = string1 + mstop;
! 980: end_match_2 = string2;
! 981: }
! 982: else
! 983: {
! 984: end_match_1 = end1;
! 985: end_match_2 = string2 + mstop - size1;
! 986: }
! 987:
! 988: /* Initialize \( and \) text positions to -1
! 989: to mark ones that no \( or \) has been seen for. */
! 990:
! 991: for (mcnt = 0; mcnt < sizeof (regstart) / sizeof (*regstart); mcnt++)
! 992: regstart[mcnt] = (char *) -1;
! 993:
! 994: /* `p' scans through the pattern as `d' scans through the data.
! 995: `dend' is the end of the input string that `d' points within.
! 996: `d' is advanced into the following input string whenever necessary,
! 997: but this happens before fetching;
! 998: therefore, at the beginning of the loop,
! 999: `d' can be pointing at the end of a string,
! 1000: but it cannot equal string2. */
! 1001:
! 1002: if (pos <= size1)
! 1003: d = string1 + pos, dend = end_match_1;
! 1004: else
! 1005: d = string2 + pos - size1, dend = end_match_2;
! 1006:
! 1007: /* Write PREFETCH; just before fetching a character with *d. */
! 1008: #define PREFETCH \
! 1009: while (d == dend) \
! 1010: { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \
! 1011: d = string2; /* end of string1 => advance to string2. */ \
! 1012: dend = end_match_2; }
! 1013:
! 1014: /* This loop loops over pattern commands.
! 1015: It exits by returning from the function if match is complete,
! 1016: or it drops through if match fails at this starting point in the input data. */
! 1017:
! 1018: while (1)
! 1019: {
! 1020: if (p == pend)
! 1021: /* End of pattern means we have succeeded! */
! 1022: {
! 1023: /* If caller wants register contents data back, convert it to indices */
! 1024: if (regs)
! 1025: {
! 1026: regend[0] = d;
! 1027: regstart[0] = string1;
! 1028: for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
! 1029: {
! 1030: if ((mcnt != 0) && regstart[mcnt] == (char *) -1)
! 1031: {
! 1032: regs->start[mcnt] = -1;
! 1033: regs->end[mcnt] = -1;
! 1034: continue;
! 1035: }
! 1036: if (regstart[mcnt] - string1 < 0 ||
! 1037: regstart[mcnt] - string1 > size1)
! 1038: regs->start[mcnt] = regstart[mcnt] - string2 + size1;
! 1039: else
! 1040: regs->start[mcnt] = regstart[mcnt] - string1;
! 1041: if (regend[mcnt] - string1 < 0 ||
! 1042: regend[mcnt] - string1 > size1)
! 1043: regs->end[mcnt] = regend[mcnt] - string2 + size1;
! 1044: else
! 1045: regs->end[mcnt] = regend[mcnt] - string1;
! 1046: }
! 1047: regs->start[0] = pos;
! 1048: }
! 1049: if (d - string1 >= 0 && d - string1 <= size1)
! 1050: return d - string1 - pos;
! 1051: else
! 1052: return d - string2 + size1 - pos;
! 1053: }
! 1054:
! 1055: /* Otherwise match next pattern command */
! 1056: switch ((regexpcode) *p++)
! 1057: {
! 1058:
! 1059: /* \( is represented by a start_memory, \) by a stop_memory.
! 1060: Both of those commands contain a "register number" argument.
! 1061: The text matched within the \( and \) is recorded under that number.
! 1062: Then, \<digit> turns into a `duplicate' command which
! 1063: is followed by the numeric value of <digit> as the register number. */
! 1064:
! 1065: case start_memory:
! 1066: regstart[*p] = d;
! 1067: regstart_segend[*p++] = dend;
! 1068: break;
! 1069:
! 1070: case stop_memory:
! 1071: regend[*p] = d;
! 1072: if (regstart_segend[*p] == dend)
! 1073: regstart_segend[*p] = d;
! 1074: p++;
! 1075: break;
! 1076:
! 1077: case duplicate:
! 1078: {
! 1079: int regno = *p++; /* Get which register to match against */
! 1080: register char *d2, *dend2;
! 1081:
! 1082: d2 = regstart[regno];
! 1083: dend2 = regstart_segend[regno];
! 1084: while (1)
! 1085: {
! 1086: /* Advance to next segment in register contents, if necessary */
! 1087: while (d2 == dend2)
! 1088: {
! 1089: if (dend2 == end_match_2) break;
! 1090: if (dend2 == regend[regno]) break;
! 1091: d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */
! 1092: }
! 1093: /* At end of register contents => success */
! 1094: if (d2 == dend2) break;
! 1095:
! 1096: /* Advance to next segment in data being matched, if necessary */
! 1097: PREFETCH;
! 1098:
! 1099: /* mcnt gets # consecutive chars to compare */
! 1100: mcnt = dend - d;
! 1101: if (mcnt > dend2 - d2)
! 1102: mcnt = dend2 - d2;
! 1103: /* Compare that many; failure if mismatch, else skip them. */
! 1104: if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
! 1105: goto fail;
! 1106: d += mcnt, d2 += mcnt;
! 1107: }
! 1108: }
! 1109: break;
! 1110:
! 1111: case anychar:
! 1112: /* fetch a data character */
! 1113: PREFETCH;
! 1114: /* Match anything but a newline. */
! 1115: if ((translate ? translate[*d++] : *d++) == '\n')
! 1116: goto fail;
! 1117: break;
! 1118:
! 1119: case charset:
! 1120: case charset_not:
! 1121: {
! 1122: /* Nonzero for charset_not */
! 1123: int not = 0;
! 1124: register int c;
! 1125: if (*(p - 1) == (char) charset_not)
! 1126: not = 1;
! 1127:
! 1128: /* fetch a data character */
! 1129: PREFETCH;
! 1130:
! 1131: if (translate)
! 1132: c = translate [*(unsigned char *)d];
! 1133: else
! 1134: c = *(unsigned char *)d;
! 1135:
! 1136: if (c < *p * BYTEWIDTH
! 1137: && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
! 1138: not = !not;
! 1139:
! 1140: p += 1 + *p;
! 1141:
! 1142: if (!not) goto fail;
! 1143: d++;
! 1144: break;
! 1145: }
! 1146:
! 1147: case begline:
! 1148: if (d == string1 || d[-1] == '\n')
! 1149: break;
! 1150: goto fail;
! 1151:
! 1152: case endline:
! 1153: if (d == end2
! 1154: || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
! 1155: break;
! 1156: goto fail;
! 1157:
! 1158: /* "or" constructs ("|") are handled by starting each alternative
! 1159: with an on_failure_jump that points to the start of the next alternative.
! 1160: Each alternative except the last ends with a jump to the joining point.
! 1161: (Actually, each jump except for the last one really jumps
! 1162: to the following jump, because tensioning the jumps is a hassle.) */
! 1163:
! 1164: /* The start of a stupid repeat has an on_failure_jump that points
! 1165: past the end of the repeat text.
! 1166: This makes a failure point so that, on failure to match a repetition,
! 1167: matching restarts past as many repetitions have been found
! 1168: with no way to fail and look for another one. */
! 1169:
! 1170: /* A smart repeat is similar but loops back to the on_failure_jump
! 1171: so that each repetition makes another failure point. */
! 1172:
! 1173: case on_failure_jump:
! 1174: if (stackp == stacke)
! 1175: {
! 1176: char **stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *));
! 1177: bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
! 1178: stackp += stackx - stackb;
! 1179: stacke = stackx + 2 * (stacke - stackb);
! 1180: stackb = stackx;
! 1181: }
! 1182: mcnt = *p++ & 0377;
! 1183: mcnt += SIGN_EXTEND_CHAR (*p++) << 8;
! 1184: *stackp++ = mcnt + p;
! 1185: *stackp++ = d;
! 1186: break;
! 1187:
! 1188: /* The end of a smart repeat has an maybe_finalize_jump back.
! 1189: Change it either to a finalize_jump or an ordinary jump. */
! 1190:
! 1191: case maybe_finalize_jump:
! 1192: mcnt = *p++ & 0377;
! 1193: mcnt += SIGN_EXTEND_CHAR (*p++) << 8;
! 1194: /* Compare what follows with the begining of the repeat.
! 1195: If we can establish that there is nothing that they would
! 1196: both match, we can change to finalize_jump */
! 1197: if (p == pend)
! 1198: p[-3] = (char) finalize_jump;
! 1199: else if (*p == (char) exactn || *p == (char) endline)
! 1200: {
! 1201: register int c = *p == (char) endline ? '\n' : p[2];
! 1202: register char *p1 = p + mcnt;
! 1203: /* p1[0] ... p1[2] are an on_failure_jump.
! 1204: Examine what follows that */
! 1205: if (p1[3] == (char) exactn && p1[5] != c)
! 1206: p[-3] = (char) finalize_jump;
! 1207: else if (p1[3] == (char) charset || p1[3] == (char) charset_not)
! 1208: {
! 1209: int not = p1[3] == (char) charset_not;
! 1210: if (c < p1[4] * BYTEWIDTH
! 1211: && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
! 1212: not = !not;
! 1213: /* not is 1 if c would match */
! 1214: /* That means it is not safe to finalize */
! 1215: if (!not)
! 1216: p[-3] = (char) finalize_jump;
! 1217: }
! 1218: }
! 1219: p -= 2;
! 1220: if (p[-1] != (char) finalize_jump)
! 1221: {
! 1222: p[-1] = (char) jump;
! 1223: goto nofinalize;
! 1224: }
! 1225:
! 1226: /* The end of a stupid repeat has a finalize-jump
! 1227: back to the start, where another failure point will be made
! 1228: which will point after all the repetitions found so far. */
! 1229:
! 1230: case finalize_jump:
! 1231: stackp -= 2;
! 1232:
! 1233: case jump:
! 1234: nofinalize:
! 1235: mcnt = *p++ & 0377;
! 1236: mcnt += SIGN_EXTEND_CHAR (*p++) << 8;
! 1237: p += mcnt;
! 1238: break;
! 1239:
! 1240: case dummy_failure_jump:
! 1241: if (stackp == stacke)
! 1242: {
! 1243: char **stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *));
! 1244: bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
! 1245: stackp += stackx - stackb;
! 1246: stacke = stackx + 2 * (stacke - stackb);
! 1247: stackb = stackx;
! 1248: }
! 1249: *stackp++ = 0;
! 1250: *stackp++ = 0;
! 1251: goto nofinalize;
! 1252:
! 1253: case wordbound:
! 1254: if (d == string1 /* Points to first char */
! 1255: || d == end2 /* Points to end */
! 1256: || (d == end1 && size2 == 0)) /* Points to end */
! 1257: break;
! 1258: if ((SYNTAX (((unsigned char *)d)[-1]) == Sword)
! 1259: != (SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) == Sword))
! 1260: break;
! 1261: goto fail;
! 1262:
! 1263: case notwordbound:
! 1264: if (d == string1 /* Points to first char */
! 1265: || d == end2 /* Points to end */
! 1266: || (d == end1 && size2 == 0)) /* Points to end */
! 1267: goto fail;
! 1268: if ((SYNTAX (((unsigned char *)d)[-1]) == Sword)
! 1269: != (SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) == Sword))
! 1270: goto fail;
! 1271: break;
! 1272:
! 1273: case wordbeg:
! 1274: if (d == end2 /* Points to end */
! 1275: || (d == end1 && size2 == 0) /* Points to end */
! 1276: || SYNTAX (*(unsigned char *) (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
! 1277: goto fail;
! 1278: if (d == string1 /* Points to first char */
! 1279: || SYNTAX (((unsigned char *)d)[-1]) != Sword) /* prev char not letter */
! 1280: break;
! 1281: goto fail;
! 1282:
! 1283: case wordend:
! 1284: if (d == string1 /* Points to first char */
! 1285: || SYNTAX (((unsigned char *)d)[-1]) != Sword) /* prev char not letter */
! 1286: goto fail;
! 1287: if (d == end2 /* Points to end */
! 1288: || (d == end1 && size2 == 0) /* Points to end */
! 1289: || SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) != Sword) /* Next char not a letter */
! 1290: break;
! 1291: goto fail;
! 1292:
! 1293: #ifdef emacs
! 1294: case before_dot:
! 1295: if (((d - string2 <= (unsigned) size2)
! 1296: ? d - (char *) bf_p2 : d - (char *) bf_p1)
! 1297: <= point)
! 1298: goto fail;
! 1299: break;
! 1300:
! 1301: case at_dot:
! 1302: if (((d - string2 <= (unsigned) size2)
! 1303: ? d - (char *) bf_p2 : d - (char *) bf_p1)
! 1304: == point)
! 1305: goto fail;
! 1306: break;
! 1307:
! 1308: case after_dot:
! 1309: if (((d - string2 <= (unsigned) size2)
! 1310: ? d - (char *) bf_p2 : d - (char *) bf_p1)
! 1311: >= point)
! 1312: goto fail;
! 1313: break;
! 1314:
! 1315: case wordchar:
! 1316: mcnt = (int) Sword;
! 1317: goto matchsyntax;
! 1318:
! 1319: case syntaxspec:
! 1320: mcnt = *p++;
! 1321: matchsyntax:
! 1322: PREFETCH;
! 1323: if (SYNTAX (*(unsigned char *)d++) != (enum syntaxcode) mcnt) goto fail;
! 1324: break;
! 1325:
! 1326: case notwordchar:
! 1327: mcnt = (int) Sword;
! 1328: goto matchnotsyntax;
! 1329:
! 1330: case notsyntaxspec:
! 1331: mcnt = *p++;
! 1332: matchnotsyntax:
! 1333: PREFETCH;
! 1334: if (SYNTAX (*(unsigned char *)d++) == (enum syntaxcode) mcnt) goto fail;
! 1335: break;
! 1336: #else
! 1337: case wordchar:
! 1338: PREFETCH;
! 1339: if (SYNTAX (*(unsigned char *)d++) == 0) goto fail;
! 1340: break;
! 1341:
! 1342: case notwordchar:
! 1343: PREFETCH;
! 1344: if (SYNTAX (*(unsigned char *)d++) != 0) goto fail;
! 1345: break;
! 1346: #endif not emacs
! 1347:
! 1348: case begbuf:
! 1349: if (d == string1) /* Note, d cannot equal string2 */
! 1350: break; /* unless string1 == string2. */
! 1351: goto fail;
! 1352:
! 1353: case endbuf:
! 1354: if (d == end2 || (d == end1 && size2 == 0))
! 1355: break;
! 1356: goto fail;
! 1357:
! 1358: case exactn:
! 1359: /* Match the next few pattern characters exactly.
! 1360: mcnt is how many characters to match. */
! 1361: mcnt = *p++;
! 1362: if (translate)
! 1363: {
! 1364: do
! 1365: {
! 1366: PREFETCH;
! 1367: if (translate[*(unsigned char *)d++] != *p++) goto fail;
! 1368: }
! 1369: while (--mcnt);
! 1370: }
! 1371: else
! 1372: {
! 1373: do
! 1374: {
! 1375: PREFETCH;
! 1376: if (*d++ != *p++) goto fail;
! 1377: }
! 1378: while (--mcnt);
! 1379: }
! 1380: break;
! 1381: }
! 1382: continue; /* Successfully matched one pattern command; keep matching */
! 1383:
! 1384: /* Jump here if any matching operation fails. */
! 1385: fail:
! 1386: if (stackp != stackb)
! 1387: /* A restart point is known. Restart there and pop it. */
! 1388: {
! 1389: if (!stackp[-2])
! 1390: { /* If innermost failure point is dormant, flush it and keep looking */
! 1391: stackp -= 2;
! 1392: goto fail;
! 1393: }
! 1394: d = *--stackp;
! 1395: p = *--stackp;
! 1396: if (d >= string1 && d <= end1)
! 1397: dend = end_match_1;
! 1398: }
! 1399: else break; /* Matching at this starting point really fails! */
! 1400: }
! 1401: return -1; /* Failure to match */
! 1402: }
! 1403:
! 1404: static int
! 1405: bcmp_translate (s1, s2, len, translate)
! 1406: char *s1, *s2;
! 1407: register int len;
! 1408: char *translate;
! 1409: {
! 1410: register char *p1 = s1, *p2 = s2;
! 1411: while (len)
! 1412: {
! 1413: if (translate [*p1++] != translate [*p2++]) return 1;
! 1414: len--;
! 1415: }
! 1416: return 0;
! 1417: }
! 1418:
! 1419: /* Entry points compatible with bsd4.2 regex library */
! 1420:
! 1421: #ifndef emacs
! 1422:
! 1423: static struct re_pattern_buffer re_comp_buf;
! 1424:
! 1425: char *
! 1426: re_comp (s)
! 1427: char *s;
! 1428: {
! 1429: if (!s)
! 1430: {
! 1431: if (!re_comp_buf.buffer)
! 1432: return "No previous regular expression";
! 1433: return 0;
! 1434: }
! 1435:
! 1436: if (!re_comp_buf.buffer)
! 1437: {
! 1438: if (!(re_comp_buf.buffer = (char *) malloc (200)))
! 1439: return "Memory exhausted";
! 1440: re_comp_buf.allocated = 200;
! 1441: if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
! 1442: return "Memory exhausted";
! 1443: }
! 1444: return re_compile_pattern (s, strlen (s), &re_comp_buf);
! 1445: }
! 1446:
! 1447: int
! 1448: re_exec (s)
! 1449: char *s;
! 1450: {
! 1451: int len = strlen (s);
! 1452: return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
! 1453: }
! 1454:
! 1455: #endif /* emacs */
! 1456:
! 1457: #ifdef test
! 1458:
! 1459: #include <stdio.h>
! 1460:
! 1461: /* Indexed by a character, gives the upper case equivalent of the character */
! 1462:
! 1463: static char upcase[0400] =
! 1464: { 000, 001, 002, 003, 004, 005, 006, 007,
! 1465: 010, 011, 012, 013, 014, 015, 016, 017,
! 1466: 020, 021, 022, 023, 024, 025, 026, 027,
! 1467: 030, 031, 032, 033, 034, 035, 036, 037,
! 1468: 040, 041, 042, 043, 044, 045, 046, 047,
! 1469: 050, 051, 052, 053, 054, 055, 056, 057,
! 1470: 060, 061, 062, 063, 064, 065, 066, 067,
! 1471: 070, 071, 072, 073, 074, 075, 076, 077,
! 1472: 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
! 1473: 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
! 1474: 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
! 1475: 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
! 1476: 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
! 1477: 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
! 1478: 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
! 1479: 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
! 1480: 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
! 1481: 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
! 1482: 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
! 1483: 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
! 1484: 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
! 1485: 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
! 1486: 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
! 1487: 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
! 1488: 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
! 1489: 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
! 1490: 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
! 1491: 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
! 1492: 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
! 1493: 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
! 1494: 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
! 1495: 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
! 1496: };
! 1497:
! 1498: main ()
! 1499: {
! 1500: char pat[80];
! 1501: struct re_pattern_buffer buf;
! 1502: struct re_registers regs;
! 1503: int i;
! 1504: char c;
! 1505: char fastmap[(1 << BYTEWIDTH)];
! 1506:
! 1507: buf.allocated = 40;
! 1508: buf.buffer = (char *) malloc (buf.allocated);
! 1509: buf.fastmap = fastmap;
! 1510: buf.translate = upcase;
! 1511:
! 1512: while (1)
! 1513: {
! 1514: printf("Enter pattern\n");
! 1515: gets (pat);
! 1516:
! 1517: if (*pat)
! 1518: {
! 1519: re_compile_pattern (pat, strlen(pat), &buf);
! 1520:
! 1521: for (i = 0; i < buf.used; i++)
! 1522: printchar (buf.buffer[i]);
! 1523:
! 1524: putchar ('\n');
! 1525:
! 1526: printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
! 1527:
! 1528: re_compile_fastmap (&buf);
! 1529: printf ("Allowed by fastmap: ");
! 1530: for (i = 0; i < (1 << BYTEWIDTH); i++)
! 1531: if (fastmap[i]) printchar (i);
! 1532: putchar ('\n');
! 1533: }
! 1534:
! 1535: printf("enter string to search\n");
! 1536: gets (pat); /* Now read the string to match against */
! 1537:
! 1538: /* i = re_match (&buf, pat, strlen (pat), 0, 0); */
! 1539: i = re_search (&buf, pat, strlen (pat), 0, strlen (pat), ®s);
! 1540: printf ("Match value %d.\n", i);
! 1541: for (i=0; i < RE_NREGS; i++)
! 1542: printf("%2d start %2d end %2d\n", i, regs.start[i], regs.end[i]);
! 1543: }
! 1544: }
! 1545:
! 1546: #ifdef NOTDEF
! 1547: print_buf (bufp)
! 1548: struct re_pattern_buffer *bufp;
! 1549: {
! 1550: int i;
! 1551:
! 1552: printf ("buf is :\n----------------\n");
! 1553: for (i = 0; i < bufp->used; i++)
! 1554: printchar (bufp->buffer[i]);
! 1555:
! 1556: printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
! 1557:
! 1558: printf ("Allowed by fastmap: ");
! 1559: for (i = 0; i < (1 << BYTEWIDTH); i++)
! 1560: if (bufp->fastmap[i])
! 1561: printchar (i);
! 1562: printf ("\nAllowed by translate: ");
! 1563: if (bufp->translate)
! 1564: for (i = 0; i < (1 << BYTEWIDTH); i++)
! 1565: if (bufp->translate[i])
! 1566: printchar (i);
! 1567: printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
! 1568: printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
! 1569: }
! 1570: #endif
! 1571:
! 1572: printchar (c)
! 1573: char c;
! 1574: {
! 1575: if (c < 041 || c >= 0177)
! 1576: {
! 1577: putchar ('\\');
! 1578: putchar (((c >> 6) & 3) + '0');
! 1579: putchar (((c >> 3) & 7) + '0');
! 1580: putchar ((c & 7) + '0');
! 1581: }
! 1582: else
! 1583: putchar (c);
! 1584: }
! 1585:
! 1586: error (string)
! 1587: char *string;
! 1588: {
! 1589: puts (string);
! 1590: exit (1);
! 1591: }
! 1592:
! 1593: #endif /* test */
! 1594: #endif /* REGEX */