Annotation of src/usr.bin/window/scanner.c, Revision 1.1
1.1 ! deraadt 1: /* $NetBSD: scanner.c,v 1.3 1995/09/28 10:34:36 tls Exp $ */
! 2:
! 3: /*
! 4: * Copyright (c) 1983, 1993
! 5: * The Regents of the University of California. All rights reserved.
! 6: *
! 7: * This code is derived from software contributed to Berkeley by
! 8: * Edward Wang at The University of California, Berkeley.
! 9: *
! 10: * Redistribution and use in source and binary forms, with or without
! 11: * modification, are permitted provided that the following conditions
! 12: * are met:
! 13: * 1. Redistributions of source code must retain the above copyright
! 14: * notice, this list of conditions and the following disclaimer.
! 15: * 2. Redistributions in binary form must reproduce the above copyright
! 16: * notice, this list of conditions and the following disclaimer in the
! 17: * documentation and/or other materials provided with the distribution.
! 18: * 3. All advertising materials mentioning features or use of this software
! 19: * must display the following acknowledgement:
! 20: * This product includes software developed by the University of
! 21: * California, Berkeley and its contributors.
! 22: * 4. Neither the name of the University nor the names of its contributors
! 23: * may be used to endorse or promote products derived from this software
! 24: * without specific prior written permission.
! 25: *
! 26: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
! 27: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 28: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 29: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
! 30: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 31: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 32: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 33: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 34: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 35: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 36: * SUCH DAMAGE.
! 37: */
! 38:
! 39: #ifndef lint
! 40: #if 0
! 41: static char sccsid[] = "@(#)scanner.c 8.1 (Berkeley) 6/6/93";
! 42: #else
! 43: static char rcsid[] = "$NetBSD: scanner.c,v 1.3 1995/09/28 10:34:36 tls Exp $";
! 44: #endif
! 45: #endif /* not lint */
! 46:
! 47: #include "value.h"
! 48: #include "token.h"
! 49: #include "context.h"
! 50: #include "string.h"
! 51:
! 52: s_getc()
! 53: {
! 54: register c;
! 55:
! 56: switch (cx.x_type) {
! 57: case X_FILE:
! 58: c = getc(cx.x_fp);
! 59: if (cx.x_bol && c != EOF) {
! 60: cx.x_bol = 0;
! 61: cx.x_lineno++;
! 62: }
! 63: if (c == '\n')
! 64: cx.x_bol = 1;
! 65: return c;
! 66: case X_BUF:
! 67: if (*cx.x_bufp != 0)
! 68: return *cx.x_bufp++ & 0xff;
! 69: else
! 70: return EOF;
! 71: }
! 72: /*NOTREACHED*/
! 73: }
! 74:
! 75: s_ungetc(c)
! 76: {
! 77: if (c == EOF)
! 78: return EOF;
! 79: switch (cx.x_type) {
! 80: case X_FILE:
! 81: cx.x_bol = 0;
! 82: return ungetc(c, cx.x_fp);
! 83: case X_BUF:
! 84: if (cx.x_bufp > cx.x_buf)
! 85: return *--cx.x_bufp = c;
! 86: else
! 87: return EOF;
! 88: }
! 89: /*NOTREACHED*/
! 90: }
! 91:
! 92: s_gettok()
! 93: {
! 94: char buf[100];
! 95: register char *p = buf;
! 96: register c;
! 97: register state = 0;
! 98:
! 99: loop:
! 100: c = s_getc();
! 101: switch (state) {
! 102: case 0:
! 103: switch (c) {
! 104: case ' ':
! 105: case '\t':
! 106: break;
! 107: case '\n':
! 108: case ';':
! 109: cx.x_token = T_EOL;
! 110: state = -1;
! 111: break;
! 112: case '#':
! 113: state = 1;
! 114: break;
! 115: case EOF:
! 116: cx.x_token = T_EOF;
! 117: state = -1;
! 118: break;
! 119: case 'a': case 'b': case 'c': case 'd': case 'e':
! 120: case 'f': case 'g': case 'h': case 'i': case 'j':
! 121: case 'k': case 'l': case 'm': case 'n': case 'o':
! 122: case 'p': case 'q': case 'r': case 's': case 't':
! 123: case 'u': case 'v': case 'w': case 'x': case 'y':
! 124: case 'z':
! 125: case 'A': case 'B': case 'C': case 'D': case 'E':
! 126: case 'F': case 'G': case 'H': case 'I': case 'J':
! 127: case 'K': case 'L': case 'M': case 'N': case 'O':
! 128: case 'P': case 'Q': case 'R': case 'S': case 'T':
! 129: case 'U': case 'V': case 'W': case 'X': case 'Y':
! 130: case 'Z':
! 131: case '_': case '.':
! 132: *p++ = c;
! 133: state = 2;
! 134: break;
! 135: case '"':
! 136: state = 3;
! 137: break;
! 138: case '\'':
! 139: state = 4;
! 140: break;
! 141: case '\\':
! 142: switch (c = s_gettok1()) {
! 143: case -1:
! 144: break;
! 145: case -2:
! 146: state = 0;
! 147: break;
! 148: default:
! 149: *p++ = c;
! 150: state = 2;
! 151: }
! 152: break;
! 153: case '0':
! 154: cx.x_val.v_num = 0;
! 155: state = 10;
! 156: break;
! 157: case '1': case '2': case '3': case '4':
! 158: case '5': case '6': case '7': case '8': case '9':
! 159: cx.x_val.v_num = c - '0';
! 160: state = 11;
! 161: break;
! 162: case '>':
! 163: state = 20;
! 164: break;
! 165: case '<':
! 166: state = 21;
! 167: break;
! 168: case '=':
! 169: state = 22;
! 170: break;
! 171: case '!':
! 172: state = 23;
! 173: break;
! 174: case '&':
! 175: state = 24;
! 176: break;
! 177: case '|':
! 178: state = 25;
! 179: break;
! 180: case '$':
! 181: state = 26;
! 182: break;
! 183: case '~':
! 184: cx.x_token = T_COMP;
! 185: state = -1;
! 186: break;
! 187: case '+':
! 188: cx.x_token = T_PLUS;
! 189: state = -1;
! 190: break;
! 191: case '-':
! 192: cx.x_token = T_MINUS;
! 193: state = -1;
! 194: break;
! 195: case '*':
! 196: cx.x_token = T_MUL;
! 197: state = -1;
! 198: break;
! 199: case '/':
! 200: cx.x_token = T_DIV;
! 201: state = -1;
! 202: break;
! 203: case '%':
! 204: cx.x_token = T_MOD;
! 205: state = -1;
! 206: break;
! 207: case '^':
! 208: cx.x_token = T_XOR;
! 209: state = -1;
! 210: break;
! 211: case '(':
! 212: cx.x_token = T_LP;
! 213: state = -1;
! 214: break;
! 215: case ')':
! 216: cx.x_token = T_RP;
! 217: state = -1;
! 218: break;
! 219: case ',':
! 220: cx.x_token = T_COMMA;
! 221: state = -1;
! 222: break;
! 223: case '?':
! 224: cx.x_token = T_QUEST;
! 225: state = -1;
! 226: break;
! 227: case ':':
! 228: cx.x_token = T_COLON;
! 229: state = -1;
! 230: break;
! 231: case '[':
! 232: cx.x_token = T_LB;
! 233: state = -1;
! 234: break;
! 235: case ']':
! 236: cx.x_token = T_RB;
! 237: state = -1;
! 238: break;
! 239: default:
! 240: cx.x_val.v_num = c;
! 241: cx.x_token = T_CHAR;
! 242: state = -1;
! 243: break;
! 244: }
! 245: break;
! 246: case 1: /* got # */
! 247: if (c == '\n' || c == EOF) {
! 248: (void) s_ungetc(c);
! 249: state = 0;
! 250: }
! 251: break;
! 252: case 2: /* unquoted string */
! 253: switch (c) {
! 254: case 'a': case 'b': case 'c': case 'd': case 'e':
! 255: case 'f': case 'g': case 'h': case 'i': case 'j':
! 256: case 'k': case 'l': case 'm': case 'n': case 'o':
! 257: case 'p': case 'q': case 'r': case 's': case 't':
! 258: case 'u': case 'v': case 'w': case 'x': case 'y':
! 259: case 'z':
! 260: case 'A': case 'B': case 'C': case 'D': case 'E':
! 261: case 'F': case 'G': case 'H': case 'I': case 'J':
! 262: case 'K': case 'L': case 'M': case 'N': case 'O':
! 263: case 'P': case 'Q': case 'R': case 'S': case 'T':
! 264: case 'U': case 'V': case 'W': case 'X': case 'Y':
! 265: case 'Z':
! 266: case '_': case '.':
! 267: case '0': case '1': case '2': case '3': case '4':
! 268: case '5': case '6': case '7': case '8': case '9':
! 269: if (p < buf + sizeof buf - 1)
! 270: *p++ = c;
! 271: break;
! 272: case '"':
! 273: state = 3;
! 274: break;
! 275: case '\'':
! 276: state = 4;
! 277: break;
! 278: case '\\':
! 279: switch (c = s_gettok1()) {
! 280: case -2:
! 281: (void) s_ungetc(' ');
! 282: case -1:
! 283: break;
! 284: default:
! 285: if (p < buf + sizeof buf - 1)
! 286: *p++ = c;
! 287: }
! 288: break;
! 289: default:
! 290: (void) s_ungetc(c);
! 291: case EOF:
! 292: *p = 0;
! 293: cx.x_token = T_STR;
! 294: switch (*buf) {
! 295: case 'i':
! 296: if (buf[1] == 'f' && buf[2] == 0)
! 297: cx.x_token = T_IF;
! 298: break;
! 299: case 't':
! 300: if (buf[1] == 'h' && buf[2] == 'e'
! 301: && buf[3] == 'n' && buf[4] == 0)
! 302: cx.x_token = T_THEN;
! 303: break;
! 304: case 'e':
! 305: if (buf[1] == 'n' && buf[2] == 'd'
! 306: && buf[3] == 'i' && buf[4] == 'f'
! 307: && buf[5] == 0)
! 308: cx.x_token = T_ENDIF;
! 309: else if (buf[1] == 'l' && buf[2] == 's')
! 310: if (buf[3] == 'i' && buf[4] == 'f'
! 311: && buf[5] == 0)
! 312: cx.x_token = T_ELSIF;
! 313: else if (buf[3] == 'e' && buf[4] == 0)
! 314: cx.x_token = T_ELSE;
! 315: break;
! 316: }
! 317: if (cx.x_token == T_STR
! 318: && (cx.x_val.v_str = str_cpy(buf)) == 0) {
! 319: p_memerror();
! 320: cx.x_token = T_EOF;
! 321: }
! 322: state = -1;
! 323: break;
! 324: }
! 325: break;
! 326: case 3: /* " quoted string */
! 327: switch (c) {
! 328: case '\n':
! 329: (void) s_ungetc(c);
! 330: case EOF:
! 331: case '"':
! 332: state = 2;
! 333: break;
! 334: case '\\':
! 335: switch (c = s_gettok1()) {
! 336: case -1:
! 337: case -2: /* newlines are invisible */
! 338: break;
! 339: default:
! 340: if (p < buf + sizeof buf - 1)
! 341: *p++ = c;
! 342: }
! 343: break;
! 344: default:
! 345: if (p < buf + sizeof buf - 1)
! 346: *p++ = c;
! 347: break;
! 348: }
! 349: break;
! 350: case 4: /* ' quoted string */
! 351: switch (c) {
! 352: case '\n':
! 353: (void) s_ungetc(c);
! 354: case EOF:
! 355: case '\'':
! 356: state = 2;
! 357: break;
! 358: case '\\':
! 359: switch (c = s_gettok1()) {
! 360: case -1:
! 361: case -2: /* newlines are invisible */
! 362: break;
! 363: default:
! 364: if (p < buf + sizeof buf - 1)
! 365: *p++ = c;
! 366: }
! 367: break;
! 368: default:
! 369: if (p < buf + sizeof buf - 1)
! 370: *p++ = c;
! 371: break;
! 372: }
! 373: break;
! 374: case 10: /* got 0 */
! 375: switch (c) {
! 376: case 'x':
! 377: case 'X':
! 378: cx.x_val.v_num = 0;
! 379: state = 12;
! 380: break;
! 381: case '0': case '1': case '2': case '3': case '4':
! 382: case '5': case '6': case '7':
! 383: cx.x_val.v_num = c - '0';
! 384: state = 13;
! 385: break;
! 386: case '8': case '9':
! 387: cx.x_val.v_num = c - '0';
! 388: state = 11;
! 389: break;
! 390: default:
! 391: (void) s_ungetc(c);
! 392: state = -1;
! 393: cx.x_token = T_NUM;
! 394: }
! 395: break;
! 396: case 11: /* decimal number */
! 397: switch (c) {
! 398: case '0': case '1': case '2': case '3': case '4':
! 399: case '5': case '6': case '7': case '8': case '9':
! 400: cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0';
! 401: break;
! 402: default:
! 403: (void) s_ungetc(c);
! 404: state = -1;
! 405: cx.x_token = T_NUM;
! 406: }
! 407: break;
! 408: case 12: /* hex number */
! 409: switch (c) {
! 410: case '0': case '1': case '2': case '3': case '4':
! 411: case '5': case '6': case '7': case '8': case '9':
! 412: cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0';
! 413: break;
! 414: case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
! 415: cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10;
! 416: break;
! 417: case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
! 418: cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10;
! 419: break;
! 420: default:
! 421: (void) s_ungetc(c);
! 422: state = -1;
! 423: cx.x_token = T_NUM;
! 424: }
! 425: break;
! 426: case 13: /* octal number */
! 427: switch (c) {
! 428: case '0': case '1': case '2': case '3': case '4':
! 429: case '5': case '6': case '7':
! 430: cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0';
! 431: break;
! 432: default:
! 433: (void) s_ungetc(c);
! 434: state = -1;
! 435: cx.x_token = T_NUM;
! 436: }
! 437: break;
! 438: case 20: /* got > */
! 439: switch (c) {
! 440: case '=':
! 441: cx.x_token = T_GE;
! 442: state = -1;
! 443: break;
! 444: case '>':
! 445: cx.x_token = T_RS;
! 446: state = -1;
! 447: break;
! 448: default:
! 449: (void) s_ungetc(c);
! 450: cx.x_token = T_GT;
! 451: state = -1;
! 452: }
! 453: break;
! 454: case 21: /* got < */
! 455: switch (c) {
! 456: case '=':
! 457: cx.x_token = T_LE;
! 458: state = -1;
! 459: break;
! 460: case '<':
! 461: cx.x_token = T_LS;
! 462: state = -1;
! 463: break;
! 464: default:
! 465: (void) s_ungetc(c);
! 466: cx.x_token = T_LT;
! 467: state = -1;
! 468: }
! 469: break;
! 470: case 22: /* got = */
! 471: switch (c) {
! 472: case '=':
! 473: cx.x_token = T_EQ;
! 474: state = -1;
! 475: break;
! 476: default:
! 477: (void) s_ungetc(c);
! 478: cx.x_token = T_ASSIGN;
! 479: state = -1;
! 480: }
! 481: break;
! 482: case 23: /* got ! */
! 483: switch (c) {
! 484: case '=':
! 485: cx.x_token = T_NE;
! 486: state = -1;
! 487: break;
! 488: default:
! 489: (void) s_ungetc(c);
! 490: cx.x_token = T_NOT;
! 491: state = -1;
! 492: }
! 493: break;
! 494: case 24: /* got & */
! 495: switch (c) {
! 496: case '&':
! 497: cx.x_token = T_ANDAND;
! 498: state = -1;
! 499: break;
! 500: default:
! 501: (void) s_ungetc(c);
! 502: cx.x_token = T_AND;
! 503: state = -1;
! 504: }
! 505: break;
! 506: case 25: /* got | */
! 507: switch (c) {
! 508: case '|':
! 509: cx.x_token = T_OROR;
! 510: state = -1;
! 511: break;
! 512: default:
! 513: (void) s_ungetc(c);
! 514: cx.x_token = T_OR;
! 515: state = -1;
! 516: }
! 517: break;
! 518: case 26: /* got $ */
! 519: switch (c) {
! 520: case '?':
! 521: cx.x_token = T_DQ;
! 522: state = -1;
! 523: break;
! 524: default:
! 525: (void) s_ungetc(c);
! 526: cx.x_token = T_DOLLAR;
! 527: state = -1;
! 528: }
! 529: break;
! 530: default:
! 531: abort();
! 532: }
! 533: if (state >= 0)
! 534: goto loop;
! 535: return cx.x_token;
! 536: }
! 537:
/*
 * Decode the remainder of a backslash escape; the backslash itself has
 * already been consumed by the caller.
 *
 * Returns:
 *   -1  if end of input follows the backslash,
 *   -2  if a newline follows the backslash,
 *   the control character for \b, \f, \n, \r and \t,
 *   the value of one to three octal digits for \N, \NN and \NNN,
 *   the following character itself for anything else.
 */
int
s_gettok1(void)
{
	int c;
	int n;
	int i;

	c = s_getc();
	switch (c) {
	case EOF:
		return -1;
	case '\n':
		return -2;
	case 'b':
		return '\b';
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7':
		break;
	default:
		return c;
	}

	/* Octal escape: one digit is in hand, up to two more may follow. */
	n = c - '0';
	for (i = 0; i < 2; i++) {
		c = s_getc();
		if (c < '0' || c > '7') {
			(void) s_ungetc(c);
			break;
		}
		n = n * 8 + (c - '0');
	}
	return n;
}