Annotation of src/usr.bin/window/scanner.c, Revision 1.1.1.1
1.1 deraadt 1: /* $NetBSD: scanner.c,v 1.3 1995/09/28 10:34:36 tls Exp $ */
2:
3: /*
4: * Copyright (c) 1983, 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
8: * Edward Wang at The University of California, Berkeley.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: * 3. All advertising materials mentioning features or use of this software
19: * must display the following acknowledgement:
20: * This product includes software developed by the University of
21: * California, Berkeley and its contributors.
22: * 4. Neither the name of the University nor the names of its contributors
23: * may be used to endorse or promote products derived from this software
24: * without specific prior written permission.
25: *
26: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36: * SUCH DAMAGE.
37: */
38:
39: #ifndef lint
40: #if 0
41: static char sccsid[] = "@(#)scanner.c 8.1 (Berkeley) 6/6/93";
42: #else
43: static char rcsid[] = "$NetBSD: scanner.c,v 1.3 1995/09/28 10:34:36 tls Exp $";
44: #endif
45: #endif /* not lint */
46:
47: #include "value.h"
48: #include "token.h"
49: #include "context.h"
50: #include "string.h"
51:
52: s_getc()
53: {
54: register c;
55:
56: switch (cx.x_type) {
57: case X_FILE:
58: c = getc(cx.x_fp);
59: if (cx.x_bol && c != EOF) {
60: cx.x_bol = 0;
61: cx.x_lineno++;
62: }
63: if (c == '\n')
64: cx.x_bol = 1;
65: return c;
66: case X_BUF:
67: if (*cx.x_bufp != 0)
68: return *cx.x_bufp++ & 0xff;
69: else
70: return EOF;
71: }
72: /*NOTREACHED*/
73: }
74:
75: s_ungetc(c)
76: {
77: if (c == EOF)
78: return EOF;
79: switch (cx.x_type) {
80: case X_FILE:
81: cx.x_bol = 0;
82: return ungetc(c, cx.x_fp);
83: case X_BUF:
84: if (cx.x_bufp > cx.x_buf)
85: return *--cx.x_bufp = c;
86: else
87: return EOF;
88: }
89: /*NOTREACHED*/
90: }
91:
92: s_gettok()
93: {
94: char buf[100];
95: register char *p = buf;
96: register c;
97: register state = 0;
98:
99: loop:
100: c = s_getc();
101: switch (state) {
102: case 0:
103: switch (c) {
104: case ' ':
105: case '\t':
106: break;
107: case '\n':
108: case ';':
109: cx.x_token = T_EOL;
110: state = -1;
111: break;
112: case '#':
113: state = 1;
114: break;
115: case EOF:
116: cx.x_token = T_EOF;
117: state = -1;
118: break;
119: case 'a': case 'b': case 'c': case 'd': case 'e':
120: case 'f': case 'g': case 'h': case 'i': case 'j':
121: case 'k': case 'l': case 'm': case 'n': case 'o':
122: case 'p': case 'q': case 'r': case 's': case 't':
123: case 'u': case 'v': case 'w': case 'x': case 'y':
124: case 'z':
125: case 'A': case 'B': case 'C': case 'D': case 'E':
126: case 'F': case 'G': case 'H': case 'I': case 'J':
127: case 'K': case 'L': case 'M': case 'N': case 'O':
128: case 'P': case 'Q': case 'R': case 'S': case 'T':
129: case 'U': case 'V': case 'W': case 'X': case 'Y':
130: case 'Z':
131: case '_': case '.':
132: *p++ = c;
133: state = 2;
134: break;
135: case '"':
136: state = 3;
137: break;
138: case '\'':
139: state = 4;
140: break;
141: case '\\':
142: switch (c = s_gettok1()) {
143: case -1:
144: break;
145: case -2:
146: state = 0;
147: break;
148: default:
149: *p++ = c;
150: state = 2;
151: }
152: break;
153: case '0':
154: cx.x_val.v_num = 0;
155: state = 10;
156: break;
157: case '1': case '2': case '3': case '4':
158: case '5': case '6': case '7': case '8': case '9':
159: cx.x_val.v_num = c - '0';
160: state = 11;
161: break;
162: case '>':
163: state = 20;
164: break;
165: case '<':
166: state = 21;
167: break;
168: case '=':
169: state = 22;
170: break;
171: case '!':
172: state = 23;
173: break;
174: case '&':
175: state = 24;
176: break;
177: case '|':
178: state = 25;
179: break;
180: case '$':
181: state = 26;
182: break;
183: case '~':
184: cx.x_token = T_COMP;
185: state = -1;
186: break;
187: case '+':
188: cx.x_token = T_PLUS;
189: state = -1;
190: break;
191: case '-':
192: cx.x_token = T_MINUS;
193: state = -1;
194: break;
195: case '*':
196: cx.x_token = T_MUL;
197: state = -1;
198: break;
199: case '/':
200: cx.x_token = T_DIV;
201: state = -1;
202: break;
203: case '%':
204: cx.x_token = T_MOD;
205: state = -1;
206: break;
207: case '^':
208: cx.x_token = T_XOR;
209: state = -1;
210: break;
211: case '(':
212: cx.x_token = T_LP;
213: state = -1;
214: break;
215: case ')':
216: cx.x_token = T_RP;
217: state = -1;
218: break;
219: case ',':
220: cx.x_token = T_COMMA;
221: state = -1;
222: break;
223: case '?':
224: cx.x_token = T_QUEST;
225: state = -1;
226: break;
227: case ':':
228: cx.x_token = T_COLON;
229: state = -1;
230: break;
231: case '[':
232: cx.x_token = T_LB;
233: state = -1;
234: break;
235: case ']':
236: cx.x_token = T_RB;
237: state = -1;
238: break;
239: default:
240: cx.x_val.v_num = c;
241: cx.x_token = T_CHAR;
242: state = -1;
243: break;
244: }
245: break;
246: case 1: /* got # */
247: if (c == '\n' || c == EOF) {
248: (void) s_ungetc(c);
249: state = 0;
250: }
251: break;
252: case 2: /* unquoted string */
253: switch (c) {
254: case 'a': case 'b': case 'c': case 'd': case 'e':
255: case 'f': case 'g': case 'h': case 'i': case 'j':
256: case 'k': case 'l': case 'm': case 'n': case 'o':
257: case 'p': case 'q': case 'r': case 's': case 't':
258: case 'u': case 'v': case 'w': case 'x': case 'y':
259: case 'z':
260: case 'A': case 'B': case 'C': case 'D': case 'E':
261: case 'F': case 'G': case 'H': case 'I': case 'J':
262: case 'K': case 'L': case 'M': case 'N': case 'O':
263: case 'P': case 'Q': case 'R': case 'S': case 'T':
264: case 'U': case 'V': case 'W': case 'X': case 'Y':
265: case 'Z':
266: case '_': case '.':
267: case '0': case '1': case '2': case '3': case '4':
268: case '5': case '6': case '7': case '8': case '9':
269: if (p < buf + sizeof buf - 1)
270: *p++ = c;
271: break;
272: case '"':
273: state = 3;
274: break;
275: case '\'':
276: state = 4;
277: break;
278: case '\\':
279: switch (c = s_gettok1()) {
280: case -2:
281: (void) s_ungetc(' ');
282: case -1:
283: break;
284: default:
285: if (p < buf + sizeof buf - 1)
286: *p++ = c;
287: }
288: break;
289: default:
290: (void) s_ungetc(c);
291: case EOF:
292: *p = 0;
293: cx.x_token = T_STR;
294: switch (*buf) {
295: case 'i':
296: if (buf[1] == 'f' && buf[2] == 0)
297: cx.x_token = T_IF;
298: break;
299: case 't':
300: if (buf[1] == 'h' && buf[2] == 'e'
301: && buf[3] == 'n' && buf[4] == 0)
302: cx.x_token = T_THEN;
303: break;
304: case 'e':
305: if (buf[1] == 'n' && buf[2] == 'd'
306: && buf[3] == 'i' && buf[4] == 'f'
307: && buf[5] == 0)
308: cx.x_token = T_ENDIF;
309: else if (buf[1] == 'l' && buf[2] == 's')
310: if (buf[3] == 'i' && buf[4] == 'f'
311: && buf[5] == 0)
312: cx.x_token = T_ELSIF;
313: else if (buf[3] == 'e' && buf[4] == 0)
314: cx.x_token = T_ELSE;
315: break;
316: }
317: if (cx.x_token == T_STR
318: && (cx.x_val.v_str = str_cpy(buf)) == 0) {
319: p_memerror();
320: cx.x_token = T_EOF;
321: }
322: state = -1;
323: break;
324: }
325: break;
326: case 3: /* " quoted string */
327: switch (c) {
328: case '\n':
329: (void) s_ungetc(c);
330: case EOF:
331: case '"':
332: state = 2;
333: break;
334: case '\\':
335: switch (c = s_gettok1()) {
336: case -1:
337: case -2: /* newlines are invisible */
338: break;
339: default:
340: if (p < buf + sizeof buf - 1)
341: *p++ = c;
342: }
343: break;
344: default:
345: if (p < buf + sizeof buf - 1)
346: *p++ = c;
347: break;
348: }
349: break;
350: case 4: /* ' quoted string */
351: switch (c) {
352: case '\n':
353: (void) s_ungetc(c);
354: case EOF:
355: case '\'':
356: state = 2;
357: break;
358: case '\\':
359: switch (c = s_gettok1()) {
360: case -1:
361: case -2: /* newlines are invisible */
362: break;
363: default:
364: if (p < buf + sizeof buf - 1)
365: *p++ = c;
366: }
367: break;
368: default:
369: if (p < buf + sizeof buf - 1)
370: *p++ = c;
371: break;
372: }
373: break;
374: case 10: /* got 0 */
375: switch (c) {
376: case 'x':
377: case 'X':
378: cx.x_val.v_num = 0;
379: state = 12;
380: break;
381: case '0': case '1': case '2': case '3': case '4':
382: case '5': case '6': case '7':
383: cx.x_val.v_num = c - '0';
384: state = 13;
385: break;
386: case '8': case '9':
387: cx.x_val.v_num = c - '0';
388: state = 11;
389: break;
390: default:
391: (void) s_ungetc(c);
392: state = -1;
393: cx.x_token = T_NUM;
394: }
395: break;
396: case 11: /* decimal number */
397: switch (c) {
398: case '0': case '1': case '2': case '3': case '4':
399: case '5': case '6': case '7': case '8': case '9':
400: cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0';
401: break;
402: default:
403: (void) s_ungetc(c);
404: state = -1;
405: cx.x_token = T_NUM;
406: }
407: break;
408: case 12: /* hex number */
409: switch (c) {
410: case '0': case '1': case '2': case '3': case '4':
411: case '5': case '6': case '7': case '8': case '9':
412: cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0';
413: break;
414: case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
415: cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10;
416: break;
417: case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
418: cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10;
419: break;
420: default:
421: (void) s_ungetc(c);
422: state = -1;
423: cx.x_token = T_NUM;
424: }
425: break;
426: case 13: /* octal number */
427: switch (c) {
428: case '0': case '1': case '2': case '3': case '4':
429: case '5': case '6': case '7':
430: cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0';
431: break;
432: default:
433: (void) s_ungetc(c);
434: state = -1;
435: cx.x_token = T_NUM;
436: }
437: break;
438: case 20: /* got > */
439: switch (c) {
440: case '=':
441: cx.x_token = T_GE;
442: state = -1;
443: break;
444: case '>':
445: cx.x_token = T_RS;
446: state = -1;
447: break;
448: default:
449: (void) s_ungetc(c);
450: cx.x_token = T_GT;
451: state = -1;
452: }
453: break;
454: case 21: /* got < */
455: switch (c) {
456: case '=':
457: cx.x_token = T_LE;
458: state = -1;
459: break;
460: case '<':
461: cx.x_token = T_LS;
462: state = -1;
463: break;
464: default:
465: (void) s_ungetc(c);
466: cx.x_token = T_LT;
467: state = -1;
468: }
469: break;
470: case 22: /* got = */
471: switch (c) {
472: case '=':
473: cx.x_token = T_EQ;
474: state = -1;
475: break;
476: default:
477: (void) s_ungetc(c);
478: cx.x_token = T_ASSIGN;
479: state = -1;
480: }
481: break;
482: case 23: /* got ! */
483: switch (c) {
484: case '=':
485: cx.x_token = T_NE;
486: state = -1;
487: break;
488: default:
489: (void) s_ungetc(c);
490: cx.x_token = T_NOT;
491: state = -1;
492: }
493: break;
494: case 24: /* got & */
495: switch (c) {
496: case '&':
497: cx.x_token = T_ANDAND;
498: state = -1;
499: break;
500: default:
501: (void) s_ungetc(c);
502: cx.x_token = T_AND;
503: state = -1;
504: }
505: break;
506: case 25: /* got | */
507: switch (c) {
508: case '|':
509: cx.x_token = T_OROR;
510: state = -1;
511: break;
512: default:
513: (void) s_ungetc(c);
514: cx.x_token = T_OR;
515: state = -1;
516: }
517: break;
518: case 26: /* got $ */
519: switch (c) {
520: case '?':
521: cx.x_token = T_DQ;
522: state = -1;
523: break;
524: default:
525: (void) s_ungetc(c);
526: cx.x_token = T_DOLLAR;
527: state = -1;
528: }
529: break;
530: default:
531: abort();
532: }
533: if (state >= 0)
534: goto loop;
535: return cx.x_token;
536: }
537:
/*
 * Decode a backslash escape.  The caller has already consumed the
 * backslash; this reads what follows it with s_getc().
 *
 * Returns
 *	-1	if EOF follows the backslash,
 *	-2	if a newline follows the backslash,
 *	the control character for \b, \f, \n, \r and \t,
 *	the numeric value of one to three octal digits (\7, \12, \101),
 *	and, for any other character, that character itself.
 *
 * When an octal escape is ended by a non-octal character, that
 * character is pushed back with s_ungetc() for the caller to read.
 */
int
s_gettok1()
{
	register int c;
	register int n;
	register int i;

	c = s_getc();			/* got \ */
	switch (c) {
	case EOF:
		return -1;
	case '\n':
		return -2;
	case 'b':
		return '\b';
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7':
		break;
	default:
		return c;
	}

	/* First octal digit is in c; accept at most two more. */
	n = c - '0';
	for (i = 0; i < 2; i++) {
		c = s_getc();
		if (c < '0' || c > '7') {
			(void) s_ungetc(c);
			break;
		}
		n = n * 8 + c - '0';
	}
	return n;
}