Annotation of src/usr.bin/window/scanner.c, Revision 1.3
1.3 ! downsj 1: /* $OpenBSD$ */
1.1 deraadt 2: /* $NetBSD: scanner.c,v 1.3 1995/09/28 10:34:36 tls Exp $ */
3:
4: /*
5: * Copyright (c) 1983, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * This code is derived from software contributed to Berkeley by
9: * Edward Wang at The University of California, Berkeley.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: * 3. All advertising materials mentioning features or use of this software
20: * must display the following acknowledgement:
21: * This product includes software developed by the University of
22: * California, Berkeley and its contributors.
23: * 4. Neither the name of the University nor the names of its contributors
24: * may be used to endorse or promote products derived from this software
25: * without specific prior written permission.
26: *
27: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37: * SUCH DAMAGE.
38: */
39:
#ifndef lint
#if 0
static char sccsid[] = "@(#)scanner.c 8.1 (Berkeley) 6/6/93";
#else
/* Revision-control identification string; never written, so keep it const. */
static const char rcsid[] = "$OpenBSD$";
#endif
#endif /* not lint */
47:
48: #include "value.h"
49: #include "token.h"
50: #include "context.h"
51: #include "string.h"
52:
53: s_getc()
54: {
55: register c;
56:
57: switch (cx.x_type) {
58: case X_FILE:
59: c = getc(cx.x_fp);
60: if (cx.x_bol && c != EOF) {
61: cx.x_bol = 0;
62: cx.x_lineno++;
63: }
64: if (c == '\n')
65: cx.x_bol = 1;
66: return c;
67: case X_BUF:
68: if (*cx.x_bufp != 0)
69: return *cx.x_bufp++ & 0xff;
70: else
71: return EOF;
72: }
73: /*NOTREACHED*/
74: }
75:
76: s_ungetc(c)
77: {
78: if (c == EOF)
79: return EOF;
80: switch (cx.x_type) {
81: case X_FILE:
82: cx.x_bol = 0;
83: return ungetc(c, cx.x_fp);
84: case X_BUF:
85: if (cx.x_bufp > cx.x_buf)
86: return *--cx.x_bufp = c;
87: else
88: return EOF;
89: }
90: /*NOTREACHED*/
91: }
92:
93: s_gettok()
94: {
95: char buf[100];
96: register char *p = buf;
97: register c;
98: register state = 0;
99:
100: loop:
101: c = s_getc();
102: switch (state) {
103: case 0:
104: switch (c) {
105: case ' ':
106: case '\t':
107: break;
108: case '\n':
109: case ';':
110: cx.x_token = T_EOL;
111: state = -1;
112: break;
113: case '#':
114: state = 1;
115: break;
116: case EOF:
117: cx.x_token = T_EOF;
118: state = -1;
119: break;
120: case 'a': case 'b': case 'c': case 'd': case 'e':
121: case 'f': case 'g': case 'h': case 'i': case 'j':
122: case 'k': case 'l': case 'm': case 'n': case 'o':
123: case 'p': case 'q': case 'r': case 's': case 't':
124: case 'u': case 'v': case 'w': case 'x': case 'y':
125: case 'z':
126: case 'A': case 'B': case 'C': case 'D': case 'E':
127: case 'F': case 'G': case 'H': case 'I': case 'J':
128: case 'K': case 'L': case 'M': case 'N': case 'O':
129: case 'P': case 'Q': case 'R': case 'S': case 'T':
130: case 'U': case 'V': case 'W': case 'X': case 'Y':
131: case 'Z':
132: case '_': case '.':
133: *p++ = c;
134: state = 2;
135: break;
136: case '"':
137: state = 3;
138: break;
139: case '\'':
140: state = 4;
141: break;
142: case '\\':
143: switch (c = s_gettok1()) {
144: case -1:
145: break;
146: case -2:
147: state = 0;
148: break;
149: default:
150: *p++ = c;
151: state = 2;
152: }
153: break;
154: case '0':
155: cx.x_val.v_num = 0;
156: state = 10;
157: break;
158: case '1': case '2': case '3': case '4':
159: case '5': case '6': case '7': case '8': case '9':
160: cx.x_val.v_num = c - '0';
161: state = 11;
162: break;
163: case '>':
164: state = 20;
165: break;
166: case '<':
167: state = 21;
168: break;
169: case '=':
170: state = 22;
171: break;
172: case '!':
173: state = 23;
174: break;
175: case '&':
176: state = 24;
177: break;
178: case '|':
179: state = 25;
180: break;
181: case '$':
182: state = 26;
183: break;
184: case '~':
185: cx.x_token = T_COMP;
186: state = -1;
187: break;
188: case '+':
189: cx.x_token = T_PLUS;
190: state = -1;
191: break;
192: case '-':
193: cx.x_token = T_MINUS;
194: state = -1;
195: break;
196: case '*':
197: cx.x_token = T_MUL;
198: state = -1;
199: break;
200: case '/':
201: cx.x_token = T_DIV;
202: state = -1;
203: break;
204: case '%':
205: cx.x_token = T_MOD;
206: state = -1;
207: break;
208: case '^':
209: cx.x_token = T_XOR;
210: state = -1;
211: break;
212: case '(':
213: cx.x_token = T_LP;
214: state = -1;
215: break;
216: case ')':
217: cx.x_token = T_RP;
218: state = -1;
219: break;
220: case ',':
221: cx.x_token = T_COMMA;
222: state = -1;
223: break;
224: case '?':
225: cx.x_token = T_QUEST;
226: state = -1;
227: break;
228: case ':':
229: cx.x_token = T_COLON;
230: state = -1;
231: break;
232: case '[':
233: cx.x_token = T_LB;
234: state = -1;
235: break;
236: case ']':
237: cx.x_token = T_RB;
238: state = -1;
239: break;
240: default:
241: cx.x_val.v_num = c;
242: cx.x_token = T_CHAR;
243: state = -1;
244: break;
245: }
246: break;
247: case 1: /* got # */
248: if (c == '\n' || c == EOF) {
249: (void) s_ungetc(c);
250: state = 0;
251: }
252: break;
253: case 2: /* unquoted string */
254: switch (c) {
255: case 'a': case 'b': case 'c': case 'd': case 'e':
256: case 'f': case 'g': case 'h': case 'i': case 'j':
257: case 'k': case 'l': case 'm': case 'n': case 'o':
258: case 'p': case 'q': case 'r': case 's': case 't':
259: case 'u': case 'v': case 'w': case 'x': case 'y':
260: case 'z':
261: case 'A': case 'B': case 'C': case 'D': case 'E':
262: case 'F': case 'G': case 'H': case 'I': case 'J':
263: case 'K': case 'L': case 'M': case 'N': case 'O':
264: case 'P': case 'Q': case 'R': case 'S': case 'T':
265: case 'U': case 'V': case 'W': case 'X': case 'Y':
266: case 'Z':
267: case '_': case '.':
268: case '0': case '1': case '2': case '3': case '4':
269: case '5': case '6': case '7': case '8': case '9':
270: if (p < buf + sizeof buf - 1)
271: *p++ = c;
272: break;
273: case '"':
274: state = 3;
275: break;
276: case '\'':
277: state = 4;
278: break;
279: case '\\':
280: switch (c = s_gettok1()) {
281: case -2:
282: (void) s_ungetc(' ');
283: case -1:
284: break;
285: default:
286: if (p < buf + sizeof buf - 1)
287: *p++ = c;
288: }
289: break;
290: default:
291: (void) s_ungetc(c);
292: case EOF:
293: *p = 0;
294: cx.x_token = T_STR;
295: switch (*buf) {
296: case 'i':
297: if (buf[1] == 'f' && buf[2] == 0)
298: cx.x_token = T_IF;
299: break;
300: case 't':
301: if (buf[1] == 'h' && buf[2] == 'e'
302: && buf[3] == 'n' && buf[4] == 0)
303: cx.x_token = T_THEN;
304: break;
305: case 'e':
306: if (buf[1] == 'n' && buf[2] == 'd'
307: && buf[3] == 'i' && buf[4] == 'f'
308: && buf[5] == 0)
309: cx.x_token = T_ENDIF;
310: else if (buf[1] == 'l' && buf[2] == 's')
311: if (buf[3] == 'i' && buf[4] == 'f'
312: && buf[5] == 0)
313: cx.x_token = T_ELSIF;
314: else if (buf[3] == 'e' && buf[4] == 0)
315: cx.x_token = T_ELSE;
316: break;
317: }
318: if (cx.x_token == T_STR
319: && (cx.x_val.v_str = str_cpy(buf)) == 0) {
320: p_memerror();
321: cx.x_token = T_EOF;
322: }
323: state = -1;
324: break;
325: }
326: break;
327: case 3: /* " quoted string */
328: switch (c) {
329: case '\n':
330: (void) s_ungetc(c);
331: case EOF:
332: case '"':
333: state = 2;
334: break;
335: case '\\':
336: switch (c = s_gettok1()) {
337: case -1:
338: case -2: /* newlines are invisible */
339: break;
340: default:
341: if (p < buf + sizeof buf - 1)
342: *p++ = c;
343: }
344: break;
345: default:
346: if (p < buf + sizeof buf - 1)
347: *p++ = c;
348: break;
349: }
350: break;
351: case 4: /* ' quoted string */
352: switch (c) {
353: case '\n':
354: (void) s_ungetc(c);
355: case EOF:
356: case '\'':
357: state = 2;
358: break;
359: case '\\':
360: switch (c = s_gettok1()) {
361: case -1:
362: case -2: /* newlines are invisible */
363: break;
364: default:
365: if (p < buf + sizeof buf - 1)
366: *p++ = c;
367: }
368: break;
369: default:
370: if (p < buf + sizeof buf - 1)
371: *p++ = c;
372: break;
373: }
374: break;
375: case 10: /* got 0 */
376: switch (c) {
377: case 'x':
378: case 'X':
379: cx.x_val.v_num = 0;
380: state = 12;
381: break;
382: case '0': case '1': case '2': case '3': case '4':
383: case '5': case '6': case '7':
384: cx.x_val.v_num = c - '0';
385: state = 13;
386: break;
387: case '8': case '9':
388: cx.x_val.v_num = c - '0';
389: state = 11;
390: break;
391: default:
392: (void) s_ungetc(c);
393: state = -1;
394: cx.x_token = T_NUM;
395: }
396: break;
397: case 11: /* decimal number */
398: switch (c) {
399: case '0': case '1': case '2': case '3': case '4':
400: case '5': case '6': case '7': case '8': case '9':
401: cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0';
402: break;
403: default:
404: (void) s_ungetc(c);
405: state = -1;
406: cx.x_token = T_NUM;
407: }
408: break;
409: case 12: /* hex number */
410: switch (c) {
411: case '0': case '1': case '2': case '3': case '4':
412: case '5': case '6': case '7': case '8': case '9':
413: cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0';
414: break;
415: case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
416: cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10;
417: break;
418: case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
419: cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10;
420: break;
421: default:
422: (void) s_ungetc(c);
423: state = -1;
424: cx.x_token = T_NUM;
425: }
426: break;
427: case 13: /* octal number */
428: switch (c) {
429: case '0': case '1': case '2': case '3': case '4':
430: case '5': case '6': case '7':
431: cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0';
432: break;
433: default:
434: (void) s_ungetc(c);
435: state = -1;
436: cx.x_token = T_NUM;
437: }
438: break;
439: case 20: /* got > */
440: switch (c) {
441: case '=':
442: cx.x_token = T_GE;
443: state = -1;
444: break;
445: case '>':
446: cx.x_token = T_RS;
447: state = -1;
448: break;
449: default:
450: (void) s_ungetc(c);
451: cx.x_token = T_GT;
452: state = -1;
453: }
454: break;
455: case 21: /* got < */
456: switch (c) {
457: case '=':
458: cx.x_token = T_LE;
459: state = -1;
460: break;
461: case '<':
462: cx.x_token = T_LS;
463: state = -1;
464: break;
465: default:
466: (void) s_ungetc(c);
467: cx.x_token = T_LT;
468: state = -1;
469: }
470: break;
471: case 22: /* got = */
472: switch (c) {
473: case '=':
474: cx.x_token = T_EQ;
475: state = -1;
476: break;
477: default:
478: (void) s_ungetc(c);
479: cx.x_token = T_ASSIGN;
480: state = -1;
481: }
482: break;
483: case 23: /* got ! */
484: switch (c) {
485: case '=':
486: cx.x_token = T_NE;
487: state = -1;
488: break;
489: default:
490: (void) s_ungetc(c);
491: cx.x_token = T_NOT;
492: state = -1;
493: }
494: break;
495: case 24: /* got & */
496: switch (c) {
497: case '&':
498: cx.x_token = T_ANDAND;
499: state = -1;
500: break;
501: default:
502: (void) s_ungetc(c);
503: cx.x_token = T_AND;
504: state = -1;
505: }
506: break;
507: case 25: /* got | */
508: switch (c) {
509: case '|':
510: cx.x_token = T_OROR;
511: state = -1;
512: break;
513: default:
514: (void) s_ungetc(c);
515: cx.x_token = T_OR;
516: state = -1;
517: }
518: break;
519: case 26: /* got $ */
520: switch (c) {
521: case '?':
522: cx.x_token = T_DQ;
523: state = -1;
524: break;
525: default:
526: (void) s_ungetc(c);
527: cx.x_token = T_DOLLAR;
528: state = -1;
529: }
530: break;
531: default:
532: abort();
533: }
534: if (state >= 0)
535: goto loop;
536: return cx.x_token;
537: }
538:
/*
 * s_gettok1 --
 *	Decode the remainder of a backslash escape; the caller has already
 *	consumed the backslash itself.
 *
 * Returns:
 *	-1	end of input directly after the backslash
 *	-2	backslash-newline (line continuation)
 *	otherwise the value of the escaped character:
 *		\b \f \n \r \t	the usual control characters
 *		\ooo		one to three octal digits (the value is not
 *				range-limited here, so \777 yields 511)
 *		\<other>	the character itself
 *
 * When an octal escape is cut short by a non-octal character, that
 * character is pushed back with s_ungetc().
 */
int
s_gettok1(void)
{
	int c;
	int n;
	int more;

	c = s_getc();			/* got \ */
	switch (c) {
	case EOF:
		return -1;
	case '\n':
		return -2;
	case 'b':
		return '\b';
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7':
		break;			/* octal escape, decoded below */
	default:
		return c;
	}

	n = c - '0';
	/* Accept at most two further octal digits. */
	for (more = 2; more > 0; more--) {
		c = s_getc();		/* got \[0-7] or \[0-7][0-7] */
		if (c < '0' || c > '7') {
			(void) s_ungetc(c);
			break;
		}
		n = n * 8 + c - '0';
	}
	return n;
}
579: }