Annotation of src/usr.bin/window/scanner.c, Revision 1.5
1.5 ! millert 1: /* $OpenBSD: scanner.c,v 1.4 2001/11/19 19:02:18 mpech Exp $ */
1.1 deraadt 2: /* $NetBSD: scanner.c,v 1.3 1995/09/28 10:34:36 tls Exp $ */
3:
4: /*
5: * Copyright (c) 1983, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * This code is derived from software contributed to Berkeley by
9: * Edward Wang at The University of California, Berkeley.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
1.5 ! millert 19: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 20: * may be used to endorse or promote products derived from this software
21: * without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: */
35:
/*
 * Version identification strings.  The "#if 0" arm disables the SCCS id,
 * so only the RCS id string is compiled, and neither is compiled when
 * "lint" is defined.
 */
36: #ifndef lint
37: #if 0
38: static char sccsid[] = "@(#)scanner.c 8.1 (Berkeley) 6/6/93";
39: #else
1.5 ! millert 40: static char rcsid[] = "$OpenBSD: scanner.c,v 1.4 2001/11/19 19:02:18 mpech Exp $";
1.1 deraadt 41: #endif
42: #endif /* not lint */
43:
44: #include "value.h"
45: #include "token.h"
46: #include "context.h"
47: #include "string.h"
48:
49: s_getc()
50: {
1.4 mpech 51: int c;
1.1 deraadt 52:
53: switch (cx.x_type) {
54: case X_FILE:
55: c = getc(cx.x_fp);
56: if (cx.x_bol && c != EOF) {
57: cx.x_bol = 0;
58: cx.x_lineno++;
59: }
60: if (c == '\n')
61: cx.x_bol = 1;
62: return c;
63: case X_BUF:
64: if (*cx.x_bufp != 0)
65: return *cx.x_bufp++ & 0xff;
66: else
67: return EOF;
68: }
69: /*NOTREACHED*/
70: }
71:
72: s_ungetc(c)
73: {
74: if (c == EOF)
75: return EOF;
76: switch (cx.x_type) {
77: case X_FILE:
78: cx.x_bol = 0;
79: return ungetc(c, cx.x_fp);
80: case X_BUF:
81: if (cx.x_bufp > cx.x_buf)
82: return *--cx.x_bufp = c;
83: else
84: return EOF;
85: }
86: /*NOTREACHED*/
87: }
88:
89: s_gettok()
90: {
91: char buf[100];
1.4 mpech 92: char *p = buf;
93: int c;
94: int state = 0;
1.1 deraadt 95:
96: loop:
97: c = s_getc();
98: switch (state) {
99: case 0:
100: switch (c) {
101: case ' ':
102: case '\t':
103: break;
104: case '\n':
105: case ';':
106: cx.x_token = T_EOL;
107: state = -1;
108: break;
109: case '#':
110: state = 1;
111: break;
112: case EOF:
113: cx.x_token = T_EOF;
114: state = -1;
115: break;
116: case 'a': case 'b': case 'c': case 'd': case 'e':
117: case 'f': case 'g': case 'h': case 'i': case 'j':
118: case 'k': case 'l': case 'm': case 'n': case 'o':
119: case 'p': case 'q': case 'r': case 's': case 't':
120: case 'u': case 'v': case 'w': case 'x': case 'y':
121: case 'z':
122: case 'A': case 'B': case 'C': case 'D': case 'E':
123: case 'F': case 'G': case 'H': case 'I': case 'J':
124: case 'K': case 'L': case 'M': case 'N': case 'O':
125: case 'P': case 'Q': case 'R': case 'S': case 'T':
126: case 'U': case 'V': case 'W': case 'X': case 'Y':
127: case 'Z':
128: case '_': case '.':
129: *p++ = c;
130: state = 2;
131: break;
132: case '"':
133: state = 3;
134: break;
135: case '\'':
136: state = 4;
137: break;
138: case '\\':
139: switch (c = s_gettok1()) {
140: case -1:
141: break;
142: case -2:
143: state = 0;
144: break;
145: default:
146: *p++ = c;
147: state = 2;
148: }
149: break;
150: case '0':
151: cx.x_val.v_num = 0;
152: state = 10;
153: break;
154: case '1': case '2': case '3': case '4':
155: case '5': case '6': case '7': case '8': case '9':
156: cx.x_val.v_num = c - '0';
157: state = 11;
158: break;
159: case '>':
160: state = 20;
161: break;
162: case '<':
163: state = 21;
164: break;
165: case '=':
166: state = 22;
167: break;
168: case '!':
169: state = 23;
170: break;
171: case '&':
172: state = 24;
173: break;
174: case '|':
175: state = 25;
176: break;
177: case '$':
178: state = 26;
179: break;
180: case '~':
181: cx.x_token = T_COMP;
182: state = -1;
183: break;
184: case '+':
185: cx.x_token = T_PLUS;
186: state = -1;
187: break;
188: case '-':
189: cx.x_token = T_MINUS;
190: state = -1;
191: break;
192: case '*':
193: cx.x_token = T_MUL;
194: state = -1;
195: break;
196: case '/':
197: cx.x_token = T_DIV;
198: state = -1;
199: break;
200: case '%':
201: cx.x_token = T_MOD;
202: state = -1;
203: break;
204: case '^':
205: cx.x_token = T_XOR;
206: state = -1;
207: break;
208: case '(':
209: cx.x_token = T_LP;
210: state = -1;
211: break;
212: case ')':
213: cx.x_token = T_RP;
214: state = -1;
215: break;
216: case ',':
217: cx.x_token = T_COMMA;
218: state = -1;
219: break;
220: case '?':
221: cx.x_token = T_QUEST;
222: state = -1;
223: break;
224: case ':':
225: cx.x_token = T_COLON;
226: state = -1;
227: break;
228: case '[':
229: cx.x_token = T_LB;
230: state = -1;
231: break;
232: case ']':
233: cx.x_token = T_RB;
234: state = -1;
235: break;
236: default:
237: cx.x_val.v_num = c;
238: cx.x_token = T_CHAR;
239: state = -1;
240: break;
241: }
242: break;
243: case 1: /* got # */
244: if (c == '\n' || c == EOF) {
245: (void) s_ungetc(c);
246: state = 0;
247: }
248: break;
249: case 2: /* unquoted string */
250: switch (c) {
251: case 'a': case 'b': case 'c': case 'd': case 'e':
252: case 'f': case 'g': case 'h': case 'i': case 'j':
253: case 'k': case 'l': case 'm': case 'n': case 'o':
254: case 'p': case 'q': case 'r': case 's': case 't':
255: case 'u': case 'v': case 'w': case 'x': case 'y':
256: case 'z':
257: case 'A': case 'B': case 'C': case 'D': case 'E':
258: case 'F': case 'G': case 'H': case 'I': case 'J':
259: case 'K': case 'L': case 'M': case 'N': case 'O':
260: case 'P': case 'Q': case 'R': case 'S': case 'T':
261: case 'U': case 'V': case 'W': case 'X': case 'Y':
262: case 'Z':
263: case '_': case '.':
264: case '0': case '1': case '2': case '3': case '4':
265: case '5': case '6': case '7': case '8': case '9':
266: if (p < buf + sizeof buf - 1)
267: *p++ = c;
268: break;
269: case '"':
270: state = 3;
271: break;
272: case '\'':
273: state = 4;
274: break;
275: case '\\':
276: switch (c = s_gettok1()) {
277: case -2:
278: (void) s_ungetc(' ');
279: case -1:
280: break;
281: default:
282: if (p < buf + sizeof buf - 1)
283: *p++ = c;
284: }
285: break;
286: default:
287: (void) s_ungetc(c);
288: case EOF:
289: *p = 0;
290: cx.x_token = T_STR;
291: switch (*buf) {
292: case 'i':
293: if (buf[1] == 'f' && buf[2] == 0)
294: cx.x_token = T_IF;
295: break;
296: case 't':
297: if (buf[1] == 'h' && buf[2] == 'e'
298: && buf[3] == 'n' && buf[4] == 0)
299: cx.x_token = T_THEN;
300: break;
301: case 'e':
302: if (buf[1] == 'n' && buf[2] == 'd'
303: && buf[3] == 'i' && buf[4] == 'f'
304: && buf[5] == 0)
305: cx.x_token = T_ENDIF;
306: else if (buf[1] == 'l' && buf[2] == 's')
307: if (buf[3] == 'i' && buf[4] == 'f'
308: && buf[5] == 0)
309: cx.x_token = T_ELSIF;
310: else if (buf[3] == 'e' && buf[4] == 0)
311: cx.x_token = T_ELSE;
312: break;
313: }
314: if (cx.x_token == T_STR
315: && (cx.x_val.v_str = str_cpy(buf)) == 0) {
316: p_memerror();
317: cx.x_token = T_EOF;
318: }
319: state = -1;
320: break;
321: }
322: break;
323: case 3: /* " quoted string */
324: switch (c) {
325: case '\n':
326: (void) s_ungetc(c);
327: case EOF:
328: case '"':
329: state = 2;
330: break;
331: case '\\':
332: switch (c = s_gettok1()) {
333: case -1:
334: case -2: /* newlines are invisible */
335: break;
336: default:
337: if (p < buf + sizeof buf - 1)
338: *p++ = c;
339: }
340: break;
341: default:
342: if (p < buf + sizeof buf - 1)
343: *p++ = c;
344: break;
345: }
346: break;
347: case 4: /* ' quoted string */
348: switch (c) {
349: case '\n':
350: (void) s_ungetc(c);
351: case EOF:
352: case '\'':
353: state = 2;
354: break;
355: case '\\':
356: switch (c = s_gettok1()) {
357: case -1:
358: case -2: /* newlines are invisible */
359: break;
360: default:
361: if (p < buf + sizeof buf - 1)
362: *p++ = c;
363: }
364: break;
365: default:
366: if (p < buf + sizeof buf - 1)
367: *p++ = c;
368: break;
369: }
370: break;
371: case 10: /* got 0 */
372: switch (c) {
373: case 'x':
374: case 'X':
375: cx.x_val.v_num = 0;
376: state = 12;
377: break;
378: case '0': case '1': case '2': case '3': case '4':
379: case '5': case '6': case '7':
380: cx.x_val.v_num = c - '0';
381: state = 13;
382: break;
383: case '8': case '9':
384: cx.x_val.v_num = c - '0';
385: state = 11;
386: break;
387: default:
388: (void) s_ungetc(c);
389: state = -1;
390: cx.x_token = T_NUM;
391: }
392: break;
393: case 11: /* decimal number */
394: switch (c) {
395: case '0': case '1': case '2': case '3': case '4':
396: case '5': case '6': case '7': case '8': case '9':
397: cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0';
398: break;
399: default:
400: (void) s_ungetc(c);
401: state = -1;
402: cx.x_token = T_NUM;
403: }
404: break;
405: case 12: /* hex number */
406: switch (c) {
407: case '0': case '1': case '2': case '3': case '4':
408: case '5': case '6': case '7': case '8': case '9':
409: cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0';
410: break;
411: case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
412: cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10;
413: break;
414: case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
415: cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10;
416: break;
417: default:
418: (void) s_ungetc(c);
419: state = -1;
420: cx.x_token = T_NUM;
421: }
422: break;
423: case 13: /* octal number */
424: switch (c) {
425: case '0': case '1': case '2': case '3': case '4':
426: case '5': case '6': case '7':
427: cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0';
428: break;
429: default:
430: (void) s_ungetc(c);
431: state = -1;
432: cx.x_token = T_NUM;
433: }
434: break;
435: case 20: /* got > */
436: switch (c) {
437: case '=':
438: cx.x_token = T_GE;
439: state = -1;
440: break;
441: case '>':
442: cx.x_token = T_RS;
443: state = -1;
444: break;
445: default:
446: (void) s_ungetc(c);
447: cx.x_token = T_GT;
448: state = -1;
449: }
450: break;
451: case 21: /* got < */
452: switch (c) {
453: case '=':
454: cx.x_token = T_LE;
455: state = -1;
456: break;
457: case '<':
458: cx.x_token = T_LS;
459: state = -1;
460: break;
461: default:
462: (void) s_ungetc(c);
463: cx.x_token = T_LT;
464: state = -1;
465: }
466: break;
467: case 22: /* got = */
468: switch (c) {
469: case '=':
470: cx.x_token = T_EQ;
471: state = -1;
472: break;
473: default:
474: (void) s_ungetc(c);
475: cx.x_token = T_ASSIGN;
476: state = -1;
477: }
478: break;
479: case 23: /* got ! */
480: switch (c) {
481: case '=':
482: cx.x_token = T_NE;
483: state = -1;
484: break;
485: default:
486: (void) s_ungetc(c);
487: cx.x_token = T_NOT;
488: state = -1;
489: }
490: break;
491: case 24: /* got & */
492: switch (c) {
493: case '&':
494: cx.x_token = T_ANDAND;
495: state = -1;
496: break;
497: default:
498: (void) s_ungetc(c);
499: cx.x_token = T_AND;
500: state = -1;
501: }
502: break;
503: case 25: /* got | */
504: switch (c) {
505: case '|':
506: cx.x_token = T_OROR;
507: state = -1;
508: break;
509: default:
510: (void) s_ungetc(c);
511: cx.x_token = T_OR;
512: state = -1;
513: }
514: break;
515: case 26: /* got $ */
516: switch (c) {
517: case '?':
518: cx.x_token = T_DQ;
519: state = -1;
520: break;
521: default:
522: (void) s_ungetc(c);
523: cx.x_token = T_DOLLAR;
524: state = -1;
525: }
526: break;
527: default:
528: abort();
529: }
530: if (state >= 0)
531: goto loop;
532: return cx.x_token;
533: }
534:
/*
 * Read the remainder of a backslash escape; the caller has already
 * consumed the '\'.
 *
 * Returns:
 *	-1	if the input ended right after the backslash,
 *	-2	if the backslash was followed by a newline,
 *	the control character for \b, \f, \n, \r and \t,
 *	the value of one to three octal digits for \N, \NN and \NNN
 *	(a character that is not an octal digit ends the number and is
 *	pushed back),
 *	otherwise the escaped character itself.
 */
int
s_gettok1(void)
{
	int c;
	int n;
	int i;

	c = s_getc();			/* got \ */
	switch (c) {
	case EOF:
		return -1;
	case '\n':
		return -2;
	case 'b':
		return '\b';
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7':
		break;
	default:
		return c;
	}

	/* octal escape: the first digit is in c, up to two more may follow */
	n = c - '0';
	for (i = 0; i < 2; i++) {
		c = s_getc();
		if (c < '0' || c > '7') {
			(void) s_ungetc(c);
			break;
		}
		n = n * 8 + c - '0';
	}
	return n;
}
575: }