Annotation of src/usr.bin/window/scanner.c, Revision 1.6
1.6 ! david 1: /* $OpenBSD: scanner.c,v 1.5 2003/06/03 02:56:23 millert Exp $ */
1.1 deraadt 2: /* $NetBSD: scanner.c,v 1.3 1995/09/28 10:34:36 tls Exp $ */
3:
4: /*
5: * Copyright (c) 1983, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * This code is derived from software contributed to Berkeley by
9: * Edward Wang at The University of California, Berkeley.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
1.5 millert 19: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 20: * may be used to endorse or promote products derived from this software
21: * without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: */
35:
36: #ifndef lint
37: #if 0
38: static char sccsid[] = "@(#)scanner.c 8.1 (Berkeley) 6/6/93";
39: #else
1.6 ! david 40: static char rcsid[] = "$OpenBSD: scanner.c,v 1.5 2003/06/03 02:56:23 millert Exp $";
1.1 deraadt 41: #endif
42: #endif /* not lint */
43:
44: #include "value.h"
45: #include "token.h"
46: #include "context.h"
47: #include "string.h"
1.6 ! david 48: #include <stdlib.h>
1.1 deraadt 49:
50: s_getc()
51: {
1.4 mpech 52: int c;
1.1 deraadt 53:
54: switch (cx.x_type) {
55: case X_FILE:
56: c = getc(cx.x_fp);
57: if (cx.x_bol && c != EOF) {
58: cx.x_bol = 0;
59: cx.x_lineno++;
60: }
61: if (c == '\n')
62: cx.x_bol = 1;
63: return c;
64: case X_BUF:
65: if (*cx.x_bufp != 0)
66: return *cx.x_bufp++ & 0xff;
67: else
68: return EOF;
69: }
70: /*NOTREACHED*/
71: }
72:
73: s_ungetc(c)
74: {
75: if (c == EOF)
76: return EOF;
77: switch (cx.x_type) {
78: case X_FILE:
79: cx.x_bol = 0;
80: return ungetc(c, cx.x_fp);
81: case X_BUF:
82: if (cx.x_bufp > cx.x_buf)
83: return *--cx.x_bufp = c;
84: else
85: return EOF;
86: }
87: /*NOTREACHED*/
88: }
89:
90: s_gettok()
91: {
92: char buf[100];
1.4 mpech 93: char *p = buf;
94: int c;
95: int state = 0;
1.1 deraadt 96:
97: loop:
98: c = s_getc();
99: switch (state) {
100: case 0:
101: switch (c) {
102: case ' ':
103: case '\t':
104: break;
105: case '\n':
106: case ';':
107: cx.x_token = T_EOL;
108: state = -1;
109: break;
110: case '#':
111: state = 1;
112: break;
113: case EOF:
114: cx.x_token = T_EOF;
115: state = -1;
116: break;
117: case 'a': case 'b': case 'c': case 'd': case 'e':
118: case 'f': case 'g': case 'h': case 'i': case 'j':
119: case 'k': case 'l': case 'm': case 'n': case 'o':
120: case 'p': case 'q': case 'r': case 's': case 't':
121: case 'u': case 'v': case 'w': case 'x': case 'y':
122: case 'z':
123: case 'A': case 'B': case 'C': case 'D': case 'E':
124: case 'F': case 'G': case 'H': case 'I': case 'J':
125: case 'K': case 'L': case 'M': case 'N': case 'O':
126: case 'P': case 'Q': case 'R': case 'S': case 'T':
127: case 'U': case 'V': case 'W': case 'X': case 'Y':
128: case 'Z':
129: case '_': case '.':
130: *p++ = c;
131: state = 2;
132: break;
133: case '"':
134: state = 3;
135: break;
136: case '\'':
137: state = 4;
138: break;
139: case '\\':
140: switch (c = s_gettok1()) {
141: case -1:
142: break;
143: case -2:
144: state = 0;
145: break;
146: default:
147: *p++ = c;
148: state = 2;
149: }
150: break;
151: case '0':
152: cx.x_val.v_num = 0;
153: state = 10;
154: break;
155: case '1': case '2': case '3': case '4':
156: case '5': case '6': case '7': case '8': case '9':
157: cx.x_val.v_num = c - '0';
158: state = 11;
159: break;
160: case '>':
161: state = 20;
162: break;
163: case '<':
164: state = 21;
165: break;
166: case '=':
167: state = 22;
168: break;
169: case '!':
170: state = 23;
171: break;
172: case '&':
173: state = 24;
174: break;
175: case '|':
176: state = 25;
177: break;
178: case '$':
179: state = 26;
180: break;
181: case '~':
182: cx.x_token = T_COMP;
183: state = -1;
184: break;
185: case '+':
186: cx.x_token = T_PLUS;
187: state = -1;
188: break;
189: case '-':
190: cx.x_token = T_MINUS;
191: state = -1;
192: break;
193: case '*':
194: cx.x_token = T_MUL;
195: state = -1;
196: break;
197: case '/':
198: cx.x_token = T_DIV;
199: state = -1;
200: break;
201: case '%':
202: cx.x_token = T_MOD;
203: state = -1;
204: break;
205: case '^':
206: cx.x_token = T_XOR;
207: state = -1;
208: break;
209: case '(':
210: cx.x_token = T_LP;
211: state = -1;
212: break;
213: case ')':
214: cx.x_token = T_RP;
215: state = -1;
216: break;
217: case ',':
218: cx.x_token = T_COMMA;
219: state = -1;
220: break;
221: case '?':
222: cx.x_token = T_QUEST;
223: state = -1;
224: break;
225: case ':':
226: cx.x_token = T_COLON;
227: state = -1;
228: break;
229: case '[':
230: cx.x_token = T_LB;
231: state = -1;
232: break;
233: case ']':
234: cx.x_token = T_RB;
235: state = -1;
236: break;
237: default:
238: cx.x_val.v_num = c;
239: cx.x_token = T_CHAR;
240: state = -1;
241: break;
242: }
243: break;
244: case 1: /* got # */
245: if (c == '\n' || c == EOF) {
246: (void) s_ungetc(c);
247: state = 0;
248: }
249: break;
250: case 2: /* unquoted string */
251: switch (c) {
252: case 'a': case 'b': case 'c': case 'd': case 'e':
253: case 'f': case 'g': case 'h': case 'i': case 'j':
254: case 'k': case 'l': case 'm': case 'n': case 'o':
255: case 'p': case 'q': case 'r': case 's': case 't':
256: case 'u': case 'v': case 'w': case 'x': case 'y':
257: case 'z':
258: case 'A': case 'B': case 'C': case 'D': case 'E':
259: case 'F': case 'G': case 'H': case 'I': case 'J':
260: case 'K': case 'L': case 'M': case 'N': case 'O':
261: case 'P': case 'Q': case 'R': case 'S': case 'T':
262: case 'U': case 'V': case 'W': case 'X': case 'Y':
263: case 'Z':
264: case '_': case '.':
265: case '0': case '1': case '2': case '3': case '4':
266: case '5': case '6': case '7': case '8': case '9':
267: if (p < buf + sizeof buf - 1)
268: *p++ = c;
269: break;
270: case '"':
271: state = 3;
272: break;
273: case '\'':
274: state = 4;
275: break;
276: case '\\':
277: switch (c = s_gettok1()) {
278: case -2:
279: (void) s_ungetc(' ');
280: case -1:
281: break;
282: default:
283: if (p < buf + sizeof buf - 1)
284: *p++ = c;
285: }
286: break;
287: default:
288: (void) s_ungetc(c);
289: case EOF:
290: *p = 0;
291: cx.x_token = T_STR;
292: switch (*buf) {
293: case 'i':
294: if (buf[1] == 'f' && buf[2] == 0)
295: cx.x_token = T_IF;
296: break;
297: case 't':
298: if (buf[1] == 'h' && buf[2] == 'e'
299: && buf[3] == 'n' && buf[4] == 0)
300: cx.x_token = T_THEN;
301: break;
302: case 'e':
303: if (buf[1] == 'n' && buf[2] == 'd'
304: && buf[3] == 'i' && buf[4] == 'f'
305: && buf[5] == 0)
306: cx.x_token = T_ENDIF;
307: else if (buf[1] == 'l' && buf[2] == 's')
308: if (buf[3] == 'i' && buf[4] == 'f'
309: && buf[5] == 0)
310: cx.x_token = T_ELSIF;
311: else if (buf[3] == 'e' && buf[4] == 0)
312: cx.x_token = T_ELSE;
313: break;
314: }
315: if (cx.x_token == T_STR
316: && (cx.x_val.v_str = str_cpy(buf)) == 0) {
317: p_memerror();
318: cx.x_token = T_EOF;
319: }
320: state = -1;
321: break;
322: }
323: break;
324: case 3: /* " quoted string */
325: switch (c) {
326: case '\n':
327: (void) s_ungetc(c);
328: case EOF:
329: case '"':
330: state = 2;
331: break;
332: case '\\':
333: switch (c = s_gettok1()) {
334: case -1:
335: case -2: /* newlines are invisible */
336: break;
337: default:
338: if (p < buf + sizeof buf - 1)
339: *p++ = c;
340: }
341: break;
342: default:
343: if (p < buf + sizeof buf - 1)
344: *p++ = c;
345: break;
346: }
347: break;
348: case 4: /* ' quoted string */
349: switch (c) {
350: case '\n':
351: (void) s_ungetc(c);
352: case EOF:
353: case '\'':
354: state = 2;
355: break;
356: case '\\':
357: switch (c = s_gettok1()) {
358: case -1:
359: case -2: /* newlines are invisible */
360: break;
361: default:
362: if (p < buf + sizeof buf - 1)
363: *p++ = c;
364: }
365: break;
366: default:
367: if (p < buf + sizeof buf - 1)
368: *p++ = c;
369: break;
370: }
371: break;
372: case 10: /* got 0 */
373: switch (c) {
374: case 'x':
375: case 'X':
376: cx.x_val.v_num = 0;
377: state = 12;
378: break;
379: case '0': case '1': case '2': case '3': case '4':
380: case '5': case '6': case '7':
381: cx.x_val.v_num = c - '0';
382: state = 13;
383: break;
384: case '8': case '9':
385: cx.x_val.v_num = c - '0';
386: state = 11;
387: break;
388: default:
389: (void) s_ungetc(c);
390: state = -1;
391: cx.x_token = T_NUM;
392: }
393: break;
394: case 11: /* decimal number */
395: switch (c) {
396: case '0': case '1': case '2': case '3': case '4':
397: case '5': case '6': case '7': case '8': case '9':
398: cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0';
399: break;
400: default:
401: (void) s_ungetc(c);
402: state = -1;
403: cx.x_token = T_NUM;
404: }
405: break;
406: case 12: /* hex number */
407: switch (c) {
408: case '0': case '1': case '2': case '3': case '4':
409: case '5': case '6': case '7': case '8': case '9':
410: cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0';
411: break;
412: case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
413: cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10;
414: break;
415: case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
416: cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10;
417: break;
418: default:
419: (void) s_ungetc(c);
420: state = -1;
421: cx.x_token = T_NUM;
422: }
423: break;
424: case 13: /* octal number */
425: switch (c) {
426: case '0': case '1': case '2': case '3': case '4':
427: case '5': case '6': case '7':
428: cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0';
429: break;
430: default:
431: (void) s_ungetc(c);
432: state = -1;
433: cx.x_token = T_NUM;
434: }
435: break;
436: case 20: /* got > */
437: switch (c) {
438: case '=':
439: cx.x_token = T_GE;
440: state = -1;
441: break;
442: case '>':
443: cx.x_token = T_RS;
444: state = -1;
445: break;
446: default:
447: (void) s_ungetc(c);
448: cx.x_token = T_GT;
449: state = -1;
450: }
451: break;
452: case 21: /* got < */
453: switch (c) {
454: case '=':
455: cx.x_token = T_LE;
456: state = -1;
457: break;
458: case '<':
459: cx.x_token = T_LS;
460: state = -1;
461: break;
462: default:
463: (void) s_ungetc(c);
464: cx.x_token = T_LT;
465: state = -1;
466: }
467: break;
468: case 22: /* got = */
469: switch (c) {
470: case '=':
471: cx.x_token = T_EQ;
472: state = -1;
473: break;
474: default:
475: (void) s_ungetc(c);
476: cx.x_token = T_ASSIGN;
477: state = -1;
478: }
479: break;
480: case 23: /* got ! */
481: switch (c) {
482: case '=':
483: cx.x_token = T_NE;
484: state = -1;
485: break;
486: default:
487: (void) s_ungetc(c);
488: cx.x_token = T_NOT;
489: state = -1;
490: }
491: break;
492: case 24: /* got & */
493: switch (c) {
494: case '&':
495: cx.x_token = T_ANDAND;
496: state = -1;
497: break;
498: default:
499: (void) s_ungetc(c);
500: cx.x_token = T_AND;
501: state = -1;
502: }
503: break;
504: case 25: /* got | */
505: switch (c) {
506: case '|':
507: cx.x_token = T_OROR;
508: state = -1;
509: break;
510: default:
511: (void) s_ungetc(c);
512: cx.x_token = T_OR;
513: state = -1;
514: }
515: break;
516: case 26: /* got $ */
517: switch (c) {
518: case '?':
519: cx.x_token = T_DQ;
520: state = -1;
521: break;
522: default:
523: (void) s_ungetc(c);
524: cx.x_token = T_DOLLAR;
525: state = -1;
526: }
527: break;
528: default:
529: abort();
530: }
531: if (state >= 0)
532: goto loop;
533: return cx.x_token;
534: }
535:
/*
 * Decode one backslash escape.  The caller has already consumed the
 * backslash; this reads what follows it with s_getc().
 *
 * Returns:
 *	-1	end of input directly after the backslash
 *	-2	the backslash was followed by a newline
 *	else	the decoded character: \b \f \n \r \t map to the usual
 *		control characters, one to three octal digits give the
 *		character with that code, and any other character stands
 *		for itself.
 */
int
s_gettok1(void)
{
	int c;
	int n;
	int extra;

	c = s_getc();			/* got \ */
	switch (c) {
	case EOF:
		return -1;
	case '\n':
		return -2;
	case 'b':
		return '\b';
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7':
		break;			/* octal escape, decoded below */
	default:
		return c;
	}

	/* First octal digit is in c; accept at most two more. */
	n = c - '0';
	for (extra = 0; extra < 2; extra++) {
		c = s_getc();
		if (c < '0' || c > '7') {
			/* Not part of the escape: give it back. */
			(void) s_ungetc(c);
			break;
		}
		n = n * 8 + c - '0';
	}
	return n;
}