Annotation of src/usr.bin/indent/indent.c, Revision 1.31
1.31 ! deraadt 1: /* $OpenBSD: indent.c,v 1.30 2015/11/11 01:12:09 deraadt Exp $ */
1.3 deraadt 2:
1.1 deraadt 3: /*
1.9 pjanzen 4: * Copyright (c) 1980, 1993
5: * The Regents of the University of California.
6: * Copyright (c) 1976 Board of Trustees of the University of Illinois.
1.1 deraadt 7: * Copyright (c) 1985 Sun Microsystems, Inc.
8: * All rights reserved.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
1.15 deraadt 18: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 19: * may be used to endorse or promote products derived from this software
20: * without specific prior written permission.
21: *
22: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32: * SUCH DAMAGE.
33: */
34:
35: #include <fcntl.h>
36: #include <unistd.h>
1.26 deraadt 37: #include <limits.h>
1.1 deraadt 38: #include <stdio.h>
39: #include <stdlib.h>
40: #include <string.h>
41: #include "indent_globs.h"
42: #include "indent_codes.h"
43: #include <ctype.h>
1.2 deraadt 44: #include <errno.h>
1.4 mickey 45: #include <err.h>
1.1 deraadt 46:
47: char *in_name = "Standard Input"; /* will always point to name of input
48: * file */
49: char *out_name = "Standard Output"; /* will always point to name
50: * of output file */
1.26 deraadt 51: char bakfile[PATH_MAX] = ""; /* starts out as the empty string; NOTE(review): presumably receives the backup file's path from bakcopy(), which is not shown here -- confirm */
1.1 deraadt 52:
1.17 deraadt 53: void bakcopy(void); /* defined elsewhere; main() calls it after setting out_name = in_name, when no output file was named, troff is off and input is not stdin */
1.4 mickey 54:
55: int
1.16 deraadt 56: main(int argc, char **argv)
1.1 deraadt 57: {
58:
59: extern int found_err; /* flag set in diag() on error */
60: int dec_ind; /* current indentation for declarations */
61: int di_stack[20]; /* a stack of structure indentation levels */
62: int flushed_nl; /* used when buffering up comments to remember
63: * that a newline was passed over */
64: int force_nl; /* when true, code must be broken */
65: int hd_type; /* used to store type of stmt for if (...),
66: * for (...), etc */
1.11 mpech 67: int i; /* local loop counter */
1.1 deraadt 68: int scase; /* set to true when we see a case, so we will
69: * know what to do with the following colon */
70: int sp_sw; /* when true, we are in the expression of
71: * if(...), while(...), etc. */
72: int squest; /* when this is positive, we have seen a ?
73: * without the matching : in a <c>?<s>:<s>
74: * construct */
1.11 mpech 75: char *t_ptr; /* used for copying tokens */
1.21 jsg 76: int tabs_to_var; /* true if using tabs to indent to var name */
1.1 deraadt 77: int type_code; /* the type of token, returned by lexi */
78:
79: int last_else = 0; /* true iff last keyword was an else */
80:
1.29 deraadt 81: if (pledge("stdio rpath wpath cpath", NULL) == -1)
82: err(1, "pledge");
1.1 deraadt 83:
84: /*-----------------------------------------------*\
85: | INITIALIZATION |
86: \*-----------------------------------------------*/
87:
88:
1.9 pjanzen 89: hd_type = 0;
1.1 deraadt 90: ps.p_stack[0] = stmt; /* this is the parser's stack */
91: ps.last_nl = true; /* this is true if the last thing scanned was
92: * a newline */
93: ps.last_token = semicolon;
1.27 deraadt 94: combuf = malloc(bufsize);
95: labbuf = malloc(bufsize);
96: codebuf = malloc(bufsize);
97: tokenbuf = malloc(bufsize);
1.9 pjanzen 98: if (combuf == NULL || labbuf == NULL || codebuf == NULL ||
99: tokenbuf == NULL)
1.10 pjanzen 100: err(1, NULL);
1.1 deraadt 101: l_com = combuf + bufsize - 5;
102: l_lab = labbuf + bufsize - 5;
103: l_code = codebuf + bufsize - 5;
104: l_token = tokenbuf + bufsize - 5;
105: combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and
106: * comment buffers */
107: combuf[1] = codebuf[1] = labbuf[1] = '\0';
108: ps.else_if = 1; /* Default else-if special processing to on */
109: s_lab = e_lab = labbuf + 1;
110: s_code = e_code = codebuf + 1;
111: s_com = e_com = combuf + 1;
112: s_token = e_token = tokenbuf + 1;
113:
1.27 deraadt 114: in_buffer = malloc(10);
1.9 pjanzen 115: if (in_buffer == NULL)
1.10 pjanzen 116: err(1, NULL);
1.1 deraadt 117: in_buffer_limit = in_buffer + 8;
118: buf_ptr = buf_end = in_buffer;
119: line_no = 1;
120: had_eof = ps.in_decl = ps.decl_on_line = break_comma = false;
121: sp_sw = force_nl = false;
122: ps.in_or_st = false;
123: ps.bl_line = true;
124: dec_ind = 0;
125: di_stack[ps.dec_nest = 0] = 0;
126: ps.want_blank = ps.in_stmt = ps.ind_stmt = false;
127:
128:
129: scase = ps.pcase = false;
130: squest = 0;
131: sc_end = 0;
132: bp_save = 0;
133: be_save = 0;
134:
135: output = 0;
136:
137:
138:
139: /*--------------------------------------------------*\
140: | COMMAND LINE SCAN |
141: \*--------------------------------------------------*/
142:
143: #ifdef undef
144: max_col = 78; /* -l78 */
145: lineup_to_parens = 1; /* -lp */
146: ps.ljust_decl = 0; /* -ndj */
147: ps.com_ind = 33; /* -c33 */
148: star_comment_cont = 1; /* -sc */
149: ps.ind_size = 8; /* -i8 */
150: verbose = 0;
151: ps.decl_indent = 16; /* -di16 */
152: ps.indent_parameters = 1; /* -ip */
153: ps.decl_com_ind = 0; /* if this is not set to some positive value
154: * by an arg, we will set this equal to
155: * ps.com_ind */
156: btype_2 = 1; /* -br */
157: cuddle_else = 1; /* -ce */
158: ps.unindent_displace = 0; /* -d0 */
159: ps.case_indent = 0; /* -cli0 */
160: format_col1_comments = 1; /* -fc1 */
161: procnames_start_line = 1; /* -psl */
162: proc_calls_space = 0; /* -npcs */
163: comment_delimiter_on_blankline = 1; /* -cdb */
164: ps.leave_comma = 1; /* -nbc */
165: #endif
166:
167: for (i = 1; i < argc; ++i)
168: if (strcmp(argv[i], "-npro") == 0)
169: break;
170: set_defaults();
171: if (i >= argc)
172: set_profile();
173:
174: for (i = 1; i < argc; ++i) {
175:
176: /*
177: * look thru args (if any) for changes to defaults
178: */
179: if (argv[i][0] != '-') {/* no flag on parameter */
180: if (input == 0) { /* we must have the input file */
181: in_name = argv[i]; /* remember name of input file */
182: input = fopen(in_name, "r");
1.4 mickey 183: if (input == NULL) /* check for open error */
1.7 millert 184: err(1, "%s", in_name);
1.1 deraadt 185: continue;
186: }
187: else if (output == 0) { /* we have the output file */
188: out_name = argv[i]; /* remember name of output file */
1.4 mickey 189: if (strcmp(in_name, out_name) == 0) /* attempt to overwrite
1.1 deraadt 190: * the file */
1.5 deraadt 191: errx(1, "input and output files must be different");
1.1 deraadt 192: output = fopen(out_name, "w");
1.4 mickey 193: if (output == NULL) /* check for create error */
1.7 millert 194: err(1, "%s", out_name);
1.1 deraadt 195: continue;
196: }
1.5 deraadt 197: errx(1, "unknown parameter: %s", argv[i]);
1.1 deraadt 198: }
199: else
200: set_option(argv[i]);
201: } /* end of for */
1.6 alex 202: if (input == NULL) {
1.22 jsg 203: input = stdin;
1.6 alex 204: }
1.9 pjanzen 205: if (output == NULL) {
1.22 jsg 206: if (troff || input == stdin)
1.1 deraadt 207: output = stdout;
208: else {
209: out_name = in_name;
210: bakcopy();
211: }
1.9 pjanzen 212: }
1.1 deraadt 213: if (ps.com_ind <= 1)
214: ps.com_ind = 2; /* dont put normal comments before column 2 */
215: if (troff) {
216: if (bodyf.font[0] == 0)
217: parsefont(&bodyf, "R");
218: if (scomf.font[0] == 0)
219: parsefont(&scomf, "I");
220: if (blkcomf.font[0] == 0)
221: blkcomf = scomf, blkcomf.size += 2;
222: if (boxcomf.font[0] == 0)
223: boxcomf = blkcomf;
224: if (stringf.font[0] == 0)
225: parsefont(&stringf, "L");
226: if (keywordf.font[0] == 0)
227: parsefont(&keywordf, "B");
228: writefdef(&bodyf, 'B');
229: writefdef(&scomf, 'C');
230: writefdef(&blkcomf, 'L');
231: writefdef(&boxcomf, 'X');
232: writefdef(&stringf, 'S');
233: writefdef(&keywordf, 'K');
234: }
235: if (block_comment_max_col <= 0)
236: block_comment_max_col = max_col;
237: if (ps.decl_com_ind <= 0) /* if not specified by user, set this */
238: ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind;
239: if (continuation_indent == 0)
240: continuation_indent = ps.ind_size;
1.4 mickey 241: fill_buffer(); /* get first batch of stuff into input buffer */
1.1 deraadt 242:
243: parse(semicolon);
244: {
1.9 pjanzen 245: char *p = buf_ptr;
246: int col = 1;
1.1 deraadt 247:
248: while (1) {
249: if (*p == ' ')
250: col++;
251: else if (*p == '\t')
252: col = ((col - 1) & ~7) + 9;
253: else
254: break;
255: p++;
256: }
257: if (col > ps.ind_size)
258: ps.ind_level = ps.i_l_follow = col / ps.ind_size;
259: }
260: if (troff) {
1.11 mpech 261: char *p = in_name,
1.1 deraadt 262: *beg = in_name;
263:
264: while (*p)
265: if (*p++ == '/')
266: beg = p;
267: fprintf(output, ".Fn \"%s\"\n", beg);
268: }
269: /*
270: * START OF MAIN LOOP
271: */
272:
273: while (1) { /* this is the main loop. it will go until we
274: * reach eof */
275: int is_procname;
276:
277: type_code = lexi(); /* lexi reads one token. The actual
278: * characters read are stored in "token". lexi
279: * returns a code indicating the type of token */
280: is_procname = ps.procname[0];
281:
282: /*
283: * The following code moves everything following an if (), while (),
284: * else, etc. up to the start of the following stmt to a buffer. This
285: * allows proper handling of both kinds of brace placement.
286: */
287:
288: flushed_nl = false;
289: while (ps.search_brace) { /* if we scanned an if(), while(),
290: * etc., we might need to copy stuff
291: * into a buffer we must loop, copying
292: * stuff into save_com, until we find
293: * the start of the stmt which follows
294: * the if, or whatever */
295: switch (type_code) {
296: case newline:
297: ++line_no;
298: flushed_nl = true;
299: case form_feed:
300: break; /* form feeds and newlines found here will be
301: * ignored */
302:
303: case lbrace: /* this is a brace that starts the compound
304: * stmt */
305: if (sc_end == 0) { /* ignore buffering if a comment wasnt
306: * stored up */
307: ps.search_brace = false;
308: goto check_type;
309: }
310: if (btype_2) {
311: save_com[0] = '{'; /* we either want to put the brace
312: * right after the if */
313: goto sw_buffer; /* go to common code to get out of
314: * this loop */
315: }
316: case comment: /* we have a comment, so we must copy it into
317: * the buffer */
318: if (!flushed_nl || sc_end != 0) {
319: if (sc_end == 0) { /* if this is the first comment, we
320: * must set up the buffer */
321: save_com[0] = save_com[1] = ' ';
322: sc_end = &(save_com[2]);
323: }
324: else {
325: *sc_end++ = '\n'; /* add newline between
326: * comments */
327: *sc_end++ = ' ';
328: --line_no;
329: }
330: *sc_end++ = '/'; /* copy in start of comment */
331: *sc_end++ = '*';
332:
333: for (;;) { /* loop until we get to the end of the comment */
334: *sc_end = *buf_ptr++;
335: if (buf_ptr >= buf_end)
336: fill_buffer();
337:
338: if (*sc_end++ == '*' && *buf_ptr == '/')
339: break; /* we are at end of comment */
340:
341: if (sc_end >= &(save_com[sc_size])) { /* check for temp buffer
342: * overflow */
343: diag(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever.");
344: fflush(output);
345: exit(1);
346: }
347: }
348: *sc_end++ = '/'; /* add ending slash */
349: if (++buf_ptr >= buf_end) /* get past / in buffer */
350: fill_buffer();
351: break;
352: }
353: default: /* it is the start of a normal statement */
354: if (flushed_nl) /* if we flushed a newline, make sure it is
355: * put back */
356: force_nl = true;
1.4 mickey 357: if ((type_code == sp_paren && *token == 'i'
358: && last_else && ps.else_if) ||
359: (type_code == sp_nparen && *token == 'e'
360: && e_code != s_code && e_code[-1] == '}'))
361: force_nl = false;
1.1 deraadt 362:
363: if (sc_end == 0) { /* ignore buffering if comment wasnt
364: * saved up */
365: ps.search_brace = false;
366: goto check_type;
367: }
368: if (force_nl) { /* if we should insert a nl here, put it into
369: * the buffer */
370: force_nl = false;
371: --line_no; /* this will be re-increased when the nl is
372: * read from the buffer */
373: *sc_end++ = '\n';
374: *sc_end++ = ' ';
375: if (verbose && !flushed_nl) /* print error msg if the line
376: * was not already broken */
377: diag(0, "Line broken");
378: flushed_nl = false;
379: }
380: for (t_ptr = token; *t_ptr; ++t_ptr)
381: *sc_end++ = *t_ptr; /* copy token into temp buffer */
382: ps.procname[0] = 0;
383:
384: sw_buffer:
385: ps.search_brace = false; /* stop looking for start of
386: * stmt */
387: bp_save = buf_ptr; /* save current input buffer */
388: be_save = buf_end;
389: buf_ptr = save_com; /* fix so that subsequent calls to
390: * lexi will take tokens out of
391: * save_com */
392: *sc_end++ = ' ';/* add trailing blank, just in case */
393: buf_end = sc_end;
394: sc_end = 0;
395: break;
396: } /* end of switch */
397: if (type_code != 0) /* we must make this check, just in case there
398: * was an unexpected EOF */
399: type_code = lexi(); /* read another token */
400: /* if (ps.search_brace) ps.procname[0] = 0; */
401: if ((is_procname = ps.procname[0]) && flushed_nl
402: && !procnames_start_line && ps.in_decl
403: && type_code == ident)
404: flushed_nl = 0;
405: } /* end of while (search_brace) */
406: last_else = 0;
407: check_type:
408: if (type_code == 0) { /* we got eof */
409: if (s_lab != e_lab || s_code != e_code
410: || s_com != e_com) /* must dump end of line */
411: dump_line();
412: if (ps.tos > 1) /* check for balanced braces */
1.12 vincent 413: diag(1, "Missing braces at end of file.");
1.1 deraadt 414:
415: if (verbose) {
416: printf("There were %d output lines and %d comments\n",
417: ps.out_lines, ps.out_coms);
418: printf("(Lines with comments)/(Lines with code): %6.3f\n",
419: (1.0 * ps.com_lines) / code_lines);
420: }
421: fflush(output);
422: exit(found_err);
423: }
424: if (
425: (type_code != comment) &&
426: (type_code != newline) &&
427: (type_code != preesc) &&
428: (type_code != form_feed)) {
429: if (force_nl &&
430: (type_code != semicolon) &&
431: (type_code != lbrace || !btype_2)) {
432: /* we should force a broken line here */
433: if (verbose && !flushed_nl)
434: diag(0, "Line broken");
435: flushed_nl = false;
436: dump_line();
437: ps.want_blank = false; /* dont insert blank at line start */
438: force_nl = false;
439: }
440: ps.in_stmt = true; /* turn on flag which causes an extra level of
441: * indentation. this is turned off by a ; or
442: * '}' */
443: if (s_com != e_com) { /* the turkey has embedded a comment
444: * in a line. fix it */
445: *e_code++ = ' ';
446: for (t_ptr = s_com; *t_ptr; ++t_ptr) {
447: CHECK_SIZE_CODE;
448: *e_code++ = *t_ptr;
449: }
450: *e_code++ = ' ';
451: *e_code = '\0'; /* null terminate code sect */
452: ps.want_blank = false;
453: e_com = s_com;
454: }
455: }
456: else if (type_code != comment) /* preserve force_nl thru a comment */
457: force_nl = false; /* cancel forced newline after newline, form
458: * feed, etc */
459:
460:
461:
462: /*-----------------------------------------------------*\
463: | do switch on type of token scanned |
464: \*-----------------------------------------------------*/
465: CHECK_SIZE_CODE;
466: switch (type_code) { /* now, decide what to do with the token */
467:
468: case form_feed: /* found a form feed in line */
469: ps.use_ff = true; /* a form feed is treated much like a newline */
470: dump_line();
471: ps.want_blank = false;
472: break;
473:
474: case newline:
475: if (ps.last_token != comma || ps.p_l_follow > 0
476: || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) {
477: dump_line();
478: ps.want_blank = false;
479: }
480: ++line_no; /* keep track of input line number */
481: break;
482:
483: case lparen: /* got a '(' or '[' */
484: ++ps.p_l_follow; /* count parens to make Healy happy */
485: if (ps.want_blank && *token != '[' &&
486: (ps.last_token != ident || proc_calls_space
487: || (ps.its_a_keyword && (!ps.sizeof_keyword || Bill_Shannon))))
488: *e_code++ = ' ';
489: if (ps.in_decl && !ps.block_init)
490: if (troff && !ps.dumped_decl_indent && !is_procname && ps.last_token == decl) {
491: ps.dumped_decl_indent = 1;
1.14 krw 492: snprintf(e_code, (l_code - e_code) + 5,
493: "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token);
1.1 deraadt 494: e_code += strlen(e_code);
1.14 krw 495: CHECK_SIZE_CODE;
1.1 deraadt 496: }
497: else {
498: while ((e_code - s_code) < dec_ind) {
499: CHECK_SIZE_CODE;
500: *e_code++ = ' ';
501: }
502: *e_code++ = token[0];
503: }
504: else
505: *e_code++ = token[0];
506: ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code;
507: if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent
508: && ps.paren_indents[0] < 2 * ps.ind_size)
509: ps.paren_indents[0] = 2 * ps.ind_size;
510: ps.want_blank = false;
511: if (ps.in_or_st && *token == '(' && ps.tos <= 2) {
512: /*
513: * this is a kluge to make sure that declarations will be
514: * aligned right if proc decl has an explicit type on it, i.e.
515: * "int a(x) {..."
516: */
517: parse(semicolon); /* I said this was a kluge... */
518: ps.in_or_st = false; /* turn off flag for structure decl or
519: * initialization */
520: }
521: if (ps.sizeof_keyword)
522: ps.sizeof_mask |= 1 << ps.p_l_follow;
523: break;
524:
525: case rparen: /* got a ')' or ']' */
526: rparen_count--;
527: if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.sizeof_mask) {
528: ps.last_u_d = true;
529: ps.cast_mask &= (1 << ps.p_l_follow) - 1;
530: }
531: ps.sizeof_mask &= (1 << ps.p_l_follow) - 1;
532: if (--ps.p_l_follow < 0) {
533: ps.p_l_follow = 0;
534: diag(0, "Extra %c", *token);
535: }
536: if (e_code == s_code) /* if the paren starts the line */
537: ps.paren_level = ps.p_l_follow; /* then indent it */
538:
539: *e_code++ = token[0];
540: ps.want_blank = true;
541:
542: if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if
543: * (...), or some such */
544: sp_sw = false;
545: force_nl = true;/* must force newline after if */
546: ps.last_u_d = true; /* inform lexi that a following
547: * operator is unary */
548: ps.in_stmt = false; /* dont use stmt continuation
549: * indentation */
550:
551: parse(hd_type); /* let parser worry about if, or whatever */
552: }
553: ps.search_brace = btype_2; /* this should insure that constructs
554: * such as main(){...} and int[]{...}
555: * have their braces put in the right
556: * place */
557: break;
558:
559: case unary_op: /* this could be any unary operation */
560: if (ps.want_blank)
561: *e_code++ = ' ';
562:
563: if (troff && !ps.dumped_decl_indent && ps.in_decl && !is_procname) {
1.14 krw 564: snprintf(e_code, (l_code - e_code) + 5,
565: "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token);
1.1 deraadt 566: ps.dumped_decl_indent = 1;
567: e_code += strlen(e_code);
1.14 krw 568: CHECK_SIZE_CODE;
1.1 deraadt 569: }
570: else {
571: char *res = token;
572:
573: if (ps.in_decl && !ps.block_init) { /* if this is a unary op
574: * in a declaration, we
575: * should indent this
576: * token */
577: for (i = 0; token[i]; ++i); /* find length of token */
578: while ((e_code - s_code) < (dec_ind - i)) {
579: CHECK_SIZE_CODE;
580: *e_code++ = ' '; /* pad it */
581: }
582: }
583: if (troff && token[0] == '-' && token[1] == '>')
584: res = "\\(->";
585: for (t_ptr = res; *t_ptr; ++t_ptr) {
586: CHECK_SIZE_CODE;
587: *e_code++ = *t_ptr;
588: }
589: }
590: ps.want_blank = false;
591: break;
592:
593: case binary_op: /* any binary operation */
594: if (ps.want_blank)
595: *e_code++ = ' ';
596: {
597: char *res = token;
598:
599: if (troff)
600: switch (token[0]) {
601: case '<':
602: if (token[1] == '=')
603: res = "\\(<=";
604: break;
605: case '>':
606: if (token[1] == '=')
607: res = "\\(>=";
608: break;
609: case '!':
610: if (token[1] == '=')
611: res = "\\(!=";
612: break;
613: case '|':
614: if (token[1] == '|')
615: res = "\\(br\\(br";
616: else if (token[1] == 0)
617: res = "\\(br";
618: break;
619: }
620: for (t_ptr = res; *t_ptr; ++t_ptr) {
621: CHECK_SIZE_CODE;
622: *e_code++ = *t_ptr; /* move the operator */
623: }
624: }
625: ps.want_blank = true;
626: break;
627:
628: case postop: /* got a trailing ++ or -- */
629: *e_code++ = token[0];
630: *e_code++ = token[1];
631: ps.want_blank = true;
632: break;
633:
634: case question: /* got a ? */
635: squest++; /* this will be used when a later colon
636: * appears so we can distinguish the
637: * <c>?<n>:<n> construct */
638: if (ps.want_blank)
639: *e_code++ = ' ';
640: *e_code++ = '?';
641: ps.want_blank = true;
642: break;
643:
644: case casestmt: /* got word 'case' or 'default' */
645: scase = true; /* so we can process the later colon properly */
646: goto copy_id;
647:
648: case colon: /* got a ':' */
649: if (squest > 0) { /* it is part of the <c>?<n>: <n> construct */
650: --squest;
651: if (ps.want_blank)
652: *e_code++ = ' ';
653: *e_code++ = ':';
654: ps.want_blank = true;
655: break;
656: }
657: if (ps.in_decl) {
658: *e_code++ = ':';
659: ps.want_blank = false;
660: break;
661: }
662: ps.in_stmt = false; /* seeing a label does not imply we are in a
663: * stmt */
664: for (t_ptr = s_code; *t_ptr; ++t_ptr)
665: *e_lab++ = *t_ptr; /* turn everything so far into a label */
666: e_code = s_code;
667: *e_lab++ = ':';
668: *e_lab++ = ' ';
669: *e_lab = '\0';
670:
671: force_nl = ps.pcase = scase; /* ps.pcase will be used by
672: * dump_line to decide how to
673: * indent the label. force_nl
674: * will force a case n: to be
675: * on a line by itself */
676: scase = false;
677: ps.want_blank = false;
678: break;
679:
680: case semicolon: /* got a ';' */
681: ps.in_or_st = false;/* we are not in an initialization or
682: * structure declaration */
683: scase = false; /* these will only need resetting in a error */
684: squest = 0;
685: if (ps.last_token == rparen && rparen_count == 0)
686: ps.in_parameter_declaration = 0;
687: ps.cast_mask = 0;
688: ps.sizeof_mask = 0;
689: ps.block_init = 0;
690: ps.block_init_level = 0;
691: ps.just_saw_decl--;
692:
693: if (ps.in_decl && s_code == e_code && !ps.block_init)
694: while ((e_code - s_code) < (dec_ind - 1)) {
695: CHECK_SIZE_CODE;
696: *e_code++ = ' ';
697: }
698:
699: ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level
700: * structure declaration, we
701: * arent any more */
702:
703: if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) {
704:
705: /*
706: * This should be true iff there were unbalanced parens in the
707: * stmt. It is a bit complicated, because the semicolon might
708: * be in a for stmt
709: */
710: diag(1, "Unbalanced parens");
711: ps.p_l_follow = 0;
712: if (sp_sw) { /* this is a check for a if, while, etc. with
713: * unbalanced parens */
714: sp_sw = false;
715: parse(hd_type); /* dont lose the if, or whatever */
716: }
717: }
718: *e_code++ = ';';
719: ps.want_blank = true;
720: ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the
721: * middle of a stmt */
722:
723: if (!sp_sw) { /* if not if for (;;) */
724: parse(semicolon); /* let parser know about end of stmt */
725: force_nl = true;/* force newline after a end of stmt */
726: }
727: break;
728:
729: case lbrace: /* got a '{' */
730: ps.in_stmt = false; /* dont indent the {} */
731: if (!ps.block_init)
732: force_nl = true;/* force other stuff on same line as '{' onto
733: * new line */
734: else if (ps.block_init_level <= 0)
735: ps.block_init_level = 1;
736: else
737: ps.block_init_level++;
738:
739: if (s_code != e_code && !ps.block_init) {
740: if (!btype_2) {
741: dump_line();
742: ps.want_blank = false;
743: }
744: else if (ps.in_parameter_declaration && !ps.in_or_st) {
745: ps.i_l_follow = 0;
746: dump_line();
747: ps.want_blank = false;
748: }
749: }
750: if (ps.in_parameter_declaration)
751: prefix_blankline_requested = 0;
752:
1.18 jsg 753: if (ps.p_l_follow > 0) { /* check for preceding unbalanced
1.1 deraadt 754: * parens */
755: diag(1, "Unbalanced parens");
756: ps.p_l_follow = 0;
757: if (sp_sw) { /* check for unclosed if, for, etc. */
758: sp_sw = false;
759: parse(hd_type);
760: ps.ind_level = ps.i_l_follow;
761: }
762: }
763: if (s_code == e_code)
764: ps.ind_stmt = false; /* dont put extra indentation on line
765: * with '{' */
766: if (ps.in_decl && ps.in_or_st) { /* this is either a structure
767: * declaration or an init */
768: di_stack[ps.dec_nest++] = dec_ind;
769: /* ? dec_ind = 0; */
770: }
771: else {
1.9 pjanzen 772: ps.decl_on_line = false;
773: /* we can't be in the middle of a declaration, so don't do
774: * special indentation of comments */
1.1 deraadt 775: if (blanklines_after_declarations_at_proctop
776: && ps.in_parameter_declaration)
777: postfix_blankline_requested = 1;
778: ps.in_parameter_declaration = 0;
779: }
780: dec_ind = 0;
781: parse(lbrace); /* let parser know about this */
782: if (ps.want_blank) /* put a blank before '{' if '{' is not at
783: * start of line */
784: *e_code++ = ' ';
785: ps.want_blank = false;
786: *e_code++ = '{';
787: ps.just_saw_decl = 0;
788: break;
789:
790: case rbrace: /* got a '}' */
791: if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be
792: * omitted in
793: * declarations */
794: parse(semicolon);
795: if (ps.p_l_follow) {/* check for unclosed if, for, else. */
796: diag(1, "Unbalanced parens");
797: ps.p_l_follow = 0;
798: sp_sw = false;
799: }
800: ps.just_saw_decl = 0;
801: ps.block_init_level--;
802: if (s_code != e_code && !ps.block_init) { /* '}' must be first on
803: * line */
804: if (verbose)
805: diag(0, "Line broken");
806: dump_line();
807: }
808: *e_code++ = '}';
809: ps.want_blank = true;
810: ps.in_stmt = ps.ind_stmt = false;
811: if (ps.dec_nest > 0) { /* we are in multi-level structure
812: * declaration */
813: dec_ind = di_stack[--ps.dec_nest];
814: if (ps.dec_nest == 0 && !ps.in_parameter_declaration)
815: ps.just_saw_decl = 2;
816: ps.in_decl = true;
817: }
818: prefix_blankline_requested = 0;
819: parse(rbrace); /* let parser know about this */
820: ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead
821: && ps.il[ps.tos] >= ps.ind_level;
822: if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0)
823: postfix_blankline_requested = 1;
824: break;
825:
826: case swstmt: /* got keyword "switch" */
827: sp_sw = true;
828: hd_type = swstmt; /* keep this for when we have seen the
829: * expression */
830: goto copy_id; /* go move the token into buffer */
831:
832: case sp_paren: /* token is if, while, for */
833: sp_sw = true; /* the interesting stuff is done after the
834: * expression is scanned */
835: hd_type = (*token == 'i' ? ifstmt :
836: (*token == 'w' ? whilestmt : forstmt));
837:
838: /*
839: * remember the type of header for later use by parser
840: */
841: goto copy_id; /* copy the token into line */
842:
843: case sp_nparen: /* got else, do */
844: ps.in_stmt = false;
845: if (*token == 'e') {
846: if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) {
847: if (verbose)
848: diag(0, "Line broken");
849: dump_line();/* make sure this starts a line */
850: ps.want_blank = false;
851: }
852: force_nl = true;/* also, following stuff must go onto new line */
853: last_else = 1;
854: parse(elselit);
855: }
856: else {
857: if (e_code != s_code) { /* make sure this starts a line */
858: if (verbose)
859: diag(0, "Line broken");
860: dump_line();
861: ps.want_blank = false;
862: }
863: force_nl = true;/* also, following stuff must go onto new line */
864: last_else = 0;
865: parse(dolit);
866: }
867: goto copy_id; /* move the token into line */
868:
869: case decl: /* we have a declaration type (int, register,
870: * etc.) */
871: parse(decl); /* let parser worry about indentation */
872: if (ps.last_token == rparen && ps.tos <= 1) {
873: ps.in_parameter_declaration = 1;
874: if (s_code != e_code) {
875: dump_line();
876: ps.want_blank = 0;
877: }
878: }
879: if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) {
880: ps.ind_level = ps.i_l_follow = 1;
881: ps.ind_stmt = 0;
882: }
883: ps.in_or_st = true; /* this might be a structure or initialization
884: * declaration */
885: ps.in_decl = ps.decl_on_line = true;
886: if ( /* !ps.in_or_st && */ ps.dec_nest <= 0)
887: ps.just_saw_decl = 2;
888: prefix_blankline_requested = 0;
889: for (i = 0; token[i++];); /* get length of token */
890:
891: /*
892: * dec_ind = e_code - s_code + (ps.decl_indent>i ? ps.decl_indent
893: * : i);
894: */
895: dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i;
1.21 jsg 896: tabs_to_var = (use_tabs ? ps.decl_indent > 0 : 0);
1.1 deraadt 897: goto copy_id;
898:
899: case ident: /* got an identifier or constant */
900: if (ps.in_decl) { /* if we are in a declaration, we must indent
901: * identifier */
902: if (ps.want_blank)
903: *e_code++ = ' ';
904: ps.want_blank = false;
905: if (is_procname == 0 || !procnames_start_line) {
1.9 pjanzen 906: if (!ps.block_init) {
1.1 deraadt 907: if (troff && !ps.dumped_decl_indent) {
1.14 krw 908: snprintf(e_code, (l_code - e_code) + 5,
909: "\n.De %dp+\200p\n", dec_ind * 7);
1.1 deraadt 910: ps.dumped_decl_indent = 1;
911: e_code += strlen(e_code);
1.14 krw 912: CHECK_SIZE_CODE;
1.21 jsg 913: } else {
914: int cur_dec_ind;
915: int pos, startpos;
916:
917: /*
918: * in order to get the tab math right for
919: * indentations that are not multiples of 8 we
920: * need to modify both startpos and dec_ind
921: * (cur_dec_ind) here by eight minus the
922: * remainder of the current starting column
923: * divided by eight. This seems to be a
924: * properly working fix
925: */
926: startpos = e_code - s_code;
927: cur_dec_ind = dec_ind;
928: pos = startpos;
929: if ((ps.ind_level * ps.ind_size) % 8 != 0) {
930: pos += (ps.ind_level * ps.ind_size) % 8;
931: cur_dec_ind += (ps.ind_level * ps.ind_size) % 8;
932: }
933:
934: if (tabs_to_var) {
935: while ((pos & ~7) + 8 <= cur_dec_ind) {
936: CHECK_SIZE_CODE;
937: *e_code++ = '\t';
938: pos = (pos & ~7) + 8;
939: }
940: }
941: while (pos < cur_dec_ind) {
1.1 deraadt 942: CHECK_SIZE_CODE;
943: *e_code++ = ' ';
1.21 jsg 944: pos++;
1.1 deraadt 945: }
1.21 jsg 946: if (ps.want_blank && e_code - s_code == startpos)
947: *e_code++ = ' ';
948: ps.want_blank = false;
949: }
1.9 pjanzen 950: }
1.1 deraadt 951: }
952: else {
953: if (dec_ind && s_code != e_code)
954: dump_line();
955: dec_ind = 0;
956: ps.want_blank = false;
957: }
958: }
959: else if (sp_sw && ps.p_l_follow == 0) {
960: sp_sw = false;
961: force_nl = true;
962: ps.last_u_d = true;
963: ps.in_stmt = false;
964: parse(hd_type);
965: }
966: copy_id:
967: if (ps.want_blank)
968: *e_code++ = ' ';
969: if (troff && ps.its_a_keyword) {
970: e_code = chfont(&bodyf, &keywordf, e_code);
971: for (t_ptr = token; *t_ptr; ++t_ptr) {
972: CHECK_SIZE_CODE;
1.23 deraadt 973: *e_code++ = keywordf.allcaps &&
974: islower((unsigned char)*t_ptr) ?
975: toupper((unsigned char)*t_ptr) : *t_ptr;
1.1 deraadt 976: }
977: e_code = chfont(&keywordf, &bodyf, e_code);
978: }
979: else
980: for (t_ptr = token; *t_ptr; ++t_ptr) {
981: CHECK_SIZE_CODE;
982: *e_code++ = *t_ptr;
983: }
984: ps.want_blank = true;
985: break;
986:
987: case period: /* treat a period kind of like a binary
988: * operation */
989: *e_code++ = '.'; /* move the period into line */
990: ps.want_blank = false; /* dont put a blank after a period */
991: break;
992:
993: case comma:
994: ps.want_blank = (s_code != e_code); /* only put blank after comma
995: * if comma does not start the
996: * line */
997: if (ps.in_decl && is_procname == 0 && !ps.block_init)
998: while ((e_code - s_code) < (dec_ind - 1)) {
999: CHECK_SIZE_CODE;
1000: *e_code++ = ' ';
1001: }
1002:
1003: *e_code++ = ',';
1004: if (ps.p_l_follow == 0) {
1005: if (ps.block_init_level <= 0)
1006: ps.block_init = 0;
1007: if (break_comma && (!ps.leave_comma || compute_code_target() + (e_code - s_code) > max_col - 8))
1008: force_nl = true;
1009: }
1010: break;
1011:
1012: case preesc: /* got the character '#' */
1013: if ((s_com != e_com) ||
1014: (s_lab != e_lab) ||
1015: (s_code != e_code))
1016: dump_line();
1017: *e_lab++ = '#'; /* move whole line to 'label' buffer */
1018: {
1019: int in_comment = 0;
1020: int com_start = 0;
1021: char quote = 0;
1022: int com_end = 0;
1023:
1024: while (*buf_ptr == ' ' || *buf_ptr == '\t') {
1025: buf_ptr++;
1026: if (buf_ptr >= buf_end)
1027: fill_buffer();
1028: }
1.8 espie 1029: while (*buf_ptr != '\n' || (in_comment && !had_eof)) {
1.1 deraadt 1030: CHECK_SIZE_LAB;
1031: *e_lab = *buf_ptr++;
1032: if (buf_ptr >= buf_end)
1033: fill_buffer();
1034: switch (*e_lab++) {
1035: case BACKSLASH:
1036: if (troff)
1037: *e_lab++ = BACKSLASH;
1038: if (!in_comment) {
1039: *e_lab++ = *buf_ptr++;
1040: if (buf_ptr >= buf_end)
1041: fill_buffer();
1042: }
1043: break;
1044: case '/':
1045: if (*buf_ptr == '*' && !in_comment && !quote) {
1046: in_comment = 1;
1047: *e_lab++ = *buf_ptr++;
1048: com_start = e_lab - s_lab - 2;
1049: }
1050: break;
1051: case '"':
1052: if (quote == '"')
1053: quote = 0;
1054: break;
1055: case '\'':
1056: if (quote == '\'')
1057: quote = 0;
1058: break;
1059: case '*':
1060: if (*buf_ptr == '/' && in_comment) {
1061: in_comment = 0;
1062: *e_lab++ = *buf_ptr++;
1063: com_end = e_lab - s_lab;
1064: }
1065: break;
1066: }
1067: }
1068:
1069: while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1070: e_lab--;
1071: if (e_lab - s_lab == com_end && bp_save == 0) { /* comment on
1072: * preprocessor line */
1073: if (sc_end == 0) /* if this is the first comment, we
1074: * must set up the buffer */
1075: sc_end = &(save_com[0]);
1076: else {
1077: *sc_end++ = '\n'; /* add newline between
1078: * comments */
1079: *sc_end++ = ' ';
1080: --line_no;
1081: }
1082: bcopy(s_lab + com_start, sc_end, com_end - com_start);
1083: sc_end += com_end - com_start;
1084: if (sc_end >= &save_com[sc_size])
1085: abort();
1086: e_lab = s_lab + com_start;
1087: while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1088: e_lab--;
1089: bp_save = buf_ptr; /* save current input buffer */
1090: be_save = buf_end;
1091: buf_ptr = save_com; /* fix so that subsequent calls to
1092: * lexi will take tokens out of
1093: * save_com */
1094: *sc_end++ = ' '; /* add trailing blank, just in case */
1095: buf_end = sc_end;
1096: sc_end = 0;
1097: }
1098: *e_lab = '\0'; /* null terminate line */
1099: ps.pcase = false;
1100: }
1101:
1102: if (strncmp(s_lab, "#if", 3) == 0) {
1103: if (blanklines_around_conditional_compilation) {
1.9 pjanzen 1104: int c;
1.1 deraadt 1105: prefix_blankline_requested++;
1106: while ((c = getc(input)) == '\n');
1107: ungetc(c, input);
1108: }
1109: if (ifdef_level < sizeof state_stack / sizeof state_stack[0]) {
1110: match_state[ifdef_level].tos = -1;
1111: state_stack[ifdef_level++] = ps;
1112: }
1113: else
1114: diag(1, "#if stack overflow");
1115: }
1116: else if (strncmp(s_lab, "#else", 5) == 0)
1117: if (ifdef_level <= 0)
1118: diag(1, "Unmatched #else");
1119: else {
1120: match_state[ifdef_level - 1] = ps;
1121: ps = state_stack[ifdef_level - 1];
1122: }
1123: else if (strncmp(s_lab, "#endif", 6) == 0) {
1124: if (ifdef_level <= 0)
1125: diag(1, "Unmatched #endif");
1126: else {
1127: ifdef_level--;
1128:
1129: #ifdef undef
1130: /*
1131: * This match needs to be more intelligent before the
1132: * message is useful
1133: */
1134: if (match_state[ifdef_level].tos >= 0
1135: && bcmp(&ps, &match_state[ifdef_level], sizeof ps))
1.13 deraadt 1136: diag(0, "Syntactically inconsistent #ifdef alternatives.");
1.1 deraadt 1137: #endif
1138: }
1139: if (blanklines_around_conditional_compilation) {
1140: postfix_blankline_requested++;
1141: n_real_blanklines = 0;
1142: }
1143: }
1144: break; /* subsequent processing of the newline
1145: * character will cause the line to be printed */
1146:
1.4 mickey 1147: case comment: /* we have gotten a comment this is a biggie */
1.1 deraadt 1148: if (flushed_nl) { /* we should force a broken line here */
1149: flushed_nl = false;
1150: dump_line();
1151: ps.want_blank = false; /* dont insert blank at line start */
1152: force_nl = false;
1153: }
1154: pr_comment();
1155: break;
1156: } /* end of big switch stmt */
1157:
1158: *e_code = '\0'; /* make sure code section is null terminated */
1159: if (type_code != comment && type_code != newline && type_code != preesc)
1160: ps.last_token = type_code;
1161: } /* end of main while (1) loop */
1162: }
1163:
1164: /*
1165: * copy input file to backup file if in_name is /blah/blah/blah/file, then
1166: * backup file will be ".Bfile" then make the backup file the input and
1167: * original input file the output
1168: */
1.4 mickey 1169: void
1.16 deraadt 1170: bakcopy(void)
1.1 deraadt 1171: {
1172: int n,
1173: bakchn;
1174: char buff[8 * 1024];
1.9 pjanzen 1175: char *p;
1.1 deraadt 1176:
1177: /* construct file name .Bfile */
1178: for (p = in_name; *p; p++); /* skip to end of string */
1179: while (p > in_name && *p != '/') /* find last '/' */
1180: p--;
1181: if (*p == '/')
1182: p++;
1.26 deraadt 1183: if (snprintf(bakfile, PATH_MAX, "%s.BAK", p) >= PATH_MAX)
1.24 guenther 1184: errc(1, ENAMETOOLONG, "%s.BAK", p);
1.1 deraadt 1185:
1186: /* copy in_name to backup file */
1.30 deraadt 1187: bakchn = open(bakfile, O_CREAT | O_TRUNC | O_WRONLY, 0600);
1.31 ! deraadt 1188: if (bakchn == -1)
1.7 millert 1189: err(1, "%s", bakfile);
1.4 mickey 1190: while ((n = read(fileno(input), buff, sizeof buff)) > 0)
1.1 deraadt 1191: if (write(bakchn, buff, n) != n)
1.7 millert 1192: err(1, "%s", bakfile);
1.31 ! deraadt 1193: if (n == -1)
1.7 millert 1194: err(1, "%s", in_name);
1.1 deraadt 1195: close(bakchn);
1196: fclose(input);
1197:
1198: /* re-open backup file as the input file */
1199: input = fopen(bakfile, "r");
1.4 mickey 1200: if (input == NULL)
1.7 millert 1201: err(1, "%s", bakfile);
1.1 deraadt 1202: /* now the original input file will be the output */
1203: output = fopen(in_name, "w");
1.4 mickey 1204: if (output == NULL) {
1.25 guenther 1205: int saved_errno = errno;
1.1 deraadt 1206: unlink(bakfile);
1.25 guenther 1207: errc(1, saved_errno, "%s", in_name);
1.1 deraadt 1208: }
1209: }