version 1.33, 2015/10/09 01:37:07 |
version 1.34, 2015/12/15 16:26:17 |
|
|
#include <string.h> |
#include <string.h> |
#include <sysexits.h> |
#include <sysexits.h> |
#include <unistd.h> |
#include <unistd.h> |
|
#include <wchar.h> |
|
#include <wctype.h> |
|
|
/* Something that, we hope, will never be a genuine line length, |
/* Something that, we hope, will never be a genuine line length, |
* indentation etc. |
* indentation etc. |
|
|
static int format_troff = 0; /* Format troff? */ |
static int format_troff = 0; /* Format troff? */ |
|
|
static int n_errors = 0; /* Number of failed files. */ |
static int n_errors = 0; /* Number of failed files. */ |
static char *output_buffer = NULL; /* Output line will be built here */ |
|
static size_t x; /* Horizontal position in output line */ |
static size_t x; /* Horizontal position in output line */ |
static size_t x0; /* Ditto, ignoring leading whitespace */ |
static size_t x0; /* Ditto, ignoring leading whitespace */ |
static size_t pending_spaces; /* Spaces to add before next word */ |
static size_t pending_spaces; /* Spaces to add before next word */ |
|
|
|
|
static void process_named_file(const char *); |
static void process_named_file(const char *); |
static void process_stream(FILE *, const char *); |
static void process_stream(FILE *, const char *); |
static size_t indent_length(const char *, size_t); |
static size_t indent_length(const char *); |
static int might_be_header(const char *); |
static int might_be_header(const char *); |
static void new_paragraph(size_t, size_t); |
static void new_paragraph(size_t); |
static void output_word(size_t, size_t, const char *, size_t, size_t); |
static void output_word(size_t, size_t, const char *, int, int, int); |
static void output_indent(size_t); |
static void output_indent(size_t); |
static void center_stream(FILE *, const char *); |
static void center_stream(FILE *, const char *); |
static char *get_line(FILE *, size_t *); |
static char *get_line(FILE *); |
static void *xrealloc(void *, size_t); |
static void *xrealloc(void *, size_t); |
void usage(void); |
void usage(void); |
|
|
#define XMALLOC(x) xrealloc(0, x) |
|
#define ERRS(x) (x >= 127 ? 127 : ++x) |
#define ERRS(x) (x >= 127 ? 127 : ++x) |
|
|
/* Here is perhaps the right place to mention that this code is |
/* Here is perhaps the right place to mention that this code is |
|
|
goal_length = 65; |
goal_length = 65; |
if (max_length == 0) |
if (max_length == 0) |
max_length = goal_length+10; |
max_length = goal_length+10; |
output_buffer = XMALLOC(max_length+1); /* really needn't be longer */ |
|
|
|
/* 2. Process files. */ |
/* 2. Process files. */ |
|
|
|
|
static void |
static void |
process_stream(FILE *stream, const char *name) |
process_stream(FILE *stream, const char *name) |
{ |
{ |
size_t n; |
const char *wordp, *cp; |
|
wchar_t wc; |
size_t np; |
size_t np; |
size_t last_indent = SILLY; /* how many spaces in last indent? */ |
size_t last_indent = SILLY; /* how many spaces in last indent? */ |
size_t para_line_number = 0; /* how many lines already read in this para? */ |
size_t para_line_number = 0; /* how many lines already read in this para? */ |
size_t first_indent = SILLY; /* indentation of line 0 of paragraph */ |
size_t first_indent = SILLY; /* indentation of line 0 of paragraph */ |
|
int wcl; /* number of bytes in wide character */ |
|
int wcw; /* display width of wide character */ |
|
int word_length; /* number of bytes in word */ |
|
int word_width; /* display width of word */ |
|
int space_width; /* display width of space after word */ |
|
int line_width; /* display width of line */ |
HdrType prev_header_type = hdr_ParagraphStart; |
HdrType prev_header_type = hdr_ParagraphStart; |
HdrType header_type; |
HdrType header_type; |
|
|
/* ^-- header_type of previous line; -1 at para start */ |
/* ^-- header_type of previous line; -1 at para start */ |
const char *line; |
const char *line; |
size_t length; |
|
|
|
if (centerP) { |
if (centerP) { |
center_stream(stream, name); |
center_stream(stream, name); |
return; |
return; |
} |
} |
|
|
while ((line = get_line(stream, &length)) != NULL) { |
while ((line = get_line(stream)) != NULL) { |
np = indent_length(line, length); |
np = indent_length(line); |
header_type = hdr_NonHeader; |
header_type = hdr_NonHeader; |
if (grok_mail_headers && prev_header_type != hdr_NonHeader) { |
if (grok_mail_headers && prev_header_type != hdr_NonHeader) { |
if (np == 0 && might_be_header(line)) |
if (np == 0 && might_be_header(line)) |
|
|
* AND the line isn't a mail header continuation line |
* AND the line isn't a mail header continuation line |
* AND this isn't the second line of an indented paragraph. |
* AND this isn't the second line of an indented paragraph. |
*/ |
*/ |
if (length == 0 || (line[0] == '.' && !format_troff) || |
if (*line == '\0' || (*line == '.' && !format_troff) || |
header_type == hdr_Header || |
header_type == hdr_Header || |
(header_type == hdr_NonHeader && prev_header_type > hdr_NonHeader) || |
(header_type == hdr_NonHeader && prev_header_type > hdr_NonHeader) || |
(np != last_indent && header_type != hdr_Continuation && |
(np != last_indent && header_type != hdr_Continuation && |
(!allow_indented_paragraphs || para_line_number != 1)) ) { |
(!allow_indented_paragraphs || para_line_number != 1)) ) { |
new_paragraph(output_in_paragraph ? last_indent : first_indent, np); |
new_paragraph(np); |
para_line_number = 0; |
para_line_number = 0; |
first_indent = np; |
first_indent = np; |
last_indent = np; |
last_indent = np; |
|
|
/* nroff compatibility */ |
/* nroff compatibility */ |
if (length > 0 && line[0] == '.' && !format_troff) { |
if (*line == '.' && !format_troff) { |
printf("%.*s\n", (int)length, line); |
puts(line); |
continue; |
continue; |
} |
} |
if (header_type == hdr_Header) |
if (header_type == hdr_Header) |
last_indent = 2; /* for cont. lines */ |
last_indent = 2; /* for cont. lines */ |
if (length == 0) { |
if (*line == '\0') { |
putchar('\n'); |
putchar('\n'); |
prev_header_type = hdr_ParagraphStart; |
prev_header_type = hdr_ParagraphStart; |
continue; |
continue; |
|
|
prev_header_type = header_type; |
prev_header_type = header_type; |
} |
} |
|
|
n = np; |
line_width = np; |
while (n < length) { |
for (wordp = line; *wordp != '\0'; wordp = cp) { |
/* Find word end and count spaces after it */ |
word_length = 0; |
size_t word_length = 0, space_length = 0; |
word_width = space_width = 0; |
while (n+word_length < length && line[n+word_length] != ' ') |
for (cp = wordp; *cp != '\0'; cp += wcl) { |
++word_length; |
wcl = mbtowc(&wc, cp, MB_CUR_MAX); |
space_length = word_length; |
if (wcl == -1) { |
while (n+space_length < length && line[n+space_length] == ' ') |
(void)mbtowc(NULL, NULL, MB_CUR_MAX); |
++space_length; |
wc = L'?'; |
|
wcl = 1; |
|
wcw = 1; |
|
} else if (wc == L'\t') |
|
wcw = (line_width / tab_width + 1) * |
|
tab_width - line_width; |
|
else if ((wcw = wcwidth(wc)) == -1) |
|
wcw = 1; |
|
if (iswblank(wc)) { |
|
/* Skip whitespace at start of line. */ |
|
if (word_length == 0) { |
|
wordp += wcl; |
|
continue; |
|
} |
|
/* Count whitespace after word. */ |
|
space_width += wcw; |
|
} else { |
|
/* Detect end of word. */ |
|
if (space_width > 0) |
|
break; |
|
/* Measure word. */ |
|
word_length += wcl; |
|
word_width += wcw; |
|
} |
|
line_width += wcw; |
|
} |
|
|
/* Send the word to the output machinery. */ |
/* Send the word to the output machinery. */ |
output_word(first_indent, last_indent, |
output_word(first_indent, last_indent, wordp, |
line+n, word_length, space_length-word_length); |
word_length, word_width, space_width); |
n += space_length; |
|
} |
} |
++para_line_number; |
++para_line_number; |
} |
} |
|
|
new_paragraph(output_in_paragraph ? last_indent : first_indent, 0); |
new_paragraph(0); |
if (ferror(stream)) { |
if (ferror(stream)) { |
warn("%s", name); |
warn("%s", name); |
ERRS(n_errors); |
ERRS(n_errors); |
|
|
/* How long is the indent on this line? |
/* How long is the indent on this line? |
*/ |
*/ |
static size_t |
static size_t |
indent_length(const char *line, size_t length) |
indent_length(const char *line) |
{ |
{ |
size_t n = 0; |
size_t n = 0; |
|
|
while (n < length && *line++ == ' ') |
for (;;) { |
++n; |
switch(*line++) { |
|
case ' ': |
|
++n; |
|
continue; |
|
case '\t': |
|
n = (n / tab_width + 1) * tab_width; |
|
continue; |
|
default: |
|
break; |
|
} |
|
break; |
|
} |
return n; |
return n; |
} |
} |
|
|
|
|
/* Begin a new paragraph with an indent of |indent| spaces. |
/* Begin a new paragraph with an indent of |indent| spaces. |
*/ |
*/ |
static void |
static void |
new_paragraph(size_t old_indent, size_t indent) |
new_paragraph(size_t indent) |
{ |
{ |
|
|
if (x0) { |
if (x0 > 0) |
if (old_indent > 0) |
|
output_indent(old_indent); |
|
fwrite(output_buffer, 1, x0, stdout); |
|
putchar('\n'); |
putchar('\n'); |
} |
|
x = indent; |
x = indent; |
x0 = 0; |
x0 = 0; |
pending_spaces = 0; |
pending_spaces = 0; |
|
|
output_indent(size_t n_spaces) |
output_indent(size_t n_spaces) |
{ |
{ |
|
|
|
if (n_spaces == 0) |
|
return; |
if (output_tab_width) { |
if (output_tab_width) { |
while (n_spaces >= output_tab_width) { |
while (n_spaces >= output_tab_width) { |
putchar('\t'); |
putchar('\t'); |
|
|
putchar(' '); |
putchar(' '); |
} |
} |
|
|
/* Output a single word, or add it to the buffer. |
/* Output a single word. |
* indent0 and indent1 are the indents to use on the first and subsequent |
* indent0 and indent1 are the indents to use on the first and subsequent |
* lines of a paragraph. They'll often be the same, of course. |
* lines of a paragraph. They'll often be the same, of course. |
*/ |
*/ |
static void |
static void |
output_word(size_t indent0, size_t indent1, const char *word, size_t length, size_t spaces) |
output_word(size_t indent0, size_t indent1, const char *word, |
|
int length, int width, int spaces) |
{ |
{ |
size_t new_x = x + pending_spaces + length; |
size_t new_x = x + pending_spaces + width; |
size_t indent = output_in_paragraph ? indent1 : indent0; |
|
|
|
/* If either |spaces==0| (at end of line) or |coalesce_spaces_P| |
/* If either |spaces==0| (at end of line) or |coalesce_spaces_P| |
* (squashing internal whitespace), then add just one space; |
* (squashing internal whitespace), then add just one space; |
|
|
if (coalesce_spaces_P || spaces == 0) |
if (coalesce_spaces_P || spaces == 0) |
spaces = strchr(sentence_enders, word[length-1]) ? 2 : 1; |
spaces = strchr(sentence_enders, word[length-1]) ? 2 : 1; |
|
|
if (new_x <= goal_length) { |
if (x0 == 0) |
/* After adding the word we still aren't at the goal length, |
output_indent(output_in_paragraph ? indent1 : indent0); |
* so clearly we add it to the buffer rather than outputing it. |
else if (new_x > max_length || x >= goal_length || |
*/ |
(new_x > goal_length && new_x-goal_length > goal_length-x)) { |
memset(output_buffer+x0, ' ', pending_spaces); |
putchar('\n'); |
|
output_indent(indent1); |
|
x0 = 0; |
|
x = indent1; |
|
} else { |
x0 += pending_spaces; |
x0 += pending_spaces; |
x += pending_spaces; |
x += pending_spaces; |
memcpy(output_buffer+x0, word, length); |
while (pending_spaces--) |
x0 += length; |
putchar(' '); |
x += length; |
|
pending_spaces = spaces; |
|
} else { |
|
/* Adding the word takes us past the goal. Print the line-so-far, |
|
* and the word too iff either (1) the lsf is empty or (2) that |
|
* makes us nearer the goal but doesn't take us over the limit, |
|
* or (3) the word on its own takes us over the limit. |
|
* In case (3) we put a newline in between. |
|
*/ |
|
if (indent > 0) |
|
output_indent(indent); |
|
fwrite(output_buffer, 1, x0, stdout); |
|
if (x0 == 0 || (new_x <= max_length && new_x-goal_length <= goal_length-x)) { |
|
printf("%*s", (int)pending_spaces, ""); |
|
goto write_out_word; |
|
} else { |
|
/* If the word takes us over the limit on its own, just |
|
* spit it out and don't bother buffering it. |
|
*/ |
|
if (indent+length > max_length) { |
|
putchar('\n'); |
|
if (indent > 0) |
|
output_indent(indent); |
|
write_out_word: |
|
fwrite(word, 1, length, stdout); |
|
x0 = 0; |
|
x = indent1; |
|
pending_spaces = 0; |
|
} else { |
|
memcpy(output_buffer, word, length); |
|
x0 = length; |
|
x = length+indent1; |
|
pending_spaces = spaces; |
|
} |
|
} |
|
|
|
putchar('\n'); |
|
output_in_paragraph = 1; |
|
} |
} |
|
x0 += width; |
|
x += width; |
|
while(length--) |
|
putchar(*word++); |
|
pending_spaces = spaces; |
|
output_in_paragraph = 1; |
} |
} |
|
|
/* Process a stream, but just center its lines rather than trying to |
/* Process a stream, but just center its lines rather than trying to |
|
|
center_stream(FILE *stream, const char *name) |
center_stream(FILE *stream, const char *name) |
{ |
{ |
char *line; |
char *line; |
size_t length; |
|
size_t l; |
size_t l; |
|
|
while ((line = get_line(stream, &length)) != 0) { |
while ((line = get_line(stream)) != NULL) { |
l = length; |
while (isspace((unsigned char)*line)) |
while (l > 0 && isspace(*line)) { |
|
++line; |
++line; |
--l; |
l = strlen(line); |
} |
|
|
|
length = l; |
|
|
|
while (l < goal_length) { |
while (l < goal_length) { |
putchar(' '); |
putchar(' '); |
l += 2; |
l += 2; |
} |
} |
|
puts(line); |
fwrite(line, 1, length, stdout); |
|
putchar('\n'); |
|
} |
} |
|
|
if (ferror(stream)) { |
if (ferror(stream)) { |
|
|
} |
} |
} |
} |
|
|
/* Get a single line from a stream. Expand tabs, strip control |
/* Get a single line from a stream. Strip control |
* characters and trailing whitespace, and handle backspaces. |
* characters and trailing whitespace, and handle backspaces. |
* Return the address of the buffer containing the line, and |
* Return the address of the buffer containing the line. |
* put the length of the line in |lengthp|. |
|
* This can cope with arbitrarily long lines, and with lines |
* This can cope with arbitrarily long lines, and with lines |
* without terminating \n. |
* without terminating \n. |
* If there are no characters left or an error happens, we |
* If there are no characters left or an error happens, we |
* return 0. |
* return NULL. |
* Don't confuse |spaces_pending| here with the global |
|
* |pending_spaces|. |
|
*/ |
*/ |
static char * |
static char * |
get_line(FILE *stream, size_t *lengthp) |
get_line(FILE *stream) |
{ |
{ |
int ch; |
int ch; |
int troff = 0; |
int troff = 0; |
static char *buf = NULL; |
static char *buf = NULL; |
static size_t length = 0; |
static size_t length = 0; |
size_t len = 0; |
size_t len = 0; |
size_t spaces_pending = 0; |
|
|
|
if (buf == NULL) { |
if (buf == NULL) { |
length = 100; |
length = 100; |
buf = XMALLOC(length); |
buf = xrealloc(NULL, length); |
} |
} |
|
|
while ((ch = getc(stream)) != '\n' && ch != EOF) { |
while ((ch = getc(stream)) != '\n' && ch != EOF) { |
if ((len + spaces_pending == 0) && (ch == '.' && !format_troff)) |
if ((len == 0) && (ch == '.' && !format_troff)) |
troff = 1; |
troff = 1; |
if (ch == ' ') { |
if (troff || ch == '\t' || !iscntrl(ch)) { |
++spaces_pending; |
if (len >= length) { |
} else if (troff || !iscntrl(ch)) { |
|
while (len + spaces_pending >= length) { |
|
length *= 2; |
length *= 2; |
buf = xrealloc(buf, length); |
buf = xrealloc(buf, length); |
} |
} |
|
|
while (spaces_pending > 0) { |
|
--spaces_pending; |
|
buf[len++] = ' '; |
|
} |
|
buf[len++] = ch; |
buf[len++] = ch; |
} else if (ch == '\t') { |
|
spaces_pending += tab_width - (len+spaces_pending)%tab_width; |
|
} else if (ch == '\b') { |
} else if (ch == '\b') { |
if (len) |
if (len) |
--len; |
--len; |
} |
} |
} |
} |
|
while (len > 0 && isspace((unsigned char)buf[len-1])) |
*lengthp = len; |
--len; |
return (len > 0 || ch != EOF) ? buf : 0; |
buf[len] = '\0'; |
|
return (len > 0 || ch != EOF) ? buf : NULL; |
} |
} |
|
|
/* (Re)allocate some memory, exiting with an error if we can't. |
/* (Re)allocate some memory, exiting with an error if we can't. |