[BACK]Return to colrm.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / colrm

File: [local] / src / usr.bin / colrm / colrm.c (download)

Revision 1.12, Mon Jan 18 20:31:36 2016 UTC (8 years, 4 months ago) by schwarze
Branch: MAIN
CVS Tags: OPENBSD_6_0_BASE, OPENBSD_6_0, OPENBSD_5_9_BASE, OPENBSD_5_9
Changes since 1.11: +97 -36 lines

UTF-8 support:
Cut by display columns rather than by character positions because
the latter would be useless in the presence of combining zero-width
characters.  Similarly, let tab advance to display columns rather
than to character positions.

For compatibility with nroff and man(1) output, let backspace back
up one character rather than on display column.  But for compatibility
with POSIX fold(1), *if* two backspaces follow a double-width character,
ignore the second one.

Fix some bugs while here:  Delete backspaces that immediately follow
deleted characters.  Expand tabs intersecting deletions, such that
part of the blanks can be removed.  Expand tabs following deletions,
or they would no longer align with adjacent lines without tabs.

OK jmc@ on a previous version of the manual.
No opposition when shown on tech@.

/*	$OpenBSD: colrm.c,v 1.12 2016/01/18 20:31:36 schwarze Exp $	*/
/*	$NetBSD: colrm.c,v 1.4 1995/09/02 05:51:37 jtc Exp $	*/

/*-
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/types.h>

#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>

#define	TAB	8

void usage(void);

int
main(int argc, char *argv[])
{
	char	 *line, *p;
	ssize_t	  linesz;
	wchar_t	  wc;
	u_long	  column, newcol, start, stop;
	int	  ch, len, width;

	setlocale(LC_ALL, "");

	if (pledge("stdio", NULL) == -1)
		err(1, "pledge");

	while ((ch = getopt(argc, argv, "")) != -1)
		switch(ch) {
		case '?':
		default:
			usage();
		}
	argc -= optind;
	argv += optind;

	start = stop = 0;
	switch(argc) {
	case 2:
		stop = strtol(argv[1], &p, 10);
		if (stop <= 0 || *p)
			errx(1, "illegal column -- %s", argv[1]);
		/* FALLTHROUGH */
	case 1:
		start = strtol(argv[0], &p, 10);
		if (start <= 0 || *p)
			errx(1, "illegal column -- %s", argv[0]);
		break;
	case 0:
		break;
	default:
		usage();
	}

	if (stop && start > stop)
		err(1, "illegal start and stop columns");

	line = NULL;
	while (getline(&line, &linesz, stdin) != -1) {
		column = 0;
		width = 0;
		for (p = line; *p != '\0'; p += len) {
			len = 1;
			switch (*p) {
			case '\n':
				putchar('\n');
				continue;
			case '\b':
				/*
				 * Pass it through if the previous character
				 * was in scope, still represented by the
				 * current value of "column".
				 * Allow an optional second backspace
				 * after a double-width character.
				 */
				if (start == 0 || column < start ||
				    (stop > 0 &&
				     column > stop + (width > 1))) {
					putchar('\b');
					if (width > 1 && p[1] == '\b')
						putchar('\b');
				} 
				if (width > 1 && p[1] == '\b')
					p++;
				column -= width;
				continue;
			case '\t':
				newcol = (column + TAB) & ~(TAB - 1);
				if (start == 0 || newcol < start) {
					putchar('\t');
					column = newcol;
				} else
					/*
					 * Expand tabs that intersect or
					 * follow deleted columns.
					 */
					while (column < newcol)
						if (++column < start ||
						    (stop > 0 &&
						     column > stop))
				 			putchar(' ');
				continue;
			default:
				break;
			}

			/*
			 * Handle the three cases of invalid bytes,
			 * non-printable, and printable characters.
			 */

			if ((len = mbtowc(&wc, p, MB_CUR_MAX)) == -1) {
				(void)mbtowc(NULL, NULL, MB_CUR_MAX);
				len = 1;
				width = 1;
			} else if ((width = wcwidth(wc)) == -1)
				width = 1;

			/*
			 * If the character completely fits before or
			 * after the cut, keep it; otherwise, skip it.
			 */

			if ((start == 0 || column + width < start ||
			    (stop > 0 && column + (width > 0) > stop)))
			    	fwrite(p, 1, len, stdout);

			/*
			 * If the cut cuts the character in half
			 * and no backspace follows,
			 * print a blank for correct columnation.
			 */

			else if (width > 1 && p[len] != '\b' &&
			    (start == 0 || column + 1 < start ||
			    (stop > 0 && column + width > stop)))
				putchar(' ');

			column += width;
		}
	}
	if (ferror(stdin))
		err(1, "stdin");
	if (ferror(stdout))
		err(1, "stdout");
	return 0;
}

void
usage(void)
{
	(void)fprintf(stderr, "usage: colrm [start [stop]]\n");
	exit(1);
}