Annotation of src/usr.bin/less/line.c, Revision 1.31
1.1 etheisen 1: /*
1.12 shadchin 2: * Copyright (C) 1984-2012 Mark Nudelman
1.14 nicm 3: * Modified for use with illumos by Garrett D'Amore.
4: * Copyright 2014 Garrett D'Amore <garrett@damore.org>
1.1 etheisen 5: *
1.5 millert 6: * You may distribute under the terms of either the GNU General Public
7: * License or the Less License, as specified in the README file.
1.1 etheisen 8: *
1.12 shadchin 9: * For more information, see the README file.
1.13 nicm 10: */
1.1 etheisen 11:
12: /*
13: * Routines to manipulate the "line buffer".
14: * The line buffer holds a line of output as it is being built
15: * in preparation for output to the screen.
16: */
17:
#include <limits.h>
#include <wchar.h>
#include <wctype.h>

#include "charset.h"
#include "less.h"
23:
/* The line buffer and its parallel attribute array; both have size_linebuf bytes. */
static char *linebuf = NULL;	/* Buffer which holds the current output line */
static char *attr = NULL;	/* Extension of linebuf to hold attributes */
int size_linebuf = 0;		/* Size of line buffer (and attr buffer) */

static int cshift;		/* Current left-shift of output line buffer */
int hshift;			/* Desired left-shift of output line buffer */
int tabstops[TABSTOP_MAX] = { 0 }; /* Custom tabstops */
int ntabstops = 1;		/* Number of tabstops */
int tabdefault = 8;		/* Default repeated tabstops */
off_t highest_hilite;		/* Pos of last hilite in file found so far */

static int curr;		/* Total number of bytes in linebuf */
static int column;		/* Display columns needed to show linebuf */
static int overstrike;		/* Next char should overstrike previous char */
static int is_null_line;	/* There is no current line */
static int lmargin;		/* Index in linebuf of start of content */
static char pendc;		/* Char held back by pappend() (a CR), '\0' if none */
static off_t pendpos;		/* File position that was passed in with pendc */
static char *end_ansi_chars;	/* Chars ending an ANSI sequence (LESSANSIENDCHARS) */
static char *mid_ansi_chars;	/* Chars allowed inside one (LESSANSIMIDCHARS) */

static int attr_swidth(int);
static int attr_ewidth(int);
static int do_append(LWCHAR, char *, off_t);

/* Variables defined in other source files. */
extern volatile sig_atomic_t sigs;
extern int bs_mode;
extern int linenums;
extern int ctldisp;
extern int twiddle;
extern int binattr;
extern int status_col;
extern int auto_wrap, ignaw;
extern int bo_s_width, bo_e_width;
extern int ul_s_width, ul_e_width;
extern int bl_s_width, bl_e_width;
extern int so_s_width, so_e_width;
extern int sc_width, sc_height;
extern int utf_mode;
extern off_t start_attnpos;
extern off_t end_attnpos;

/* Bytes of a multibyte character that pappend() has collected so far. */
static char mbc_buf[MAX_UTF_CHAR_LEN];
static int mbc_buf_index = 0;	/* Number of bytes currently in mbc_buf */
static off_t mbc_pos;		/* File position of the first byte in mbc_buf */
1.10 shadchin 69:
1.5 millert 70: /*
71: * Initialize from environment variables.
72: */
1.13 nicm 73: void
74: init_line(void)
1.5 millert 75: {
76: end_ansi_chars = lgetenv("LESSANSIENDCHARS");
77: if (end_ansi_chars == NULL || *end_ansi_chars == '\0')
78: end_ansi_chars = "m";
1.10 shadchin 79:
80: mid_ansi_chars = lgetenv("LESSANSIMIDCHARS");
81: if (mid_ansi_chars == NULL || *mid_ansi_chars == '\0')
82: mid_ansi_chars = "0123456789;[?!\"'#%()*+ ";
83:
1.13 nicm 84: linebuf = ecalloc(LINEBUF_SIZE, sizeof (char));
85: attr = ecalloc(LINEBUF_SIZE, sizeof (char));
1.5 millert 86: size_linebuf = LINEBUF_SIZE;
87: }
88:
89: /*
90: * Expand the line buffer.
91: */
1.13 nicm 92: static int
93: expand_linebuf(void)
1.5 millert 94: {
1.10 shadchin 95: /* Double the size of the line buffer. */
1.7 millert 96: int new_size = size_linebuf * 2;
1.10 shadchin 97:
98: /* Just realloc to expand the buffer, if we can. */
1.22 deraadt 99: char *new_buf = recallocarray(linebuf, size_linebuf, new_size, 1);
100: char *new_attr = recallocarray(attr, size_linebuf, new_size, 1);
1.13 nicm 101: if (new_buf == NULL || new_attr == NULL) {
1.15 mmcc 102: free(new_attr);
103: free(new_buf);
1.13 nicm 104: return (1);
1.5 millert 105: }
106: linebuf = new_buf;
107: attr = new_attr;
108: size_linebuf = new_size;
1.13 nicm 109: return (0);
1.5 millert 110: }
1.1 etheisen 111:
112: /*
1.10 shadchin 113: * Is a character ASCII?
114: */
1.25 schwarze 115: static int
1.13 nicm 116: is_ascii_char(LWCHAR ch)
1.10 shadchin 117: {
118: return (ch <= 0x7F);
119: }
120:
121: /*
1.1 etheisen 122: * Rewind the line buffer.
123: */
1.13 nicm 124: void
125: prewind(void)
1.1 etheisen 126: {
127: curr = 0;
128: column = 0;
1.10 shadchin 129: cshift = 0;
1.1 etheisen 130: overstrike = 0;
131: is_null_line = 0;
132: pendc = '\0';
1.5 millert 133: lmargin = 0;
134: if (status_col)
135: lmargin += 1;
1.1 etheisen 136: }
137:
138: /*
139: * Insert the line number (of the given position) into the line buffer.
140: */
1.13 nicm 141: void
142: plinenum(off_t pos)
1.1 etheisen 143: {
1.16 mmcc 144: off_t linenum = 0;
1.13 nicm 145: int i;
1.5 millert 146:
1.13 nicm 147: if (linenums == OPT_ONPLUS) {
1.5 millert 148: /*
149: * Get the line number and put it in the current line.
150: * {{ Note: since find_linenum calls forw_raw_line,
1.13 nicm 151: * it may seek in the input file, requiring the caller
1.5 millert 152: * of plinenum to re-seek if necessary. }}
153: * {{ Since forw_raw_line modifies linebuf, we must
154: * do this first, before storing anything in linebuf. }}
155: */
156: linenum = find_linenum(pos);
157: }
1.1 etheisen 158:
159: /*
1.5 millert 160: * Display a status column if the -J option is set.
1.1 etheisen 161: */
1.13 nicm 162: if (status_col) {
1.5 millert 163: linebuf[curr] = ' ';
1.13 nicm 164: if (start_attnpos != -1 &&
1.5 millert 165: pos >= start_attnpos && pos < end_attnpos)
1.10 shadchin 166: attr[curr] = AT_NORMAL|AT_HILITE;
1.5 millert 167: else
1.10 shadchin 168: attr[curr] = AT_NORMAL;
1.5 millert 169: curr++;
170: column++;
171: }
1.1 etheisen 172: /*
1.5 millert 173: * Display the line number at the start of each line
174: * if the -N option is set.
1.1 etheisen 175: */
1.13 nicm 176: if (linenums == OPT_ONPLUS) {
1.18 mmcc 177: char buf[23];
1.5 millert 178: int n;
1.1 etheisen 179:
1.17 mmcc 180: postoa(linenum, buf, sizeof(buf));
1.5 millert 181: n = strlen(buf);
182: if (n < MIN_LINENUM_WIDTH)
183: n = MIN_LINENUM_WIDTH;
1.6 millert 184: snprintf(linebuf+curr, size_linebuf-curr, "%*s ", n, buf);
1.21 deraadt 185: n++; /* One space after the line number. */
1.5 millert 186: for (i = 0; i < n; i++)
187: attr[curr+i] = AT_NORMAL;
188: curr += n;
189: column += n;
190: lmargin += n;
191: }
1.1 etheisen 192:
193: /*
1.5 millert 194: * Append enough spaces to bring us to the lmargin.
1.1 etheisen 195: */
1.13 nicm 196: while (column < lmargin) {
1.1 etheisen 197: linebuf[curr] = ' ';
198: attr[curr++] = AT_NORMAL;
199: column++;
1.5 millert 200: }
201: }
202:
/*
 * Shift the input line left.
 * Starting at lmargin, some bytes are discarded from the linebuf,
 * until the number of display columns needed to show these bytes
 * would exceed the argument.
 *
 * The requested shift is first limited to what is available after
 * lmargin, both in columns and in bytes.  Afterwards the surviving
 * bytes are moved down to lmargin, and curr, column and cshift are
 * updated by the number of columns that were actually discarded.
 */
static void
pshift(int shift)
{
	int shifted = 0;	/* Number of display columns already discarded. */
	int from;		/* Index in linebuf of the current character. */
	int to;			/* Index in linebuf to move this character to. */
	int len;		/* Number of bytes in this character. */
	int width = 0;		/* Display columns needed for this character. */
	int prev_attr;		/* Attributes of the preceding character. */
	int next_attr;		/* Attributes of the following character. */
	unsigned char c;	/* First byte of current character. */

	/* Never shift further than the content that follows lmargin. */
	if (shift > column - lmargin)
		shift = column - lmargin;
	if (shift > curr - lmargin)
		shift = curr - lmargin;

	to = from = lmargin;
	/*
	 * We keep on going when shifted == shift
	 * to get all combining chars.
	 */
	while (shifted <= shift && from < curr) {
		c = linebuf[from];
		if (ctldisp == OPT_ONPLUS && c == ESC) {
			/*
			 * Keep cumulative effect: the escape sequence is
			 * copied to the output position instead of being
			 * discarded, and does not count as shifted columns.
			 */
			linebuf[to] = c;
			attr[to++] = attr[from++];
			while (from < curr && linebuf[from]) {
				linebuf[to] = linebuf[from];
				attr[to++] = attr[from];
				/* The first non-middle byte ends the sequence. */
				if (!is_ansi_middle(linebuf[from++]))
					break;
			}
			continue;
		}
		if (utf_mode && !isascii(c)) {
			wchar_t ch;
			/*
			 * Before this point, UTF-8 validity was already
			 * checked, but for additional safety, treat
			 * invalid bytes as single-width characters
			 * if they ever make it here.  Similarly, treat
			 * non-printable characters as width 1.
			 */
			len = mbtowc(&ch, linebuf + from, curr - from);
			if (len == -1)
				len = width = 1;
			else if ((width = wcwidth(ch)) == -1)
				width = 1;
		} else {
			len = 1;
			if (c == '\b')
				/*
				 * Undo the width of the previous character
				 * (width still holds it from the last pass).
				 * XXX - Incorrect if several '\b' in a row.
				 */
				width = width > 0 ? -width : -1;
			else
				width = iscntrl(c) ? 0 : 1;
		}

		if (width == 2 && shift - shifted == 1) {
			/*
			 * Move the first half of a double-width character
			 * off screen.  Print a space instead of the second
			 * half.  This should never happen when called
			 * by pshift_all().
			 */
			attr[to] = attr[from];
			linebuf[to++] = ' ';
			from += len;
			shifted++;
			break;
		}

		/*
		 * Adjust width for magic cookies: if this character's
		 * attribute differs from both neighbours, add the widths of
		 * the attribute enter/exit sequences that surround it.
		 */
		prev_attr = (to > 0) ? attr[to-1] : AT_NORMAL;
		next_attr = (from + len < curr) ? attr[from + len] : prev_attr;
		if (!is_at_equiv(attr[from], prev_attr) &&
		    !is_at_equiv(attr[from], next_attr)) {
			width += attr_swidth(attr[from]);
			if (from + len < curr)
				width += attr_ewidth(attr[from]);
			if (is_at_equiv(prev_attr, next_attr)) {
				width += attr_ewidth(prev_attr);
				if (from + len < curr)
					width += attr_swidth(next_attr);
			}
		}

		/* Stop once this character no longer fits in the shift. */
		if (shift - shifted < width)
			break;
		from += len;
		shifted += width;
		/* A backspace has negative width; do not go below zero. */
		if (shifted < 0)
			shifted = 0;
	}
	/* Move the rest of the line down to close the gap. */
	while (from < curr) {
		linebuf[to] = linebuf[from];
		attr[to++] = attr[from++];
	}
	curr = to;
	column -= shifted;
	cshift += shifted;
}
312:
/*
 * Shift the whole line off to the left.
 * Asks pshift() to discard as many columns as the line currently
 * occupies; pshift() itself limits that to the content after lmargin.
 */
void
pshift_all(void)
{
	pshift(column);
}
321:
322: /*
323: * Return the printing width of the start (enter) sequence
324: * for a given character attribute.
325: */
1.13 nicm 326: static int
327: attr_swidth(int a)
1.1 etheisen 328: {
1.10 shadchin 329: int w = 0;
330:
331: a = apply_at_specials(a);
332:
333: if (a & AT_UNDERLINE)
334: w += ul_s_width;
335: if (a & AT_BOLD)
336: w += bo_s_width;
337: if (a & AT_BLINK)
338: w += bl_s_width;
339: if (a & AT_STANDOUT)
340: w += so_s_width;
341:
1.13 nicm 342: return (w);
1.1 etheisen 343: }
344:
345: /*
346: * Return the printing width of the end (exit) sequence
347: * for a given character attribute.
348: */
1.13 nicm 349: static int
350: attr_ewidth(int a)
1.1 etheisen 351: {
1.10 shadchin 352: int w = 0;
353:
354: a = apply_at_specials(a);
355:
356: if (a & AT_UNDERLINE)
357: w += ul_e_width;
358: if (a & AT_BOLD)
359: w += bo_e_width;
360: if (a & AT_BLINK)
361: w += bl_e_width;
362: if (a & AT_STANDOUT)
363: w += so_e_width;
364:
1.13 nicm 365: return (w);
1.1 etheisen 366: }
367:
368: /*
369: * Return the printing width of a given character and attribute,
370: * if the character were added to the current position in the line buffer.
371: * Adding a character with a given attribute may cause an enter or exit
372: * attribute sequence to be inserted, so this must be taken into account.
373: */
1.13 nicm 374: static int
1.23 schwarze 375: pwidth(wchar_t ch, int a, wchar_t prev_ch)
1.1 etheisen 376: {
1.10 shadchin 377: int w;
1.1 etheisen 378:
1.23 schwarze 379: /*
380: * In case of a backspace, back up by the width of the previous
381: * character. If that is non-printable (for example another
382: * backspace) or zero width (for example a combining accent),
383: * the terminal may actually back up to a character even further
384: * back, but we no longer know how wide that may have been.
385: * The best guess possible at this point is that it was
386: * hopefully width one.
387: */
388: if (ch == L'\b') {
389: w = wcwidth(prev_ch);
390: if (w <= 0)
391: w = 1;
392: return (-w);
393: }
394:
395: w = wcwidth(ch);
396:
397: /*
398: * Non-printable characters can get here if the -r flag is in
399: * effect, and possibly in some other situations (XXX check that!).
400: * Treat them as zero width.
401: * That may not always match their actual behaviour,
402: * but there is no reasonable way to be more exact.
403: */
404: if (w == -1)
405: w = 0;
1.13 nicm 406:
1.23 schwarze 407: /*
408: * Combining accents take up no space.
409: * Some terminals, upon failure to compose them with the
410: * characters that precede them, will actually take up one column
411: * for the combining accent; there isn't much we could do short
412: * of testing the (complex) composition process ourselves and
413: * printing a binary representation when it fails.
414: */
415: if (w == 0)
416: return (0);
1.1 etheisen 417:
418: /*
1.10 shadchin 419: * Other characters take one or two columns,
1.1 etheisen 420: * plus the width of any attribute enter/exit sequence.
421: */
1.10 shadchin 422: if (curr > 0 && !is_at_equiv(attr[curr-1], a))
1.1 etheisen 423: w += attr_ewidth(attr[curr-1]);
1.10 shadchin 424: if ((apply_at_specials(a) != AT_NORMAL) &&
425: (curr == 0 || !is_at_equiv(attr[curr-1], a)))
1.1 etheisen 426: w += attr_swidth(a);
427: return (w);
428: }
429:
/*
 * Delete to the previous base character in the line buffer.
 * Return 1 if one is found.
 *
 * Characters are removed from the end of linebuf one at a time (curr
 * and column are lowered accordingly) until one with a positive display
 * width has been removed.  Returns 0 if there is nothing after lmargin,
 * if the last character cannot be decoded, or if the start of the
 * content is reached without finding a base character.
 */
static int
backc(void)
{
	wchar_t ch, prev_ch;
	int i, len, width;

	/* Find the first byte of the last character in the buffer. */
	i = curr - 1;
	if (utf_mode) {
		while (i >= lmargin && IS_UTF8_TRAIL(linebuf[i]))
			i--;
	}
	if (i < lmargin)
		return (0);
	if (utf_mode) {
		len = mbtowc(&ch, linebuf + i, curr - i);
		/* Give up unless exactly the bytes i..curr-1 decode. */
		if (len == -1 || i + len < curr) {
			/* Reset the conversion state after the failure. */
			(void)mbtowc(NULL, NULL, MB_CUR_MAX);
			return (0);
		}
	} else
		ch = linebuf[i];

	/* This assumes that there is no '\b' in linebuf. */
	while (curr > lmargin && column > lmargin &&
	    (!(attr[curr - 1] & (AT_ANSI|AT_BINARY)))) {
		/* Drop ch from the buffer, then locate the one before it. */
		curr = i--;
		if (utf_mode) {
			while (i >= lmargin && IS_UTF8_TRAIL(linebuf[i]))
				i--;
		}
		if (i < lmargin)
			prev_ch = L'\0';
		else if (utf_mode) {
			len = mbtowc(&prev_ch, linebuf + i, curr - i);
			if (len == -1 || i + len < curr) {
				(void)mbtowc(NULL, NULL, MB_CUR_MAX);
				prev_ch = L'\0';
			}
		} else
			prev_ch = linebuf[i];
		/* Take back the columns ch occupied (attr[curr] is its attribute). */
		width = pwidth(ch, attr[curr], prev_ch);
		column -= width;
		/* A character with positive width is the base character. */
		if (width > 0)
			return (1);
		/* No (decodable) character is left in front of this one. */
		if (prev_ch == L'\0')
			return (0);
		ch = prev_ch;
	}
	return (0);
}
484:
485: /*
1.25 schwarze 486: * Is a character the end of an ANSI escape sequence?
1.5 millert 487: */
1.13 nicm 488: static int
489: is_ansi_end(LWCHAR ch)
1.10 shadchin 490: {
491: if (!is_ascii_char(ch))
492: return (0);
1.13 nicm 493: return (strchr(end_ansi_chars, (char)ch) != NULL);
1.10 shadchin 494: }
495:
496: /*
497: *
498: */
1.13 nicm 499: int
500: is_ansi_middle(LWCHAR ch)
1.5 millert 501: {
1.10 shadchin 502: if (!is_ascii_char(ch))
503: return (0);
504: if (is_ansi_end(ch))
505: return (0);
1.13 nicm 506: return (strchr(mid_ansi_chars, (char)ch) != NULL);
1.5 millert 507: }
508:
/*
 * Append a character and attribute to the line buffer.
 *
 * ch is the character and a its attribute.  rep, if not NULL, points
 * to the bytes to store for it (utf_len(rep[0]) of them); if rep is
 * NULL the single byte (char)ch is stored.  pos is the file position
 * used for the highlight lookup.
 * Returns 0 on success (including when an unrecognized escape sequence
 * was dropped), 1 if the character does not fit on the screen or the
 * line buffer could not be expanded.
 */
static int
store_char(LWCHAR ch, char a, char *rep, off_t pos)
{
	int i;
	int w;
	int replen;
	char cs;
	int matches;

	if (is_hilited(pos, pos+1, 0, &matches)) {
		/*
		 * This character should be highlighted.
		 * Override the attribute passed in.
		 */
		if (a != AT_ANSI) {
			if (highest_hilite != -1 && pos > highest_hilite)
				highest_hilite = pos;
			a |= AT_HILITE;
		}
	}

	/* w stays -1 until the display width has been determined. */
	w = -1;
	if (ctldisp == OPT_ONPLUS) {
		/*
		 * Set i to the beginning of an ANSI escape sequence
		 * that was begun and not yet ended, or to -1 otherwise.
		 * (Assigning 0 makes the loop's decrement leave i at -1.)
		 */
		for (i = curr - 1; i >= 0; i--) {
			if (linebuf[i] == ESC)
				break;
			if (!is_ansi_middle(linebuf[i]))
				i = 0;
		}
		if (i >= 0 && !is_ansi_end(ch) && !is_ansi_middle(ch)) {
			/* Remove whole unrecognized sequence. */
			curr = i;
			return (0);
		}
		if (i >= 0 || ch == ESC) {
			/* Part of an escape sequence: takes no columns. */
			a = AT_ANSI;	/* Will force re-AT_'ing around it. */
			w = 0;
		}
	}
	if (w == -1) {
		wchar_t prev_ch;

		/* Find the previous character; pwidth() needs it for '\b'. */
		if (utf_mode) {
			for (i = curr - 1; i >= 0; i--)
				if (!IS_UTF8_TRAIL(linebuf[i]))
					break;
			if (i >= 0) {
				w = mbtowc(&prev_ch, linebuf + i, curr - i);
				if (w == -1 || i + w < curr) {
					/* Undecodable: reset state, use a blank. */
					(void)mbtowc(NULL, NULL, MB_CUR_MAX);
					prev_ch = L' ';
				}
			} else
				prev_ch = L' ';
		} else
			prev_ch = curr > 0 ? linebuf[curr - 1] : L' ';
		w = pwidth(ch, a, prev_ch);
	}

	if (ctldisp != OPT_ON && column + w + attr_ewidth(a) > sc_width)
		/*
		 * Won't fit on screen.
		 */
		return (1);

	if (rep == NULL) {
		cs = (char)ch;
		rep = &cs;
		replen = 1;
	} else {
		replen = utf_len(rep[0]);
	}
	/* Six bytes are kept in reserve at the end of the buffer. */
	if (curr + replen >= size_linebuf-6) {
		/*
		 * Won't fit in line buffer.
		 * Try to expand it.
		 */
		if (expand_linebuf())
			return (1);
	}

	/* Store every byte of the character with the same attribute. */
	while (replen-- > 0) {
		linebuf[curr] = *rep++;
		attr[curr] = a;
		curr++;
	}
	column += w;
	return (0);
}
605:
606: /*
1.5 millert 607: * Append a tab to the line buffer.
608: * Store spaces to represent the tab.
609: */
1.13 nicm 610: static int
611: store_tab(int attr, off_t pos)
1.5 millert 612: {
613: int to_tab = column + cshift - lmargin;
614: int i;
615:
616: if (ntabstops < 2 || to_tab >= tabstops[ntabstops-1])
617: to_tab = tabdefault -
1.13 nicm 618: ((to_tab - tabstops[ntabstops-1]) % tabdefault);
619: else {
1.21 deraadt 620: for (i = ntabstops - 2; i >= 0; i--)
1.5 millert 621: if (to_tab >= tabstops[i])
622: break;
623: to_tab = tabstops[i+1] - to_tab;
624: }
625:
1.13 nicm 626: if (column + to_tab - 1 + pwidth(' ', attr, 0) +
627: attr_ewidth(attr) > sc_width)
628: return (1);
1.10 shadchin 629:
1.5 millert 630: do {
1.26 schwarze 631: if (store_char(' ', attr, " ", pos))
632: return (1);
1.5 millert 633: } while (--to_tab > 0);
1.13 nicm 634: return (0);
1.5 millert 635: }
636:
1.13 nicm 637: static int
638: store_prchar(char c, off_t pos)
1.10 shadchin 639: {
640: char *s;
641:
642: /*
643: * Convert to printable representation.
644: */
645: s = prchar(c);
646:
647: /*
648: * Make sure we can get the entire representation
649: * of the character on this line.
650: */
1.13 nicm 651: if (column + (int)strlen(s) - 1 +
652: pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
653: return (1);
1.10 shadchin 654:
1.13 nicm 655: for (; *s != 0; s++) {
1.26 schwarze 656: if (store_char(*s, AT_BINARY, NULL, pos))
657: return (1);
1.13 nicm 658: }
659: return (0);
1.10 shadchin 660: }
661:
1.13 nicm 662: static int
663: flush_mbc_buf(off_t pos)
1.10 shadchin 664: {
665: int i;
666:
1.26 schwarze 667: for (i = 0; i < mbc_buf_index; i++) {
1.10 shadchin 668: if (store_prchar(mbc_buf[i], pos))
1.13 nicm 669: return (mbc_buf_index - i);
1.26 schwarze 670: }
1.13 nicm 671: return (0);
1.10 shadchin 672: }
673:
/*
 * Append a character to the line buffer.
 * Expand tabs into spaces, handle underlining, boldfacing, etc.
 * Returns 0 if ok, nonzero if the input could not be stored.
 * Outside UTF-8 mode the nonzero value is whatever do_append() returned.
 * In UTF-8 mode it can be larger than 1: per the comments below it is
 * the number of input bytes the caller has to back up by.
 *
 * c is the next input byte and pos its position in the file.
 */
int
pappend(char c, off_t pos)
{
	mbstate_t mbs;
	size_t sz;
	wchar_t ch;
	int r;

	/* A character held back by the previous call goes in first. */
	if (pendc) {
		if (do_append(pendc, NULL, pendpos))
			/*
			 * Oops.  We've probably lost the char which
			 * was in pendc, since caller won't back up.
			 */
			return (1);
		pendc = '\0';
	}

	if (c == '\r' && bs_mode == BS_SPECIAL) {
		if (mbc_buf_index > 0)  /* utf_mode must be on. */ {
			/* Flush incomplete (truncated) sequence. */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = 0;
			/* Unstored bytes, plus one for the CR itself. */
			if (r)
				return (r + 1);
		}

		/*
		 * Don't put the CR into the buffer until we see
		 * the next char.  If the next char is a newline,
		 * discard the CR.
		 */
		pendc = c;
		pendpos = pos;
		return (0);
	}

	if (!utf_mode) {
		r = do_append((LWCHAR) c, NULL, pos);
	} else {
		/*
		 * Collect bytes in mbc_buf until they form a character.
		 * The loop runs a second time only after an invalid
		 * continuation byte, to handle that byte on its own.
		 */
		for (;;) {
			if (mbc_buf_index == 0)
				mbc_pos = pos;
			mbc_buf[mbc_buf_index++] = c;
			/* Decode the collected bytes from a fresh state. */
			memset(&mbs, 0, sizeof(mbs));
			sz = mbrtowc(&ch, mbc_buf, mbc_buf_index, &mbs);

			/* Incomplete UTF-8: wait for more bytes. */
			if (sz == (size_t)-2)
				return (0);

			/* Valid UTF-8: use the character. */
			if (sz != (size_t)-1) {
				r = do_append(ch, mbc_buf, mbc_pos) ?
				    mbc_buf_index : 0;
				break;
			}

			/* Invalid start byte: encode it. */
			if (mbc_buf_index == 1) {
				r = store_prchar(c, pos);
				break;
			}

			/*
			 * Invalid continuation.
			 * Encode the preceding bytes.
			 * If they fit, handle the interrupting byte.
			 * Otherwise, tell the caller to back up
			 * by the number of bytes that do not fit,
			 * plus one for the new byte.
			 */
			mbc_buf_index--;
			if ((r = flush_mbc_buf(mbc_pos) + 1) == 1)
				mbc_buf_index = 0;
			else
				break;
		}
	}

	/*
	 * If we need to shift the line, do it.
	 * But wait until we get to at least the middle of the screen,
	 * so shifting it doesn't affect the chars we're currently
	 * pappending.  (Bold & underline can get messed up otherwise.)
	 */
	if (cshift < hshift && column > sc_width / 2) {
		linebuf[curr] = '\0';
		pshift(hshift - cshift);
	}
	/* Whatever was collected has been dealt with above. */
	mbc_buf_index = 0;
	return (r);
}
772:
/*
 * Append one decoded character to the line buffer, interpreting
 * backspaces, overstrikes, tabs and control characters.
 *
 * ch is the character; rep, if not NULL, points to the bytes that
 * represent it (passed on to store_char()); pos is its file position.
 * Returns 0 on success, 1 if the character could not be stored.
 */
static int
do_append(LWCHAR ch, char *rep, off_t pos)
{
	wchar_t prev_ch;
	int a;

	a = AT_NORMAL;

	if (ch == '\b') {
		if (bs_mode == BS_CONTROL)
			goto do_control_char;

		/*
		 * A better test is needed here so we don't
		 * backspace over part of the printed
		 * representation of a binary character.
		 */
		if (curr <= lmargin ||
		    column <= lmargin ||
		    (attr[curr - 1] & (AT_ANSI|AT_BINARY))) {
			/* Nothing to back up over: show the backspace. */
			if (store_prchar('\b', pos))
				return (1);
		} else if (bs_mode == BS_NORMAL) {
			if (store_char(ch, AT_NORMAL, NULL, pos))
				return (1);
		} else if (bs_mode == BS_SPECIAL) {
			/* Back up; the next character overstrikes if it worked. */
			overstrike = backc();
		}

		return (0);
	}

	if (overstrike > 0) {
		/*
		 * Overstrike the character at the current position
		 * in the line buffer.  This will cause either
		 * underline (if a "_" is overstruck),
		 * bold (if an identical character is overstruck),
		 * or just deletion of the character in the buffer.
		 */
		overstrike = utf_mode ? -1 : 0;
		/* To be correct, this must be a base character. */
		if (mbtowc(&prev_ch, linebuf + curr, MB_CUR_MAX) == -1) {
			(void)mbtowc(NULL, NULL, MB_CUR_MAX);
			prev_ch = L'\0';
		}
		a = attr[curr];
		if (ch == prev_ch) {
			/*
			 * Overstriking a char with itself means make it bold.
			 * But overstriking an underscore with itself is
			 * ambiguous.  It could mean make it bold, or
			 * it could mean make it underlined.
			 * Use the previous overstrike to resolve it.
			 */
			if (ch == '_') {
				if ((a & (AT_BOLD|AT_UNDERLINE)) != AT_NORMAL)
					a |= (AT_BOLD|AT_UNDERLINE);
				else if (curr > 0 && attr[curr - 1] & AT_UNDERLINE)
					a |= AT_UNDERLINE;
				else if (curr > 0 && attr[curr - 1] & AT_BOLD)
					a |= AT_BOLD;
				else
					/* Undecided; pdone() resolves AT_INDET. */
					a |= AT_INDET;
			} else {
				a |= AT_BOLD;
			}
		} else if (ch == '_' && prev_ch != L'\0') {
			/* "_" on top of X: keep X, underlined. */
			a |= AT_UNDERLINE;
			ch = prev_ch;
			rep = linebuf + curr;
		} else if (prev_ch == '_') {
			/* X on top of "_": store X, underlined. */
			a |= AT_UNDERLINE;
		}
		/* Else we replace prev_ch, but we keep its attributes. */
	} else if (overstrike < 0) {
		if (wcwidth(ch) == 0) {
			/* Continuation of the same overstrike. */
			if (curr > 0)
				a = attr[curr - 1] & (AT_UNDERLINE | AT_BOLD);
			else
				a = AT_NORMAL;
		} else
			overstrike = 0;
	}

	if (ch == '\t') {
		/*
		 * Expand a tab into spaces.
		 */
		switch (bs_mode) {
		case BS_CONTROL:
			goto do_control_char;
		case BS_NORMAL:
		case BS_SPECIAL:
			if (store_tab(a, pos))
				return (1);
			break;
		}
	} else if ((!utf_mode || is_ascii_char(ch)) &&
	    !isprint((unsigned char)ch)) {
do_control_char:
		if (ctldisp == OPT_ON ||
		    (ctldisp == OPT_ONPLUS && ch == ESC)) {
			/*
			 * Output as a normal character.
			 */
			if (store_char(ch, AT_NORMAL, rep, pos))
				return (1);
		} else {
			/* Otherwise store its printable representation. */
			if (store_prchar(ch, pos))
				return (1);
		}
	} else if (utf_mode && ctldisp != OPT_ON && !iswprint(ch)) {
		/* Non-printable wide character: store what prutfchar() gives. */
		char *s;

		s = prutfchar(ch);

		/* Store nothing unless the whole representation fits. */
		if (column + (int)strlen(s) - 1 +
		    pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
			return (1);

		for (; *s != 0; s++) {
			if (store_char(*s, AT_BINARY, NULL, pos))
				return (1);
		}
	} else {
		if (store_char(ch, a, rep, pos))
			return (1);
	}
	return (0);
}
905:
906: /*
907: *
908: */
1.13 nicm 909: int
910: pflushmbc(void)
1.10 shadchin 911: {
912: int r = 0;
1.1 etheisen 913:
1.29 schwarze 914: if (mbc_buf_index > 0) {
1.10 shadchin 915: /* Flush incomplete (truncated) sequence. */
916: r = flush_mbc_buf(mbc_pos);
1.29 schwarze 917: mbc_buf_index = 0;
1.10 shadchin 918: }
1.13 nicm 919: return (r);
1.1 etheisen 920: }
921:
/*
 * Terminate the line in the line buffer.
 *
 * endline is nonzero if this is really the end of the input line;
 * forw is nonzero if we are moving forward (see the "ignaw" case below).
 * Pending input is flushed, undecided underscore attributes are
 * resolved, any outstanding shift is applied, and the line is closed
 * with an optional attribute reset, an optional newline, and a '\0'.
 */
void
pdone(int endline, int forw)
{
	int i;

	/* Store any incomplete multibyte sequence; the result is ignored. */
	(void) pflushmbc();

	if (pendc && (pendc != '\r' || !endline))
		/*
		 * If we had a pending character, put it in the buffer.
		 * But discard a pending CR if we are at end of line
		 * (that is, discard the CR in a CR/LF sequence).
		 */
		(void) do_append(pendc, NULL, pendpos);

	/*
	 * Resolve AT_INDET, working backwards from the end of the line:
	 * use bold if the character that follows is bold,
	 * and underline otherwise.
	 */
	for (i = curr - 1; i >= 0; i--) {
		if (attr[i] & AT_INDET) {
			attr[i] &= ~AT_INDET;
			if (i < curr - 1 && attr[i + 1] & AT_BOLD)
				attr[i] |= AT_BOLD;
			else
				attr[i] |= AT_UNDERLINE;
		}
	}

	/*
	 * Make sure we've shifted the line, if we need to.
	 */
	if (cshift < hshift)
		pshift(hshift - cshift);

	if (ctldisp == OPT_ONPLUS && is_ansi_end('m')) {
		/* Switch to normal attribute at end of line. */
		char *p = "\033[m";
		for (; *p != '\0'; p++) {
			linebuf[curr] = *p;
			attr[curr++] = AT_ANSI;
		}
	}

	/*
	 * Add a newline if necessary,
	 * and append a '\0' to the end of the line.
	 * We output a newline if we're not at the right edge of the screen,
	 * or if the terminal doesn't auto wrap,
	 * or if this is really the end of the line AND the terminal ignores
	 * a newline at the right edge.
	 * (In the last case we don't want to output a newline if the terminal
	 * doesn't ignore it since that would produce an extra blank line.
	 * But we do want to output a newline if the terminal ignores it in case
	 * the next line is blank.  In that case the single newline output for
	 * that blank line would be ignored!)
	 */
	if (column < sc_width || !auto_wrap || (endline && ignaw) ||
	    ctldisp == OPT_ON) {
		linebuf[curr] = '\n';
		attr[curr] = AT_NORMAL;
		curr++;
	} else if (ignaw && column >= sc_width && forw) {
		/*
		 * Terminals with "ignaw" don't wrap until they *really* need
		 * to, i.e. when the character *after* the last one to fit on a
		 * line is output.  But they are too hard to deal with when they
		 * get in the state where a full screen width of characters
		 * have been output but the cursor is sitting on the right edge
		 * instead of at the start of the next line.
		 * So we nudge them into wrapping by outputting a space
		 * character plus a backspace.  But do this only if moving
		 * forward; if we're moving backward and drawing this line at
		 * the top of the screen, the space would overwrite the first
		 * char on the next line.  We don't need to do this "nudge"
		 * at the top of the screen anyway.
		 */
		linebuf[curr] = ' ';
		attr[curr++] = AT_NORMAL;
		linebuf[curr] = '\b';
		attr[curr++] = AT_NORMAL;
	}
	/* Terminate without advancing curr. */
	linebuf[curr] = '\0';
	attr[curr] = AT_NORMAL;
}
1.5 millert 1006:
1.10 shadchin 1007: /*
1008: *
1009: */
1.13 nicm 1010: void
1011: set_status_col(char c)
1.10 shadchin 1012: {
1013: linebuf[0] = c;
1014: attr[0] = AT_NORMAL|AT_HILITE;
1.1 etheisen 1015: }
1016:
1017: /*
1018: * Get a character from the current line.
1019: * Return the character as the function return value,
1020: * and the character attribute in *ap.
1021: */
1.13 nicm 1022: int
1023: gline(int i, int *ap)
1.1 etheisen 1024: {
1.13 nicm 1025: if (is_null_line) {
1.1 etheisen 1026: /*
1027: * If there is no current line, we pretend the line is
1028: * either "~" or "", depending on the "twiddle" flag.
1029: */
1.13 nicm 1030: if (twiddle) {
1031: if (i == 0) {
1.10 shadchin 1032: *ap = AT_BOLD;
1.13 nicm 1033: return ('~');
1.10 shadchin 1034: }
1035: --i;
1036: }
1037: /* Make sure we're back to AT_NORMAL before the '\n'. */
1038: *ap = AT_NORMAL;
1.13 nicm 1039: return (i ? '\0' : '\n');
1.1 etheisen 1040: }
1041:
1042: *ap = attr[i];
1.10 shadchin 1043: return (linebuf[i] & 0xFF);
1.1 etheisen 1044: }
1045:
1046: /*
1047: * Indicate that there is no current line.
1048: */
1.13 nicm 1049: void
1050: null_line(void)
1.1 etheisen 1051: {
1052: is_null_line = 1;
1.5 millert 1053: cshift = 0;
1.1 etheisen 1054: }
1055:
1056: /*
1057: * Analogous to forw_line(), but deals with "raw lines":
1058: * lines which are not split for screen width.
1059: * {{ This is supposed to be more efficient than forw_line(). }}
1060: */
1.13 nicm 1061: off_t
1062: forw_raw_line(off_t curr_pos, char **linep, int *line_lenp)
1063: {
1064: int n;
1065: int c;
1066: off_t new_pos;
1067:
1068: if (curr_pos == -1 || ch_seek(curr_pos) ||
1069: (c = ch_forw_get()) == EOI)
1070: return (-1);
1.1 etheisen 1071:
1.5 millert 1072: n = 0;
1.13 nicm 1073: for (;;) {
1074: if (c == '\n' || c == EOI || ABORT_SIGS()) {
1.1 etheisen 1075: new_pos = ch_tell();
1076: break;
1077: }
1.13 nicm 1078: if (n >= size_linebuf-1) {
1079: if (expand_linebuf()) {
1.5 millert 1080: /*
1081: * Overflowed the input buffer.
1082: * Pretend the line ended here.
1083: */
1084: new_pos = ch_tell() - 1;
1085: break;
1086: }
1.1 etheisen 1087: }
1.13 nicm 1088: linebuf[n++] = (char)c;
1.1 etheisen 1089: c = ch_forw_get();
1090: }
1.5 millert 1091: linebuf[n] = '\0';
1.1 etheisen 1092: if (linep != NULL)
1093: *linep = linebuf;
1.10 shadchin 1094: if (line_lenp != NULL)
1095: *line_lenp = n;
1.1 etheisen 1096: return (new_pos);
1097: }
1098:
1099: /*
1100: * Analogous to back_line(), but deals with "raw lines".
1101: * {{ This is supposed to be more efficient than back_line(). }}
1102: */
1.13 nicm 1103: off_t
1104: back_raw_line(off_t curr_pos, char **linep, int *line_lenp)
1105: {
1106: int n;
1107: int c;
1108: off_t new_pos;
1109:
1110: if (curr_pos == -1 || curr_pos <= ch_zero() || ch_seek(curr_pos - 1))
1111: return (-1);
1.1 etheisen 1112:
1.5 millert 1113: n = size_linebuf;
1114: linebuf[--n] = '\0';
1.13 nicm 1115: for (;;) {
1.1 etheisen 1116: c = ch_back_get();
1.13 nicm 1117: if (c == '\n' || ABORT_SIGS()) {
1.1 etheisen 1118: /*
1119: * This is the newline ending the previous line.
1120: * We have hit the beginning of the line.
1121: */
1122: new_pos = ch_tell() + 1;
1123: break;
1124: }
1.13 nicm 1125: if (c == EOI) {
1.1 etheisen 1126: /*
1127: * We have hit the beginning of the file.
1128: * This must be the first line in the file.
1129: * This must, of course, be the beginning of the line.
1130: */
1131: new_pos = ch_zero();
1132: break;
1133: }
1.13 nicm 1134: if (n <= 0) {
1.5 millert 1135: int old_size_linebuf = size_linebuf;
1.13 nicm 1136: if (expand_linebuf()) {
1.5 millert 1137: /*
1138: * Overflowed the input buffer.
1139: * Pretend the line ended here.
1140: */
1141: new_pos = ch_tell() + 1;
1142: break;
1143: }
1.1 etheisen 1144: /*
1.5 millert 1145: * Shift the data to the end of the new linebuf.
1.1 etheisen 1146: */
1.5 millert 1147: n = size_linebuf - old_size_linebuf;
1.8 millert 1148: memmove(linebuf + n, linebuf, old_size_linebuf);
1.1 etheisen 1149: }
1.5 millert 1150: linebuf[--n] = c;
1.1 etheisen 1151: }
1152: if (linep != NULL)
1.5 millert 1153: *linep = &linebuf[n];
1.10 shadchin 1154: if (line_lenp != NULL)
1155: *line_lenp = size_linebuf - 1 - n;
1.1 etheisen 1156: return (new_pos);
1157: }