Annotation of src/usr.bin/less/line.c, Revision 1.28
1.1 etheisen 1: /*
1.12 shadchin 2: * Copyright (C) 1984-2012 Mark Nudelman
1.14 nicm 3: * Modified for use with illumos by Garrett D'Amore.
4: * Copyright 2014 Garrett D'Amore <garrett@damore.org>
1.1 etheisen 5: *
1.5 millert 6: * You may distribute under the terms of either the GNU General Public
7: * License or the Less License, as specified in the README file.
1.1 etheisen 8: *
1.12 shadchin 9: * For more information, see the README file.
1.13 nicm 10: */
1.1 etheisen 11:
12: /*
13: * Routines to manipulate the "line buffer".
14: * The line buffer holds a line of output as it is being built
15: * in preparation for output to the screen.
16: */
17:
1.23 schwarze 18: #include <wchar.h>
1.28 ! schwarze 19: #include <wctype.h>
1.23 schwarze 20:
1.19 mmcc 21: #include "charset.h"
1.1 etheisen 22: #include "less.h"
23:
static char *linebuf = NULL;	/* Buffer which holds the current output line */
static char *attr = NULL;	/* Extension of linebuf to hold attributes */
int size_linebuf = 0;		/* Size of line buffer (and attr buffer) */

static int cshift;		/* Current left-shift of output line buffer */
int hshift;			/* Desired left-shift of output line buffer */
int tabstops[TABSTOP_MAX] = { 0 }; /* Custom tabstops */
int ntabstops = 1;		/* Number of tabstops */
int tabdefault = 8;		/* Default repeated tabstops */
off_t highest_hilite;		/* Pos of last hilite in file found so far */

static int curr;		/* Index into linebuf */
static int column;	/* Printable length, accounting for backspaces, etc. */
static int overstrike;		/* Next char should overstrike previous char */
static int is_null_line;	/* There is no current line */
static int lmargin;		/* Left margin */
static char pendc;		/* CR held back by pappend(), '\0' if none */
static off_t pendpos;		/* File position that goes with pendc */
static char *end_ansi_chars;	/* Chars accepted as end of an ANSI sequence */
static char *mid_ansi_chars;	/* Chars accepted inside an ANSI sequence */

/* Forward declarations of helpers defined further down in this file. */
static int attr_swidth(int);
static int attr_ewidth(int);
static int do_append(LWCHAR, char *, off_t);

/* State and options owned by other parts of the program. */
extern volatile sig_atomic_t sigs;
extern int bs_mode;
extern int linenums;
extern int ctldisp;
extern int twiddle;
extern int binattr;
extern int status_col;
extern int auto_wrap, ignaw;
extern int bo_s_width, bo_e_width;
extern int ul_s_width, ul_e_width;
extern int bl_s_width, bl_e_width;
extern int so_s_width, so_e_width;
extern int sc_width, sc_height;
extern int utf_mode;
extern off_t start_attnpos;
extern off_t end_attnpos;

/* Bytes of the multibyte sequence that pappend() is currently collecting. */
static char mbc_buf[MAX_UTF_CHAR_LEN];
/* Expected total length of that sequence; 0 when none is in progress. */
static int mbc_buf_len = 0;
/* Number of bytes placed in mbc_buf so far (also reused as back-up count). */
static int mbc_buf_index = 0;
/* File position recorded when the lead byte of the sequence was seen. */
static off_t mbc_pos;
1.10 shadchin 70:
1.5 millert 71: /*
72: * Initialize from environment variables.
73: */
1.13 nicm 74: void
75: init_line(void)
1.5 millert 76: {
77: end_ansi_chars = lgetenv("LESSANSIENDCHARS");
78: if (end_ansi_chars == NULL || *end_ansi_chars == '\0')
79: end_ansi_chars = "m";
1.10 shadchin 80:
81: mid_ansi_chars = lgetenv("LESSANSIMIDCHARS");
82: if (mid_ansi_chars == NULL || *mid_ansi_chars == '\0')
83: mid_ansi_chars = "0123456789;[?!\"'#%()*+ ";
84:
1.13 nicm 85: linebuf = ecalloc(LINEBUF_SIZE, sizeof (char));
86: attr = ecalloc(LINEBUF_SIZE, sizeof (char));
1.5 millert 87: size_linebuf = LINEBUF_SIZE;
88: }
89:
90: /*
91: * Expand the line buffer.
92: */
1.13 nicm 93: static int
94: expand_linebuf(void)
1.5 millert 95: {
1.10 shadchin 96: /* Double the size of the line buffer. */
1.7 millert 97: int new_size = size_linebuf * 2;
1.10 shadchin 98:
99: /* Just realloc to expand the buffer, if we can. */
1.22 deraadt 100: char *new_buf = recallocarray(linebuf, size_linebuf, new_size, 1);
101: char *new_attr = recallocarray(attr, size_linebuf, new_size, 1);
1.13 nicm 102: if (new_buf == NULL || new_attr == NULL) {
1.15 mmcc 103: free(new_attr);
104: free(new_buf);
1.13 nicm 105: return (1);
1.5 millert 106: }
107: linebuf = new_buf;
108: attr = new_attr;
109: size_linebuf = new_size;
1.13 nicm 110: return (0);
1.5 millert 111: }
1.1 etheisen 112:
113: /*
1.10 shadchin 114: * Is a character ASCII?
115: */
1.25 schwarze 116: static int
1.13 nicm 117: is_ascii_char(LWCHAR ch)
1.10 shadchin 118: {
119: return (ch <= 0x7F);
120: }
121:
122: /*
1.1 etheisen 123: * Rewind the line buffer.
124: */
1.13 nicm 125: void
126: prewind(void)
1.1 etheisen 127: {
128: curr = 0;
129: column = 0;
1.10 shadchin 130: cshift = 0;
1.1 etheisen 131: overstrike = 0;
1.10 shadchin 132: mbc_buf_len = 0;
1.1 etheisen 133: is_null_line = 0;
134: pendc = '\0';
1.5 millert 135: lmargin = 0;
136: if (status_col)
137: lmargin += 1;
1.1 etheisen 138: }
139:
140: /*
141: * Insert the line number (of the given position) into the line buffer.
142: */
1.13 nicm 143: void
144: plinenum(off_t pos)
1.1 etheisen 145: {
1.16 mmcc 146: off_t linenum = 0;
1.13 nicm 147: int i;
1.5 millert 148:
1.13 nicm 149: if (linenums == OPT_ONPLUS) {
1.5 millert 150: /*
151: * Get the line number and put it in the current line.
152: * {{ Note: since find_linenum calls forw_raw_line,
1.13 nicm 153: * it may seek in the input file, requiring the caller
1.5 millert 154: * of plinenum to re-seek if necessary. }}
155: * {{ Since forw_raw_line modifies linebuf, we must
156: * do this first, before storing anything in linebuf. }}
157: */
158: linenum = find_linenum(pos);
159: }
1.1 etheisen 160:
161: /*
1.5 millert 162: * Display a status column if the -J option is set.
1.1 etheisen 163: */
1.13 nicm 164: if (status_col) {
1.5 millert 165: linebuf[curr] = ' ';
1.13 nicm 166: if (start_attnpos != -1 &&
1.5 millert 167: pos >= start_attnpos && pos < end_attnpos)
1.10 shadchin 168: attr[curr] = AT_NORMAL|AT_HILITE;
1.5 millert 169: else
1.10 shadchin 170: attr[curr] = AT_NORMAL;
1.5 millert 171: curr++;
172: column++;
173: }
1.1 etheisen 174: /*
1.5 millert 175: * Display the line number at the start of each line
176: * if the -N option is set.
1.1 etheisen 177: */
1.13 nicm 178: if (linenums == OPT_ONPLUS) {
1.18 mmcc 179: char buf[23];
1.5 millert 180: int n;
1.1 etheisen 181:
1.17 mmcc 182: postoa(linenum, buf, sizeof(buf));
1.5 millert 183: n = strlen(buf);
184: if (n < MIN_LINENUM_WIDTH)
185: n = MIN_LINENUM_WIDTH;
1.6 millert 186: snprintf(linebuf+curr, size_linebuf-curr, "%*s ", n, buf);
1.21 deraadt 187: n++; /* One space after the line number. */
1.5 millert 188: for (i = 0; i < n; i++)
189: attr[curr+i] = AT_NORMAL;
190: curr += n;
191: column += n;
192: lmargin += n;
193: }
1.1 etheisen 194:
195: /*
1.5 millert 196: * Append enough spaces to bring us to the lmargin.
1.1 etheisen 197: */
1.13 nicm 198: while (column < lmargin) {
1.1 etheisen 199: linebuf[curr] = ' ';
200: attr[curr++] = AT_NORMAL;
201: column++;
1.5 millert 202: }
203: }
204:
/*
 * Shift the input line left.
 * This means discarding N printable chars at the start of the buffer.
 *
 * shift is the number of screen columns to discard.  It is clamped so
 * that nothing left of lmargin is touched.  Bytes from lmargin onwards
 * are dropped until that many columns are gone, the remainder is moved
 * down to lmargin, and curr, column and cshift are adjusted by the
 * number of columns actually removed.
 */
static void
pshift(int shift)
{
	LWCHAR prev_ch = 0;
	unsigned char c;
	int shifted = 0;	/* Columns discarded so far. */
	int to;			/* Write index for bytes that are kept. */
	int from;		/* Read index. */
	int len;		/* Byte length of the current character. */
	int width;		/* Column width of the current character. */
	int prev_attr;
	int next_attr;

	if (shift > column - lmargin)
		shift = column - lmargin;
	if (shift > curr - lmargin)
		shift = curr - lmargin;

	to = from = lmargin;
	/*
	 * We keep on going when shifted == shift
	 * to get all combining chars.
	 */
	while (shifted <= shift && from < curr) {
		c = linebuf[from];
		if (ctldisp == OPT_ONPLUS && c == ESC) {
			/* Keep cumulative effect. */
			/*
			 * The whole escape sequence is copied to the output
			 * side instead of being discarded; copying stops
			 * after the first byte that is not a "middle" char.
			 */
			linebuf[to] = c;
			attr[to++] = attr[from++];
			while (from < curr && linebuf[from]) {
				linebuf[to] = linebuf[from];
				attr[to++] = attr[from];
				if (!is_ansi_middle(linebuf[from++]))
					break;
			}
			continue;
		}

		width = 0;

		if (!IS_ASCII_OCTET(c) && utf_mode) {
			/* Assumes well-formedness validation already done. */
			LWCHAR ch;

			len = utf_len(c);
			if (from + len > curr)
				break;
			ch = get_wchar(linebuf + from);
			if (!is_composing_char(ch) &&
			    !is_combining_char(prev_ch, ch))
				width = is_wide_char(ch) ? 2 : 1;
			prev_ch = ch;
		} else {
			len = 1;
			if (c == '\b')
				/* XXX - Incorrect if several '\b' in a row. */
				width = (utf_mode && is_wide_char(prev_ch)) ?
				    -2 : -1;
			else if (!control_char(c))
				width = 1;
			prev_ch = 0;
		}

		if (width == 2 && shift - shifted == 1) {
			/* Should never happen when called by pshift_all(). */
			/*
			 * Only one column is left to discard but the char
			 * is two wide: replace it by a single blank.
			 */
			attr[to] = attr[from];
			/*
			 * Assume a wide_char will never be the first half of a
			 * combining_char pair, so reset prev_ch in case we're
			 * followed by a '\b'.
			 */
			prev_ch = linebuf[to++] = ' ';
			from += len;
			shifted++;
			continue;
		}

		/* Adjust width for magic cookies. */
		prev_attr = (to > 0) ? attr[to-1] : AT_NORMAL;
		next_attr = (from + len < curr) ? attr[from + len] : prev_attr;
		if (!is_at_equiv(attr[from], prev_attr) &&
		    !is_at_equiv(attr[from], next_attr)) {
			width += attr_swidth(attr[from]);
			if (from + len < curr)
				width += attr_ewidth(attr[from]);
			if (is_at_equiv(prev_attr, next_attr)) {
				width += attr_ewidth(prev_attr);
				if (from + len < curr)
					width += attr_swidth(next_attr);
			}
		}

		/* Stop before a char that is wider than what is left. */
		if (shift - shifted < width)
			break;
		from += len;
		shifted += width;
		/* Backspaces have negative width; never go below zero. */
		if (shifted < 0)
			shifted = 0;
	}
	/* Move whatever was not discarded down to the write position. */
	while (from < curr) {
		linebuf[to] = linebuf[from];
		attr[to++] = attr[from++];
	}
	curr = to;
	column -= shifted;
	cshift += shifted;
}
316:
/*
 * Shift the line left by its whole current printable length.
 * pshift() itself limits the amount to what lies right of the left margin.
 */
void
pshift_all(void)
{
	pshift(column);
}
325:
326: /*
327: * Return the printing width of the start (enter) sequence
328: * for a given character attribute.
329: */
1.13 nicm 330: static int
331: attr_swidth(int a)
1.1 etheisen 332: {
1.10 shadchin 333: int w = 0;
334:
335: a = apply_at_specials(a);
336:
337: if (a & AT_UNDERLINE)
338: w += ul_s_width;
339: if (a & AT_BOLD)
340: w += bo_s_width;
341: if (a & AT_BLINK)
342: w += bl_s_width;
343: if (a & AT_STANDOUT)
344: w += so_s_width;
345:
1.13 nicm 346: return (w);
1.1 etheisen 347: }
348:
349: /*
350: * Return the printing width of the end (exit) sequence
351: * for a given character attribute.
352: */
1.13 nicm 353: static int
354: attr_ewidth(int a)
1.1 etheisen 355: {
1.10 shadchin 356: int w = 0;
357:
358: a = apply_at_specials(a);
359:
360: if (a & AT_UNDERLINE)
361: w += ul_e_width;
362: if (a & AT_BOLD)
363: w += bo_e_width;
364: if (a & AT_BLINK)
365: w += bl_e_width;
366: if (a & AT_STANDOUT)
367: w += so_e_width;
368:
1.13 nicm 369: return (w);
1.1 etheisen 370: }
371:
372: /*
373: * Return the printing width of a given character and attribute,
374: * if the character were added to the current position in the line buffer.
375: * Adding a character with a given attribute may cause an enter or exit
376: * attribute sequence to be inserted, so this must be taken into account.
377: */
1.13 nicm 378: static int
1.23 schwarze 379: pwidth(wchar_t ch, int a, wchar_t prev_ch)
1.1 etheisen 380: {
1.10 shadchin 381: int w;
1.1 etheisen 382:
1.23 schwarze 383: /*
384: * In case of a backspace, back up by the width of the previous
385: * character. If that is non-printable (for example another
386: * backspace) or zero width (for example a combining accent),
387: * the terminal may actually back up to a character even further
388: * back, but we no longer know how wide that may have been.
389: * The best guess possible at this point is that it was
390: * hopefully width one.
391: */
392: if (ch == L'\b') {
393: w = wcwidth(prev_ch);
394: if (w <= 0)
395: w = 1;
396: return (-w);
397: }
398:
399: w = wcwidth(ch);
400:
401: /*
402: * Non-printable characters can get here if the -r flag is in
403: * effect, and possibly in some other situations (XXX check that!).
404: * Treat them as zero width.
405: * That may not always match their actual behaviour,
406: * but there is no reasonable way to be more exact.
407: */
408: if (w == -1)
409: w = 0;
1.13 nicm 410:
1.23 schwarze 411: /*
412: * Combining accents take up no space.
413: * Some terminals, upon failure to compose them with the
414: * characters that precede them, will actually take up one column
415: * for the combining accent; there isn't much we could do short
416: * of testing the (complex) composition process ourselves and
417: * printing a binary representation when it fails.
418: */
419: if (w == 0)
420: return (0);
1.1 etheisen 421:
422: /*
1.10 shadchin 423: * Other characters take one or two columns,
1.1 etheisen 424: * plus the width of any attribute enter/exit sequence.
425: */
1.10 shadchin 426: if (curr > 0 && !is_at_equiv(attr[curr-1], a))
1.1 etheisen 427: w += attr_ewidth(attr[curr-1]);
1.10 shadchin 428: if ((apply_at_specials(a) != AT_NORMAL) &&
429: (curr == 0 || !is_at_equiv(attr[curr-1], a)))
1.1 etheisen 430: w += attr_swidth(a);
431: return (w);
432: }
433:
/*
 * Delete to the previous base character in the line buffer.
 * Return 1 if one is found.
 *
 * Characters are removed from the end of the buffer, one at a time,
 * by moving curr back and subtracting their width from column.  The
 * loop ends with 1 as soon as a removed character had positive width.
 * It ends with 0 when the left margin is reached, when the character
 * before curr carries AT_ANSI or AT_BINARY, when there is no earlier
 * character, or (in utf_mode) when the bytes before curr do not decode
 * as exactly one multibyte character.
 */
static int
backc(void)
{
	wchar_t ch, prev_ch;
	int i, len, width;

	/* Find the first byte of the last character in the buffer. */
	i = curr - 1;
	if (utf_mode) {
		while (i >= lmargin && IS_UTF8_TRAIL(linebuf[i]))
			i--;
	}
	if (i < lmargin)
		return (0);
	if (utf_mode) {
		len = mbtowc(&ch, linebuf + i, curr - i);
		if (len == -1 || i + len < curr) {
			/* Reset the conversion state after the failure. */
			(void)mbtowc(NULL, NULL, MB_CUR_MAX);
			return (0);
		}
	} else
		ch = linebuf[i];

	/* This assumes that there is no '\b' in linebuf. */
	while (curr > lmargin && column > lmargin &&
	    (!(attr[curr - 1] & (AT_ANSI|AT_BINARY)))) {
		/* Drop ch from the buffer, then locate the char before it. */
		curr = i--;
		if (utf_mode) {
			while (i >= lmargin && IS_UTF8_TRAIL(linebuf[i]))
				i--;
		}
		if (i < lmargin)
			prev_ch = L'\0';
		else if (utf_mode) {
			len = mbtowc(&prev_ch, linebuf + i, curr - i);
			if (len == -1 || i + len < curr) {
				(void)mbtowc(NULL, NULL, MB_CUR_MAX);
				prev_ch = L'\0';
			}
		} else
			prev_ch = linebuf[i];
		/*
		 * attr[curr] is the attribute of the char just dropped,
		 * since curr now indexes its first byte.
		 */
		width = pwidth(ch, attr[curr], prev_ch);
		column -= width;
		if (width > 0)
			return (1);
		/* L'\0' marks "no usable earlier character". */
		if (prev_ch == L'\0')
			return (0);
		ch = prev_ch;
	}
	return (0);
}
488:
489: /*
1.25 schwarze 490: * Is a character the end of an ANSI escape sequence?
1.5 millert 491: */
1.13 nicm 492: static int
493: is_ansi_end(LWCHAR ch)
1.10 shadchin 494: {
495: if (!is_ascii_char(ch))
496: return (0);
1.13 nicm 497: return (strchr(end_ansi_chars, (char)ch) != NULL);
1.10 shadchin 498: }
499:
500: /*
501: *
502: */
1.13 nicm 503: int
504: is_ansi_middle(LWCHAR ch)
1.5 millert 505: {
1.10 shadchin 506: if (!is_ascii_char(ch))
507: return (0);
508: if (is_ansi_end(ch))
509: return (0);
1.13 nicm 510: return (strchr(mid_ansi_chars, (char)ch) != NULL);
1.5 millert 511: }
512:
/*
 * Append a character and attribute to the line buffer.
 *
 * ch is the character and a its attribute.  rep points to the bytes
 * that are stored for it; when rep is NULL, ch itself is stored as one
 * byte.  pos is the file position used for the highlight lookup.
 *
 * Returns 0 if the character was stored, or was dropped together with
 * an unrecognized escape sequence.  Returns 1 if it does not fit on the
 * screen or the buffers could not be expanded.
 */
static int
store_char(LWCHAR ch, char a, char *rep, off_t pos)
{
	int i;
	int w;		/* Column width of ch; -1 while not yet known. */
	int replen;
	char cs;
	int matches;

	if (is_hilited(pos, pos+1, 0, &matches)) {
		/*
		 * This character should be highlighted.
		 * Override the attribute passed in.
		 */
		if (a != AT_ANSI) {
			if (highest_hilite != -1 && pos > highest_hilite)
				highest_hilite = pos;
			a |= AT_HILITE;
		}
	}

	w = -1;
	if (ctldisp == OPT_ONPLUS) {
		/*
		 * Set i to the beginning of an ANSI escape sequence
		 * that was begun and not yet ended, or to -1 otherwise.
		 */
		for (i = curr - 1; i >= 0; i--) {
			if (linebuf[i] == ESC)
				break;
			/* Setting i to 0 makes the i-- end the loop at -1. */
			if (!is_ansi_middle(linebuf[i]))
				i = 0;
		}
		if (i >= 0 && !is_ansi_end(ch) && !is_ansi_middle(ch)) {
			/* Remove whole unrecognized sequence. */
			curr = i;
			return (0);
		}
		if (i >= 0 || ch == ESC) {
			a = AT_ANSI;  /* Will force re-AT_'ing around it. */
			w = 0;
		}
	}
	if (w == -1) {
		wchar_t prev_ch;

		/*
		 * Decode the last character in the buffer, which pwidth()
		 * needs in case ch is a backspace.  A blank stands in when
		 * there is none or it cannot be decoded.
		 */
		if (utf_mode) {
			for (i = curr - 1; i >= 0; i--)
				if (!IS_UTF8_TRAIL(linebuf[i]))
					break;
			if (i >= 0) {
				/* w is only borrowed for the byte count. */
				w = mbtowc(&prev_ch, linebuf + i, curr - i);
				if (w == -1 || i + w < curr) {
					(void)mbtowc(NULL, NULL, MB_CUR_MAX);
					prev_ch = L' ';
				}
			} else
				prev_ch = L' ';
		} else
			prev_ch = curr > 0 ? linebuf[curr - 1] : L' ';
		w = pwidth(ch, a, prev_ch);
	}

	if (ctldisp != OPT_ON && column + w + attr_ewidth(a) > sc_width)
		/*
		 * Won't fit on screen.
		 */
		return (1);

	if (rep == NULL) {
		cs = (char)ch;
		rep = &cs;
		replen = 1;
	} else {
		replen = utf_len(rep[0]);
	}
	/*
	 * NOTE(review): the slack of 6 presumably covers what pdone()
	 * appends without a bounds check -- confirm.
	 */
	if (curr + replen >= size_linebuf-6) {
		/*
		 * Won't fit in line buffer.
		 * Try to expand it.
		 */
		if (expand_linebuf())
			return (1);
	}

	while (replen-- > 0) {
		linebuf[curr] = *rep++;
		attr[curr] = a;
		curr++;
	}
	column += w;
	return (0);
}
609:
610: /*
1.5 millert 611: * Append a tab to the line buffer.
612: * Store spaces to represent the tab.
613: */
1.13 nicm 614: static int
615: store_tab(int attr, off_t pos)
1.5 millert 616: {
617: int to_tab = column + cshift - lmargin;
618: int i;
619:
620: if (ntabstops < 2 || to_tab >= tabstops[ntabstops-1])
621: to_tab = tabdefault -
1.13 nicm 622: ((to_tab - tabstops[ntabstops-1]) % tabdefault);
623: else {
1.21 deraadt 624: for (i = ntabstops - 2; i >= 0; i--)
1.5 millert 625: if (to_tab >= tabstops[i])
626: break;
627: to_tab = tabstops[i+1] - to_tab;
628: }
629:
1.13 nicm 630: if (column + to_tab - 1 + pwidth(' ', attr, 0) +
631: attr_ewidth(attr) > sc_width)
632: return (1);
1.10 shadchin 633:
1.5 millert 634: do {
1.26 schwarze 635: if (store_char(' ', attr, " ", pos))
636: return (1);
1.5 millert 637: } while (--to_tab > 0);
1.13 nicm 638: return (0);
1.5 millert 639: }
640:
1.13 nicm 641: static int
642: store_prchar(char c, off_t pos)
1.10 shadchin 643: {
644: char *s;
645:
646: /*
647: * Convert to printable representation.
648: */
649: s = prchar(c);
650:
651: /*
652: * Make sure we can get the entire representation
653: * of the character on this line.
654: */
1.13 nicm 655: if (column + (int)strlen(s) - 1 +
656: pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
657: return (1);
1.10 shadchin 658:
1.13 nicm 659: for (; *s != 0; s++) {
1.26 schwarze 660: if (store_char(*s, AT_BINARY, NULL, pos))
661: return (1);
1.13 nicm 662: }
663: return (0);
1.10 shadchin 664: }
665:
1.13 nicm 666: static int
667: flush_mbc_buf(off_t pos)
1.10 shadchin 668: {
669: int i;
670:
1.26 schwarze 671: for (i = 0; i < mbc_buf_index; i++) {
1.10 shadchin 672: if (store_prchar(mbc_buf[i], pos))
1.13 nicm 673: return (mbc_buf_index - i);
1.26 schwarze 674: }
1.13 nicm 675: return (0);
1.10 shadchin 676: }
677:
/*
 * Append a character to the line buffer.
 * Expand tabs into spaces, handle underlining, boldfacing, etc.
 *
 * c is the next input byte and pos its file position.
 * Returns 0 if ok.  A nonzero return means something could not be
 * stored; the final block of the function describes that value as the
 * number of chars the caller should back up (1 when not in utf_mode,
 * mbc_buf_index otherwise).
 */
int
pappend(char c, off_t pos)
{
	int r;

	if (pendc) {
		if (do_append(pendc, NULL, pendpos))
			/*
			 * Oops.  We've probably lost the char which
			 * was in pendc, since caller won't back up.
			 */
			return (1);
		pendc = '\0';
	}

	if (c == '\r' && bs_mode == BS_SPECIAL) {
		if (mbc_buf_len > 0) /* utf_mode must be on. */ {
			/* Flush incomplete (truncated) sequence. */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			if (r)
				return (mbc_buf_index);
		}

		/*
		 * Don't put the CR into the buffer until we see
		 * the next char.  If the next char is a newline,
		 * discard the CR.
		 */
		pendc = c;
		pendpos = pos;
		return (0);
	}

	if (!utf_mode) {
		r = do_append((LWCHAR) c, NULL, pos);
	} else {
		/* Perform strict validation in all possible cases. */
		if (mbc_buf_len == 0) {
			/* No sequence in progress: c starts a new one. */
retry:
			mbc_buf_index = 1;
			*mbc_buf = c;
			if (IS_ASCII_OCTET(c)) {
				r = do_append((LWCHAR) c, NULL, pos);
			} else if (IS_UTF8_LEAD(c)) {
				/* Wait for the trail bytes to arrive. */
				mbc_buf_len = utf_len(c);
				mbc_pos = pos;
				return (0);
			} else {
				/* UTF8_INVALID or stray UTF8_TRAIL */
				r = flush_mbc_buf(pos);
			}
		} else if (IS_UTF8_TRAIL(c)) {
			mbc_buf[mbc_buf_index++] = c;
			if (mbc_buf_index < mbc_buf_len)
				return (0);
			/* The sequence now has its expected length. */
			if (is_utf8_well_formed(mbc_buf))
				r = do_append(get_wchar(mbc_buf), mbc_buf,
				    mbc_pos);
			else
				/* Complete, but not shortest form, sequence. */
				mbc_buf_index = r = flush_mbc_buf(mbc_pos);
			mbc_buf_len = 0;
		} else {
			/* Flush incomplete (truncated) sequence. */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			/* Handle new char.  */
			if (!r)
				goto retry;
		}
	}

	/*
	 * If we need to shift the line, do it.
	 * But wait until we get to at least the middle of the screen,
	 * so shifting it doesn't affect the chars we're currently
	 * pappending.  (Bold & underline can get messed up otherwise.)
	 */
	if (cshift < hshift && column > sc_width / 2) {
		linebuf[curr] = '\0';
		pshift(hshift - cshift);
	}
	if (r) {
		/* How many chars should caller back up? */
		r = (!utf_mode) ? 1 : mbc_buf_index;
	}
	return (r);
}
774:
/*
 * Append one complete character to the line buffer, taking care of
 * backspaces, overstriking, tabs and characters that are not printable.
 *
 * ch is the character, rep points to its bytes (NULL for a single-byte
 * character) and pos is its file position.
 * Returns 0 if ok, 1 if something could not be stored.
 */
static int
do_append(LWCHAR ch, char *rep, off_t pos)
{
	wchar_t prev_ch;
	int a;

	a = AT_NORMAL;

	if (ch == '\b') {
		if (bs_mode == BS_CONTROL)
			goto do_control_char;

		/*
		 * A better test is needed here so we don't
		 * backspace over part of the printed
		 * representation of a binary character.
		 */
		if (curr <= lmargin ||
		    column <= lmargin ||
		    (attr[curr - 1] & (AT_ANSI|AT_BINARY))) {
			/* Nothing suitable to back up over: show the '\b'. */
			if (store_prchar('\b', pos))
				return (1);
		} else if (bs_mode == BS_NORMAL) {
			if (store_char(ch, AT_NORMAL, NULL, pos))
				return (1);
		} else if (bs_mode == BS_SPECIAL) {
			/* The next char overstrikes if backc() removed one. */
			overstrike = backc();
		}

		return (0);
	}

	if (overstrike > 0) {
		/*
		 * Overstrike the character at the current position
		 * in the line buffer.  This will cause either
		 * underline (if a "_" is overstruck),
		 * bold (if an identical character is overstruck),
		 * or just deletion of the character in the buffer.
		 */
		overstrike = utf_mode ? -1 : 0;
		/* To be correct, this must be a base character. */
		/*
		 * The char that backc() removed is still in the buffer at
		 * curr; L'\0' stands for "could not be decoded".
		 */
		if (mbtowc(&prev_ch, linebuf + curr, MB_CUR_MAX) == -1) {
			(void)mbtowc(NULL, NULL, MB_CUR_MAX);
			prev_ch = L'\0';
		}
		a = attr[curr];
		if (ch == prev_ch) {
			/*
			 * Overstriking a char with itself means make it bold.
			 * But overstriking an underscore with itself is
			 * ambiguous.  It could mean make it bold, or
			 * it could mean make it underlined.
			 * Use the previous overstrike to resolve it.
			 */
			if (ch == '_') {
				if ((a & (AT_BOLD|AT_UNDERLINE)) != AT_NORMAL)
					a |= (AT_BOLD|AT_UNDERLINE);
				else if (curr > 0 && attr[curr - 1] & AT_UNDERLINE)
					a |= AT_UNDERLINE;
				else if (curr > 0 && attr[curr - 1] & AT_BOLD)
					a |= AT_BOLD;
				else
					/* Left undecided; pdone() settles it. */
					a |= AT_INDET;
			} else {
				a |= AT_BOLD;
			}
		} else if (ch == '_' && prev_ch != L'\0') {
			/* Keep the overstruck char, now underlined. */
			a |= AT_UNDERLINE;
			ch = prev_ch;
			rep = linebuf + curr;
		} else if (prev_ch == '_') {
			a |= AT_UNDERLINE;
		}
		/* Else we replace prev_ch, but we keep its attributes. */
	} else if (overstrike < 0) {
		if (wcwidth(ch) == 0) {
			/* Continuation of the same overstrike. */
			if (curr > 0)
				a = attr[curr - 1] & (AT_UNDERLINE | AT_BOLD);
			else
				a = AT_NORMAL;
		} else
			overstrike = 0;
	}

	if (ch == '\t') {
		/*
		 * Expand a tab into spaces.
		 */
		switch (bs_mode) {
		case BS_CONTROL:
			goto do_control_char;
		case BS_NORMAL:
		case BS_SPECIAL:
			if (store_tab(a, pos))
				return (1);
			break;
		}
	} else if ((!utf_mode || is_ascii_char(ch)) &&
	    !isprint((unsigned char)ch)) {
do_control_char:
		if (ctldisp == OPT_ON ||
		    (ctldisp == OPT_ONPLUS && ch == ESC)) {
			/*
			 * Output as a normal character.
			 */
			if (store_char(ch, AT_NORMAL, rep, pos))
				return (1);
		} else {
			if (store_prchar(ch, pos))
				return (1);
		}
	} else if (utf_mode && ctldisp != OPT_ON && !iswprint(ch)) {
		char *s;

		/* Show the text that prutfchar() produces for ch instead. */
		s = prutfchar(ch);

		/* All of that text has to fit on this line. */
		if (column + (int)strlen(s) - 1 +
		    pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
			return (1);

		for (; *s != 0; s++) {
			if (store_char(*s, AT_BINARY, NULL, pos))
				return (1);
		}
	} else {
		if (store_char(ch, a, rep, pos))
			return (1);
	}
	return (0);
}
907:
908: /*
909: *
910: */
1.13 nicm 911: int
912: pflushmbc(void)
1.10 shadchin 913: {
914: int r = 0;
1.1 etheisen 915:
1.13 nicm 916: if (mbc_buf_len > 0) {
1.10 shadchin 917: /* Flush incomplete (truncated) sequence. */
918: r = flush_mbc_buf(mbc_pos);
919: mbc_buf_len = 0;
920: }
1.13 nicm 921: return (r);
1.1 etheisen 922: }
923:
/*
 * Terminate the line in the line buffer.
 *
 * endline is nonzero when this is really the end of the input line.
 * forw is nonzero when moving forward through the file.
 * Input that is still pending is flushed, undecided attributes are
 * resolved, an outstanding shift is applied, and the line is closed
 * with whatever the terminal needs plus a terminating '\0'.
 */
void
pdone(int endline, int forw)
{
	int i;

	(void) pflushmbc();

	if (pendc && (pendc != '\r' || !endline))
		/*
		 * If we had a pending character, put it in the buffer.
		 * But discard a pending CR if we are at end of line
		 * (that is, discard the CR in a CR/LF sequence).
		 */
		(void) do_append(pendc, NULL, pendpos);

	/*
	 * Settle characters marked AT_INDET, going from right to left:
	 * bold if the following cell is bold, underlined otherwise.
	 */
	for (i = curr - 1; i >= 0; i--) {
		if (attr[i] & AT_INDET) {
			attr[i] &= ~AT_INDET;
			if (i < curr - 1 && attr[i + 1] & AT_BOLD)
				attr[i] |= AT_BOLD;
			else
				attr[i] |= AT_UNDERLINE;
		}
	}

	/*
	 * Make sure we've shifted the line, if we need to.
	 */
	if (cshift < hshift)
		pshift(hshift - cshift);

	if (ctldisp == OPT_ONPLUS && is_ansi_end('m')) {
		/* Switch to normal attribute at end of line. */
		char *p = "\033[m";
		for (; *p != '\0'; p++) {
			linebuf[curr] = *p;
			attr[curr++] = AT_ANSI;
		}
	}

	/*
	 * Add a newline if necessary,
	 * and append a '\0' to the end of the line.
	 * We output a newline if we're not at the right edge of the screen,
	 * or if the terminal doesn't auto wrap,
	 * or if this is really the end of the line AND the terminal ignores
	 * a newline at the right edge.
	 * (In the last case we don't want to output a newline if the terminal
	 * doesn't ignore it since that would produce an extra blank line.
	 * But we do want to output a newline if the terminal ignores it in case
	 * the next line is blank.  In that case the single newline output for
	 * that blank line would be ignored!)
	 */
	if (column < sc_width || !auto_wrap || (endline && ignaw) ||
	    ctldisp == OPT_ON) {
		linebuf[curr] = '\n';
		attr[curr] = AT_NORMAL;
		curr++;
	} else if (ignaw && column >= sc_width && forw) {
		/*
		 * Terminals with "ignaw" don't wrap until they *really* need
		 * to, i.e. when the character *after* the last one to fit on a
		 * line is output. But they are too hard to deal with when they
		 * get in the state where a full screen width of characters
		 * have been output but the cursor is sitting on the right edge
		 * instead of at the start of the next line.
		 * So we nudge them into wrapping by outputting a space
		 * character plus a backspace.  But do this only if moving
		 * forward; if we're moving backward and drawing this line at
		 * the top of the screen, the space would overwrite the first
		 * char on the next line.  We don't need to do this "nudge"
		 * at the top of the screen anyway.
		 */
		linebuf[curr] = ' ';
		attr[curr++] = AT_NORMAL;
		linebuf[curr] = '\b';
		attr[curr++] = AT_NORMAL;
	}
	/* The terminator is written but curr is not moved past it. */
	linebuf[curr] = '\0';
	attr[curr] = AT_NORMAL;
}
1.5 millert 1008:
/*
 * Put c, highlighted, into the very first cell of the line buffer.
 * NOTE(review): presumably that cell is the status column which
 * plinenum() reserves when status_col is set -- confirm with callers.
 */
void
set_status_col(char c)
{
	linebuf[0] = c;
	attr[0] = AT_NORMAL|AT_HILITE;
}
1018:
1019: /*
1020: * Get a character from the current line.
1021: * Return the character as the function return value,
1022: * and the character attribute in *ap.
1023: */
1.13 nicm 1024: int
1025: gline(int i, int *ap)
1.1 etheisen 1026: {
1.13 nicm 1027: if (is_null_line) {
1.1 etheisen 1028: /*
1029: * If there is no current line, we pretend the line is
1030: * either "~" or "", depending on the "twiddle" flag.
1031: */
1.13 nicm 1032: if (twiddle) {
1033: if (i == 0) {
1.10 shadchin 1034: *ap = AT_BOLD;
1.13 nicm 1035: return ('~');
1.10 shadchin 1036: }
1037: --i;
1038: }
1039: /* Make sure we're back to AT_NORMAL before the '\n'. */
1040: *ap = AT_NORMAL;
1.13 nicm 1041: return (i ? '\0' : '\n');
1.1 etheisen 1042: }
1043:
1044: *ap = attr[i];
1.10 shadchin 1045: return (linebuf[i] & 0xFF);
1.1 etheisen 1046: }
1047:
1048: /*
1049: * Indicate that there is no current line.
1050: */
1.13 nicm 1051: void
1052: null_line(void)
1.1 etheisen 1053: {
1054: is_null_line = 1;
1.5 millert 1055: cshift = 0;
1.1 etheisen 1056: }
1057:
1058: /*
1059: * Analogous to forw_line(), but deals with "raw lines":
1060: * lines which are not split for screen width.
1061: * {{ This is supposed to be more efficient than forw_line(). }}
1062: */
1.13 nicm 1063: off_t
1064: forw_raw_line(off_t curr_pos, char **linep, int *line_lenp)
1065: {
1066: int n;
1067: int c;
1068: off_t new_pos;
1069:
1070: if (curr_pos == -1 || ch_seek(curr_pos) ||
1071: (c = ch_forw_get()) == EOI)
1072: return (-1);
1.1 etheisen 1073:
1.5 millert 1074: n = 0;
1.13 nicm 1075: for (;;) {
1076: if (c == '\n' || c == EOI || ABORT_SIGS()) {
1.1 etheisen 1077: new_pos = ch_tell();
1078: break;
1079: }
1.13 nicm 1080: if (n >= size_linebuf-1) {
1081: if (expand_linebuf()) {
1.5 millert 1082: /*
1083: * Overflowed the input buffer.
1084: * Pretend the line ended here.
1085: */
1086: new_pos = ch_tell() - 1;
1087: break;
1088: }
1.1 etheisen 1089: }
1.13 nicm 1090: linebuf[n++] = (char)c;
1.1 etheisen 1091: c = ch_forw_get();
1092: }
1.5 millert 1093: linebuf[n] = '\0';
1.1 etheisen 1094: if (linep != NULL)
1095: *linep = linebuf;
1.10 shadchin 1096: if (line_lenp != NULL)
1097: *line_lenp = n;
1.1 etheisen 1098: return (new_pos);
1099: }
1100:
/*
 * Analogous to back_line(), but deals with "raw lines".
 * {{ This is supposed to be more efficient than back_line(). }}
 *
 * Reads the line that ends just before curr_pos.  The bytes are read
 * backwards and therefore stored from the end of linebuf towards its
 * start.  If linep is not NULL, *linep is set to the first byte of the
 * line inside linebuf; if line_lenp is not NULL, *line_lenp is set to
 * its length.
 * Returns the position of the start of the line, or -1 if curr_pos is
 * -1, is not beyond the start of the file, or cannot be seeked to.
 */
off_t
back_raw_line(off_t curr_pos, char **linep, int *line_lenp)
{
	int n;		/* Index of the first byte stored so far. */
	int c;
	off_t new_pos;

	if (curr_pos == -1 || curr_pos <= ch_zero() || ch_seek(curr_pos - 1))
		return (-1);

	n = size_linebuf;
	linebuf[--n] = '\0';
	for (;;) {
		c = ch_back_get();
		if (c == '\n' || ABORT_SIGS()) {
			/*
			 * This is the newline ending the previous line.
			 * We have hit the beginning of the line.
			 */
			new_pos = ch_tell() + 1;
			break;
		}
		if (c == EOI) {
			/*
			 * We have hit the beginning of the file.
			 * This must be the first line in the file.
			 * This must, of course, be the beginning of the line.
			 */
			new_pos = ch_zero();
			break;
		}
		if (n <= 0) {
			int old_size_linebuf = size_linebuf;
			if (expand_linebuf()) {
				/*
				 * Overflowed the input buffer.
				 * Pretend the line ended here.
				 */
				new_pos = ch_tell() + 1;
				break;
			}
			/*
			 * Shift the data to the end of the new linebuf.
			 */
			n = size_linebuf - old_size_linebuf;
			memmove(linebuf + n, linebuf, old_size_linebuf);
		}
		linebuf[--n] = c;
	}
	if (linep != NULL)
		*linep = &linebuf[n];
	if (line_lenp != NULL)
		/* The terminating '\0' in the last cell is not counted. */
		*line_lenp = size_linebuf - 1 - n;
	return (new_pos);
}