[BACK]Return to line.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / less

Annotation of src/usr.bin/less/line.c, Revision 1.16

1.1       etheisen    1: /*
1.12      shadchin    2:  * Copyright (C) 1984-2012  Mark Nudelman
1.14      nicm        3:  * Modified for use with illumos by Garrett D'Amore.
                      4:  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
1.1       etheisen    5:  *
1.5       millert     6:  * You may distribute under the terms of either the GNU General Public
                      7:  * License or the Less License, as specified in the README file.
1.1       etheisen    8:  *
1.12      shadchin    9:  * For more information, see the README file.
1.13      nicm       10:  */
1.1       etheisen   11:
                     12: /*
                     13:  * Routines to manipulate the "line buffer".
                     14:  * The line buffer holds a line of output as it is being built
                     15:  * in preparation for output to the screen.
                     16:  */
                     17:
                     18: #include "less.h"
1.10      shadchin   19: #include "charset.h"
1.1       etheisen   20:
1.10      shadchin   21: static char *linebuf = NULL;   /* Buffer which holds the current output line */
1.5       millert    22: static char *attr = NULL;      /* Extension of linebuf to hold attributes */
1.13      nicm       23: int size_linebuf = 0;          /* Size of line buffer (and attr buffer) */
1.5       millert    24:
1.10      shadchin   25: static int cshift;             /* Current left-shift of output line buffer */
1.13      nicm       26: int hshift;                    /* Desired left-shift of output line buffer */
                     27: int tabstops[TABSTOP_MAX] = { 0 }; /* Custom tabstops */
                     28: int ntabstops = 1;             /* Number of tabstops */
                     29: int tabdefault = 8;            /* Default repeated tabstops */
                     30: off_t highest_hilite;          /* Pos of last hilite in file found so far */
1.1       etheisen   31:
                     32: static int curr;               /* Index into linebuf */
1.13      nicm       33: static int column;     /* Printable length, accounting for backspaces, etc. */
1.1       etheisen   34: static int overstrike;         /* Next char should overstrike previous char */
1.5       millert    35: static int last_overstrike = AT_NORMAL; /* Last underline/bold bits passed to store_char() */
1.1       etheisen   36: static int is_null_line;       /* There is no current line */
1.5       millert    37: static int lmargin;            /* Left margin */
1.1       etheisen   38: static char pendc;             /* Reset to '\0' by prewind(); presumably a pending char -- confirm */
1.13      nicm       39: static off_t pendpos;          /* NOTE(review): presumably the file position of pendc -- confirm */
1.5       millert    40: static char *end_ansi_chars;   /* Chars ending an ANSI sequence (LESSANSIENDCHARS, default "m") */
1.10      shadchin   41: static char *mid_ansi_chars;   /* Chars allowed inside an ANSI sequence (LESSANSIMIDCHARS) */
1.1       etheisen   42:
1.13      nicm       43: static int attr_swidth(int);
                     44: static int attr_ewidth(int);
                     45: static int do_append(LWCHAR, char *, off_t);
1.1       etheisen   46:
1.11      millert    47: extern volatile sig_atomic_t sigs;
1.1       etheisen   48: extern int bs_mode;
                     49: extern int linenums;
                     50: extern int ctldisp;
                     51: extern int twiddle;
                     52: extern int binattr;
1.5       millert    53: extern int status_col;
1.1       etheisen   54: extern int auto_wrap, ignaw;
                     55: extern int bo_s_width, bo_e_width;
                     56: extern int ul_s_width, ul_e_width;
                     57: extern int bl_s_width, bl_e_width;
                     58: extern int so_s_width, so_e_width;
                     59: extern int sc_width, sc_height;
1.5       millert    60: extern int utf_mode;
1.13      nicm       61: extern off_t start_attnpos;
                     62: extern off_t end_attnpos;
1.5       millert    63:
1.10      shadchin   64: static char mbc_buf[MAX_UTF_CHAR_LEN]; /* NOTE(review): presumably bytes of a partial multibyte char -- confirm */
                     65: static int mbc_buf_len = 0;    /* Reset to 0 by prewind() */
                     66: static int mbc_buf_index = 0;
1.13      nicm       67: static off_t mbc_pos;
1.10      shadchin   68:
1.5       millert    69: /*
                     70:  * Initialize from environment variables.
                     71:  */
1.13      nicm       72: void
                     73: init_line(void)
1.5       millert    74: {
                     75:        end_ansi_chars = lgetenv("LESSANSIENDCHARS");
                     76:        if (end_ansi_chars == NULL || *end_ansi_chars == '\0')
                     77:                end_ansi_chars = "m";
1.10      shadchin   78:
                     79:        mid_ansi_chars = lgetenv("LESSANSIMIDCHARS");
                     80:        if (mid_ansi_chars == NULL || *mid_ansi_chars == '\0')
                     81:                mid_ansi_chars = "0123456789;[?!\"'#%()*+ ";
                     82:
1.13      nicm       83:        linebuf = ecalloc(LINEBUF_SIZE, sizeof (char));
                     84:        attr = ecalloc(LINEBUF_SIZE, sizeof (char));
1.5       millert    85:        size_linebuf = LINEBUF_SIZE;
                     86: }
                     87:
                     88: /*
                     89:  * Expand the line buffer.
                     90:  */
1.13      nicm       91: static int
                     92: expand_linebuf(void)
1.5       millert    93: {
1.10      shadchin   94:        /* Double the size of the line buffer. */
1.7       millert    95:        int new_size = size_linebuf * 2;
1.10      shadchin   96:
                     97:        /* Just realloc to expand the buffer, if we can. */
1.13      nicm       98:        char *new_buf = realloc(linebuf, new_size);
                     99:        char *new_attr = realloc(attr, new_size);
                    100:        if (new_buf == NULL || new_attr == NULL) {
1.15      mmcc      101:                free(new_attr);
                    102:                free(new_buf);
1.13      nicm      103:                return (1);
1.5       millert   104:        }
                    105:        linebuf = new_buf;
                    106:        attr = new_attr;
                    107:        size_linebuf = new_size;
1.13      nicm      108:        return (0);
1.5       millert   109: }
1.1       etheisen  110:
                    111: /*
1.10      shadchin  112:  * Is a character ASCII?
                    113:  */
1.13      nicm      114: int
                    115: is_ascii_char(LWCHAR ch)
1.10      shadchin  116: {
                    117:        return (ch <= 0x7F);
                    118: }
                    119:
                    120: /*
1.1       etheisen  121:  * Rewind the line buffer.
                    122:  */
1.13      nicm      123: void
                    124: prewind(void)
1.1       etheisen  125: {
                    126:        curr = 0;
                    127:        column = 0;
1.10      shadchin  128:        cshift = 0;
1.1       etheisen  129:        overstrike = 0;
1.10      shadchin  130:        last_overstrike = AT_NORMAL;
                    131:        mbc_buf_len = 0;
1.1       etheisen  132:        is_null_line = 0;
                    133:        pendc = '\0';
1.5       millert   134:        lmargin = 0;
                    135:        if (status_col)
                    136:                lmargin += 1;
1.1       etheisen  137: }
                    138:
                    139: /*
                    140:  * Insert the line number (of the given position) into the line buffer.
                    141:  */
1.13      nicm      142: void
                    143: plinenum(off_t pos)
1.1       etheisen  144: {
1.16    ! mmcc      145:        off_t linenum = 0;
1.13      nicm      146:        int i;
1.5       millert   147:
1.13      nicm      148:        if (linenums == OPT_ONPLUS) {
1.5       millert   149:                /*
                    150:                 * Get the line number and put it in the current line.
                    151:                 * {{ Note: since find_linenum calls forw_raw_line,
1.13      nicm      152:                 *    it may seek in the input file, requiring the caller
1.5       millert   153:                 *    of plinenum to re-seek if necessary. }}
                    154:                 * {{ Since forw_raw_line modifies linebuf, we must
                    155:                 *    do this first, before storing anything in linebuf. }}
                    156:                 */
                    157:                linenum = find_linenum(pos);
                    158:        }
1.1       etheisen  159:
                    160:        /*
1.5       millert   161:         * Display a status column if the -J option is set.
1.1       etheisen  162:         */
1.13      nicm      163:        if (status_col) {
1.5       millert   164:                linebuf[curr] = ' ';
1.13      nicm      165:                if (start_attnpos != -1 &&
1.5       millert   166:                    pos >= start_attnpos && pos < end_attnpos)
1.10      shadchin  167:                        attr[curr] = AT_NORMAL|AT_HILITE;
1.5       millert   168:                else
1.10      shadchin  169:                        attr[curr] = AT_NORMAL;
1.5       millert   170:                curr++;
                    171:                column++;
                    172:        }
1.1       etheisen  173:        /*
1.5       millert   174:         * Display the line number at the start of each line
                    175:         * if the -N option is set.
1.1       etheisen  176:         */
1.13      nicm      177:        if (linenums == OPT_ONPLUS) {
1.5       millert   178:                char buf[INT_STRLEN_BOUND(pos) + 2];
                    179:                int n;
1.1       etheisen  180:
1.13      nicm      181:                linenumtoa(linenum, buf, sizeof (buf));
1.5       millert   182:                n = strlen(buf);
                    183:                if (n < MIN_LINENUM_WIDTH)
                    184:                        n = MIN_LINENUM_WIDTH;
1.6       millert   185:                snprintf(linebuf+curr, size_linebuf-curr, "%*s ", n, buf);
1.5       millert   186:                n++;  /* One space after the line number. */
                    187:                for (i = 0; i < n; i++)
                    188:                        attr[curr+i] = AT_NORMAL;
                    189:                curr += n;
                    190:                column += n;
                    191:                lmargin += n;
                    192:        }
1.1       etheisen  193:
                    194:        /*
1.5       millert   195:         * Append enough spaces to bring us to the lmargin.
1.1       etheisen  196:         */
1.13      nicm      197:        while (column < lmargin) {
1.1       etheisen  198:                linebuf[curr] = ' ';
                    199:                attr[curr++] = AT_NORMAL;
                    200:                column++;
1.5       millert   201:        }
                    202: }
                    203:
                    204: /*
1.10      shadchin  205:  * Shift the input line left.
                    206:  * This means discarding N printable chars at the start of the buffer.
                          *
                          * shift is the number of screen columns to drop; it is clamped to
                          * what lies to the right of lmargin.  CSI escape sequences met
                          * while skipping are copied through (when ctldisp == OPT_ONPLUS)
                          * rather than dropped.  On return curr, column and cshift have
                          * been adjusted by the number of columns actually dropped.
1.5       millert   207:  */
1.13      nicm      208: static void
                    209: pshift(int shift)
1.10      shadchin  210: {
                    211:        LWCHAR prev_ch = 0;
                    212:        unsigned char c;
                    213:        int shifted = 0;        /* Columns dropped so far */
                    214:        int to;                 /* Write index into linebuf/attr */
                    215:        int from;               /* Read index into linebuf/attr */
1.5       millert   216:        int len;                /* Byte length of the current char */
1.10      shadchin  217:        int width;              /* Column width of the current char */
                    218:        int prev_attr;
                    219:        int next_attr;
                    220:
                    221:        if (shift > column - lmargin)   /* Never drop more columns than exist... */
                    222:                shift = column - lmargin;
                    223:        if (shift > curr - lmargin)     /* ...nor more than there are bytes. */
                    224:                shift = curr - lmargin;
1.5       millert   225:
1.10      shadchin  226:        to = from = lmargin;
1.5       millert   227:        /*
1.10      shadchin  228:         * We keep on going when shifted == shift
                    229:         * to get all combining chars.
1.5       millert   230:         */
1.13      nicm      231:        while (shifted <= shift && from < curr) {
1.10      shadchin  232:                c = linebuf[from];
1.13      nicm      233:                if (ctldisp == OPT_ONPLUS && IS_CSI_START(c)) {
1.10      shadchin  234:                        /* Keep cumulative effect.  */
                    235:                        linebuf[to] = c;
                    236:                        attr[to++] = attr[from++];
1.13      nicm      237:                        while (from < curr && linebuf[from]) {
1.10      shadchin  238:                                linebuf[to] = linebuf[from];
                    239:                                attr[to++] = attr[from];
                    240:                                if (!is_ansi_middle(linebuf[from++]))
                    241:                                        break;  /* First non-middle char has been copied too. */
1.13      nicm      242:                        }
1.10      shadchin  243:                        continue;       /* The sequence adds nothing to shifted. */
                    244:                }
                    245:
                    246:                width = 0;
                    247:
1.13      nicm      248:                if (!IS_ASCII_OCTET(c) && utf_mode) {
1.10      shadchin  249:                        /* Assumes well-formedness validation already done.  */
                    250:                        LWCHAR ch;
                    251:
                    252:                        len = utf_len(c);
                    253:                        if (from + len > curr)  /* Sequence runs past the stored data. */
                    254:                                break;
                    255:                        ch = get_wchar(linebuf + from);
1.13      nicm      256:                        if (!is_composing_char(ch) &&
                    257:                            !is_combining_char(prev_ch, ch))
1.10      shadchin  258:                                width = is_wide_char(ch) ? 2 : 1;
                    259:                        prev_ch = ch;
1.13      nicm      260:                } else {
1.10      shadchin  261:                        len = 1;
                    262:                        if (c == '\b')
                    263:                                /* XXX - Incorrect if several '\b' in a row.  */
1.13      nicm      264:                                width = (utf_mode && is_wide_char(prev_ch)) ?
                    265:                                    -2 : -1;
1.10      shadchin  266:                        else if (!control_char(c))
                    267:                                width = 1;
                    268:                        prev_ch = 0;
                    269:                }
                    270:
                    271:                if (width == 2 && shift - shifted == 1) {       /* Boundary splits a 2-column char. */
                    272:                        /* Should never happen when called by pshift_all().  */
                    273:                        attr[to] = attr[from];
                    274:                        /*
                    275:                         * Assume a wide_char will never be the first half of a
                    276:                         * combining_char pair, so reset prev_ch in case we're
                    277:                         * followed by a '\b'.
                    278:                         */
                    279:                        prev_ch = linebuf[to++] = ' ';
                    280:                        from += len;
                    281:                        shifted++;
                    282:                        continue;
                    283:                }
                    284:
                    285:                /* Adjust width for magic cookies. */
                    286:                prev_attr = (to > 0) ? attr[to-1] : AT_NORMAL;
                    287:                next_attr = (from + len < curr) ? attr[from + len] : prev_attr;
1.13      nicm      288:                if (!is_at_equiv(attr[from], prev_attr) &&
                    289:                    !is_at_equiv(attr[from], next_attr)) {
1.10      shadchin  290:                        width += attr_swidth(attr[from]);
                    291:                        if (from + len < curr)
                    292:                                width += attr_ewidth(attr[from]);
1.13      nicm      293:                        if (is_at_equiv(prev_attr, next_attr)) {
1.10      shadchin  294:                                width += attr_ewidth(prev_attr);
                    295:                                if (from + len < curr)
                    296:                                        width += attr_swidth(next_attr);
1.5       millert   297:                        }
                    298:                }
                    299:
1.10      shadchin  300:                if (shift - shifted < width)    /* This char would overshoot the requested shift. */
                    301:                        break;
                    302:                from += len;
                    303:                shifted += width;
                    304:                if (shifted < 0)        /* '\b' has negative width; clamp the running total. */
                    305:                        shifted = 0;
                    306:        }
1.13      nicm      307:        while (from < curr) {   /* Move the kept remainder down to index 'to'. */
1.10      shadchin  308:                linebuf[to] = linebuf[from];
                    309:                attr[to++] = attr[from++];
                    310:        }
                    311:        curr = to;
                    312:        column -= shifted;
                    313:        cshift += shifted;
1.5       millert   314: }
                    315:
                    316: /*
1.10      shadchin  317:  *
1.5       millert   318:  */
1.13      nicm      319: void
                    320: pshift_all(void)
1.5       millert   321: {
1.10      shadchin  322:        pshift(column);
1.1       etheisen  323: }
                    324:
                    325: /*
                    326:  * Return the printing width of the start (enter) sequence
                    327:  * for a given character attribute.
                    328:  */
1.13      nicm      329: static int
                    330: attr_swidth(int a)
1.1       etheisen  331: {
1.10      shadchin  332:        int w = 0;
                    333:
                    334:        a = apply_at_specials(a);
                    335:
                    336:        if (a & AT_UNDERLINE)
                    337:                w += ul_s_width;
                    338:        if (a & AT_BOLD)
                    339:                w += bo_s_width;
                    340:        if (a & AT_BLINK)
                    341:                w += bl_s_width;
                    342:        if (a & AT_STANDOUT)
                    343:                w += so_s_width;
                    344:
1.13      nicm      345:        return (w);
1.1       etheisen  346: }
                    347:
                    348: /*
                    349:  * Return the printing width of the end (exit) sequence
                    350:  * for a given character attribute.
                    351:  */
1.13      nicm      352: static int
                    353: attr_ewidth(int a)
1.1       etheisen  354: {
1.10      shadchin  355:        int w = 0;
                    356:
                    357:        a = apply_at_specials(a);
                    358:
                    359:        if (a & AT_UNDERLINE)
                    360:                w += ul_e_width;
                    361:        if (a & AT_BOLD)
                    362:                w += bo_e_width;
                    363:        if (a & AT_BLINK)
                    364:                w += bl_e_width;
                    365:        if (a & AT_STANDOUT)
                    366:                w += so_e_width;
                    367:
1.13      nicm      368:        return (w);
1.1       etheisen  369: }
                    370:
                    371: /*
                    372:  * Return the printing width of a given character and attribute,
                    373:  * if the character were added to the current position in the line buffer.
                    374:  * Adding a character with a given attribute may cause an enter or exit
                    375:  * attribute sequence to be inserted, so this must be taken into account.
                    376:  */
1.13      nicm      377: static int
                    378: pwidth(LWCHAR ch, int a, LWCHAR prev_ch)
1.1       etheisen  379: {
1.10      shadchin  380:        int w;
1.1       etheisen  381:
1.10      shadchin  382:        if (ch == '\b')
1.1       etheisen  383:                /*
1.10      shadchin  384:                 * Backspace moves backwards one or two positions.
                    385:                 * XXX - Incorrect if several '\b' in a row.
1.1       etheisen  386:                 */
1.13      nicm      387:                return ((utf_mode && is_wide_char(prev_ch)) ? -2 : -1);
                    388:
                    389:        if (!utf_mode || is_ascii_char(ch)) {
                    390:                if (control_char((char)ch)) {
1.10      shadchin  391:                        /*
                    392:                         * Control characters do unpredictable things,
                    393:                         * so we don't even try to guess; say it doesn't move.
                    394:                         * This can only happen if the -r flag is in effect.
                    395:                         */
                    396:                        return (0);
                    397:                }
1.13      nicm      398:        } else {
                    399:                if (is_composing_char(ch) || is_combining_char(prev_ch, ch)) {
1.10      shadchin  400:                        /*
                    401:                         * Composing and combining chars take up no space.
                    402:                         *
                    403:                         * Some terminals, upon failure to compose a
                    404:                         * composing character with the character(s) that
                    405:                         * precede(s) it will actually take up one column
                    406:                         * for the composing character; there isn't much
                    407:                         * we could do short of testing the (complex)
                    408:                         * composition process ourselves and printing
                    409:                         * a binary representation when it fails.
                    410:                         */
                    411:                        return (0);
                    412:                }
                    413:        }
1.1       etheisen  414:
                    415:        /*
1.10      shadchin  416:         * Other characters take one or two columns,
1.1       etheisen  417:         * plus the width of any attribute enter/exit sequence.
                    418:         */
                    419:        w = 1;
1.10      shadchin  420:        if (is_wide_char(ch))
                    421:                w++;
                    422:        if (curr > 0 && !is_at_equiv(attr[curr-1], a))
1.1       etheisen  423:                w += attr_ewidth(attr[curr-1]);
1.10      shadchin  424:        if ((apply_at_specials(a) != AT_NORMAL) &&
                    425:            (curr == 0 || !is_at_equiv(attr[curr-1], a)))
1.1       etheisen  426:                w += attr_swidth(a);
                    427:        return (w);
                    428: }
                    429:
                    430: /*
1.10      shadchin  431:  * Delete to the previous base character in the line buffer.
                    432:  * Return 1 if one is found.
1.1       etheisen  433:  */
1.13      nicm      434: static int
                    435: backc(void)
1.1       etheisen  436: {
1.10      shadchin  437:        LWCHAR prev_ch;
                    438:        char *p = linebuf + curr;
                    439:        LWCHAR ch = step_char(&p, -1, linebuf + lmargin);
                    440:        int width;
                    441:
                    442:        /* This assumes that there is no '\b' in linebuf.  */
1.13      nicm      443:        while (curr > lmargin && column > lmargin &&
                    444:            (!(attr[curr - 1] & (AT_ANSI|AT_BINARY)))) {
1.10      shadchin  445:                curr = p - linebuf;
                    446:                prev_ch = step_char(&p, -1, linebuf + lmargin);
                    447:                width = pwidth(ch, attr[curr], prev_ch);
                    448:                column -= width;
                    449:                if (width > 0)
1.13      nicm      450:                        return (1);
1.10      shadchin  451:                ch = prev_ch;
                    452:        }
                    453:
1.13      nicm      454:        return (0);
1.1       etheisen  455: }
                    456:
                    457: /*
1.5       millert   458:  * Are we currently within a recognized ANSI escape sequence?
                    459:  */
1.13      nicm      460: static int
                    461: in_ansi_esc_seq(void)
1.5       millert   462: {
1.10      shadchin  463:        char *p;
1.5       millert   464:
                    465:        /*
                    466:         * Search backwards for either an ESC (which means we ARE in a seq);
                    467:         * or an end char (which means we're NOT in a seq).
                    468:         */
1.13      nicm      469:        for (p = &linebuf[curr];  p > linebuf; ) {
1.10      shadchin  470:                LWCHAR ch = step_char(&p, -1, linebuf);
                    471:                if (IS_CSI_START(ch))
1.5       millert   472:                        return (1);
1.10      shadchin  473:                if (!is_ansi_middle(ch))
1.5       millert   474:                        return (0);
                    475:        }
                    476:        return (0);
                    477: }
                    478:
                    479: /*
                    480:  * Is a character the end of an ANSI escape sequence?
                    481:  */
1.13      nicm      482: int
                    483: is_ansi_end(LWCHAR ch)
1.10      shadchin  484: {
                    485:        if (!is_ascii_char(ch))
                    486:                return (0);
1.13      nicm      487:        return (strchr(end_ansi_chars, (char)ch) != NULL);
1.10      shadchin  488: }
                    489:
                    490: /*
                    491:  *
                    492:  */
1.13      nicm      493: int
                    494: is_ansi_middle(LWCHAR ch)
1.5       millert   495: {
1.10      shadchin  496:        if (!is_ascii_char(ch))
                    497:                return (0);
                    498:        if (is_ansi_end(ch))
                    499:                return (0);
1.13      nicm      500:        return (strchr(mid_ansi_chars, (char)ch) != NULL);
1.5       millert   501: }
                    502:
                    503: /*
1.1       etheisen  504:  * Append a character and attribute to the line buffer.
                    505:  */
1.13      nicm      506: #define        STORE_CHAR(ch, a, rep, pos)                             \
                    507:                if (store_char((ch), (a), (rep), (pos)))        \
                    508:                        return (1)
                    509:
                    510: static int
                    511: store_char(LWCHAR ch, char a, char *rep, off_t pos)
1.1       etheisen  512: {
1.10      shadchin  513:        int w;
                    514:        int replen;
                    515:        char cs;
1.13      nicm      516:        int matches;
1.10      shadchin  517:
                    518:        w = (a & (AT_UNDERLINE|AT_BOLD));       /* Pre-use w.  */
                    519:        if (w != AT_NORMAL)
                    520:                last_overstrike = w;
1.1       etheisen  521:
1.13      nicm      522:        if (is_hilited(pos, pos+1, 0, &matches)) {
                    523:                /*
                    524:                 * This character should be highlighted.
                    525:                 * Override the attribute passed in.
                    526:                 */
                    527:                if (a != AT_ANSI) {
                    528:                        if (highest_hilite != -1 && pos > highest_hilite)
                    529:                                highest_hilite = pos;
                    530:                        a |= AT_HILITE;
1.10      shadchin  531:                }
1.5       millert   532:        }
1.10      shadchin  533:
1.13      nicm      534:        if (ctldisp == OPT_ONPLUS && in_ansi_esc_seq()) {
1.10      shadchin  535:                if (!is_ansi_end(ch) && !is_ansi_middle(ch)) {
                    536:                        /* Remove whole unrecognized sequence.  */
                    537:                        char *p = &linebuf[curr];
                    538:                        LWCHAR bch;
                    539:                        do {
                    540:                                bch = step_char(&p, -1, linebuf);
                    541:                        } while (p > linebuf && !IS_CSI_START(bch));
                    542:                        curr = p - linebuf;
1.13      nicm      543:                        return (0);
1.10      shadchin  544:                }
                    545:                a = AT_ANSI;    /* Will force re-AT_'ing around it.  */
1.5       millert   546:                w = 0;
1.13      nicm      547:        } else if (ctldisp == OPT_ONPLUS && IS_CSI_START(ch)) {
1.10      shadchin  548:                a = AT_ANSI;    /* Will force re-AT_'ing around it.  */
                    549:                w = 0;
1.13      nicm      550:        } else {
1.10      shadchin  551:                char *p = &linebuf[curr];
                    552:                LWCHAR prev_ch = step_char(&p, -1, linebuf);
                    553:                w = pwidth(ch, a, prev_ch);
                    554:        }
                    555:
1.5       millert   556:        if (ctldisp != OPT_ON && column + w + attr_ewidth(a) > sc_width)
1.1       etheisen  557:                /*
                    558:                 * Won't fit on screen.
                    559:                 */
                    560:                return (1);
                    561:
1.13      nicm      562:        if (rep == NULL) {
                    563:                cs = (char)ch;
1.10      shadchin  564:                rep = &cs;
                    565:                replen = 1;
1.13      nicm      566:        } else {
1.10      shadchin  567:                replen = utf_len(rep[0]);
                    568:        }
1.13      nicm      569:        if (curr + replen >= size_linebuf-6) {
1.1       etheisen  570:                /*
                    571:                 * Won't fit in line buffer.
1.5       millert   572:                 * Try to expand it.
1.1       etheisen  573:                 */
1.5       millert   574:                if (expand_linebuf())
                    575:                        return (1);
                    576:        }
1.1       etheisen  577:
1.13      nicm      578:        while (replen-- > 0) {
1.10      shadchin  579:                linebuf[curr] = *rep++;
                    580:                attr[curr] = a;
                    581:                curr++;
1.1       etheisen  582:        }
                    583:        column += w;
                    584:        return (0);
                    585: }
                    586:
                    587: /*
1.5       millert   588:  * Append a tab to the line buffer.
                    589:  * Store spaces to represent the tab.
                          *
                          * STORE_TAB() calls store_tab() and, when that reports failure,
                          * makes the function that used the macro return 1.
                    590:  */
1.13      nicm      591: #define        STORE_TAB(a, pos)               \
                    592:        if (store_tab((a), (pos)))      \
                    593:                return (1)
1.5       millert   594:
                         /*
                          * Work out how many columns separate the current position from the
                          * next tab stop and store that many spaces with attribute "attr".
                          * The current position is taken as column + cshift - lmargin.
                          *
                          * attr: attribute given to every stored space.
                          * pos:  passed through unchanged to STORE_CHAR.
                          *
                          * Returns 0 on success, 1 if the spaces would not fit within
                          * sc_width.  (STORE_CHAR presumably also returns 1 from here when a
                          * single store fails -- confirm against its definition.)
                          */
1.13      nicm      595: static int
                    596: store_tab(int attr, off_t pos)
1.5       millert   597: {
                    598:        int to_tab = column + cshift - lmargin;
                    599:        int i;
                    600:
                                /*
                                 * With fewer than two custom stops, or once at/past the last
                                 * custom stop, stops repeat every tabdefault columns counted
                                 * from the last custom stop.
                                 */
                    601:        if (ntabstops < 2 || to_tab >= tabstops[ntabstops-1])
                    602:                to_tab = tabdefault -
1.13      nicm      603:                    ((to_tab - tabstops[ntabstops-1]) % tabdefault);
                    604:        else {
                                        /*
                                         * Search downward for the last custom stop at or
                                         * before the current position; the stop after it is
                                         * the target.  If none matches, i ends at -1 and the
                                         * target is tabstops[0].
                                         */
1.5       millert   605:                for (i = ntabstops - 2;  i >= 0;  i--)
                    606:                        if (to_tab >= tabstops[i])
                    607:                                break;
                    608:                to_tab = tabstops[i+1] - to_tab;
                    609:        }
                    610:
                                /* to_tab is now the number of spaces to emit. */
1.13      nicm      611:        if (column + to_tab - 1 + pwidth(' ', attr, 0) +
                    612:            attr_ewidth(attr) > sc_width)
                    613:                return (1);
1.10      shadchin  614:
                                /* do/while: at least one space is always stored. */
1.5       millert   615:        do {
1.10      shadchin  616:                STORE_CHAR(' ', attr, " ", pos);
1.5       millert   617:        } while (--to_tab > 0);
1.13      nicm      618:        return (0);
1.5       millert   619: }
                    620:
                         /*
                          * STORE_PRCHAR() calls store_prchar() and, when that reports
                          * failure, makes the function that used the macro return 1.
                          */
1.13      nicm      621: #define        STORE_PRCHAR(c, pos)            \
                    622:        if (store_prchar((c), (pos)))   \
                    623:                return (1)
1.10      shadchin  624:
                         /*
                          * Store the printable representation of character "c" (as produced
                          * by prchar()) in the line buffer, each byte tagged AT_BINARY.
                          *
                          * The representation is stored only if all of it fits on the line.
                          * Returns 0 on success, 1 if it would extend past sc_width.
                          * (STORE_CHAR presumably also returns 1 from here when a single
                          * store fails -- confirm against its definition.)
                          */
1.13      nicm      625: static int
                    626: store_prchar(char c, off_t pos)
1.10      shadchin  627: {
                    628:        char *s;
                    629:
                    630:        /*
                    631:         * Convert to printable representation.
                    632:         */
                    633:        s = prchar(c);
                    634:
                    635:        /*
                    636:         * Make sure we can get the entire representation
                    637:         * of the character on this line.
                    638:         */
1.13      nicm      639:        if (column + (int)strlen(s) - 1 +
                    640:            pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
                    641:                return (1);
1.10      shadchin  642:
                                /* Copy the representation one byte at a time. */
1.13      nicm      643:        for (; *s != 0; s++) {
1.10      shadchin  644:                STORE_CHAR(*s, AT_BINARY, NULL, pos);
1.13      nicm      645:        }
                    646:        return (0);
1.10      shadchin  647: }
                    648:
                         /*
                          * Store the first mbc_buf_index bytes of mbc_buf in the line buffer,
                          * each one through store_prchar() (i.e. in printable form).
                          *
                          * Returns 0 if every byte was stored.  Otherwise stops at the first
                          * byte that could not be stored and returns how many bytes were
                          * left over, that byte included (mbc_buf_index - i).
                          */
1.13      nicm      649: static int
                    650: flush_mbc_buf(off_t pos)
1.10      shadchin  651: {
                    652:        int i;
                    653:
                    654:        for (i = 0; i < mbc_buf_index; i++)
                    655:                if (store_prchar(mbc_buf[i], pos))
1.13      nicm      656:                        return (mbc_buf_index - i);
1.10      shadchin  657:
1.13      nicm      658:        return (0);
1.10      shadchin  659: }
                    660:
1.5       millert   661: /*
1.1       etheisen  662:  * Append a character to the line buffer.
                    663:  * Expand tabs into spaces, handle underlining, boldfacing, etc.
                    664:  * Returns 0 if ok, nonzero if the character couldn't be stored.
                          *
                          * A nonzero result is the number of chars the caller should back
                          * up: 1 when not in utf_mode (and when a pending char could not be
                          * stored), mbc_buf_index when in utf_mode.
                          *
                          * In utf_mode the bytes of a multibyte sequence are collected in
                          * mbc_buf and only appended once the sequence is complete, so a
                          * call may return 0 without having stored anything yet.
                    665:  */
1.13      nicm      666: int
                    667: pappend(char c, off_t pos)
1.1       etheisen  668: {
1.5       millert   669:        int r;
                    670:
                                /* A char held back by an earlier call goes in first. */
1.13      nicm      671:        if (pendc) {
1.10      shadchin  672:                if (do_append(pendc, NULL, pendpos))
1.1       etheisen  673:                        /*
                    674:                         * Oops.  We've probably lost the char which
                    675:                         * was in pendc, since caller won't back up.
                    676:                         */
                    677:                        return (1);
                    678:                pendc = '\0';
                    679:        }
                    680:
1.13      nicm      681:        if (c == '\r' && bs_mode == BS_SPECIAL) {
                    682:                if (mbc_buf_len > 0)  /* utf_mode must be on. */ {
1.10      shadchin  683:                        /* Flush incomplete (truncated) sequence. */
                    684:                        r = flush_mbc_buf(mbc_pos);
                    685:                        mbc_buf_index = r + 1;
                    686:                        mbc_buf_len = 0;
                    687:                        if (r)
                    688:                                return (mbc_buf_index);
                    689:                }
                    690:
1.1       etheisen  691:                /*
1.13      nicm      692:                 * Don't put the CR into the buffer until we see
1.1       etheisen  693:                 * the next char.  If the next char is a newline,
                    694:                 * discard the CR.
                    695:                 */
                    696:                pendc = c;
                    697:                pendpos = pos;
                    698:                return (0);
                    699:        }
                    700:
1.13      nicm      701:        if (!utf_mode) {
1.10      shadchin  702:                r = do_append((LWCHAR) c, NULL, pos);
1.13      nicm      703:        } else {
1.10      shadchin  704:                /* Perform strict validation in all possible cases. */
                                        /* mbc_buf_len == 0: no multibyte sequence in progress. */
1.13      nicm      705:                if (mbc_buf_len == 0) {
                                                /*
                                                 * Also entered by goto once a truncated
                                                 * sequence has been flushed, so that c is
                                                 * handled as the start of a new character.
                                                 */
                    706: retry:
1.10      shadchin  707:                        mbc_buf_index = 1;
                    708:                        *mbc_buf = c;
1.13      nicm      709:                        if (IS_ASCII_OCTET(c)) {
1.10      shadchin  710:                                r = do_append((LWCHAR) c, NULL, pos);
1.13      nicm      711:                        } else if (IS_UTF8_LEAD(c)) {
                                                        /*
                                                         * Start collecting; nothing is stored
                                                         * until the sequence is complete.
                                                         */
1.10      shadchin  712:                                mbc_buf_len = utf_len(c);
                    713:                                mbc_pos = pos;
                    714:                                return (0);
1.13      nicm      715:                        } else {
1.10      shadchin  716:                                /* UTF8_INVALID or stray UTF8_TRAIL */
                    717:                                r = flush_mbc_buf(pos);
1.13      nicm      718:                        }
                    719:                } else if (IS_UTF8_TRAIL(c)) {
                                                /* Add the trail byte; keep waiting if more are due. */
1.10      shadchin  720:                        mbc_buf[mbc_buf_index++] = c;
                    721:                        if (mbc_buf_index < mbc_buf_len)
                    722:                                return (0);
                    723:                        if (is_utf8_well_formed(mbc_buf))
1.13      nicm      724:                                r = do_append(get_wchar(mbc_buf), mbc_buf,
                    725:                                    mbc_pos);
1.10      shadchin  726:                        else
                    727:                                /* Complete, but not shortest form, sequence. */
                    728:                                mbc_buf_index = r = flush_mbc_buf(mbc_pos);
                    729:                        mbc_buf_len = 0;
1.13      nicm      730:                } else {
1.10      shadchin  731:                        /* Flush incomplete (truncated) sequence.  */
                    732:                        r = flush_mbc_buf(mbc_pos);
                    733:                        mbc_buf_index = r + 1;
                    734:                        mbc_buf_len = 0;
                    735:                        /* Handle new char.  */
                    736:                        if (!r)
                    737:                                goto retry;
1.13      nicm      738:                }
1.10      shadchin  739:        }
                    740:
1.5       millert   741:        /*
                    742:         * If we need to shift the line, do it.
                    743:         * But wait until we get to at least the middle of the screen,
                    744:         * so shifting it doesn't affect the chars we're currently
                    745:         * pappending.  (Bold & underline can get messed up otherwise.)
                    746:         */
1.13      nicm      747:        if (cshift < hshift && column > sc_width / 2) {
1.5       millert   748:                linebuf[curr] = '\0';
                    749:                pshift(hshift - cshift);
                    750:        }
1.13      nicm      751:        if (r) {
1.10      shadchin  752:                /* How many chars should caller back up? */
                    753:                r = (!utf_mode) ? 1 : mbc_buf_index;
                    754:        }
1.5       millert   755:        return (r);
1.1       etheisen  756: }
                    757:
                         /*
                          * Append one character to the line buffer after interpreting it:
                          * backspaces (according to bs_mode), overstrike sequences that
                          * produce bold/underline, tabs, control characters and binary
                          * UTF-8 characters are handled here; anything else is stored as is.
                          *
                          * ch:  the character to append.
                          * rep: byte representation handed on to STORE_CHAR, or NULL.
                          *      pappend() passes mbc_buf for a well-formed multibyte
                          *      sequence and NULL in every other case.
                          * pos: passed through to the store routines (presumably the file
                          *      offset of ch -- confirm against the callers).
                          *
                          * Returns 0 on success, 1 if the character did not fit, either
                          * directly or through the STORE_TAB / STORE_PRCHAR macros.
                          * (STORE_CHAR presumably behaves the same way -- confirm against
                          * its definition.)
                          */
1.13      nicm      758: static int
                    759: do_append(LWCHAR ch, char *rep, off_t pos)
1.1       etheisen  760: {
1.13      nicm      761:        int a;
1.10      shadchin  762:        LWCHAR prev_ch;
1.1       etheisen  763:
1.10      shadchin  764:        a = AT_NORMAL;
1.1       etheisen  765:
1.13      nicm      766:        if (ch == '\b') {
1.10      shadchin  767:                if (bs_mode == BS_CONTROL)
1.5       millert   768:                        goto do_control_char;
1.10      shadchin  769:
                    770:                /*
                    771:                 * A better test is needed here so we don't
                    772:                 * backspace over part of the printed
                    773:                 * representation of a binary character.
                    774:                 */
                                        /*
                                         * At or left of the margin, or right after an ANSI or
                                         * binary cell, the backspace is shown in printable form
                                         * instead of being acted upon.
                                         */
1.13      nicm      775:                if (curr <= lmargin ||
                    776:                    column <= lmargin ||
                    777:                    (attr[curr - 1] & (AT_ANSI|AT_BINARY))) {
1.10      shadchin  778:                        STORE_PRCHAR('\b', pos);
1.13      nicm      779:                } else if (bs_mode == BS_NORMAL) {
1.10      shadchin  780:                        STORE_CHAR(ch, AT_NORMAL, NULL, pos);
1.13      nicm      781:                } else if (bs_mode == BS_SPECIAL) {
                                                /*
                                                 * backc() is defined elsewhere; presumably it
                                                 * steps curr back over the previous character.
                                                 * Its result arms the overstrike handling below
                                                 * for the next character.
                                                 */
1.10      shadchin  782:                        overstrike = backc();
1.13      nicm      783:                }
1.10      shadchin  784:
1.13      nicm      785:                return (0);
1.10      shadchin  786:        }
                    787:
1.13      nicm      788:        if (overstrike > 0) {
1.1       etheisen  789:                /*
                    790:                 * Overstrike the character at the current position
1.13      nicm      791:                 * in the line buffer.  This will cause either
                    792:                 * underline (if a "_" is overstruck),
1.1       etheisen  793:                 * bold (if an identical character is overstruck),
                    794:                 * or just deletion of the character in the buffer.
                    795:                 */
                                        /*
                                         * In utf_mode leave overstrike negative so that the
                                         * "overstrike < 0" branch below can extend the same
                                         * attribute to following composing/combining chars.
                                         */
1.10      shadchin  796:                overstrike = utf_mode ? -1 : 0;
                    797:                /* To be correct, this must be a base character.  */
                    798:                prev_ch = get_wchar(linebuf + curr);
                    799:                a = attr[curr];
1.13      nicm      800:                if (ch == prev_ch) {
1.5       millert   801:                        /*
                    802:                         * Overstriking a char with itself means make it bold.
                    803:                         * But overstriking an underscore with itself is
                    804:                         * ambiguous.  It could mean make it bold, or
                    805:                         * it could mean make it underlined.
                    806:                         * Use the previous overstrike to resolve it.
                    807:                         */
1.13      nicm      808:                        if (ch == '_') {
1.10      shadchin  809:                                if ((a & (AT_BOLD|AT_UNDERLINE)) != AT_NORMAL)
                    810:                                        a |= (AT_BOLD|AT_UNDERLINE);
                    811:                                else if (last_overstrike != AT_NORMAL)
                    812:                                        a |= last_overstrike;
                    813:                                else
                    814:                                        a |= AT_BOLD;
1.13      nicm      815:                        } else {
1.10      shadchin  816:                                a |= AT_BOLD;
1.13      nicm      817:                        }
                    818:                } else if (ch == '_') {
                                                /*
                                                 * "_" struck over a char: keep the char already
                                                 * in the buffer and underline it.
                                                 */
1.10      shadchin  819:                        a |= AT_UNDERLINE;
                    820:                        ch = prev_ch;
                    821:                        rep = linebuf + curr;
1.13      nicm      822:                } else if (prev_ch == '_') {
                                                /* A char struck over "_": underline the new char. */
1.10      shadchin  823:                        a |= AT_UNDERLINE;
                    824:                }
                    825:                /* Else we replace prev_ch, but we keep its attributes.  */
1.13      nicm      826:        } else if (overstrike < 0) {
                    827:                if (is_composing_char(ch) ||
                    828:                    is_combining_char(get_wchar(linebuf + curr), ch))
1.10      shadchin  829:                        /* Continuation of the same overstrike.  */
                    830:                        a = last_overstrike;
1.1       etheisen  831:                else
1.10      shadchin  832:                        overstrike = 0;
                    833:        }
                    834:
                                /* From here on "a" is the attribute ch will be stored with. */
1.13      nicm      835:        if (ch == '\t') {
1.5       millert   836:                /*
                    837:                 * Expand a tab into spaces.
                    838:                 */
1.13      nicm      839:                switch (bs_mode) {
1.1       etheisen  840:                case BS_CONTROL:
                    841:                        goto do_control_char;
1.5       millert   842:                case BS_NORMAL:
1.1       etheisen  843:                case BS_SPECIAL:
1.10      shadchin  844:                        STORE_TAB(a, pos);
1.1       etheisen  845:                        break;
                    846:                }
1.13      nicm      847:        } else if ((!utf_mode || is_ascii_char(ch)) && control_char((char)ch)) {
                                        /* Also the goto target for '\b' and '\t' under BS_CONTROL. */
                    848: do_control_char:
                    849:                if (ctldisp == OPT_ON ||
                    850:                    (ctldisp == OPT_ONPLUS && IS_CSI_START(ch))) {
1.1       etheisen  851:                        /*
                    852:                         * Output as a normal character.
                    853:                         */
1.10      shadchin  854:                        STORE_CHAR(ch, AT_NORMAL, rep, pos);
1.13      nicm      855:                } else {
                    856:                        STORE_PRCHAR((char)ch, pos);
1.10      shadchin  857:                }
1.13      nicm      858:        } else if (utf_mode && ctldisp != OPT_ON && is_ubin_char(ch)) {
1.10      shadchin  859:                char *s;
                    860:
                                        /*
                                         * Binary UTF-8 character: store the string returned by
                                         * prutfchar(), but only if all of it fits on the line.
                                         */
                    861:                s = prutfchar(ch);
1.1       etheisen  862:
1.13      nicm      863:                if (column + (int)strlen(s) - 1 +
1.10      shadchin  864:                    pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
                    865:                        return (1);
1.1       etheisen  866:
1.13      nicm      867:                for (; *s != 0; s++)
1.10      shadchin  868:                        STORE_CHAR(*s, AT_BINARY, NULL, pos);
1.13      nicm      869:        } else {
1.10      shadchin  870:                STORE_CHAR(ch, a, rep, pos);
1.1       etheisen  871:        }
1.13      nicm      872:        return (0);
1.10      shadchin  873: }
                    874:
                    875: /*
                    876:  * Flush a multibyte sequence that is still being collected.
                          *
                          * If mbc_buf_len > 0 the collected bytes are stored through
                          * flush_mbc_buf() and mbc_buf_len is reset to 0.
                          * Returns the result of flush_mbc_buf(), or 0 if nothing was
                          * pending.
                    877:  */
1.13      nicm      878: int
                    879: pflushmbc(void)
1.10      shadchin  880: {
                    881:        int r = 0;
1.1       etheisen  882:
1.13      nicm      883:        if (mbc_buf_len > 0) {
1.10      shadchin  884:                /* Flush incomplete (truncated) sequence.  */
                    885:                r = flush_mbc_buf(mbc_pos);
                    886:                mbc_buf_len = 0;
                    887:        }
1.13      nicm      888:        return (r);
1.1       etheisen  889: }
                    890:
                    891: /*
                    892:  * Terminate the line in the line buffer.
                          *
                          * endline: nonzero if this is really the end of the line; a pending
                          *          CR is then discarded and the newline decision below
                          *          takes it into account.
                          * forw:    nonzero if moving forward; enables the space+backspace
                          *          "nudge" for ignaw terminals.
                          *
                          * The writes to linebuf/attr in this function are not bounds
                          * checked.
                          * NOTE(review): presumably they rely on the headroom the store
                          * routine keeps free (it expands the buffer when
                          * curr + replen >= size_linebuf-6) -- confirm.
                    893:  */
1.13      nicm      894: void
                    895: pdone(int endline, int forw)
1.1       etheisen  896: {
                                /* Result deliberately ignored. */
1.10      shadchin  897:        (void) pflushmbc();
                    898:
1.1       etheisen  899:        if (pendc && (pendc != '\r' || !endline))
                    900:                /*
                    901:                 * If we had a pending character, put it in the buffer.
                    902:                 * But discard a pending CR if we are at end of line
                    903:                 * (that is, discard the CR in a CR/LF sequence).
                    904:                 */
1.10      shadchin  905:                (void) do_append(pendc, NULL, pendpos);
1.1       etheisen  906:
                    907:        /*
1.5       millert   908:         * Make sure we've shifted the line, if we need to.
                    909:         */
                    910:        if (cshift < hshift)
                    911:                pshift(hshift - cshift);
                    912:
1.13      nicm      913:        if (ctldisp == OPT_ONPLUS && is_ansi_end('m')) {
1.10      shadchin  914:                /* Switch to normal attribute at end of line. */
                    915:                char *p = "\033[m";
1.13      nicm      916:                for (; *p != '\0'; p++) {
1.10      shadchin  917:                        linebuf[curr] = *p;
                    918:                        attr[curr++] = AT_ANSI;
                    919:                }
                    920:        }
                    921:
1.5       millert   922:        /*
1.1       etheisen  923:         * Add a newline if necessary,
                    924:         * and append a '\0' to the end of the line.
1.10      shadchin  925:         * We output a newline if we're not at the right edge of the screen,
                    926:         * or if the terminal doesn't auto wrap,
                    927:         * or if this is really the end of the line AND the terminal ignores
                    928:         * a newline at the right edge.
1.13      nicm      929:         * (In the last case we don't want to output a newline if the terminal
1.10      shadchin  930:         * doesn't ignore it since that would produce an extra blank line.
                    931:         * But we do want to output a newline if the terminal ignores it in case
                    932:         * the next line is blank.  In that case the single newline output for
                    933:         * that blank line would be ignored!)
1.1       etheisen  934:         */
1.13      nicm      935:        if (column < sc_width || !auto_wrap || (endline && ignaw) ||
                    936:            ctldisp == OPT_ON) {
1.1       etheisen  937:                linebuf[curr] = '\n';
                    938:                attr[curr] = AT_NORMAL;
                    939:                curr++;
1.13      nicm      940:        } else if (ignaw && column >= sc_width && forw) {
1.10      shadchin  941:                /*
                    942:                 * Terminals with "ignaw" don't wrap until they *really* need
                    943:                 * to, i.e. when the character *after* the last one to fit on a
                    944:                 * line is output. But they are too hard to deal with when they
                    945:                 * get in the state where a full screen width of characters
                    946:                 * have been output but the cursor is sitting on the right edge
                    947:                 * instead of at the start of the next line.
1.13      nicm      948:                 * So we nudge them into wrapping by outputting a space
                    949:                 * character plus a backspace.  But do this only if moving
1.10      shadchin  950:                 * forward; if we're moving backward and drawing this line at
                    951:                 * the top of the screen, the space would overwrite the first
1.13      nicm      952:                 * char on the next line.  We don't need to do this "nudge"
1.10      shadchin  953:                 * at the top of the screen anyway.
                    954:                 */
                    955:                linebuf[curr] = ' ';
                    956:                attr[curr++] = AT_NORMAL;
1.13      nicm      957:                linebuf[curr] = '\b';
1.10      shadchin  958:                attr[curr++] = AT_NORMAL;
1.1       etheisen  959:        }
                                /* Terminate the line; curr is left pointing at the terminator. */
                    960:        linebuf[curr] = '\0';
                    961:        attr[curr] = AT_NORMAL;
1.10      shadchin  962: }
1.5       millert   963:
1.10      shadchin  964: /*
                    965:  * Put character "c" into the first cell of the line buffer
                          * (linebuf[0]) with attribute AT_NORMAL|AT_HILITE.
                          * Going by the name, that cell is presumably the status column.
                    966:  */
1.13      nicm      967: void
                    968: set_status_col(char c)
1.10      shadchin  969: {
                    970:        linebuf[0] = c;
                    971:        attr[0] = AT_NORMAL|AT_HILITE;
1.1       etheisen  972: }
                    973:
                    974: /*
                    975:  * Get a character from the current line.
                    976:  * Return the character as the function return value,
                    977:  * and the character attribute in *ap.
                          *
                          * i:  index of the wanted character in the line.
                          * ap: receives the attribute; always written.
                          *
                          * No bounds check is made on i; the caller is expected to stop at
                          * the terminating '\0'.
                    978:  */
1.13      nicm      979: int
                    980: gline(int i, int *ap)
1.1       etheisen  981: {
1.13      nicm      982:        if (is_null_line) {
1.1       etheisen  983:                /*
                    984:                 * If there is no current line, we pretend the line is
                    985:                 * either "~" or "", depending on the "twiddle" flag.
                    986:                 */
1.13      nicm      987:                if (twiddle) {
                    988:                        if (i == 0) {
1.10      shadchin  989:                                *ap = AT_BOLD;
1.13      nicm      990:                                return ('~');
1.10      shadchin  991:                        }
                                                /* Skip past the '~' so the code below sees "\n". */
                    992:                        --i;
                    993:                }
                    994:                /* Make sure we're back to AT_NORMAL before the '\n'.  */
                    995:                *ap = AT_NORMAL;
                                        /* Index 0 yields the newline, any other index '\0'. */
1.13      nicm      996:                return (i ? '\0' : '\n');
1.1       etheisen  997:        }
                    998:
                    999:        *ap = attr[i];
                                /* Mask to 0..255 so a negative char is not sign-extended. */
1.10      shadchin 1000:        return (linebuf[i] & 0xFF);
1.1       etheisen 1001: }
                   1002:
                   1003: /*
                   1004:  * Indicate that there is no current line.
                          * Sets is_null_line (which gline() tests) and resets the current
                          * left-shift, cshift, to 0.
                   1005:  */
1.13      nicm     1006: void
                   1007: null_line(void)
1.1       etheisen 1008: {
                   1009:        is_null_line = 1;
1.5       millert  1010:        cshift = 0;
1.1       etheisen 1011: }
                   1012:
                   1013: /*
                   1014:  * Analogous to forw_line(), but deals with "raw lines":
                   1015:  * lines which are not split for screen width.
                   1016:  * {{ This is supposed to be more efficient than forw_line(). }}
                          *
                          * Reads characters forward from curr_pos into linebuf until a
                          * newline, EOI or an abort signal; the newline is not stored.
                          *
                          * curr_pos:  position to start reading at.
                          * linep:     if not NULL, receives a pointer to the line (linebuf).
                          * line_lenp: if not NULL, receives the number of chars stored.
                          *
                          * Returns the ch_tell() value taken when reading stopped, or -1 if
                          * curr_pos is -1, the seek fails, or the first read hits EOI.
                          * The returned text lives in the shared line buffer.
                   1017:  */
1.13      nicm     1018: off_t
                   1019: forw_raw_line(off_t curr_pos, char **linep, int *line_lenp)
                   1020: {
                   1021:        int n;
                   1022:        int c;
                   1023:        off_t new_pos;
                   1024:
                   1025:        if (curr_pos == -1 || ch_seek(curr_pos) ||
                   1026:            (c = ch_forw_get()) == EOI)
                   1027:                return (-1);
1.1       etheisen 1028:
1.5       millert  1029:        n = 0;
1.13      nicm     1030:        for (;;) {
                   1031:                if (c == '\n' || c == EOI || ABORT_SIGS()) {
1.1       etheisen 1032:                        new_pos = ch_tell();
                   1033:                        break;
                   1034:                }
                                        /* Keep one byte free for the terminating '\0'. */
1.13      nicm     1035:                if (n >= size_linebuf-1) {
                   1036:                        if (expand_linebuf()) {
1.5       millert  1037:                                /*
                   1038:                                 * Overflowed the input buffer.
                   1039:                                 * Pretend the line ended here.
                   1040:                                 */
                                                        /*
                                                         * c was read but not stored, hence the
                                                         * position one before ch_tell().
                                                         */
                   1041:                                new_pos = ch_tell() - 1;
                   1042:                                break;
                   1043:                        }
1.1       etheisen 1044:                }
1.13      nicm     1045:                linebuf[n++] = (char)c;
1.1       etheisen 1046:                c = ch_forw_get();
                   1047:        }
1.5       millert  1048:        linebuf[n] = '\0';
1.1       etheisen 1049:        if (linep != NULL)
                   1050:                *linep = linebuf;
1.10      shadchin 1051:        if (line_lenp != NULL)
                   1052:                *line_lenp = n;
1.1       etheisen 1053:        return (new_pos);
                   1054: }
                   1055:
                   1056: /*
                   1057:  * Analogous to back_line(), but deals with "raw lines".
                   1058:  * {{ This is supposed to be more efficient than back_line(). }}
                          *
                          * Reads characters backward, starting just before curr_pos, and
                          * fills linebuf from its end toward its start, so the line ends up
                          * right-aligned in the buffer with a '\0' in the last byte.
                          *
                          * curr_pos:  position just past the line to read.
                          * linep:     if not NULL, receives a pointer to the first char of
                          *            the line inside linebuf.
                          * line_lenp: if not NULL, receives the number of chars stored.
                          *
                          * Returns the position at which the line starts, or -1 if curr_pos
                          * is -1, is not beyond ch_zero(), or the seek fails.
                          * The returned text lives in the shared line buffer.
                   1059:  */
1.13      nicm     1060: off_t
                   1061: back_raw_line(off_t curr_pos, char **linep, int *line_lenp)
                   1062: {
                   1063:        int n;
                   1064:        int c;
                   1065:        off_t new_pos;
                   1066:
                   1067:        if (curr_pos == -1 || curr_pos <= ch_zero() || ch_seek(curr_pos - 1))
                   1068:                return (-1);
1.1       etheisen 1069:
                                /* n is the index of the most recently written byte. */
1.5       millert  1070:        n = size_linebuf;
                   1071:        linebuf[--n] = '\0';
1.13      nicm     1072:        for (;;) {
1.1       etheisen 1073:                c = ch_back_get();
1.13      nicm     1074:                if (c == '\n' || ABORT_SIGS()) {
1.1       etheisen 1075:                        /*
                   1076:                         * This is the newline ending the previous line.
                   1077:                         * We have hit the beginning of the line.
                   1078:                         */
                   1079:                        new_pos = ch_tell() + 1;
                   1080:                        break;
                   1081:                }
1.13      nicm     1082:                if (c == EOI) {
1.1       etheisen 1083:                        /*
                   1084:                         * We have hit the beginning of the file.
                   1085:                         * This must be the first line in the file.
                   1086:                         * This must, of course, be the beginning of the line.
                   1087:                         */
                   1088:                        new_pos = ch_zero();
                   1089:                        break;
                   1090:                }
1.13      nicm     1091:                if (n <= 0) {
1.5       millert  1092:                        int old_size_linebuf = size_linebuf;
1.13      nicm     1093:                        if (expand_linebuf()) {
1.5       millert  1094:                                /*
                   1095:                                 * Overflowed the input buffer.
                   1096:                                 * Pretend the line ended here.
                   1097:                                 */
                   1098:                                new_pos = ch_tell() + 1;
                   1099:                                break;
                   1100:                        }
1.1       etheisen 1101:                        /*
1.5       millert  1102:                         * Shift the data to the end of the new linebuf.
1.1       etheisen 1103:                         */
                                                /*
                                                 * Presumably expand_linebuf() leaves the old
                                                 * contents at the front of the larger buffer;
                                                 * n becomes the index where they now begin.
                                                 */
1.5       millert  1104:                        n = size_linebuf - old_size_linebuf;
1.8       millert  1105:                        memmove(linebuf + n, linebuf, old_size_linebuf);
1.1       etheisen 1106:                }
1.5       millert  1107:                linebuf[--n] = c;
1.1       etheisen 1108:        }
                   1109:        if (linep != NULL)
1.5       millert  1110:                *linep = &linebuf[n];
1.10      shadchin 1111:        if (line_lenp != NULL)
                                        /* Exclude the '\0' in the last byte of the buffer. */
                   1112:                *line_lenp = size_linebuf - 1 - n;
1.1       etheisen 1113:        return (new_pos);
                   1114: }