Annotation of src/usr.bin/mandoc/term.c, Revision 1.31
1.31 ! schwarze 1: /* $Id: term.c,v 1.30 2010/04/23 00:23:47 schwarze Exp $ */
1.1 kristaps 2: /*
1.2 schwarze 3: * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
1.1 kristaps 4: *
5: * Permission to use, copy, modify, and distribute this software for any
1.2 schwarze 6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 8: *
1.2 schwarze 9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1 kristaps 16: */
1.20 schwarze 17: #include <sys/types.h>
18:
1.1 kristaps 19: #include <assert.h>
1.20 schwarze 20: #include <ctype.h>
1.1 kristaps 21: #include <stdio.h>
22: #include <stdlib.h>
23: #include <string.h>
1.19 schwarze 24: #include <time.h>
1.1 kristaps 25:
1.15 schwarze 26: #include "chars.h"
1.16 schwarze 27: #include "out.h"
1.1 kristaps 28: #include "term.h"
29: #include "man.h"
30: #include "mdoc.h"
1.16 schwarze 31: #include "main.h"
1.1 kristaps 32:
/* Allocation and release of the terminal state. */
static	struct termp	 *term_alloc(enum termenc enc);
static	void		  term_free(struct termp *p);

/* Escape-sequence lookups; both hand their result to encode(). */
static	void		  spec(struct termp *p, const char *word, size_t len);
static	void		  res(struct termp *p, const char *word, size_t len);

/* Output buffer management and font-aware buffering. */
static	void		  buffera(struct termp *p, const char *word, size_t sz);
static	void		  bufferc(struct termp *p, char c);
static	void		  adjbuf(struct termp *p, size_t sz);
static	void		  encode(struct termp *p, const char *word, size_t sz);
1.1 kristaps 41:
42:
43: void *
44: ascii_alloc(void)
45: {
46:
47: return(term_alloc(TERMENC_ASCII));
48: }
49:
50:
/*
 * Release a terminal state that was handed out as an opaque pointer.
 * `arg' is interpreted as a struct termp and passed to term_free().
 */
void
terminal_free(void *arg)
{
	struct termp	*p;

	p = arg;
	term_free(p);
}
57:
58:
59: static void
60: term_free(struct termp *p)
61: {
62:
63: if (p->buf)
64: free(p->buf);
1.15 schwarze 65: if (p->symtab)
66: chars_free(p->symtab);
1.1 kristaps 67:
68: free(p);
69: }
70:
71:
72: static struct termp *
73: term_alloc(enum termenc enc)
74: {
75: struct termp *p;
76:
1.19 schwarze 77: p = calloc(1, sizeof(struct termp));
78: if (NULL == p) {
79: perror(NULL);
80: exit(EXIT_FAILURE);
81: }
1.30 schwarze 82: p->tabwidth = 5;
1.1 kristaps 83: p->enc = enc;
84: return(p);
85: }
86:
87:
88: /*
89: * Flush a line of text. A "line" is loosely defined as being something
90: * that should be followed by a newline, regardless of whether it's
91: * broken apart by newlines getting there. A line can also be a
1.27 schwarze 92: * fragment of a columnar list (`Bl -tag' or `Bl -column'), which does
93: * not have a trailing newline.
1.1 kristaps 94: *
1.27 schwarze 95: * The following flags may be specified:
1.1 kristaps 96: *
97: * - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
98: * offset value. This is useful when doing columnar lists where the
99: * prior column has right-padded.
100: *
101: * - TERMP_NOBREAK: this is the most important and is used when making
102: * columns. In short: don't print a newline and instead pad to the
103: * right margin. Used in conjunction with TERMP_NOLPAD.
104: *
1.9 schwarze 105: * - TERMP_TWOSPACE: when padding, make sure there are at least two
106: * space characters of padding. Otherwise, rather break the line.
107: *
1.6 schwarze 108: * - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
109: * the line is overrun, and don't pad-right if it's underrun.
110: *
111: * - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
112: * overrunning, instead save the position and continue at that point
113: * upon the next invocation.
1.1 kristaps 114: *
115: * In-line line breaking:
116: *
117: * If TERMP_NOBREAK is specified and the line overruns the right
118: * margin, it will break and pad-right to the right margin after
119: * writing. If maxrmargin is violated, it will break and continue
1.19 schwarze 120: * writing from the right-margin, which will lead to the above scenario
121: * upon exit. Otherwise, the line will break at the right margin.
1.1 kristaps 122: */
/*
 * Summary of the mechanics below: the buffered bytes p->buf[0 .. p->col)
 * are consumed one word at a time and written to standard output with
 * putchar(); p->viscol is advanced by the columns written.  Once the
 * buffer is consumed, p->col and p->overstep are reset to zero; the
 * TERMP_HANG case may afterwards store a new p->overstep for the next
 * call.  The function asserts that p->offset < p->rmargin.
 */
123: void
124: term_flushln(struct termp *p)
125: {
1.19 schwarze 126: int i; /* current input position in p->buf */
127: size_t vis; /* current visual position on output */
128: size_t vbl; /* number of blanks to prepend to output */
1.22 schwarze 129: size_t vend; /* end of word visual position on output */
1.19 schwarze 130: size_t bp; /* visual right border position */
131: int j; /* temporary loop index */
1.22 schwarze 132: int jhy; /* last hyphen before line overflow */
1.19 schwarze 133: size_t maxvis, mmax; /* visible width up to rmargin / maxrmargin */
1.1 kristaps 134:
135: /*
136: * First, establish the maximum columns of "visible" content.
137: * This is usually the difference between the right-margin and
138: * an indentation, but can be, for tagged lists or columns, a
1.19 schwarze 139: * small set of values.
1.1 kristaps 140: */
141:
142: assert(p->offset < p->rmargin);
1.9 schwarze 143:
/*
 * Both widths are reduced by p->overstep and clamped to zero when
 * the signed result would be negative.
 */
1.26 schwarze 144: maxvis = (int)(p->rmargin - p->offset) - p->overstep < 0 ?
1.19 schwarze 145: /* LINTED */
1.26 schwarze 146: 0 : p->rmargin - p->offset - p->overstep;
147: mmax = (int)(p->maxrmargin - p->offset) - p->overstep < 0 ?
1.19 schwarze 148: /* LINTED */
1.26 schwarze 149: 0 : p->maxrmargin - p->offset - p->overstep;
1.9 schwarze 150:
/* With TERMP_NOBREAK the break point is measured against maxrmargin. */
1.1 kristaps 151: bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
1.19 schwarze 152:
153: /*
154: * FIXME: if bp is zero, we still output the first word before
155: * breaking the line.
156: */
157:
1.30 schwarze 158: vis = i = 0;
1.22 schwarze 159: while (i < (int)p->col) {
160:
161: /*
162: * Choose the number of blanks to prepend: no blank at the
163: * beginning of a line, one between words -- but do not
164: * actually write them yet.
165: */
1.31 ! schwarze 166: vbl = (size_t)(0 == vis ? 0 : 1);
1.22 schwarze 167: vis += vbl;
1.30 schwarze 168: vend = vis;
169:
170: /*
171: * Handle literal tab characters.
172: * Each tab advances vend to the next multiple of p->tabwidth;
173: * the distance is added to the pending blanks in vbl.
174: */
173: for (j = i; j < (int)p->col; j++) {
174: if ('\t' != p->buf[j])
175: break;
176: /* Collapse tab with inter-word spacing. */
177: if (vis > 0 && j == i)
178: vend = vis - 1;
179: vend = (vend/p->tabwidth+1)*p->tabwidth;
180: vbl += vend - vis;
181: vis = vend;
182: }
1.22 schwarze 183:
1.1 kristaps 184: /*
185: * Count up visible word characters. Control sequences
186: * (starting with the CSI) aren't counted. A space
187: * generates a non-printing word, which is valid (the
188: * space is printed according to regular spacing rules).
189: *
190: * A backspace (8) takes one column back, so a character that
191: * encode() buffered as "char, backspace, char" counts once.
192: * jhy remembers the index of the last hyphen that is not the
193: * first visible column of the word and still lies before bp.
194: */
190:
191: /* LINTED */
1.30 schwarze 192: for (jhy = 0; j < (int)p->col; j++) {
193: if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
1.1 kristaps 194: break;
195: else if (8 == p->buf[j])
1.22 schwarze 196: vend--;
1.31 ! schwarze 197: else {
1.22 schwarze 198: if (vend > vis && vend < bp &&
199: '-' == p->buf[j])
200: jhy = j;
201: vend++;
202: }
1.1 kristaps 203: }
204:
205: /*
1.23 schwarze 206: * Usually, indent the first line of each paragraph.
207: */
1.29 schwarze 208: if (0 == i && ! (p->flags & TERMP_NOLPAD)) {
209: p->viscol += p->offset;
1.23 schwarze 210: /* LINTED */
211: for (j = 0; j < (int)p->offset; j++)
212: putchar(' ');
1.29 schwarze 213: }
1.23 schwarze 214:
215: /*
1.5 schwarze 216: * Find out whether we would exceed the right margin.
217: * If so, break to the next line. (TODO: hyphenate)
218: * Otherwise, write the chosen number of blanks now.
219: *
220: * The break is taken only when no usable hyphen was recorded
221: * (jhy is zero) and vis exceeds vbl, i.e. something other than
222: * the pending blanks was already counted on this output line.
223: * After a break, vend is rebased to the width of the word alone
224: * (plus the column offset for TERMP_NOBREAK).
225: */
1.24 schwarze 220: if (vend > bp && 0 == jhy && vis > vbl) {
1.22 schwarze 221: vend -= vis;
1.5 schwarze 222: putchar('\n');
223: if (TERMP_NOBREAK & p->flags) {
1.29 schwarze 224: p->viscol = p->rmargin;
1.5 schwarze 225: for (j = 0; j < (int)p->rmargin; j++)
226: putchar(' ');
1.22 schwarze 227: vend += p->rmargin - p->offset;
1.5 schwarze 228: } else {
1.29 schwarze 229: p->viscol = p->offset;
1.1 kristaps 230: for (j = 0; j < (int)p->offset; j++)
231: putchar(' ');
1.5 schwarze 232: }
1.26 schwarze 233: /* Remove the p->overstep width. */
1.18 schwarze 234: bp += (int)/* LINTED */
1.26 schwarze 235: p->overstep;
236: p->overstep = 0;
1.5 schwarze 237: } else {
1.29 schwarze 238: p->viscol += vbl;
1.5 schwarze 239: for (j = 0; j < (int)vbl; j++)
1.1 kristaps 240: putchar(' ');
241: }
242:
1.3 schwarze 243: /*
1.30 schwarze 244: * Skip leading tabs, they were handled above.
245: */
246: while (i < (int)p->col && '\t' == p->buf[i])
247: i++;
248:
249: /*
1.5 schwarze 250: * Finally, write out the word.
251: * Writing stops just after the remembered hyphen when the
252: * whole word would overrun bp, at a tab, or after consuming
253: * the single blank that ends the word.  ASCII_NBRSP bytes
254: * are printed as ordinary blanks.
1.1 kristaps 251: */
252: for ( ; i < (int)p->col; i++) {
1.25 schwarze 253: if (vend > bp && jhy > 0 && i > jhy)
1.30 schwarze 254: break;
255: if ('\t' == p->buf[i])
1.1 kristaps 256: break;
1.22 schwarze 257: if (' ' == p->buf[i]) {
258: i++;
259: break;
260: }
1.21 schwarze 261: if (ASCII_NBRSP == p->buf[i])
1.20 schwarze 262: putchar(' ');
1.31 ! schwarze 263: else
1.20 schwarze 264: putchar(p->buf[i]);
1.1 kristaps 265: }
/*
 * NOTE(review): after the line-break branch above, vend has been
 * rebased but vis has not, so `vend - vis' is evaluated in unsigned
 * (size_t) arithmetic and may wrap -- confirm that p->viscol ends up
 * with the intended value in that case.
 */
1.29 schwarze 266: p->viscol += vend - vis;
1.22 schwarze 267: vis = vend;
1.1 kristaps 268: }
1.18 schwarze 269:
/* The buffer is consumed; start the next line from scratch. */
1.9 schwarze 270: p->col = 0;
1.26 schwarze 271: p->overstep = 0;
1.1 kristaps 272:
/* Without TERMP_NOBREAK the line simply ends with a newline. */
1.9 schwarze 273: if ( ! (TERMP_NOBREAK & p->flags)) {
1.29 schwarze 274: p->viscol = 0;
1.28 schwarze 275: putchar('\n');
1.1 kristaps 276: return;
277: }
278:
1.9 schwarze 279: if (TERMP_HANG & p->flags) {
280: /* We need one blank after the tag. */
1.26 schwarze 281: p->overstep = /* LINTED */
1.9 schwarze 282: vis - maxvis + 1;
283:
284: /*
285: * Behave exactly the same way as groff:
286: * If we have overstepped the margin, temporarily move
287: * it to the right and flag the rest of the line to be
288: * shorter.
289: * If we landed right at the margin, be happy.
290: * If we are one step before the margin, temporarily
291: * move it one step LEFT and flag the rest of the line
292: * to be longer.
293: */
1.26 schwarze 294: if (p->overstep >= -1) {
295: assert((int)maxvis + p->overstep >= 0);
1.9 schwarze 296: /* LINTED */
1.26 schwarze 297: maxvis += p->overstep;
1.9 schwarze 298: } else
1.26 schwarze 299: p->overstep = 0;
1.9 schwarze 300:
301: } else if (TERMP_DANGLE & p->flags)
302: return;
1.1 kristaps 303:
1.9 schwarze 304: /* Right-pad. */
305: if (maxvis > vis + /* LINTED */
1.29 schwarze 306: ((TERMP_TWOSPACE & p->flags) ? 1 : 0)) {
307: p->viscol += maxvis - vis;
1.9 schwarze 308: for ( ; vis < maxvis; vis++)
309: putchar(' ');
1.29 schwarze 310: } else { /* ...or newline break. */
1.1 kristaps 311: putchar('\n');
1.29 schwarze 312: p->viscol = p->rmargin;
1.9 schwarze 313: for (i = 0; i < (int)p->rmargin; i++)
314: putchar(' ');
315: }
1.1 kristaps 316: }
317:
318:
319: /*
320: * A newline only breaks an existing line; it won't assert vertical
321: * space. All data in the output buffer is flushed prior to the newline
322: * assertion.
323: */
324: void
325: term_newln(struct termp *p)
326: {
327:
328: p->flags |= TERMP_NOSPACE;
1.29 schwarze 329: if (0 == p->col && 0 == p->viscol) {
1.1 kristaps 330: p->flags &= ~TERMP_NOLPAD;
331: return;
332: }
333: term_flushln(p);
334: p->flags &= ~TERMP_NOLPAD;
335: }
336:
337:
338: /*
339: * Asserts a vertical space (a full, empty line-break between lines).
340: * Note that if used twice, this will cause two blank spaces and so on.
341: * All data in the output buffer is flushed prior to the newline
342: * assertion.
343: */
344: void
345: term_vspace(struct termp *p)
346: {
347:
348: term_newln(p);
1.29 schwarze 349: p->viscol = 0;
1.1 kristaps 350: putchar('\n');
351: }
352:
353:
354: static void
1.20 schwarze 355: spec(struct termp *p, const char *word, size_t len)
1.1 kristaps 356: {
357: const char *rhs;
358: size_t sz;
359:
1.15 schwarze 360: rhs = chars_a2ascii(p->symtab, word, len, &sz);
1.20 schwarze 361: if (rhs)
362: encode(p, rhs, sz);
1.11 schwarze 363: }
364:
365:
366: static void
1.20 schwarze 367: res(struct termp *p, const char *word, size_t len)
1.11 schwarze 368: {
369: const char *rhs;
370: size_t sz;
371:
1.15 schwarze 372: rhs = chars_a2res(p->symtab, word, len, &sz);
1.20 schwarze 373: if (rhs)
374: encode(p, rhs, sz);
375: }
376:
377:
378: void
379: term_fontlast(struct termp *p)
380: {
381: enum termfont f;
1.11 schwarze 382:
1.20 schwarze 383: f = p->fontl;
384: p->fontl = p->fontq[p->fonti];
385: p->fontq[p->fonti] = f;
386: }
387:
388:
389: void
390: term_fontrepl(struct termp *p, enum termfont f)
391: {
392:
393: p->fontl = p->fontq[p->fonti];
394: p->fontq[p->fonti] = f;
1.1 kristaps 395: }
396:
397:
1.20 schwarze 398: void
399: term_fontpush(struct termp *p, enum termfont f)
1.1 kristaps 400: {
1.7 schwarze 401:
1.20 schwarze 402: assert(p->fonti + 1 < 10);
403: p->fontl = p->fontq[p->fonti];
404: p->fontq[++p->fonti] = f;
405: }
1.1 kristaps 406:
407:
1.20 schwarze 408: const void *
409: term_fontq(struct termp *p)
410: {
1.1 kristaps 411:
1.20 schwarze 412: return(&p->fontq[p->fonti]);
413: }
1.1 kristaps 414:
415:
1.20 schwarze 416: enum termfont
417: term_fonttop(struct termp *p)
418: {
1.1 kristaps 419:
1.20 schwarze 420: return(p->fontq[p->fonti]);
421: }
1.7 schwarze 422:
423:
1.20 schwarze 424: void
425: term_fontpopq(struct termp *p, const void *key)
426: {
1.1 kristaps 427:
1.20 schwarze 428: while (p->fonti >= 0 && key != &p->fontq[p->fonti])
429: p->fonti--;
430: assert(p->fonti >= 0);
431: }
1.1 kristaps 432:
433:
1.20 schwarze 434: void
435: term_fontpop(struct termp *p)
436: {
1.1 kristaps 437:
1.20 schwarze 438: assert(p->fonti);
439: p->fonti--;
1.1 kristaps 440: }
441:
442:
443: /*
444: * Handle pwords, partial words, which may be either a single word or a
445: * phrase that cannot be broken down (such as a literal string). This
446: * handles word styling.
447: */
1.7 schwarze 448: void
449: term_word(struct termp *p, const char *word)
1.1 kristaps 450: {
1.20 schwarze 451: const char *sv, *seq;
452: int sz;
453: size_t ssz;
454: enum roffdeco deco;
1.1 kristaps 455:
1.14 schwarze 456: sv = word;
457:
1.20 schwarze 458: if (word[0] && '\0' == word[1])
1.14 schwarze 459: switch (word[0]) {
460: case('.'):
461: /* FALLTHROUGH */
462: case(','):
463: /* FALLTHROUGH */
464: case(';'):
465: /* FALLTHROUGH */
466: case(':'):
467: /* FALLTHROUGH */
468: case('?'):
469: /* FALLTHROUGH */
470: case('!'):
471: /* FALLTHROUGH */
472: case(')'):
473: /* FALLTHROUGH */
474: case(']'):
475: if ( ! (TERMP_IGNDELIM & p->flags))
476: p->flags |= TERMP_NOSPACE;
477: break;
478: default:
479: break;
480: }
1.1 kristaps 481:
1.31 ! schwarze 482: if ( ! (TERMP_NOSPACE & p->flags)) {
1.20 schwarze 483: bufferc(p, ' ');
1.31 ! schwarze 484: if (TERMP_SENTENCE & p->flags)
! 485: bufferc(p, ' ');
! 486: }
1.1 kristaps 487:
488: if ( ! (p->flags & TERMP_NONOSPACE))
489: p->flags &= ~TERMP_NOSPACE;
490:
1.31 ! schwarze 491: p->flags &= ~TERMP_SENTENCE;
! 492:
1.20 schwarze 493: /* FIXME: use strcspn. */
494:
495: while (*word) {
496: if ('\\' != *word) {
497: encode(p, word, 1);
498: word++;
499: continue;
500: }
501:
502: seq = ++word;
503: sz = a2roffdeco(&deco, &seq, &ssz);
504:
505: switch (deco) {
506: case (DECO_RESERVED):
507: res(p, seq, ssz);
508: break;
509: case (DECO_SPECIAL):
510: spec(p, seq, ssz);
511: break;
512: case (DECO_BOLD):
513: term_fontrepl(p, TERMFONT_BOLD);
514: break;
515: case (DECO_ITALIC):
516: term_fontrepl(p, TERMFONT_UNDER);
517: break;
518: case (DECO_ROMAN):
519: term_fontrepl(p, TERMFONT_NONE);
520: break;
521: case (DECO_PREVIOUS):
522: term_fontlast(p);
523: break;
524: default:
525: break;
526: }
527:
528: word += sz;
529: if (DECO_NOSPACE == deco && '\0' == *word)
530: p->flags |= TERMP_NOSPACE;
531: }
1.1 kristaps 532:
1.31 ! schwarze 533: /*
! 534: * Note that we don't process the pipe: the parser sees it as
! 535: * punctuation, but we don't in terms of typography.
! 536: */
1.14 schwarze 537: if (sv[0] && 0 == sv[1])
538: switch (sv[0]) {
539: case('('):
540: /* FALLTHROUGH */
541: case('['):
542: p->flags |= TERMP_NOSPACE;
543: break;
544: default:
545: break;
546: }
1.1 kristaps 547: }
548:
549:
550: static void
1.20 schwarze 551: adjbuf(struct termp *p, size_t sz)
1.1 kristaps 552: {
553:
1.20 schwarze 554: if (0 == p->maxcols)
555: p->maxcols = 1024;
556: while (sz >= p->maxcols)
557: p->maxcols <<= 2;
558:
559: p->buf = realloc(p->buf, p->maxcols);
560: if (NULL == p->buf) {
561: perror(NULL);
562: exit(EXIT_FAILURE);
1.1 kristaps 563: }
564: }
565:
1.4 schwarze 566:
567: static void
1.20 schwarze 568: buffera(struct termp *p, const char *word, size_t sz)
569: {
570:
571: if (p->col + sz >= p->maxcols)
572: adjbuf(p, p->col + sz);
573:
574: memcpy(&p->buf[(int)p->col], word, sz);
575: p->col += sz;
576: }
577:
578:
579: static void
580: bufferc(struct termp *p, char c)
581: {
582:
583: if (p->col + 1 >= p->maxcols)
584: adjbuf(p, p->col + 1);
585:
586: p->buf[(int)p->col++] = c;
587: }
588:
589:
590: static void
591: encode(struct termp *p, const char *word, size_t sz)
1.4 schwarze 592: {
1.20 schwarze 593: enum termfont f;
594: int i;
595:
596: /*
597: * Encode and buffer a string of characters. If the current
598: * font mode is unset, buffer directly, else encode then buffer
599: * character by character.
600: */
601:
602: if (TERMFONT_NONE == (f = term_fonttop(p))) {
603: buffera(p, word, sz);
604: return;
605: }
606:
607: for (i = 0; i < (int)sz; i++) {
608: if ( ! isgraph((u_char)word[i])) {
609: bufferc(p, word[i]);
610: continue;
1.4 schwarze 611: }
1.20 schwarze 612:
613: if (TERMFONT_UNDER == f)
614: bufferc(p, '_');
615: else
616: bufferc(p, word[i]);
617:
618: bufferc(p, 8);
619: bufferc(p, word[i]);
1.4 schwarze 620: }
621: }
1.16 schwarze 622:
623:
624: size_t
625: term_vspan(const struct roffsu *su)
626: {
627: double r;
628:
629: switch (su->unit) {
630: case (SCALE_CM):
631: r = su->scale * 2;
632: break;
633: case (SCALE_IN):
634: r = su->scale * 6;
635: break;
636: case (SCALE_PC):
637: r = su->scale;
638: break;
639: case (SCALE_PT):
640: r = su->scale / 8;
641: break;
642: case (SCALE_MM):
643: r = su->scale / 1000;
644: break;
645: case (SCALE_VS):
646: r = su->scale;
647: break;
648: default:
649: r = su->scale - 1;
650: break;
651: }
652:
653: if (r < 0.0)
654: r = 0.0;
655: return(/* LINTED */(size_t)
656: r);
657: }
658:
659:
660: size_t
661: term_hspan(const struct roffsu *su)
662: {
663: double r;
664:
665: /* XXX: CM, IN, and PT are approximations. */
666:
667: switch (su->unit) {
668: case (SCALE_CM):
669: r = 4 * su->scale;
670: break;
671: case (SCALE_IN):
672: /* XXX: this is an approximation. */
673: r = 10 * su->scale;
674: break;
675: case (SCALE_PC):
676: r = (10 * su->scale) / 6;
677: break;
678: case (SCALE_PT):
679: r = (10 * su->scale) / 72;
680: break;
681: case (SCALE_MM):
682: r = su->scale / 1000; /* FIXME: double-check. */
683: break;
684: case (SCALE_VS):
685: r = su->scale * 2 - 1; /* FIXME: double-check. */
686: break;
687: default:
688: r = su->scale;
689: break;
690: }
691:
692: if (r < 0.0)
693: r = 0.0;
694: return((size_t)/* LINTED */
695: r);
696: }
697:
698: