Annotation of src/usr.bin/mandoc/term.c, Revision 1.38
1.38 ! schwarze 1: /* $Id: term.c,v 1.37 2010/06/10 22:50:10 schwarze Exp $ */
1.1 kristaps 2: /*
1.38 ! schwarze 3: * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@bsd.lv>
1.1 kristaps 4: *
5: * Permission to use, copy, modify, and distribute this software for any
1.2 schwarze 6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
1.1 kristaps 8: *
1.2 schwarze 9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.1 kristaps 16: */
1.20 schwarze 17: #include <sys/types.h>
18:
1.1 kristaps 19: #include <assert.h>
1.20 schwarze 20: #include <ctype.h>
1.36 schwarze 21: #include <stdint.h>
1.1 kristaps 22: #include <stdio.h>
23: #include <stdlib.h>
24: #include <string.h>
25:
1.34 schwarze 26: #include "mandoc.h"
1.15 schwarze 27: #include "chars.h"
1.16 schwarze 28: #include "out.h"
1.1 kristaps 29: #include "term.h"
30: #include "man.h"
31: #include "mdoc.h"
1.16 schwarze 32: #include "main.h"
1.1 kristaps 33:
/*
 * Forward declarations of the file-local helpers defined further down.
 * spec() and res() translate escape names through the symbol table;
 * buffera(), bufferc() and adjbuf() manage the output buffer;
 * encode() applies the current font while buffering.
 */
static	void		  spec(struct termp *p, const char *word, size_t len);
static	void		  res(struct termp *p, const char *word, size_t len);
static	void		  buffera(struct termp *p, const char *word, size_t sz);
static	void		  bufferc(struct termp *p, char c);
static	void		  adjbuf(struct termp *p, size_t sz);
static	void		  encode(struct termp *p, const char *word, size_t sz);
1.1 kristaps 40:
41:
1.37 schwarze 42: void
43: term_free(struct termp *p)
1.1 kristaps 44: {
45:
1.37 schwarze 46: if (p->buf)
47: free(p->buf);
48: if (p->symtab)
49: chars_free(p->symtab);
50:
51: free(p);
1.1 kristaps 52: }
53:
54:
1.13 schwarze 55: void
1.37 schwarze 56: term_begin(struct termp *p, term_margin head,
57: term_margin foot, const void *arg)
1.1 kristaps 58: {
59:
1.37 schwarze 60: p->headf = head;
61: p->footf = foot;
62: p->argf = arg;
63: (*p->begin)(p);
1.1 kristaps 64: }
65:
66:
1.37 schwarze 67: void
68: term_end(struct termp *p)
1.1 kristaps 69: {
70:
1.37 schwarze 71: (*p->end)(p);
1.1 kristaps 72: }
73:
74:
1.37 schwarze 75: struct termp *
76: term_alloc(enum termenc enc)
1.1 kristaps 77: {
1.36 schwarze 78: struct termp *p;
1.1 kristaps 79:
1.19 schwarze 80: p = calloc(1, sizeof(struct termp));
81: if (NULL == p) {
82: perror(NULL);
83: exit(EXIT_FAILURE);
84: }
1.36 schwarze 85:
1.30 schwarze 86: p->tabwidth = 5;
1.1 kristaps 87: p->enc = enc;
1.37 schwarze 88: p->defrmargin = 78;
1.1 kristaps 89: return(p);
90: }
91:
92:
93: /*
94: * Flush a line of text. A "line" is loosely defined as being something
95: * that should be followed by a newline, regardless of whether it's
96: * broken apart by newlines getting there. A line can also be a
1.27 schwarze 97: * fragment of a columnar list (`Bl -tag' or `Bl -column'), which does
98: * not have a trailing newline.
1.1 kristaps 99: *
1.27 schwarze 100: * The following flags may be specified:
1.1 kristaps 101: *
102: * - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
103: * offset value. This is useful when doing columnar lists where the
104: * prior column has right-padded.
105: *
106: * - TERMP_NOBREAK: this is the most important and is used when making
107: * columns. In short: don't print a newline and instead pad to the
108: * right margin. Used in conjunction with TERMP_NOLPAD.
109: *
1.9 schwarze 110: * - TERMP_TWOSPACE: when padding, make sure there are at least two
111: * space characters of padding. Otherwise, rather break the line.
112: *
1.6 schwarze 113: * - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
114: * the line is overrun, and don't pad-right if it's underrun.
115: *
116: * - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
117: * overruning, instead save the position and continue at that point
118: * when the next invocation.
1.1 kristaps 119: *
120: * In-line line breaking:
121: *
122: * If TERMP_NOBREAK is specified and the line overruns the right
123: * margin, it will break and pad-right to the right margin after
124: * writing. If maxrmargin is violated, it will break and continue
1.19 schwarze 125: * writing from the right-margin, which will lead to the above scenario
126: * upon exit. Otherwise, the line will break at the right margin.
1.1 kristaps 127: */
128: void
129: term_flushln(struct termp *p)
130: {
1.19 schwarze 131: int i; /* current input position in p->buf */
132: size_t vis; /* current visual position on output */
133: size_t vbl; /* number of blanks to prepend to output */
1.33 schwarze 134: size_t vend; /* end of word visual position on output */
1.19 schwarze 135: size_t bp; /* visual right border position */
136: int j; /* temporary loop index */
1.22 schwarze 137: int jhy; /* last hyphen before line overflow */
1.19 schwarze 138: size_t maxvis, mmax;
1.1 kristaps 139:
140: /*
141: * First, establish the maximum columns of "visible" content.
142: * This is usually the difference between the right-margin and
143: * an indentation, but can be, for tagged lists or columns, a
1.19 schwarze 144: * small set of values.
1.1 kristaps 145: */
146:
147: assert(p->offset < p->rmargin);
1.9 schwarze 148:
1.26 schwarze 149: maxvis = (int)(p->rmargin - p->offset) - p->overstep < 0 ?
1.19 schwarze 150: /* LINTED */
1.26 schwarze 151: 0 : p->rmargin - p->offset - p->overstep;
152: mmax = (int)(p->maxrmargin - p->offset) - p->overstep < 0 ?
1.19 schwarze 153: /* LINTED */
1.26 schwarze 154: 0 : p->maxrmargin - p->offset - p->overstep;
1.9 schwarze 155:
1.1 kristaps 156: bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
1.19 schwarze 157:
1.33 schwarze 158: /*
159: * Indent the first line of a paragraph.
160: */
161: vbl = p->flags & TERMP_NOLPAD ? 0 : p->offset;
162:
1.19 schwarze 163: /*
164: * FIXME: if bp is zero, we still output the first word before
165: * breaking the line.
166: */
167:
1.33 schwarze 168: vis = vend = i = 0;
1.22 schwarze 169: while (i < (int)p->col) {
170:
171: /*
1.30 schwarze 172: * Handle literal tab characters.
173: */
174: for (j = i; j < (int)p->col; j++) {
175: if ('\t' != p->buf[j])
176: break;
1.33 schwarze 177: vend = (vis/p->tabwidth+1)*p->tabwidth;
1.30 schwarze 178: vbl += vend - vis;
179: vis = vend;
180: }
1.22 schwarze 181:
1.1 kristaps 182: /*
183: * Count up visible word characters. Control sequences
184: * (starting with the CSI) aren't counted. A space
185: * generates a non-printing word, which is valid (the
186: * space is printed according to regular spacing rules).
187: */
188:
189: /* LINTED */
1.30 schwarze 190: for (jhy = 0; j < (int)p->col; j++) {
191: if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
1.1 kristaps 192: break;
1.35 schwarze 193: if (8 != p->buf[j]) {
1.22 schwarze 194: if (vend > vis && vend < bp &&
1.35 schwarze 195: ASCII_HYPH == p->buf[j])
1.22 schwarze 196: jhy = j;
197: vend++;
1.35 schwarze 198: } else
199: vend--;
1.1 kristaps 200: }
201:
202: /*
1.5 schwarze 203: * Find out whether we would exceed the right margin.
1.33 schwarze 204: * If so, break to the next line.
1.5 schwarze 205: */
1.33 schwarze 206: if (vend > bp && 0 == jhy && vis > 0) {
1.22 schwarze 207: vend -= vis;
1.37 schwarze 208: (*p->endline)(p);
1.5 schwarze 209: if (TERMP_NOBREAK & p->flags) {
1.29 schwarze 210: p->viscol = p->rmargin;
1.37 schwarze 211: (*p->advance)(p, p->rmargin);
1.22 schwarze 212: vend += p->rmargin - p->offset;
1.5 schwarze 213: } else {
1.33 schwarze 214: p->viscol = 0;
215: vbl = p->offset;
1.5 schwarze 216: }
1.33 schwarze 217:
1.26 schwarze 218: /* Remove the p->overstep width. */
1.33 schwarze 219:
1.18 schwarze 220: bp += (int)/* LINTED */
1.26 schwarze 221: p->overstep;
222: p->overstep = 0;
1.1 kristaps 223: }
224:
1.3 schwarze 225: /*
1.30 schwarze 226: * Skip leading tabs, they were handled above.
227: */
228: while (i < (int)p->col && '\t' == p->buf[i])
229: i++;
230:
1.33 schwarze 231: /* Write out the [remaining] word. */
1.1 kristaps 232: for ( ; i < (int)p->col; i++) {
1.25 schwarze 233: if (vend > bp && jhy > 0 && i > jhy)
1.30 schwarze 234: break;
235: if ('\t' == p->buf[i])
1.1 kristaps 236: break;
1.22 schwarze 237: if (' ' == p->buf[i]) {
1.33 schwarze 238: while (' ' == p->buf[i]) {
239: vbl++;
240: i++;
241: }
1.22 schwarze 242: break;
243: }
1.33 schwarze 244: if (ASCII_NBRSP == p->buf[i]) {
245: vbl++;
246: continue;
247: }
248:
249: /*
250: * Now we definitely know there will be
251: * printable characters to output,
252: * so write preceding white space now.
253: */
254: if (vbl) {
1.37 schwarze 255: (*p->advance)(p, vbl);
1.33 schwarze 256: p->viscol += vbl;
257: vbl = 0;
258: }
1.35 schwarze 259:
260: if (ASCII_HYPH == p->buf[i])
1.37 schwarze 261: (*p->letter)(p, '-');
1.35 schwarze 262: else
1.37 schwarze 263: (*p->letter)(p, p->buf[i]);
1.35 schwarze 264:
1.33 schwarze 265: p->viscol += 1;
1.1 kristaps 266: }
1.33 schwarze 267: vend += vbl;
1.22 schwarze 268: vis = vend;
1.1 kristaps 269: }
1.18 schwarze 270:
1.9 schwarze 271: p->col = 0;
1.26 schwarze 272: p->overstep = 0;
1.1 kristaps 273:
1.9 schwarze 274: if ( ! (TERMP_NOBREAK & p->flags)) {
1.29 schwarze 275: p->viscol = 0;
1.37 schwarze 276: (*p->endline)(p);
1.1 kristaps 277: return;
278: }
279:
1.9 schwarze 280: if (TERMP_HANG & p->flags) {
281: /* We need one blank after the tag. */
1.26 schwarze 282: p->overstep = /* LINTED */
1.9 schwarze 283: vis - maxvis + 1;
284:
285: /*
286: * Behave exactly the same way as groff:
287: * If we have overstepped the margin, temporarily move
288: * it to the right and flag the rest of the line to be
289: * shorter.
290: * If we landed right at the margin, be happy.
291: * If we are one step before the margin, temporarily
292: * move it one step LEFT and flag the rest of the line
293: * to be longer.
294: */
1.26 schwarze 295: if (p->overstep >= -1) {
296: assert((int)maxvis + p->overstep >= 0);
1.9 schwarze 297: /* LINTED */
1.26 schwarze 298: maxvis += p->overstep;
1.9 schwarze 299: } else
1.26 schwarze 300: p->overstep = 0;
1.9 schwarze 301:
302: } else if (TERMP_DANGLE & p->flags)
303: return;
1.1 kristaps 304:
1.9 schwarze 305: /* Right-pad. */
306: if (maxvis > vis + /* LINTED */
1.29 schwarze 307: ((TERMP_TWOSPACE & p->flags) ? 1 : 0)) {
308: p->viscol += maxvis - vis;
1.37 schwarze 309: (*p->advance)(p, maxvis - vis);
310: vis += (maxvis - vis);
1.29 schwarze 311: } else { /* ...or newline break. */
1.37 schwarze 312: (*p->endline)(p);
1.29 schwarze 313: p->viscol = p->rmargin;
1.37 schwarze 314: (*p->advance)(p, p->rmargin);
1.9 schwarze 315: }
1.1 kristaps 316: }
317:
318:
319: /*
320: * A newline only breaks an existing line; it won't assert vertical
321: * space. All data in the output buffer is flushed prior to the newline
322: * assertion.
323: */
324: void
325: term_newln(struct termp *p)
326: {
327:
328: p->flags |= TERMP_NOSPACE;
1.29 schwarze 329: if (0 == p->col && 0 == p->viscol) {
1.1 kristaps 330: p->flags &= ~TERMP_NOLPAD;
331: return;
332: }
333: term_flushln(p);
334: p->flags &= ~TERMP_NOLPAD;
335: }
336:
337:
338: /*
339: * Asserts a vertical space (a full, empty line-break between lines).
340: * Note that if used twice, this will cause two blank spaces and so on.
341: * All data in the output buffer is flushed prior to the newline
342: * assertion.
343: */
344: void
345: term_vspace(struct termp *p)
346: {
347:
348: term_newln(p);
1.29 schwarze 349: p->viscol = 0;
1.37 schwarze 350: (*p->endline)(p);
1.1 kristaps 351: }
352:
353:
354: static void
1.20 schwarze 355: spec(struct termp *p, const char *word, size_t len)
1.1 kristaps 356: {
357: const char *rhs;
358: size_t sz;
359:
1.15 schwarze 360: rhs = chars_a2ascii(p->symtab, word, len, &sz);
1.20 schwarze 361: if (rhs)
362: encode(p, rhs, sz);
1.11 schwarze 363: }
364:
365:
366: static void
1.20 schwarze 367: res(struct termp *p, const char *word, size_t len)
1.11 schwarze 368: {
369: const char *rhs;
370: size_t sz;
371:
1.15 schwarze 372: rhs = chars_a2res(p->symtab, word, len, &sz);
1.20 schwarze 373: if (rhs)
374: encode(p, rhs, sz);
375: }
376:
377:
378: void
379: term_fontlast(struct termp *p)
380: {
381: enum termfont f;
1.11 schwarze 382:
1.20 schwarze 383: f = p->fontl;
384: p->fontl = p->fontq[p->fonti];
385: p->fontq[p->fonti] = f;
386: }
387:
388:
389: void
390: term_fontrepl(struct termp *p, enum termfont f)
391: {
392:
393: p->fontl = p->fontq[p->fonti];
394: p->fontq[p->fonti] = f;
1.1 kristaps 395: }
396:
397:
1.20 schwarze 398: void
399: term_fontpush(struct termp *p, enum termfont f)
1.1 kristaps 400: {
1.7 schwarze 401:
1.20 schwarze 402: assert(p->fonti + 1 < 10);
403: p->fontl = p->fontq[p->fonti];
404: p->fontq[++p->fonti] = f;
405: }
1.1 kristaps 406:
407:
1.20 schwarze 408: const void *
409: term_fontq(struct termp *p)
410: {
1.1 kristaps 411:
1.20 schwarze 412: return(&p->fontq[p->fonti]);
413: }
1.1 kristaps 414:
415:
1.20 schwarze 416: enum termfont
417: term_fonttop(struct termp *p)
418: {
1.1 kristaps 419:
1.20 schwarze 420: return(p->fontq[p->fonti]);
421: }
1.7 schwarze 422:
423:
1.20 schwarze 424: void
425: term_fontpopq(struct termp *p, const void *key)
426: {
1.1 kristaps 427:
1.20 schwarze 428: while (p->fonti >= 0 && key != &p->fontq[p->fonti])
429: p->fonti--;
430: assert(p->fonti >= 0);
431: }
1.1 kristaps 432:
433:
1.20 schwarze 434: void
435: term_fontpop(struct termp *p)
436: {
1.1 kristaps 437:
1.20 schwarze 438: assert(p->fonti);
439: p->fonti--;
1.1 kristaps 440: }
441:
442:
443: /*
444: * Handle pwords, partial words, which may be either a single word or a
445: * phrase that cannot be broken down (such as a literal string). This
446: * handles word styling.
447: */
1.7 schwarze 448: void
449: term_word(struct termp *p, const char *word)
1.1 kristaps 450: {
1.20 schwarze 451: const char *sv, *seq;
452: int sz;
453: size_t ssz;
454: enum roffdeco deco;
1.1 kristaps 455:
1.14 schwarze 456: sv = word;
457:
1.20 schwarze 458: if (word[0] && '\0' == word[1])
1.14 schwarze 459: switch (word[0]) {
460: case('.'):
461: /* FALLTHROUGH */
462: case(','):
463: /* FALLTHROUGH */
464: case(';'):
465: /* FALLTHROUGH */
466: case(':'):
467: /* FALLTHROUGH */
468: case('?'):
469: /* FALLTHROUGH */
470: case('!'):
471: /* FALLTHROUGH */
472: case(')'):
473: /* FALLTHROUGH */
474: case(']'):
475: if ( ! (TERMP_IGNDELIM & p->flags))
476: p->flags |= TERMP_NOSPACE;
477: break;
478: default:
479: break;
480: }
1.1 kristaps 481:
1.31 schwarze 482: if ( ! (TERMP_NOSPACE & p->flags)) {
1.20 schwarze 483: bufferc(p, ' ');
1.31 schwarze 484: if (TERMP_SENTENCE & p->flags)
485: bufferc(p, ' ');
486: }
1.1 kristaps 487:
488: if ( ! (p->flags & TERMP_NONOSPACE))
489: p->flags &= ~TERMP_NOSPACE;
490:
1.31 schwarze 491: p->flags &= ~TERMP_SENTENCE;
492:
1.20 schwarze 493: /* FIXME: use strcspn. */
494:
495: while (*word) {
496: if ('\\' != *word) {
497: encode(p, word, 1);
498: word++;
499: continue;
500: }
501:
502: seq = ++word;
503: sz = a2roffdeco(&deco, &seq, &ssz);
504:
505: switch (deco) {
506: case (DECO_RESERVED):
507: res(p, seq, ssz);
508: break;
509: case (DECO_SPECIAL):
510: spec(p, seq, ssz);
511: break;
512: case (DECO_BOLD):
513: term_fontrepl(p, TERMFONT_BOLD);
514: break;
515: case (DECO_ITALIC):
516: term_fontrepl(p, TERMFONT_UNDER);
517: break;
518: case (DECO_ROMAN):
519: term_fontrepl(p, TERMFONT_NONE);
520: break;
521: case (DECO_PREVIOUS):
522: term_fontlast(p);
523: break;
524: default:
525: break;
526: }
527:
528: word += sz;
529: if (DECO_NOSPACE == deco && '\0' == *word)
530: p->flags |= TERMP_NOSPACE;
531: }
1.1 kristaps 532:
1.31 schwarze 533: /*
534: * Note that we don't process the pipe: the parser sees it as
535: * punctuation, but we don't in terms of typography.
536: */
1.14 schwarze 537: if (sv[0] && 0 == sv[1])
538: switch (sv[0]) {
539: case('('):
540: /* FALLTHROUGH */
541: case('['):
542: p->flags |= TERMP_NOSPACE;
543: break;
544: default:
545: break;
546: }
1.1 kristaps 547: }
548:
549:
550: static void
1.20 schwarze 551: adjbuf(struct termp *p, size_t sz)
1.1 kristaps 552: {
553:
1.20 schwarze 554: if (0 == p->maxcols)
555: p->maxcols = 1024;
556: while (sz >= p->maxcols)
557: p->maxcols <<= 2;
558:
559: p->buf = realloc(p->buf, p->maxcols);
560: if (NULL == p->buf) {
561: perror(NULL);
562: exit(EXIT_FAILURE);
1.1 kristaps 563: }
564: }
565:
1.4 schwarze 566:
567: static void
1.20 schwarze 568: buffera(struct termp *p, const char *word, size_t sz)
569: {
570:
571: if (p->col + sz >= p->maxcols)
572: adjbuf(p, p->col + sz);
573:
574: memcpy(&p->buf[(int)p->col], word, sz);
575: p->col += sz;
576: }
577:
578:
579: static void
580: bufferc(struct termp *p, char c)
581: {
582:
583: if (p->col + 1 >= p->maxcols)
584: adjbuf(p, p->col + 1);
585:
586: p->buf[(int)p->col++] = c;
587: }
588:
589:
590: static void
591: encode(struct termp *p, const char *word, size_t sz)
1.4 schwarze 592: {
1.20 schwarze 593: enum termfont f;
594: int i;
595:
596: /*
597: * Encode and buffer a string of characters. If the current
598: * font mode is unset, buffer directly, else encode then buffer
599: * character by character.
600: */
601:
1.38 ! schwarze 602: if (TERMFONT_NONE == (f = term_fonttop(p))) {
1.20 schwarze 603: buffera(p, word, sz);
604: return;
605: }
606:
607: for (i = 0; i < (int)sz; i++) {
608: if ( ! isgraph((u_char)word[i])) {
609: bufferc(p, word[i]);
610: continue;
1.4 schwarze 611: }
1.20 schwarze 612:
613: if (TERMFONT_UNDER == f)
614: bufferc(p, '_');
615: else
616: bufferc(p, word[i]);
617:
618: bufferc(p, 8);
619: bufferc(p, word[i]);
1.4 schwarze 620: }
621: }
1.16 schwarze 622:
623:
624: size_t
625: term_vspan(const struct roffsu *su)
626: {
627: double r;
628:
629: switch (su->unit) {
630: case (SCALE_CM):
631: r = su->scale * 2;
632: break;
633: case (SCALE_IN):
634: r = su->scale * 6;
635: break;
636: case (SCALE_PC):
637: r = su->scale;
638: break;
639: case (SCALE_PT):
640: r = su->scale / 8;
641: break;
642: case (SCALE_MM):
643: r = su->scale / 1000;
644: break;
645: case (SCALE_VS):
646: r = su->scale;
647: break;
648: default:
649: r = su->scale - 1;
650: break;
651: }
652:
653: if (r < 0.0)
654: r = 0.0;
655: return(/* LINTED */(size_t)
656: r);
657: }
658:
659:
660: size_t
661: term_hspan(const struct roffsu *su)
662: {
663: double r;
664:
665: /* XXX: CM, IN, and PT are approximations. */
666:
667: switch (su->unit) {
668: case (SCALE_CM):
669: r = 4 * su->scale;
670: break;
671: case (SCALE_IN):
672: /* XXX: this is an approximation. */
673: r = 10 * su->scale;
674: break;
675: case (SCALE_PC):
676: r = (10 * su->scale) / 6;
677: break;
678: case (SCALE_PT):
679: r = (10 * su->scale) / 72;
680: break;
681: case (SCALE_MM):
682: r = su->scale / 1000; /* FIXME: double-check. */
683: break;
684: case (SCALE_VS):
685: r = su->scale * 2 - 1; /* FIXME: double-check. */
686: break;
687: default:
688: r = su->scale;
689: break;
690: }
691:
692: if (r < 0.0)
693: r = 0.0;
694: return((size_t)/* LINTED */
695: r);
696: }
697:
698: