[BACK]Return to mdoc.3 CVS log [TXT][DIR] Up to [local] / src / usr.bin / mandoc

Annotation of src/usr.bin/mandoc/mdoc.3, Revision 1.12

1.12    ! schwarze    1: .\"    $Id: mdoc.3,v 1.11 2010/07/13 01:09:13 schwarze Exp $
1.1       kristaps    2: .\"
1.11      schwarze    3: .\" Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
                      4: .\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
1.1       kristaps    5: .\"
                      6: .\" Permission to use, copy, modify, and distribute this software for any
1.2       schwarze    7: .\" purpose with or without fee is hereby granted, provided that the above
                      8: .\" copyright notice and this permission notice appear in all copies.
1.1       kristaps    9: .\"
1.2       schwarze   10: .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     11: .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     12: .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     13: .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14: .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     15: .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     16: .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.4       schwarze   17: .\"
1.12    ! schwarze   18: .Dd $Mdocdate: July 13 2010 $
1.2       schwarze   19: .Dt MDOC 3
1.1       kristaps   20: .Os
                     21: .Sh NAME
1.7       schwarze   22: .Nm mdoc ,
1.1       kristaps   23: .Nm mdoc_alloc ,
                     24: .Nm mdoc_endparse ,
1.7       schwarze   25: .Nm mdoc_free ,
                     26: .Nm mdoc_meta ,
1.1       kristaps   27: .Nm mdoc_node ,
1.7       schwarze   28: .Nm mdoc_parseln ,
1.1       kristaps   29: .Nm mdoc_reset
                     30: .Nd mdoc macro compiler library
                     31: .Sh SYNOPSIS
1.7       schwarze   32: .In mandoc.h
1.5       schwarze   33: .In mdoc.h
1.1       kristaps   34: .Vt extern const char * const * mdoc_macronames;
                     35: .Vt extern const char * const * mdoc_argnames;
                     36: .Ft "struct mdoc *"
1.9       schwarze   37: .Fo mdoc_alloc
                     38: .Fa "struct regset *regs"
                     39: .Fa "void *data"
                     40: .Fa "mandocmsg msgs"
                     41: .Fc
1.1       kristaps   42: .Ft int
1.7       schwarze   43: .Fn mdoc_endparse "struct mdoc *mdoc"
1.1       kristaps   44: .Ft void
                     45: .Fn mdoc_free "struct mdoc *mdoc"
1.7       schwarze   46: .Ft "const struct mdoc_meta *"
                     47: .Fn mdoc_meta "const struct mdoc *mdoc"
                     48: .Ft "const struct mdoc_node *"
                     49: .Fn mdoc_node "const struct mdoc *mdoc"
1.1       kristaps   50: .Ft int
1.9       schwarze   51: .Fo mdoc_parseln
                     52: .Fa "struct mdoc *mdoc"
                     53: .Fa "int line"
                     54: .Fa "char *buf"
                     55: .Fc
1.1       kristaps   56: .Ft int
1.7       schwarze   57: .Fn mdoc_reset "struct mdoc *mdoc"
1.1       kristaps   58: .Sh DESCRIPTION
                     59: The
                     60: .Nm mdoc
1.4       schwarze   61: library parses lines of
1.1       kristaps   62: .Xr mdoc 7
1.7       schwarze   63: input
                     64: into an abstract syntax tree (AST).
1.1       kristaps   65: .Pp
                     66: In general, applications initiate a parsing sequence with
                     67: .Fn mdoc_alloc ,
1.4       schwarze   68: parse each line in a document with
1.1       kristaps   69: .Fn mdoc_parseln ,
                     70: close the parsing session with
                     71: .Fn mdoc_endparse ,
                     72: operate over the syntax tree returned by
1.4       schwarze   73: .Fn mdoc_node
1.1       kristaps   74: and
                     75: .Fn mdoc_meta ,
                     76: then free all allocated memory with
                     77: .Fn mdoc_free .
                     78: The
                     79: .Fn mdoc_reset
                     80: function may be used in order to reset the parser for another input
1.7       schwarze   81: sequence.
                     82: See the
1.1       kristaps   83: .Sx EXAMPLES
1.7       schwarze   84: section for a simple example.
1.1       kristaps   85: .Pp
1.4       schwarze   86: This section further defines the
1.1       kristaps   87: .Sx Types ,
1.4       schwarze   88: .Sx Functions
1.1       kristaps   89: and
                     90: .Sx Variables
1.7       schwarze   91: available to programmers.
                     92: Following that, the
1.4       schwarze   93: .Sx Abstract Syntax Tree
1.1       kristaps   94: section documents the output tree.
                     95: .Ss Types
                     96: Both functions (see
                     97: .Sx Functions )
                     98: and variables (see
                     99: .Sx Variables )
                    100: may use the following types:
1.6       schwarze  101: .Bl -ohang
1.1       kristaps  102: .It Vt struct mdoc
                    103: An opaque type defined in
                    104: .Pa mdoc.c .
                    105: Its values are only used privately within the library.
                    106: .It Vt struct mdoc_node
1.7       schwarze  107: A parsed node.
                    108: Defined in
1.1       kristaps  109: .Pa mdoc.h .
1.4       schwarze  110: See
1.1       kristaps  111: .Sx Abstract Syntax Tree
                    112: for details.
1.7       schwarze  113: .It Vt mandocmsg
                    114: A function callback type defined in
                    115: .Pa mandoc.h .
1.1       kristaps  116: .El
                    117: .Ss Functions
                    118: Function descriptions follow:
1.6       schwarze  119: .Bl -ohang
1.1       kristaps  120: .It Fn mdoc_alloc
1.7       schwarze  121: Allocates a parsing structure.
                    122: The
1.1       kristaps  123: .Fa data
1.7       schwarze  124: pointer is passed to
                    125: .Fa msgs .
                    126: Returns NULL on failure.
                    127: If non-NULL, the pointer must be freed with
1.1       kristaps  128: .Fn mdoc_free .
                    129: .It Fn mdoc_reset
1.7       schwarze  130: Reset the parser for another parse routine.
                    131: After its use,
1.1       kristaps  132: .Fn mdoc_parseln
1.7       schwarze  133: behaves as if invoked for the first time.
                    134: If it returns 0, memory could not be allocated.
1.1       kristaps  135: .It Fn mdoc_free
1.7       schwarze  136: Free all resources of a parser.
                    137: The pointer is no longer valid after invocation.
1.1       kristaps  138: .It Fn mdoc_parseln
1.7       schwarze  139: Parse a NUL-terminated line of input.
                    140: This line should not contain the trailing newline.
                    141: Returns 0 on failure, 1 on success.
                    142: The input buffer
1.1       kristaps  143: .Fa buf
                    144: is modified by this function.
                    145: .It Fn mdoc_endparse
1.7       schwarze  146: Signals that the parse is complete.
                    147: Note that if
1.1       kristaps  148: .Fn mdoc_endparse
                    149: is called subsequent to
                    150: .Fn mdoc_node ,
1.7       schwarze  151: the resulting tree is incomplete.
                    152: Returns 0 on failure, 1 on success.
1.1       kristaps  153: .It Fn mdoc_node
1.7       schwarze  154: Returns the first node of the parse.
                    155: Note that if
1.1       kristaps  156: .Fn mdoc_parseln
                    157: or
                    158: .Fn mdoc_endparse
                    159: return 0, the tree will be incomplete.
                    160: .It Fn mdoc_meta
1.7       schwarze  161: Returns the document's parsed meta-data.
                    162: If this information has not yet been supplied or
1.1       kristaps  163: .Fn mdoc_parseln
                    164: or
                    165: .Fn mdoc_endparse
                    166: return 0, the data will be incomplete.
                    167: .El
                    168: .Ss Variables
                    169: The following variables are also defined:
1.6       schwarze  170: .Bl -ohang
1.1       kristaps  171: .It Va mdoc_macronames
                    172: An array of string-ified token names.
                    173: .It Va mdoc_argnames
                    174: An array of string-ified token argument names.
                    175: .El
                    176: .Ss Abstract Syntax Tree
1.4       schwarze  177: The
1.1       kristaps  178: .Nm
                    179: functions produce an abstract syntax tree (AST) describing input in a
1.7       schwarze  180: regular form.
                    181: It may be reviewed at any time with
1.1       kristaps  182: .Fn mdoc_node ;
                    183: however, if called before
                    184: .Fn mdoc_endparse ,
                    185: or after
1.4       schwarze  186: .Fn mdoc_endparse
1.1       kristaps  187: or
                    188: .Fn mdoc_parseln
1.4       schwarze  189: fail, it may be incomplete.
1.1       kristaps  190: .Pp
                    191: This AST is governed by the ontological
                    192: rules dictated in
                    193: .Xr mdoc 7
1.4       schwarze  194: and derives its terminology accordingly.
1.1       kristaps  195: .Qq In-line
                    196: elements described in
                    197: .Xr mdoc 7
1.4       schwarze  198: are described simply as
1.1       kristaps  199: .Qq elements .
                    200: .Pp
1.4       schwarze  201: The AST is composed of
1.1       kristaps  202: .Vt struct mdoc_node
                     203: nodes with block, head, body, tail, element, root and text types as declared
                    204: by the
                    205: .Va type
1.7       schwarze  206: field.
                    207: Each node also provides its parse point (the
1.1       kristaps  208: .Va line ,
                    209: .Va sec ,
                    210: and
                    211: .Va pos
                    212: fields), its position in the tree (the
                    213: .Va parent ,
                    214: .Va child ,
1.10      schwarze  215: .Va nchild ,
1.4       schwarze  216: .Va next
1.1       kristaps  217: and
1.4       schwarze  218: .Va prev
1.10      schwarze  219: fields) and some type-specific data, in particular, for nodes generated
                    220: from macros, the generating macro in the
                    221: .Va tok
                    222: field.
1.1       kristaps  223: .Pp
                    224: The tree itself is arranged according to the following normal form,
                    225: where capitalised non-terminals represent nodes.
                    226: .Pp
1.6       schwarze  227: .Bl -tag -width "ELEMENTXX" -compact
1.1       kristaps  228: .It ROOT
                    229: \(<- mnode+
                    230: .It mnode
                    231: \(<- BLOCK | ELEMENT | TEXT
                    232: .It BLOCK
1.8       schwarze  233: \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
1.1       kristaps  234: .It ELEMENT
                    235: \(<- TEXT*
                    236: .It HEAD
1.10      schwarze  237: \(<- mnode*
1.1       kristaps  238: .It BODY
1.10      schwarze  239: \(<- mnode* [ENDBODY mnode*]
1.1       kristaps  240: .It TAIL
1.10      schwarze  241: \(<- mnode*
1.1       kristaps  242: .It TEXT
1.7       schwarze  243: \(<- [[:printable:],0x1e]*
1.1       kristaps  244: .El
                    245: .Pp
                    246: Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
1.8       schwarze  247: the BLOCK production: these refer to punctuation marks.
1.7       schwarze  248: Furthermore, although a TEXT node will generally have a non-zero-length
                    249: string, in the specific case of
1.1       kristaps  250: .Sq \&.Bd \-literal ,
                    251: an empty line will produce a zero-length string.
1.8       schwarze  252: Multiple body parts are only found in invocations of
                    253: .Sq \&Bl \-column ,
                    254: where a new body introduces a new phrase.
1.11      schwarze  255: .Ss Badly-nested Blocks
                    256: The ENDBODY node is available to end the formatting associated
                    257: with a given block before the physical end of that block.
                    258: It has a non-null
1.10      schwarze  259: .Va end
                    260: field, is of the BODY
                    261: .Va type ,
                    262: has the same
                    263: .Va tok
                    264: as the BLOCK it is ending, and has a
                    265: .Va pending
                    266: field pointing to that BLOCK's BODY node.
                    267: It is an indirect child of that BODY node
                    268: and has no children of its own.
                    269: .Pp
                    270: An ENDBODY node is generated when a block ends while one of its child
                    271: blocks is still open, like in the following example:
                    272: .Bd -literal -offset indent
                    273: \&.Ao ao
                    274: \&.Bo bo ac
                    275: \&.Ac bc
                    276: \&.Bc end
                    277: .Ed
                    278: .Pp
                    279: This example results in the following block structure:
                    280: .Bd -literal -offset indent
                    281: BLOCK Ao
                    282:        HEAD Ao
                    283:        BODY Ao
                    284:                TEXT ao
                    285:                BLOCK Bo, pending -> Ao
                    286:                        HEAD Bo
                    287:                        BODY Bo
                    288:                                TEXT bo
                    289:                                TEXT ac
                    290:                                ENDBODY Ao, pending -> Ao
                    291:                                TEXT bc
                    292: TEXT end
                    293: .Ed
                    294: .Pp
1.11      schwarze  295: Here, the formatting of the
                    296: .Sq \&Ao
                    297: block extends from TEXT ao to TEXT ac,
                    298: while the formatting of the
                    299: .Sq \&Bo
                    300: block extends from TEXT bo to TEXT bc.
                    301: It renders as follows in
1.10      schwarze  302: .Fl T Ns Cm ascii
                    303: mode:
1.11      schwarze  304: .Pp
1.10      schwarze  305: .Dl <ao [bo ac> bc] end
1.11      schwarze  306: .Pp
                    307: Support for badly-nested blocks is only provided for backward
1.10      schwarze  308: compatibility with some older
                    309: .Xr mdoc 7
                    310: implementations.
1.11      schwarze  311: Using badly-nested blocks is
                    312: .Em strongly discouraged :
                    313: the
                    314: .Fl T Ns Cm html
                    315: and
                    316: .Fl T Ns Cm xhtml
                    317: front-ends are unable to render them in any meaningful way.
                    318: Furthermore, behaviour when encountering badly-nested blocks is not
                     319: consistent across troff implementations, especially when using multiple
                    320: levels of badly-nested blocks.
1.1       kristaps  321: .Sh EXAMPLES
                    322: The following example reads lines from stdin and parses them, operating
1.4       schwarze  323: on the finished parse tree with
1.1       kristaps  324: .Fn parsed .
1.6       schwarze  325: This example performs only minimal error checking and does not free memory upon failure.
                    326: .Bd -literal -offset indent
1.9       schwarze  327: struct regset regs;
1.1       kristaps  328: struct mdoc *mdoc;
1.3       schwarze  329: const struct mdoc_node *node;
1.1       kristaps  330: char *buf;
                     331: ssize_t len; size_t alloc_len;
                    332: int line;
                    333:
1.9       schwarze  334: bzero(&regs, sizeof(struct regset));
1.1       kristaps  335: line = 1;
1.12    ! schwarze  336: mdoc = mdoc_alloc(&regs, NULL, NULL);
1.6       schwarze  337: buf = NULL;
                    338: alloc_len = 0;
1.1       kristaps  339:
1.6       schwarze  340: while ((len = getline(&buf, &alloc_len, stdin)) >= 0) {
                     341:     if (len && buf[len - 1] == '\en')
                    342:         buf[len - 1] = '\e0';
                    343:     if ( ! mdoc_parseln(mdoc, line, buf))
                    344:         errx(1, "mdoc_parseln");
                    345:     line++;
1.1       kristaps  346: }
                    347:
                    348: if ( ! mdoc_endparse(mdoc))
1.6       schwarze  349:     errx(1, "mdoc_endparse");
1.1       kristaps  350: if (NULL == (node = mdoc_node(mdoc)))
1.6       schwarze  351:     errx(1, "mdoc_node");
1.1       kristaps  352:
                    353: parsed(mdoc, node);
                    354: mdoc_free(mdoc);
                    355: .Ed
1.7       schwarze  356: .Pp
                    357: Please see
                    358: .Pa main.c
                    359: in the source archive for a rigorous reference.
1.1       kristaps  360: .Sh SEE ALSO
                    361: .Xr mandoc 1 ,
                    362: .Xr mdoc 7
                    363: .Sh AUTHORS
                    364: The
                    365: .Nm
1.7       schwarze  366: library was written by
1.6       schwarze  367: .An Kristaps Dzonsons Aq kristaps@bsd.lv .