=================================================================== RCS file: /cvsrepo/anoncvs/cvs/src/usr.bin/mandoc/mandoc.c,v retrieving revision 1.32 retrieving revision 1.33 diff -c -r1.32 -r1.33 *** src/usr.bin/mandoc/mandoc.c 2012/05/28 13:00:51 1.32 --- src/usr.bin/mandoc/mandoc.c 2012/05/28 17:08:48 1.33 *************** *** 1,4 **** ! /* $Id: mandoc.c,v 1.32 2012/05/28 13:00:51 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2011, 2012 Ingo Schwarze --- 1,4 ---- ! /* $Id: mandoc.c,v 1.33 2012/05/28 17:08:48 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2011, 2012 Ingo Schwarze *************** *** 33,103 **** static int a2time(time_t *, const char *, const char *); static char *time2a(time_t); - static int numescape(const char *); - /* - * Pass over recursive numerical expressions. This context of this - * function is important: it's only called within character-terminating - * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial - * recursion: we don't care about what's in these blocks. - * This returns the number of characters skipped or -1 if an error - * occurs (the caller should bail). - */ - static int - numescape(const char *start) - { - int i; - size_t sz; - const char *cp; - i = 0; - - /* The expression consists of a subexpression. */ - - if ('\\' == start[i]) { - cp = &start[++i]; - /* - * Read past the end of the subexpression. - * Bail immediately on errors. - */ - if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) - return(-1); - return(i + cp - &start[i]); - } - - if ('(' != start[i++]) - return(0); - - /* - * A parenthesised subexpression. Read until the closing - * parenthesis, making sure to handle any nested subexpressions - * that might ruin our parse. - */ - - while (')' != start[i]) { - sz = strcspn(&start[i], ")\\"); - i += (int)sz; - - if ('\0' == start[i]) - return(-1); - else if ('\\' != start[i]) - continue; - - cp = &start[++i]; - if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) - return(-1); - i += cp - &start[i]; - } - - /* Read past the terminating ')'. */ - return(++i); - } - enum mandoc_esc mandoc_escape(const char **end, const char **start, int *sz) { ! char c, term, numeric; ! int i, lim, ssz, rlim; const char *cp, *rstart; enum mandoc_esc gly; --- 33,45 ---- static int a2time(time_t *, const char *, const char *); static char *time2a(time_t); enum mandoc_esc mandoc_escape(const char **end, const char **start, int *sz) { ! char c, term; ! int i, rlim; const char *cp, *rstart; enum mandoc_esc gly; *************** *** 105,113 **** rstart = cp; if (start) *start = rstart; ! i = lim = 0; gly = ESCAPE_ERROR; ! term = numeric = '\0'; switch ((c = cp[i++])) { /* --- 47,55 ---- rstart = cp; if (start) *start = rstart; ! i = rlim = 0; gly = ESCAPE_ERROR; ! term = '\0'; switch ((c = cp[i++])) { /* *************** *** 117,123 **** */ case ('('): gly = ESCAPE_SPECIAL; ! lim = 2; break; case ('['): gly = ESCAPE_SPECIAL; --- 59,65 ---- */ case ('('): gly = ESCAPE_SPECIAL; ! rlim = 2; break; case ('['): gly = ESCAPE_SPECIAL; *************** *** 179,191 **** switch (cp[i++]) { case ('('): ! lim = 2; break; case ('['): term = ']'; break; default: ! lim = 1; i--; break; } --- 121,133 ---- switch (cp[i++]) { case ('('): ! rlim = 2; break; case ('['): term = ']'; break; default: ! rlim = 1; i--; break; } *************** *** 240,246 **** gly = ESCAPE_IGNORE; if ('\'' != cp[i++]) return(ESCAPE_ERROR); ! term = numeric = '\''; break; /* --- 182,188 ---- gly = ESCAPE_IGNORE; if ('\'' != cp[i++]) return(ESCAPE_ERROR); ! term = '\''; break; /* *************** *** 280,295 **** switch (cp[i++]) { case ('('): ! lim = 2; break; case ('['): ! term = numeric = ']'; break; case ('\''): ! term = numeric = '\''; break; default: ! lim = 1; i--; break; } --- 222,237 ---- switch (cp[i++]) { case ('('): ! rlim = 2; break; case ('['): ! term = ']'; break; case ('\''): ! term = '\''; break; default: ! rlim = 1; i--; break; } *************** *** 306,375 **** */ default: gly = ESCAPE_SPECIAL; ! lim = 1; i--; break; } assert(ESCAPE_ERROR != gly); ! rstart = &cp[i]; if (start) *start = rstart; /* ! * If a terminating block has been specified, we need to ! * handle the case of recursion, which could have their ! * own terminating blocks that mess up our parse. This, by the ! * way, means that the "start" and "size" values will be ! * effectively meaningless. */ - ssz = 0; - if (numeric && -1 == (ssz = numescape(&cp[i]))) - return(ESCAPE_ERROR); - - i += ssz; - rlim = -1; - - /* - * We have a character terminator. Try to read up to that - * character. If we can't (i.e., we hit the nil), then return - * an error; if we can, calculate our length, read past the - * terminating character, and exit. - */ - if ('\0' != term) { ! *end = strchr(&cp[i], term); ! if ('\0' == *end) return(ESCAPE_ERROR); ! ! rlim = *end - &cp[i]; ! if (sz) ! *sz = rlim; ! (*end)++; ! goto out; } - - assert(lim > 0); - - /* - * We have a numeric limit. If the string is shorter than that, - * stop and return an error. Else adjust our endpoint, length, - * and return the current glyph. - */ - - if ((size_t)lim > strlen(&cp[i])) - return(ESCAPE_ERROR); - - rlim = lim; if (sz) *sz = rlim; - - *end = &cp[i] + lim; - - out: - assert(rlim >= 0 && rstart); /* Run post-processors. */ --- 248,294 ---- */ default: gly = ESCAPE_SPECIAL; ! rlim = 1; i--; break; } assert(ESCAPE_ERROR != gly); ! *end = rstart = &cp[i]; if (start) *start = rstart; /* ! * Read up to the terminating character, ! * paying attention to nested escapes. */ if ('\0' != term) { ! while (**end != term) { ! switch (**end) { ! case ('\0'): ! return(ESCAPE_ERROR); ! case ('\\'): ! (*end)++; ! if (ESCAPE_ERROR == ! mandoc_escape(end, NULL, NULL)) ! return(ESCAPE_ERROR); ! break; ! default: ! (*end)++; ! break; ! } ! } ! rlim = (*end)++ - rstart; ! } else { ! assert(rlim > 0); ! if ((size_t)rlim > strlen(rstart)) return(ESCAPE_ERROR); ! *end += rlim; } if (sz) *sz = rlim; /* Run post-processors. */