=================================================================== RCS file: /cvsrepo/anoncvs/cvs/src/usr.bin/mandoc/mdoc_validate.c,v retrieving revision 1.248 retrieving revision 1.249 diff -c -r1.248 -r1.249 *** src/usr.bin/mandoc/mdoc_validate.c 2017/06/10 01:48:31 1.248 --- src/usr.bin/mandoc/mdoc_validate.c 2017/06/10 16:53:58 1.249 *************** *** 1,4 **** ! /* $OpenBSD: mdoc_validate.c,v 1.248 2017/06/10 01:48:31 schwarze Exp $ */ /* * Copyright (c) 2008-2012 Kristaps Dzonsons * Copyright (c) 2010-2017 Ingo Schwarze --- 1,4 ---- ! /* $OpenBSD: mdoc_validate.c,v 1.249 2017/06/10 16:53:58 schwarze Exp $ */ /* * Copyright (c) 2008-2012 Kristaps Dzonsons * Copyright (c) 2010-2017 Ingo Schwarze *************** *** 410,418 **** post_delim(POST_ARGS) { const struct roff_node *nch; ! const char *lc; enum mdelim delim; nch = mdoc->last->last; if (nch == NULL || nch->type != ROFFT_TEXT) return; --- 410,426 ---- post_delim(POST_ARGS) { const struct roff_node *nch; ! const char *lc, *cp; ! int nw; enum mdelim delim; + enum roff_tok tok; + /* + * Find candidates: at least two bytes, + * the last one a closing or middle delimiter. + */ + + tok = mdoc->last->tok; nch = mdoc->last->last; if (nch == NULL || nch->type != ROFFT_TEXT) return; *************** *** 422,430 **** delim = mdoc_isdelim(lc); if (delim == DELIM_NONE || delim == DELIM_OPEN) return; mandoc_vmsg(MANDOCERR_DELIM, mdoc->parse, nch->line, nch->pos + (lc - nch->string), ! "%s%s %s", roff_name[mdoc->last->tok], nch == mdoc->last->child ? "" : " ...", nch->string); } --- 430,503 ---- delim = mdoc_isdelim(lc); if (delim == DELIM_NONE || delim == DELIM_OPEN) return; + + /* + * Reduce false positives by allowing various cases. + */ + + /* Escaped delimiters. */ + if (lc > nch->string + 1 && lc[-2] == '\\' && + (lc[-1] == '&' || lc[-1] == 'e')) + return; + + /* Specific byte sequences. */ + switch (*lc) { + case ')': + for (cp = lc; cp >= nch->string; cp--) + if (*cp == '(') + return; + break; + case '.': + if (lc > nch->string + 1 && lc[-2] == '.' && lc[-1] == '.') + return; + if (lc[-1] == '.') + return; + break; + case ';': + if (tok == MDOC_Vt) + return; + break; + case '?': + if (lc[-1] == '?') + return; + break; + case ']': + for (cp = lc; cp >= nch->string; cp--) + if (*cp == '[') + return; + break; + case '|': + if (lc == nch->string + 1 && lc[-1] == '|') + return; + default: + break; + } + + /* Exactly two non-alphanumeric bytes. */ + if (lc == nch->string + 1 && !isalnum((unsigned char)lc[-1])) + return; + + /* At least three alphabetic words with a sentence ending. */ + if (strchr("!.:?", *lc) != NULL && (tok == MDOC_Em || + tok == MDOC_Li || tok == MDOC_No || tok == MDOC_Po || + tok == MDOC_Pq || tok == MDOC_Sy)) { + nw = 0; + for (cp = lc - 1; cp >= nch->string; cp--) { + if (*cp == ' ') { + nw++; + if (cp > nch->string && cp[-1] == ',') + cp--; + } else if (isalpha((unsigned int)*cp)) { + if (nw > 1) + return; + } else + break; + } + } + mandoc_vmsg(MANDOCERR_DELIM, mdoc->parse, nch->line, nch->pos + (lc - nch->string), ! "%s%s %s", roff_name[tok], nch == mdoc->last->child ? "" : " ...", nch->string); }