org-element: Fix paragraph parsing

* lisp/org-element.el (org-element--set-regexps): Refine
  `org-element-paragraph-separate'.
(org-element-paragraph-parser): Fix paragraph parsing when followed by
a dual keyword or an ill-formed latex environment.

* testing/lisp/test-org-element.el (test-org-element/paragraph-parser):
  Add tests.

Reported-by: Jacob Gerlach <jacobgerlach@gmail.com>
<http://permalink.gmane.org/gmane.emacs.orgmode/97256>
This commit is contained in:
Nicolas Goaziou 2015-04-27 22:24:00 +02:00
parent 2965f8fb0c
commit eb77fed33f
2 changed files with 66 additions and 66 deletions

View File

@ -159,16 +159,20 @@ specially in `org-element--object-lex'.")
"$" "\\|"
;; Tables (any type).
"\\(?:|\\|\\+-[-+]\\)" "\\|"
;; Blocks (any type), Babel calls and keywords. This
;; is only an indication and need some thorough check.
"#\\(?:[+ ]\\|$\\)" "\\|"
;; Drawers (any type) and fixed-width areas. This is
;; also only an indication.
":" "\\|"
;; Comments, keyword-like or block-like constructs.
;; Blocks and keywords with dual values need to be
;; double-checked.
"#\\(?: \\|$\\|\\+\\(?:"
"BEGIN_\\S-+" "\\|"
"\\S-+\\(?:\\[.*\\]\\)?:\\(?: \\|$\\)\\)\\)"
"\\|"
;; Drawers (any type) and fixed-width areas. Drawers
;; need to be double-checked.
":\\(?: \\|$\\|[-_[:word:]]+:[ \t]*$\\)" "\\|"
;; Horizontal rules.
"-\\{5,\\}[ \t]*$" "\\|"
;; LaTeX environments.
"\\\\begin{\\([A-Za-z0-9]+\\*?\\)}" "\\|"
"\\\\begin{\\([A-Za-z0-9*]+\\)}" "\\|"
;; Clock lines.
(regexp-quote org-clock-string) "\\|"
;; Lists.
@ -2233,65 +2237,42 @@ Assume point is at the beginning of the paragraph."
(before-blank
(let ((case-fold-search t))
(end-of-line)
(if (not (re-search-forward
org-element-paragraph-separate limit 'm))
limit
;; A matching `org-element-paragraph-separate' is not
;; necessarily the end of the paragraph. In
;; particular, lines starting with # or : as a first
;; non-space character are ambiguous. We have to
;; check if they are valid Org syntax (e.g., not an
;; incomplete keyword).
(beginning-of-line)
(while (not
(or
;; There's no ambiguity for other symbols or
;; empty lines: stop here.
(looking-at "[ \t]*\\(?:[^:#]\\|$\\)")
;; Stop at valid fixed-width areas.
(looking-at "[ \t]*:\\(?: \\|$\\)")
;; Stop at drawers.
(and (looking-at org-drawer-regexp)
(save-excursion
(re-search-forward
"^[ \t]*:END:[ \t]*$" limit t)))
;; Stop at valid comments.
(looking-at "[ \t]*#\\(?: \\|$\\)")
;; Stop at valid dynamic blocks.
(and (looking-at org-dblock-start-re)
(save-excursion
(re-search-forward
"^[ \t]*#\\+END:?[ \t]*$" limit t)))
;; Stop at valid blocks.
(and (looking-at "[ \t]*#\\+BEGIN_\\(\\S-+\\)")
(save-excursion
(re-search-forward
(format "^[ \t]*#\\+END_%s[ \t]*$"
(regexp-quote
(org-match-string-no-properties 1)))
limit t)))
;; Stop at valid latex environments.
(and (looking-at org-element--latex-begin-environment)
(save-excursion
(re-search-forward
(format org-element--latex-end-environment
(regexp-quote
(org-match-string-no-properties 1)))
limit t)))
;; Stop at valid keywords.
(looking-at "[ \t]*#\\+\\S-+:")
;; Skip everything else.
(not
(progn
(end-of-line)
(re-search-forward org-element-paragraph-separate
limit 'm)))))
(beginning-of-line)))
;; A matching `org-element-paragraph-separate' is not
;; necessarily the end of the paragraph. In particular,
;; drawers, blocks or LaTeX environments opening lines
;; must be closed. Moreover keywords with a secondary
;; value must belong to "dual keywords".
(while (not
(cond
((not (and (re-search-forward
org-element-paragraph-separate limit 'move)
(progn (beginning-of-line) t))))
((looking-at org-drawer-regexp)
(save-excursion
(re-search-forward "^[ \t]*:END:[ \t]*$" limit t)))
((looking-at "[ \t]*#\\+BEGIN_\\(\\S-+\\)")
(save-excursion
(re-search-forward
(format "^[ \t]*#\\+END_%s[ \t]*$"
(regexp-quote (match-string 1)))
limit t)))
((looking-at org-element--latex-begin-environment)
(save-excursion
(re-search-forward
(format org-element--latex-end-environment
(regexp-quote (match-string 1)))
limit t)))
((looking-at "[ \t]*#\\+\\(\\S-+\\)\\[.*\\]:")
(member-ignore-case (match-string 1)
org-element-dual-keywords))
;; Everything else is unambiguous.
(t)))
(end-of-line))
(if (= (point) limit) limit
(goto-char (line-beginning-position)))))
(contents-end (progn (skip-chars-backward " \r\t\n" contents-begin)
(forward-line)
(point)))
(contents-end (save-excursion
(skip-chars-backward " \r\t\n" contents-begin)
(line-beginning-position 2)))
(end (progn (skip-chars-forward " \r\t\n" limit)
(if (eobp) (point) (line-beginning-position)))))
(list 'paragraph

View File

@ -1744,10 +1744,29 @@ e^{i\\pi}+1=0
(let ((elem (org-element-at-point)))
(and (eq (org-element-type elem) 'paragraph)
(= (point-max) (org-element-property :end elem))))))
;; An unclosed LaTeX environment does not end the paragraph.  The
;; backslash must be doubled: in an Emacs Lisp string, "\b" is read
;; as a backspace character, so "\begin" would never produce the
;; intended "\begin{equation}" line in the test buffer.
(should
(org-test-with-temp-text "Paragraph\n\\begin{equation}"
(let ((elem (org-element-at-point)))
(and (eq (org-element-type elem) 'paragraph)
(= (point-max) (org-element-property :end elem))))))
;; Stop at affiliated keywords.
(should
(org-test-with-temp-text "Paragraph\n#+NAME: test\n| table |"
(let ((elem (org-element-at-point)))
(and (eq (org-element-type elem) 'paragraph)
(not (org-element-property :name elem))
(= (org-element-property :end elem) (line-beginning-position 2))))))
;; A dual keyword (here CAPTION with a short caption) following the
;; paragraph must end it and must not be attached to the paragraph.
;; Check `:caption', the property this keyword would set, in addition
;; to `:name'.
(should
(org-test-with-temp-text
"Paragraph\n#+CAPTION[with short caption]: test\n| table |"
(let ((elem (org-element-at-point)))
(and (eq (org-element-type elem) 'paragraph)
(not (org-element-property :name elem))
(not (org-element-property :caption elem))
(= (org-element-property :end elem) (line-beginning-position 2))))))
;; Do not steal affiliated keywords from container.
(should
(org-test-with-temp-text "#+ATTR_LATEX: test\n- item 1"
(let ((elem (progn (search-forward "item") (org-element-at-point))))
(org-test-with-temp-text "#+ATTR_LATEX: test\n- item<point> 1"
(let ((elem (org-element-at-point)))
(and (eq (org-element-type elem) 'paragraph)
(not (org-element-property :attr_latex elem))
(/= (org-element-property :begin elem) 1)))))