0
0
Fork 1
mirror of https://git.savannah.gnu.org/git/emacs/org-mode.git synced 2024-09-29 18:00:49 +00:00

Improve org-link-plain-re

* lisp/ol.el (org-link-make-regexps): Allow URLs with up to two
levels of nested brackets.

* lisp/org.el (org-link-plain-re): Update docstring.

* testing/lisp/test-ol.el: Add tests for the plain link regular
expression.

(org-link-plain-re): Update docstring.

Now, URLs like [1] can be matched.  The new regexp is based on [2].
The docstring of `org-link-plain-re' explicitly mentions that the
regexp must contain groups for the link type and the path.

[1] https://doi.org/10.1016/0160-791x(79)90023-x
[2] https://daringfireball.net/2010/07/improved_regex_for_matching_urls
This commit is contained in:
Ihor Radchenko 2021-03-24 21:27:24 +08:00 committed by Bastien
parent f00ad09e25
commit 7dcb1afb69
2 changed files with 141 additions and 10 deletions

View file

@ -512,7 +512,10 @@ links more efficient."
"Matches link with angular brackets, spaces are allowed.")
(defvar org-link-plain-re nil
"Matches plain link, without spaces.")
"Matches plain link, without spaces.
Group 1 must contain the link type (i.e. https).
Group 2 must contain the link path (i.e. //example.com).
Used by `org-element-link-parser'.")
(defvar org-link-bracket-re nil
"Matches a link in double brackets.")
@ -800,15 +803,33 @@ This should be called after the variable `org-link-parameters' has changed."
(format "<%s:\\([^>\n]*\\(?:\n[ \t]*[^> \t\n][^>\n]*\\)*\\)>"
types-re)
org-link-plain-re
(concat
"\\<" types-re ":"
"\\([^][ \t\n()<>]+\\(?:([[:word:]0-9_]+)\\|\\([^[:punct:] \t\n]\\|/\\)\\)\\)")
;; "\\([^]\t\n\r<>() ]+[^]\t\n\r<>,.;() ]\\)")
org-link-bracket-re
(rx (seq "[["
;; URI part: match group 1.
(group
(one-or-more
(let* ((non-space-bracket "[^][ \t\n()<>]")
(parenthesis
`(seq "("
(0+ (or (regex ,non-space-bracket)
(seq "("
(0+ (regex ,non-space-bracket))
")")))
")")))
;; Heuristics for an URL link inspired by
;; https://daringfireball.net/2010/07/improved_regex_for_matching_urls
(rx-to-string
`(seq word-start
;; Link type: match group 1.
(regexp ,types-re)
":"
;; Link path: match group 2.
(group
(1+ (or (regex ,non-space-bracket)
,parenthesis))
(or (regexp "[^[:punct:] \t\n]")
?/
,parenthesis)))))
org-link-bracket-re
(rx (seq "[["
;; URI part: match group 1.
(group
(one-or-more
(or (not (any "[]\\"))
(and "\\" (zero-or-more "\\\\") (any "[]"))
(and (one-or-more "\\") (not (any "[]"))))))

View file

@ -491,5 +491,115 @@
(org-previous-link))
(buffer-substring (point) (line-end-position))))))
;;; Link regexps
(defmacro test-ol-parse-link-in-text (text)
  "Return list of :type and :path of link parsed in TEXT.
\"<point>\" string must be at the beginning of the link to be parsed.

TEXT is inserted into a temporary buffer with
`org-test-with-temp-text'.  The link at point is parsed a single
time with `org-element-link-parser', and both properties are read
from that one parse result."
  (declare (indent 1))
  ;; Bind the parse result to an uninterned symbol so the expansion
  ;; cannot capture or shadow a variable at the call site.
  (let ((link (make-symbol "link")))
    `(org-test-with-temp-text ,text
       (let ((,link (org-element-link-parser)))
         (list (org-element-property :type ,link)
               (org-element-property :path ,link))))))
(ert-deftest test-ol/plain-link-re ()
  "Check the link type and path matched by `org-link-plain-re'.
Each case parses the plain link located at \"<point>\" and compares
the resulting (TYPE PATH) list with the expected one."
  ;; Parentheses belonging to the surrounding text stay out of the path,
  ;; while balanced parentheses inside the URL are kept.
  (should (equal (test-ol-parse-link-in-text "(<point>https://example.com)")
                 '("https" "//example.com")))
  (should (equal (test-ol-parse-link-in-text
                     "(Some text <point>https://example.com/qwe())")
                 '("https" "//example.com/qwe()")))
  (should (equal (test-ol-parse-link-in-text
                     "<point>https://doi.org/10.1016/0160-791x(79)90023-x")
                 '("https" "//doi.org/10.1016/0160-791x(79)90023-x")))
  ;; "file:" links: plain paths and paths with balanced parentheses,
  ;; nested up to two levels.
  (should (equal (test-ol-parse-link-in-text "The <point>file:aa link")
                 '("file" "aa")))
  (should (equal (test-ol-parse-link-in-text "The <point>file:a(b)c link")
                 '("file" "a(b)c")))
  (should (equal (test-ol-parse-link-in-text "The <point>file:a() link")
                 '("file" "a()")))
  (should (equal (test-ol-parse-link-in-text "The <point>file:aa((a)) link")
                 '("file" "aa((a))")))
  (should (equal (test-ol-parse-link-in-text "The <point>file:aa(()) link")
                 '("file" "aa(())")))
  ;; Slashes are accepted at the start and at the end of the path.
  (should (equal (test-ol-parse-link-in-text "The <point>file:/a link")
                 '("file" "/a")))
  (should (equal (test-ol-parse-link-in-text "The <point>file:/a/ link")
                 '("file" "/a/")))
  (should (equal (test-ol-parse-link-in-text "The <point>http:// link")
                 '("http" "//")))
  ;; An unbalanced parenthesis right after the link is not part of it.
  (should (equal (test-ol-parse-link-in-text "The (some <point>file:ab) link")
                 '("file" "ab")))
  (should (equal (test-ol-parse-link-in-text "The <point>file:aa) link")
                 '("file" "aa")))
  (should (equal (test-ol-parse-link-in-text "The <point>file:aa( link")
                 '("file" "aa")))
  ;; URLs with parenthesized components in various positions.
  (should (equal (test-ol-parse-link-in-text
                     "The <point>http://foo.com/more_(than)_one_(parens) link")
                 '("http" "//foo.com/more_(than)_one_(parens)")))
  (should (equal (test-ol-parse-link-in-text
                     "The <point>http://foo.com/blah_(wikipedia)#cite-1 link")
                 '("http" "//foo.com/blah_(wikipedia)#cite-1")))
  (should (equal (test-ol-parse-link-in-text
                     "The <point>http://foo.com/blah_(wikipedia)_blah#cite-1 link")
                 '("http" "//foo.com/blah_(wikipedia)_blah#cite-1")))
  (should (equal (test-ol-parse-link-in-text
                     "The <point>http://foo.com/unicode_(✪)_in_parens link")
                 '("http" "//foo.com/unicode_(✪)_in_parens")))
  (should (equal (test-ol-parse-link-in-text
                     "The <point>http://foo.com/(something)?after=parens link")
                 '("http" "//foo.com/(something)?after=parens"))))
(provide 'test-ol)
;;; test-ol.el ends here