org-element: Optimize working with match data and Emacs regexps

* lisp/org-element.el: Add commentary explaining some regexp-related
optimizations useful for the parser.

Link: https://debbugs.gnu.org/cgi/bugreport.cgi?bug=63225
(org-element--headline-deferred):
(org-element-item-parser):
(org-element-item-interpreter):
(org-element--list-struct):
(org-element-plain-list-parser):
(org-element-example-block-parser):
(org-element-fixed-width-interpreter):
(org-element-paragraph-parser):
(org-element-src-block-parser):
(org-element-table-parser):
(org-element--parse-generic-emphasis):
(org-element-export-snippet-interpreter):
(org-element-link-parser):
(org-element--current-element):
(org-element--collect-affiliated-keywords):
(org-element-parse-buffer):
(org-element-normalize-string):
(org-element-normalize-contents):
(org-element--parse-to):
(org-element--cache-before-change):
(org-element--cache-for-removal):
(org-element-context): Avoid storing match data unless strictly
necessary.  Explain the necessity in places where we have to use
`save-match-data'.  Prefer `looking-at-p' (does not alter match data)
to `looking-at'.  Simplify regexps.  Update docstrings clearly
indicating when match data might be modified.
* lisp/org.el:
(org-offer-links-in-entry):
* lisp/ob-exp.el (org-babel-exp-process-buffer):
* lisp/org-agenda.el: Fix places where we need to protect match data.
This commit is contained in:
Ihor Radchenko 2023-05-06 15:03:04 +02:00
parent dfd36d1969
commit fefaadc2d5
No known key found for this signature in database
GPG Key ID: 6470762A7DA11D8B
4 changed files with 475 additions and 435 deletions

View File

@ -173,7 +173,7 @@ this template."
;; buffer.
(org-fold-core-ignore-modifications
(while (re-search-forward regexp nil t)
(setq element (org-element-at-point))
(setq element (save-match-data (org-element-at-point)))
(unless (save-match-data
(or (org-in-commented-heading-p nil element)
(org-in-archived-heading-p nil element)))

View File

@ -6892,7 +6892,7 @@ scheduled items with an hour specification like [h]h:mm."
(while (re-search-forward regexp nil t)
(catch :skip
(unless (save-match-data (org-at-planning-p)) (throw :skip nil))
(org-agenda-skip)
(save-match-data (org-agenda-skip))
(let* ((s (match-string 1))
(pos (1- (match-beginning 1)))
(todo-state (save-match-data (org-get-todo-state)))

View File

@ -655,7 +655,26 @@ and END-OFFSET."
;; greater element requires tweaking `org-element--current-element'.
;; Moreover, the newly defined type must be added to both
;; `org-element-all-elements' and `org-element-greater-elements'.
;;
;; When adding or modifying the parser, please keep in mind the
;; following rules. They are important to keep parser performance
;; optimal.
;;
;; 1. When you can use `looking-at-p' or `string-match-p' instead of
;; `looking-at' or `string-match' and keep match data unmodified,
;; do it.
;; 2. When regexps can be grouped together, avoiding multiple regexp
;; match calls, they should be grouped.
;; 3. When `save-match-data' can be avoided, avoid it.
;; 4. When simpler regexps can be used for analysis, use the simpler
;; regexps.
;; 5. When regexps can be calculated in advance, not dynamically, they
;; should be calculated in advance.
;; 6 Note that it is not an obligation of a given function to preserve
;; match data - `save-match-data' is costly and must be arranged by
;; the caller if necessary.
;;
;; See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=63225
;;;; Center Block
@ -973,7 +992,7 @@ parse properties for property drawer at point."
"Return time properties associated to headline at point.
Return value is a plist."
(save-excursion
(when (progn (forward-line) (looking-at org-element-planning-line-re))
(when (progn (forward-line) (looking-at-p org-element-planning-line-re))
(let ((end (line-end-position))
plist)
(while (re-search-forward org-element-planning-keywords-re end t)
@ -1483,19 +1502,12 @@ Assume point is at the beginning of the item."
(looking-at org-list-full-item-re)
(let* ((begin (point))
(bullet (org-element--get-cached-string (match-string-no-properties 1)))
(tag-begin (match-beginning 4))
(tag-end (match-end 4))
(checkbox (let ((box (match-string 3)))
(cond ((equal "[ ]" box) 'off)
((equal "[X]" box) 'on)
((equal "[-]" box) 'trans))))
(counter (let ((c (match-string 2)))
(save-match-data
(cond
((not c) nil)
((string-match "[A-Za-z]" c)
(- (string-to-char (upcase (match-string-no-properties 0 c)))
64))
((string-match "[0-9]+" c)
(string-to-number (match-string-no-properties 0 c)))))))
(end (progn (goto-char (nth 6 (assq (point) struct)))
(if (bolp) (point) (line-beginning-position 2))))
(pre-blank 0)
@ -1505,7 +1517,7 @@ Assume point is at the beginning of the item."
;; Ignore tags in un-ordered lists: they are just
;; a part of item's body.
(if (and (match-beginning 4)
(save-match-data (string-match "[.)]" bullet)))
(string-match-p "[.)]" bullet))
(match-beginning 4)
(match-end 0)))
(skip-chars-forward " \r\t\n" end)
@ -1521,6 +1533,14 @@ Assume point is at the beginning of the item."
(progn (goto-char end)
(skip-chars-backward " \r\t\n")
(line-beginning-position 2))))
(counter (let ((c (match-string 2)))
(cond
((not c) nil)
((string-match "[A-Za-z]" c)
(- (string-to-char (upcase (match-string-no-properties 0 c)))
64))
((string-match "[0-9]+" c)
(string-to-number (match-string-no-properties 0 c))))))
(item
(org-element-create
'item
@ -1544,7 +1564,7 @@ Assume point is at the beginning of the item."
(when raw
(if raw-secondary-p raw
(org-element--parse-objects
(match-beginning 4) (match-end 4) nil
tag-begin tag-end nil
(org-element-restriction 'item)
item))))))))
@ -1616,11 +1636,11 @@ CONTENTS is the contents of the element."
(dolist (item items) (setcar (nthcdr 6 item) end)))
(throw :exit (sort (nconc items struct) #'car-less-than-car)))
;; At list end: end all items.
((looking-at org-list-end-re)
((looking-at-p org-list-end-re)
(dolist (item items) (setcar (nthcdr 6 item) (point)))
(throw :exit (sort (nconc items struct) #'car-less-than-car)))
;; At a new item: end previous sibling.
((looking-at item-re)
((looking-at-p item-re)
(let ((ind (save-excursion (skip-chars-forward " \t")
(org-current-text-column))))
(setq top-ind (min top-ind ind))
@ -1636,17 +1656,17 @@ CONTENTS is the contents of the element."
(match-string-no-properties 2) ; counter
(match-string-no-properties 3) ; checkbox
;; Description tag.
(and (save-match-data
(string-match "[-+*]" bullet))
(and
(string-match-p "[-+*]" bullet)
(match-string-no-properties 4))
;; Ending position, unknown so far.
nil)))
items))
(forward-line))
;; Skip empty lines.
((looking-at "^[ \t]*$") (forward-line))
((looking-at-p "^[ \t]*$") (forward-line))
;; Skip inline tasks and blank lines along the way.
((and inlinetask-re (looking-at inlinetask-re))
((and inlinetask-re (looking-at-p inlinetask-re))
(forward-line)
(let ((origin (point)))
(when (re-search-forward inlinetask-re limit t)
@ -1672,7 +1692,7 @@ CONTENTS is the contents of the element."
(re-search-forward
(format "^[ \t]*#\\+END%s[ \t]*$" (match-string 1))
limit t)))
((and (looking-at org-element-drawer-re)
((and (looking-at-p org-element-drawer-re)
(re-search-forward "^[ \t]*:END:[ \t]*$" limit t))))
(forward-line))))))))
@ -2218,19 +2238,19 @@ Return a new syntax node of `example-block' type containing `:begin',
;; first line.
(1- (string-to-number (match-string 2 switches)))))))
(preserve-indent
(and switches (string-match "-i\\>" switches)))
(and switches (string-match-p "-i\\>" switches)))
;; Should labels be retained in (or stripped from) example
;; blocks?
(retain-labels
(or (not switches)
(not (string-match "-r\\>" switches))
(and number-lines (string-match "-k\\>" switches))))
(not (string-match-p "-r\\>" switches))
(and number-lines (string-match-p "-k\\>" switches))))
;; What should code-references use - labels or
;; line-numbers?
(use-labels
(or (not switches)
(and retain-labels
(not (string-match "-k\\>" switches)))))
(not (string-match-p "-k\\>" switches)))))
(label-fmt
(and switches
(string-match "-l +\"\\([^\"\n]+\\)\"" switches)
@ -2364,7 +2384,7 @@ Assume point is at the beginning of the fixed-width area."
(end-area
(progn
(while (and (< (point) limit)
(looking-at "[ \t]*:\\( \\|$\\)"))
(looking-at-p "[ \t]*:\\( \\|$\\)"))
(forward-line))
(if (bolp) (line-end-position 0) (point))))
(end (progn (skip-chars-forward " \r\t\n" limit)
@ -2595,7 +2615,7 @@ Assume point is at the beginning of the paragraph."
((not (and (re-search-forward
org-element-paragraph-separate limit 'move)
(progn (beginning-of-line) t))))
((looking-at org-element-drawer-re)
((looking-at-p org-element-drawer-re)
(save-excursion
(re-search-forward "^[ \t]*:END:[ \t]*$" limit t)))
((looking-at "[ \t]*#\\+BEGIN_\\(\\S-+\\)")
@ -2752,7 +2772,7 @@ Assume point is at the beginning of the block."
;; first line.
(1- (string-to-number (match-string 2 switches)))))))
(preserve-indent (and switches
(string-match "-i\\>" switches)))
(string-match-p "-i\\>" switches)))
(label-fmt
(and switches
(string-match "-l +\"\\([^\"\n]+\\)\"" switches)
@ -2761,14 +2781,14 @@ Assume point is at the beginning of the block."
;; source blocks?
(retain-labels
(or (not switches)
(not (string-match "-r\\>" switches))
(and number-lines (string-match "-k\\>" switches))))
(not (string-match-p "-r\\>" switches))
(and number-lines (string-match-p "-k\\>" switches))))
;; What should code-references use - labels or
;; line-numbers?
(use-labels
(or (not switches)
(and retain-labels
(not (string-match "-k\\>" switches)))))
(not (string-match-p "-k\\>" switches)))))
;; Retrieve code.
(value
(org-element-deferred-create
@ -2844,7 +2864,7 @@ Assume point is at the beginning of the table."
(save-excursion
(let* ((case-fold-search t)
(table-begin (point))
(type (if (looking-at "[ \t]*|") 'org 'table.el))
(type (if (looking-at-p "[ \t]*|") 'org 'table.el))
(end-re (format "^[ \t]*\\($\\|[^| \t%s]\\)"
(if (eq type 'org) "" "+")))
(begin (car affiliated))
@ -2903,7 +2923,7 @@ Return a new syntax node of `table-row' type containing `:begin',
`:end', `:contents-begin', `:contents-end', `:type', `:post-blank' and
`:post-affiliated' properties."
(save-excursion
(let* ((type (if (looking-at "^[ \t]*|-") 'rule 'standard))
(let* ((type (if (looking-at-p "^[ \t]*|-") 'rule 'standard))
(begin (point))
;; A table rule has no contents. In that case, ensure
;; CONTENTS-BEGIN matches CONTENTS-END.
@ -3012,7 +3032,7 @@ Assume point is at first MARK."
`(seq (or line-start (any space ?- ?\( ?' ?\" ?\{))
,mark
(not space)))))
(when (looking-at opening-re)
(when (looking-at-p opening-re)
(goto-char (1+ origin))
(let ((closing-re
(rx-to-string
@ -3263,21 +3283,21 @@ When at an export snippet, return a new syntax node of
Assume point is at the beginning of the snippet."
(save-excursion
(let (contents-end)
(when (and (looking-at "@@\\([-A-Za-z0-9]+\\):")
(setq contents-end
(save-match-data (goto-char (match-end 0))
(when
(re-search-forward "@@" nil t)
(match-beginning 0)))))
(when (looking-at "@@\\([-A-Za-z0-9]+\\):")
(goto-char (match-end 0))
(let* ((begin (match-beginning 0))
(contents-begin (match-end 0))
(backend (org-element--get-cached-string
(match-string-no-properties 1)))
(contents-end
(when (re-search-forward "@@" nil t)
(match-beginning 0)))
(value
(when contents-end
(org-element-deferred-create
nil #'org-element--substring
(- (match-end 0) begin)
(- contents-end begin)))
(- contents-begin begin)
(- contents-end begin))))
(post-blank (skip-chars-forward " \t"))
(end (point)))
(org-element-create
@ -3286,7 +3306,7 @@ Assume point is at the beginning of the snippet."
:value value
:begin begin
:end end
:post-blank post-blank)))))))
:post-blank post-blank))))))
(defun org-element-export-snippet-interpreter (export-snippet _)
"Interpret EXPORT-SNIPPET object as Org syntax."
@ -3590,7 +3610,7 @@ Assume point is at the beginning of the link."
(cond
;; File type.
((or (file-name-absolute-p raw-link)
(string-match "\\`\\.\\.?/" raw-link))
(string-match-p "\\`\\.\\.?/" raw-link))
(setq type "file")
(setq path raw-link))
;; Explicit type (http, irc, bbdb...).
@ -4215,6 +4235,26 @@ Assume point is at the first equal sign marker."
;; It returns the Lisp representation of the element starting at
;; point.
(defconst org-element--current-element-re
(rx
(or
(group-n 1 (regexp org-element--latex-begin-environment))
(group-n 2 (regexp org-element-drawer-re-nogroup))
(group-n 3 (regexp "[ \t]*:\\( \\|$\\)"))
(group-n 7 (regexp org-element-dynamic-block-open-re))
(seq (group-n 4 (regexp "[ \t]*#\\+"))
(or
(seq "BEGIN_" (group-n 5 (1+ (not space))))
(group-n 6 "CALL:")
(group-n 8 (1+ (not space)) ":")
))
(group-n 9 (regexp org-footnote-definition-re))
(group-n 10 (regexp "[ \t]*-\\{5,\\}[ \t]*$"))
(group-n 11 "%%(")))
"Bulk regexp matching multiple elements in a single regexp.
This is a bit more efficient compared to invoking regexp search
multiple times.")
(defvar org-inlinetask-min-level); Declared in org-inlinetask.el
(defvar org-element--cache-sync-requests); Declared later
(defsubst org-element--current-element (limit &optional granularity mode structure)
@ -4315,23 +4355,23 @@ element it has to parse."
((and (cdr affiliated) (>= (point) limit))
(goto-char (car affiliated))
(org-element-keyword-parser limit nil))
;; Do a single regexp match do reduce overheads for
;; multiple regexp search invocations.
((looking-at org-element--current-element-re)
(cond
;; LaTeX Environment.
((looking-at-p org-element--latex-begin-environment)
((match-beginning 1)
(org-element-latex-environment-parser limit affiliated))
;; Drawer.
((looking-at-p org-element-drawer-re)
((match-beginning 2)
(org-element-drawer-parser limit affiliated))
;; Fixed Width
((looking-at-p "[ \t]*:\\( \\|$\\)")
((match-beginning 3)
(org-element-fixed-width-parser limit affiliated))
;; Inline Comments, Blocks, Babel Calls, Dynamic Blocks and
;; Keywords.
((looking-at "[ \t]*#\\+")
(goto-char (match-end 0))
(cond
((looking-at "BEGIN_\\(\\S-+\\)")
(beginning-of-line)
(funcall (pcase (upcase (match-string 1))
((match-beginning 5)
(funcall (pcase (upcase (match-string 5))
("CENTER" #'org-element-center-block-parser)
("COMMENT" #'org-element-comment-block-parser)
("EXAMPLE" #'org-element-example-block-parser)
@ -4342,29 +4382,24 @@ element it has to parse."
(_ #'org-element-special-block-parser))
limit
affiliated))
((looking-at-p "CALL:")
(beginning-of-line)
((match-beginning 6)
(org-element-babel-call-parser limit affiliated))
((save-excursion
(beginning-of-line)
(looking-at-p org-element-dynamic-block-open-re))
((match-beginning 7)
(beginning-of-line)
(org-element-dynamic-block-parser limit affiliated))
((looking-at-p "\\S-+:")
(beginning-of-line)
((match-beginning 8)
(org-element-keyword-parser limit affiliated))
(t
(beginning-of-line)
(org-element-paragraph-parser limit affiliated))))
((match-beginning 4)
(org-element-paragraph-parser limit affiliated))
;; Footnote Definition.
((looking-at-p org-footnote-definition-re)
((match-beginning 9)
(org-element-footnote-definition-parser limit affiliated))
;; Horizontal Rule.
((looking-at-p "[ \t]*-\\{5,\\}[ \t]*$")
((match-beginning 10)
(org-element-horizontal-rule-parser limit affiliated))
;; Diary Sexp.
((looking-at-p "%%(")
(org-element-diary-sexp-parser limit affiliated))
((match-beginning 11)
(org-element-diary-sexp-parser limit affiliated))))
;; Table.
((or (looking-at-p "[ \t]*|")
;; There is no strict definition of a table.el
@ -4451,16 +4486,13 @@ When PARSE is non-nil, values from keywords belonging to
;; value parsed.
(parsed? (member kwd org-element-parsed-keywords))
;; Find main value for any keyword.
(value
(let ((beg (match-end 0))
(end (save-excursion
(value-begin (match-end 0))
(value-end
(save-excursion
(end-of-line)
(skip-chars-backward " \t")
(point))))
(if parsed?
(save-match-data
(org-element--parse-objects beg end nil restrict))
(org-trim (buffer-substring-no-properties beg end)))))
(point)))
value
;; If KWD is a dual keyword, find its secondary value.
;; Maybe parse it.
(dual? (member kwd org-element-dual-keywords))
@ -4469,18 +4501,23 @@ When PARSE is non-nil, values from keywords belonging to
(let ((sec (match-string-no-properties 2)))
(cond
((and sec parsed?)
(save-match-data
(org-element--parse-objects
(match-beginning 2) (match-end 2) nil restrict)))
(match-beginning 2) (match-end 2) nil restrict))
(sec sec)))))
;; Attribute a property name to KWD.
(kwd-sym (and kwd (intern (concat ":" (downcase kwd))))))
(setq value
(if parsed?
(org-element--parse-objects
value-begin value-end nil restrict)
(org-trim (buffer-substring-no-properties
value-begin value-end))))
;; Now set final shape for VALUE.
(when dual?
(setq value (and (or value dual-value) (cons value dual-value))))
(when (or (member kwd org-element-multiple-keywords)
;; Attributes can always appear on multiple lines.
(string-match "^ATTR_" kwd))
(string-match-p "^ATTR_" kwd))
(setq value (nconc (plist-get output kwd-sym) (list value))))
;; Eventually store the new value in OUTPUT.
(setq output (plist-put output kwd-sym value))
@ -4488,7 +4525,7 @@ When PARSE is non-nil, values from keywords belonging to
(forward-line)))
;; If affiliated keywords are orphaned: move back to first one.
;; They will be parsed as a paragraph.
(when (looking-at "[ \t]*$") (goto-char origin) (setq output nil))
(when (looking-at-p "[ \t]*$") (goto-char origin) (setq output nil))
;; Return value.
(cons origin output))))
@ -5108,7 +5145,7 @@ If there is no affiliated keyword, return the empty string."
(when value
(if (or (member keyword org-element-multiple-keywords)
;; All attribute keywords can have multiple lines.
(string-match "^ATTR_" keyword))
(string-match-p "^ATTR_" keyword))
(mapconcat (lambda (line) (funcall keyword-to-org keyword line))
value "")
(funcall keyword-to-org keyword value)))))
@ -5119,7 +5156,7 @@ If there is no affiliated keyword, return the empty string."
(org-element-properties-mapc
(lambda (prop _ _)
(let ((keyword (upcase (substring (symbol-name prop) 1))))
(when (or (string-match "^ATTR_" keyword)
(when (or (string-match-p "^ATTR_" keyword)
(and
(member keyword org-element-affiliated-keywords)
(not (assoc keyword
@ -6476,7 +6513,6 @@ when the parsing should stop. The function throws
`org-element--cache-interrupt' if the process stopped before finding
the expected result."
(catch 'exit
(save-match-data
(org-with-base-buffer nil
(org-with-wide-buffer
(goto-char pos)
@ -6610,7 +6646,6 @@ If you observe Emacs hangs frequently, please report this to Org mode mailing li
;; Avoid parsing headline siblings above.
(goto-char elem-end)
(when (eq type 'headline)
(save-match-data
(unless (when (and (/= 1 (org-element-property :true-level element))
(re-search-forward
(org-headline-re (1- (org-element-property :true-level element)))
@ -6628,7 +6663,7 @@ If you observe Emacs hangs frequently, please report this to Org mode mailing li
(org-headline-re (org-element-property :true-level element))
elem-end t)
;; Roll-back to normal parsing.
(goto-char elem-end)))))
(goto-char elem-end))))
(setq mode (org-element--next-mode mode type nil)))
;; A non-greater element contains point: return it.
((not (memq type org-element-greater-elements))
@ -6683,7 +6718,7 @@ If you observe Emacs hangs frequently, please report this to Org mode mailing li
;; Otherwise, return ELEMENT as it is the smallest
;; element containing POS.
(t (throw 'exit (if syncp parent element)))))
(setq element nil)))))))))
(setq element nil))))))))
;;;; Staging Buffer Changes
@ -6744,6 +6779,7 @@ The function returns the new value of `org-element--cache-change-warning'."
(let ((org-element--cache-change-warning-before org-element--cache-change-warning)
(org-element--cache-change-warning-after))
(setq org-element--cache-change-warning-after
;; We must preserve match data when called as `before-change-functions'.
(save-match-data
(let ((case-fold-search t))
(when (re-search-forward
@ -6802,6 +6838,7 @@ that range. See `after-change-functions' for more information."
(line-beginning-position)))
;; Store synchronization request.
(let ((offset (- end beg pre)))
;; We must preserve match data when called as `after-change-functions'.
(save-match-data
(org-element--cache-submit-request beg (- end offset) offset)))
;; Activate a timer to process the request during idle time.
@ -6909,7 +6946,7 @@ known element in cache (it may start after END)."
(org-element-property :level up)))
(org-with-point-at (org-element-contents-begin up)
(unless
(save-match-data
(progn
(when (looking-at-p org-element-planning-line-re)
(forward-line))
(when (looking-at org-property-drawer-re)
@ -6926,9 +6963,8 @@ known element in cache (it may start after END)."
;; Should not see property
;; drawer within changed
;; region.
(save-match-data
(or (not (looking-at org-property-drawer-re))
(> beg (match-end 0)))))))
(> beg (match-end 0))))))
(_ 'robust)))))
;; UP is a robust greater element containing changes.
;; We only need to extend its ending boundaries.
@ -7508,6 +7544,7 @@ the cache."
;; can be found. When RE is nil, just find element at
;; point.
(move-start-to-next-match
;; Preserve match data that might be set by FUNC.
(re) `(save-match-data
(if (or (not ,re)
(if org-element--cache-map-statistics
@ -7890,7 +7927,9 @@ the first row of a table, returned element will be the table
instead of the first row.
When point is at the end of the buffer, return the innermost
element ending there."
element ending there.
This function may modify the match data."
(setq pom (or pom (point)))
;; Allow re-parsing when the command can benefit from it.
(when (and cached-only
@ -7969,8 +8008,9 @@ the beginning of any other object, return that object.
Optional argument ELEMENT, when non-nil, is the closest element
containing point, as returned by `org-element-at-point'.
Providing it allows for quicker computation."
(save-match-data
Providing it allows for quicker computation.
This function may modify match data."
(catch 'objects-forbidden
(org-with-wide-buffer
(let* ((pos (point))
@ -8079,7 +8119,7 @@ Providing it allows for quicker computation."
(setq parent next)
(setq restriction (org-element-restriction next)))
;; Otherwise, return NEXT.
(t (throw 'exit next))))))))))))))
(t (throw 'exit next)))))))))))))
(defun org-element-nested-p (elem-A elem-B)
"Non-nil when elements ELEM-A and ELEM-B are nested."

View File

@ -8537,7 +8537,7 @@ there is one, return it."
;; Only consider valid links or links openable via
;; `org-open-at-point'.
(when (org-element-type-p
(org-element-context)
(save-match-data (org-element-context))
'(link comment comment-block node-property keyword))
(push (match-string 0) links)))
(setq links (org-uniquify (reverse links))))