org-element: Speed optimizations

* contrib/lisp/org-element.el (org-element--element-block-re): New
  variable.
(org-element-current-element): New function.
(org-element-parse-elements): Make use of the new specialized function
  instead of `org-element-at-point'.  Also narrow buffer to current
  container (greater element or recursive object) being parsed to
  improve search speed.
(org-element-comment-parser): Speed-up parsing for comments at column 0.
(org-element-guess-type): Make comment regexp less restrictive, due to
  comment optimizations.
This commit is contained in:
Nicolas Goaziou 2012-01-13 17:01:45 +01:00
parent 102adf13d4
commit fb046f5b59
1 changed files with 208 additions and 78 deletions

View File

@ -877,39 +877,43 @@ CONTENTS is nil."
Return a list whose car is `comment' and cdr is a plist Return a list whose car is `comment' and cdr is a plist
containing `:begin', `:end', `:value' and `:post-blank' containing `:begin', `:end', `:value' and `:post-blank'
keywords." keywords."
(let ((comment-re "\\(#\\|[ \t]*#\\+\\( \\|$\\)\\)") (let (beg-coms begin end end-coms keywords)
beg-coms begin end value pos-before-blank keywords)
(save-excursion (save-excursion
;; Move to the beginning of comments. (if (looking-at "#")
(unless (bobp) ;; First type of comment: comments at column 0.
(while (and (not (bobp)) (looking-at comment-re)) (let ((comment-re "^\\([^#]\\|#\\+[a-z]\\)"))
(forward-line -1)) (save-excursion
(unless (looking-at comment-re) (forward-line 1))) (re-search-backward comment-re nil 'move)
(setq beg-coms (point)) (if (bobp) (setq keywords nil beg-coms (point))
;; Get affiliated keywords, if any. (forward-line)
(setq keywords (org-element-collect-affiliated-keywords)) (setq keywords (org-element-collect-affiliated-keywords)
;; Store true beginning of element. beg-coms (point))))
(setq begin (car keywords)) (re-search-forward comment-re nil 'move)
;; Get ending of comments. If point is in a list, ensure to not (setq end-coms (if (eobp) (point) (match-beginning 0))))
;; get outside of it. ;; Second type of comment: indented comments.
(let* ((itemp (org-in-item-p)) (let ((comment-re "[ \t]*#\\+\\(?: \\|$\\)"))
(max-pos (if itemp (unless (bobp)
(org-list-get-bottom-point (while (and (not (bobp)) (looking-at comment-re))
(save-excursion (goto-char itemp) (org-list-struct))) (forward-line -1))
(point-max)))) (unless (looking-at comment-re) (forward-line)))
(while (and (looking-at comment-re) (< (point) max-pos)) (setq beg-coms (point))
(forward-line))) (setq keywords (org-element-collect-affiliated-keywords))
(setq pos-before-blank (point)) ;; Get comments ending. This may not be accurate if
;; commented lines within an item are followed by commented
;; lines outside of the list. Though, parser will always
;; get it right as it already knows surrounding element and
;; has narrowed buffer to its contents.
(while (looking-at comment-re) (forward-line))
(setq end-coms (point))))
;; Find position after blank. ;; Find position after blank.
(goto-char end-coms)
(org-skip-whitespace) (org-skip-whitespace)
(setq end (if (eobp) (point) (point-at-bol))) (setq end (if (eobp) (point) (point-at-bol))))
;; Extract value.
(setq value (buffer-substring-no-properties beg-coms pos-before-blank)))
`(comment `(comment
(:begin ,begin (:begin ,(or (car keywords) beg-coms)
:end ,end :end ,end
:value ,value :value ,(buffer-substring-no-properties beg-coms end-coms)
:post-blank ,(count-lines pos-before-blank end) :post-blank ,(count-lines end-coms end)
,@(cadr keywords))))) ,@(cadr keywords)))))
(defun org-element-comment-interpreter (comment contents) (defun org-element-comment-interpreter (comment contents)
@ -2765,7 +2769,7 @@ point is in a section in priority."
"^[ \t]*#\\+end:\\(?:\\s-\\|$\\)"))) "^[ \t]*#\\+end:\\(?:\\s-\\|$\\)")))
(if (not completep) 'paragraph (if (not completep) 'paragraph
(goto-char (car completep)) 'dynamic-block))) (goto-char (car completep)) 'dynamic-block)))
((looking-at "\\(#\\|[ \t]*#\\+\\( \\|$\\)\\)") 'comment) ((looking-at "\\(#\\|[ \t]*#\\+\\)\\(?: \\|$\\)") 'comment)
((looking-at "[ \t]*-\\{5,\\}[ \t]*$") 'horizontal-rule) ((looking-at "[ \t]*-\\{5,\\}[ \t]*$") 'horizontal-rule)
((org-at-table-p t) 'table) ((org-at-table-p t) 'table)
((looking-at "[ \t]*#\\+tblfm:") ((looking-at "[ \t]*#\\+tblfm:")
@ -3056,13 +3060,21 @@ Nil values returned from FUN are ignored in the result."
;; Return value in a proper order. ;; Return value in a proper order.
(reverse --acc)))) (reverse --acc))))
;; The following functions are internal parts of the parser. The ;; The following functions are internal parts of the parser.
;; first one, `org-element-parse-elements' acts at the element's
;; level. The second one, `org-element-parse-objects' applies on all ;; The first one, `org-element-parse-elements' acts at the element's
;; objects of a paragraph or a secondary string. It uses ;; level. As point is always at the beginning of an element during
;; parsing, it doesn't have to rely on `org-element-at-point'.
;; Instead, it calls a more restrictive, though way quicker,
;; alternative: `org-element-current-element'. That function
;; internally uses `org-element--element-block-re' for quick access to
;; a common regexp.
;; The second one, `org-element-parse-objects' applies on all objects
;; of a paragraph or a secondary string. It uses
;; `org-element-get-candidates' to optimize the search of the next ;; `org-element-get-candidates' to optimize the search of the next
;; object in the buffer. ;; object in the buffer.
;;
;; More precisely, that function looks for every allowed object type ;; More precisely, that function looks for every allowed object type
;; first. Then, it discards failed searches, keeps further matches, ;; first. Then, it discards failed searches, keeps further matches,
;; and searches again types matched behind point, for subsequent ;; and searches again types matched behind point, for subsequent
@ -3094,54 +3106,40 @@ elements.
Elements are accumulated into ACC." Elements are accumulated into ACC."
(save-excursion (save-excursion
(goto-char beg) (goto-char beg)
;; Shortcut when parsing only headlines. ;; When parsing only headlines, skip any text before first one.
(when (and (eq granularity 'headline) (not (org-at-heading-p))) (when (and (eq granularity 'headline) (not (org-at-heading-p)))
(org-with-limited-levels (outline-next-heading))) (org-with-limited-levels (outline-next-heading)))
;; Main loop start. ;; Main loop start.
(while (and (< (point) end) (not (eobp))) (while (not (eobp))
(push (push
;; 1. Item mode is active: point is at an item. Knowing that, ;; 1. Item mode is active: point must be at an item. Parse it
;; there's no need to go through `org-element-at-point'. ;; directly, skipping `org-element-current-element'.
(if (eq special 'item) (if (eq special 'item)
(let* ((element (org-element-item-parser structure)) (let ((element (org-element-item-parser structure)))
(cbeg (org-element-get-property :contents-begin element))
(cend (org-element-get-property :contents-end element)))
(goto-char (org-element-get-property :end element)) (goto-char (org-element-get-property :end element))
;; Narrow region to contents, so that item bullet don't
;; interfere with paragraph parsing.
(save-restriction (save-restriction
(narrow-to-region cbeg cend) (narrow-to-region
(org-element-get-property :contents-begin element)
(org-element-get-property :contents-end element))
(org-element-parse-elements (org-element-parse-elements
cbeg cend nil structure granularity visible-only (point-min) (point-max) nil structure granularity visible-only
(reverse element)))) (reverse element))))
;; 2. When ITEM is nil, find current element's type and parse ;; 2. When ITEM is nil, find current element's type and parse
;; it accordingly to its category. ;; it accordingly to its category.
(let ((element (org-element-at-point special structure))) (let ((element (org-element-current-element special structure)))
(goto-char (org-element-get-property :end element)) (goto-char (org-element-get-property :end element))
(cond (cond
;; Case 1. ELEMENT is a footnote-definition. If ;; Case 1. ELEMENT is a paragraph. Parse objects inside,
;; GRANURALITY allows parsing, use narrowing so that
;; footnote label don't interfere with paragraph
;; recognition.
((and (eq (car element) 'footnote-definition)
(not (memq granularity '(headline greater-element))))
(let ((cbeg (org-element-get-property :contents-begin element))
(cend (org-element-get-property :contents-end element)))
(save-restriction
(narrow-to-region cbeg cend)
(org-element-parse-elements
cbeg cend nil structure granularity visible-only
(reverse element)))))
;; Case 2. ELEMENT is a paragraph. Parse objects inside,
;; if GRANULARITY allows it. ;; if GRANULARITY allows it.
((and (eq (car element) 'paragraph) ((and (eq (car element) 'paragraph)
(or (not granularity) (eq granularity 'object))) (or (not granularity) (eq granularity 'object)))
(org-element-parse-objects (save-restriction
(org-element-get-property :contents-begin element) (narrow-to-region
(org-element-get-property :contents-end element) (org-element-get-property :contents-begin element)
(reverse element) (org-element-get-property :contents-end element))
nil)) (org-element-parse-objects
;; Case 3. ELEMENT is recursive: parse it between (point-min) (point-max) (reverse element) nil)))
;; Case 2. ELEMENT is recursive: parse it between
;; `contents-begin' and `contents-end'. Make sure ;; `contents-begin' and `contents-end'. Make sure
;; GRANULARITY allows the recursion, or ELEMENT is an ;; GRANULARITY allows the recursion, or ELEMENT is an
;; headline, in which case going inside is mandatory, in ;; headline, in which case going inside is mandatory, in
@ -3153,24 +3151,156 @@ Elements are accumulated into ACC."
(eq (car element) 'headline)) (eq (car element) 'headline))
(not (and visible-only (not (and visible-only
(org-element-get-property :hiddenp element)))) (org-element-get-property :hiddenp element))))
(org-element-parse-elements (save-restriction
(org-element-get-property :contents-begin element) (narrow-to-region
(org-element-get-property :contents-end element) (org-element-get-property :contents-begin element)
;; At a plain list, switch to item mode. At an (org-element-get-property :contents-end element))
;; headline, switch to section mode. Any other element (org-element-parse-elements
;; turns off special modes. (point-min) (point-max)
(case (car element) (plain-list 'item) (headline 'section)) ;; At a plain list, switch to item mode. At an
(org-element-get-property :structure element) ;; headline, switch to section mode. Any other
granularity ;; element turns off special modes.
visible-only (case (car element) (plain-list 'item) (headline 'section))
(reverse element))) (org-element-get-property :structure element)
;; Case 4. Else, just accumulate ELEMENT. granularity visible-only (reverse element))))
;; Case 3. Else, just accumulate ELEMENT.
(t element)))) (t element))))
acc) acc)
(org-skip-whitespace)) (org-skip-whitespace))
;; Return result. ;; Return result.
(nreverse acc))) (nreverse acc)))
(defconst org-element--element-block-re
(format "[ \t]*#\\+begin_\\(%s\\)\\(?: \\|$\\)"
(mapconcat
'regexp-quote
(mapcar 'car org-element-non-recursive-block-alist) "\\|"))
"Regexp matching the beginning of a non-recursive block type.
Used internally by `org-element-current-element'. Do not modify
it directly, set `org-element-recursive-block-alist' instead.")
(defun org-element-current-element (&optional special structure)
"Parse the element at point.
Return value is a list \(TYPE PROPS\) where TYPE is the type of
the element and PROPS a plist of properties associated to the
element.
Possible types are defined in `org-element-all-elements'.
Optional argument SPECIAL, when non-nil, can be either `item' or
`section'. The former allows to parse item wise instead of
plain-list wise, using STRUCTURE as the current list structure.
The latter will try to parse a section before anything else.
If STRUCTURE isn't provided but SPECIAL is set to `item', it will
be computed.
Unlike to `org-element-at-point', this function assumes point is
always at the beginning of the element it has to parse. As such,
it is quicker than its counterpart and always accurate, albeit
more restrictive."
(save-excursion
(beginning-of-line)
;; If point is at an affiliated keyword, try moving to the
;; beginning of the associated element. If none is found, the
;; keyword is orphaned and will be treated as plain text.
(when (looking-at org-element--affiliated-re)
(let ((opoint (point)))
(while (looking-at org-element--affiliated-re) (forward-line))
(when (looking-at "[ \t]*$") (goto-char opoint))))
(let ((case-fold-search t))
(cond
;; Headline.
((org-with-limited-levels (org-at-heading-p))
(org-element-headline-parser))
;; Quote section.
((let ((headline (ignore-errors (nth 4 (org-heading-components)))))
(and headline
(let (case-fold-search)
(string-match (format "^%s\\(?: \\|$\\)" org-quote-string)
headline))))
(org-element-quote-section-parser))
;; Section.
((eq special 'section) (org-element-section-parser))
;; Non-recursive block.
((when (looking-at org-element--element-block-re)
(let ((type (downcase (match-string 1))))
(if (save-excursion
(re-search-forward
(format "[ \t]*#\\+end_%s\\(?: \\|$\\)" type) nil t))
;; Build appropriate parser.
(funcall
(intern
(format "org-element-%s-parser"
(cdr (assoc type
org-element-non-recursive-block-alist)))))
(org-element-paragraph-parser)))))
;; Inlinetask.
((org-at-heading-p) (org-element-inlinetask-parser))
;; LaTeX Environment or paragraph if incomplete.
((looking-at "^[ \t]*\\\\begin{")
(if (save-excursion
(re-search-forward "^[ \t]*\\\\end{[^}]*}[ \t]*" nil t))
(org-element-latex-environment-parser)
(org-element-paragraph-parser)))
;; Property drawer.
((looking-at org-property-start-re)
(if (save-excursion (re-search-forward org-property-end-re nil t))
(org-element-property-drawer-parser)
(org-element-paragraph-parser)))
;; Recursive block, or paragraph if incomplete.
((looking-at "[ \t]*#\\+begin_\\([-A-Za-z0-9]+\\)\\(?: \\|$\\)")
(let ((type (downcase (match-string 1))))
(cond
((not (save-excursion
(re-search-forward
(format "[ \t]*#\\+end_%s\\(?: \\|$\\)" type) nil t)))
(org-element-paragraph-parser))
((string= type "center") (org-element-center-block-parser))
((string= type "quote") (org-element-quote-block-parser))
(t (org-element-special-block-parser)))))
;; Drawer.
((looking-at org-drawer-regexp)
(if (save-excursion (re-search-forward "^[ \t]*:END:[ \t]*$" nil t))
(org-element-drawer-parser)
(org-element-paragraph-parser)))
((looking-at "[ \t]*:\\( \\|$\\)") (org-element-fixed-width-parser))
;; Babel call.
((looking-at org-babel-block-lob-one-liner-regexp)
(org-element-babel-call-parser))
;; Keyword, or paragraph if at an affiliated keyword.
((looking-at "[ \t]*#\\+\\([a-z]+\\(:?_[a-z]+\\)*\\):")
(let ((key (downcase (match-string 1))))
(if (or (string= key "tblfm")
(member key org-element-affiliated-keywords))
(org-element-paragraph-parser)
(org-element-keyword-parser))))
;; Footnote definition.
((looking-at org-footnote-definition-re)
(org-element-footnote-definition-parser))
;; Dynamic block or paragraph if incomplete.
((looking-at "[ \t]*#\\+begin:\\(?: \\|$\\)")
(if (save-excursion
(re-search-forward "^[ \t]*#\\+end:\\(?: \\|$\\)" nil t))
(org-element-dynamic-block-parser)
(org-element-paragraph-parser)))
;; Comment.
((looking-at "\\(#\\|[ \t]*#\\+\\)\\(?: \\|$\\)")
(org-element-comment-parser))
;; Horizontal rule.
((looking-at "[ \t]*-\\{5,\\}[ \t]*$")
(org-element-horizontal-rule-parser))
;; Table.
((org-at-table-p t) (org-element-table-parser))
;; List or item.
((looking-at (org-item-re))
(if (eq special 'item)
(org-element-item-parser (or structure (org-list-struct)))
(org-element-plain-list-parser (or structure (org-list-struct)))))
;; Default element: Paragraph.
(t (org-element-paragraph-parser))))))
(defun org-element-parse-objects (beg end acc restriction) (defun org-element-parse-objects (beg end acc restriction)
"Parse objects between BEG and END and return recursive structure. "Parse objects between BEG and END and return recursive structure.