org-element: Speed optimizations

* contrib/lisp/org-element.el (org-element--element-block-re): New
  variable.
(org-element-current-element): New function.
(org-element-parse-elements): Make use of the new specialized function
  instead of `org-element-at-point'.  Also narrow buffer to current
  container (greater element or recursive object) being parsed to
  improve search speed.
(org-element-comment-parser): Speed-up parsing for comments at column 0.
(org-element-guess-type): Make comment regexp less restrictive, due to
  comment optimizations.
This commit is contained in:
Nicolas Goaziou 2012-01-13 17:01:45 +01:00
parent 102adf13d4
commit fb046f5b59
1 changed files with 208 additions and 78 deletions

View File

@ -877,39 +877,43 @@ CONTENTS is nil."
Return a list whose car is `comment' and cdr is a plist
containing `:begin', `:end', `:value' and `:post-blank'
keywords."
(let ((comment-re "\\(#\\|[ \t]*#\\+\\( \\|$\\)\\)")
beg-coms begin end value pos-before-blank keywords)
(let (beg-coms begin end end-coms keywords)
(save-excursion
;; Move to the beginning of comments.
(unless (bobp)
(while (and (not (bobp)) (looking-at comment-re))
(forward-line -1))
(unless (looking-at comment-re) (forward-line 1)))
(setq beg-coms (point))
;; Get affiliated keywords, if any.
(setq keywords (org-element-collect-affiliated-keywords))
;; Store true beginning of element.
(setq begin (car keywords))
;; Get ending of comments. If point is in a list, ensure to not
;; get outside of it.
(let* ((itemp (org-in-item-p))
(max-pos (if itemp
(org-list-get-bottom-point
(save-excursion (goto-char itemp) (org-list-struct)))
(point-max))))
(while (and (looking-at comment-re) (< (point) max-pos))
(forward-line)))
(setq pos-before-blank (point))
(if (looking-at "#")
;; First type of comment: comments at column 0.
(let ((comment-re "^\\([^#]\\|#\\+[a-z]\\)"))
(save-excursion
(re-search-backward comment-re nil 'move)
(if (bobp) (setq keywords nil beg-coms (point))
(forward-line)
(setq keywords (org-element-collect-affiliated-keywords)
beg-coms (point))))
(re-search-forward comment-re nil 'move)
(setq end-coms (if (eobp) (point) (match-beginning 0))))
;; Second type of comment: indented comments.
(let ((comment-re "[ \t]*#\\+\\(?: \\|$\\)"))
(unless (bobp)
(while (and (not (bobp)) (looking-at comment-re))
(forward-line -1))
(unless (looking-at comment-re) (forward-line)))
(setq beg-coms (point))
(setq keywords (org-element-collect-affiliated-keywords))
;; Get comments ending. This may not be accurate if
;; commented lines within an item are followed by commented
;; lines outside of the list. Though, parser will always
;; get it right as it already knows surrounding element and
;; has narrowed buffer to its contents.
(while (looking-at comment-re) (forward-line))
(setq end-coms (point))))
;; Find position after blank.
(goto-char end-coms)
(org-skip-whitespace)
(setq end (if (eobp) (point) (point-at-bol)))
;; Extract value.
(setq value (buffer-substring-no-properties beg-coms pos-before-blank)))
(setq end (if (eobp) (point) (point-at-bol))))
`(comment
(:begin ,begin
(:begin ,(or (car keywords) beg-coms)
:end ,end
:value ,value
:post-blank ,(count-lines pos-before-blank end)
:value ,(buffer-substring-no-properties beg-coms end-coms)
:post-blank ,(count-lines end-coms end)
,@(cadr keywords)))))
(defun org-element-comment-interpreter (comment contents)
@ -2765,7 +2769,7 @@ point is in a section in priority."
"^[ \t]*#\\+end:\\(?:\\s-\\|$\\)")))
(if (not completep) 'paragraph
(goto-char (car completep)) 'dynamic-block)))
((looking-at "\\(#\\|[ \t]*#\\+\\( \\|$\\)\\)") 'comment)
((looking-at "\\(#\\|[ \t]*#\\+\\)\\(?: \\|$\\)") 'comment)
((looking-at "[ \t]*-\\{5,\\}[ \t]*$") 'horizontal-rule)
((org-at-table-p t) 'table)
((looking-at "[ \t]*#\\+tblfm:")
@ -3056,13 +3060,21 @@ Nil values returned from FUN are ignored in the result."
;; Return value in a proper order.
(reverse --acc))))
;; The following functions are internal parts of the parser. The
;; first one, `org-element-parse-elements' acts at the element's
;; level. The second one, `org-element-parse-objects' applies on all
;; objects of a paragraph or a secondary string. It uses
;; The following functions are internal parts of the parser.
;; The first one, `org-element-parse-elements' acts at the element's
;; level. As point is always at the beginning of an element during
;; parsing, it doesn't have to rely on `org-element-at-point'.
;; Instead, it calls a more restrictive, though way quicker,
;; alternative: `org-element-current-element'. That function
;; internally uses `org-element--element-block-re' for quick access to
;; a common regexp.
;; The second one, `org-element-parse-objects' applies on all objects
;; of a paragraph or a secondary string. It uses
;; `org-element-get-candidates' to optimize the search of the next
;; object in the buffer.
;;
;; More precisely, that function looks for every allowed object type
;; first. Then, it discards failed searches, keeps further matches,
;; and searches again types matched behind point, for subsequent
@ -3094,54 +3106,40 @@ elements.
Elements are accumulated into ACC."
(save-excursion
(goto-char beg)
;; Shortcut when parsing only headlines.
;; When parsing only headlines, skip any text before first one.
(when (and (eq granularity 'headline) (not (org-at-heading-p)))
(org-with-limited-levels (outline-next-heading)))
;; Main loop start.
(while (and (< (point) end) (not (eobp)))
(while (not (eobp))
(push
;; 1. Item mode is active: point is at an item. Knowing that,
;; there's no need to go through `org-element-at-point'.
;; 1. Item mode is active: point must be at an item. Parse it
;; directly, skipping `org-element-current-element'.
(if (eq special 'item)
(let* ((element (org-element-item-parser structure))
(cbeg (org-element-get-property :contents-begin element))
(cend (org-element-get-property :contents-end element)))
(let ((element (org-element-item-parser structure)))
(goto-char (org-element-get-property :end element))
;; Narrow region to contents, so that item bullet don't
;; interfere with paragraph parsing.
(save-restriction
(narrow-to-region cbeg cend)
(narrow-to-region
(org-element-get-property :contents-begin element)
(org-element-get-property :contents-end element))
(org-element-parse-elements
cbeg cend nil structure granularity visible-only
(point-min) (point-max) nil structure granularity visible-only
(reverse element))))
;; 2. When ITEM is nil, find current element's type and parse
;; it accordingly to its category.
(let ((element (org-element-at-point special structure)))
(let ((element (org-element-current-element special structure)))
(goto-char (org-element-get-property :end element))
(cond
;; Case 1. ELEMENT is a footnote-definition. If
;; GRANURALITY allows parsing, use narrowing so that
;; footnote label don't interfere with paragraph
;; recognition.
((and (eq (car element) 'footnote-definition)
(not (memq granularity '(headline greater-element))))
(let ((cbeg (org-element-get-property :contents-begin element))
(cend (org-element-get-property :contents-end element)))
(save-restriction
(narrow-to-region cbeg cend)
(org-element-parse-elements
cbeg cend nil structure granularity visible-only
(reverse element)))))
;; Case 2. ELEMENT is a paragraph. Parse objects inside,
;; Case 1. ELEMENT is a paragraph. Parse objects inside,
;; if GRANULARITY allows it.
((and (eq (car element) 'paragraph)
(or (not granularity) (eq granularity 'object)))
(org-element-parse-objects
(org-element-get-property :contents-begin element)
(org-element-get-property :contents-end element)
(reverse element)
nil))
;; Case 3. ELEMENT is recursive: parse it between
(save-restriction
(narrow-to-region
(org-element-get-property :contents-begin element)
(org-element-get-property :contents-end element))
(org-element-parse-objects
(point-min) (point-max) (reverse element) nil)))
;; Case 2. ELEMENT is recursive: parse it between
;; `contents-begin' and `contents-end'. Make sure
;; GRANULARITY allows the recursion, or ELEMENT is an
;; headline, in which case going inside is mandatory, in
@ -3153,24 +3151,156 @@ Elements are accumulated into ACC."
(eq (car element) 'headline))
(not (and visible-only
(org-element-get-property :hiddenp element))))
(org-element-parse-elements
(org-element-get-property :contents-begin element)
(org-element-get-property :contents-end element)
;; At a plain list, switch to item mode. At an
;; headline, switch to section mode. Any other element
;; turns off special modes.
(case (car element) (plain-list 'item) (headline 'section))
(org-element-get-property :structure element)
granularity
visible-only
(reverse element)))
;; Case 4. Else, just accumulate ELEMENT.
(save-restriction
(narrow-to-region
(org-element-get-property :contents-begin element)
(org-element-get-property :contents-end element))
(org-element-parse-elements
(point-min) (point-max)
;; At a plain list, switch to item mode. At an
;; headline, switch to section mode. Any other
;; element turns off special modes.
(case (car element) (plain-list 'item) (headline 'section))
(org-element-get-property :structure element)
granularity visible-only (reverse element))))
;; Case 3. Else, just accumulate ELEMENT.
(t element))))
acc)
(org-skip-whitespace))
;; Return result.
(nreverse acc)))
(defconst org-element--element-block-re
(format "[ \t]*#\\+begin_\\(%s\\)\\(?: \\|$\\)"
(mapconcat
'regexp-quote
(mapcar 'car org-element-non-recursive-block-alist) "\\|"))
"Regexp matching the beginning of a non-recursive block type.
Used internally by `org-element-current-element'. Do not modify
it directly, set `org-element-recursive-block-alist' instead.")
(defun org-element-current-element (&optional special structure)
"Parse the element at point.
Return value is a list \(TYPE PROPS\) where TYPE is the type of
the element and PROPS a plist of properties associated to the
element.
Possible types are defined in `org-element-all-elements'.
Optional argument SPECIAL, when non-nil, can be either `item' or
`section'. The former allows to parse item wise instead of
plain-list wise, using STRUCTURE as the current list structure.
The latter will try to parse a section before anything else.
If STRUCTURE isn't provided but SPECIAL is set to `item', it will
be computed.
Unlike to `org-element-at-point', this function assumes point is
always at the beginning of the element it has to parse. As such,
it is quicker than its counterpart and always accurate, albeit
more restrictive."
(save-excursion
(beginning-of-line)
;; If point is at an affiliated keyword, try moving to the
;; beginning of the associated element. If none is found, the
;; keyword is orphaned and will be treated as plain text.
(when (looking-at org-element--affiliated-re)
(let ((opoint (point)))
(while (looking-at org-element--affiliated-re) (forward-line))
(when (looking-at "[ \t]*$") (goto-char opoint))))
(let ((case-fold-search t))
(cond
;; Headline.
((org-with-limited-levels (org-at-heading-p))
(org-element-headline-parser))
;; Quote section.
((let ((headline (ignore-errors (nth 4 (org-heading-components)))))
(and headline
(let (case-fold-search)
(string-match (format "^%s\\(?: \\|$\\)" org-quote-string)
headline))))
(org-element-quote-section-parser))
;; Section.
((eq special 'section) (org-element-section-parser))
;; Non-recursive block.
((when (looking-at org-element--element-block-re)
(let ((type (downcase (match-string 1))))
(if (save-excursion
(re-search-forward
(format "[ \t]*#\\+end_%s\\(?: \\|$\\)" type) nil t))
;; Build appropriate parser.
(funcall
(intern
(format "org-element-%s-parser"
(cdr (assoc type
org-element-non-recursive-block-alist)))))
(org-element-paragraph-parser)))))
;; Inlinetask.
((org-at-heading-p) (org-element-inlinetask-parser))
;; LaTeX Environment or paragraph if incomplete.
((looking-at "^[ \t]*\\\\begin{")
(if (save-excursion
(re-search-forward "^[ \t]*\\\\end{[^}]*}[ \t]*" nil t))
(org-element-latex-environment-parser)
(org-element-paragraph-parser)))
;; Property drawer.
((looking-at org-property-start-re)
(if (save-excursion (re-search-forward org-property-end-re nil t))
(org-element-property-drawer-parser)
(org-element-paragraph-parser)))
;; Recursive block, or paragraph if incomplete.
((looking-at "[ \t]*#\\+begin_\\([-A-Za-z0-9]+\\)\\(?: \\|$\\)")
(let ((type (downcase (match-string 1))))
(cond
((not (save-excursion
(re-search-forward
(format "[ \t]*#\\+end_%s\\(?: \\|$\\)" type) nil t)))
(org-element-paragraph-parser))
((string= type "center") (org-element-center-block-parser))
((string= type "quote") (org-element-quote-block-parser))
(t (org-element-special-block-parser)))))
;; Drawer.
((looking-at org-drawer-regexp)
(if (save-excursion (re-search-forward "^[ \t]*:END:[ \t]*$" nil t))
(org-element-drawer-parser)
(org-element-paragraph-parser)))
((looking-at "[ \t]*:\\( \\|$\\)") (org-element-fixed-width-parser))
;; Babel call.
((looking-at org-babel-block-lob-one-liner-regexp)
(org-element-babel-call-parser))
;; Keyword, or paragraph if at an affiliated keyword.
((looking-at "[ \t]*#\\+\\([a-z]+\\(:?_[a-z]+\\)*\\):")
(let ((key (downcase (match-string 1))))
(if (or (string= key "tblfm")
(member key org-element-affiliated-keywords))
(org-element-paragraph-parser)
(org-element-keyword-parser))))
;; Footnote definition.
((looking-at org-footnote-definition-re)
(org-element-footnote-definition-parser))
;; Dynamic block or paragraph if incomplete.
((looking-at "[ \t]*#\\+begin:\\(?: \\|$\\)")
(if (save-excursion
(re-search-forward "^[ \t]*#\\+end:\\(?: \\|$\\)" nil t))
(org-element-dynamic-block-parser)
(org-element-paragraph-parser)))
;; Comment.
((looking-at "\\(#\\|[ \t]*#\\+\\)\\(?: \\|$\\)")
(org-element-comment-parser))
;; Horizontal rule.
((looking-at "[ \t]*-\\{5,\\}[ \t]*$")
(org-element-horizontal-rule-parser))
;; Table.
((org-at-table-p t) (org-element-table-parser))
;; List or item.
((looking-at (org-item-re))
(if (eq special 'item)
(org-element-item-parser (or structure (org-list-struct)))
(org-element-plain-list-parser (or structure (org-list-struct)))))
;; Default element: Paragraph.
(t (org-element-paragraph-parser))))))
(defun org-element-parse-objects (beg end acc restriction)
"Parse objects between BEG and END and return recursive structure.