org-element: Speed up parsing when granularity is bigger than default

* contrib/lisp/org-element.el (org-element-headline-parser,
  org-element-inlinetask-parser, org-element-item-parser,
  org-element-verse-block-parser, org-element-current-element): New
  optional argument so parsing of secondary string is not mandatory.
(org-element-parse-elements): Remove duplicate part from doc-string.
(org-element-at-point): Improve speed of function since secondary
string are never parsed.
(org-element-secondary-value-alist): Simplify doc-string.
* testing/lisp/test-org-element.el: Add test.
This commit is contained in:
Nicolas Goaziou 2012-03-28 15:33:20 +02:00
parent 3abf884c49
commit bb671936b5
2 changed files with 160 additions and 59 deletions

View File

@ -111,12 +111,18 @@
;; For each greater element type, we define a parser and an ;; For each greater element type, we define a parser and an
;; interpreter. ;; interpreter.
;; A parser (`item''s excepted) accepts no argument and represents the ;; A parser returns the element or object as the list described above.
;; element or object as the list described above. An interpreter ;; Most of them accepts no argument. Though, exceptions exist. Hence
;; accepts two arguments: the list representation of the element or ;; every element containing a secondary string (see
;; object, and its contents. The latter may be nil, depending on the ;; `org-element-secondary-value-alist') will accept an optional
;; element or object considered. It returns the appropriate Org ;; argument to toggle parsing of that secondary string. Moreover,
;; syntax, as a string. ;; `item' parser requires current list's structure as its first
;; element.
;; An interpreter accepts two arguments: the list representation of
;; the element or object, and its contents. The latter may be nil,
;; depending on the element or object considered. It returns the
;; appropriate Org syntax, as a string.
;; Parsing functions must follow the naming convention: ;; Parsing functions must follow the naming convention:
;; org-element-TYPE-parser, where TYPE is greater element's type, as ;; org-element-TYPE-parser, where TYPE is greater element's type, as
@ -303,7 +309,7 @@ CONTENTS is the contents of the footnote-definition."
;;;; Headline ;;;; Headline
(defun org-element-headline-parser () (defun org-element-headline-parser (&optional raw-secondary-p)
"Parse an headline. "Parse an headline.
Return a list whose car is `headline' and cdr is a plist Return a list whose car is `headline' and cdr is a plist
@ -318,6 +324,9 @@ The plist also contains any property set in the property drawer,
with its name in lowercase, the underscores replaced with hyphens with its name in lowercase, the underscores replaced with hyphens
and colons at the beginning (i.e. `:custom-id'). and colons at the beginning (i.e. `:custom-id').
When RAW-SECONDARY-P is non-nil, headline's title will not be
parsed as a secondary string, but as a plain string instead.
Assume point is at beginning of the headline." Assume point is at beginning of the headline."
(save-excursion (save-excursion
(let* ((components (org-heading-components)) (let* ((components (org-heading-components))
@ -382,9 +391,11 @@ Assume point is at beginning of the headline."
(concat org-archive-tag ":") "" tags))) (concat org-archive-tag ":") "" tags)))
(when (string= tags ":") (setq tags nil))) (when (string= tags ":") (setq tags nil)))
;; Then get TITLE. ;; Then get TITLE.
(setq title (org-element-parse-secondary-string (setq title
raw-value (if raw-secondary-p raw-value
(cdr (assq 'headline org-element-string-restrictions)))) (org-element-parse-secondary-string
raw-value
(cdr (assq 'headline org-element-string-restrictions)))))
`(headline `(headline
(:raw-value ,raw-value (:raw-value ,raw-value
:title ,title :title ,title
@ -457,7 +468,7 @@ CONTENTS is the contents of the element."
;;;; Inlinetask ;;;; Inlinetask
(defun org-element-inlinetask-parser () (defun org-element-inlinetask-parser (&optional raw-secondary-p)
"Parse an inline task. "Parse an inline task.
Return a list whose car is `inlinetask' and cdr is a plist Return a list whose car is `inlinetask' and cdr is a plist
@ -470,6 +481,10 @@ The plist also contains any property set in the property drawer,
with its name in lowercase, the underscores replaced with hyphens with its name in lowercase, the underscores replaced with hyphens
and colons at the beginning (i.e. `:custom-id'). and colons at the beginning (i.e. `:custom-id').
When optional argument RAW-SECONDARY-P is non-nil, inline-task's
title will not be parsed as a secondary string, but as a plain
string instead.
Assume point is at beginning of the inline task." Assume point is at beginning of the inline task."
(save-excursion (save-excursion
(let* ((keywords (org-element-collect-affiliated-keywords)) (let* ((keywords (org-element-collect-affiliated-keywords))
@ -478,9 +493,10 @@ Assume point is at beginning of the inline task."
(todo (nth 2 components)) (todo (nth 2 components))
(todo-type (and todo (todo-type (and todo
(if (member todo org-done-keywords) 'done 'todo))) (if (member todo org-done-keywords) 'done 'todo)))
(title (org-element-parse-secondary-string (title (if raw-secondary-p (nth 4 components)
(nth 4 components) (org-element-parse-secondary-string
(cdr (assq 'inlinetask org-element-string-restrictions)))) (nth 4 components)
(cdr (assq 'inlinetask org-element-string-restrictions)))))
(standard-props (let (plist) (standard-props (let (plist)
(mapc (mapc
(lambda (p) (lambda (p)
@ -557,7 +573,7 @@ CONTENTS is the contents of inlinetask."
;;;; Item ;;;; Item
(defun org-element-item-parser (struct) (defun org-element-item-parser (struct &optional raw-secondary-p)
"Parse an item. "Parse an item.
STRUCT is the structure of the plain list. STRUCT is the structure of the plain list.
@ -567,6 +583,10 @@ Return a list whose car is `item' and cdr is a plist containing
`:checkbox', `:counter', `:tag', `:structure', `:hiddenp' and `:checkbox', `:counter', `:tag', `:structure', `:hiddenp' and
`:post-blank' keywords. `:post-blank' keywords.
When optional argument RAW-SECONDARY-P is non-nil, item's tag, if
any, will not be parsed as a secondary string, but as a plain
string instead.
Assume point is at the beginning of the item." Assume point is at the beginning of the item."
(save-excursion (save-excursion
(beginning-of-line) (beginning-of-line)
@ -584,11 +604,13 @@ Assume point is at the beginning of the item."
64)) 64))
((string-match "[0-9]+" c) ((string-match "[0-9]+" c)
(string-to-number (match-string 0 c)))))) (string-to-number (match-string 0 c))))))
(tag (let ((raw-tag (org-list-get-tag begin struct))) (tag
(and raw-tag (let ((raw-tag (org-list-get-tag begin struct)))
(org-element-parse-secondary-string (and raw-tag
raw-tag (if raw-secondary-p raw-tag
(cdr (assq 'item org-element-string-restrictions)))))) (org-element-parse-secondary-string
raw-tag
(cdr (assq 'item org-element-string-restrictions)))))))
(end (org-list-get-item-end begin struct)) (end (org-list-get-item-end begin struct))
(contents-begin (progn (looking-at org-list-full-item-re) (contents-begin (progn (looking-at org-list-full-item-re)
(goto-char (match-end 0)) (goto-char (match-end 0))
@ -1507,13 +1529,17 @@ CONTENTS is nil."
;;;; Verse Block ;;;; Verse Block
(defun org-element-verse-block-parser () (defun org-element-verse-block-parser (&optional raw-secondary-p)
"Parse a verse block. "Parse a verse block.
Return a list whose car is `verse-block' and cdr is a plist Return a list whose car is `verse-block' and cdr is a plist
containing `:begin', `:end', `:hiddenp', `:value' and containing `:begin', `:end', `:hiddenp', `:value' and
`:post-blank' keywords. `:post-blank' keywords.
When optional argument RAW-SECONDARY-P is non-nil, verse-block's
value will not be parsed as a secondary string, but as a plain
string instead.
Assume point is at beginning or end of the block." Assume point is at beginning or end of the block."
(save-excursion (save-excursion
(let* ((case-fold-search t) (let* ((case-fold-search t)
@ -1524,16 +1550,20 @@ Assume point is at beginning or end of the block."
(org-element-collect-affiliated-keywords))) (org-element-collect-affiliated-keywords)))
(begin (car keywords)) (begin (car keywords))
(hidden (progn (forward-line) (org-truely-invisible-p))) (hidden (progn (forward-line) (org-truely-invisible-p)))
(value-begin (point))
(value-end
(progn
(re-search-forward (concat "^[ \t]*#\\+END_VERSE") nil t)
(point-at-bol)))
(pos-before-blank (progn (forward-line) (point))) (pos-before-blank (progn (forward-line) (point)))
(value (org-element-parse-secondary-string
(buffer-substring-no-properties
(point)
(progn
(re-search-forward (concat "^[ \t]*#\\+END_VERSE") nil t)
(point-at-bol)))
(cdr (assq 'verse-block org-element-string-restrictions))))
(end (progn (org-skip-whitespace) (end (progn (org-skip-whitespace)
(if (eobp) (point) (point-at-bol))))) (if (eobp) (point) (point-at-bol))))
(value
(if raw-secondary-p
(buffer-substring-no-properties value-begin value-end)
(org-element-parse-secondary-string
(buffer-substring-no-properties value-begin value-end)
(cdr (assq 'verse-block org-element-string-restrictions))))))
`(verse-block `(verse-block
(:begin ,begin (:begin ,begin
:end ,end :end ,end
@ -2638,11 +2668,7 @@ matching `org-element-parsed-keywords'.")
(item . :tag) (item . :tag)
(footnote-reference . :inline-definition) (footnote-reference . :inline-definition)
(verse-block . :value)) (verse-block . :value))
"Alist between element types and location of secondary value. "Alist between element types and location of secondary value.")
Only elements with a secondary value available at parse time are
considered here. This is used internally by `org-element-map',
which will look into the secondary strings of an element only if
its type is listed here.")
@ -2689,7 +2715,7 @@ It can also return the following special value:
Used internally by `org-element-current-element'. Do not modify Used internally by `org-element-current-element'. Do not modify
it directly, set `org-element-recursive-block-alist' instead.") it directly, set `org-element-recursive-block-alist' instead.")
(defun org-element-current-element (&optional special structure) (defun org-element-current-element (&optional granularity special structure)
"Parse the element starting at point. "Parse the element starting at point.
Return value is a list like (TYPE PROPS) where TYPE is the type Return value is a list like (TYPE PROPS) where TYPE is the type
@ -2698,6 +2724,12 @@ element.
Possible types are defined in `org-element-all-elements'. Possible types are defined in `org-element-all-elements'.
Optional argument GRANULARITY determines the depth of the
recursion. Allowed values are `headline', `greater-element',
`element', `object' or nil. When it is bigger than `object' (or
nil), secondary values will not be parsed, since they only
contain objects.
Optional argument SPECIAL, when non-nil, can be either `item', Optional argument SPECIAL, when non-nil, can be either `item',
`section' or `quote-section'. `item' allows to parse item wise `section' or `quote-section'. `item' allows to parse item wise
instead of plain-list wise, using STRUCTURE as the current list instead of plain-list wise, using STRUCTURE as the current list
@ -2719,11 +2751,15 @@ it is quicker than its counterpart, albeit more restrictive."
(let ((opoint (point))) (let ((opoint (point)))
(while (looking-at org-element--affiliated-re) (forward-line)) (while (looking-at org-element--affiliated-re) (forward-line))
(when (looking-at "[ \t]*$") (goto-char opoint)))) (when (looking-at "[ \t]*$") (goto-char opoint))))
(let ((case-fold-search t)) (let ((case-fold-search t)
;; Determine if parsing depth allows for secondary strings
;; parsing. It only applies to elements referenced in
;; `org-element-secondary-value-alist'.
(raw-secondary-p (and granularity (not (eq granularity 'object)))))
(cond (cond
;; Headline. ;; Headline.
((org-with-limited-levels (org-at-heading-p)) ((org-with-limited-levels (org-at-heading-p))
(org-element-headline-parser)) (org-element-headline-parser raw-secondary-p))
;; Quote section. ;; Quote section.
((eq special 'quote-section) (org-element-quote-section-parser)) ((eq special 'quote-section) (org-element-quote-section-parser))
;; Section. ;; Section.
@ -2734,15 +2770,20 @@ it is quicker than its counterpart, albeit more restrictive."
(if (save-excursion (if (save-excursion
(re-search-forward (re-search-forward
(format "[ \t]*#\\+END_%s\\(?: \\|$\\)" type) nil t)) (format "[ \t]*#\\+END_%s\\(?: \\|$\\)" type) nil t))
;; Build appropriate parser. ;; Build appropriate parser. `verse-block' type
(funcall ;; elements require an additional argument, so they
(intern ;; must be treated separately.
(format "org-element-%s-parser" (if (string= "VERSE" type)
(cdr (assoc type (org-element-verse-block-parser raw-secondary-p)
org-element-non-recursive-block-alist))))) (funcall
(intern
(format
"org-element-%s-parser"
(cdr (assoc type
org-element-non-recursive-block-alist))))))
(org-element-paragraph-parser))))) (org-element-paragraph-parser)))))
;; Inlinetask. ;; Inlinetask.
((org-at-heading-p) (org-element-inlinetask-parser)) ((org-at-heading-p) (org-element-inlinetask-parser raw-secondary-p))
;; LaTeX Environment or paragraph if incomplete. ;; LaTeX Environment or paragraph if incomplete.
((looking-at "^[ \t]*\\\\begin{") ((looking-at "^[ \t]*\\\\begin{")
(if (save-excursion (if (save-excursion
@ -2801,7 +2842,9 @@ it is quicker than its counterpart, albeit more restrictive."
;; List or item. ;; List or item.
((looking-at (org-item-re)) ((looking-at (org-item-re))
(if (eq special 'item) (if (eq special 'item)
(org-element-item-parser (or structure (org-list-struct))) (org-element-item-parser
(or structure (org-list-struct))
raw-secondary-p)
(org-element-plain-list-parser (or structure (org-list-struct))))) (org-element-plain-list-parser (or structure (org-list-struct)))))
;; Default element: Paragraph. ;; Default element: Paragraph.
(t (org-element-paragraph-parser)))))) (t (org-element-paragraph-parser))))))
@ -3099,14 +3142,8 @@ respectively, on quote sections, sections and items. Moreover,
when value is `item', STRUCTURE will be used as the current list when value is `item', STRUCTURE will be used as the current list
structure. structure.
GRANULARITY determines the depth of the recursion. It can be set GRANULARITY determines the depth of the recursion. See
to the following symbols: `org-element-parse-buffer' for more information.
`headline' Only parse headlines.
`greater-element' Don't recurse into greater elements. Thus,
elements parsed are the top-level ones.
`element' Parse everything but objects and plain text.
`object' or nil Parse the complete buffer.
When VISIBLE-ONLY is non-nil, don't parse contents of hidden When VISIBLE-ONLY is non-nil, don't parse contents of hidden
elements. elements.
@ -3125,7 +3162,10 @@ Elements are accumulated into ACC."
;; 1. Item mode is active: point must be at an item. Parse it ;; 1. Item mode is active: point must be at an item. Parse it
;; directly, skipping `org-element-current-element'. ;; directly, skipping `org-element-current-element'.
(if (eq special 'item) (if (eq special 'item)
(let ((element (org-element-item-parser structure))) (let ((element
(org-element-item-parser
structure
(and granularity (not (eq granularity 'object))))))
(goto-char (org-element-property :end element)) (goto-char (org-element-property :end element))
(org-element-parse-elements (org-element-parse-elements
(org-element-property :contents-begin element) (org-element-property :contents-begin element)
@ -3133,7 +3173,8 @@ Elements are accumulated into ACC."
nil structure granularity visible-only (reverse element))) nil structure granularity visible-only (reverse element)))
;; 2. When ITEM is nil, find current element's type and parse ;; 2. When ITEM is nil, find current element's type and parse
;; it accordingly to its category. ;; it accordingly to its category.
(let* ((element (org-element-current-element special structure)) (let* ((element (org-element-current-element
granularity special structure))
(type (org-element-type element))) (type (org-element-type element)))
(goto-char (org-element-property :end element)) (goto-char (org-element-property :end element))
(cond (cond
@ -3512,6 +3553,9 @@ indentation is not done with TAB characters."
;; basically jumps back to the beginning of section containing point ;; basically jumps back to the beginning of section containing point
;; and moves, element after element, with ;; and moves, element after element, with
;; `org-element-current-element' until the container is found. ;; `org-element-current-element' until the container is found.
;;
;; Note: When using `org-element-at-point', secondary values are never
;; parsed since the function focuses on elements, not on objects.
(defun org-element-at-point (&optional keep-trail) (defun org-element-at-point (&optional keep-trail)
"Determine closest element around point. "Determine closest element around point.
@ -3535,10 +3579,11 @@ contains that headline as its single element). Elements
in-between, if any, are siblings of the element at point." in-between, if any, are siblings of the element at point."
(org-with-wide-buffer (org-with-wide-buffer
;; If at an headline, parse it. It is the sole element that ;; If at an headline, parse it. It is the sole element that
;; doesn't require to know about context. ;; doesn't require to know about context. Be sure to disallow
;; secondary string parsing, though.
(if (org-with-limited-levels (org-at-heading-p)) (if (org-with-limited-levels (org-at-heading-p))
(if (not keep-trail) (org-element-headline-parser) (if (not keep-trail) (org-element-headline-parser t)
(list (org-element-headline-parser))) (list (org-element-headline-parser t)))
;; Otherwise move at the beginning of the section containing ;; Otherwise move at the beginning of the section containing
;; point. ;; point.
(let ((origin (point)) element type item-flag trail struct prevs) (let ((origin (point)) element type item-flag trail struct prevs)
@ -3553,7 +3598,7 @@ in-between, if any, are siblings of the element at point."
;; original position. ;; original position.
(catch 'exit (catch 'exit
(while t (while t
(setq element (org-element-current-element item-flag struct) (setq element (org-element-current-element 'element item-flag struct)
type (car element)) type (car element))
(when keep-trail (push element trail)) (when keep-trail (push element trail))
(cond (cond

View File

@ -203,6 +203,62 @@
(equal (org-element-property :label-fmt element) "[ref:%s]")))))) (equal (org-element-property :label-fmt element) "[ref:%s]"))))))
;;; Secondary strings
(ert-deftest test-org-element/secondary-string-parsing ()
"Test granularity correctly toggles secondary strings parsing."
;; 1. With a granularity bigger than `object', no secondary string
;; should be parsed.
;;
;; 1.1. Test with `headline' type.
(org-test-with-temp-text "* Headline"
(let ((headline
(org-element-map (org-element-parse-buffer 'headline) 'headline
'identity
nil
'first-match)))
(should (stringp (org-element-property :title headline)))))
;; 1.2. Test with `item' type.
(org-test-with-temp-text "* Headline\n- tag :: item"
(let ((item (org-element-map (org-element-parse-buffer 'element)
'item
'identity
nil
'first-match)))
(should (stringp (org-element-property :tag item)))))
;; 1.3. Test with `verse-block' type.
(org-test-with-temp-text "#+BEGIN_VERSE\nTest\n#+END_VERSE"
(let ((verse-block (org-element-map (org-element-parse-buffer 'element)
'verse-block
'identity
nil
'first-match)))
(should (stringp (org-element-property :value verse-block)))))
;; 1.4. Test with `inlinetask' type, if avalaible.
(when (featurep 'org-inlinetask)
(let ((org-inlinetask-min-level 15))
(org-test-with-temp-text "*************** Inlinetask"
(let ((inlinetask (org-element-map (org-element-parse-buffer 'element)
'inlinetask
'identity
nil
'first-match)))
(should (stringp (org-element-property :title inlinetask)))))))
;; 2. With a default granularity, secondary strings should be
;; parsed.
(org-test-with-temp-text "* Headline"
(let ((headline
(org-element-map (org-element-parse-buffer) 'headline
'identity
nil
'first-match)))
(should (listp (org-element-property :title headline)))))
;; 3. `org-element-at-point' should never parse a secondary string.
(org-test-with-temp-text "* Headline"
(should (stringp (org-element-property :title (org-element-at-point))))))
;;; Navigation tools. ;;; Navigation tools.