From eed0500913a3534a32abfd5864cf674d9d0cdf64 Mon Sep 17 00:00:00 2001 From: Nicolas Goaziou Date: Fri, 31 Jan 2014 01:14:44 +0100 Subject: [PATCH] org-element: Implement lazy cache synchronization * lisp/org-element.el (org-element-cache-sync-idle-time): Change default value. (org-element-cache-sync-duration, org-element-cache-sync-break, org-element--cache-sync-requests, org-element--cache-sync-timer, org-element--cache-sync-keys, org-element--cache-default-key, org-element--cache-change-warning): New variables. (org-element-cache-merge-changes-threshold, org-element--cache-status): Removed variables. (org-element--cache-key, org-element--cache-generate-key, org-element--cache-key-less-p, org-element--cache-find, org-element--cache-set-timer, org-element--cache-process-request, org-element--cache-submit-request, org-element--parse-to, org-element--cache-interrupt-p, org-element--cache-put, org-element--cache-active-p): New functions. (org-element--cache-compare): Adapt to new keys in AVL tree. (org-element--cache-pending-changes-p, org-element--cache-cancel-changes, org-element--cache-mapc, org-element-cache-get, org-element-cache-put): Removed functions. (org-element--cache-before-change): Use new variables. (org-element--cache-after-change): Renamed from `org-element--cache-record-change'. (org-element-cache-get): Change signature. (org-element-cache-put): Rewrite function. Use new tools. (org-element-cache-reset): Adapt to new variables. (org-element--cache-sync): Rewrite function. * lisp/ox.el (org-export--generate-copy-script): Do not copy through new cache-related variables. (org-export-ignored-local-variables): New variable. * testing/lisp/test-org-element.el (test-org-element/cache): New test. Now only the part of the cache that needs to be accessed is updated synchronously. Otherwise, it happens on idle time. --- lisp/org-element.el | 1685 ++++++++++++++++++------------ lisp/ox.el | 16 +- testing/lisp/test-org-element.el | 111 ++ 3 files changed, 1140 insertions(+), 672 deletions(-) diff --git a/lisp/org-element.el b/lisp/org-element.el index 387af76c6..55eb78fc2 100644 --- a/lisp/org-element.el +++ b/lisp/org-element.el @@ -109,8 +109,8 @@ ;; ;; The library ends by furnishing `org-element-at-point' function, and ;; a way to give information about document structure around point -;; with `org-element-context'. A simple cache mechanism is also -;; provided for these functions. +;; with `org-element-context'. A cache mechanism is also provided for +;; these functions. ;;; Code: @@ -4645,6 +4645,1002 @@ indentation is not done with TAB characters." (funcall build element (not ignore-first)))))) + +;;; Cache +;; +;; Implement a caching mechanism for `org-element-at-point' and +;; `org-element-context', which see. +;; +;; A single public function is provided: `org-element-cache-reset'. +;; +;; Cache is enabled by default, but can be disabled globally with +;; `org-element-use-cache'. `org-element-cache-sync-idle-time', +;; org-element-cache-sync-duration' and `org-element-cache-sync-break' +;; can be tweaked to control caching behaviour. +;; +;; Internally, parsed elements are stored in an AVL tree, +;; `org-element--cache'. This tree is updated lazily: whenever +;; a change happens to the buffer, a synchronization request is +;; registered in `org-element--cache-sync-requests' (see +;; `org-element--cache-submit-request'). During idle time, requests +;; are processed by `org-element--cache-sync'. Synchronization also +;; happens when an element is required from the cache. In this case, +;; the process stops as soon as the needed element is up-to-date. +;; +;; A synchronization request can only apply on a synchronized part of +;; the cache. Therefore, the cache is updated at least to the +;; location where the new request applies. Thus, requests are ordered +;; from left to right and all elements starting before the first +;; request are correct. This property is used by functions like +;; `org-element--cache-find' to retrieve elements in the part of the +;; cache that can be trusted. +;; +;; A request applies to every element, starting from its original +;; location (or key, see below). When a request is processed, it +;; moves forward and may collide the next one. In this case, both +;; requests are merged into a new one that starts from that element. +;; As a consequence, the whole synchronization complexity does not +;; depend on the number of pending requests, but on the number of +;; elements the very first request will be applied on. +;; +;; Elements cannot be accessed through their beginning position, which +;; may or may not be up-to-date. Instead, each element in the tree is +;; associated to a key, obtained with `org-element--cache-key'. This +;; mechanism is robust enough to preserve total order among elements +;; even when the tree is only partially synchronized. +;; +;; Objects contained in an element are stored in a hash table, +;; `org-element--cache-objects'. + + +(defvar org-element-use-cache t + "Non nil when Org parser should cache its results. +This is mostly for debugging purpose.") + +(defvar org-element-cache-sync-idle-time 0.4 + "Length, in seconds, of idle time before syncing cache.") + +(defvar org-element-cache-sync-duration (seconds-to-time 0.04) + "Maximum duration, as a time value, for a cache synchronization. +If the synchronization is not over after this delay, the process +pauses and resumes after `org-element-cache-sync-break' +seconds.") + +(defvar org-element-cache-sync-break (seconds-to-time 0.2) + "Duration, as a time value, of the pause between synchronizations. +See `org-element-cache-sync-duration' for more information.") + + +;;;; Data Structure + +(defvar org-element--cache nil + "AVL tree used to cache elements. +Each node of the tree contains an element. Comparison is done +with `org-element--cache-compare'. This cache is used in +`org-element-at-point'.") + +(defvar org-element--cache-objects nil + "Hash table used as to cache objects. +Key is an element, as returned by `org-element-at-point', and +value is an alist where each association is: + + \(POS CANDIDATES . OBJECTS) + +where POS is a buffer position, CANDIDATES is the last know list +of successors (see `org-element--get-next-object-candidates') in +container starting at POS and OBJECTS is a list of objects known +to live within that container, from farthest to closest. + +In the following example, \\alpha, bold object and \\beta start +at, respectively, positions 1, 7 and 8, + + \\alpha *\\beta* + +If the paragraph is completely parsed, OBJECTS-DATA will be + + \((1 nil BOLD-OBJECT ENTITY-OBJECT) + \(8 nil ENTITY-OBJECT)) + +whereas in a partially parsed paragraph, it could be + + \((1 ((entity . 1) (bold . 7)) ENTITY-OBJECT)) + +This cache is used in `org-element-context'.") + +(defvar org-element--cache-sync-requests nil + "List of pending synchronization requests. + +A request is a vector with the following pattern: + + \[NEXT END OFFSET PARENT PHASE] + +Processing a synchronization request consists in three phases: + + 0. Delete modified elements, + 1. Fill missing area in cache, + 2. Shift positions and re-parent elements after the changes. + +During phase 0, NEXT is the key of the first element to be +removed and END is buffer position delimiting the modifications. +Every element starting between these are removed. PARENT is an +element to be removed. Every element contained in it will also +be removed. + +During phase 1, NEXT is the key of the next known element in +cache. Parse buffer between that element and the one before it +in order to determine the parent of the next element. Set PARENT +to the element containing NEXT. + +During phase 2, NEXT is the key of the next element to shift in +the parse tree. All elements starting from this one have their +properties relatives to buffer positions shifted by integer +OFFSET and, if they belong to element PARENT, are adopted by it. + +PHASE specifies the phase number, as an integer.") + +(defvar org-element--cache-sync-timer nil + "Timer used for cache synchronization.") + +(defvar org-element--cache-sync-keys nil + "Hash table used to store keys during synchronization. +See `org-element--cache-key' for more information.") + +(defsubst org-element--cache-key (element) + "Return a unique key for ELEMENT in cache tree. + +Keys are used to keep a total order among elements in the cache. +Comparison is done with `org-element--cache-key-less-p'. + +When no synchronization is taking place, a key is simply the +beginning position of the element, or that position plus one in +the case of an first item (respectively row) in +a list (respectively a table). + +During a synchronization, the key is the one the element had when +the cache was synchronized for the last time. Elements added to +cache during the synchronization get a new key generated with +`org-element--cache-generate-key'. + +Such keys are stored in `org-element--cache-sync-keys'. The hash +table is cleared once the synchronization is complete." + (or (gethash element org-element--cache-sync-keys) + (let* ((begin (org-element-property :begin element)) + ;; Increase beginning position of items (respectively + ;; table rows) by one, so the first item can get + ;; a different key from its parent list (respectively + ;; table). + (key (if (memq (org-element-type element) '(item table-row)) + (1+ begin) + begin))) + (if org-element--cache-sync-requests + (puthash element key org-element--cache-sync-keys) + key)))) + +(defconst org-element--cache-default-key (ash most-positive-fixnum -1) + "Default value for a new key level. +See `org-element--cache-generate-key' for more information.") + +(defun org-element--cache-generate-key (lower upper) + "Generate a key between LOWER and UPPER. + +LOWER and UPPER are integers or lists, possibly empty. + +If LOWER and UPPER are equals, return LOWER. Otherwise, return +a unique key, as an integer or a list of integers, according to +the following rules: + + - LOWER and UPPER are compared level-wise until values differ. + + - If, at a given level, LOWER and UPPER differ from more than + 2, the new key shares all the levels above with LOWER and + gets a new level. Its value is the mean between LOWER and + UPPER. + + \(1 2) + (1 4) --> (1 3) + + - If LOWER has no value to compare with, it is assumed that its + value is 0: + + \(1 1) + (1 1 2) --> (1 1 1) + + Likewise, if UPPER is short of levels, the current value is + `most-positive-fixnum'. + + - If they differ from only one, the new key inherits from + current LOWER lever and has a new level at the value + `org-element--cache-default-key'. + + \(1 2) + (1 3) --> (1 2 org-element--cache-default-key) + + - If the key is only one level long, it is returned as an + integer. + + \(1 2) + (3 2) --> 2" + (if (equal lower upper) lower + (let ((lower (if (integerp lower) (list lower) lower)) + (upper (if (integerp upper) (list upper) upper)) + key) + (catch 'exit + (while (and lower upper) + (let ((lower-level (car lower)) + (upper-level (car upper))) + (cond + ((= lower-level upper-level) + (push lower-level key) + (setq lower (cdr lower) upper (cdr upper))) + ((= (- upper-level lower-level) 1) + (push lower-level key) + (setq lower (cdr lower)) + (while (and lower (= (car lower) most-positive-fixnum)) + (push most-positive-fixnum key) + (setq lower (cdr lower))) + (push (if lower + (let ((n (car lower))) + (+ (ash (if (zerop (mod n 2)) n (1+ n)) -1) + org-element--cache-default-key)) + org-element--cache-default-key) + key) + (throw 'exit t)) + (t + (push (let ((n (car lower))) + (+ (ash (if (zerop (mod n 2)) n (1+ n)) -1) + (ash (car upper) -1))) + key) + (throw 'exit t))))) + (cond + ((not lower) + (while (and upper (zerop (car upper))) + (push 0 key) + (setq upper (cdr upper))) + ;; (n) is equivalent to (n 0 0 0 0 ...) so we want to avoid + ;; ending on a sequence of 0. + (if (= (car upper) 1) + (progn (push 0 key) + (push org-element--cache-default-key key)) + (push (if upper (ash (car upper) -1) org-element--cache-default-key) + key))) + ((not upper) + (while (and lower (= (car lower) most-positive-fixnum)) + (push most-positive-fixnum key) + (setq lower (cdr lower))) + (push (if lower + (let ((n (car lower))) + (+ (ash (if (zerop (mod n 2)) n (1+ n)) -1) + org-element--cache-default-key)) + org-element--cache-default-key) + key)))) + ;; Ensure we don't return a list with a single element. + (if (cdr key) (nreverse key) (car key))))) + +(defsubst org-element--cache-key-less-p (a b) + "Non-nil if key A is less than key B. +A and B are either integers or lists of integers, as returned by +`org-element--cache-key'." + (if (integerp a) (if (integerp b) (< a b) (<= a (car b))) + (if (integerp b) (< (car a) b) + (catch 'exit + (while (and a b) + (cond ((car-less-than-car a b) (throw 'exit t)) + ((car-less-than-car b a) (throw 'exit nil)) + (t (setq a (cdr a) b (cdr b))))) + ;; If A is empty, either keys are equal (B is also empty) or + ;; B is less than A (B is longer). Therefore return nil. + ;; + ;; If A is not empty, B is necessarily empty and A is less + ;; than B (A is longer). Therefore, return a non-nil value. + a)))) + +(defun org-element--cache-compare (a b) + "Non-nil when element A is located before element B." + (org-element--cache-key-less-p (org-element--cache-key a) + (org-element--cache-key b))) + +(defsubst org-element--cache-root () + "Return root value in cache. +This function assumes `org-element--cache' is a valid AVL tree." + (avl-tree--node-left (avl-tree--dummyroot org-element--cache))) + + +;;;; Tools + +(defsubst org-element--cache-active-p () + "Non-nil when cache is active in current buffer." + (and org-element-use-cache + (or (derived-mode-p 'org-mode) orgstruct-mode))) + +(defun org-element--cache-find (pos &optional side) + "Find element in cache starting at POS or before. + +POS refers to a buffer position. + +When optional argument SIDE is non-nil, the function checks for +elements starting at or past POS instead. If SIDE is `both', the +function returns a cons cell where car is the first element +starting at or before POS and cdr the first element starting +after POS. + +The function can only find elements in the synchronized part of +the cache." + (let ((limit (and org-element--cache-sync-requests + (aref (car org-element--cache-sync-requests) 0))) + (node (org-element--cache-root)) + lower upper) + (while node + (let* ((element (avl-tree--node-data node)) + (begin (org-element-property :begin element))) + (cond + ((and limit + (not (org-element--cache-key-less-p + (org-element--cache-key element) limit))) + (setq node (avl-tree--node-left node))) + ((> begin pos) + (setq upper element + node (avl-tree--node-left node))) + ((< begin pos) + (setq lower element + node (avl-tree--node-right node))) + ;; We found an element in cache starting at POS. If `side' + ;; is `both' we also want the next one in order to generate + ;; a key in-between. + ;; + ;; If the element is the first row or item in a table or + ;; a plain list, we always return the table or the plain + ;; list. + ;; + ;; In any other case, we return the element found. + ((eq side 'both) + (setq lower element) + (setq node (avl-tree--node-right node))) + ((and (memq (org-element-type element) '(item table-row)) + (let ((parent (org-element-property :parent element))) + (and (= (org-element-property :begin element) + (org-element-property :contents-begin parent)) + (setq node nil + lower parent + upper parent))))) + (t + (setq node nil + lower element + upper element))))) + (case side + (both (cons lower upper)) + ((nil) lower) + (otherwise upper)))) + +(defun org-element--cache-put (element &optional data) + "Store ELEMENT in current buffer's cache, if allowed. +When optional argument DATA is non-nil, assume is it object data +relative to ELEMENT and store it in the objects cache. This +function does nothing if `org-element-use-cache' is nil." + (when org-element-use-cache + (if data (puthash element data org-element--cache-objects) + (when org-element--cache-sync-requests + ;; During synchronization, first build an appropriate key for + ;; the new element so `avl-tree-enter' can insert it at the + ;; right spot in the cache. + (let ((keys (org-element--cache-find + (org-element-property :begin element) 'both))) + (puthash element + (org-element--cache-generate-key + (and (car keys) (org-element--cache-key (car keys))) + (cond ((cdr keys) (org-element--cache-key (cdr keys))) + (org-element--cache-sync-requests + (aref (car org-element--cache-sync-requests) 0)))) + org-element--cache-sync-keys))) + (avl-tree-enter org-element--cache element)))) + + +;;;; Synchronization + +(defsubst org-element--cache-set-timer (buffer) + "Set idle timer for cache synchronization in BUFFER." + (when org-element--cache-sync-timer + (cancel-timer org-element--cache-sync-timer)) + (setq org-element--cache-sync-timer + (run-with-idle-timer + (let ((idle (current-idle-time))) + (if idle (time-add idle org-element-cache-sync-break) + org-element-cache-sync-idle-time)) + nil + #'org-element--cache-sync + buffer))) + +(defsubst org-element--cache-interrupt-p (time-limit) + "Non-nil when synchronization process should be interrupted. +TIME-LIMIT is a time value or nil." + (and time-limit + (or (input-pending-p) + (time-less-p time-limit (current-time))))) + +(defsubst org-element--cache-shift-positions (element offset &optional props) + "Shift ELEMENT properties relative to buffer positions by OFFSET. + +Properties containing buffer positions are `:begin', `:end', +`:contents-begin', `:contents-end' and `:structure'. When +optional argument PROPS is a list of keywords, only shift +properties provided in that list. + +Properties are modified by side-effect." + (let ((properties (nth 1 element))) + ;; Shift `:structure' property for the first plain list only: it + ;; is the only one that really matters and it prevents from + ;; shifting it more than once. + (when (and (or (not props) (memq :structure props)) + (eq (org-element-type element) 'plain-list) + (not (eq (org-element-type (plist-get properties :parent)) + 'item))) + (dolist (item (plist-get properties :structure)) + (incf (car item) offset) + (incf (nth 6 item) offset))) + (dolist (key '(:begin :contents-begin :contents-end :end :post-affiliated)) + (let ((value (and (or (not props) (memq key props)) + (plist-get properties key)))) + (and value (plist-put properties key (+ offset value))))))) + +(defun org-element--cache-sync (buffer &optional threshold) + "Synchronize cache with recent modification in BUFFER. +When optional argument THRESHOLD is non-nil, do the +synchronization for all elements starting before or at threshold, +then exit. Otherwise, synchronize cache for as long as +`org-element-cache-sync-duration' or until Emacs leaves idle +state." + (when (buffer-live-p buffer) + (with-current-buffer buffer + (let ((inhibit-quit t) request next) + (when org-element--cache-sync-timer + (cancel-timer org-element--cache-sync-timer)) + (catch 'interrupt + (while org-element--cache-sync-requests + (setq request (car org-element--cache-sync-requests) + next (nth 1 org-element--cache-sync-requests)) + (or (org-element--cache-process-request + request + (and next (aref next 0)) + threshold + (and (not threshold) + (time-add (current-time) + org-element-cache-sync-duration))) + (throw 'interrupt t)) + ;; Request processed. Merge current and next offsets and + ;; transfer phase number and ending position. + (when next + (incf (aref next 2) (aref request 2)) + (aset next 1 (aref request 1)) + (aset next 4 (aref request 4))) + (setq org-element--cache-sync-requests + (cdr org-element--cache-sync-requests)))) + ;; If more requests are awaiting, set idle timer accordingly. + ;; Otherwise, reset keys. + (if org-element--cache-sync-requests + (org-element--cache-set-timer buffer) + (clrhash org-element--cache-sync-keys)))))) + +(defun org-element--cache-process-request (request next threshold time-limit) + "Process synchronization REQUEST for all entries before NEXT. + +REQUEST is a vector, built by `org-element--cache-submit-request'. + +NEXT is a cache key, as returned by `org-element--cache-key'. + +When non-nil, THRESHOLD is a buffer position. Synchronization +stops as soon as a shifted element begins after it. + +When non-nil, TIME-LIMIT is a time value. Synchronization stops +after this time or when Emacs exits idle state. + +Return nil if the process stops before completing the request, +t otherwise." + (catch 'quit + (when (= (aref request 4) 0) + ;; Phase 1. + ;; + ;; Delete all elements starting after BEG, but not after buffer + ;; position END or past element with key NEXT. + ;; + ;; As an exception, also delete elements starting after + ;; modifications but included in an element altered by + ;; modifications (orphans). + ;; + ;; At each iteration, we start again at tree root since + ;; a deletion modifies structure of the balanced tree. + (catch 'end-phase + (let ((beg (aref request 0)) + (end (aref request 1)) + (deleted-parent (aref request 3))) + (while t + (when (org-element--cache-interrupt-p time-limit) + (aset request 3 deleted-parent) + (throw 'quit nil)) + ;; Find first element in cache with key BEG or after it. + ;; We don't use `org-element--cache-find' because it + ;; couldn't reach orphaned elements past NEXT. Moreover, + ;; BEG is a key, not a buffer position. + (let ((node (org-element--cache-root)) data data-key) + (while node + (let* ((element (avl-tree--node-data node)) + (key (org-element--cache-key element))) + (cond + ((org-element--cache-key-less-p key beg) + (setq node (avl-tree--node-right node))) + ((org-element--cache-key-less-p beg key) + (setq data element + data-key key + node (avl-tree--node-left node))) + (t (setq data element + data-key key + node nil))))) + (if (not data) (throw 'quit t) + (let ((pos (org-element-property :begin data))) + (cond + ;; Remove orphaned elements. + ((and deleted-parent + (let ((up data)) + (while (and + (setq up (org-element-property :parent up)) + (not (eq up deleted-parent)))) + up)) + (avl-tree-delete org-element--cache data)) + ((or (and next + (not (org-element--cache-key-less-p data-key + next))) + (> pos end)) + (aset request 0 data-key) + (aset request 1 pos) + (aset request 4 1) + (throw 'end-phase nil)) + (t (avl-tree-delete org-element--cache data) + (when (= (org-element-property :end data) end) + (setq deleted-parent data))))))))))) + (when (= (aref request 4) 1) + ;; Phase 2. + ;; + ;; Phase 1 left a hole in the parse tree. Some elements after + ;; it could have parents within. For example, in the following + ;; buffer: + ;; + ;; + ;; - item + ;; + ;; + ;; Paragraph1 + ;; + ;; Paragraph2 + ;; + ;; + ;; if we remove a blank line between "item" and "Paragraph1", + ;; everything down to "Paragraph2" is removed from cache. But + ;; the paragraph now belongs to the list, and its `:parent' + ;; property no longer is accurate. + ;; + ;; Therefore we need to parse again elements in the hole, or at + ;; least in its last section, so that we can re-parent + ;; subsequent elements, during phase 3. + ;; + ;; Note that we only need to get the parent from the first + ;; element in cache after the hole. + ;; + ;; Also, this part can be delayed if we don't need to retrieve + ;; an element after the hole. + (catch 'end-phase + ;; Next element will start at its beginning position plus + ;; offset, since it hasn't been shifted yet. Therefore, LIMIT + ;; contains the real beginning position of the first element + ;; to shift and re-parent. + (when (equal (aref request 0) next) (throw 'quit t)) + (let ((limit (+ (aref request 1) (aref request 2)))) + (when (and threshold (< threshold limit)) (throw 'quit nil)) + (let ((parent (org-element--parse-to limit t time-limit))) + (if (eq parent 'interrupted) (throw 'quit nil) + (aset request 3 parent) + (aset request 4 2) + (throw 'end-phase nil)))))) + ;; Phase 3. + ;; + ;; Shift all elements starting from key START, but before NEXT, by + ;; OFFSET, and re-parent them when appropriate. + ;; + ;; Elements are modified by side-effect so the tree structure + ;; remains intact. + ;; + ;; Once THRESHOLD, if any, is reached, or once there is an input + ;; pending, exit. Before leaving, the current synchronization + ;; request is updated. + (let ((start (aref request 0)) + (offset (aref request 2)) + (parent (aref request 3)) + (node (org-element--cache-root)) + (stack (list nil)) + (leftp t) + exit-flag) + ;; No re-parenting nor shifting planned: request is over. + (when (and (not parent) (zerop offset)) (throw 'quit t)) + (while node + (let* ((data (avl-tree--node-data node)) + (key (org-element--cache-key data))) + (if (and leftp (avl-tree--node-left node) + (not (org-element--cache-key-less-p key start))) + (progn (push node stack) + (setq node (avl-tree--node-left node))) + (unless (org-element--cache-key-less-p key start) + ;; We reached NEXT. Request is complete. + (when (equal key next) (throw 'quit t)) + ;; Handle interruption request. Update current request. + (when (or exit-flag (org-element--cache-interrupt-p time-limit)) + (aset request 0 key) + (aset request 3 parent) + (throw 'quit nil)) + ;; Shift element. + (unless (zerop offset) + (org-element--cache-shift-positions data offset) + ;; Shift associated objects data, if any. + (dolist (object-data (gethash data org-element--cache-objects)) + (incf (car object-data) offset) + (dolist (successor (nth 1 object-data)) + (incf (cdr successor) offset)) + (dolist (object (cddr object-data)) + (org-element--cache-shift-positions object offset)))) + (let ((begin (org-element-property :begin data))) + ;; Re-parent it. + (while (and parent + (<= (org-element-property :end parent) begin)) + (setq parent (org-element-property :parent parent))) + (cond (parent (org-element-put-property data :parent parent)) + ((zerop offset) (throw 'quit t))) + ;; Cache is up-to-date past THRESHOLD. Request + ;; interruption. + (when (and threshold (> begin threshold)) (setq exit-flag t)))) + (setq node (if (setq leftp (avl-tree--node-right node)) + (avl-tree--node-right node) + (pop stack)))))) + ;; We reached end of tree: synchronization complete. + t))) + +(defun org-element--parse-to (pos &optional syncp time-limit) + "Parse elements in current section, down to POS. + +Start parsing from the closest between the last known element in +cache or headline above. Return the smallest element containing +POS. + +When optional argument SYNCP is non-nil, return the parent of the +element containing POS instead. In that case, it is also +possible to provide TIME-LIMIT, which is a time value specifying +when the parsing should stop. The function returns `interrupted' +if the process stopped before finding the expected result." + (catch 'exit + (org-with-wide-buffer + (goto-char pos) + (let* ((cached (and (org-element--cache-active-p) + (org-element--cache-find pos nil))) + (begin (org-element-property :begin cached)) + element next) + (cond + ;; Nothing in cache before point: start parsing from first + ;; element following headline above, or first element in + ;; buffer. + ((not cached) + (when (org-with-limited-levels (outline-previous-heading)) + (forward-line)) + (skip-chars-forward " \r\t\n") + (beginning-of-line)) + ;; Cache returned exact match: return it. + ((= pos begin) + (throw 'exit (if syncp (org-element-property :parent cached) cached))) + ;; There's a headline between cached value and POS: cached + ;; value is invalid. Start parsing from first element + ;; following the headline. + ((re-search-backward + (org-with-limited-levels org-outline-regexp-bol) begin t) + (forward-line) + (skip-chars-forward " \r\t\n") + (beginning-of-line)) + ;; Check if CACHED or any of its ancestors contain point. + ;; + ;; If there is such an element, we inspect it in order to know + ;; if we return it or if we need to parse its contents. + ;; Otherwise, we just start parsing from current location, + ;; which is right after the top-most element containing + ;; CACHED. + ;; + ;; As a special case, if POS is at the end of the buffer, we + ;; want to return the innermost element ending there. + ;; + ;; Also, if we find an ancestor and discover that we need to + ;; parse its contents, make sure we don't start from + ;; `:contents-begin', as we would otherwise go past CACHED + ;; again. Instead, in that situation, we will resume parsing + ;; from NEXT, which is located after CACHED or its higher + ;; ancestor not containing point. + (t + (let ((up cached) + (pos (if (= (point-max) pos) (1- pos) pos))) + (goto-char (or (org-element-property :contents-begin cached) begin)) + (while (let ((end (org-element-property :end up))) + (and (<= end pos) + (goto-char end) + (setq up (org-element-property :parent up))))) + (cond ((not up)) + ((eobp) (setq element up)) + (t (setq element up next (point))))))) + ;; Nothing to parse before POS in order to know the result: + ;; return current parent, if any. + (when (and syncp (= (point) pos)) (throw 'exit element)) + ;; Parse successively each element until we reach POS. + (let ((end (or (org-element-property :end element) + (save-excursion + (org-with-limited-levels (outline-next-heading)) + (point)))) + parent special-flag) + (while t + ;; Break requested: return `interrupted' so there is no + ;; confusion where there is no parent (nil value). + (when (and syncp (org-element--cache-interrupt-p time-limit)) + (throw 'exit 'interrupted)) + (unless element + (setq element (org-element--current-element + end 'element special-flag + (org-element-property :structure parent))) + (org-element-put-property element :parent parent) + (org-element--cache-put element)) + (let ((elem-end (org-element-property :end element)) + (type (org-element-type element))) + (cond + ((and syncp (= elem-end pos)) (throw 'exit parent)) + ;; Special case: POS is at the end of the buffer and + ;; CACHED ends here. No element can start after it, but + ;; more than one may end there. Arbitrarily, we choose + ;; to return the innermost of such elements. This + ;; cannot happen when SYNCP is non-nil. + ((and (not syncp) (= (point-max) pos) (= pos elem-end)) + (let ((cend (org-element-property :contents-end element))) + (if (or (not (memq type org-element-greater-elements)) + (not cend) + (< cend pos)) + (throw 'exit element) + (goto-char + (or next (org-element-property :contents-begin element))) + (setq special-flag (case type + (plain-list 'item) + (property-drawer 'node-property) + (table 'table-row)) + parent element + end cend)))) + ;; Skip any element ending before point. Also skip + ;; element ending at point since we're sure that another + ;; element begins after it. + ((<= elem-end pos) (goto-char elem-end)) + ;; A non-greater element contains point: return it. + ;; This cannot happen when SYNCP is non-nil. + ((not (or syncp (memq type org-element-greater-elements))) + (throw 'exit element)) + ;; Otherwise, we have to decide if ELEMENT really + ;; contains POS. In that case we start parsing from + ;; contents' beginning. Otherwise we return UP as it is + ;; the smallest element containing POS. + ;; + ;; There is a special cases to consider, though. If POS + ;; is at contents' beginning but it is also at the + ;; beginning of the first item in a list or a table. In + ;; that case, we need to create an anchor for that list + ;; or table, so return it. + (t + (let ((cbeg (org-element-property :contents-begin element)) + (cend (org-element-property :contents-end element))) + (if (or (not cbeg) (not cend) (> cbeg pos) (<= cend pos) + (and (= cbeg pos) (memq type '(plain-list table)))) + (throw 'exit element) + (goto-char (or next cbeg)) + (setq special-flag (case type + (plain-list 'item) + (property-drawer 'node-property) + (table 'table-row)) + parent element + end cend)))))) + ;; Continue parsing buffer contents from new position. + (setq element nil next nil))))))) + + +;;;; Staging Buffer Changes + +(defconst org-element--cache-opening-line + (concat "^[ \t]*\\(?:" + "#\\+BEGIN[:_]" "\\|" + "\\\\begin{[A-Za-z0-9]+\\*?}" "\\|" + ":\\S-+:[ \t]*$" + "\\)") + "Regexp matching an element opening line. +When such a line is modified, modifications may propagate after +modified area. In that situation, every element between that +area and next section is removed from cache.") + +(defconst org-element--cache-closing-line + (concat "^[ \t]*\\(?:" + "#\\+END\\(?:_\\|:?[ \t]*$\\)" "\\|" + "\\\\end{[A-Za-z0-9]+\\*?}[ \t]*$" "\\|" + ":END:[ \t]*$" + "\\)") + "Regexp matching an element closing line. +When such a line is modified, modifications may propagate before +modified area. In that situation, every element between that +area and previous section is removed from cache.") + +(defvar org-element--cache-change-warning nil + "Non-nil when a sensitive line is about to be changed. +It is a symbol among nil, t and `headline'.") + +(defun org-element--cache-before-change (beg end) + "Request extension of area going to be modified if needed. +BEG and END are the beginning and end of the range of changed +text. See `before-change-functions' for more information." + (let ((inhibit-quit t)) + ;; Make sure buffer positions in cache are correct until END. + (org-element--cache-sync (current-buffer) end) + (org-with-wide-buffer + (goto-char beg) + (beginning-of-line) + (let ((top (point)) + (bottom (save-excursion (goto-char end) (line-end-position))) + (sensitive-re + ;; A sensitive line is a headline or a block (or drawer, + ;; or latex-environment) boundary. Inserting one can + ;; modify buffer drastically both above and below that + ;; line, possibly making cache invalid. Therefore, we + ;; need to pay attention to changes happening to them. + (concat + "\\(" (org-with-limited-levels org-outline-regexp-bol) "\\)" "\\|" + org-element--cache-closing-line "\\|" + org-element--cache-opening-line))) + (save-match-data + (setq org-element--cache-change-warning + (cond ((not (re-search-forward sensitive-re bottom t)) nil) + ((and (match-beginning 1) + (progn (goto-char bottom) + (or (not (re-search-backward sensitive-re + (match-end 1) t)) + (match-beginning 1)))) + 'headline) + (t)))))))) + +(defun org-element--cache-after-change (beg end pre) + "Update buffer modifications for current buffer. +BEG and END are the beginning and end of the range of changed +text, and the length in bytes of the pre-change text replaced by +that range. See `after-change-functions' for more information." + (let ((inhibit-quit t)) + (when (org-element--cache-active-p) + (org-with-wide-buffer + (goto-char beg) + (beginning-of-line) + (let ((top (point)) + (bottom (save-excursion (goto-char end) (line-end-position)))) + (org-with-limited-levels + (save-match-data + ;; Determine if modified area needs to be extended, + ;; according to both previous and current state. We make + ;; a special case for headline editing: if a headline is + ;; modified but not removed, do not extend. + (when (let ((previous-state org-element--cache-change-warning) + (sensitive-re + (concat "\\(" org-outline-regexp-bol "\\)" "\\|" + org-element--cache-closing-line "\\|" + org-element--cache-opening-line)) + (case-fold-search t)) + (cond ((eq previous-state t)) + ((not (re-search-forward sensitive-re bottom t)) + (eq previous-state 'headline)) + ((match-beginning 1) + (or (not (eq previous-state 'headline)) + (and (progn (goto-char bottom) + (re-search-backward + sensitive-re (match-end 1) t)) + (not (match-beginning 1))))) + (t))) + ;; Effectively extend modified area. + (setq top (progn (goto-char top) + (when (outline-previous-heading) (forward-line)) + (point))) + (setq bottom (progn (goto-char bottom) + (if (outline-next-heading) (1- (point)) + (point))))))) + ;; Store synchronization request. + (let ((offset (- end beg pre))) + (org-element--cache-submit-request top (- bottom offset) offset)))) + ;; Activate a timer to process the request during idle time. + (org-element--cache-set-timer (current-buffer))))) + +(defun org-element--cache-submit-request (beg end offset) + "Submit a new cache synchronization request for current buffer. +BEG and END are buffer positions delimiting the minimal area +where cache data should be removed. OFFSET is the size of the +change, as an integer." + (let ((first-element + ;; Find the position of the first element in cache to remove. + ;; + ;; Partially modified elements will be removed during request + ;; processing. As an exception, greater elements around the + ;; changes that are robust to contents modifications are + ;; preserved. + ;; + ;; We look just before BEG because an element ending at BEG + ;; needs to be removed too. + (let* ((elements (org-element--cache-find (1- beg) 'both)) + (before (car elements)) + (after (cdr elements))) + (if (not before) after + (let ((up before)) + (while (setq up (org-element-property :parent up)) + (if (and (memq (org-element-type up) + '(center-block + drawer dynamic-block inlinetask + property-drawer quote-block special-block)) + (<= (org-element-property :contents-begin up) beg) + (> (org-element-property :contents-end up) end)) + ;; UP is a greater element that is wrapped around + ;; the changes. We only need to extend its + ;; ending boundaries and those of all its + ;; parents. + (while up + (org-element--cache-shift-positions + up offset '(:contents-end :end)) + (setq up (org-element-property :parent up))) + (setq before up))) + ;; We're at top level element containing ELEMENT: if + ;; it's altered by buffer modifications, it is first + ;; element in cache to be removed. Otherwise, that + ;; first element is the following one. + (if (< (org-element-property :end before) beg) after before)))))) + (cond + ;; Changes happened before the first known element. Shift the + ;; rest of the cache. + ((and first-element (> (org-element-property :begin first-element) end)) + (push (vector (org-element--cache-key first-element) nil offset nil 2) + org-element--cache-sync-requests)) + ;; There is at least an element to remove. Find position past + ;; every element containing END. + (first-element + (if (> (org-element-property :end first-element) end) + (setq end (org-element-property :end first-element)) + (let ((element (org-element--cache-find end))) + (setq end (org-element-property :end element)) + (let ((up element)) + (while (and (setq up (org-element-property :parent up)) + (>= (org-element-property :begin up) beg)) + (setq end (org-element-property :end up)))))) + (push (vector (org-element--cache-key first-element) end offset nil 0) + org-element--cache-sync-requests)) + ;; No element to remove. No need to re-parent either. Simply + ;; shift additional elements, if any, by OFFSET. + (org-element--cache-sync-requests + (incf (aref (car org-element--cache-sync-requests) 2) offset))))) + + +;;;; Public Functions + +;;;###autoload +(defun org-element-cache-reset (&optional all) + "Reset cache in current buffer. +When optional argument ALL is non-nil, reset cache in all Org +buffers. This function will do nothing if +`org-element-use-cache' is nil." + (interactive "P") + (dolist (buffer (if all (buffer-list) (list (current-buffer)))) + (with-current-buffer buffer + (when (org-element--cache-active-p) + (org-set-local 'org-element--cache + (avl-tree-create #'org-element--cache-compare)) + (org-set-local 'org-element--cache-objects + (make-hash-table :weakness 'key :test #'eq)) + (org-set-local 'org-element--cache-sync-keys + (make-hash-table :weakness 'key :test #'eq)) + (org-set-local 'org-element--cache-change-warning nil) + (org-set-local 'org-element--cache-sync-requests nil) + (org-set-local 'org-element--cache-sync-timer nil) + (add-hook 'before-change-functions + #'org-element--cache-before-change nil t) + (add-hook 'after-change-functions + #'org-element--cache-after-change nil t))))) + + ;;; The Toolbox ;; @@ -4683,137 +5679,24 @@ instead of the first row. When point is at the end of the buffer, return the innermost element ending there." - (catch 'exit - (org-with-wide-buffer - (let ((origin (point)) element next) - (end-of-line) - (skip-chars-backward " \r\t\n") - (cond - ;; Within blank lines at the beginning of buffer, return nil. - ((bobp) (throw 'exit nil)) - ;; Within blank lines right after a headline, return that - ;; headline. - ((org-with-limited-levels (org-at-heading-p)) - (beginning-of-line) - (throw 'exit (org-element-headline-parser (point-max) t)))) - ;; Otherwise use cache in order to approximate current element. - (goto-char origin) - (let* ((cached (org-element-cache-get origin)) - (begin (org-element-property :begin cached))) - (cond - ;; Nothing in cache before point: start parsing from first - ;; element following headline above, or first element in - ;; buffer. - ((not cached) - (org-with-limited-levels (outline-previous-heading) - (when (org-at-heading-p) (forward-line))) - (skip-chars-forward " \r\t\n") - (beginning-of-line)) - ;; Cache returned exact match: return it. - ((= origin begin) (throw 'exit cached)) - ;; There's a headline between cached value and ORIGIN: - ;; cached value is invalid. Start parsing from first - ;; element following the headline. - ((re-search-backward - (org-with-limited-levels org-outline-regexp-bol) begin t) - (forward-line) - (skip-chars-forward " \r\t\n") - (beginning-of-line)) - ;; Check if CACHED or any of its ancestors contain point. - ;; - ;; If there is such an element, we inspect it in order to - ;; know if we return it or if we need to parse its contents. - ;; Otherwise, we just start parsing from current location, - ;; which is right after the top-most element containing - ;; CACHED. - ;; - ;; As a special case, if ORIGIN is at the end of the buffer, - ;; we want to return the innermost element ending there. - ;; - ;; Also, if we find an ancestor and discover that we need to - ;; parse its contents, make sure we don't start from - ;; `:contents-begin', as we would otherwise go past CACHED - ;; again. Instead, in that situation, we will resume - ;; parsing from NEXT, which is located after CACHED or its - ;; higher ancestor not containing point. - (t - (let ((up cached) - (origin (if (= (point-max) origin) (1- origin) origin))) - (goto-char (or (org-element-property :contents-begin cached) - begin)) - (while (let ((end (org-element-property :end up))) - (and (<= end origin) - (goto-char end) - (setq up (org-element-property :parent up))))) - (cond ((not up)) - ((eobp) (setq element up)) - (t (setq element up next (point)))))))) - ;; Parse successively each element until we reach ORIGIN. - (let ((end (or (org-element-property :end element) - (save-excursion - (org-with-limited-levels (outline-next-heading)) - (point)))) - parent special-flag) - (while t - (unless element - (let ((e (org-element--current-element - end 'element special-flag - (org-element-property :structure parent)))) - (org-element-put-property e :parent parent) - (setq element (org-element-cache-put e)))) - (let ((elem-end (org-element-property :end element)) - (type (org-element-type element))) - (cond - ;; Special case: ORIGIN is at the end of the buffer and - ;; CACHED ends here. No element can start after it, but - ;; more than one may end there. Arbitrarily, we choose - ;; to return the innermost of such elements. - ((and (= (point-max) origin) (= origin elem-end)) - (let ((cend (org-element-property :contents-end element))) - (if (or (not (memq type org-element-greater-elements)) - (not cend) - (< cend origin)) - (throw 'exit element) - (goto-char - (or next (org-element-property :contents-begin element))) - (setq special-flag (case type - (plain-list 'item) - (property-drawer 'node-property) - (table 'table-row)) - parent element - end cend)))) - ;; Skip any element ending before point. Also skip - ;; element ending at point since we're sure that another - ;; element begins after it. - ((<= elem-end origin) (goto-char elem-end)) - ;; A non-greater element contains point: return it. - ((not (memq type org-element-greater-elements)) - (throw 'exit element)) - ;; Otherwise, we have to decide if ELEMENT really - ;; contains ORIGIN. In that case we start parsing from - ;; contents' beginning. Otherwise we return UP as it is - ;; the smallest element containing ORIGIN. - ;; - ;; There is a special cases to consider, though. If - ;; ORIGIN is at contents' beginning but it is also at - ;; the beginning of the first item in a list or a table. - ;; In that case, we need to create an anchor for that - ;; list or table, so return it. - (t - (let ((cbeg (org-element-property :contents-begin element)) - (cend (org-element-property :contents-end element))) - (if (or (not cbeg) (not cend) (> cbeg origin) (<= cend origin) - (and (= cbeg origin) (memq type '(plain-list table)))) - (throw 'exit element) - (goto-char (or next cbeg)) - (setq special-flag (case type - (plain-list 'item) - (property-drawer 'node-property) - (table 'table-row)) - parent element - end cend)))))) - ;; Continue parsing buffer contents from new position. - (setq element nil next nil))))))) + (org-with-wide-buffer + (let ((origin (point))) + (end-of-line) + (skip-chars-backward " \r\t\n") + (cond + ;; Within blank lines at the beginning of buffer, return nil. + ((bobp) nil) + ;; Within blank lines right after a headline, return that + ;; headline. + ((org-with-limited-levels (org-at-heading-p)) + (beginning-of-line) + (org-element-headline-parser (point-max) t)) + ;; Otherwise parse until we find element containing ORIGIN. + (t + (when (org-element--cache-active-p) + (if (not org-element--cache) (org-element-cache-reset) + (org-element--cache-sync (current-buffer) origin))) + (org-element--parse-to origin)))))) ;;;###autoload (defun org-element-context (&optional element) @@ -4901,7 +5784,7 @@ Providing it allows for quicker computation." (let* ((restriction (org-element-restriction type)) (parent element) (candidates 'initial) - (cache (org-element-cache-get element)) + (cache (gethash element org-element--cache-objects)) objects-data next update-cache-flag) (prog1 (catch 'exit @@ -4974,7 +5857,7 @@ Providing it allows for quicker computation." objects-data nil candidates 'initial)))))) ;; Store results in cache, if applicable. - (org-element-cache-put cache element))))))) + (org-element--cache-put element cache))))))) (defun org-element-nested-p (elem-A elem-B) "Non-nil when elements ELEM-A and ELEM-B are nested." @@ -5080,538 +5963,6 @@ function doesn't call `untabify' on S." (buffer-string))))) - -;;; Cache -;; -;; Both functions `org-element-at-point' and `org-element-context' -;; benefit from a simple caching mechanism. -;; -;; Three public functions are provided: `org-element-cache-put', -;; `org-element-cache-get' and `org-element-cache-reset'. -;; -;; Cache is enabled by default, but can be disabled globally with -;; `org-element-use-cache'. `org-element-cache-sync-idle-time' and -;; `org-element-cache-merge-changes-threshold' can be tweaked to -;; control caching behaviour. - - -(defvar org-element-use-cache t - "Non nil when Org parser should cache its results. -This is mostly for debugging purpose.") - -(defvar org-element-cache-merge-changes-threshold 200 - "Number of characters triggering cache syncing. - -The cache mechanism only stores one buffer modification at any -given time. When another change happens, it replaces it with -a change containing both the stored modification and the current -one. This is a trade-off, as merging them prevents another -syncing, but every element between them is then lost. - -This variable determines the maximum size, in characters, we -accept to lose in order to avoid syncing the cache.") - -(defvar org-element-cache-sync-idle-time 0.5 - "Number of seconds of idle time wait before syncing buffer cache. -Syncing also happens when current modification is too distant -from the stored one (for more information, see -`org-element-cache-merge-changes-threshold').") - - -;;;; Data Structure - -(defvar org-element--cache nil - "AVL tree used to cache elements. -Each node of the tree contains an element. Comparison is done -with `org-element--cache-compare'. This cache is used in -`org-element-at-point'.") - -(defvar org-element--cache-objects nil - "Hash table used as to cache objects. -Key is an element, as returned by `org-element-at-point', and -value is an alist where each association is: - - \(POS CANDIDATES . OBJECTS) - -where POS is a buffer position, CANDIDATES is the last know list -of successors (see `org-element--get-next-object-candidates') in -container starting at POS and OBJECTS is a list of objects known -to live within that container, from farthest to closest. - -In the following example, \\alpha, bold object and \\beta start -at, respectively, positions 1, 7 and 8, - - \\alpha *\\beta* - -If the paragraph is completely parsed, OBJECTS-DATA will be - - \((1 nil BOLD-OBJECT ENTITY-OBJECT) - \(8 nil ENTITY-OBJECT)) - -whereas in a partially parsed paragraph, it could be - - \((1 ((entity . 1) (bold . 7)) ENTITY-OBJECT)) - -This cache is used in `org-element-context'.") - -(defun org-element--cache-compare (a b) - "Non-nil when element A is located before element B." - (let ((beg-a (org-element-property :begin a)) - (beg-b (org-element-property :begin b))) - (or (< beg-a beg-b) - ;; Items and plain lists on the one hand, table rows and - ;; tables on the other hand can start at the same position. - ;; In this case, the parent element is always before its child - ;; in the buffer. - (and (= beg-a beg-b) - (memq (org-element-type a) '(plain-list table)) - (memq (org-element-type b) '(item table-row)))))) - -(defsubst org-element--cache-root () - "Return root value in cache. -This function assumes `org-element--cache' is a valid AVL tree." - (avl-tree--node-left (avl-tree--dummyroot org-element--cache))) - - -;;;; Staging Buffer Changes - -(defvar org-element--cache-status nil - "Contains data about cache validity for current buffer. - -Value is a vector of seven elements, - - [ACTIVEP BEGIN END OFFSET TIMER PREVIOUS-STATE] - -ACTIVEP is a boolean non-nil when changes described in the other -slots are valid for current buffer. - -BEGIN and END are the beginning and ending position of the area -for which cache cannot be trusted. - -OFFSET it an integer specifying the number to add to position of -elements after that area. - -TIMER is a timer used to apply these changes to cache when Emacs -is idle. - -PREVIOUS-STATE is a symbol referring to the state of the buffer -before a change happens. It is used to know if sensitive -areas (block boundaries, headlines) were modified. It can be set -to nil, `headline' or `other'.") - -(defconst org-element--cache-opening-line - (concat "^[ \t]*\\(?:" - "#\\+BEGIN[:_]" "\\|" - "\\\\begin{[A-Za-z0-9]+\\*?}" "\\|" - ":\\S-+:[ \t]*$" - "\\)") - "Regexp matching an element opening line. -When such a line is modified, modifications may propagate after -modified area. In that situation, every element between that -area and next section is removed from cache.") - -(defconst org-element--cache-closing-line - (concat "^[ \t]*\\(?:" - "#\\+END\\(?:_\\|:?[ \t]*$\\)" "\\|" - "\\\\end{[A-Za-z0-9]+\\*?}[ \t]*$" "\\|" - ":END:[ \t]*$" - "\\)") - "Regexp matching an element closing line. -When such a line is modified, modifications may propagate before -modified area. In that situation, every element between that -area and previous section is removed from cache.") - -(defsubst org-element--cache-pending-changes-p () - "Non-nil when changes are not integrated in cache yet." - (and org-element--cache-status - (aref org-element--cache-status 0))) - -(defsubst org-element--cache-push-change (beg end offset) - "Push change to current buffer staging area. -BEG and END and the beginning and ending position of the -modification area. OFFSET is the size of the change, as an -integer." - (aset org-element--cache-status 1 beg) - (aset org-element--cache-status 2 end) - (aset org-element--cache-status 3 offset) - (let ((timer (aref org-element--cache-status 4))) - (if timer (timer-activate-when-idle timer t) - (aset org-element--cache-status 4 - (run-with-idle-timer org-element-cache-sync-idle-time - nil - #'org-element--cache-sync - (current-buffer))))) - (aset org-element--cache-status 0 t)) - -(defsubst org-element--cache-cancel-changes () - "Remove any cache change set for current buffer." - (let ((timer (aref org-element--cache-status 4))) - (and timer (cancel-timer timer))) - (aset org-element--cache-status 0 nil)) - -(defun org-element--cache-before-change (beg end) - "Request extension of area going to be modified if needed. -BEG and END are the beginning and end of the range of changed -text. See `before-change-functions' for more information." - (let ((inhibit-quit t)) - (org-with-wide-buffer - (goto-char beg) - (beginning-of-line) - (let ((top (point)) - (bottom (save-excursion (goto-char end) (line-end-position))) - (sensitive-re - ;; A sensitive line is a headline or a block (or drawer, - ;; or latex-environment) boundary. Inserting one can - ;; modify buffer drastically both above and below that - ;; line, possibly making cache invalid. Therefore, we - ;; need to pay special attention to changes happening to - ;; them. - (concat - "\\(" (org-with-limited-levels org-outline-regexp-bol) "\\)" "\\|" - org-element--cache-closing-line "\\|" - org-element--cache-opening-line))) - (save-match-data - (aset org-element--cache-status 5 - (cond ((not (re-search-forward sensitive-re bottom t)) nil) - ((and (match-beginning 1) - (progn (goto-char bottom) - (or (not (re-search-backward sensitive-re - (match-end 1) t)) - (match-beginning 1)))) - 'headline) - (t 'other)))))))) - -(defun org-element--cache-record-change (beg end pre) - "Update buffer modifications for current buffer. - -BEG and END are the beginning and end of the range of changed -text, and the length in bytes of the pre-change text replaced by -that range. See `after-change-functions' for more information. - -If there are already pending changes, try to merge them into -a bigger change record. If that's not possible, the function -will first synchronize cache with previous change and store the -new one." - (let ((inhibit-quit t)) - (when (and org-element-use-cache org-element--cache) - (org-with-wide-buffer - (goto-char beg) - (beginning-of-line) - (let ((top (point)) - (bottom (save-excursion (goto-char end) (line-end-position)))) - (org-with-limited-levels - (save-match-data - ;; Determine if modified area needs to be extended, - ;; according to both previous and current state. We make - ;; a special case for headline editing: if a headline is - ;; modified but not removed, do not extend. - (when (let ((previous-state (aref org-element--cache-status 5)) - (sensitive-re - (concat "\\(" org-outline-regexp-bol "\\)" "\\|" - org-element--cache-closing-line "\\|" - org-element--cache-opening-line))) - (cond ((eq previous-state 'other)) - ((not (re-search-forward sensitive-re bottom t)) - (eq previous-state 'headline)) - ((match-beginning 1) - (or (not (eq previous-state 'headline)) - (and (progn (goto-char bottom) - (re-search-backward - sensitive-re (match-end 1) t)) - (not (match-beginning 1))))) - (t))) - ;; Effectively extend modified area. - (setq top (progn (goto-char top) - (outline-previous-heading) - ;; Headline above is inclusive. - (point))) - (setq bottom (progn (goto-char bottom) - (outline-next-heading) - ;; Headline below is exclusive. - (if (eobp) (point) (1- (point)))))))) - ;; Store changes. - (let ((offset (- end beg pre))) - (if (not (org-element--cache-pending-changes-p)) - ;; No pending changes. Store the new ones. - (org-element--cache-push-change top (- bottom offset) offset) - (let* ((current-start (aref org-element--cache-status 1)) - (current-end (+ (aref org-element--cache-status 2) - (aref org-element--cache-status 3))) - (gap (max (- beg current-end) (- current-start end)))) - (if (> gap org-element-cache-merge-changes-threshold) - ;; If we cannot merge two change sets (i.e. they - ;; modify distinct buffer parts) first apply current - ;; change set and store new one. This way, there is - ;; never more than one pending change set, which - ;; avoids handling costly merges. - (progn (org-element--cache-sync (current-buffer)) - (org-element--cache-push-change - top (- bottom offset) offset)) - ;; Change sets can be merged. We can expand the area - ;; that requires an update, and postpone the sync. - (timer-activate-when-idle (aref org-element--cache-status 4) t) - (aset org-element--cache-status 0 t) - (aset org-element--cache-status 1 (min top current-start)) - (aset org-element--cache-status 2 - (- (max current-end bottom) offset)) - (incf (aref org-element--cache-status 3) offset)))))))))) - - -;;;; Synchronization - -(defsubst org-element--cache-shift-positions (element offset &optional props) - "Shift ELEMENT properties relative to buffer positions by OFFSET. - -Properties containing buffer positions are `:begin', `:end', -`:contents-begin', `:contents-end' and `:structure'. When -optional argument PROPS is a list of keywords, only shift -properties provided in that list. - -Properties are modified by side-effect. Return ELEMENT." - (let ((properties (nth 1 element))) - ;; Shift :structure property for the first plain list only: it is - ;; the only one that really matters and it prevents from shifting - ;; it more than once. - (when (and (or (not props) (memq :structure props)) - (eq (org-element-type element) 'plain-list) - (not (eq (org-element-type (plist-get properties :parent)) - 'item))) - (dolist (item (plist-get properties :structure)) - (incf (car item) offset) - (incf (nth 6 item) offset))) - (dolist (key '(:begin :contents-begin :contents-end :end :post-affiliated)) - (let ((value (and (or (not props) (memq key props)) - (plist-get properties key)))) - (and value (plist-put properties key (+ offset value)))))) - element) - -(defun org-element--cache-mapc (__map-function__ &optional reverse) - "Apply FUNCTION to all elements in cache. -FUNCTION is applied to the elements in ascending order, or -descending order if REVERSE is non-nil." - (avl-tree--mapc - #'(lambda (node) - (funcall __map-function__ (avl-tree--node-data node))) - (org-element--cache-root) - (if reverse 1 0))) - -(defun org-element--cache-sync (buffer) - "Synchronize cache with recent modification in BUFFER. -Elements ending before modification area are kept in cache. -Elements starting after modification area have their position -shifted by the size of the modification. Every other element is -removed from the cache." - (when (buffer-live-p buffer) - (with-current-buffer buffer - (when (org-element--cache-pending-changes-p) - (catch 'escape - (let ((inhibit-quit t) - (offset (aref org-element--cache-status 3)) - ;; END is the beginning position of the first element - ;; in cache that isn't removed but needs to be - ;; shifted. It will be updated during phase 1. - (end (aref org-element--cache-status 2))) - ;; Phase 1. - ;; - ;; Delete, in ascending order, all elements starting after - ;; BEG, but before END. - ;; - ;; BEG is the position of the first element in cache to - ;; remove. It takes into consideration partially modified - ;; elements (starting before changes but ending after - ;; them). Though, it preserves greater elements that are - ;; not affected when changes alter only their contents. - ;; - ;; END is updated when necessary to include elements - ;; starting after modifications but included in an element - ;; altered by modifications. - ;; - ;; At each iteration, we start again at tree root since - ;; a deletion modifies structure of the balanced tree. - (let ((beg - (let* ((beg (aref org-element--cache-status 1)) - (element (org-element-cache-get (1- beg) t))) - (if (not element) beg - (catch 'exit - (let ((up element)) - (while (setq up (org-element-property :parent up)) - (if (and - (memq (org-element-type up) - '(center-block - drawer dynamic-block inlinetask - property-drawer quote-block - special-block)) - (<= (org-element-property :contents-begin up) - beg) - (> (org-element-property :contents-end up) - end)) - ;; UP is a greater element that is - ;; wrapped around the changes. We - ;; only need to extend its ending - ;; boundaries and those of all its - ;; parents. - (throw 'exit - (progn - (while up - (org-element--cache-shift-positions - up offset '(:contents-end :end)) - (setq up (org-element-property - :parent up))) - (org-element-property - :begin element)))) - (setq element up)) - ;; We're at top level element containing - ;; ELEMENT: if it's altered by buffer - ;; modifications, it is first element in - ;; cache to be removed. Otherwise, that - ;; first element is the following one. - (if (< (org-element-property :end element) beg) - (org-element-property :end element) - (org-element-property :begin element)))))))) - (while (let ((node (org-element--cache-root)) data) - ;; DATA will contain the closest element from - ;; BEG, always after it. - (while node - (let* ((element (avl-tree--node-data node)) - (pos (org-element-property :begin element))) - (cond - ((< pos beg) - (setq node (avl-tree--node-right node))) - ((> pos beg) - (setq data (avl-tree--node-data node) - node (avl-tree--node-left node))) - (t - (setq data (avl-tree--node-data node) - node nil))))) - (cond - ;; No DATA is found so there's no element left - ;; after BEG. Bail out. - ((not data) (throw 'escape t)) - ;; Element starts after END, it is the first - ;; one that needn't be removed from cache. - ;; Move to second phase. - ((> (org-element-property :begin data) end) nil) - ;; Remove element. Extend END so that all - ;; elements it may contain are also removed. - (t - (setq end - (max (1- (org-element-property :end data)) end)) - (avl-tree-delete org-element--cache data) - t))))) - ;; Phase 2. - ;; - ;; Shift all elements starting after END by OFFSET (for an - ;; offset different from 0). - ;; - ;; Increasing all beginning positions by OFFSET doesn't - ;; alter tree structure, so elements are modified by - ;; side-effect. - ;; - ;; We change all elements in decreasing order and make - ;; sure to quit at the first element in cache starting - ;; before END. - (unless (zerop offset) - (catch 'exit - (org-element--cache-mapc - #'(lambda (data) - (if (<= (org-element-property :begin data) end) - (throw 'exit t) - ;; Shift element. - (org-element--cache-shift-positions data offset) - ;; Shift associated objects data, if any. - (dolist (object-data - (gethash data org-element--cache-objects)) - (incf (car object-data) offset) - (dolist (successor (nth 1 object-data)) - (incf (cdr successor) offset)) - (dolist (object (cddr object-data)) - (org-element--cache-shift-positions - object offset))))) - 'reverse))))) - ;; Eventually signal cache as up-to-date. - (org-element--cache-cancel-changes))))) - - -;;;; Public Functions - -(defun org-element-cache-get (key &optional ignore-changes) - "Return cached data relative to KEY. - -KEY is either a number or an Org element, as returned by -`org-element-at-point'. If KEY is a number, return closest -cached data before or at position KEY. Otherwise, return cached -objects contained in element KEY. - -In any case, return nil if no data is found, or if caching is not -allowed. - -If changes are pending in current buffer, first synchronize the -cache, unless optional argument IGNORE-CHANGES is non-nil." - (when (and org-element-use-cache org-element--cache) - ;; If there are pending changes, first sync them. - (when (and (not ignore-changes) (org-element--cache-pending-changes-p)) - (org-element--cache-sync (current-buffer))) - (if (not (wholenump key)) (gethash key org-element--cache-objects) - (let ((node (org-element--cache-root)) last) - (catch 'found - (while node - (let* ((element (avl-tree--node-data node)) - (beg (org-element-property :begin element))) - (cond - ((< key beg) - (setq node (avl-tree--node-left node))) - ((> key beg) - (setq last (avl-tree--node-data node) - node (avl-tree--node-right node))) - ;; When KEY is at the beginning of a table or list, - ;; make sure to return it instead of the first row or - ;; item. - ((and (memq (org-element-type element) '(item table-row)) - (= (org-element-property - :contents-begin (org-element-property :parent element)) - beg)) - (setq last (avl-tree--node-data node) - node (avl-tree--node-left node))) - (t (throw 'found (avl-tree--node-data node)))))) - last))))) - -(defun org-element-cache-put (data &optional element) - "Store DATA in current buffer's cache, if allowed. -If optional argument ELEMENT is non-nil, store DATA as objects -relative to it. Otherwise, store DATA as an element. Nothing -will be stored if `org-element-use-cache' is nil. Return DATA." - (if (not (and org-element-use-cache (derived-mode-p 'org-mode))) data - (unless (and org-element--cache org-element--cache-objects) - (org-element-cache-reset)) - (if element (puthash element data org-element--cache-objects) - (avl-tree-enter org-element--cache data)))) - -;;;###autoload -(defun org-element-cache-reset (&optional all) - "Reset cache in current buffer. -When optional argument ALL is non-nil, reset cache in all Org -buffers. This function will do nothing if -`org-element-use-cache' is nil." - (interactive "P") - (when org-element-use-cache - (dolist (buffer (if all (buffer-list) (list (current-buffer)))) - (with-current-buffer buffer - (when (derived-mode-p 'org-mode) - (if (org-bound-and-true-p org-element--cache) - (avl-tree-clear org-element--cache) - (org-set-local 'org-element--cache - (avl-tree-create #'org-element--cache-compare))) - (if org-element--cache-objects (clrhash org-element--cache-objects) - (org-set-local - 'org-element--cache-objects - (make-hash-table :size 1009 :weakness 'key :test #'eq))) - (org-set-local 'org-element--cache-status (make-vector 6 nil)) - (add-hook 'before-change-functions - 'org-element--cache-before-change nil t) - (add-hook 'after-change-functions - 'org-element--cache-record-change nil t)))))) - - (provide 'org-element) diff --git a/lisp/ox.el b/lisp/ox.el index f8bcfed3e..c43703864 100644 --- a/lisp/ox.el +++ b/lisp/ox.el @@ -263,6 +263,16 @@ whose extension is either \"png\", \"jpeg\", \"jpg\", \"gif\", See `org-export-inline-image-p' for more information about rules.") +(defconst org-export-ignored-local-variables + '(org-font-lock-keywords + org-element--cache org-element--cache-objects org-element--cache-sync-keys + org-element--cache-sync-requests org-element--cache-sync-timer) + "List of variables not copied through upon buffer duplication. +Export process takes place on a copy of the original buffer. +When this copy is created, all Org related local variables not in +this list are copied to the new buffer. Variables with an +unreadable value are also ignored.") + (defvar org-export-async-debug nil "Non-nil means asynchronous export process should leave data behind. @@ -2964,11 +2974,7 @@ The function assumes BUFFER's major mode is `org-mode'." (when (consp entry) (let ((var (car entry)) (val (cdr entry))) - (and (not (memq var '(org-font-lock-keywords - ;; Do not share cache across - ;; buffers as values are - ;; modified by side effect. - org-element--cache))) + (and (not (memq var org-export-ignored-local-variables)) (or (memq var '(default-directory buffer-file-name diff --git a/testing/lisp/test-org-element.el b/testing/lisp/test-org-element.el index 08142c0cc..6c692ace8 100644 --- a/testing/lisp/test-org-element.el +++ b/testing/lisp/test-org-element.el @@ -3036,5 +3036,116 @@ Paragraph \\alpha." (org-element-type (org-element-context (org-element-at-point)))))))) + +;;; Test Cache. + +(ert-deftest test-org-element/cache () + "Test basic expectations and common pitfalls for cache." + ;; Shift positions. + (should + (equal '(18 . 23) + (org-test-with-temp-text "para1\n\npara2\n\npara3" + (let ((org-element-use-cache t)) + (save-excursion (goto-char (point-max)) (org-element-at-point)) + (insert "add") + (forward-line 4) + (let ((element (org-element-at-point))) + (cons (org-element-property :begin element) + (org-element-property :end element))))))) + ;; Partial shifting: when the contents of a greater element are + ;; modified, only shift ending positions. + (should + (org-test-with-temp-text + "#+BEGIN_CENTER\nPara1\n\nPara2\n\nPara3\n#+END_CENTER" + (let ((org-element-use-cache t)) + (save-excursion (search-forward "3") (org-element-at-point)) + (search-forward "Para2") + (insert " ") + (let ((element (org-element-property :parent (org-element-at-point)))) + (equal (cons (org-element-property :begin element) + (org-element-property :end element)) + (cons (point-min) (point-max))))))) + ;; Re-parent shifted elements. + (should + (eq 'item + (org-test-with-temp-text "- item\n\n\n para1\n para2" + (let ((org-element-use-cache t)) + (end-of-line) + (org-element-at-point) + (save-excursion (goto-char (point-max)) (org-element-at-point)) + (forward-line) + (delete-char 1) + (goto-char (point-max)) + (org-element-type + (org-element-property :parent (org-element-at-point))))))) + ;; Modifying the last line of an element alters the element below. + (should + (org-test-with-temp-text "para1\n\npara2" + (let ((org-element-use-cache t)) + (goto-char (point-max)) + (org-element-at-point) + (forward-line -1) + (insert "merge") + (let ((element (org-element-at-point))) + (equal (cons (org-element-property :begin element) + (org-element-property :end element)) + (cons (point-min) (point-max))))))) + ;; Modifying the first line of an element alters the element above. + (should + (org-test-with-temp-text ": fixed-width\n:not-fixed-width" + (let ((org-element-use-cache t)) + (goto-char (point-max)) + (org-element-at-point) + (search-backward ":") + (forward-char) + (insert " ") + (let ((element (org-element-at-point))) + (equal (cons (org-element-property :begin element) + (org-element-property :end element)) + (cons (point-min) (point-max))))))) + ;; Sensitive change: adding a line alters document structure both + ;; above and below. + (should + (eq 'example-block + (org-test-with-temp-text "#+BEGIN_EXAMPLE\nPara1\n\nPara2\n" + (let ((org-element-use-cache t)) + (goto-char (point-max)) + (org-element-at-point) + (insert "#+END_EXAMPLE") + (search-backward "Para1") + (org-element-type (org-element-at-point)))))) + (should + (eq 'example-block + (org-test-with-temp-text "Para1\n\nPara2\n#+END_EXAMPLE" + (let ((org-element-use-cache t)) + (save-excursion (goto-char (point-max)) (org-element-at-point)) + (insert "#+BEGIN_EXAMPLE\n") + (search-forward "Para2") + (org-element-type (org-element-at-point)))))) + ;; Sensitive change: removing a line alters document structure both + ;; above and below. + (should + (eq 'example-block + (org-test-with-temp-text + "# +BEGIN_EXAMPLE\nPara1\n\nPara2\n#+END_EXAMPLE" + (let ((org-element-use-cache t)) + (save-excursion (goto-char (point-max)) (org-element-at-point)) + (forward-char) + (delete-char 1) + (search-forward "Para2") + (org-element-type (org-element-at-point)))))) + (should + (eq 'example-block + (org-test-with-temp-text + "#+BEGIN_EXAMPLE\nPara1\n\nPara2\n# +END_EXAMPLE" + (let ((org-element-use-cache t)) + (save-excursion (goto-char (point-max)) (org-element-at-point)) + (search-forward "# ") + (delete-char -1) + (search-backward "Para1") + (org-element-type (org-element-at-point))))))) + + (provide 'test-org-element) + ;;; test-org-element.el ends here