Refactor unescaping functions
* org.el (org-link-unescape): Use a simpler algorithm for replacing percent escapes. (org-link-unescape-compound): Use a `cond' form instead of nested `if' forms, convert hex strings with `string-to-number', and save match data. (org-link-unescape-single-byte-sequence): Use `mapconcat' and `string-to-number' to unescape single-byte sequences.
This commit is contained in:
parent
dc76fd5d71
commit
7b58cccddd
78
lisp/org.el
78
lisp/org.el
|
@ -8642,53 +8642,38 @@ If optional argument MERGE is set, merge TABLE into
|
||||||
(defun org-link-unescape (str)
|
(defun org-link-unescape (str)
|
||||||
"Unhex hexified unicode strings as returned from the JavaScript function
|
"Unhex hexified unicode strings as returned from the JavaScript function
|
||||||
encodeURIComponent. E.g. `%C3%B6' is the german Umlaut `ö'."
|
encodeURIComponent. E.g. `%C3%B6' is the german Umlaut `ö'."
|
||||||
(setq str (or str ""))
|
(unless (and (null str) (string= "" str))
|
||||||
(let ((tmp "")
|
(let ((pos 0) (case-fold-search t) unhexed)
|
||||||
(case-fold-search t))
|
(while (setq pos (string-match "\\(%[0-9a-f][0-9a-f]\\)+" str pos))
|
||||||
(while (string-match "\\(%[0-9a-f][0-9a-f]\\)+" str)
|
(setq unhexed (org-link-unescape-compound (match-string 0 str)))
|
||||||
(let* ((start (match-beginning 0))
|
(setq str (replace-match unhexed t t str))
|
||||||
(end (match-end 0))
|
(setq pos (+ pos (length unhexed))))))
|
||||||
(hex (match-string 0 str))
|
str)
|
||||||
(replacement (org-link-unescape-compound (upcase hex))))
|
|
||||||
(setq tmp (concat tmp (substring str 0 start) replacement))
|
|
||||||
(setq str (substring str end))))
|
|
||||||
(setq tmp (concat tmp str))
|
|
||||||
tmp))
|
|
||||||
|
|
||||||
(defun org-link-unescape-compound (hex)
|
(defun org-link-unescape-compound (hex)
|
||||||
"Unhexify unicode hex-chars. E.g. `%C3%B6' is the German Umlaut `ö'.
|
"Unhexify unicode hex-chars. E.g. `%C3%B6' is the German Umlaut `ö'.
|
||||||
Note: this function also decodes single byte encodings like
|
Note: this function also decodes single byte encodings like
|
||||||
`%E1' (\"á\") if not followed by another `%[A-F0-9]{2}' group."
|
`%E1' (\"á\") if not followed by another `%[A-F0-9]{2}' group."
|
||||||
(let* ((bytes (remove "" (split-string hex "%")))
|
(save-match-data
|
||||||
|
(let* ((bytes (cdr (split-string hex "%")))
|
||||||
(ret "")
|
(ret "")
|
||||||
(eat 0)
|
(eat 0)
|
||||||
(sum 0))
|
(sum 0))
|
||||||
(while bytes
|
(while bytes
|
||||||
(let* ((b (pop bytes))
|
(let* ((val (string-to-number (pop bytes) 16))
|
||||||
(a (elt b 0))
|
(shift-xor
|
||||||
(b (elt b 1))
|
(if (= 0 eat)
|
||||||
(c1 (if (> a ?9) (+ 10 (- a ?A)) (- a ?0)))
|
(cond
|
||||||
(c2 (if (> b ?9) (+ 10 (- b ?A)) (- b ?0)))
|
((>= val 252) (cons 6 252))
|
||||||
(val (+ (lsh c1 4) c2))
|
((>= val 248) (cons 5 248))
|
||||||
(shift
|
((>= val 240) (cons 4 240))
|
||||||
(if (= 0 eat) ;; new byte
|
((>= val 224) (cons 3 224))
|
||||||
(if (>= val 252) 6
|
((>= val 192) (cons 2 192))
|
||||||
(if (>= val 248) 5
|
(t (cons 0 0)))
|
||||||
(if (>= val 240) 4
|
(cons 6 128))))
|
||||||
(if (>= val 224) 3
|
(if (>= val 192) (setq eat (car shift-xor)))
|
||||||
(if (>= val 192) 2 0)))))
|
(setq val (logxor val (cdr shift-xor)))
|
||||||
6))
|
(setq sum (+ (lsh sum (car shift-xor)) val))
|
||||||
(xor
|
|
||||||
(if (= 0 eat) ;; new byte
|
|
||||||
(if (>= val 252) 252
|
|
||||||
(if (>= val 248) 248
|
|
||||||
(if (>= val 240) 240
|
|
||||||
(if (>= val 224) 224
|
|
||||||
(if (>= val 192) 192 0)))))
|
|
||||||
128)))
|
|
||||||
(if (>= val 192) (setq eat shift))
|
|
||||||
(setq val (logxor val xor))
|
|
||||||
(setq sum (+ (lsh sum shift) val))
|
|
||||||
(if (> eat 0) (setq eat (- eat 1)))
|
(if (> eat 0) (setq eat (- eat 1)))
|
||||||
(cond
|
(cond
|
||||||
((= 0 eat) ;multi byte
|
((= 0 eat) ;multi byte
|
||||||
|
@ -8697,22 +8682,13 @@ Note: this function also decodes single byte encodings like
|
||||||
((not bytes) ; single byte(s)
|
((not bytes) ; single byte(s)
|
||||||
(setq ret (org-link-unescape-single-byte-sequence hex))))
|
(setq ret (org-link-unescape-single-byte-sequence hex))))
|
||||||
)) ;; end (while bytes
|
)) ;; end (while bytes
|
||||||
ret ))
|
ret )))
|
||||||
|
|
||||||
(defun org-link-unescape-single-byte-sequence (hex)
|
(defun org-link-unescape-single-byte-sequence (hex)
|
||||||
"Unhexify hex-encoded single byte character sequences."
|
"Unhexify hex-encoded single byte character sequences."
|
||||||
(let ((bytes (remove "" (split-string hex "%")))
|
(mapconcat (lambda (byte)
|
||||||
(ret ""))
|
(char-to-string (string-to-number byte 16)))
|
||||||
(while bytes
|
(cdr (split-string hex "%")) ""))
|
||||||
(let* ((b (pop bytes))
|
|
||||||
(a (elt b 0))
|
|
||||||
(b (elt b 1))
|
|
||||||
(c1 (if (> a ?9) (+ 10 (- a ?A)) (- a ?0)))
|
|
||||||
(c2 (if (> b ?9) (+ 10 (- b ?A)) (- b ?0))))
|
|
||||||
(setq ret
|
|
||||||
(concat ret (char-to-string
|
|
||||||
(+ (lsh c1 4) c2))))))
|
|
||||||
ret))
|
|
||||||
|
|
||||||
(defun org-xor (a b)
|
(defun org-xor (a b)
|
||||||
"Exclusive or."
|
"Exclusive or."
|
||||||
|
|
Loading…
Reference in New Issue