Refactor unescaping functions

* org.el (org-link-unescape): Simpler algorithm for replacing percent
escapes.
(org-link-unescape-compound): Use cond statements instead of nested
if, convert hex string with string-to-number, save match data.
(org-link-unescape-single-byte-sequence): Use mapconcat and
string-to-number for unescaping single-byte sequences.
This commit is contained in:
David Maus 2010-11-27 20:02:23 +01:00
parent dc76fd5d71
commit 7b58cccddd
1 changed file with 39 additions and 63 deletions

View File

@ -8642,53 +8642,38 @@ If optional argument MERGE is set, merge TABLE into
(defun org-link-unescape (str) (defun org-link-unescape (str)
"Unhex hexified unicode strings as returned from the JavaScript function "Unhex hexified unicode strings as returned from the JavaScript function
encodeURIComponent. E.g. `%C3%B6' is the german Umlaut `ö'." encodeURIComponent. E.g. `%C3%B6' is the german Umlaut `ö'."
(setq str (or str "")) (unless (and (null str) (string= "" str))
(let ((tmp "") (let ((pos 0) (case-fold-search t) unhexed)
(case-fold-search t)) (while (setq pos (string-match "\\(%[0-9a-f][0-9a-f]\\)+" str pos))
(while (string-match "\\(%[0-9a-f][0-9a-f]\\)+" str) (setq unhexed (org-link-unescape-compound (match-string 0 str)))
(let* ((start (match-beginning 0)) (setq str (replace-match unhexed t t str))
(end (match-end 0)) (setq pos (+ pos (length unhexed))))))
(hex (match-string 0 str)) str)
(replacement (org-link-unescape-compound (upcase hex))))
(setq tmp (concat tmp (substring str 0 start) replacement))
(setq str (substring str end))))
(setq tmp (concat tmp str))
tmp))
(defun org-link-unescape-compound (hex) (defun org-link-unescape-compound (hex)
"Unhexify unicode hex-chars. E.g. `%C3%B6' is the German Umlaut `ö'. "Unhexify unicode hex-chars. E.g. `%C3%B6' is the German Umlaut `ö'.
Note: this function also decodes single byte encodings like Note: this function also decodes single byte encodings like
`%E1' (\"á\") if not followed by another `%[A-F0-9]{2}' group." `%E1' (\"á\") if not followed by another `%[A-F0-9]{2}' group."
(let* ((bytes (remove "" (split-string hex "%"))) (save-match-data
(let* ((bytes (cdr (split-string hex "%")))
(ret "") (ret "")
(eat 0) (eat 0)
(sum 0)) (sum 0))
(while bytes (while bytes
(let* ((b (pop bytes)) (let* ((val (string-to-number (pop bytes) 16))
(a (elt b 0)) (shift-xor
(b (elt b 1)) (if (= 0 eat)
(c1 (if (> a ?9) (+ 10 (- a ?A)) (- a ?0))) (cond
(c2 (if (> b ?9) (+ 10 (- b ?A)) (- b ?0))) ((>= val 252) (cons 6 252))
(val (+ (lsh c1 4) c2)) ((>= val 248) (cons 5 248))
(shift ((>= val 240) (cons 4 240))
(if (= 0 eat) ;; new byte ((>= val 224) (cons 3 224))
(if (>= val 252) 6 ((>= val 192) (cons 2 192))
(if (>= val 248) 5 (t (cons 0 0)))
(if (>= val 240) 4 (cons 6 128))))
(if (>= val 224) 3 (if (>= val 192) (setq eat (car shift-xor)))
(if (>= val 192) 2 0))))) (setq val (logxor val (cdr shift-xor)))
6)) (setq sum (+ (lsh sum (car shift-xor)) val))
(xor
(if (= 0 eat) ;; new byte
(if (>= val 252) 252
(if (>= val 248) 248
(if (>= val 240) 240
(if (>= val 224) 224
(if (>= val 192) 192 0)))))
128)))
(if (>= val 192) (setq eat shift))
(setq val (logxor val xor))
(setq sum (+ (lsh sum shift) val))
(if (> eat 0) (setq eat (- eat 1))) (if (> eat 0) (setq eat (- eat 1)))
(cond (cond
((= 0 eat) ;multi byte ((= 0 eat) ;multi byte
@ -8697,22 +8682,13 @@ Note: this function also decodes single byte encodings like
((not bytes) ; single byte(s) ((not bytes) ; single byte(s)
(setq ret (org-link-unescape-single-byte-sequence hex)))) (setq ret (org-link-unescape-single-byte-sequence hex))))
)) ;; end (while bytes )) ;; end (while bytes
ret )) ret )))
(defun org-link-unescape-single-byte-sequence (hex) (defun org-link-unescape-single-byte-sequence (hex)
"Unhexify hex-encoded single byte character sequences." "Unhexify hex-encoded single byte character sequences."
(let ((bytes (remove "" (split-string hex "%"))) (mapconcat (lambda (byte)
(ret "")) (char-to-string (string-to-number byte 16)))
(while bytes (cdr (split-string hex "%")) ""))
(let* ((b (pop bytes))
(a (elt b 0))
(b (elt b 1))
(c1 (if (> a ?9) (+ 10 (- a ?A)) (- a ?0)))
(c2 (if (> b ?9) (+ 10 (- b ?A)) (- b ?0))))
(setq ret
(concat ret (char-to-string
(+ (lsh c1 4) c2))))))
ret))
(defun org-xor (a b) (defun org-xor (a b)
"Exclusive or." "Exclusive or."