From a7deff3d314512d22cae633184b9552d059a1f25 Mon Sep 17 00:00:00 2001 From: ndwarshuis Date: Sun, 8 May 2022 13:50:42 -0400 Subject: [PATCH] ADD bedtools knockoff library --- etc/conf.org | 1 + local/lib/interval/interval.el | 158 +++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+) create mode 100644 local/lib/interval/interval.el diff --git a/etc/conf.org b/etc/conf.org index 35fd346..7f65625 100644 --- a/etc/conf.org +++ b/etc/conf.org @@ -1895,6 +1895,7 @@ The advantage of doing it this way is that I can byte-compile and test independe (--each (directory-files dir t ".*\\.el$") (byte-recompile-file it nil 0))) (nd/load-and-compile (nd/expand-lib-directory "either")) +(nd/load-and-compile (nd/expand-lib-directory "interval")) (nd/load-and-compile (nd/expand-lib-directory "dag")) (nd/load-and-compile (nd/expand-lib-directory "org-x")) diff --git a/local/lib/interval/interval.el b/local/lib/interval/interval.el new file mode 100644 index 0000000..9118801 --- /dev/null +++ b/local/lib/interval/interval.el @@ -0,0 +1,158 @@ +;;; interval.el --- a bedtools knockoff -*- lexical-binding: t; -*- + +;; Copyright (C) 2022 Nathan Dwarshuis + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see . + +;;; Commentary: + +;; Functions pertaining to intervals. + +;; An interval is defined as a list like (START END) where START and END are +;; two numbers, and END >= START. + +;; Inspired by bedtools (https://github.com/arq5x/bedtools2). + +;;; Code: + +(require 'dash) + +(defun interval< (a b) + "Test if A comes before B. + +Return t if A starts before B, and return t if A ends before B +and they have the same start time." + (let ((a0 (car a)) + (b0 (car b))) + (if (= a0 b0) (< (cadr a) (cadr b)) (< a0 b0)))) + +(defun interval<= (a b) + "Test if A comes before B or is the same as B." + (not (interval< b a))) + +(defun interval-bimap (fun int) + "Apply FUN to both numbers in INT." + `(,(funcall fun (car int)) ,(funcall fun (cadr int)))) + +(defun interval-len (int) + "Return the length of INT." + (- (cadr int) (car int))) + +(defun interval-min (ints) + "Return the earliest starting value in INTS." + (-min (-map #'car ints))) + +(defun interval-max (ints) + "Return the latest ending value in INTS." + (-max (-map #'cadr ints))) + +(defun interval-span (ints) + "Return the length covered by all INTS." + (if (not ints) 0 + (-let (((mn mx) (car ints))) + ;; I could just get the min/max but this avoids looping twice + (--each (cdr ints) + (when (< (car it) mn) + (setq mn (car it))) + (when (< mx (cadr it) + (setq mx (cadr it))))) + (- mx mn)))) + +(defun interval-group-overlaps (interval-fun xs) + "Group XS based on when their intervals overlap. + +INTERVAL-FUN is a function that takes one of XS and returns an +interval like (START END) where START and END are numbers. + +Return a list of all pairs in XS for which their intervals overlap. + +Complexity is O(N^2) in case all members in XS conflict with each other, and +O(N) in case there are no conflicts." + (cl-labels + ((get-overlaps + (acc ss) + (-if-let (s0 (car ss)) + (-let* (((acc+ acc-) acc) + (A (cdr s0)) + (a1 (cadr (car s0))) + (rest (cdr ss))) + ;; add members while if the starting value is less than the ending + ;; value of the current member + (-if-let (over (->> (--take-while (< (car (car it)) a1) rest) + (--map (list A (cdr it))) + (reverse))) + (get-overlaps `((,@over ,@acc+) ,acc-) rest) + (get-overlaps `(,acc+ (,A ,@acc-)) rest))) + acc))) + (-let (((over non-over) (->> (-annotate interval-fun xs) + (--sort (interval< (car it) (car other))) + (get-overlaps nil)))) + (list (nreverse over) (nreverse non-over))))) + +(defun interval-sort (ints) + "Sort INTS according to `interval-rank'." + (-sort #'interval< ints)) + +(defun interval-merge (ints) + "Merge a list of overlapping intervals. + +Two intervals overlap if the start/end of one is within the other +interval (inclusive). + +Assume INTS is sorted according to `interval-sort'. + +Complexity is O(N) where N is the length of INTS." + (cl-flet + ((merge-intervals + (acc interval) + (-let ((((sp ep) . accp) acc) + ((s e) interval)) + (if (<= s ep) `((,sp ,e) ,@accp) `((,s ,e) ,@acc))))) + (when ints + (->> (cdr ints) + (-reduce-from #'merge-intervals `(,(car ints))) + (reverse))))) + +(defun interval-complement (start end ints) + "Return the complement of intervals in INTS. + +START and END are the lower and upper bound to determine where +the first/last compliment should start/end in case the first/last +in INTS starts/ends after/before START/END respectively. + +Assume that INTS is sorted according to `interval-sort', that no +members of INT overlap, and that no members in INT have an end +before START or a start after END. + +Complexity is O(N)." + (cl-flet + ((complement + (acc interval) + (-let (((last gaps) acc) + ((s e) interval)) + `(,e ((,last ,s) ,@gaps))))) + (if (not ints) + `(,start ,end) + (-let* (((s e) (car ints)) + ((init ints*) (if (<= s start) `(,e ,(cdr ints)) `(,start ,ints))) + ((last gaps) (-reduce-from #'complement `(,init) ints*))) + (->> (if (<= end last) gaps `((,last ,end) ,@gaps)) + (reverse)))))) + +(defun interval-sum (ints) + "Return the sum of INTS." + (-sum (-map #'interval-len ints))) + +(provide 'interval) +;;; interval.el ends here