summary refs log tree commit diff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
author Hartmut Goebel <h.goebel@crazy-compilers.com> 2016-11-29 18:47:16 +0100
committer Hartmut Goebel <h.goebel@crazy-compilers.com> 2016-11-29 18:47:16 +0100
commit 3bf428065916f1a47c5ed12f5622f0eff4123644 (patch)
tree f424c57b8a00a019e04fc29f42c8527a811ba281 /gnu/packages/bioinformatics.scm
parent 2cb64f3b1b3df338acfc0ba9f719875db21812b0 (diff)
parent 683c5ab70accb909697717bb61741a7692c52c09 (diff)
Merge branch 'master' into python-build-system
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r-- gnu/packages/bioinformatics.scm 254
1 file changed, 252 insertions, 2 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 959a7ac2fd..415024fadc 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -50,6 +50,7 @@
#:use-module (gnu packages documentation)
#:use-module (gnu packages datastructures)
#:use-module (gnu packages file)
+ #:use-module (gnu packages flex)
#:use-module (gnu packages gawk)
#:use-module (gnu packages gcc)
#:use-module (gnu packages gd)
@@ -3435,6 +3436,45 @@ program for nucleotide and protein sequences.")
;; License information found in 'muscle -h' and usage.cpp.
(license license:public-domain)))
+(define-public newick-utils
+ ;; There are no recent releases so we package from git.
+ (let ((commit "da121155a977197cab9fbb15953ca1b40b11eb87"))
+ (package
+ (name "newick-utils")
+ (version (string-append "1.6-1." (string-take commit 8)))
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/tjunier/newick_utils.git")
+ (commit commit)))
+ (file-name (string-append name "-" version "-checkout"))
+ (sha256
+ (base32
+ "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'autoconf
+ (lambda _ (zero? (system* "autoreconf" "-vif")))))))
+ (inputs
+ ;; XXX: TODO: Enable Lua and Guile bindings.
+ ;; https://github.com/tjunier/newick_utils/issues/13
+ `(("libxml2" ,libxml2)
+ ("flex" ,flex)
+ ("bison" ,bison)))
+ (native-inputs
+ `(("autoconf" ,autoconf)
+ ("automake" ,automake)
+ ("libtool" ,libtool)))
+ (synopsis "Programs for working with newick format phylogenetic trees")
+ (description
+ "Newick-utils is a suite of utilities for processing phylogenetic trees
+in Newick format. Functions include re-rooting, extracting subtrees,
+trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
+ (home-page "https://github.com/tjunier/newick_utils")
+ (license license:bsd-3))))
+
(define-public orfm
(package
(name "orfm")
@@ -3635,6 +3675,58 @@ for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
(license license:gpl2+)))
+(define-public proteinortho
+ (package
+ (name "proteinortho")
+ (version "5.15")
+ (source
+ (origin
+ (method url-fetch)
+ (uri
+ (string-append
+ "http://www.bioinf.uni-leipzig.de/Software/proteinortho/proteinortho_v"
+ version "_src.tar.gz"))
+ (sha256
+ (base32
+ "05wacnnbx56avpcwhzlcf6b7s77swcpv3qnwz5sh1z54i51gg2ki"))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:test-target "test"
+ #:phases
+ (modify-phases %standard-phases
+ (replace 'configure
+ ;; There is no configure script, so we modify the Makefile directly.
+ (lambda* (#:key outputs #:allow-other-keys)
+ (substitute* "Makefile"
+ (("INSTALLDIR=.*")
+ (string-append
+ "INSTALLDIR=" (assoc-ref outputs "out") "/bin\n")))
+ #t))
+ (add-before 'install 'make-install-directory
+ ;; The install directory is not created during 'make install'.
+ (lambda* (#:key outputs #:allow-other-keys)
+ (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
+ #t))
+ (add-after 'install 'wrap-programs
+ (lambda* (#:key inputs outputs #:allow-other-keys)
+ (let* ((path (getenv "PATH"))
+ (out (assoc-ref outputs "out"))
+ (binary (string-append out "/bin/proteinortho5.pl")))
+ (wrap-program binary `("PATH" ":" prefix (,path))))
+ #t)))))
+ (inputs
+ `(("perl" ,perl)
+ ("python" ,python-2)
+ ("blast+" ,blast+)))
+ (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
+ (synopsis "Detect orthologous genes across species")
+ (description
+ "Proteinortho is a tool to detect orthologous genes across different
+species. For doing so, it compares similarities of given gene sequences and
+clusters them to find significant groups. The algorithm was designed to handle
+large-scale data and can be applied to hundreds of species at once.")
+ (license license:gpl2+)))
+
(define-public pyicoteo
(package
(name "pyicoteo")
@@ -3707,7 +3799,7 @@ partial genes, and identifies translation initiation sites.")
(define-public roary
(package
(name "roary")
- (version "3.6.8")
+ (version "3.7.0")
(source
(origin
(method url-fetch)
@@ -3716,7 +3808,7 @@ partial genes, and identifies translation initiation sites.")
version ".tar.gz"))
(sha256
(base32
- "0g0pzcv8y7n2w8q7c9q0a7s2ghkwci6w8smg9mjw4agad5cd7yaw"))))
+ "0x2hpb3nfsc6x2nq1788w0fhqfzc7cn2dp4xwyva9m3k6xlz0m43"))))
(build-system perl-build-system)
(arguments
`(#:phases
@@ -7182,6 +7274,29 @@ two-dimensional genome scans.")
libraries for systems that do not have these available via other means.")
(license license:artistic2.0)))
+(define-public r-r4rna
+ (package
+ (name "r-r4rna")
+ (version "0.1.4")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (string-append "http://www.e-rna.org/r-chie/files/R4RNA_"
+ version ".tar.gz"))
+ (sha256
+ (base32
+ "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-optparse" ,r-optparse)
+ ("r-rcolorbrewer" ,r-rcolorbrewer)))
+ (home-page "http://www.e-rna.org/r-chie/index.cgi")
+ (synopsis "Analysis framework for RNA secondary structure")
+ (description
+ "The R4RNA package aims to be a general framework for the analysis of RNA
+secondary structure and comparative analysis in R.")
+ (license license:gpl3+)))
+
(define-public r-rhtslib
(package
(name "r-rhtslib")
@@ -7362,6 +7477,141 @@ characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
(license license:expat)))
+(define-public r-wgcna
+ (package
+ (name "r-wgcna")
+ (version "1.51")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (cran-uri "WGCNA" version))
+ (sha256
+ (base32
+ "0hzvnhw76vwg8bl8x368f0c5szpwb8323bmrb3bir93i5bmfjsxx"))))
+ (properties `((upstream-name . "WGCNA")))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-annotationdbi" ,r-annotationdbi)
+ ("r-doparallel" ,r-doparallel)
+ ("r-dynamictreecut" ,r-dynamictreecut)
+ ("r-fastcluster" ,r-fastcluster)
+ ("r-foreach" ,r-foreach)
+ ("r-go-db" ,r-go-db)
+ ("r-hmisc" ,r-hmisc)
+ ("r-impute" ,r-impute)
+ ("r-matrixstats" ,r-matrixstats)
+ ("r-preprocesscore" ,r-preprocesscore)))
+ (home-page
+ "http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/Rpackages/WGCNA/")
+ (synopsis "Weighted correlation network analysis")
+ (description
+ "This package provides functions necessary to perform Weighted
+Correlation Network Analysis on high-dimensional data. It includes functions
+for rudimentary data cleaning, construction and summarization of correlation
+networks, module identification and functions for relating both variables and
+modules to sample traits. It also includes a number of utility functions for
+data manipulation and visualization.")
+ (license license:gpl2+)))
+
+(define-public r-chipkernels
+ (let ((commit "c9cfcacb626b1221094fb3490ea7bac0fd625372")
+ (revision "1"))
+ (package
+ (name "r-chipkernels")
+ (version (string-append "1.1-" revision "." (string-take commit 9)))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/ManuSetty/ChIPKernels.git")
+ (commit commit)))
+ (file-name (string-append name "-" version))
+ (sha256
+ (base32
+ "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-iranges" ,r-iranges)
+ ("r-xvector" ,r-xvector)
+ ("r-biostrings" ,r-biostrings)
+ ("r-bsgenome" ,r-bsgenome)
+ ("r-gtools" ,r-gtools)
+ ("r-genomicranges" ,r-genomicranges)
+ ("r-sfsmisc" ,r-sfsmisc)
+ ("r-kernlab" ,r-kernlab)
+ ("r-s4vectors" ,r-s4vectors)
+ ("r-biocgenerics" ,r-biocgenerics)))
+ (home-page "https://github.com/ManuSetty/ChIPKernels")
+ (synopsis "Build string kernels for DNA Sequence analysis")
+ (description "ChIPKernels is an R package for building different string
+kernels used for DNA Sequence analysis. A dictionary of the desired kernel
+must be built and this dictionary can be used for determining kernels for DNA
+Sequences.")
+ (license license:gpl2+))))
+
+(define-public r-seqgl
+ (package
+ (name "r-seqgl")
+ (version "1.1.4")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (string-append "https://github.com/ManuSetty/SeqGL/"
+ "archive/" version ".tar.gz"))
+ (file-name (string-append name "-" version ".tar.gz"))
+ (sha256
+ (base32
+ "0pnk1p3sci5yipyc8xnb6jbmydpl80fld927xgnbcv104hy8h8yh"))))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-biostrings" ,r-biostrings)
+ ("r-chipkernels" ,r-chipkernels)
+ ("r-genomicranges" ,r-genomicranges)
+ ("r-spams" ,r-spams)
+ ("r-wgcna" ,r-wgcna)
+ ("r-fastcluster" ,r-fastcluster)))
+ (home-page "https://github.com/ManuSetty/SeqGL")
+ (synopsis "Group lasso for Dnase/ChIP-seq data")
+ (description "SeqGL is a group lasso based algorithm to extract
+transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
+This package presents a method which uses group lasso to discriminate between
+bound and non bound genomic regions to accurately identify transcription
+factors bound at the specific regions.")
+ (license license:gpl2+)))
+
+(define-public r-gkmsvm
+ (package
+ (name "r-gkmsvm")
+ (version "0.71.0")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (cran-uri "gkmSVM" version))
+ (sha256
+ (base32
+ "1zpxgxmf2nd5j5wn00ps6kfxr8wxh7d1swr1rr4spq7sj5z5z0k0"))))
+ (properties `((upstream-name . "gkmSVM")))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-biocgenerics" ,r-biocgenerics)
+ ("r-biostrings" ,r-biostrings)
+ ("r-genomeinfodb" ,r-genomeinfodb)
+ ("r-genomicranges" ,r-genomicranges)
+ ("r-iranges" ,r-iranges)
+ ("r-kernlab" ,r-kernlab)
+ ("r-rcpp" ,r-rcpp)
+ ("r-rocr" ,r-rocr)
+ ("r-rtracklayer" ,r-rtracklayer)
+ ("r-s4vectors" ,r-s4vectors)
+ ("r-seqinr" ,r-seqinr)))
+ (home-page "http://cran.r-project.org/web/packages/gkmSVM")
+ (synopsis "Gapped-kmer support vector machine")
+ (description
+ "This R package provides tools for training gapped-kmer SVM classifiers
+for DNA and protein sequences. This package supports several sequence
+kernels, including: gkmSVM, kmer-SVM, mismatch kernel and wildcard kernel.")
+ (license license:gpl2+)))
+
(define-public emboss
(package
(name "emboss")