summaryrefslogtreecommitdiff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r--gnu/packages/bioinformatics.scm334
1 files changed, 106 insertions, 228 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 1836939970..8f6a37a4f8 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -2325,23 +2325,21 @@ files. The code was previously part of the cutadapt tool.")
(define-public cutadapt
(package
(name "cutadapt")
- (version "1.18")
+ (version "2.1")
(source (origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/marcelm/cutadapt.git")
- (commit (string-append "v" version))))
- (file-name (git-file-name name version))
+ (method url-fetch)
+ (uri (pypi-uri "cutadapt" version))
(sha256
(base32
- "08bbfwyc0kvcd95jf2s95xiv9s3cbsxm39ydl0qck3fw3cviwxpg"))))
+ "1vqmsfkm6llxzmsz9wcfcvzx9a9f8iabvwik2rbyn7nc4wm25z89"))))
(build-system python-build-system)
(inputs
`(("python-dnaio" ,python-dnaio)
("python-xopen" ,python-xopen)))
(native-inputs
`(("python-cython" ,python-cython)
- ("python-pytest" ,python-pytest)))
+ ("python-pytest" ,python-pytest)
+ ("python-setuptools-scm" ,python-setuptools-scm)))
(home-page "https://cutadapt.readthedocs.io/en/stable/")
(synopsis "Remove adapter sequences from nucleotide sequencing reads")
(description
@@ -6792,14 +6790,14 @@ databases. Packages produced are intended to be used with AnnotationDbi.")
(define-public r-rbgl
(package
(name "r-rbgl")
- (version "1.58.1")
+ (version "1.58.2")
(source
(origin
(method url-fetch)
(uri (bioconductor-uri "RBGL" version))
(sha256
(base32
- "1l5x2icv9di1lr3gqfi0vjnyd9xc3l77yc42ippqd4cadj3d1pzf"))))
+ "0vhnh47pswnp27c0zqcbnnsayfmq3cxcgrs9g860555ldqfl4cyl"))))
(properties `((upstream-name . "RBGL")))
(build-system r-build-system)
(propagated-inputs `(("r-graph" ,r-graph)))
@@ -7014,29 +7012,6 @@ annotation infrastructure.")
"This package provides a pipeline for the analysis of GRO-seq data.")
(license license:gpl3+)))
-(define-public r-sparql
- (package
- (name "r-sparql")
- (version "1.16")
- (source (origin
- (method url-fetch)
- (uri (cran-uri "SPARQL" version))
- (sha256
- (base32
- "0gak1q06yyhdmcxb2n3v0h9gr1vqd0viqji52wpw211qp6r6dcrc"))))
- (properties `((upstream-name . "SPARQL")))
- (build-system r-build-system)
- (propagated-inputs
- `(("r-rcurl" ,r-rcurl)
- ("r-xml" ,r-xml)))
- (home-page "https://cran.r-project.org/web/packages/SPARQL")
- (synopsis "SPARQL client for R")
- (description "This package provides an interface to use SPARQL to pose
-SELECT or UPDATE queries to an end-point.")
- ;; The only license indication is found in the DESCRIPTION file,
- ;; which states GPL-3. So we cannot assume GPLv3+.
- (license license:gpl3)))
-
(define-public vsearch
(package
(name "vsearch")
@@ -7243,32 +7218,6 @@ BLAST, KEGG, GenBank, MEDLINE and GO.")
;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
(license (list license:ruby license:lgpl2.1+ license:gpl2+ ))))
-(define-public r-acsnminer
- (package
- (name "r-acsnminer")
- (version "0.16.8.25")
- (source (origin
- (method url-fetch)
- (uri (cran-uri "ACSNMineR" version))
- (sha256
- (base32
- "0gh604s8qall6zfjlwcg2ilxjvz08dplf9k5g47idhv43scm748l"))))
- (properties `((upstream-name . "ACSNMineR")))
- (build-system r-build-system)
- (propagated-inputs
- `(("r-ggplot2" ,r-ggplot2)
- ("r-gridextra" ,r-gridextra)))
- (home-page "https://cran.r-project.org/web/packages/ACSNMineR")
- (synopsis "Gene enrichment analysis")
- (description
- "This package provides tools to compute and represent gene set enrichment
-or depletion from your data based on pre-saved maps from the @dfn{Atlas of
-Cancer Signalling Networks} (ACSN) or user imported maps. The gene set
-enrichment can be run with hypergeometric test or Fisher exact test, and can
-use multiple corrections. Visualization of data can be done either by
-barplots or heatmaps.")
- (license license:gpl2+)))
-
(define-public r-biocinstaller
(package
(name "r-biocinstaller")
@@ -7315,30 +7264,6 @@ categorize packages in a Bioconductor package repository according to keywords,
also known as views, in a controlled vocabulary.")
(license license:artistic2.0)))
-(define-public r-bookdown
- (package
- (name "r-bookdown")
- (version "0.9")
- (source (origin
- (method url-fetch)
- (uri (cran-uri "bookdown" version))
- (sha256
- (base32
- "0vg1s1w0l9pm95asqb21yf39mfk1nc9rdhmlys9xwr7p7i7rsz32"))))
- (build-system r-build-system)
- (propagated-inputs
- `(("r-htmltools" ,r-htmltools)
- ("r-knitr" ,r-knitr)
- ("r-rmarkdown" ,r-rmarkdown)
- ("r-tinytex" ,r-tinytex)
- ("r-yaml" ,r-yaml)
- ("r-xfun" ,r-xfun)))
- (home-page "https://github.com/rstudio/bookdown")
- (synopsis "Authoring books and technical documents with R markdown")
- (description "This package provides output formats and utilities for
-authoring books and technical documents with R Markdown.")
- (license license:gpl3)))
-
(define-public r-biocstyle
(package
(name "r-biocstyle")
@@ -7412,29 +7337,6 @@ functionality.")
checks on R packages that are to be submitted to the Bioconductor repository.")
(license license:artistic2.0)))
-(define-public r-optparse
- (package
- (name "r-optparse")
- (version "1.6.1")
- (source
- (origin
- (method url-fetch)
- (uri (cran-uri "optparse" version))
- (sha256
- (base32
- "04vyb6dhcga30mvghsg1p052jmf69xqxkvh3hzqz7dscyppy76w1"))))
- (build-system r-build-system)
- (propagated-inputs
- `(("r-getopt" ,r-getopt)))
- (home-page
- "https://github.com/trevorld/optparse")
- (synopsis "Command line option parser")
- (description
- "This package provides a command line parser inspired by Python's
-@code{optparse} library to be used with Rscript to write shebang scripts
-that accept short and long options.")
- (license license:gpl2+)))
-
(define-public r-s4vectors
(package
(name "r-s4vectors")
@@ -7462,31 +7364,6 @@ In addition, a few low-level concrete subclasses of general interest (e.g.
S4Vectors package itself.")
(license license:artistic2.0)))
-(define-public r-seqinr
- (package
- (name "r-seqinr")
- (version "3.4-5")
- (source
- (origin
- (method url-fetch)
- (uri (cran-uri "seqinr" version))
- (sha256
- (base32
- "17zv0n5cji17izwmwg0jcbxbjl3w5rls91w15svcnlpxjms38ahn"))))
- (build-system r-build-system)
- (propagated-inputs
- `(("r-ade4" ,r-ade4)
- ("r-segmented" ,r-segmented)))
- (inputs
- `(("zlib" ,zlib)))
- (home-page "http://seqinr.r-forge.r-project.org/")
- (synopsis "Biological sequences retrieval and analysis")
- (description
- "This package provides tools for exploratory data analysis and data
-visualization of biological sequence (DNA and protein) data. It also includes
-utilities for sequence data management under the ACNUC system.")
- (license license:gpl2+)))
-
(define-public r-iranges
(package
(name "r-iranges")
@@ -7888,10 +7765,10 @@ biological sequences or sets of sequences.")
(home-page "https://bioconductor.org/packages/release/bioc/html/Rsamtools.html")
(synopsis "Interface to samtools, bcftools, and tabix")
(description
- "This package provides an interface to the 'samtools', 'bcftools', and
-'tabix' utilities for manipulating SAM (Sequence Alignment / Map), FASTA,
-binary variant call (BCF) and compressed indexed tab-delimited (tabix)
-files.")
+ "This package provides an interface to the @code{samtools},
+@code{bcftools}, and @code{tabix} utilities for manipulating SAM (Sequence
+Alignment / Map), FASTA, binary variant call (BCF) and compressed indexed
+tab-delimited (tabix) files.")
(license license:expat)))
(define-public r-delayedarray
@@ -8039,13 +7916,13 @@ as well as query and modify the browser state, such as the current viewport.")
(define-public r-genomicfeatures
(package
(name "r-genomicfeatures")
- (version "1.34.6")
+ (version "1.34.7")
(source (origin
(method url-fetch)
(uri (bioconductor-uri "GenomicFeatures" version))
(sha256
(base32
- "1cz7qx324dmsrkzyhm956cfgr08gpily5rpym7hc8zz5kbl6i3ra"))))
+ "100y8cx9xfglbn36k25y09y0qfwm0qpb4b01qhk367832rqz5dhz"))))
(properties
`((upstream-name . "GenomicFeatures")))
(build-system r-build-system)
@@ -8587,45 +8464,6 @@ characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
(license license:expat)))
-(define-public r-wgcna
- (package
- (name "r-wgcna")
- (version "1.66")
- (source
- (origin
- (method url-fetch)
- (uri (cran-uri "WGCNA" version))
- (sha256
- (base32
- "0rhnyhzfn93yp24jz9v6dzrmyizwzdw070a7idm0k33w1cm8sjqv"))))
- (properties `((upstream-name . "WGCNA")))
- (build-system r-build-system)
- (propagated-inputs
- `(("r-annotationdbi" ,r-annotationdbi)
- ("r-doparallel" ,r-doparallel)
- ("r-dynamictreecut" ,r-dynamictreecut)
- ("r-fastcluster" ,r-fastcluster)
- ("r-foreach" ,r-foreach)
- ("r-go-db" ,r-go-db)
- ("r-hmisc" ,r-hmisc)
- ("r-impute" ,r-impute)
- ("r-rcpp" ,r-rcpp)
- ("r-robust" ,r-robust)
- ("r-survival" ,r-survival)
- ("r-matrixstats" ,r-matrixstats)
- ("r-preprocesscore" ,r-preprocesscore)))
- (home-page
- "http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/Rpackages/WGCNA/")
- (synopsis "Weighted correlation network analysis")
- (description
- "This package provides functions necessary to perform Weighted
-Correlation Network Analysis on high-dimensional data. It includes functions
-for rudimentary data cleaning, construction and summarization of correlation
-networks, module identification and functions for relating both variables and
-modules to sample traits. It also includes a number of utility functions for
-data manipulation and visualization.")
- (license license:gpl2+)))
-
(define-public r-chipkernels
(let ((commit "c9cfcacb626b1221094fb3490ea7bac0fd625372")
(revision "1"))
@@ -8693,39 +8531,6 @@ bound and non bound genomic regions to accurately identify transcription
factors bound at the specific regions.")
(license license:gpl2+)))
-(define-public r-gkmsvm
- (package
- (name "r-gkmsvm")
- (version "0.79.0")
- (source
- (origin
- (method url-fetch)
- (uri (cran-uri "gkmSVM" version))
- (sha256
- (base32
- "04dakbgfvfalz4rm4fvvybp506dn5fbj5g86ybfhrc6wywjllsz3"))))
- (properties `((upstream-name . "gkmSVM")))
- (build-system r-build-system)
- (propagated-inputs
- `(("r-biocgenerics" ,r-biocgenerics)
- ("r-biostrings" ,r-biostrings)
- ("r-genomeinfodb" ,r-genomeinfodb)
- ("r-genomicranges" ,r-genomicranges)
- ("r-iranges" ,r-iranges)
- ("r-kernlab" ,r-kernlab)
- ("r-rcpp" ,r-rcpp)
- ("r-rocr" ,r-rocr)
- ("r-rtracklayer" ,r-rtracklayer)
- ("r-s4vectors" ,r-s4vectors)
- ("r-seqinr" ,r-seqinr)))
- (home-page "https://cran.r-project.org/web/packages/gkmSVM")
- (synopsis "Gapped-kmer support vector machine")
- (description
- "This R package provides tools for training gapped-kmer SVM classifiers
-for DNA and protein sequences. This package supports several sequence
-kernels, including: gkmSVM, kmer-SVM, mismatch kernel and wildcard kernel.")
- (license license:gpl2+)))
-
(define-public r-tximport
(package
(name "r-tximport")
@@ -10384,14 +10189,14 @@ defining LD blocks.")
(define-public r-gqtlstats
(package
(name "r-gqtlstats")
- (version "1.14.0")
+ (version "1.14.1")
(source
(origin
(method url-fetch)
(uri (bioconductor-uri "gQTLstats" version))
(sha256
(base32
- "1sg9kw59dlayj7qxql9pd93d4hmml504sa3kkfpzfh3xri7m5pxf"))))
+ "1rkbnb3h02fdksc4nacqvmq4jgbj9fz4hm7j51yr2ggcgcykwraa"))))
(properties `((upstream-name . "gQTLstats")))
(build-system r-build-system)
(propagated-inputs
@@ -10625,14 +10430,14 @@ block processing.")
(define-public r-rhdf5lib
(package
(name "r-rhdf5lib")
- (version "1.4.2")
+ (version "1.4.3")
(source
(origin
(method url-fetch)
(uri (bioconductor-uri "Rhdf5lib" version))
(sha256
(base32
- "06bxd3wz8lrvh2hzvmjpdv4lvzj5lz9353bw5b3zb98cb8w9r2j5"))
+ "0hjhjvg2kss71fkmxlbgnyyy1agwzgq57rxkgkm4riw82x2rvw7q"))
(modules '((guix build utils)))
(snippet
'(begin
@@ -10861,14 +10666,14 @@ memory usage and processing time is minimized.")
(define-public r-phangorn
(package
(name "r-phangorn")
- (version "2.4.0")
+ (version "2.5.3")
(source
(origin
(method url-fetch)
(uri (cran-uri "phangorn" version))
(sha256
(base32
- "0xc8k552nxczy19jr0xjjagrzc8x6lafasgk2c099ls8bc1yml1i"))))
+ "1bv86yfk5r015s7ij6v4zz7bagwrw9m13yfs5853drxb19d5h1m3"))))
(build-system r-build-system)
(propagated-inputs
`(("r-ape" ,r-ape)
@@ -11075,23 +10880,31 @@ with narrow binding events such as transcription factor ChIP-seq.")
(define-public trim-galore
(package
(name "trim-galore")
- (version "0.4.5")
+ (version "0.6.1")
(source
(origin
(method git-fetch)
(uri (git-reference
(url "https://github.com/FelixKrueger/TrimGalore.git")
(commit version)))
- (file-name (string-append name "-" version "-checkout"))
+ (file-name (git-file-name name version))
(sha256
(base32
- "0x5892l48c816pf00wmnz5vq0zq6170d3xc8zrxncd4jcz7h1p71"))))
+ "1y31wbxwkm9xqzr5zv1pk5q418whnmlmgmfyxxpnl12h83m2i9iv"))))
(build-system gnu-build-system)
(arguments
`(#:tests? #f ; no tests
#:phases
(modify-phases %standard-phases
- (delete 'configure)
+ (replace 'configure
+ (lambda _
+ ;; Trim Galore tries to figure out what version of Python
+ ;; cutadapt is using by looking at the shebang. Of course that
+ ;; doesn't work, because cutadapt is wrapped in a shell script.
+ (substitute* "trim_galore"
+ (("my \\$python_return.*")
+ "my $python_return = \"Python 3.999\";\n"))
+ #t))
(delete 'build)
(add-after 'unpack 'hardcode-tool-references
(lambda* (#:key inputs #:allow-other-keys)
@@ -11100,14 +10913,18 @@ with narrow binding events such as transcription factor ChIP-seq.")
(string-append "$path_to_cutadapt = '"
(assoc-ref inputs "cutadapt")
"/bin/cutadapt'"))
- (("\\| gzip")
- (string-append "| "
+ (("\\$compression_path = \"gzip\"")
+ (string-append "$compression_path = \""
(assoc-ref inputs "gzip")
- "/bin/gzip"))
+ "/bin/gzip\""))
(("\"gunzip")
(string-append "\""
(assoc-ref inputs "gzip")
- "/bin/gunzip")))
+ "/bin/gunzip"))
+ (("\"pigz")
+ (string-append "\""
+ (assoc-ref inputs "pigz")
+ "/bin/pigz")))
#t))
(replace 'install
(lambda* (#:key outputs #:allow-other-keys)
@@ -11119,6 +10936,7 @@ with narrow binding events such as transcription factor ChIP-seq.")
(inputs
`(("gzip" ,gzip)
("perl" ,perl)
+ ("pigz" ,pigz)
("cutadapt" ,cutadapt)))
(native-inputs
`(("unzip" ,unzip)))
@@ -13461,15 +13279,41 @@ in RNA-seq data.")
(define-public python-scanpy
(package
(name "python-scanpy")
- (version "1.2.2")
+ (version "1.4")
+ ;; Fetch from git because the pypi tarball does not include tests.
(source
(origin
- (method url-fetch)
- (uri (pypi-uri "scanpy" version))
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/theislab/scanpy.git")
+ (commit version)))
+ (file-name (git-file-name name version))
(sha256
(base32
- "1ak7bxms5a0yvf65prppq2g38clkv7c7jnjbnfpkh3xxv7q512jz"))))
+ "0zn6x6c0cnm1a20i6isigwb51g3pr9zpjk8r1minjqnxi5yc9pm4"))))
(build-system python-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (replace 'check
+ (lambda* (#:key inputs #:allow-other-keys)
+ ;; These tests require Internet access.
+ (delete-file-recursively "scanpy/tests/notebooks")
+ (delete-file "scanpy/tests/test_clustering.py")
+
+ ;; TODO: No module named 'louvain'
+ (delete-file "scanpy/tests/test_rank_genes_groups_logreg.py")
+
+ ;; TODO: I can't get the plotting tests to work, even with Xvfb.
+ (delete-file "scanpy/tests/test_plotting.py")
+ (delete-file "scanpy/tests/test_preprocessing.py")
+ (delete-file "scanpy/tests/test_read_10x.py")
+
+ (setenv "PYTHONPATH"
+ (string-append (getcwd) ":"
+ (getenv "PYTHONPATH")))
+ (invoke "pytest")
+ #t)))))
(propagated-inputs
`(("python-anndata" ,python-anndata)
("python-igraph" ,python-igraph)
@@ -13485,7 +13329,9 @@ in RNA-seq data.")
("python-seaborn" ,python-seaborn)
("python-h5py" ,python-h5py)
("python-tables" ,python-tables)))
- (home-page "http://github.com/theislab/scanpy")
+ (native-inputs
+ `(("python-pytest" ,python-pytest)))
+ (home-page "https://github.com/theislab/scanpy")
(synopsis "Single-Cell Analysis in Python.")
(description "Scanpy is a scalable toolkit for analyzing single-cell gene
expression data. It includes preprocessing, visualization, clustering,
@@ -13494,6 +13340,38 @@ Python-based implementation efficiently deals with datasets of more than one
million cells.")
(license license:bsd-3)))
+(define-public python-bbknn
+ (package
+ (name "python-bbknn")
+ (version "1.3.1")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (pypi-uri "bbknn" version))
+ (sha256
+ (base32
+ "1qgdganvj3lyxj84v7alm23b9vqhwpn8z0115qndpnpy90qxynwz"))))
+ (build-system python-build-system)
+ (propagated-inputs
+ `(("python-annoy" ,python-annoy)
+ ("python-cython" ,python-cython)
+ ("python-faiss" ,python-faiss)
+ ("python-numpy" ,python-numpy)
+ ("python-scanpy" ,python-scanpy)))
+ (home-page "https://github.com/Teichlab/bbknn")
+ (synopsis "Batch balanced KNN")
+ (description "BBKNN is a batch effect removal tool that can be directly
+used in the Scanpy workflow. It serves as an alternative to
+@code{scanpy.api.pp.neighbors()}, with both functions creating a neighbour
+graph for subsequent use in clustering, pseudotime and UMAP visualisation. If
+technical artifacts are present in the data, they will make it challenging to
+link corresponding cell types across different batches. BBKNN actively
+combats this effect by splitting your data into batches and finding a smaller
+number of neighbours for each cell within each of the groups. This helps
+create connections between analogous cells in different batches without
+altering the counts or PCA space.")
+ (license license:expat)))
+
(define-public gffcompare
(let ((commit "be56ef4349ea3966c12c6397f85e49e047361c41")
(revision "1"))