summaryrefslogtreecommitdiff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r--gnu/packages/bioinformatics.scm1357
1 files changed, 1070 insertions, 287 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 3a4bcd093a..4198acecda 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -1,5 +1,5 @@
;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2014-2023 Ricardo Wurmus <rekado@elephly.net>
+;;; Copyright © 2014-2024 Ricardo Wurmus <rekado@elephly.net>
;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
;;; Copyright © 2015, 2016, 2018, 2019, 2020 Pjotr Prins <pjotr.guix@thebird.nl>
;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
@@ -8,7 +8,7 @@
;;; Copyright © 2016, 2020, 2022 Marius Bakke <marius@gnu.org>
;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
-;;; Copyright © 2017, 2021, 2022 Arun Isaac <arunisaac@systemreboot.net>
+;;; Copyright © 2017, 2021, 2022, 2024 Arun Isaac <arunisaac@systemreboot.net>
;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com>
;;; Copyright © 2018 Gábor Boskovits <boskovits@gmail.com>
;;; Copyright © 2018-2023 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
@@ -130,6 +130,7 @@
#:use-module (gnu packages pdf)
#:use-module (gnu packages perl)
#:use-module (gnu packages perl-check)
+ #:use-module (gnu packages perl-web)
#:use-module (gnu packages pkg-config)
#:use-module (gnu packages popt)
#:use-module (gnu packages protobuf)
@@ -161,6 +162,7 @@
#:use-module (gnu packages time)
#:use-module (gnu packages tls)
#:use-module (gnu packages uglifyjs)
+ #:use-module (gnu packages video)
#:use-module (gnu packages vim)
#:use-module (gnu packages web)
#:use-module (gnu packages wget)
@@ -573,6 +575,30 @@ BED, GFF/GTF, VCF.")
whole-genome bisulfite sequencing (WGBS) reads from directional protocol.")
(license license:asl2.0)))
+(define-public bustools
+ (package
+ (name "bustools")
+ (version "0.43.2")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/BUStools/bustools")
+ (commit (string-append "v" version))))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "068kjlc4d528269nl5mc3j8h2c95r1v545d3fi1iw1ckg8rba0hg"))))
+ (build-system cmake-build-system)
+ (arguments (list #:tests? #f)) ;no test target
+ (inputs (list zlib))
+ (home-page "https://bustools.github.io")
+ (synopsis "Tools for working with BUS files")
+ (description "bustools is a program for manipulating BUS files for single
+cell RNA-Seq datasets. It can be used to error correct barcodes, collapse
+UMIs, produce gene count or transcript compatibility count matrices, and is useful
+for many other tasks.")
+ (license license:bsd-2)))
+
(define-public cellsnp-lite
;; Last release is from November 2021 and does not contain fixes.
(let ((commit "0885d746b0b1ea65c8ef92f8943ca7669ca9734a")
@@ -616,6 +642,50 @@ Compared to cellSNP, this package is more efficient with higher speed and less
memory usage.")
(license license:asl2.0))))
+(define-public cpat
+ (package
+ (name "cpat")
+ (version "3.0.4")
+ (source (origin
+ (method url-fetch)
+ (uri (pypi-uri "CPAT" version))
+ (sha256
+ (base32
+ "0dfrwwbhv1n4nh2a903d1qfb30fgxgya89sa70aci3wzf8h2z0vd"))
+ (modules '((guix build utils)))
+ (snippet
+ '(for-each delete-file-recursively
+ (list ".eggs"
+ "lib/__pycache__/"
+ "lib/cpmodule/__pycache__/")))))
+ (build-system pyproject-build-system)
+ (arguments
+ (list
+ #:phases
+ '(modify-phases %standard-phases
+ (replace 'check
+ (lambda* (#:key tests? #:allow-other-keys)
+ (when tests?
+ (with-directory-excursion "test"
+ ;; There is no test4.fa
+ (substitute* "test.sh"
+ ((".*-g test4.fa.*") ""))
+ (invoke "bash" "test.sh"))))))))
+ (propagated-inputs
+ (list python-numpy python-pysam))
+ (inputs
+ (list r-minimal))
+ (home-page "https://wlcb.oit.uci.edu/cpat/")
+ (synopsis "Alignment-free distinction between coding and noncoding RNA")
+ (description
+ "CPAT is a method to distinguish coding and noncoding RNA by using a
+logistic regression model based on four pure sequence-based, linguistic
+features: ORF size, ORF coverage, Ficket TESTCODE, and Hexamer usage bias.
+Linguistic features based method does not require other genomes or protein
+databases to perform alignment and is more robust. Because it is
+alignment-free, it runs much faster and also easier to use.")
+ (license license:gpl2+)))
+
(define-public pbcopper
(package
(name "pbcopper")
@@ -876,6 +946,38 @@ attributes of microbiome data - zero-inflation and over-dispersion, are
simultaneously considered.")
(license license:gpl3))))
+(define-public r-ewastools
+ (let ((commit "f7646cacd73266708479b3fea5d625054d179f95")
+ (revision "1"))
+ (package
+ (name "r-ewastools")
+ (version (git-version "1.7.2" revision commit))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/hhhh5/ewastools/")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "0irarlnxfnasa755adxsn67rxsy01zwhjhw18g4cag08cqiyyw41"))))
+ (properties `((upstream-name . "ewastools")))
+ (build-system r-build-system)
+ (propagated-inputs
+ (list r-data-table
+ r-igraph
+ r-illuminaio
+ r-mblm
+ r-quadprog))
+ (native-inputs (list r-knitr))
+ (home-page "https://github.com/hhhh5/ewastools/")
+ (synopsis
+ "Quality control toolset for the Illumina Infinium DNA methylation")
+ (description
+ "This package provides a collection of useful functions for working
+with DNA methylation micro-array data.")
+ (license license:unlicense))))
+
(define-public r-numbat
(let ((commit "4ab7752e7d267a3f443756675728521a9b0a7295")
(revision "1"))
@@ -1037,12 +1139,48 @@ of single-cell data using Seurat, RcppML nmf, SingleCellExperiments and
similar.")
(license license:gpl2+))))
+(define-public r-stacas
+ (package
+ (name "r-stacas")
+ (version "2.2.0")
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/carmonalab/STACAS")
+ (commit version)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "13i0h5i6vlbrb8ndq9gr81560z9d74b2c7m3rjfzls01irjza9hm"))))
+ (properties `((upstream-name . "STACAS")))
+ (build-system r-build-system)
+ (propagated-inputs
+ (list r-biocneighbors
+ r-biocparallel
+ r-ggplot2
+ r-ggridges
+ r-pbapply
+ r-r-utils
+ r-seurat))
+ (home-page "https://github.com/carmonalab/STACAS")
+ (synopsis "Sub-type anchoring correction for alignment in Seurat")
+ (description
+ "This package implements methods for batch correction and integration of
+scRNA-seq datasets, based on the Seurat anchor-based integration framework.
+In particular, STACAS is optimized for the integration of heterogenous
+datasets with only limited overlap between cell sub-types (e.g. TIL sets of
+CD8 from tumor with CD8/CD4 T cells from lymphnode), for which the default
+Seurat alignment methods would tend to over-correct biological differences.
+The 2.0 version of the package allows the users to incorporate explicit
+information about cell-types in order to assist the integration process.")
+ (license license:gpl3)))
+
(define-public r-stringendo
- (let ((commit "83b8f2d82a09b33b9e895438bb523a021138be01")
+ (let ((commit "15594b1bba11048a812874bafec0eea1dcc8618a")
(revision "1"))
(package
(name "r-stringendo")
- (version (git-version "0.3.4" revision commit))
+ (version (git-version "0.6.0" revision commit))
(source (origin
(method git-fetch)
(uri (git-reference
@@ -1051,10 +1189,10 @@ similar.")
(file-name (git-file-name name version))
(sha256
(base32
- "1ap0nhbyd6xx0yl2vgmwk38p22yrkv4k9hw13r35z4wf343rry6v"))))
+ "15ij4zf2j9c8m9n4bqhmxkchjh2bhddwjfxngfpwv7c5wjqyi6ir"))))
(properties `((upstream-name . "Stringendo")))
(build-system r-build-system)
- (propagated-inputs (list r-devtools r-usethis))
+ (propagated-inputs (list r-clipr))
(home-page "https://github.com/vertesy/Stringendo")
(synopsis "Stringendo is a string parsing library")
(description
@@ -1063,11 +1201,11 @@ plotnames, filenames and paths.")
(license license:gpl3))))
(define-public r-readwriter
- (let ((commit "71454f4aa706f5d2fbe606acd95abc14224e7058")
+ (let ((commit "91373c44641014a1ce8e1c3e928747608aae8f54")
(revision "1"))
(package
(name "r-readwriter")
- (version (git-version "0.2.9" revision commit))
+ (version (git-version "1.5.3" revision commit))
(source (origin
(method git-fetch)
(uri (git-reference
@@ -1076,11 +1214,11 @@ plotnames, filenames and paths.")
(file-name (git-file-name name version))
(sha256
(base32
- "0sp27smhdva2hi2x0svia2l56k8xrh7p5akn78g5b0lcvz4x3hd7"))))
+ "156kvmplrip0w1zhs9yl5r0ayjipa0blhy614l65hbsjn1lwbskr"))))
(properties `((upstream-name . "ReadWriter")))
(build-system r-build-system)
(propagated-inputs
- (list r-gdata r-gtools r-openxlsx r-readr r-stringendo))
+ (list r-gtools r-openxlsx r-readr r-stringendo))
(home-page "https://github.com/vertesy/ReadWriter")
(synopsis "Functions to read and write files conveniently")
(description
@@ -1123,6 +1261,32 @@ shape. This package provides an @code{htmlwidget} for building streamgraph
visualizations.")
(license license:expat))))
+(define-public r-wasabi
+ (let ((commit "8c33cabde8d18c2657cd6e38e7cb834f87cf9846")
+ (revision "1"))
+ (package
+ (name "r-wasabi")
+ (version (git-version "1.0.1" revision commit))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/COMBINE-lab/wasabi")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "0rpdj6n4cnx8n2zl60dzgl638474sg49dknwi9x3qb4g56dpphfa"))))
+ (properties `((upstream-name . "wasabi")))
+ (build-system r-build-system)
+ (propagated-inputs (list r-data-table r-rhdf5 r-rjson))
+ (home-page "https://github.com/COMBINE-lab/wasabi")
+ (synopsis "Use Sailfish and Salmon with Sleuth")
+ (description
+ "This package converts the output of the Sailfish and Salmon RNA-seq
+quantification tools so that it can be used with the Sleuth differential
+analysis package.")
+ (license license:bsd-3))))
+
(define-public pbbam
(package
(name "pbbam")
@@ -1757,6 +1921,8 @@ biological activities from omics data within a unified framework.")
(base32
"1bhyxqjk44bmyd26m1smapf68wyf7252kk65i27k50dd3kswgnd6"))))
(build-system pyproject-build-system)
+ ;; There are no tests.
+ (arguments (list #:tests? #false))
(propagated-inputs
(list python-docopt
python-importlib-metadata
@@ -1946,6 +2112,29 @@ matplotlib Axes objects, making them easy to style and incorporate into
multi-panel figures.")
(license license:expat)))
+(define-public python-parabam
+ (package
+ (name "python-parabam")
+ (version "3.0.1")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (pypi-uri "parabam" version))
+ (sha256
+ (base32 "1cy9q3gzdawi1kilycpd7waymjmrwsg8czwycfp13g301ir9xyp3"))
+ (modules '((guix build utils)))
+ (snippet
+ '(substitute* "setup.py"
+ (("'argparse',") "")))))
+ (build-system pyproject-build-system)
+ (propagated-inputs (list python-numpy python-pysam))
+ (home-page "https://github.com/cancerit/parabam")
+ (synopsis "Parallel BAM File Analysis")
+ (description "Parabam is a tool for processing sequencing files in
+parallel. It uses Python's native multiprocessing framework to apply a user
+defined rule on an input file.")
+ (license license:gpl3)))
+
(define-public python-peaks2utr
(package
(name "python-peaks2utr")
@@ -1993,6 +2182,8 @@ three prime UTR.")
(base32
"0gqygspdy398vjymdy6756jmk99s7fhwav9rivdx59kpqjcdxaz9"))))
(build-system pyproject-build-system)
+ ;; There are no tests.
+ (arguments (list #:tests? #false))
(propagated-inputs
(list python-anndata
python-docopt
@@ -2226,6 +2417,66 @@ sequencing data and the end result are tables of UMI-unique DamID and CEL-Seq
counts.")
(license license:expat)))
+(define-public python-snaptools
+ (package
+ (name "python-snaptools")
+ (version "1.4.8")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (pypi-uri "snaptools" version))
+ (sha256
+ (base32
+ "1s5373g5jjbshh3q39zy7dlxr7nda6ksxq9d1gw46h82c4fsmfbn"))))
+ (build-system pyproject-build-system)
+ (arguments (list #:tests? #false)) ;there are none
+ (propagated-inputs
+ (list python-future
+ python-h5py
+ python-louvain
+ python-numpy
+ python-pybedtools
+ python-pysam))
+ (home-page "https://github.com/r3fang/SnapTools")
+ (synopsis "Tools for processing snap files" )
+ (description
+ "@code{SnapTools} can operate on snap files the following types of
+operations:
+
+@itemize
+@item index the reference genome before alignment;
+@item align reads to the corresponding reference genome;
+@item pre-process by convert pair-end reads into fragments, checking the
+ mapping quality score, alingment and filtration;
+@item create the cell-by-bin matrix.
+@end itemize")
+ (license license:asl2.0)))
+
+(define-public python-telomerecat
+ (package
+ (name "python-telomerecat")
+ (version "4.0.2")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (pypi-uri "telomerecat" version))
+ (sha256
+ (base32 "16mfdqmp0j6g3h26h59334w9lqb4qihqrlzwvgznj0fiqs1rkxn2"))))
+ (build-system pyproject-build-system)
+ (arguments (list #:tests? #false)) ;there are none
+ (propagated-inputs (list python-click python-numpy python-pandas
+ python-parabam python-pysam))
+ (home-page "https://github.com/cancerit/telomerecat")
+ (synopsis "Telomere computational analysis tool")
+ (description "Telomerecat is a tool for estimating the average telomere
+length (TL) for a paired end, whole genome sequencing (WGS) sample.
+
+Telomerecat is adaptable, accurate and fast. The algorithm accounts for
+sequencing amplification artifacts, anneouploidy (common in cancer samples)
+and noise generated by WGS. For a high coverage WGS BAM file of around 100GB
+telomerecat can produce an estimate in ~1 hour.")
+ (license license:gpl3)))
+
(define-public python-bioframe
(package
(name "python-bioframe")
@@ -2401,6 +2652,7 @@ alignments and perform the following operations:
(base32
"15m6ffks4zwpp1ycwk6n02py6mw2yh7qr0vhpc178b91gldr97ia"))))
(build-system pyproject-build-system)
+ (arguments (list #:tests? #false)) ;there are none
(propagated-inputs (list python-pandas))
(home-page "https://github.com/alexomics/read-paf")
(synopsis "Minimap2 PAF file reader")
@@ -3767,19 +4019,27 @@ UCSC genome browser.")
(define-public python-plastid
(package
(name "python-plastid")
- (version "0.6.0")
+ (version "0.6.1")
(source (origin
(method git-fetch)
(uri (git-reference
(url "https://github.com/joshuagryphon/plastid")
- (commit (string-append "v" version))))
+ (commit "d97f239d73b3a7c2eff46f71928b777431891f90")))
(file-name (git-file-name name version))
(sha256
(base32
- "1ka9j08j6i105l89w8b7sg0l8lm3lcrxzy4cjl5dp4cxdmycap62"))))
+ "0iccpywlpf1ws46279z9rl0l29pil0rj0g2j5nvqq7jfbnq581cf"))))
(build-system pyproject-build-system)
(arguments
(list
+ #:test-flags
+ '(list "plastid/test"
+ ;; These four failures look like errors in the test wrapper
+ ;; class.
+ "-k" (string-append "not test_chrom_sizes"
+ " and not test_no_crash_if_file_not_exist"
+ " and not test_fiveprime_variable"
+ " and not test_fiveprime_variable_from_file"))
#:phases
#~(modify-phases %standard-phases
(add-after 'unpack 'unpack-test-data
@@ -4255,6 +4515,107 @@ annotations of the genome.")
other types of unwanted sequence from high-throughput sequencing reads.")
(license license:expat)))
+(define-public lammps
+ (let ((commit "stable_2Aug2023_update2"))
+ (package
+ (name "lammps")
+ (version (string-append "0." commit))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/lammps/lammps.git")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "11xagacgxgldkx34qdzyjrjvn8x3hpl0kgzhh9zh7skpq79pwycz"))))
+ (build-system gnu-build-system)
+ (arguments
+ (list
+ #:tests? #f ; no check target
+ #:make-flags
+ '(list "CC=mpicc" "mpi"
+ "LMP_INC=-DLAMMPS_GZIP \
+-DLAMMPS_JPEG -DLAMMPS_PNG -DLAMMPS_FFMPEG -DLAMMPS_MEMALIGN=64"
+ "LIB=-gz -ljpeg -lpng -lavcodec")
+ #:phases
+ #~(modify-phases %standard-phases
+ (add-after 'unpack 'chdir
+ (lambda _ (chdir "src")))
+ (replace 'configure
+ (lambda _
+ (substitute* "MAKE/Makefile.mpi"
+ (("SHELL =.*")
+ (string-append "SHELL=" (which "bash") "\n"))
+ (("cc ") "mpicc "))
+ (substitute* "Makefile"
+ (("SHELL =.*")
+ (string-append "SHELL=" (which "bash") "\n")))))
+ (add-after 'configure 'configure-modules
+ (lambda _
+ (invoke "make"
+ "yes-molecule"
+ "yes-misc"
+ "yes-granular"
+ (string-append "HDF5_PATH="
+ #$(this-package-input "hdf5")))))
+ (replace 'install
+ (lambda _
+ (let ((bin (string-append #$output "/bin")))
+ (mkdir-p bin)
+ (install-file "lmp_mpi" bin)))))))
+ (inputs
+ (list ffmpeg
+ gfortran
+ gzip
+ hdf5
+ libjpeg-turbo
+ libpng
+ openmpi
+ python-wrapper))
+ (native-inputs (list bc))
+ (home-page "https://www.lammps.org/")
+ (synopsis "Classical molecular dynamics simulator")
+ (description "LAMMPS is a classical molecular dynamics simulator
+designed to run efficiently on parallel computers. LAMMPS has potentials for
+solid-state materials (metals, semiconductors), soft matter (biomolecules,
+polymers), and coarse-grained or mesoscopic systems. It can be used to model
+atoms or, more generically, as a parallel particle simulator at the atomic,
+meso, or continuum scale.")
+ (license license:gpl2+))))
+
+(define-public lammps-serial
+ (package
+ (inherit lammps)
+ (name "lammps-serial")
+ (arguments
+ (substitute-keyword-arguments (package-arguments lammps)
+ ((#:make-flags flags)
+ '(list "CC=gcc" "serial"
+ "LMP_INC=-DLAMMPS_GZIP \
+-DLAMMPS_JPEG -DLAMMPS_PNG -DLAMMPS_FFMPEG -DLAMMPS_MEMALIGN=64"
+ "LIB=-gz -ljpeg -lpng -lavcodec"))
+ ((#:phases phases)
+ #~(modify-phases #$phases
+ (replace 'configure
+ (lambda _
+ (substitute* "MAKE/Makefile.serial"
+ (("SHELL =.*")
+ (string-append "SHELL=" (which "bash") "\n"))
+ (("cc ") "gcc "))
+ (substitute* "Makefile"
+ (("SHELL =.*")
+ (string-append "SHELL=" (which "bash") "\n")))))
+ (replace 'install
+ (lambda _
+ (let ((bin (string-append #$output "/bin")))
+ (mkdir-p bin)
+ (install-file "lmp_serial" bin))))))))
+ (inputs
+ (modify-inputs (package-inputs lammps)
+ (delete "openmpi")))))
+
(define-public libbigwig
(package
(name "libbigwig")
@@ -4374,58 +4735,43 @@ and record oriented data modeling and the Semantic Web.")
(define-public python-scikit-bio
(package
(name "python-scikit-bio")
- (version "0.5.7")
+ (version "0.5.9")
(source (origin
(method url-fetch)
(uri (pypi-uri "scikit-bio" version))
(sha256
(base32
- "1a8xbp3vrw8wfpm3pa2nb4rcar0643iqnb043ifwqbqyc86clhv3"))))
+ "0429060pkyq1pm19zb2n1la7czh7b633mp4a4h01j8zfigf49q3s"))
+ (patches (search-patches "python-scikit-bio-1887.patch"))))
(build-system pyproject-build-system)
(arguments
(list
+ ;; Accuracy problem
+ #:test-flags '(list "-k" "not test_fisher_alpha")
#:phases
'(modify-phases %standard-phases
- ;; See https://github.com/biocore/scikit-bio/pull/1826
(add-after 'unpack 'compatibility
(lambda _
- (substitute* "skbio/sequence/tests/test_sequence.py"
- (("def test_concat_strict_many")
- "def _do_not_test_concat_strict_many"))
- (substitute* "skbio/stats/distance/_mantel.py"
- (("from scipy.stats import PearsonRConstantInputWarning")
- "from scipy.stats import ConstantInputWarning")
- (("from scipy.stats import PearsonRNearConstantInputWarning")
- "from scipy.stats import NearConstantInputWarning")
- (("from scipy.stats import SpearmanRConstantInputWarning") "")
- (("warnings.warn\\(PearsonRConstantInputWarning\\(\\)\\)")
- "warnings.warn(ConstantInputWarning())")
- (("warnings.warn\\(PearsonRNearConstantInputWarning\\(\\)\\)")
- "warnings.warn(NearConstantInputWarning())")
- (("warnings.warn\\(SpearmanRConstantInputWarning\\(\\)\\)")
- "warnings.warn(ConstantInputWarning())"))
- (substitute* "skbio/diversity/alpha/tests/test_base.py"
- (("self.assertEqual\\(pielou_e")
- "self.assertAlmostEqual(pielou_e"))))
+ (substitute* "skbio/diversity/__init__.py"
+ ((", numeric_only=True") ""))))
(add-before 'check 'build-extensions
(lambda _
;; Cython extensions have to be built before running the tests.
(invoke "python3" "setup.py" "build_ext" "--inplace")))
(replace 'check
- (lambda* (#:key tests? #:allow-other-keys)
- (when tests? (invoke "python3" "-m" "skbio.test")))))))
+ (lambda* (#:key tests? test-flags #:allow-other-keys)
+ (when tests?
+ (apply invoke "python3" "-m" "skbio.test" test-flags)))))))
(propagated-inputs
- (list python-cachecontrol
- python-decorator
+ (list python-decorator
python-h5py
python-hdmedians
python-ipython
- python-lockfile
python-matplotlib
python-natsort
python-numpy
python-pandas
- python-scikit-learn
+ python-requests
python-scipy))
(native-inputs
(list python-coverage python-pytest))
@@ -4759,6 +5105,126 @@ be of arbitrary length. Repeats with pattern size in the range from 1 to 2000
bases are detected.")
(license license:agpl3+)))
+(define-public trinityrnaseq
+ (package
+ (name "trinityrnaseq")
+ (version "2.13.2")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/trinityrnaseq/trinityrnaseq.git")
+ (commit (string-append "Trinity-v" version))
+ (recursive? #true)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "1qszrxqbx4q5pavpgm4rkrh1z1v1mf7qx83vv3fnlqdmncnsf1gv"))))
+ (build-system gnu-build-system)
+ (arguments
+ (list
+ #:test-target "test"
+ #:modules
+ '((guix build gnu-build-system)
+ (guix build utils)
+ (ice-9 match)
+ (srfi srfi-1))
+ #:make-flags
+ #~(list (string-append "CC=" #$(cc-for-target)))
+ #:phases
+ #~(modify-phases %standard-phases
+ (replace 'configure
+ (lambda _
+ (setenv "SHELL" (which "sh"))
+ (setenv "CONFIG_SHELL" (which "sh"))
+ ;; Do not require version.h, which triggers a local build of a
+ ;; vendored htslib.
+ (substitute* "trinity-plugins/bamsifter/Makefile"
+ (("sift_bam_max_cov.cpp htslib/version.h")
+ "sift_bam_max_cov.cpp"))))
+ (add-after 'build 'build-plugins
+ (lambda _
+ ;; Run this in the subdirectory to avoid running the
+ ;; tests right here.
+ (with-directory-excursion "trinity-plugins"
+ (invoke "make" "plugins"))))
+ ;; The install script uses rsync, provides no overrides for the
+ ;; default location at /usr/local/bin, and patching it would change
+ ;; all lines that do something.
+ (replace 'install
+ (lambda* (#:key inputs #:allow-other-keys)
+ (let ((share (string-append #$output "/share/trinity/"))
+ (bin (string-append #$output "/bin/")))
+ (mkdir-p bin)
+ (copy-recursively "." share)
+ (delete-file (string-append share "/Chrysalis/build/CMakeFiles/CMakeOutput.log"))
+ (delete-file (string-append share "/Inchworm/build/CMakeFiles/CMakeOutput.log"))
+
+ (wrap-program (string-append share "Trinity")
+ `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))
+ `("PERL5LIB" ":" = (,(getenv "PERL5LIB")))
+ `("PYTHONPATH" ":" = (,(getenv "GUIX_PYTHONPATH")))
+ `("PATH" ":" =
+ ,(cons (string-append share "/trinity-plugins/BIN")
+ (filter-map (match-lambda
+ ((name . dir)
+ (string-append dir "/bin")))
+ inputs))))
+ (symlink (string-append share "Trinity")
+ (string-append bin "Trinity"))))))))
+ (inputs
+ (list blast+
+ bowtie
+ fastqc
+ hisat
+ htslib
+ icedtea-8
+ jellyfish
+ kallisto
+ multiqc
+ perl
+ perl-uri-escape
+ python-numpy
+ python-wrapper
+ r-ape
+ r-argparse
+ r-biobase
+ r-ctc
+ r-deseq2
+ r-edger
+ r-fastcluster
+ r-glimma
+ r-goplot
+ r-goseq
+ r-gplots
+ r-minimal
+ r-qvalue
+ r-rots
+ r-sm
+ r-tidyverse
+ rsem
+ salmon
+ samtools
+ sra-tools
+ star
+ zlib))
+ (propagated-inputs
+ (list coreutils
+ gzip
+ which))
+ (native-inputs (list cmake))
+ (home-page "https://github.com/trinityrnaseq/trinityrnaseq/wiki")
+ (synopsis "Trinity RNA-Seq de novo transcriptome assembly")
+ (description "Trinity assembles transcript sequences from Illumina RNA-Seq
+data. Trinity represents a novel method for the efficient and robust de novo
+reconstruction of transcriptomes from RNA-seq data. Trinity combines three
+independent software modules: Inchworm, Chrysalis, and Butterfly, applied
+sequentially to process large volumes of RNA-seq reads. Trinity partitions
+the sequence data into many individual de Bruijn graphs, each representing the
+transcriptional complexity at a given gene or locus, and then processes each
+graph independently to extract full-length splicing isoforms and to tease
+apart transcripts derived from paralogous genes.")
+ (license license:bsd-3)))
+
(define-public repeat-masker
(package
(name "repeat-masker")
@@ -5334,6 +5800,8 @@ Illumina paired-end data (for CASAVA 1.8+).")
(base32
"124kdg7168nbh4a5wisfws1fgkd89dd4js9v6dml2lvgclbv4mjg"))))
(build-system pyproject-build-system)
+ ;; This contains two test data files but no way to run the tests.
+ (arguments (list #:tests? #false))
(propagated-inputs
(list python-biopython python-matplotlib python-pandas
python-scipy python-seaborn))
@@ -6691,7 +7159,7 @@ performance.")
(define-public htscodecs
(package
(name "htscodecs")
- (version "1.5.1")
+ (version "1.6.0")
(source (origin
(method url-fetch)
(uri (string-append "https://github.com/samtools/htscodecs/"
@@ -6699,7 +7167,7 @@ performance.")
version "/htscodecs-" version ".tar.gz"))
(sha256
(base32
- "0nykdf08wil6iiihgf5qlb04n70yv4zqqj7c27vpnpwpr2r2ns62"))))
+ "1h0827g9svil9jnjbpdlxjbl44rai5b95m61hs9ifbqrz9nvnjjb"))))
(build-system gnu-build-system)
(inputs (list bzip2 zlib))
(home-page "https://github.com/samtools/htscodecs")
@@ -6717,7 +7185,7 @@ name/ID compression and quality score compression derived from fqzcomp.")
(define-public htslib
(package
(name "htslib")
- (version "1.16")
+ (version "1.19")
(source (origin
(method url-fetch)
(uri (string-append
@@ -6725,7 +7193,7 @@ name/ID compression and quality score compression derived from fqzcomp.")
version "/htslib-" version ".tar.bz2"))
(sha256
(base32
- "093r1n4s134k50m9a925yn95gyi90ps5dlgc6gq4qwvkzxx7qsv0"))
+ "0dh79lwpspwwfbkmllrrhbk8nkvlfc5b5ib4d0xg5ld79w6c8lc7"))
(snippet
#~(begin
(use-modules (guix build utils))
@@ -8017,20 +8485,20 @@ to the user's query of interest.")
(define-public samtools
(package
(name "samtools")
- (version "1.14")
+ (version "1.19")
(source
(origin
(method url-fetch)
(uri
- (string-append "mirror://sourceforge/samtools/samtools/"
- version "/samtools-" version ".tar.bz2"))
+ (string-append "https://github.com/samtools/samtools"
+ "/releases/download/" version
+ "/samtools-" version ".tar.bz2"))
(sha256
(base32
- "0x3xdda78ac5vx66b3jdsv9sfhyz4npl4znl1zbaf3lbm6xdlhck"))
+ "10wby07w33rfypy4kf73v9wwnbyh0lrazbsmrgrvcl88w8c3nszs"))
(modules '((guix build utils)))
- (snippet '(begin
- ;; Delete bundled htslib.
- (delete-file-recursively "htslib-1.14")))))
+ ;; Delete bundled htslib.
+ (snippet '(delete-file-recursively "htslib-1.19"))))
(build-system gnu-build-system)
(arguments
`(#:configure-flags (list "--with-ncurses")
@@ -8053,6 +8521,26 @@ variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
(license license:expat)))
+(define-public samtools-1.14
+ (package/inherit samtools
+ (version "1.14")
+ (source
+ (origin
+ (method url-fetch)
+ (uri
+ (string-append "mirror://sourceforge/samtools/samtools/"
+ version "/samtools-" version ".tar.bz2"))
+ (sha256
+ (base32
+ "0x3xdda78ac5vx66b3jdsv9sfhyz4npl4znl1zbaf3lbm6xdlhck"))
+ (modules '((guix build utils)))
+ (snippet '(begin
+ ;; Delete bundled htslib.
+ (delete-file-recursively "htslib-1.14")))))
+ (native-inputs (list pkg-config))
+ (inputs
+ (list htslib-1.14 ncurses perl python zlib))))
+
(define-public samtools-1.12
(package/inherit samtools
(version "1.12")
@@ -9891,6 +10379,51 @@ tasks.")
Pore-C concatemers.")
(license license:gpl3))))
+(define-public r-dnamcrosshyb
+ ;; There aren't any releases.
+ (let ((commit "fe8acb33667e81f00dcb84e0fa75c87ab2db5d8f")
+ (revision "1"))
+ (package
+ (name "r-dnamcrosshyb")
+ (version (git-version "0.0.0.9000" revision commit))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/pjhop/DNAmCrosshyb")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "12j1xsiqpvny5rp23z1az0k4cj5ajbcwkg65z00s16vywi2rx6nb"))))
+ (properties `((upstream-name . "DNAmCrosshyb")))
+ (build-system r-build-system)
+ (propagated-inputs
+ (list r-biocgenerics
+ r-biocparallel
+ r-biostrings
+ r-bsgenome-hsapiens-ucsc-hg19-masked
+ r-bsgenome-hsapiens-ucsc-hg38-masked
+ r-dplyr
+ r-genomicranges
+ r-ggplot2
+ r-iranges
+ r-magrittr
+ r-minfi
+ r-purrr
+ r-s4vectors
+ r-shiny
+ r-stringi
+ r-stringr
+ r-tibble
+ r-tidyr
+ r-watermelon))
+ (home-page "https://github.com/pjhop/DNAmCrosshyb")
+ (synopsis "DNAmCrosshyb")
+ (description
+ "This package provides helper functions to detect cross-hybridization
+on Illumina DNAm arrays.")
+ (license license:gpl3))))
+
(define-public r-doubletcollection
(let ((commit "c0d62f1853942ee6a087eaf7b000d9e4261e2dfd")
(revision "1"))
@@ -10134,6 +10667,51 @@ data. This package includes panel editing or renaming for FCS files,
bead-based normalization and debarcoding.")
(license license:gpl3))))
+(define-public r-projectils
+ (let ((commit "cc73b97471b4b6eea11ce779b5c4a7dc5c3e1709")
+ (revision "1"))
+ (package
+ (name "r-projectils")
+ (version (git-version "3.0.0" revision commit))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/carmonalab/ProjecTILs")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "0dpzvbhhb9andnj7angpj32cgkwd6rs6qgpl6i21pqzcn6vqqhqw"))))
+ (properties `((upstream-name . "ProjecTILs")))
+ (build-system r-build-system)
+ (propagated-inputs
+ (list r-biocneighbors
+ r-biocparallel
+ r-dplyr
+ r-ggplot2
+ r-matrix
+ r-patchwork
+ r-pheatmap
+ r-pracma
+ r-purrr
+ r-rcolorbrewer
+ r-reshape2
+ r-scales
+ r-scgate
+ r-seurat
+ r-seuratobject
+ r-stacas
+ r-ucell
+ r-umap
+ r-uwot))
+ (home-page "https://github.com/carmonalab/ProjecTILs")
+ (synopsis "Reference-based analysis of scRNA-seq data")
+ (description
+ "This package implements methods to project single-cell RNA-seq data
+onto a reference atlas, enabling interpretation of unknown cell transcriptomic
+states in the the context of known, reference states.")
+ (license license:gpl3))))
+
(define-public r-presto
(let ((commit "052085db9c88aa70a28d11cc58ebc807999bf0ad")
(revision "0"))
@@ -10168,55 +10746,54 @@ auROC analysis.")
(license license:gpl3))))
(define-public r-sccustomize
- (let ((commit "8414d1f5fb32277855b0619191a568932b7baeb0")
+ (let ((commit "397374590dae2ccc0c560897dcd1ce4382c18798")
(revision "1"))
(package
(name "r-sccustomize")
- (version (git-version "0.7.0" revision commit))
- (source (origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/samuel-marsh/scCustomize")
- (commit commit)))
- (file-name (git-file-name name version))
- (sha256
- (base32
- "1wcgfq7lx83a2kf8pjbw524gdvxf351n08cwd5wzmmy57kf4knbj"))))
+ (version (git-version "2.0.1" revision commit))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/samuel-marsh/scCustomize")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "11bafm0mlck27fqd8brz80pxb8dc5q0aqbp8zv0s9sx97njp7wsl"))))
(properties `((upstream-name . "scCustomize")))
(build-system r-build-system)
- (propagated-inputs
- (list r-circlize
- r-colorway
- r-cowplot
- r-data-table
- r-dittoseq
- r-dplyr
- r-forcats
- r-ggbeeswarm
- r-ggplot2
- r-ggprism
- r-ggpubr
- r-ggrastr
- r-ggrepel
- r-glue
- r-janitor
- r-magrittr
- r-matrix
- r-paletteer
- r-patchwork
- r-pbapply
- r-purrr
- r-remotes
- r-scales
- r-scattermore
- r-seurat
- r-seuratobject
- r-stringi
- r-stringr
- r-tibble
- r-tidyr
- r-tidyselect
- r-viridis))
+ (propagated-inputs (list r-circlize
+ r-colorway
+ r-cowplot
+ r-data-table
+ r-dittoseq
+ r-dplyr
+ r-forcats
+ r-ggbeeswarm
+ r-ggplot2
+ r-ggprism
+ r-ggpubr
+ r-ggrastr
+ r-ggrepel
+ r-glue
+ r-janitor
+ r-magrittr
+ r-matrix
+ r-paletteer
+ r-patchwork
+ r-pbapply
+ r-purrr
+ r-remotes
+ r-scales
+ r-scattermore
+ r-seurat
+ r-seuratobject
+ r-stringi
+ r-stringr
+ r-tibble
+ r-tidyr
+ r-tidyselect
+ r-viridis))
(native-inputs (list r-knitr))
(home-page "https://github.com/samuel-marsh/scCustomize")
(synopsis "Custom visualization and analyses of single-cell sequencing")
@@ -10349,6 +10926,43 @@ analysis of cell types, subtypes, transcriptional gradients,cell-cycle
variation, gene modules and their regulatory models and more.")
(license license:expat))))
+(define-public r-sleuth
+ (package
+ (name "r-sleuth")
+ (version "0.30.1")
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/pachterlab/sleuth")
+ (commit (string-append "v" version))))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "09xgc7r6iisjkk0c0wn0q56zy0aph386kphwixfzq4422y7vlqci"))))
+ (properties `((upstream-name . "sleuth")))
+ (build-system r-build-system)
+ (propagated-inputs (list r-aggregation
+ r-data-table
+ r-dplyr
+ r-ggplot2
+ r-lazyeval
+ r-matrixstats
+ r-pheatmap
+ r-reshape2
+ r-rhdf5
+ r-shiny
+ r-tidyr))
+ (native-inputs (list r-knitr))
+ (home-page "https://github.com/pachterlab/sleuth")
+ (synopsis "Tools for investigating RNA-Seq")
+ (description
+ "Sleuth is a program for differential analysis of RNA-Seq data.
+It makes use of quantification uncertainty estimates obtained via Kallisto for
+accurate differential analysis of isoforms or genes, allows testing in the
+context of experiments with complex designs, and supports interactive
+exploratory data analysis via sleuth live.")
+ (license license:gpl3)))
+
(define-public r-snapatac
(package
(name "r-snapatac")
@@ -10579,6 +11193,65 @@ single-cell data.")
"This package is designed to streamline scATAC analyses in R.")
(license license:gpl2+))))
+(define-public r-azimuth
+ (let ((commit "243ee5db80fcbffa3452c944254a325a3da2ef9e")
+ (revision "1"))
+ (package
+ (name "r-azimuth")
+ (version (git-version "0.5.0" revision commit))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/satijalab/azimuth")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "0vs4813kf2cv2j1z800ihwk62bw4dgmwgkd47rln58581chv39zm"))))
+ (properties `((upstream-name . "Azimuth")))
+ (build-system r-build-system)
+ (propagated-inputs (list r-bsgenome-hsapiens-ucsc-hg38
+ r-dt
+ r-ensdb-hsapiens-v86
+ r-future
+ r-ggplot2
+ r-glmgampoi
+ r-googlesheets4
+ r-hdf5r
+ r-htmltools
+ r-httr
+ r-jaspar2020
+ r-jsonlite
+ r-matrix
+ r-patchwork
+ r-plotly
+ r-presto
+ r-rcpp
+ r-rlang
+ r-scales
+ r-seurat
+ r-seuratdata
+ r-seuratdisk
+ r-seuratobject
+ r-shiny
+ r-shinybs
+ r-shinydashboard
+ r-shinyjs
+ r-signac
+ r-stringr
+ r-tfbstools
+ r-withr))
+ (home-page "https://github.com/satijalab/azimuth")
+ (synopsis
+ "Shiny app showcasing a single-cell data query-reference mapping algorithm")
+ (description "Azimuth utilizes an annotated reference dataset. It
+automates the processing, analysis, and interpretation. This applies
+specifically to new single-cell RNA-seq or ATAC-seq experiments. Azimuth
+leverages a reference-based mapping pipeline that inputs acounts matrix and
+performs normalization, visualization, cell annotation, and differential
+expression.")
+ (license license:gpl3))))
+
(define-public r-icellnet
;; v1.0 tagged in 2020, last commit contains many fixes.
;; DESCRIPTION says Version: 0.0.0.9000.
@@ -10665,6 +11338,40 @@ of transcriptional heterogeneity among single cells.")
;; See https://github.com/hms-dbmi/scde/issues/38
(license license:gpl2)))
+(define-public r-miamiplot
+ (let ((commit "beede9c5d6431b4d822aa42e064e01baeb5dd4a0")
+ (revision "1"))
+ (package
+ (name "r-miamiplot")
+ (version (git-version "1.1.0" revision commit))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/juliedwhite/miamiplot")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "0wxxk1lk9jbf0imf59qp302ffasvs84idinkvzirs3dw9w3589n9"))))
+ (properties `((upstream-name . "miamiplot")))
+ (build-system r-build-system)
+ (propagated-inputs (list r-checkmate
+ r-dplyr
+ r-ggplot2
+ r-ggrepel
+ r-gridextra
+ r-magrittr
+ r-rlang))
+ (native-inputs (list r-knitr))
+ (home-page "https://github.com/juliedwhite/miamiplot")
+ (synopsis "Create a ggplot2 miami plot")
+ (description
+ "This package generates a Miami plot with centered chromosome labels.
+The output is a ggplot2 object. Users can specify which data they want
+plotted on top vs. bottom, whether to display significance line(s), what
+colors to give chromosomes, and what points to label.")
+ (license license:gpl2))))
+
(define-public r-millefy
(package
(name "r-millefy")
@@ -12499,6 +13206,7 @@ applications for tackling some common problems in a user-friendly way.")
(invoke "python3" "test/test_all.py")))))))
(native-inputs
(list `(,glib "bin") ;for gtester
+ glib
pkg-config))
(inputs
;; TODO: add Chimera for visualization
@@ -15238,7 +15946,9 @@ activity prediction from transcriptomics data, and its R implementation
(add-after 'unpack 'set-HOME
(lambda _ (setenv "HOME" "/tmp"))))))
(propagated-inputs
- (list r-complexheatmap
+ (list r-basilisk
+ r-basilisk-utils
+ r-complexheatmap
r-dplyr
r-ggplot2
r-magrittr
@@ -15546,6 +16256,41 @@ analysing cytometry data in R.")
spatial single-cell expression data.")
(license license:expat))))
+;; Variant of r-illuminahumanmethylationepicmanifest in the
+;; (gnu packages bioconductor) module.
+(define-public r-illuminahumanmethylationepicmanifest-latest
+ (let ((commit "a9ffbad36f5e496ece6c4c37b80e2f4f7e02d0c3")
+ (revision "1"))
+ (package
+ (name "r-illuminahumanmethylationepicmanifest")
+ (version (git-version "1.0.0" revision commit))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url
+ "https://github.com/achilleasNP/IlluminaHumanMethylationEPICmanifest")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "0v8f0hl0v8gwi61vgqw56rn5j09h95hj54rb8pzbn0znm162n4fc"))))
+ (properties `((upstream-name . "IlluminaHumanMethylationEPICmanifest")))
+ (build-system r-build-system)
+ (home-page
+ "https://github.com/achilleasNP/IlluminaHumanMethylationEPICmanifest")
+ (synopsis "Illumina Human Methylation Manifest 1.0 B5 for R and minfi")
+ (description
+ "This is a drop-in replacement for the
+@code{IlluminaHumanMethylationEPIC} package. It utilizes a Manifest based on
+1.0B5 annotation. As of version 0.3.0, the
+@code{IlluminaHumanMethylationEPIC} package still employs the 1.0B2 annotation
+manifest. A corresponding annotation package,
+@code{IlluminaHumanMethylationEPICanno.ilm10b5.hg38}, is available to ensure
+proper annotation. The decision to maintain the same name is due to
+complications in downstream processing caused by array name lookup in certain
+preprocessing options.")
+ (license license:artistic2.0))))
+
(define-public r-illuminahumanmethylationepicanno-ilm10b5-hg38
(let ((commit "3db06910e27f626e0cc8b335ff45cf9a4050a36a")
(revision "1"))
@@ -15727,7 +16472,7 @@ implementation differs in these ways:
(define-public python-scanpy
(package
(name "python-scanpy")
- (version "1.9.1")
+ (version "1.9.6")
(source
(origin
(method git-fetch)
@@ -15737,21 +16482,28 @@ implementation differs in these ways:
(file-name (git-file-name name version))
(sha256
(base32
- "0k524xnx3dvpz5yx65p316wghvi01zs17is8w2m3w2qywiswk0sl"))))
+ "12rz0a9151fkry6ws1a8p5wnc4n5qbjl6xlynj7kxy223iz8isds"))))
(build-system pyproject-build-system)
(arguments
(list
#:test-flags
'(list "-k"
;; Plot tests that fail.
- (string-append "not test_dotplot_matrixplot_stacked_violin"
- " and not test_violin_without_raw"
- " and not test_correlation"
- " and not test_scatterplots"
- " and not test_scatter_embedding_add_outline_vmin_vmax_norm"
- " and not test_paga"
+ (string-append "not test_clustermap"
+ " and not test_dotplot_matrixplot_stacked_violin"
" and not test_paga_compare"
- " and not test_clustermap"
+ " and not test_paga_path"
+ " and not test_paga_pie"
+ " and not test_paga_plots"
+ " and not test_violin"
+ " and not test_scatter_no_basis_per_obs"
+
+ ;; Type mismatch
+ " and not test_obs_df"
+ " and not test_var_df"
+
+ ;; Minor accuracy problem
+ " and not test_consistency[morans_i-allclose]"
;; These try to connect to the network
" and not test_scrublet_plots"
@@ -15783,10 +16535,6 @@ implementation differs in these ways:
;; These two fail with "ValueError: I/O operation on closed file."
(delete-file "scanpy/tests/test_neighbors_key_added.py")
- ;; TODO: these fail with TypingError and "Use of unsupported
- ;; NumPy function 'numpy.split'".
- (delete-file "scanpy/tests/test_metrics.py")
-
;; The following tests requires 'scanorama', which isn't
;; packaged yet.
(delete-file "scanpy/tests/external/test_scanorama_integrate.py")
@@ -15827,8 +16575,11 @@ implementation differs in these ways:
`(;; This package needs anndata.tests, which is not installed.
("python-anndata:source" ,(package-source python-anndata))
("python-flit" ,python-flit)
+ ("python-hatchling" ,python-hatchling)
+ ("python-hatch-vcs" ,python-hatch-vcs)
("python-leidenalg" ,python-leidenalg)
("python-pytest" ,python-pytest)
+ ("python-pytest-nunit" ,python-pytest-nunit)
("python-setuptools-scm" ,python-setuptools-scm)))
(home-page "https://github.com/theislab/scanpy")
(synopsis "Single-Cell Analysis in Python")
@@ -15842,36 +16593,34 @@ million cells.")
(define-public python-bbknn
(package
(name "python-bbknn")
- (version "1.5.1")
+ (version "1.6.0")
(source
(origin
(method url-fetch)
(uri (pypi-uri "bbknn" version))
(sha256
(base32
- "0q11xdmjr2kf6f179a6kjizj3lllfrq743gslgw67qyzimvrrnhn"))))
- (build-system python-build-system)
+ "06q43cpi7wi6f2d2jqs8f9rbd94pg1hh7978gm92mi9gvzbaj08w"))))
+ (build-system pyproject-build-system)
(arguments
- `(#:tests? #f ; no tests are included
- #:phases
- (modify-phases %standard-phases
+ (list
+ #:tests? #f ;no tests are included
+ #:phases
+ '(modify-phases %standard-phases
;; Numba needs a writable dir to cache functions.
(add-before 'check 'set-numba-cache-dir
(lambda _
- (setenv "NUMBA_CACHE_DIR" "/tmp")))
- (add-after 'unpack 'do-not-fail-to-find-sklearn
- (lambda _
- ;; XXX: I have no idea why it cannot seem to find sklearn.
- (substitute* "setup.py"
- (("'sklearn'") "")))))))
+ (setenv "NUMBA_CACHE_DIR" "/tmp"))))))
(propagated-inputs
(list python-annoy
python-cython
python-numpy
python-pandas
+ python-pynndescent
python-scikit-learn
python-scipy
python-umap-learn))
+ (native-inputs (list python-flit-core))
(home-page "https://github.com/Teichlab/bbknn")
(synopsis "Batch balanced KNN")
(description "BBKNN is a batch effect removal tool that can be directly
@@ -16176,55 +16925,26 @@ fasta subsequences.")
(define-public python-cooler
(package
(name "python-cooler")
- (version "0.8.11")
+ (version "0.9.1")
(source
(origin
(method url-fetch)
(uri (pypi-uri "cooler" version))
(sha256
(base32
- "1i96fmpsimj4wrx51rxn8lw2gqxf5a2pvrj5rwdd6ivnm3pmhyrn"))))
- (build-system python-build-system)
+ "0capn4jj3mkxfwcc65cg644zvrv4sqr2wxr0ylx5w767jx3yb7p2"))))
+ (build-system pyproject-build-system)
(arguments
- `(#:phases
- (modify-phases %standard-phases
- ;; cooler requests cytoolz<0.11. It only uses cytoolz for "compose",
- ;; which composes two functions.
- (add-after 'unpack 'use-recent-cytoolz
- (lambda _
- (substitute* '("requirements.txt"
- "cooler.egg-info/requires.txt")
- (("cytoolz.*<.*0.11") "cytoolz"))))
- ;; This version of flake8 just won't work with this version of
- ;; pytest, because of dependency pinning.
- (add-after 'unpack 'do-not-use-flake8
- (lambda _
- (substitute* "setup.cfg"
- (("addopts = --flake8") "addopts = "))))
- (add-after 'unpack 'patch-tests
- (lambda _
- (substitute* "tests/test_create.py"
- (("def test_roundtrip")
- (string-append "@pytest.mark.skip(reason=\"requires network "
- "access to genome.ucsc.edu\")\n"
- "def test_roundtrip")))
- (substitute* "tests/test_util.py"
- (("def test_fetch_chromsizes")
- (string-append "@pytest.mark.skip(reason=\"requires network "
- "access to genome.ucsc.edu\")\n"
- "def test_fetch_chromsizes"))
- ;; See https://github.com/open2c/cooler/issues/287
- (("skipif\\(six.PY2, reason=\"Scipy on Py2 is too old\"")
- "skip(reason=\"Scipy is too new\""))
- ;; This test depends on ipytree, which contains a lot of minified
- ;; JavaScript.
- (substitute* "tests/test_fileops.py"
- (("def test_print_trees")
- "def _test_print_trees"))))
- (replace 'check
- (lambda* (#:key tests? #:allow-other-keys)
- (when tests?
- (invoke "python" "-m" "pytest" "-v")))))))
+ (list
+ #:test-flags
+ '(list "-k"
+ (string-append
+ ;; These tests download files from the internet.
+ "not test_fetch_chromsizes"
+ " and not test_roundtrip"
+ ;; This test depends on ipytree, which contains a lot of
+ ;; minified JavaScript.
+ " and not test_print_trees"))))
(propagated-inputs
(list python-asciitree
python-biopython
@@ -16240,12 +16960,13 @@ fasta subsequences.")
python-pysam
python-pyyaml
python-scipy
- python-simplejson
- python-six
- python-sparse))
+ python-simplejson))
(native-inputs
- (list python-codecov python-mock python-pytest python-pytest-cov
- python-pytest-flake8))
+ (list python-coverage
+ python-hatchling
+ python-isort
+ python-pytest
+ python-pytest-cov))
;; Almost all the projects of the Mirnylab are moved under Open2C umbrella
(home-page "https://github.com/open2c/cooler")
(synopsis "Sparse binary format for genomic interaction matrices")
@@ -16302,7 +17023,7 @@ includes operations like compartment, insulation or peak calling.")
(define-public python-hicmatrix
(package
(name "python-hicmatrix")
- (version "16")
+ (version "17.1")
(source
(origin
;;Pypi sources do not contain any test
@@ -16313,16 +17034,8 @@ includes operations like compartment, insulation or peak calling.")
(file-name (git-file-name name version))
(sha256
(base32
- "00b9l62j4knrsdp7l3pawi9cqcsl09diycbhmmnar850bzssmq4f"))))
+ "14gq7r9b64ff56l5f8h8zc2i2y3xri646jl0anb74japqxrwvlna"))))
(build-system pyproject-build-system)
- (arguments
- (list
- #:phases
- '(modify-phases %standard-phases
- (add-after 'unpack 'remove-invalid-syntax
- (lambda _
- (substitute* "setup.py"
- ((".\\*\"") "\"")))))))
(propagated-inputs
(list python-cooler
python-intervaltree
@@ -16353,7 +17066,16 @@ the HiCExplorer and pyGenomeTracks packages.")
(file-name (git-file-name name version))
(sha256
(base32
- "1yavgxry38g326z10bclvdf8glmma05fxj5m73h15m1r2l9xmw3v"))))
+ "1yavgxry38g326z10bclvdf8glmma05fxj5m73h15m1r2l9xmw3v"))
+ (modules '((guix build utils)))
+ ;; setup.py is malformed. The requirements are defined using a catchall
+ ;; pattern for the patch version number. This has been fixed in version
+ ;; 3.7.3, but we cannot upgrade to this version yet, since some Guix
+ ;; packages are not new enough. (See upstream commit
+ ;; 4845c715ec7b105e938d0c2426e27d0181690bfe for the fix).
+ (snippet '(substitute* "setup.py"
+ (("\\.\\*")
+ "")))))
(build-system pyproject-build-system)
(arguments
(list
@@ -16431,7 +17153,18 @@ the HiCExplorer and pyGenomeTracks packages.")
"general/test_hicHyperoptDetectLoopsHiCCUPS.py"
"general/test_hicAggregateContacts.py"
"general/test_hicInterIntraTAD.py")
- (("^memory =.*") "memory = 1\n"))))))))
+ (("^memory =.*") "memory = 1\n")))))
+ ;; This is fixed in version 3.7.3, but we cannot upgrade yet as we
+ ;; don't have Pandas 2.
+ (add-after 'unpack 'scipy-compatibility
+ (lambda _
+ (substitute* "hicexplorer/hicAverageRegions.py"
+ (("from scipy.sparse import csr_matrix, save_npz, lil_matrix")
+ "from scipy.sparse import csr_matrix, save_npz, lil_matrix, coo_matrix")
+ (("summed_matrix = np.array\\(summed_matrix\\)")
+ "summed_matrix = coo_matrix(summed_matrix)")
+ (("data = summed_matrix\\[np.nonzero\\(summed_matrix\\)\\]")
+ "data = summed_matrix.toarray()[np.nonzero(summed_matrix)]")))))))
(propagated-inputs
(list python-biopython
python-cleanlab-1
@@ -17165,7 +17898,7 @@ polymorphisms) and indels with respect to a reference genome and more.")
(define-public cnvkit
(package
(name "cnvkit")
- (version "0.9.9")
+ (version "0.9.10")
(source
(origin
(method git-fetch)
@@ -17174,17 +17907,8 @@ polymorphisms) and indels with respect to a reference genome and more.")
(commit (string-append "v" version))))
(file-name (git-file-name name version))
(sha256
- (base32 "1q4l7jhr1k135an3n9aa9wsid5lk6fwxb0hcldrr6v6y76zi4gj1"))))
+ (base32 "0r303pqjg70zpxa564bavbfj99c6di0dafgqqwx2vh4vfsiif94q"))))
(build-system pyproject-build-system)
- (arguments
- (list
- #:phases
- '(modify-phases %standard-phases
- ;; See upstream commit eee0f6eaec57d5c6e58142d661979f3aacc5f76a
- (add-after 'unpack 'compatibility
- (lambda _
- (substitute* "setup.py"
- (("'joblib.*") "")))))))
(propagated-inputs
(list python-biopython
python-future
@@ -17199,6 +17923,7 @@ polymorphisms) and indels with respect to a reference genome and more.")
python-scipy
;; R packages
r-dnacopy))
+ (inputs (list r-minimal)) ;for tests
(home-page "https://cnvkit.readthedocs.org/")
(synopsis "Copy number variant detection from targeted DNA sequencing")
(description
@@ -17747,12 +18472,40 @@ The tool enables the de novo search for new structural elements and
facilitates comparative analysis of known RNA families.")
(license license:bsd-3)))
+(define-public r-databaselinke-r
+ (let ((commit "cf3d6cc3d36f2e1c9a557390232e9a8ed5abb7fd")
+ (revision "1"))
+ (package
+ (name "r-databaselinke-r")
+ (version (git-version "1.7.0" revision commit))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/vertesy/DatabaseLinke.R")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "0hk76sb3w1v8a7c1knpc572ypsbgqlrv0p49c9y55a0dr12n16s1"))))
+ (properties `((upstream-name . "DatabaseLinke.R")))
+ (build-system r-build-system)
+ (propagated-inputs (list r-readwriter))
+ (home-page "https://github.com/vertesy/DatabaseLinke.R")
+ (synopsis
+ "Parse links to databases from your list of gene symbols")
+ (description
+ "This package provides a set of functions to parse and open (search
+query) links to genomics related and other websites for R. Useful when you
+want to explore e.g.: the function of a set of differentially expressed
+genes.")
+ (license license:gpl3))))
+
(define-public r-seurat-utils
- (let ((commit "0b6f5b548a49148cfbeaa654e8a618c0a020afa5")
+ (let ((commit "c0374cc9e25ce391ba8013fda0f8c7babbb9201d")
(revision "1"))
(package
(name "r-seurat-utils")
- (version (git-version "1.6.5" revision commit))
+ (version (git-version "2.5.0" revision commit))
(source (origin
(method git-fetch)
(uri (git-reference
@@ -17761,12 +18514,15 @@ facilitates comparative analysis of known RNA families.")
(file-name (git-file-name name version))
(sha256
(base32
- "1mn64h375mkj6x4ix5493z32gqg96yc507j5jr0lx9g5wk1bf762"))))
+ "15l86b43q245gzz7gsr5rhs4sir74lc14d64yqxfqcb0zrb2bzzd"))))
(properties `((upstream-name . "Seurat.utils")))
(build-system r-build-system)
(propagated-inputs (list r-codeandroll2
r-cowplot
+ r-databaselinke-r
r-dplyr
+ r-enhancedvolcano
+ r-foreach
r-ggcorrplot
r-ggexpress
r-ggplot2
@@ -17774,15 +18530,21 @@ facilitates comparative analysis of known RNA families.")
r-ggrepel
r-hgnchelper
r-htmlwidgets
+ r-job
+ r-magrittr
r-markdownhelpers
r-markdownreports
r-matrix
r-matrixstats
+ r-pheatmap
+ r-plotly
r-princurve
+ r-qs
r-r-utils
r-readr
r-readwriter
r-reshape2
+ r-rstudioapi
r-scales
r-seurat
r-soupx
@@ -17791,6 +18553,7 @@ facilitates comparative analysis of known RNA families.")
r-stringr
r-tibble
r-tictoc
+ r-tidyverse
r-vroom))
(home-page "https://github.com/vertesy/Seurat.utils")
(synopsis "Collection of utility functions for Seurat")
@@ -17842,7 +18605,7 @@ updated much more frequently.")
(define-public python-ctxcore
(package
(name "python-ctxcore")
- (version "0.1.1")
+ (version "0.2.0")
(source
(origin
(method git-fetch)
@@ -17852,8 +18615,8 @@ updated much more frequently.")
(file-name (git-file-name name version))
(sha256
(base32
- "16nlj7z8pirgjad7vlgm7226b3hpw4a7n967vyfg26dsf5n8k70d"))))
- (build-system python-build-system)
+ "0nv4lc46cnzpg5gcdxrsv7b4srmkq55zl3rcadw5pn3yyz5fzd2k"))))
+ (build-system pyproject-build-system)
(arguments
(list
#:phases
@@ -17865,12 +18628,12 @@ updated much more frequently.")
(setenv "SETUPTOOLS_SCM_PRETEND_VERSION" #$version))))))
(propagated-inputs
(list python-cytoolz
- python-numba
python-frozendict
+ python-numba
python-numpy
python-pandas
+ python-pyarrow
python-pyyaml
- python-pyarrow-0.16
python-tqdm))
(native-inputs
(list python-pytest
@@ -17918,67 +18681,77 @@ tree-based ensemble regressors.")
(license license:bsd-3)))
(define-public pyscenic
- (package
- (name "pyscenic")
- (version "0.11.2")
- (source
- (origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/aertslab/pySCENIC")
- (commit version)))
- (file-name (git-file-name name version))
- (sha256
- (base32
- "0pbmmr1zdb1vbbs6wx357s59d13pna6x03wq8blj6ckjws8bbq73"))))
- (build-system python-build-system)
- (arguments
- `(#:phases
- (modify-phases %standard-phases
- ;; Numba needs a writable dir to cache functions.
- (add-before 'check 'set-numba-cache-dir
- (lambda _
- (setenv "NUMBA_CACHE_DIR" "/tmp")))
- (replace 'check
- (lambda _
- (invoke "pytest" "-v"))))))
- (propagated-inputs
- (list python-ctxcore
- python-cytoolz
- python-multiprocessing-on-dill
- python-llvmlite
- python-numba
- python-attrs
- python-frozendict
- python-numpy
- python-pandas
- python-cloudpickle
- python-dask
- python-distributed
- python-arboreto
- python-boltons
- python-setuptools
- python-pyyaml
- python-tqdm
- python-interlap
- python-umap-learn
- python-loompy
- python-networkx
- python-scipy
- python-fsspec
- python-requests
- python-aiohttp
- python-scikit-learn))
- (native-inputs
- (list python-pytest))
- (home-page "https://scenic.aertslab.org/")
- (synopsis "Single-Cell regulatory network inference and clustering")
- (description
- "pySCENIC is a Python implementation of the SCENIC pipeline (Single-Cell
+ ;; Latest commit from the update-pyarrow branch
+ (let ((commit "5f170fdf474548c37ab381d1849c662820d658ee")
+ (revision "1"))
+ (package
+ (name "pyscenic")
+ (version (git-version "0.11.2" revision commit))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/aertslab/pySCENIC")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "03qkvy400rjndg2ds6bhcaprir71mqr2v3yv9vd77lcnzxgw3s0z"))))
+ (build-system pyproject-build-system)
+ (arguments
+ (list
+ #:phases
+ '(modify-phases %standard-phases
+ ;; The cli modules referenced here have been removed, so this
+ ;; breaks the sanity check.
+ (add-after 'unpack 'do-not-reference-deleted-modules
+ (lambda _
+ (substitute* "setup.py"
+ (("'db2feather = .*',") "")
+ (("'invertdb = .*',") "")
+ (("'gmt2regions = pyscenic.cli.gmt2regions:main'") ""))))
+ ;; Numba needs a writable dir to cache functions.
+ (add-before 'check 'set-numba-cache-dir
+ (lambda _
+ (setenv "NUMBA_CACHE_DIR" "/tmp"))))))
+ (propagated-inputs
+ (list python-ctxcore
+ python-cytoolz
+ python-multiprocessing-on-dill
+ python-llvmlite
+ python-numba
+ python-attrs
+ python-frozendict
+ python-numpy
+ python-pandas
+ python-cloudpickle
+ python-dask
+ python-pyarrow ;XXX for dask
+ python-distributed
+ python-arboreto
+ python-boltons
+ python-setuptools
+ python-pyyaml
+ python-tqdm
+ python-interlap
+ python-umap-learn
+ python-loompy
+ python-networkx
+ python-scipy
+ python-fsspec
+ python-requests
+ python-aiohttp
+ python-scikit-learn))
+ (native-inputs
+ (list python-pytest))
+ (home-page "https://scenic.aertslab.org/")
+ (synopsis "Single-Cell regulatory network inference and clustering")
+ (description
+ "pySCENIC is a Python implementation of the SCENIC pipeline (Single-Cell
rEgulatory Network Inference and Clustering) which enables biologists to infer
transcription factors, gene regulatory networks and cell types from
single-cell RNA-seq data.")
- (license license:gpl3+)))
+ (license license:gpl3+))))
(define-public python-ikarus
(package
@@ -19636,11 +20409,11 @@ translates between different variant encodings.")
(license license:asl2.0))))
(define-public r-signac
- (let ((commit "af4142724b72574d957f7fe3d422ed5828ec3ad0")
+ (let ((commit "8ecdde291676102bb3b503f48926c993354b5471")
(revision "1"))
(package
(name "r-signac")
- (version (git-version "1.9.0" revision commit))
+ (version (git-version "1.12.0" revision commit))
(source (origin
(method git-fetch)
(uri (git-reference
@@ -19649,7 +20422,7 @@ translates between different variant encodings.")
(file-name (git-file-name name version))
(sha256
(base32
- "0ps0lp1dcy20r6lakil6ih81m04r0s6fnirvfjf01sfs0gsyddww"))))
+ "0idkcqb1i13m164nxbdwgs3vflw0cxzm2ir84aw0i811vx9lqz8c"))))
(properties `((upstream-name . "Signac")))
(build-system r-build-system)
(inputs (list zlib))
@@ -20027,24 +20800,33 @@ aligner.")
(uri (pypi-uri "scvelo" version))
(sha256
(base32 "0h5ha1459ljs0qgpnlfsw592i8dxqn6p9bl08l1ikpwk36baxb7z"))))
- (build-system python-build-system)
+ (build-system pyproject-build-system)
(arguments
- `(#:phases
- (modify-phases %standard-phases
- ;; Numba needs a writable dir to cache functions.
- (add-before 'check 'set-numba-cache-dir
- (lambda _
- (setenv "NUMBA_CACHE_DIR" "/tmp")))
- (replace 'check
- (lambda* (#:key outputs tests? #:allow-other-keys)
- (when tests?
- ;; The discovered test file names must match the names of the
- ;; compiled files, so we cannot run the tests from
- ;; /tmp/guix-build-*.
- (with-directory-excursion
- (string-append (assoc-ref outputs "out")
- "/lib/python3.10/site-packages/scvelo/core/tests/")
- (invoke "pytest" "-v"))))))))
+ (list
+ #:test-flags
+ ;; XXX: these two tests fail for unknown reasons
+ '(list "-k" "not test_perfect_fit and not test_perfect_fit_2d")
+ #:phases
+ #~(modify-phases %standard-phases
+ (add-after 'unpack 'matplotlib-compatibility
+ (lambda _
+ (substitute* "scvelo/settings.py"
+ (("warnings.filterwarnings\\(\"ignore\", category=cbook.mplDeprecation\\)")
+ ""))))
+ ;; Numba needs a writable dir to cache functions.
+ (add-before 'check 'set-numba-cache-dir
+ (lambda _
+ (setenv "NUMBA_CACHE_DIR" "/tmp")))
+ (replace 'check
+ (lambda* (#:key tests? test-flags #:allow-other-keys)
+ (when tests?
+ ;; The discovered test file names must match the names of the
+ ;; compiled files, so we cannot run the tests from
+ ;; /tmp/guix-build-*.
+ (with-directory-excursion
+ (string-append #$output
+ "/lib/python3.10/site-packages/scvelo/core/tests/")
+ (apply invoke "pytest" "-v" test-flags))))))))
(propagated-inputs
(list python-anndata
python-hnswlib
@@ -20620,15 +21402,18 @@ based on the pairwise alignment of hidden Markov models (HMMs).")
(define-public wfmash
(package
(name "wfmash")
- (version "0.10.5")
+ (version "0.12.5")
(source
(origin
- (method url-fetch)
- (uri (string-append "https://github.com/ekg/wfmash/releases/download/v"
- version "/wfmash-v" version ".tar.gz"))
+ ;; There are no release tarballs after version 0.10.5.
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/waveygang/wfmash")
+ (commit (string-append "v" version))))
+ (file-name (git-file-name name version))
(sha256
(base32
- "1jsvnnh14h3ir4l13qhmglhd25kzwvni9apgvr1lbikqwgrpkiq4"))
+ "1qh2chnwp7nqgp88afc4xzdkd21vh6cfqq73siqw7vc0qinqadm6"))
(snippet
#~(begin
(use-modules (guix build utils))
@@ -20654,8 +21439,7 @@ based on the pairwise alignment of hidden Markov models (HMMs).")
(let ((samtools (search-input-file inputs "/bin/samtools")))
;; This is the easiest way to access the data
;; needed for the test suite.
- (symlink (string-append "../wfmash-v" #$version "/data")
- "data")
+ (symlink "../source/data" "data")
(and
;; This test takes 60 minutes on riscv64-linux.
#$@(if (not (target-riscv64?))
@@ -20747,8 +21531,7 @@ based on the pairwise alignment of hidden Markov models (HMMs).")
(lambda _
(invoke "bin/wfmash"
"data/reads.255bps.fa.gz"
- "data/reads.255bps.fa.gz"
- "-X" "-w" "16")))
+ "-w" "16" "-s" "100" "-L")))
(invoke "head" "reads.255bps.paf"))))))))))
(inputs
(list atomic-queue
@@ -20765,7 +21548,7 @@ distances and the wavefront alignment algorithm. It is a fork of MashMap that
implements base-level alignment via the wflign tiled wavefront global
alignment algorithm. It completes MashMap with a high-performance alignment
module capable of computing base-level alignments for very large sequences.")
- (home-page "https://github.com/ekg/wfmash")
+ (home-page "https://github.com/waveygang/wfmash")
(license license:expat)))
(define-public gdcm