diff options
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r-- | gnu/packages/bioinformatics.scm | 1357 |
1 files changed, 1070 insertions, 287 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 3a4bcd093a..4198acecda 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -1,5 +1,5 @@ ;;; GNU Guix --- Functional package management for GNU -;;; Copyright © 2014-2023 Ricardo Wurmus <rekado@elephly.net> +;;; Copyright © 2014-2024 Ricardo Wurmus <rekado@elephly.net> ;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com> ;;; Copyright © 2015, 2016, 2018, 2019, 2020 Pjotr Prins <pjotr.guix@thebird.nl> ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr> @@ -8,7 +8,7 @@ ;;; Copyright © 2016, 2020, 2022 Marius Bakke <marius@gnu.org> ;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com> ;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr> -;;; Copyright © 2017, 2021, 2022 Arun Isaac <arunisaac@systemreboot.net> +;;; Copyright © 2017, 2021, 2022, 2024 Arun Isaac <arunisaac@systemreboot.net> ;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com> ;;; Copyright © 2018 Gábor Boskovits <boskovits@gmail.com> ;;; Copyright © 2018-2023 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de> @@ -130,6 +130,7 @@ #:use-module (gnu packages pdf) #:use-module (gnu packages perl) #:use-module (gnu packages perl-check) + #:use-module (gnu packages perl-web) #:use-module (gnu packages pkg-config) #:use-module (gnu packages popt) #:use-module (gnu packages protobuf) @@ -161,6 +162,7 @@ #:use-module (gnu packages time) #:use-module (gnu packages tls) #:use-module (gnu packages uglifyjs) + #:use-module (gnu packages video) #:use-module (gnu packages vim) #:use-module (gnu packages web) #:use-module (gnu packages wget) @@ -573,6 +575,30 @@ BED, GFF/GTF, VCF.") whole-genome bisulfite sequencing (WGBS) reads from directional protocol.") (license license:asl2.0))) +(define-public bustools + (package + (name "bustools") + (version "0.43.2") + (source (origin + (method git-fetch) + (uri (git-reference + (url 
"https://github.com/BUStools/bustools") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "068kjlc4d528269nl5mc3j8h2c95r1v545d3fi1iw1ckg8rba0hg")))) + (build-system cmake-build-system) + (arguments (list #:tests? #f)) ;no test target + (inputs (list zlib)) + (home-page "https://bustools.github.io") + (synopsis "Tools for working with BUS files") + (description "bustools is a program for manipulating BUS files for single +cell RNA-Seq datasets. It can be used to error correct barcodes, collapse +UMIs, produce gene count or transcript compatibility count matrices, and is useful +for many other tasks.") + (license license:bsd-2))) + (define-public cellsnp-lite ;; Last release is from November 2021 and does not contain fixes. (let ((commit "0885d746b0b1ea65c8ef92f8943ca7669ca9734a") @@ -616,6 +642,50 @@ Compared to cellSNP, this package is more efficient with higher speed and less memory usage.") (license license:asl2.0)))) +(define-public cpat + (package + (name "cpat") + (version "3.0.4") + (source (origin + (method url-fetch) + (uri (pypi-uri "CPAT" version)) + (sha256 + (base32 + "0dfrwwbhv1n4nh2a903d1qfb30fgxgya89sa70aci3wzf8h2z0vd")) + (modules '((guix build utils))) + (snippet + '(for-each delete-file-recursively + (list ".eggs" + "lib/__pycache__/" + "lib/cpmodule/__pycache__/"))))) + (build-system pyproject-build-system) + (arguments + (list + #:phases + '(modify-phases %standard-phases + (replace 'check + (lambda* (#:key tests? #:allow-other-keys) + (when tests? 
+ (with-directory-excursion "test" + ;; There is no test4.fa + (substitute* "test.sh" + ((".*-g test4.fa.*") "")) + (invoke "bash" "test.sh")))))))) + (propagated-inputs + (list python-numpy python-pysam)) + (inputs + (list r-minimal)) + (home-page "https://wlcb.oit.uci.edu/cpat/") + (synopsis "Alignment-free distinction between coding and noncoding RNA") + (description + "CPAT is a method to distinguish coding and noncoding RNA by using a +logistic regression model based on four pure sequence-based, linguistic +features: ORF size, ORF coverage, Ficket TESTCODE, and Hexamer usage bias. +Linguistic features based method does not require other genomes or protein +databases to perform alignment and is more robust. Because it is +alignment-free, it runs much faster and also easier to use.") + (license license:gpl2+))) + (define-public pbcopper (package (name "pbcopper") @@ -876,6 +946,38 @@ attributes of microbiome data - zero-inflation and over-dispersion, are simultaneously considered.") (license license:gpl3)))) +(define-public r-ewastools + (let ((commit "f7646cacd73266708479b3fea5d625054d179f95") + (revision "1")) + (package + (name "r-ewastools") + (version (git-version "1.7.2" revision commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/hhhh5/ewastools/") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 "0irarlnxfnasa755adxsn67rxsy01zwhjhw18g4cag08cqiyyw41")))) + (properties `((upstream-name . 
"ewastools"))) + (build-system r-build-system) + (propagated-inputs + (list r-data-table + r-igraph + r-illuminaio + r-mblm + r-quadprog)) + (native-inputs (list r-knitr)) + (home-page "https://github.com/hhhh5/ewastools/") + (synopsis + "Quality control toolset for the Illumina Infinium DNA methylation") + (description + "This package provides a collection of useful functions for working +with DNA methylation micro-array data.") + (license license:unlicense)))) + (define-public r-numbat (let ((commit "4ab7752e7d267a3f443756675728521a9b0a7295") (revision "1")) @@ -1037,12 +1139,48 @@ of single-cell data using Seurat, RcppML nmf, SingleCellExperiments and similar.") (license license:gpl2+)))) +(define-public r-stacas + (package + (name "r-stacas") + (version "2.2.0") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/carmonalab/STACAS") + (commit version))) + (file-name (git-file-name name version)) + (sha256 + (base32 "13i0h5i6vlbrb8ndq9gr81560z9d74b2c7m3rjfzls01irjza9hm")))) + (properties `((upstream-name . "STACAS"))) + (build-system r-build-system) + (propagated-inputs + (list r-biocneighbors + r-biocparallel + r-ggplot2 + r-ggridges + r-pbapply + r-r-utils + r-seurat)) + (home-page "https://github.com/carmonalab/STACAS") + (synopsis "Sub-type anchoring correction for alignment in Seurat") + (description + "This package implements methods for batch correction and integration of +scRNA-seq datasets, based on the Seurat anchor-based integration framework. +In particular, STACAS is optimized for the integration of heterogenous +datasets with only limited overlap between cell sub-types (e.g. TIL sets of +CD8 from tumor with CD8/CD4 T cells from lymphnode), for which the default +Seurat alignment methods would tend to over-correct biological differences. 
+The 2.0 version of the package allows the users to incorporate explicit +information about cell-types in order to assist the integration process.") + (license license:gpl3))) + (define-public r-stringendo - (let ((commit "83b8f2d82a09b33b9e895438bb523a021138be01") + (let ((commit "15594b1bba11048a812874bafec0eea1dcc8618a") (revision "1")) (package (name "r-stringendo") - (version (git-version "0.3.4" revision commit)) + (version (git-version "0.6.0" revision commit)) (source (origin (method git-fetch) (uri (git-reference @@ -1051,10 +1189,10 @@ similar.") (file-name (git-file-name name version)) (sha256 (base32 - "1ap0nhbyd6xx0yl2vgmwk38p22yrkv4k9hw13r35z4wf343rry6v")))) + "15ij4zf2j9c8m9n4bqhmxkchjh2bhddwjfxngfpwv7c5wjqyi6ir")))) (properties `((upstream-name . "Stringendo"))) (build-system r-build-system) - (propagated-inputs (list r-devtools r-usethis)) + (propagated-inputs (list r-clipr)) (home-page "https://github.com/vertesy/Stringendo") (synopsis "Stringendo is a string parsing library") (description @@ -1063,11 +1201,11 @@ plotnames, filenames and paths.") (license license:gpl3)))) (define-public r-readwriter - (let ((commit "71454f4aa706f5d2fbe606acd95abc14224e7058") + (let ((commit "91373c44641014a1ce8e1c3e928747608aae8f54") (revision "1")) (package (name "r-readwriter") - (version (git-version "0.2.9" revision commit)) + (version (git-version "1.5.3" revision commit)) (source (origin (method git-fetch) (uri (git-reference @@ -1076,11 +1214,11 @@ plotnames, filenames and paths.") (file-name (git-file-name name version)) (sha256 (base32 - "0sp27smhdva2hi2x0svia2l56k8xrh7p5akn78g5b0lcvz4x3hd7")))) + "156kvmplrip0w1zhs9yl5r0ayjipa0blhy614l65hbsjn1lwbskr")))) (properties `((upstream-name . 
"ReadWriter"))) (build-system r-build-system) (propagated-inputs - (list r-gdata r-gtools r-openxlsx r-readr r-stringendo)) + (list r-gtools r-openxlsx r-readr r-stringendo)) (home-page "https://github.com/vertesy/ReadWriter") (synopsis "Functions to read and write files conveniently") (description @@ -1123,6 +1261,32 @@ shape. This package provides an @code{htmlwidget} for building streamgraph visualizations.") (license license:expat)))) +(define-public r-wasabi + (let ((commit "8c33cabde8d18c2657cd6e38e7cb834f87cf9846") + (revision "1")) + (package + (name "r-wasabi") + (version (git-version "1.0.1" revision commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/COMBINE-lab/wasabi") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 "0rpdj6n4cnx8n2zl60dzgl638474sg49dknwi9x3qb4g56dpphfa")))) + (properties `((upstream-name . "wasabi"))) + (build-system r-build-system) + (propagated-inputs (list r-data-table r-rhdf5 r-rjson)) + (home-page "https://github.com/COMBINE-lab/wasabi") + (synopsis "Use Sailfish and Salmon with Sleuth") + (description + "This package converts the output of the Sailfish and Salmon RNA-seq +quantification tools so that it can be used with the Sleuth differential +analysis package.") + (license license:bsd-3)))) + (define-public pbbam (package (name "pbbam") @@ -1757,6 +1921,8 @@ biological activities from omics data within a unified framework.") (base32 "1bhyxqjk44bmyd26m1smapf68wyf7252kk65i27k50dd3kswgnd6")))) (build-system pyproject-build-system) + ;; There are no tests. + (arguments (list #:tests? 
#false)) (propagated-inputs (list python-docopt python-importlib-metadata @@ -1946,6 +2112,29 @@ matplotlib Axes objects, making them easy to style and incorporate into multi-panel figures.") (license license:expat))) +(define-public python-parabam + (package + (name "python-parabam") + (version "3.0.1") + (source + (origin + (method url-fetch) + (uri (pypi-uri "parabam" version)) + (sha256 + (base32 "1cy9q3gzdawi1kilycpd7waymjmrwsg8czwycfp13g301ir9xyp3")) + (modules '((guix build utils))) + (snippet + '(substitute* "setup.py" + (("'argparse',") ""))))) + (build-system pyproject-build-system) + (propagated-inputs (list python-numpy python-pysam)) + (home-page "https://github.com/cancerit/parabam") + (synopsis "Parallel BAM File Analysis") + (description "Parabam is a tool for processing sequencing files in +parallel. It uses Python's native multiprocessing framework to apply a user +defined rule on an input file.") + (license license:gpl3))) + (define-public python-peaks2utr (package (name "python-peaks2utr") @@ -1993,6 +2182,8 @@ three prime UTR.") (base32 "0gqygspdy398vjymdy6756jmk99s7fhwav9rivdx59kpqjcdxaz9")))) (build-system pyproject-build-system) + ;; There are no tests. + (arguments (list #:tests? #false)) (propagated-inputs (list python-anndata python-docopt @@ -2226,6 +2417,66 @@ sequencing data and the end result are tables of UMI-unique DamID and CEL-Seq counts.") (license license:expat))) +(define-public python-snaptools + (package + (name "python-snaptools") + (version "1.4.8") + (source + (origin + (method url-fetch) + (uri (pypi-uri "snaptools" version)) + (sha256 + (base32 + "1s5373g5jjbshh3q39zy7dlxr7nda6ksxq9d1gw46h82c4fsmfbn")))) + (build-system pyproject-build-system) + (arguments (list #:tests? 
#false)) ;there are none + (propagated-inputs + (list python-future + python-h5py + python-louvain + python-numpy + python-pybedtools + python-pysam)) + (home-page "https://github.com/r3fang/SnapTools") + (synopsis "Tools for processing snap files" ) + (description + "@code{SnapTools} can operate on snap files the following types of +operations: + +@itemize +@item index the reference genome before alignment; +@item align reads to the corresponding reference genome; +@item pre-process by convert pair-end reads into fragments, checking the + mapping quality score, alingment and filtration; +@item create the cell-by-bin matrix. +@end itemize") + (license license:asl2.0))) + +(define-public python-telomerecat + (package + (name "python-telomerecat") + (version "4.0.2") + (source + (origin + (method url-fetch) + (uri (pypi-uri "telomerecat" version)) + (sha256 + (base32 "16mfdqmp0j6g3h26h59334w9lqb4qihqrlzwvgznj0fiqs1rkxn2")))) + (build-system pyproject-build-system) + (arguments (list #:tests? #false)) ;there are none + (propagated-inputs (list python-click python-numpy python-pandas + python-parabam python-pysam)) + (home-page "https://github.com/cancerit/telomerecat") + (synopsis "Telomere computational analysis tool") + (description "Telomerecat is a tool for estimating the average telomere +length (TL) for a paired end, whole genome sequencing (WGS) sample. + +Telomerecat is adaptable, accurate and fast. The algorithm accounts for +sequencing amplification artifacts, anneouploidy (common in cancer samples) +and noise generated by WGS. For a high coverage WGS BAM file of around 100GB +telomerecat can produce an estimate in ~1 hour.") + (license license:gpl3))) + (define-public python-bioframe (package (name "python-bioframe") @@ -2401,6 +2652,7 @@ alignments and perform the following operations: (base32 "15m6ffks4zwpp1ycwk6n02py6mw2yh7qr0vhpc178b91gldr97ia")))) (build-system pyproject-build-system) + (arguments (list #:tests? 
#false)) ;there are none (propagated-inputs (list python-pandas)) (home-page "https://github.com/alexomics/read-paf") (synopsis "Minimap2 PAF file reader") @@ -3767,19 +4019,27 @@ UCSC genome browser.") (define-public python-plastid (package (name "python-plastid") - (version "0.6.0") + (version "0.6.1") (source (origin (method git-fetch) (uri (git-reference (url "https://github.com/joshuagryphon/plastid") - (commit (string-append "v" version)))) + (commit "d97f239d73b3a7c2eff46f71928b777431891f90"))) (file-name (git-file-name name version)) (sha256 (base32 - "1ka9j08j6i105l89w8b7sg0l8lm3lcrxzy4cjl5dp4cxdmycap62")))) + "0iccpywlpf1ws46279z9rl0l29pil0rj0g2j5nvqq7jfbnq581cf")))) (build-system pyproject-build-system) (arguments (list + #:test-flags + '(list "plastid/test" + ;; These four failures look like errors in the test wrapper + ;; class. + "-k" (string-append "not test_chrom_sizes" + " and not test_no_crash_if_file_not_exist" + " and not test_fiveprime_variable" + " and not test_fiveprime_variable_from_file")) #:phases #~(modify-phases %standard-phases (add-after 'unpack 'unpack-test-data @@ -4255,6 +4515,107 @@ annotations of the genome.") other types of unwanted sequence from high-throughput sequencing reads.") (license license:expat))) +(define-public lammps + (let ((commit "stable_2Aug2023_update2")) + (package + (name "lammps") + (version (string-append "0." commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/lammps/lammps.git") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "11xagacgxgldkx34qdzyjrjvn8x3hpl0kgzhh9zh7skpq79pwycz")))) + (build-system gnu-build-system) + (arguments + (list + #:tests? 
#f ; no check target + #:make-flags + '(list "CC=mpicc" "mpi" + "LMP_INC=-DLAMMPS_GZIP \ +-DLAMMPS_JPEG -DLAMMPS_PNG -DLAMMPS_FFMPEG -DLAMMPS_MEMALIGN=64" + "LIB=-gz -ljpeg -lpng -lavcodec") + #:phases + #~(modify-phases %standard-phases + (add-after 'unpack 'chdir + (lambda _ (chdir "src"))) + (replace 'configure + (lambda _ + (substitute* "MAKE/Makefile.mpi" + (("SHELL =.*") + (string-append "SHELL=" (which "bash") "\n")) + (("cc ") "mpicc ")) + (substitute* "Makefile" + (("SHELL =.*") + (string-append "SHELL=" (which "bash") "\n"))))) + (add-after 'configure 'configure-modules + (lambda _ + (invoke "make" + "yes-molecule" + "yes-misc" + "yes-granular" + (string-append "HDF5_PATH=" + #$(this-package-input "hdf5"))))) + (replace 'install + (lambda _ + (let ((bin (string-append #$output "/bin"))) + (mkdir-p bin) + (install-file "lmp_mpi" bin))))))) + (inputs + (list ffmpeg + gfortran + gzip + hdf5 + libjpeg-turbo + libpng + openmpi + python-wrapper)) + (native-inputs (list bc)) + (home-page "https://www.lammps.org/") + (synopsis "Classical molecular dynamics simulator") + (description "LAMMPS is a classical molecular dynamics simulator +designed to run efficiently on parallel computers. LAMMPS has potentials for +solid-state materials (metals, semiconductors), soft matter (biomolecules, +polymers), and coarse-grained or mesoscopic systems. 
It can be used to model +atoms or, more generically, as a parallel particle simulator at the atomic, +meso, or continuum scale.") + (license license:gpl2+)))) + +(define-public lammps-serial + (package + (inherit lammps) + (name "lammps-serial") + (arguments + (substitute-keyword-arguments (package-arguments lammps) + ((#:make-flags flags) + '(list "CC=gcc" "serial" + "LMP_INC=-DLAMMPS_GZIP \ +-DLAMMPS_JPEG -DLAMMPS_PNG -DLAMMPS_FFMPEG -DLAMMPS_MEMALIGN=64" + "LIB=-gz -ljpeg -lpng -lavcodec")) + ((#:phases phases) + #~(modify-phases #$phases + (replace 'configure + (lambda _ + (substitute* "MAKE/Makefile.serial" + (("SHELL =.*") + (string-append "SHELL=" (which "bash") "\n")) + (("cc ") "gcc ")) + (substitute* "Makefile" + (("SHELL =.*") + (string-append "SHELL=" (which "bash") "\n"))))) + (replace 'install + (lambda _ + (let ((bin (string-append #$output "/bin"))) + (mkdir-p bin) + (install-file "lmp_serial" bin)))))))) + (inputs + (modify-inputs (package-inputs lammps) + (delete "openmpi"))))) + (define-public libbigwig (package (name "libbigwig") @@ -4374,58 +4735,43 @@ and record oriented data modeling and the Semantic Web.") (define-public python-scikit-bio (package (name "python-scikit-bio") - (version "0.5.7") + (version "0.5.9") (source (origin (method url-fetch) (uri (pypi-uri "scikit-bio" version)) (sha256 (base32 - "1a8xbp3vrw8wfpm3pa2nb4rcar0643iqnb043ifwqbqyc86clhv3")))) + "0429060pkyq1pm19zb2n1la7czh7b633mp4a4h01j8zfigf49q3s")) + (patches (search-patches "python-scikit-bio-1887.patch")))) (build-system pyproject-build-system) (arguments (list + ;; Accuracy problem + #:test-flags '(list "-k" "not test_fisher_alpha") #:phases '(modify-phases %standard-phases - ;; See https://github.com/biocore/scikit-bio/pull/1826 (add-after 'unpack 'compatibility (lambda _ - (substitute* "skbio/sequence/tests/test_sequence.py" - (("def test_concat_strict_many") - "def _do_not_test_concat_strict_many")) - (substitute* "skbio/stats/distance/_mantel.py" - (("from 
scipy.stats import PearsonRConstantInputWarning") - "from scipy.stats import ConstantInputWarning") - (("from scipy.stats import PearsonRNearConstantInputWarning") - "from scipy.stats import NearConstantInputWarning") - (("from scipy.stats import SpearmanRConstantInputWarning") "") - (("warnings.warn\\(PearsonRConstantInputWarning\\(\\)\\)") - "warnings.warn(ConstantInputWarning())") - (("warnings.warn\\(PearsonRNearConstantInputWarning\\(\\)\\)") - "warnings.warn(NearConstantInputWarning())") - (("warnings.warn\\(SpearmanRConstantInputWarning\\(\\)\\)") - "warnings.warn(ConstantInputWarning())")) - (substitute* "skbio/diversity/alpha/tests/test_base.py" - (("self.assertEqual\\(pielou_e") - "self.assertAlmostEqual(pielou_e")))) + (substitute* "skbio/diversity/__init__.py" + ((", numeric_only=True") "")))) (add-before 'check 'build-extensions (lambda _ ;; Cython extensions have to be built before running the tests. (invoke "python3" "setup.py" "build_ext" "--inplace"))) (replace 'check - (lambda* (#:key tests? #:allow-other-keys) - (when tests? (invoke "python3" "-m" "skbio.test"))))))) + (lambda* (#:key tests? test-flags #:allow-other-keys) + (when tests? + (apply invoke "python3" "-m" "skbio.test" test-flags))))))) (propagated-inputs - (list python-cachecontrol - python-decorator + (list python-decorator python-h5py python-hdmedians python-ipython - python-lockfile python-matplotlib python-natsort python-numpy python-pandas - python-scikit-learn + python-requests python-scipy)) (native-inputs (list python-coverage python-pytest)) @@ -4759,6 +5105,126 @@ be of arbitrary length. Repeats with pattern size in the range from 1 to 2000 bases are detected.") (license license:agpl3+))) +(define-public trinityrnaseq + (package + (name "trinityrnaseq") + (version "2.13.2") + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/trinityrnaseq/trinityrnaseq.git") + (commit (string-append "Trinity-v" version)) + (recursive? 
#true))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1qszrxqbx4q5pavpgm4rkrh1z1v1mf7qx83vv3fnlqdmncnsf1gv")))) + (build-system gnu-build-system) + (arguments + (list + #:test-target "test" + #:modules + '((guix build gnu-build-system) + (guix build utils) + (ice-9 match) + (srfi srfi-1)) + #:make-flags + #~(list (string-append "CC=" #$(cc-for-target))) + #:phases + #~(modify-phases %standard-phases + (replace 'configure + (lambda _ + (setenv "SHELL" (which "sh")) + (setenv "CONFIG_SHELL" (which "sh")) + ;; Do not require version.h, which triggers a local build of a + ;; vendored htslib. + (substitute* "trinity-plugins/bamsifter/Makefile" + (("sift_bam_max_cov.cpp htslib/version.h") + "sift_bam_max_cov.cpp")))) + (add-after 'build 'build-plugins + (lambda _ + ;; Run this in the subdirectory to avoid running the + ;; tests right here. + (with-directory-excursion "trinity-plugins" + (invoke "make" "plugins")))) + ;; The install script uses rsync, provides no overrides for the + ;; default location at /usr/local/bin, and patching it would change + ;; all lines that do something. + (replace 'install + (lambda* (#:key inputs #:allow-other-keys) + (let ((share (string-append #$output "/share/trinity/")) + (bin (string-append #$output "/bin/"))) + (mkdir-p bin) + (copy-recursively "." share) + (delete-file (string-append share "/Chrysalis/build/CMakeFiles/CMakeOutput.log")) + (delete-file (string-append share "/Inchworm/build/CMakeFiles/CMakeOutput.log")) + + (wrap-program (string-append share "Trinity") + `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE"))) + `("PERL5LIB" ":" = (,(getenv "PERL5LIB"))) + `("PYTHONPATH" ":" = (,(getenv "GUIX_PYTHONPATH"))) + `("PATH" ":" = + ,(cons (string-append share "/trinity-plugins/BIN") + (filter-map (match-lambda + ((name . 
dir) + (string-append dir "/bin"))) + inputs)))) + (symlink (string-append share "Trinity") + (string-append bin "Trinity")))))))) + (inputs + (list blast+ + bowtie + fastqc + hisat + htslib + icedtea-8 + jellyfish + kallisto + multiqc + perl + perl-uri-escape + python-numpy + python-wrapper + r-ape + r-argparse + r-biobase + r-ctc + r-deseq2 + r-edger + r-fastcluster + r-glimma + r-goplot + r-goseq + r-gplots + r-minimal + r-qvalue + r-rots + r-sm + r-tidyverse + rsem + salmon + samtools + sra-tools + star + zlib)) + (propagated-inputs + (list coreutils + gzip + which)) + (native-inputs (list cmake)) + (home-page "https://github.com/trinityrnaseq/trinityrnaseq/wiki") + (synopsis "Trinity RNA-Seq de novo transcriptome assembly") + (description "Trinity assembles transcript sequences from Illumina RNA-Seq +data. Trinity represents a novel method for the efficient and robust de novo +reconstruction of transcriptomes from RNA-seq data. Trinity combines three +independent software modules: Inchworm, Chrysalis, and Butterfly, applied +sequentially to process large volumes of RNA-seq reads. Trinity partitions +the sequence data into many individual de Bruijn graphs, each representing the +transcriptional complexity at a given gene or locus, and then processes each +graph independently to extract full-length splicing isoforms and to tease +apart transcripts derived from paralogous genes.") + (license license:bsd-3))) + (define-public repeat-masker (package (name "repeat-masker") @@ -5334,6 +5800,8 @@ Illumina paired-end data (for CASAVA 1.8+).") (base32 "124kdg7168nbh4a5wisfws1fgkd89dd4js9v6dml2lvgclbv4mjg")))) (build-system pyproject-build-system) + ;; This contains two test data files but no way to run the tests. + (arguments (list #:tests? 
#false)) (propagated-inputs (list python-biopython python-matplotlib python-pandas python-scipy python-seaborn)) @@ -6691,7 +7159,7 @@ performance.") (define-public htscodecs (package (name "htscodecs") - (version "1.5.1") + (version "1.6.0") (source (origin (method url-fetch) (uri (string-append "https://github.com/samtools/htscodecs/" @@ -6699,7 +7167,7 @@ performance.") version "/htscodecs-" version ".tar.gz")) (sha256 (base32 - "0nykdf08wil6iiihgf5qlb04n70yv4zqqj7c27vpnpwpr2r2ns62")))) + "1h0827g9svil9jnjbpdlxjbl44rai5b95m61hs9ifbqrz9nvnjjb")))) (build-system gnu-build-system) (inputs (list bzip2 zlib)) (home-page "https://github.com/samtools/htscodecs") @@ -6717,7 +7185,7 @@ name/ID compression and quality score compression derived from fqzcomp.") (define-public htslib (package (name "htslib") - (version "1.16") + (version "1.19") (source (origin (method url-fetch) (uri (string-append @@ -6725,7 +7193,7 @@ name/ID compression and quality score compression derived from fqzcomp.") version "/htslib-" version ".tar.bz2")) (sha256 (base32 - "093r1n4s134k50m9a925yn95gyi90ps5dlgc6gq4qwvkzxx7qsv0")) + "0dh79lwpspwwfbkmllrrhbk8nkvlfc5b5ib4d0xg5ld79w6c8lc7")) (snippet #~(begin (use-modules (guix build utils)) @@ -8017,20 +8485,20 @@ to the user's query of interest.") (define-public samtools (package (name "samtools") - (version "1.14") + (version "1.19") (source (origin (method url-fetch) (uri - (string-append "mirror://sourceforge/samtools/samtools/" - version "/samtools-" version ".tar.bz2")) + (string-append "https://github.com/samtools/samtools" + "/releases/download/" version + "/samtools-" version ".tar.bz2")) (sha256 (base32 - "0x3xdda78ac5vx66b3jdsv9sfhyz4npl4znl1zbaf3lbm6xdlhck")) + "10wby07w33rfypy4kf73v9wwnbyh0lrazbsmrgrvcl88w8c3nszs")) (modules '((guix build utils))) - (snippet '(begin - ;; Delete bundled htslib. - (delete-file-recursively "htslib-1.14"))))) + ;; Delete bundled htslib. 
+ (snippet '(delete-file-recursively "htslib-1.19")))) (build-system gnu-build-system) (arguments `(#:configure-flags (list "--with-ncurses") @@ -8053,6 +8521,26 @@ variant calling (in conjunction with bcftools), and a simple alignment viewer.") (license license:expat))) +(define-public samtools-1.14 + (package/inherit samtools + (version "1.14") + (source + (origin + (method url-fetch) + (uri + (string-append "mirror://sourceforge/samtools/samtools/" + version "/samtools-" version ".tar.bz2")) + (sha256 + (base32 + "0x3xdda78ac5vx66b3jdsv9sfhyz4npl4znl1zbaf3lbm6xdlhck")) + (modules '((guix build utils))) + (snippet '(begin + ;; Delete bundled htslib. + (delete-file-recursively "htslib-1.14"))))) + (native-inputs (list pkg-config)) + (inputs + (list htslib-1.14 ncurses perl python zlib)))) + (define-public samtools-1.12 (package/inherit samtools (version "1.12") @@ -9891,6 +10379,51 @@ tasks.") Pore-C concatemers.") (license license:gpl3)))) +(define-public r-dnamcrosshyb + ;; There aren't any releases. + (let ((commit "fe8acb33667e81f00dcb84e0fa75c87ab2db5d8f") + (revision "1")) + (package + (name "r-dnamcrosshyb") + (version (git-version "0.0.0.9000" revision commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/pjhop/DNAmCrosshyb") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 "12j1xsiqpvny5rp23z1az0k4cj5ajbcwkg65z00s16vywi2rx6nb")))) + (properties `((upstream-name . 
"DNAmCrosshyb"))) + (build-system r-build-system) + (propagated-inputs + (list r-biocgenerics + r-biocparallel + r-biostrings + r-bsgenome-hsapiens-ucsc-hg19-masked + r-bsgenome-hsapiens-ucsc-hg38-masked + r-dplyr + r-genomicranges + r-ggplot2 + r-iranges + r-magrittr + r-minfi + r-purrr + r-s4vectors + r-shiny + r-stringi + r-stringr + r-tibble + r-tidyr + r-watermelon)) + (home-page "https://github.com/pjhop/DNAmCrosshyb") + (synopsis "DNAmCrosshyb") + (description + "This package provides helper functions to detect cross-hybridization +on Illumina DNAm arrays.") + (license license:gpl3)))) + (define-public r-doubletcollection (let ((commit "c0d62f1853942ee6a087eaf7b000d9e4261e2dfd") (revision "1")) @@ -10134,6 +10667,51 @@ data. This package includes panel editing or renaming for FCS files, bead-based normalization and debarcoding.") (license license:gpl3)))) +(define-public r-projectils + (let ((commit "cc73b97471b4b6eea11ce779b5c4a7dc5c3e1709") + (revision "1")) + (package + (name "r-projectils") + (version (git-version "3.0.0" revision commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/carmonalab/ProjecTILs") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 "0dpzvbhhb9andnj7angpj32cgkwd6rs6qgpl6i21pqzcn6vqqhqw")))) + (properties `((upstream-name . 
"ProjecTILs"))) + (build-system r-build-system) + (propagated-inputs + (list r-biocneighbors + r-biocparallel + r-dplyr + r-ggplot2 + r-matrix + r-patchwork + r-pheatmap + r-pracma + r-purrr + r-rcolorbrewer + r-reshape2 + r-scales + r-scgate + r-seurat + r-seuratobject + r-stacas + r-ucell + r-umap + r-uwot)) + (home-page "https://github.com/carmonalab/ProjecTILs") + (synopsis "Reference-based analysis of scRNA-seq data") + (description + "This package implements methods to project single-cell RNA-seq data +onto a reference atlas, enabling interpretation of unknown cell transcriptomic +states in the the context of known, reference states.") + (license license:gpl3)))) + (define-public r-presto (let ((commit "052085db9c88aa70a28d11cc58ebc807999bf0ad") (revision "0")) @@ -10168,55 +10746,54 @@ auROC analysis.") (license license:gpl3)))) (define-public r-sccustomize - (let ((commit "8414d1f5fb32277855b0619191a568932b7baeb0") + (let ((commit "397374590dae2ccc0c560897dcd1ce4382c18798") (revision "1")) (package (name "r-sccustomize") - (version (git-version "0.7.0" revision commit)) - (source (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/samuel-marsh/scCustomize") - (commit commit))) - (file-name (git-file-name name version)) - (sha256 - (base32 - "1wcgfq7lx83a2kf8pjbw524gdvxf351n08cwd5wzmmy57kf4knbj")))) + (version (git-version "2.0.1" revision commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/samuel-marsh/scCustomize") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 "11bafm0mlck27fqd8brz80pxb8dc5q0aqbp8zv0s9sx97njp7wsl")))) (properties `((upstream-name . 
"scCustomize"))) (build-system r-build-system) - (propagated-inputs - (list r-circlize - r-colorway - r-cowplot - r-data-table - r-dittoseq - r-dplyr - r-forcats - r-ggbeeswarm - r-ggplot2 - r-ggprism - r-ggpubr - r-ggrastr - r-ggrepel - r-glue - r-janitor - r-magrittr - r-matrix - r-paletteer - r-patchwork - r-pbapply - r-purrr - r-remotes - r-scales - r-scattermore - r-seurat - r-seuratobject - r-stringi - r-stringr - r-tibble - r-tidyr - r-tidyselect - r-viridis)) + (propagated-inputs (list r-circlize + r-colorway + r-cowplot + r-data-table + r-dittoseq + r-dplyr + r-forcats + r-ggbeeswarm + r-ggplot2 + r-ggprism + r-ggpubr + r-ggrastr + r-ggrepel + r-glue + r-janitor + r-magrittr + r-matrix + r-paletteer + r-patchwork + r-pbapply + r-purrr + r-remotes + r-scales + r-scattermore + r-seurat + r-seuratobject + r-stringi + r-stringr + r-tibble + r-tidyr + r-tidyselect + r-viridis)) (native-inputs (list r-knitr)) (home-page "https://github.com/samuel-marsh/scCustomize") (synopsis "Custom visualization and analyses of single-cell sequencing") @@ -10349,6 +10926,43 @@ analysis of cell types, subtypes, transcriptional gradients,cell-cycle variation, gene modules and their regulatory models and more.") (license license:expat)))) +(define-public r-sleuth + (package + (name "r-sleuth") + (version "0.30.1") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/pachterlab/sleuth") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 "09xgc7r6iisjkk0c0wn0q56zy0aph386kphwixfzq4422y7vlqci")))) + (properties `((upstream-name . 
"sleuth"))) + (build-system r-build-system) + (propagated-inputs (list r-aggregation + r-data-table + r-dplyr + r-ggplot2 + r-lazyeval + r-matrixstats + r-pheatmap + r-reshape2 + r-rhdf5 + r-shiny + r-tidyr)) + (native-inputs (list r-knitr)) + (home-page "https://github.com/pachterlab/sleuth") + (synopsis "Tools for investigating RNA-Seq") + (description + "Sleuth is a program for differential analysis of RNA-Seq data. +It makes use of quantification uncertainty estimates obtained via Kallisto for +accurate differential analysis of isoforms or genes, allows testing in the +context of experiments with complex designs, and supports interactive +exploratory data analysis via sleuth live.") + (license license:gpl3))) + (define-public r-snapatac (package (name "r-snapatac") @@ -10579,6 +11193,65 @@ single-cell data.") "This package is designed to streamline scATAC analyses in R.") (license license:gpl2+)))) +(define-public r-azimuth + (let ((commit "243ee5db80fcbffa3452c944254a325a3da2ef9e") + (revision "1")) + (package + (name "r-azimuth") + (version (git-version "0.5.0" revision commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/satijalab/azimuth") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 "0vs4813kf2cv2j1z800ihwk62bw4dgmwgkd47rln58581chv39zm")))) + (properties `((upstream-name . 
"Azimuth"))) + (build-system r-build-system) + (propagated-inputs (list r-bsgenome-hsapiens-ucsc-hg38 + r-dt + r-ensdb-hsapiens-v86 + r-future + r-ggplot2 + r-glmgampoi + r-googlesheets4 + r-hdf5r + r-htmltools + r-httr + r-jaspar2020 + r-jsonlite + r-matrix + r-patchwork + r-plotly + r-presto + r-rcpp + r-rlang + r-scales + r-seurat + r-seuratdata + r-seuratdisk + r-seuratobject + r-shiny + r-shinybs + r-shinydashboard + r-shinyjs + r-signac + r-stringr + r-tfbstools + r-withr)) + (home-page "https://github.com/satijalab/azimuth") + (synopsis + "Shiny app showcasing a single-cell data query-reference mapping algorithm") + (description "Azimuth utilizes an annotated reference dataset. It +automates the processing, analysis, and interpretation. This applies +specifically to new single-cell RNA-seq or ATAC-seq experiments. Azimuth +leverages a reference-based mapping pipeline that inputs a counts matrix and +performs normalization, visualization, cell annotation, and differential +expression.") + (license license:gpl3)))) + (define-public r-icellnet ;; v1.0 tagged in 2020, last commit contains many fixes. ;; DESCRIPTION says Version: 0.0.0.9000. @@ -10665,6 +11338,40 @@ of transcriptional heterogeneity among single cells.") ;; See https://github.com/hms-dbmi/scde/issues/38 (license license:gpl2))) +(define-public r-miamiplot + (let ((commit "beede9c5d6431b4d822aa42e064e01baeb5dd4a0") + (revision "1")) + (package + (name "r-miamiplot") + (version (git-version "1.1.0" revision commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/juliedwhite/miamiplot") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 "0wxxk1lk9jbf0imf59qp302ffasvs84idinkvzirs3dw9w3589n9")))) + (properties `((upstream-name . 
"miamiplot"))) + (build-system r-build-system) + (propagated-inputs (list r-checkmate + r-dplyr + r-ggplot2 + r-ggrepel + r-gridextra + r-magrittr + r-rlang)) + (native-inputs (list r-knitr)) + (home-page "https://github.com/juliedwhite/miamiplot") + (synopsis "Create a ggplot2 miami plot") + (description + "This package generates a Miami plot with centered chromosome labels. +The output is a ggplot2 object. Users can specify which data they want +plotted on top vs. bottom, whether to display significance line(s), what +colors to give chromosomes, and what points to label.") + (license license:gpl2)))) + (define-public r-millefy (package (name "r-millefy") @@ -12499,6 +13206,7 @@ applications for tackling some common problems in a user-friendly way.") (invoke "python3" "test/test_all.py"))))))) (native-inputs (list `(,glib "bin") ;for gtester + glib pkg-config)) (inputs ;; TODO: add Chimera for visualization @@ -15238,7 +15946,9 @@ activity prediction from transcriptomics data, and its R implementation (add-after 'unpack 'set-HOME (lambda _ (setenv "HOME" "/tmp")))))) (propagated-inputs - (list r-complexheatmap + (list r-basilisk + r-basilisk-utils + r-complexheatmap r-dplyr r-ggplot2 r-magrittr @@ -15546,6 +16256,41 @@ analysing cytometry data in R.") spatial single-cell expression data.") (license license:expat)))) +;; Variant of r-illuminahumanmethylationepicmanifest in the +;; (gnu packages bioconductor) module. 
+(define-public r-illuminahumanmethylationepicmanifest-latest + (let ((commit "a9ffbad36f5e496ece6c4c37b80e2f4f7e02d0c3") + (revision "1")) + (package + (name "r-illuminahumanmethylationepicmanifest") + (version (git-version "1.0.0" revision commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url + "https://github.com/achilleasNP/IlluminaHumanMethylationEPICmanifest") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 "0v8f0hl0v8gwi61vgqw56rn5j09h95hj54rb8pzbn0znm162n4fc")))) + (properties `((upstream-name . "IlluminaHumanMethylationEPICmanifest"))) + (build-system r-build-system) + (home-page + "https://github.com/achilleasNP/IlluminaHumanMethylationEPICmanifest") + (synopsis "Illumina Human Methylation Manifest 1.0 B5 for R and minfi") + (description + "This is a drop-in replacement for the +@code{IlluminaHumanMethylationEPIC} package. It utilizes a Manifest based on +1.0B5 annotation. As of version 0.3.0, the +@code{IlluminaHumanMethylationEPIC} package still employs the 1.0B2 annotation +manifest. A corresponding annotation package, +@code{IlluminaHumanMethylationEPICanno.ilm10b5.hg38}, is available to ensure +proper annotation. 
The decision to maintain the same name is due to +complications in downstream processing caused by array name lookup in certain +preprocessing options.") + (license license:artistic2.0)))) + (define-public r-illuminahumanmethylationepicanno-ilm10b5-hg38 (let ((commit "3db06910e27f626e0cc8b335ff45cf9a4050a36a") (revision "1")) @@ -15727,7 +16472,7 @@ implementation differs in these ways: (define-public python-scanpy (package (name "python-scanpy") - (version "1.9.1") + (version "1.9.6") (source (origin (method git-fetch) @@ -15737,21 +16482,28 @@ implementation differs in these ways: (file-name (git-file-name name version)) (sha256 (base32 - "0k524xnx3dvpz5yx65p316wghvi01zs17is8w2m3w2qywiswk0sl")))) + "12rz0a9151fkry6ws1a8p5wnc4n5qbjl6xlynj7kxy223iz8isds")))) (build-system pyproject-build-system) (arguments (list #:test-flags '(list "-k" ;; Plot tests that fail. - (string-append "not test_dotplot_matrixplot_stacked_violin" - " and not test_violin_without_raw" - " and not test_correlation" - " and not test_scatterplots" - " and not test_scatter_embedding_add_outline_vmin_vmax_norm" - " and not test_paga" + (string-append "not test_clustermap" + " and not test_dotplot_matrixplot_stacked_violin" " and not test_paga_compare" - " and not test_clustermap" + " and not test_paga_path" + " and not test_paga_pie" + " and not test_paga_plots" + " and not test_violin" + " and not test_scatter_no_basis_per_obs" + + ;; Type mismatch + " and not test_obs_df" + " and not test_var_df" + + ;; Minor accuracy problem + " and not test_consistency[morans_i-allclose]" ;; These try to connect to the network " and not test_scrublet_plots" @@ -15783,10 +16535,6 @@ implementation differs in these ways: ;; These two fail with "ValueError: I/O operation on closed file." (delete-file "scanpy/tests/test_neighbors_key_added.py") - ;; TODO: these fail with TypingError and "Use of unsupported - ;; NumPy function 'numpy.split'". 
- (delete-file "scanpy/tests/test_metrics.py") - ;; The following tests requires 'scanorama', which isn't ;; packaged yet. (delete-file "scanpy/tests/external/test_scanorama_integrate.py") @@ -15827,8 +16575,11 @@ implementation differs in these ways: `(;; This package needs anndata.tests, which is not installed. ("python-anndata:source" ,(package-source python-anndata)) ("python-flit" ,python-flit) + ("python-hatchling" ,python-hatchling) + ("python-hatch-vcs" ,python-hatch-vcs) ("python-leidenalg" ,python-leidenalg) ("python-pytest" ,python-pytest) + ("python-pytest-nunit" ,python-pytest-nunit) ("python-setuptools-scm" ,python-setuptools-scm))) (home-page "https://github.com/theislab/scanpy") (synopsis "Single-Cell Analysis in Python") @@ -15842,36 +16593,34 @@ million cells.") (define-public python-bbknn (package (name "python-bbknn") - (version "1.5.1") + (version "1.6.0") (source (origin (method url-fetch) (uri (pypi-uri "bbknn" version)) (sha256 (base32 - "0q11xdmjr2kf6f179a6kjizj3lllfrq743gslgw67qyzimvrrnhn")))) - (build-system python-build-system) + "06q43cpi7wi6f2d2jqs8f9rbd94pg1hh7978gm92mi9gvzbaj08w")))) + (build-system pyproject-build-system) (arguments - `(#:tests? #f ; no tests are included - #:phases - (modify-phases %standard-phases + (list + #:tests? #f ;no tests are included + #:phases + '(modify-phases %standard-phases ;; Numba needs a writable dir to cache functions. (add-before 'check 'set-numba-cache-dir (lambda _ - (setenv "NUMBA_CACHE_DIR" "/tmp"))) - (add-after 'unpack 'do-not-fail-to-find-sklearn - (lambda _ - ;; XXX: I have no idea why it cannot seem to find sklearn. 
- (substitute* "setup.py" - (("'sklearn'") ""))))))) + (setenv "NUMBA_CACHE_DIR" "/tmp")))))) (propagated-inputs (list python-annoy python-cython python-numpy python-pandas + python-pynndescent python-scikit-learn python-scipy python-umap-learn)) + (native-inputs (list python-flit-core)) (home-page "https://github.com/Teichlab/bbknn") (synopsis "Batch balanced KNN") (description "BBKNN is a batch effect removal tool that can be directly @@ -16176,55 +16925,26 @@ fasta subsequences.") (define-public python-cooler (package (name "python-cooler") - (version "0.8.11") + (version "0.9.1") (source (origin (method url-fetch) (uri (pypi-uri "cooler" version)) (sha256 (base32 - "1i96fmpsimj4wrx51rxn8lw2gqxf5a2pvrj5rwdd6ivnm3pmhyrn")))) - (build-system python-build-system) + "0capn4jj3mkxfwcc65cg644zvrv4sqr2wxr0ylx5w767jx3yb7p2")))) + (build-system pyproject-build-system) (arguments - `(#:phases - (modify-phases %standard-phases - ;; cooler requests cytoolz<0.11. It only uses cytoolz for "compose", - ;; which composes two functions. - (add-after 'unpack 'use-recent-cytoolz - (lambda _ - (substitute* '("requirements.txt" - "cooler.egg-info/requires.txt") - (("cytoolz.*<.*0.11") "cytoolz")))) - ;; This version of flake8 just won't work with this version of - ;; pytest, because of dependency pinning. 
- (add-after 'unpack 'do-not-use-flake8 - (lambda _ - (substitute* "setup.cfg" - (("addopts = --flake8") "addopts = ")))) - (add-after 'unpack 'patch-tests - (lambda _ - (substitute* "tests/test_create.py" - (("def test_roundtrip") - (string-append "@pytest.mark.skip(reason=\"requires network " - "access to genome.ucsc.edu\")\n" - "def test_roundtrip"))) - (substitute* "tests/test_util.py" - (("def test_fetch_chromsizes") - (string-append "@pytest.mark.skip(reason=\"requires network " - "access to genome.ucsc.edu\")\n" - "def test_fetch_chromsizes")) - ;; See https://github.com/open2c/cooler/issues/287 - (("skipif\\(six.PY2, reason=\"Scipy on Py2 is too old\"") - "skip(reason=\"Scipy is too new\"")) - ;; This test depends on ipytree, which contains a lot of minified - ;; JavaScript. - (substitute* "tests/test_fileops.py" - (("def test_print_trees") - "def _test_print_trees")))) - (replace 'check - (lambda* (#:key tests? #:allow-other-keys) - (when tests? - (invoke "python" "-m" "pytest" "-v"))))))) + (list + #:test-flags + '(list "-k" + (string-append + ;; These tests download files from the internet. + "not test_fetch_chromsizes" + " and not test_roundtrip" + ;; This test depends on ipytree, which contains a lot of + ;; minified JavaScript. 
+ " and not test_print_trees")))) (propagated-inputs (list python-asciitree python-biopython @@ -16240,12 +16960,13 @@ fasta subsequences.") python-pysam python-pyyaml python-scipy - python-simplejson - python-six - python-sparse)) + python-simplejson)) (native-inputs - (list python-codecov python-mock python-pytest python-pytest-cov - python-pytest-flake8)) + (list python-coverage + python-hatchling + python-isort + python-pytest + python-pytest-cov)) ;; Almost all the projects of the Mirnylab are moved under Open2C umbrella (home-page "https://github.com/open2c/cooler") (synopsis "Sparse binary format for genomic interaction matrices") @@ -16302,7 +17023,7 @@ includes operations like compartment, insulation or peak calling.") (define-public python-hicmatrix (package (name "python-hicmatrix") - (version "16") + (version "17.1") (source (origin ;;Pypi sources do not contain any test @@ -16313,16 +17034,8 @@ includes operations like compartment, insulation or peak calling.") (file-name (git-file-name name version)) (sha256 (base32 - "00b9l62j4knrsdp7l3pawi9cqcsl09diycbhmmnar850bzssmq4f")))) + "14gq7r9b64ff56l5f8h8zc2i2y3xri646jl0anb74japqxrwvlna")))) (build-system pyproject-build-system) - (arguments - (list - #:phases - '(modify-phases %standard-phases - (add-after 'unpack 'remove-invalid-syntax - (lambda _ - (substitute* "setup.py" - ((".\\*\"") "\""))))))) (propagated-inputs (list python-cooler python-intervaltree @@ -16353,7 +17066,16 @@ the HiCExplorer and pyGenomeTracks packages.") (file-name (git-file-name name version)) (sha256 (base32 - "1yavgxry38g326z10bclvdf8glmma05fxj5m73h15m1r2l9xmw3v")))) + "1yavgxry38g326z10bclvdf8glmma05fxj5m73h15m1r2l9xmw3v")) + (modules '((guix build utils))) + ;; setup.py is malformed. The requirements are defined using a catchall + ;; pattern for the patch version number. This has been fixed in version + ;; 3.7.3, but we cannot upgrade to this version yet, since some Guix + ;; packages are not new enough. 
(See upstream commit + ;; 4845c715ec7b105e938d0c2426e27d0181690bfe for the fix). + (snippet '(substitute* "setup.py" + (("\\.\\*") + ""))))) (build-system pyproject-build-system) (arguments (list @@ -16431,7 +17153,18 @@ the HiCExplorer and pyGenomeTracks packages.") "general/test_hicHyperoptDetectLoopsHiCCUPS.py" "general/test_hicAggregateContacts.py" "general/test_hicInterIntraTAD.py") - (("^memory =.*") "memory = 1\n")))))))) + (("^memory =.*") "memory = 1\n"))))) + ;; This is fixed in version 3.7.3, but we cannot upgrade yet as we + ;; don't have Pandas 2. + (add-after 'unpack 'scipy-compatibility + (lambda _ + (substitute* "hicexplorer/hicAverageRegions.py" + (("from scipy.sparse import csr_matrix, save_npz, lil_matrix") + "from scipy.sparse import csr_matrix, save_npz, lil_matrix, coo_matrix") + (("summed_matrix = np.array\\(summed_matrix\\)") + "summed_matrix = coo_matrix(summed_matrix)") + (("data = summed_matrix\\[np.nonzero\\(summed_matrix\\)\\]") + "data = summed_matrix.toarray()[np.nonzero(summed_matrix)]"))))))) (propagated-inputs (list python-biopython python-cleanlab-1 @@ -17165,7 +17898,7 @@ polymorphisms) and indels with respect to a reference genome and more.") (define-public cnvkit (package (name "cnvkit") - (version "0.9.9") + (version "0.9.10") (source (origin (method git-fetch) @@ -17174,17 +17907,8 @@ polymorphisms) and indels with respect to a reference genome and more.") (commit (string-append "v" version)))) (file-name (git-file-name name version)) (sha256 - (base32 "1q4l7jhr1k135an3n9aa9wsid5lk6fwxb0hcldrr6v6y76zi4gj1")))) + (base32 "0r303pqjg70zpxa564bavbfj99c6di0dafgqqwx2vh4vfsiif94q")))) (build-system pyproject-build-system) - (arguments - (list - #:phases - '(modify-phases %standard-phases - ;; See upstream commit eee0f6eaec57d5c6e58142d661979f3aacc5f76a - (add-after 'unpack 'compatibility - (lambda _ - (substitute* "setup.py" - (("'joblib.*") ""))))))) (propagated-inputs (list python-biopython python-future @@ -17199,6 +17923,7 @@ 
polymorphisms) and indels with respect to a reference genome and more.") python-scipy ;; R packages r-dnacopy)) + (inputs (list r-minimal)) ;for tests (home-page "https://cnvkit.readthedocs.org/") (synopsis "Copy number variant detection from targeted DNA sequencing") (description @@ -17747,12 +18472,40 @@ The tool enables the de novo search for new structural elements and facilitates comparative analysis of known RNA families.") (license license:bsd-3))) +(define-public r-databaselinke-r + (let ((commit "cf3d6cc3d36f2e1c9a557390232e9a8ed5abb7fd") + (revision "1")) + (package + (name "r-databaselinke-r") + (version (git-version "1.7.0" revision commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/vertesy/DatabaseLinke.R") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 "0hk76sb3w1v8a7c1knpc572ypsbgqlrv0p49c9y55a0dr12n16s1")))) + (properties `((upstream-name . "DatabaseLinke.R"))) + (build-system r-build-system) + (propagated-inputs (list r-readwriter)) + (home-page "https://github.com/vertesy/DatabaseLinke.R") + (synopsis + "Parse links to databases from your list of gene symbols") + (description + "This package provides a set of functions to parse and open (search +query) links to genomics related and other websites for R. 
Useful when you +want to explore e.g.: the function of a set of differentially expressed +genes.") + (license license:gpl3)))) + (define-public r-seurat-utils - (let ((commit "0b6f5b548a49148cfbeaa654e8a618c0a020afa5") + (let ((commit "c0374cc9e25ce391ba8013fda0f8c7babbb9201d") (revision "1")) (package (name "r-seurat-utils") - (version (git-version "1.6.5" revision commit)) + (version (git-version "2.5.0" revision commit)) (source (origin (method git-fetch) (uri (git-reference @@ -17761,12 +18514,15 @@ facilitates comparative analysis of known RNA families.") (file-name (git-file-name name version)) (sha256 (base32 - "1mn64h375mkj6x4ix5493z32gqg96yc507j5jr0lx9g5wk1bf762")))) + "15l86b43q245gzz7gsr5rhs4sir74lc14d64yqxfqcb0zrb2bzzd")))) (properties `((upstream-name . "Seurat.utils"))) (build-system r-build-system) (propagated-inputs (list r-codeandroll2 r-cowplot + r-databaselinke-r r-dplyr + r-enhancedvolcano + r-foreach r-ggcorrplot r-ggexpress r-ggplot2 @@ -17774,15 +18530,21 @@ facilitates comparative analysis of known RNA families.") r-ggrepel r-hgnchelper r-htmlwidgets + r-job + r-magrittr r-markdownhelpers r-markdownreports r-matrix r-matrixstats + r-pheatmap + r-plotly r-princurve + r-qs r-r-utils r-readr r-readwriter r-reshape2 + r-rstudioapi r-scales r-seurat r-soupx @@ -17791,6 +18553,7 @@ facilitates comparative analysis of known RNA families.") r-stringr r-tibble r-tictoc + r-tidyverse r-vroom)) (home-page "https://github.com/vertesy/Seurat.utils") (synopsis "Collection of utility functions for Seurat") @@ -17842,7 +18605,7 @@ updated much more frequently.") (define-public python-ctxcore (package (name "python-ctxcore") - (version "0.1.1") + (version "0.2.0") (source (origin (method git-fetch) @@ -17852,8 +18615,8 @@ updated much more frequently.") (file-name (git-file-name name version)) (sha256 (base32 - "16nlj7z8pirgjad7vlgm7226b3hpw4a7n967vyfg26dsf5n8k70d")))) - (build-system python-build-system) + 
"0nv4lc46cnzpg5gcdxrsv7b4srmkq55zl3rcadw5pn3yyz5fzd2k")))) + (build-system pyproject-build-system) (arguments (list #:phases @@ -17865,12 +18628,12 @@ updated much more frequently.") (setenv "SETUPTOOLS_SCM_PRETEND_VERSION" #$version)))))) (propagated-inputs (list python-cytoolz - python-numba python-frozendict + python-numba python-numpy python-pandas + python-pyarrow python-pyyaml - python-pyarrow-0.16 python-tqdm)) (native-inputs (list python-pytest @@ -17918,67 +18681,77 @@ tree-based ensemble regressors.") (license license:bsd-3))) (define-public pyscenic - (package - (name "pyscenic") - (version "0.11.2") - (source - (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/aertslab/pySCENIC") - (commit version))) - (file-name (git-file-name name version)) - (sha256 - (base32 - "0pbmmr1zdb1vbbs6wx357s59d13pna6x03wq8blj6ckjws8bbq73")))) - (build-system python-build-system) - (arguments - `(#:phases - (modify-phases %standard-phases - ;; Numba needs a writable dir to cache functions. 
- (add-before 'check 'set-numba-cache-dir - (lambda _ - (setenv "NUMBA_CACHE_DIR" "/tmp"))) - (replace 'check - (lambda _ - (invoke "pytest" "-v")))))) - (propagated-inputs - (list python-ctxcore - python-cytoolz - python-multiprocessing-on-dill - python-llvmlite - python-numba - python-attrs - python-frozendict - python-numpy - python-pandas - python-cloudpickle - python-dask - python-distributed - python-arboreto - python-boltons - python-setuptools - python-pyyaml - python-tqdm - python-interlap - python-umap-learn - python-loompy - python-networkx - python-scipy - python-fsspec - python-requests - python-aiohttp - python-scikit-learn)) - (native-inputs - (list python-pytest)) - (home-page "https://scenic.aertslab.org/") - (synopsis "Single-Cell regulatory network inference and clustering") - (description - "pySCENIC is a Python implementation of the SCENIC pipeline (Single-Cell + ;; Latest commit from the update-pyarrow branch + (let ((commit "5f170fdf474548c37ab381d1849c662820d658ee") + (revision "1")) + (package + (name "pyscenic") + (version (git-version "0.11.2" revision commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/aertslab/pySCENIC") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "03qkvy400rjndg2ds6bhcaprir71mqr2v3yv9vd77lcnzxgw3s0z")))) + (build-system pyproject-build-system) + (arguments + (list + #:phases + '(modify-phases %standard-phases + ;; The cli modules referenced here have been removed, so this + ;; breaks the sanity check. + (add-after 'unpack 'do-not-reference-deleted-modules + (lambda _ + (substitute* "setup.py" + (("'db2feather = .*',") "") + (("'invertdb = .*',") "") + (("'gmt2regions = pyscenic.cli.gmt2regions:main'") "")))) + ;; Numba needs a writable dir to cache functions. 
+ (add-before 'check 'set-numba-cache-dir + (lambda _ + (setenv "NUMBA_CACHE_DIR" "/tmp")))))) + (propagated-inputs + (list python-ctxcore + python-cytoolz + python-multiprocessing-on-dill + python-llvmlite + python-numba + python-attrs + python-frozendict + python-numpy + python-pandas + python-cloudpickle + python-dask + python-pyarrow ;XXX for dask + python-distributed + python-arboreto + python-boltons + python-setuptools + python-pyyaml + python-tqdm + python-interlap + python-umap-learn + python-loompy + python-networkx + python-scipy + python-fsspec + python-requests + python-aiohttp + python-scikit-learn)) + (native-inputs + (list python-pytest)) + (home-page "https://scenic.aertslab.org/") + (synopsis "Single-Cell regulatory network inference and clustering") + (description + "pySCENIC is a Python implementation of the SCENIC pipeline (Single-Cell rEgulatory Network Inference and Clustering) which enables biologists to infer transcription factors, gene regulatory networks and cell types from single-cell RNA-seq data.") - (license license:gpl3+))) + (license license:gpl3+)))) (define-public python-ikarus (package @@ -19636,11 +20409,11 @@ translates between different variant encodings.") (license license:asl2.0)))) (define-public r-signac - (let ((commit "af4142724b72574d957f7fe3d422ed5828ec3ad0") + (let ((commit "8ecdde291676102bb3b503f48926c993354b5471") (revision "1")) (package (name "r-signac") - (version (git-version "1.9.0" revision commit)) + (version (git-version "1.12.0" revision commit)) (source (origin (method git-fetch) (uri (git-reference @@ -19649,7 +20422,7 @@ translates between different variant encodings.") (file-name (git-file-name name version)) (sha256 (base32 - "0ps0lp1dcy20r6lakil6ih81m04r0s6fnirvfjf01sfs0gsyddww")))) + "0idkcqb1i13m164nxbdwgs3vflw0cxzm2ir84aw0i811vx9lqz8c")))) (properties `((upstream-name . 
"Signac"))) (build-system r-build-system) (inputs (list zlib)) @@ -20027,24 +20800,33 @@ aligner.") (uri (pypi-uri "scvelo" version)) (sha256 (base32 "0h5ha1459ljs0qgpnlfsw592i8dxqn6p9bl08l1ikpwk36baxb7z")))) - (build-system python-build-system) + (build-system pyproject-build-system) (arguments - `(#:phases - (modify-phases %standard-phases - ;; Numba needs a writable dir to cache functions. - (add-before 'check 'set-numba-cache-dir - (lambda _ - (setenv "NUMBA_CACHE_DIR" "/tmp"))) - (replace 'check - (lambda* (#:key outputs tests? #:allow-other-keys) - (when tests? - ;; The discovered test file names must match the names of the - ;; compiled files, so we cannot run the tests from - ;; /tmp/guix-build-*. - (with-directory-excursion - (string-append (assoc-ref outputs "out") - "/lib/python3.10/site-packages/scvelo/core/tests/") - (invoke "pytest" "-v")))))))) + (list + #:test-flags + ;; XXX: these two tests fail for unknown reasons + '(list "-k" "not test_perfect_fit and not test_perfect_fit_2d") + #:phases + #~(modify-phases %standard-phases + (add-after 'unpack 'matplotlib-compatibility + (lambda _ + (substitute* "scvelo/settings.py" + (("warnings.filterwarnings\\(\"ignore\", category=cbook.mplDeprecation\\)") + "")))) + ;; Numba needs a writable dir to cache functions. + (add-before 'check 'set-numba-cache-dir + (lambda _ + (setenv "NUMBA_CACHE_DIR" "/tmp"))) + (replace 'check + (lambda* (#:key tests? test-flags #:allow-other-keys) + (when tests? + ;; The discovered test file names must match the names of the + ;; compiled files, so we cannot run the tests from + ;; /tmp/guix-build-*. 
+ (with-directory-excursion + (string-append #$output + "/lib/python3.10/site-packages/scvelo/core/tests/") + (apply invoke "pytest" "-v" test-flags)))))))) (propagated-inputs (list python-anndata python-hnswlib @@ -20620,15 +21402,18 @@ based on the pairwise alignment of hidden Markov models (HMMs).") (define-public wfmash (package (name "wfmash") - (version "0.10.5") + (version "0.12.5") (source (origin - (method url-fetch) - (uri (string-append "https://github.com/ekg/wfmash/releases/download/v" - version "/wfmash-v" version ".tar.gz")) + ;; There are no release tarballs after version 0.10.5. + (method git-fetch) + (uri (git-reference + (url "https://github.com/waveygang/wfmash") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) (sha256 (base32 - "1jsvnnh14h3ir4l13qhmglhd25kzwvni9apgvr1lbikqwgrpkiq4")) + "1qh2chnwp7nqgp88afc4xzdkd21vh6cfqq73siqw7vc0qinqadm6")) (snippet #~(begin (use-modules (guix build utils)) @@ -20654,8 +21439,7 @@ based on the pairwise alignment of hidden Markov models (HMMs).") (let ((samtools (search-input-file inputs "/bin/samtools"))) ;; This is the easiest way to access the data ;; needed for the test suite. - (symlink (string-append "../wfmash-v" #$version "/data") - "data") + (symlink "../source/data" "data") (and ;; This test takes 60 minutes on riscv64-linux. #$@(if (not (target-riscv64?)) @@ -20747,8 +21531,7 @@ based on the pairwise alignment of hidden Markov models (HMMs).") (lambda _ (invoke "bin/wfmash" "data/reads.255bps.fa.gz" - "data/reads.255bps.fa.gz" - "-X" "-w" "16"))) + "-w" "16" "-s" "100" "-L"))) (invoke "head" "reads.255bps.paf")))))))))) (inputs (list atomic-queue @@ -20765,7 +21548,7 @@ distances and the wavefront alignment algorithm. It is a fork of MashMap that implements base-level alignment via the wflign tiled wavefront global alignment algorithm. 
It completes MashMap with a high-performance alignment module capable of computing base-level alignments for very large sequences.") - (home-page "https://github.com/ekg/wfmash") + (home-page "https://github.com/waveygang/wfmash") (license license:expat))) (define-public gdcm |