summaryrefslogtreecommitdiff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r--gnu/packages/bioinformatics.scm882
1 files changed, 682 insertions, 200 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index f6a7a83fe3..d684e4249b 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -4,7 +4,7 @@
;;; Copyright © 2015, 2016, 2018, 2019, 2020 Pjotr Prins <pjotr.guix@thebird.nl>
;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
;;; Copyright © 2016, 2020, 2021 Roel Janssen <roel@gnu.org>
-;;; Copyright © 2016, 2017, 2018, 2019, 2020, 2021, 2022 Efraim Flashner <efraim@flashner.co.il>
+;;; Copyright © 2016-2023 Efraim Flashner <efraim@flashner.co.il>
;;; Copyright © 2016, 2020, 2022 Marius Bakke <marius@gnu.org>
;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
@@ -56,7 +56,6 @@
#:use-module (guix build-system go)
#:use-module (guix build-system haskell)
#:use-module (guix build-system meson)
- #:use-module (guix build-system ocaml)
#:use-module (guix build-system perl)
#:use-module (guix build-system pyproject)
#:use-module (guix build-system python)
@@ -512,6 +511,63 @@ BED, GFF/GTF, VCF.")
(inputs
(list samtools zlib))))
+(define-public bitmapperbs
+ (package
+ (name "bitmapperbs")
+ (version "1.0.2.3")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/chhylp123/BitMapperBS/")
+ (commit (string-append "v" version))))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "02ksssfnvmpskld0a2016smfz5nrzm3d90v8974f3cpzywckvp8v"))
+ (modules '((guix build utils)))
+ ;; This package bundles a modified copy of htslib, so we cannot
+ ;; unbundle it.
+ (snippet
+ '(begin
+ (delete-file-recursively "libdivsufsort-2.0.1")
+ (delete-file-recursively "pSAscan-0.1.0")))))
+ (build-system gnu-build-system)
+ (arguments
+ (list
+ #:tests? #false
+ #:make-flags '(list "bitmapperBS")
+ ;; The build system checks for CPU features. For this reason, we want
+ ;; users to build it locally instead of using substitutes.
+ #:substitutable? #false
+ #:phases
+ #~(modify-phases %standard-phases
+ (add-after 'unpack 'fix-build-system
+ (lambda _
+ (substitute* "Makefile"
+ (("make prefix=../htslib_aim install")
+ (string-append "make prefix=" #$output " install-so"))
+ (("htslib_aim/include") "htslib")
+ (("htslib_aim/lib")
+ (string-append #$output "/lib")))))
+ (add-after 'unpack 'patch-references-to-psascan
+ (lambda* (#:key inputs #:allow-other-keys)
+ (substitute* "Makefile"
+ (("\"(./)?psascan" pre all)
+ (string-append "\"" pre (search-input-file inputs "/bin/psascan"))))))
+ (delete 'configure)
+ (replace 'install
+ (lambda _
+ (install-file "bitmapperBS"
+ (string-append #$output "/bin/")))))))
+ (inputs
+ (list libdivsufsort psascan zlib))
+ (home-page "https://github.com/chhylp123/BitMapperBS/")
+ (synopsis "Read aligner for whole-genome bisulfite sequencing")
+ (description
+ "BitMapperBS is memory-efficient aligner that is designed for
+whole-genome bisulfite sequencing (WGBS) reads from directional protocol.")
+ (license license:asl2.0)))
+
(define-public cellsnp-lite
;; Last release is from November 2021 and does not contain fixes.
(let ((commit "0885d746b0b1ea65c8ef92f8943ca7669ca9734a")
@@ -663,6 +719,53 @@ suite native in R.")
for all types of microbial diversity analyses.")
(license license:expat))))
+(define-public r-conospanel
+ (let ((commit "39e76b201a783b4e92fd615010a735a61746fbb9")
+ (revision "1"))
+ (package
+ (name "r-conospanel")
+ (version (git-version "1.0.0" revision commit))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/kharchenkolab/conosPanel")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "1zf0aj5d4iaxc3ghvjnaja5qby1avlmljzh94bpyvxbd359z9snn"))))
+ (properties `((upstream-name . "conosPanel")))
+ (build-system r-build-system)
+ (home-page "https://github.com/kharchenkolab/conosPanel")
+ (synopsis "Data for the conos package")
+ (description "The data within this package is a panel of four samples,
+each with 3000 cells. There are two samples which are bone marrow (BM), and
+two samples which are cord blood (CB).")
+ (license license:gpl3))))
+
+(define-public r-p2data
+ (let ((commit "7d4c0e17d7899f9d9b08ab2bf455abe150912f4c")
+ (revision "1"))
+ (package
+ (name "r-p2data")
+ (version (git-version "1.0.0" revision commit))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/kharchenkolab/p2data")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "1hadrldldxvhqs43aqs3c88bqfgql3wcfkbll3jz7fh6z7p3x324"))))
+ (properties `((upstream-name . "p2data")))
+ (build-system r-build-system)
+ (home-page "https://github.com/kharchenkolab/p2data")
+ (synopsis "Data for pagoda2")
+ (description "This package contains data used by pagoda2. The data
+within this package are the 3000 bone marrow cells used for vignettes.")
+ (license license:gpl3))))
+
(define-public r-rhtslib12
(let ((commit "ee186daf04876969c7f31c16a0e0fda8e7c16a30")
(revision "1"))
@@ -690,6 +793,41 @@ high-throughput sequence analysis. The package is primarily useful to
developers of other R packages who wish to make use of HTSlib.")
(license license:lgpl2.0+))))
+(define-public r-streamgraph
+ (let ((commit "76f7173ec89d456ace5943a512e20b1f6810bbcb")
+ (revision "1"))
+ (package
+ (name "r-streamgraph")
+ (version (git-version "0.9.0" revision commit))
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/hrbrmstr/streamgraph")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "010rhnby5a9dg08jvlkr65b3p9iipdxi2f5m1k6j53s80p25yvig"))))
+ (properties `((upstream-name . "streamgraph")))
+ (build-system r-build-system)
+ (propagated-inputs
+ (list r-dplyr
+ r-htmltools
+ r-htmlwidgets
+ r-magrittr
+ r-tidyr
+ r-xts))
+ (native-inputs (list r-knitr))
+ (home-page "https://github.com/hrbrmstr/streamgraph")
+ (synopsis "Htmlwidget for building streamgraph visualizations")
+ (description
+ "A streamgraph is a type of stacked area chart. It represents the
+evolution of a numeric variable for several groups. Areas are usually
+displayed around a central axis, and edges are rounded to give a flowing
+shape. This package provides an @code{htmlwidget} for building streamgraph
+visualizations.")
+ (license license:expat))))
+
(define-public pbbam
(package
(name "pbbam")
@@ -1057,6 +1195,83 @@ from high-throughput single-cell RNA sequencing (scRNA-seq) data.")
and sequence consensus.")
(license license:expat)))
+(define-public python-demuxem
+ (package
+ (name "python-demuxem")
+ (version "0.1.7")
+ (source (origin
+ (method url-fetch)
+ (uri (pypi-uri "demuxEM" version))
+ (sha256
+ (base32
+ "1bhyxqjk44bmyd26m1smapf68wyf7252kk65i27k50dd3kswgnd6"))))
+ (build-system pyproject-build-system)
+ (propagated-inputs
+ (list python-docopt
+ python-importlib-metadata
+ python-numpy
+ python-pandas
+ python-pegasusio
+ python-scikit-learn
+ python-scipy
+ python-seaborn))
+ (native-inputs (list python-cython python-setuptools-scm))
+ (home-page "https://github.com/lilab-bcb/demuxEM")
+ (synopsis "Analyze cell-hashing/nucleus-hashing data")
+ (description
+ "This is a Python module for analyzing cell-hashing/nucleus-hashing data.
+It is the demultiplexing module of Pegasus, which is used by Cumulus in the
+demultiplexing step.")
+ (license license:bsd-3)))
+
+(define-public python-doubletdetection
+ (package
+ (name "python-doubletdetection")
+ (version "4.2")
+ (source (origin
+ (method url-fetch)
+ (uri (pypi-uri "doubletdetection" version))
+ (sha256
+ (base32
+ "0v0a19014h4p6x8pyz1s78xn3q5w5166cysvg574z6vw79a3s9vp"))))
+ (build-system pyproject-build-system)
+ (arguments
+ (list
+ #:tests? #false ;there are none
+ #:phases
+ #~(modify-phases %standard-phases
+ (add-after 'unpack 'use-poetry-core
+ (lambda _
+ ;; Patch to use the core poetry API.
+ (substitute* "pyproject.toml"
+ (("poetry.masonry.api")
+ "poetry.core.masonry.api")))))))
+ (propagated-inputs
+ (list python-anndata
+ python-ipywidgets
+ python-leidenalg
+ python-vtraag-louvain
+ python-matplotlib
+ python-numpy
+ python-pandas
+ python-phenograph
+ python-scanpy
+ python-scipy
+ python-tqdm))
+ (native-inputs
+ (list python-black
+ python-flake8
+ python-poetry-core
+ python-pytest
+ python-pre-commit))
+ (home-page "https://github.com/JonathanShor/DoubletDetection")
+ (synopsis
+ "This is a package to detect doublets in single-cell RNA-seq count matrices")
+ (description
+ "This package provides a method to detect and enable removal of doublets
+from single-cell RNA-sequencing.")
+ (license license:expat)))
+
(define-public python-hclust2
(package
(name "python-hclust2")
@@ -1103,6 +1318,97 @@ protocol. It provides a simple and reliable way to retrieve genomic data from
servers supporting the protocol.")
(license license:asl2.0)))
+(define-public python-pegasusio
+ (package
+ (name "python-pegasusio")
+ (version "0.7.1")
+ (source (origin
+ (method url-fetch)
+ (uri (pypi-uri "pegasusio" version))
+ (sha256
+ (base32
+ "0gqygspdy398vjymdy6756jmk99s7fhwav9rivdx59kpqjcdxaz9"))))
+ (build-system pyproject-build-system)
+ (propagated-inputs
+ (list python-anndata
+ python-docopt
+ python-h5py
+ python-importlib-metadata
+ python-loompy
+ python-natsort
+ python-numpy
+ python-pandas
+ python-pillow
+ python-scipy
+ python-zarr))
+ (native-inputs (list python-cython python-setuptools-scm))
+ (home-page "https://github.com/lilab-bcb/pegasusio")
+ (synopsis "Read or write single-cell genomics data")
+ (description
+ "Pegasusio is a Python package for reading or writing single-cell
+genomics data.")
+ (license license:bsd-3)))
+
+(define-public python-phenograph
+ (package
+ (name "python-phenograph")
+ (version "1.5.7")
+ (source (origin
+ (method url-fetch)
+ (uri (pypi-uri "PhenoGraph" version))
+ (sha256
+ (base32
+ "0nji449mzwgp1f87iknl5fmnjdkrhkfkapxvafxdw01s0jg8zcj6"))
+ (modules '((guix build utils)))
+ ;; Remove bundled binaries
+ (snippet
+ '(delete-file-recursively "phenograph/louvain"))))
+ (build-system pyproject-build-system)
+ (arguments
+ (list
+ #:phases
+ #~(modify-phases %standard-phases
+ ;; This test can never succeed because Q_leiden is never set to
+ ;; anything other than None.
+ (add-after 'unpack 'disable-leiden-test
+ (lambda _
+ (substitute* "tests/test_cluster.py"
+ (("def test_run_leiden") "def _test_run_leiden"))))
+ (add-after 'unpack 'patch-louvain
+ (lambda* (#:key inputs #:allow-other-keys)
+ (substitute* "phenograph/core.py"
+ (("lpath = os.path.*")
+ (string-append "lpath = \""
+ (dirname (search-input-file inputs "/bin/community"))
+ "\"\n"))
+ (("linux-(community|hierarchy|convert)" _ thing) thing)
+ ;; Do not write binaries, because the unmodified "convert"
+ ;; from louvain only knows how to process plain text files.
+ (("with open\\(filename \\+ \".bin\", \"w\\+b\"\\) as f:")
+ "with open(filename + \".bin\", \"w+\") as f:")
+ (("f.writelines\\(\\[e for t in zip\\(ij, s\\) for e in t\\]\\)")
+ "for [src, dest], weight in zip(ij, s): \
+f.write(src.astype(\"str\") + ' ' + \
+dest.astype(\"str\") + ' ' + \
+weight.astype(\"str\") + '\\n')")))))))
+ (inputs
+ (list louvain))
+ (propagated-inputs
+ (list python-leidenalg
+ python-numpy
+ python-psutil
+ python-scikit-learn
+ python-scipy))
+ (native-inputs
+ (list python-pytest))
+ (home-page "https://github.com/dpeerlab/PhenoGraph.git")
+ (synopsis "Graph-based clustering for high-dimensional single-cell data")
+ (description
+ "PhenoGraph is a clustering method designed for high-dimensional
+single-cell data. It works by creating a graph representing phenotypic
+similarities between cells and then identifying communities in this graph.")
+ (license license:expat)))
+
(define-public python-phylophlan
(package
(name "python-phylophlan")
@@ -1317,7 +1623,7 @@ use-case, we encourage users to compose functions to achieve their goals.")
python-scipy))
(native-inputs
(list python-cython python-pytest python-pytest-cov python-nose))
- (home-page "http://www.biom-format.org")
+ (home-page "https://www.biom-format.org")
(synopsis "Biological Observation Matrix (BIOM) format utilities")
(description
"The BIOM file format is designed to be a general-use format for
@@ -2244,7 +2550,7 @@ errors at the end of reads.")
("python" ,python-wrapper)))
(native-inputs
(list perl perl-clone perl-test-deep perl-test-simple))
- (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
+ (home-page "https://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
(synopsis "Fast and sensitive nucleotide sequence read aligner")
(description
"Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
@@ -2286,7 +2592,7 @@ gapped, local, and paired-end alignment modes.")
(inputs
(list python-wrapper tbb-2020 zlib))
(supported-systems '("x86_64-linux"))
- (home-page "http://bowtie-bio.sourceforge.net/index.shtml")
+ (home-page "https://bowtie-bio.sourceforge.net/index.shtml")
(synopsis "Fast aligner for short nucleotide sequence reads")
(description
"Bowtie is a fast, memory-efficient short read aligner. It aligns short
@@ -2407,7 +2713,7 @@ splice junctions between exons.")
;; Non-portable SSE instructions are used so building fails on platforms
;; other than x86_64.
(supported-systems '("x86_64-linux"))
- (home-page "http://bio-bwa.sourceforge.net/")
+ (home-page "https://bio-bwa.sourceforge.net/")
(synopsis "Burrows-Wheeler sequence aligner")
(description
"BWA is a software package for mapping low-divergent sequences against a
@@ -3000,7 +3306,7 @@ time.")
zlib))
(native-inputs
(list python-cython python-nose))
- (home-page "http://crossmap.sourceforge.net/")
+ (home-page "https://crossmap.sourceforge.net/")
(synopsis "Convert genome coordinates between assemblies")
(description
"CrossMap is a program for conversion of genome coordinates or annotation
@@ -3307,6 +3613,35 @@ and record oriented data modeling and the Semantic Web.")
resources for bioinformatics.")
(license license:bsd-3)))
+(define-public python-scrublet
+ (package
+ (name "python-scrublet")
+ (version "0.2.3")
+ (source (origin
+ (method url-fetch)
+ (uri (pypi-uri "scrublet" version))
+ (sha256
+ (base32
+ "0fk5pwk12yz9wpvwkl6j2l2g28f3x35b9r9n5bw6f0i9f0qgd191"))))
+ (build-system pyproject-build-system)
+ (arguments '(#:tests? #false)) ;there are none
+ (propagated-inputs
+ (list python-annoy
+ python-cython
+ python-matplotlib
+ python-numba
+ python-numpy
+ python-pandas
+ python-scikit-image
+ python-scikit-learn
+ python-scipy
+ python-umap-learn))
+ (home-page "https://github.com/swolock/scrublet")
+ (synopsis "Tool to indentify and remove doublets in single-cell data")
+ (description "This package provides a tool for identifying and removing
+doublets in single-cell RNA-seq data.")
+ (license license:expat)))
+
(define-public cwltool
(package
(name "cwltool")
@@ -4595,7 +4930,7 @@ high-throughput sequencing (HTS) assays")
("java-xz" ,java-xz)))
(native-inputs
(list java-testng))
- (home-page "http://samtools.github.io/htsjdk/")
+ (home-page "https://samtools.github.io/htsjdk/")
(synopsis "Java API for high-throughput sequencing data (HTS) formats")
(description
"HTSJDK is an implementation of a unified Java library for accessing
@@ -4638,7 +4973,7 @@ manipulating HTS data.")
("java-xz" ,java-xz)))
(native-inputs
(list java-junit))
- (home-page "http://samtools.github.io/htsjdk/")
+ (home-page "https://samtools.github.io/htsjdk/")
(synopsis "Java API for high-throughput sequencing data (HTS) formats")
(description
"HTSJDK is an implementation of a unified Java library for accessing
@@ -4743,7 +5078,7 @@ manipulating HTS data.")
(list java-htsjdk java-guava))
(native-inputs
(list java-testng))
- (home-page "http://broadinstitute.github.io/picard/")
+ (home-page "https://broadinstitute.github.io/picard/")
(synopsis "Tools for manipulating high-throughput sequencing data and formats")
(description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
@@ -4832,7 +5167,7 @@ Class-Path: /~a \
(list java-htsjdk-2.10.1))
(native-inputs
(list java-testng java-guava))
- (home-page "http://broadinstitute.github.io/picard/")
+ (home-page "https://broadinstitute.github.io/picard/")
(synopsis "Tools for manipulating high-throughput sequencing data and formats")
(description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
@@ -4961,56 +5296,70 @@ VCF.")
(define-public fastqc
(package
(name "fastqc")
- (version "0.11.5")
+ (version "0.11.9")
(source
(origin
- (method url-fetch)
- (uri (string-append "http://www.bioinformatics.babraham.ac.uk/"
- "projects/fastqc/fastqc_v"
- version "_source.zip"))
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/s-andrews/FastQC")
+ (commit (string-append "v" version))))
+ (file-name (git-file-name name version))
(sha256
(base32
- "18rrlkhcrxvvvlapch4dpj6xc6mpayzys8qfppybi8jrpgx5cc5f"))))
+ "00y9drm0bkpxw8xfl8ysss18jmnhj8blgqgr6fpa58rkpfcbg8qk"))
+ (snippet
+ '(for-each delete-file
+ '("cisd-jhdf5.jar"
+ "sam-1.103.jar"
+ "jbzip2-0.9.jar")))))
(build-system ant-build-system)
(arguments
- `(#:tests? #f ; there are no tests
- #:build-target "build"
- #:phases
- (modify-phases %standard-phases
- (add-after 'unpack 'fix-dependencies
- (lambda* (#:key inputs #:allow-other-keys)
- (substitute* "build.xml"
- (("jbzip2-0.9.jar")
- (search-input-file inputs "/share/java/jbzip2.jar"))
- (("sam-1.103.jar")
- (search-input-file inputs
- "/share/java/sam-1.112.jar"))
- (("cisd-jhdf5.jar")
- (search-input-file inputs
- "/share/java/sis-jhdf5.jar")))))
- ;; There is no installation target
- (replace 'install
- (lambda* (#:key inputs outputs #:allow-other-keys)
- (let* ((out (assoc-ref outputs "out"))
- (bin (string-append out "/bin"))
- (share (string-append out "/share/fastqc/"))
- (exe (string-append share "/fastqc")))
- (for-each mkdir-p (list bin share))
- (copy-recursively "bin" share)
- (substitute* exe
- (("my \\$java_bin = 'java';")
- (string-append "my $java_bin = '"
- (assoc-ref inputs "java")
- "/bin/java';")))
- (chmod exe #o555)
- (symlink exe (string-append bin "/fastqc"))
- #t))))))
+ (list
+ #:tests? #f ;there are no tests
+ #:build-target "build"
+ #:phases
+ #~(modify-phases %standard-phases
+ (add-after 'unpack 'fix-dependencies
+ (lambda* (#:key inputs #:allow-other-keys)
+ (substitute* '("build.xml" ".classpath")
+ (("jbzip2-0.9.jar")
+ (search-input-file inputs "/share/java/jbzip2.jar"))
+ (("sam-1.103.jar")
+ (search-input-file inputs "/share/java/sam-1.112.jar"))
+ (("cisd-jhdf5.jar")
+ (search-input-file inputs "/share/java/sis-jhdf5.jar")))))
+ ;; There is no installation target
+ (replace 'install
+ (lambda* (#:key inputs #:allow-other-keys)
+ (let* ((bin (string-append #$output "/bin"))
+ (share (string-append #$output "/share/fastqc/"))
+ (exe (string-append share "/fastqc")))
+ (for-each mkdir-p (list bin share))
+ (copy-recursively "bin" share)
+ (substitute* exe
+ (("my \\$java_bin = \"java\";")
+ (string-append "my $java_bin = \""
+ ;; Use java from the JRE, not the JDK
+ #$(this-package-input "icedtea") "/bin/java"
+ "\";"))
+ (("\\$RealBin\\$delimiter\\$RealBin.*")
+ (string-append
+ (string-join
+ (list
+ share
+ (search-input-file inputs "/share/java/sam-1.112.jar")
+ (search-input-file inputs "/share/java/jbzip2.jar")
+ (search-input-file inputs "/share/java/sis-jhdf5.jar"))
+ "$delimiter")
+ "\";\n")))
+ (chmod exe #o555)
+ (symlink exe (string-append bin "/fastqc"))))))))
(inputs
- `(("java" ,icedtea)
- ("perl" ,perl) ; needed for the wrapper script
- ("java-cisd-jhdf5" ,java-cisd-jhdf5)
- ("java-picard-1.113" ,java-picard-1.113)
- ("java-jbzip2" ,java-jbzip2)))
+ (list icedtea
+ java-cisd-jhdf5
+ java-picard-1.113
+ java-jbzip2
+ perl)) ;needed for the wrapper script
(native-inputs
(list unzip))
(home-page "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/")
@@ -5038,7 +5387,7 @@ The main functions of FastQC are:
(define-public fastp
(package
(name "fastp")
- (version "0.20.1")
+ (version "0.23.2")
(source
(origin
(method git-fetch)
@@ -5048,20 +5397,21 @@ The main functions of FastQC are:
(file-name (git-file-name name version))
(sha256
(base32
- "0ly8mxdvrcy23jwxyppysx3dhb1lwsqhfbgpyvargxhfk6k700x4"))))
+ "04nmrqpjc3qni0cbazlwvpg8rk1mkfmfma0n4q3zivs3zi6rfnav"))))
(build-system gnu-build-system)
(arguments
- `(#:tests? #f ; there are none
- #:make-flags
- ,#~(list (string-append "PREFIX=" #$output))
- #:phases
- (modify-phases %standard-phases
- (delete 'configure)
- (add-before 'install 'create-target-dir
- (lambda* (#:key outputs #:allow-other-keys)
- (mkdir-p (string-append (assoc-ref outputs "out") "/bin")))))))
+ (list
+ #:tests? #false ;there are none
+ #:make-flags
+ #~(list (string-append "PREFIX=" #$output))
+ #:phases
+ #~(modify-phases %standard-phases
+ (delete 'configure)
+ (add-before 'install 'create-target-dir
+ (lambda _
+ (mkdir-p (string-append #$output "/bin")))))))
(inputs
- (list zlib))
+ (list isa-l libdeflate))
(home-page "https://github.com/OpenGene/fastp/")
(synopsis "All-in-one FastQ preprocessor")
(description
@@ -5252,21 +5602,22 @@ experiments and provide highly stable thresholds based on reproducibility.")
(outputs '("out" ;for library
"python")) ;for Python bindings
(arguments
- `(#:configure-flags
- ,#~(list "--without-sse" ; configure script probes for CPU features when SSE is enabled.
- (string-append "--enable-python-binding=" #$output:python))
- #:phases
- (modify-phases %standard-phases
+ (list
+ #:configure-flags
+ #~(list "--without-sse" ; configure script probes for CPU features when SSE is enabled.
+ (string-append "--enable-python-binding=" #$output:python))
+ #:phases
+ '(modify-phases %standard-phases
(add-before 'check 'set-SHELL-variable
(lambda _
;; generator_manager.hpp either uses /bin/sh or $SHELL
;; to run tests.
(setenv "SHELL" (which "bash")))))))
(native-inputs
- `(("bc" ,bc)
- ("time" ,time)
- ("python" ,python-wrapper)
- ("pkg-config" ,pkg-config)))
+ (list bc
+ time
+ python-wrapper
+ pkg-config))
(inputs
(list htslib))
(synopsis "Tool for fast counting of k-mers in DNA")
@@ -5415,7 +5766,7 @@ data.")
tar
wget
zlib))
- (home-page "http://kaiju.binf.ku.dk/")
+ (home-page "https://kaiju.binf.ku.dk/")
(synopsis "Fast and sensitive taxonomic classification for metagenomics")
(description "Kaiju is a program for sensitive taxonomic classification
of high-throughput sequencing reads from metagenomic whole genome sequencing
@@ -5931,7 +6282,7 @@ predicts the locations of structural units in the sequences.")
("openblas" ,openblas)))
(native-inputs
(list which))
- (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
+ (home-page "https://www.bioinf.uni-leipzig.de/Software/proteinortho")
(synopsis "Detect orthologous genes across species")
(description
"Proteinortho is a tool to detect orthologous genes across different
@@ -6206,7 +6557,7 @@ phylogenies.")
"rsem-run-prsem-testing-procedure"))))))))
(inputs
(list boost r-minimal perl htslib-1.3 zlib))
- (home-page "http://deweylab.biostat.wisc.edu/rsem/")
+ (home-page "https://deweylab.biostat.wisc.edu/rsem/")
(synopsis "Estimate gene expression levels from RNA-Seq data")
(description
"RSEM is a software package for estimating gene and isoform expression
@@ -6241,7 +6592,7 @@ BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
zlib))
(native-inputs
(list python-nose))
- (home-page "http://rseqc.sourceforge.net/")
+ (home-page "https://rseqc.sourceforge.net/")
(synopsis "RNA-seq quality control package")
(description
"RSeQC provides a number of modules that can comprehensively evaluate
@@ -6301,7 +6652,7 @@ distribution, coverage uniformity, strand specificity, etc.")
("readline" ,readline)))
(native-inputs
(list pkg-config))
- (home-page "http://seek.princeton.edu")
+ (home-page "https://seek.princeton.edu")
(synopsis "Gene co-expression search engine")
(description
"SEEK is a computational gene co-expression search engine. SEEK provides
@@ -6342,7 +6693,7 @@ to the user's query of interest.")
(native-inputs (list pkg-config))
(inputs
(list htslib ncurses perl python zlib))
- (home-page "http://samtools.sourceforge.net")
+ (home-page "https://samtools.sourceforge.net")
(synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
(description
"Samtools implements various utilities for post-processing nucleotide
@@ -7420,7 +7771,7 @@ optionally compressed by gzip.")
(install-file "SNAPCommand" bin)))))))
(native-inputs
(list zlib))
- (home-page "http://snap.cs.berkeley.edu/")
+ (home-page "https://snap.cs.berkeley.edu/")
(synopsis "Short read DNA sequence aligner")
(description
"SNAP is a fast and accurate aligner for short DNA reads. It is
@@ -7687,7 +8038,7 @@ sequences.")
;; no "configure" script
(delete 'configure))))
(inputs (list zlib))
- (home-page "http://subread.sourceforge.net/")
+ (home-page "https://subread.sourceforge.net/")
(synopsis "Tool kit for processing next-gen sequencing data")
(description
"The subread package contains the following tools: subread aligner, a
@@ -7734,7 +8085,7 @@ against local background noises.")
(install-file "stringtie" bin)))))))
(inputs
(list bzip2 htslib-for-stringtie libdeflate zlib))
- (home-page "http://ccb.jhu.edu/software/stringtie/")
+ (home-page "https://ccb.jhu.edu/software/stringtie/")
(synopsis "Transcript assembly and quantification for RNA-Seq data")
(description
"StringTie is a fast and efficient assembler of RNA-Seq sequence
@@ -7900,6 +8251,36 @@ sequence.")
3D perspective axes, 3D perspective annotations, and wireframe plots.")
(license license:gpl3+))))
+(define-public r-ggsankey
+ (let ((commit "be08dd0f86eaee9f9ff9e7ff95d47930660a3c36")
+ (revision "1"))
+ (package
+ (name "r-ggsankey")
+ (version (git-version "0.0.99999" revision commit))
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/davidsjoberg/ggsankey")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "0acpmydqqc91pq5p9wpkpmgqp3nhiljabd7d3i00kwhjxgm2bvba"))))
+ (properties `((upstream-name . "ggsankey")))
+ (build-system r-build-system)
+ (propagated-inputs (list r-dplyr
+ r-ggplot2
+ r-magrittr
+ r-purrr
+ r-stringr
+ r-tidyr))
+ (home-page "https://github.com/davidsjoberg/ggsankey")
+ (synopsis "Sankey, Alluvial and Sankey bump plots")
+ (description
+ "This package provides a package that makes it easy to implement
+sankey, alluvial and sankey bump plots in @code{ggplot2}.")
+ (license license:expat))))
+
(define-public r-gutils
(let ((commit "10e36c7b580aacb2d952140a3fdd82418aaddea6")
(revision "1"))
@@ -8163,6 +8544,52 @@ doublet-detection methods. In addition, this tool is used for execution and
benchmark of those eight mentioned methods.")
(license license:gpl3+))))
+(define-public r-psupertime
+ (let ((commit "73825a28d3bd9bc881c15ee0c4c218eec1c9c207")
+ (revision "1"))
+ (package
+ (name "r-psupertime")
+ (version (git-version "0.2.6" revision commit))
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/wmacnair/psupertime")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "00h1r3ffz6m9dwcgkvyki8405b059qn6mnjsd8d76a1rabaf2vfh"))))
+ (properties `((upstream-name . "psupertime")))
+ (build-system r-build-system)
+ (propagated-inputs
+ (list r-cowplot
+ r-data-table
+ r-fastcluster
+ r-forcats
+ r-ggplot2
+ r-glmnet
+ r-knitr
+ r-matrix
+ r-rcolorbrewer
+ r-scales
+ r-scran
+ r-singlecellexperiment
+ r-stringr
+ r-summarizedexperiment
+ r-topgo))
+ (native-inputs (list r-knitr))
+ (home-page "https://github.com/wmacnair/psupertime")
+ (synopsis
+ "Psupertime is supervised pseudotime for single cell RNAseq data")
+ (description
+ "Psupertime is supervised pseudotime for single cell RNAseq data. It
+uses single cell RNAseq data, where the cells have a known ordering. This
+ordering helps to identify a small number of genes which place cells in that
+known order. It can be used for discovery of relevant genes, for
+identification of subpopulations, and characterization of further unknown or
+differently labelled data.")
+ (license license:gpl3))))
+
(define-public r-pando
(package
(name "r-pando")
@@ -8772,7 +9199,7 @@ of contact distributions around selected landmarks.")
(base32
"1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
(build-system r-build-system)
- (home-page "http://centipede.uchicago.edu/")
+ (home-page "https://centipede.uchicago.edu/")
(synopsis "Predict transcription factor binding sites")
(description
"CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
@@ -9010,7 +9437,7 @@ experience substantial biological insertions and deletions.")
perl-getopt-long
perl-json
perl-statistics-pca))
- (home-page "http://prinseq.sourceforge.net/")
+ (home-page "https://prinseq.sourceforge.net/")
(synopsis "Preprocess sequence data in FASTA or FASTQ formats")
(description
"PRINSEQ is a bioinformatics tool to help you preprocess your genomic or
@@ -9495,7 +9922,7 @@ AM_CONDITIONAL(AMPNG, true)"))
zlib))
(native-inputs
(list autoconf automake libtool pkg-config))
- (home-page "http://emboss.sourceforge.net")
+ (home-page "https://emboss.sourceforge.net")
(synopsis "Molecular biology analysis suite")
(description "EMBOSS is the \"European Molecular Biology Open Software
Suite\". EMBOSS is an analysis package specially developed for the needs of
@@ -10172,7 +10599,7 @@ remove biased methylation positions for RRBS sequence files.")
(for-each (lambda (file)
(install-file file target))
(find-files "../exe" ".*"))))))))
- (home-page "http://evolution.genetics.washington.edu/phylip/")
+ (home-page "https://evolution.genetics.washington.edu/phylip/")
(synopsis "Tools for inferring phylogenies")
(description "PHYLIP (the PHYLogeny Inference Package) is a package of
programs for inferring phylogenies (evolutionary trees).")
@@ -14443,110 +14870,125 @@ datasets.")
(define-public ngless
(package
(name "ngless")
- (version "1.3.0")
- (source
- (origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/ngless-toolkit/ngless.git")
- (commit (string-append "v" version))))
- (file-name (git-file-name name version))
- (sha256
- (base32
- "0pb9f6b0yk9p4cdwiym8r190q1bcdiwvc7i2s6rw54qgi8r3g6pj"))
- (patches (search-patches "ngless-unliftio.patch"))))
+ (version "1.5.0")
+ (source (origin
+ (method url-fetch)
+ (uri (hackage-uri "NGLess" version))
+ (sha256
+ (base32
+ "0pljyrlpr9r3cl5311dhgxdl8y40szyi4vprn34i3piy0qrldymi"))))
(build-system haskell-build-system)
(arguments
- (list
- #:haddock? #f ;The haddock phase fails with: NGLess/CmdArgs.hs:20:1:
- ;error: parse error on input import
- ;import Options.Applicative
- #:phases
- #~(modify-phases %standard-phases
- (add-after 'unpack 'create-Versions.hs
- (lambda _
- (substitute* "Makefile"
- (("BWA_VERSION = .*")
- (string-append "BWA_VERSION = "
- #$(package-version bwa) "\n"))
- (("SAM_VERSION = .*")
- (string-append "SAM_VERSION = "
- #$(package-version samtools) "\n"))
- (("PRODIGAL_VERSION = .*")
- (string-append "PRODIGAL_VERSION = "
- #$(package-version prodigal) "\n"))
- (("MINIMAP2_VERSION = .*")
- (string-append "MINIMAP2_VERSION = "
- #$(package-version minimap2) "\n")))
- (invoke "make" "NGLess/Dependencies/Versions.hs")))
- (add-after 'create-Versions.hs 'create-cabal-file
- (lambda _ (invoke "hpack")))
- ;; These tools are expected to be installed alongside ngless.
- (add-after 'install 'link-tools
- (lambda* (#:key inputs #:allow-other-keys)
- (let ((bin (string-append #$output "/bin/")))
- (symlink (search-input-file inputs "/bin/prodigal")
- (string-append bin "ngless-" #$version "-prodigal"))
- (symlink (search-input-file inputs "/bin/minimap2")
- (string-append bin "ngless-" #$version "-minimap2"))
- (symlink (search-input-file inputs "/bin/samtools")
- (string-append bin "ngless-" #$version "-samtools"))
- (symlink (search-input-file inputs "/bin/bwa")
- (string-append bin "ngless-" #$version "-bwa"))))))))
- (inputs
- (list prodigal
- bwa
- samtools
- minimap2
- ghc-aeson
- ghc-ansi-terminal
- ghc-async
- ghc-atomic-write
- ghc-bytestring-lexing
- ghc-conduit
- ghc-conduit-algorithms
- ghc-conduit-extra
- ghc-configurator
- ghc-convertible
- ghc-data-default
- ghc-diagrams-core
- ghc-diagrams-lib
- ghc-diagrams-svg
- ghc-double-conversion
- ghc-edit-distance
- ghc-either
- ghc-errors
- ghc-extra
- ghc-filemanip
- ghc-file-embed
- ghc-gitrev
- ghc-hashtables
- ghc-http-conduit
- ghc-inline-c
- ghc-inline-c-cpp
- ghc-int-interval-map
- ghc-missingh
- ghc-optparse-applicative
- ghc-regex
- ghc-safe
- ghc-safeio
- ghc-strict
- ghc-tar
- ghc-tar-conduit
- ghc-unliftio
- ghc-unliftio-core
- ghc-vector
- ghc-yaml
- ghc-zlib))
+ `(#:haddock? #f
+ #:phases
+ (modify-phases %standard-phases
+ (add-before 'configure 'update-constraints
+ (lambda _
+ (substitute* "NGLess.cabal"
+ (("\\b(base)\\s+[^,]+" all dep)
+ dep))))
+ (add-after 'unpack 'create-Versions.hs
+ (lambda _
+ (substitute* "NGLess/Dependencies/Versions.hs"
+ (("bwaVersion = .+")
+ (string-append "bwaVersion = \""
+ ,(package-version bwa) "\""))
+ (("samtoolsVersion = .+")
+ (string-append "samtoolsVersion = \""
+ ,(package-version samtools) "\""))
+ (("prodigalVersion = .+")
+ (string-append "prodigalVersion = \""
+ ,(package-version prodigal) "\""))
+ (("megahitVersion = .+")
+ (string-append "megahitVersion = \""
+ ,(package-version megahit) "\""))
+ (("minimap2Version = .+")
+ (string-append "minimap2Version = \""
+ ,(package-version minimap2) "\"")))))
+ ;; See NGLess/FileManagement.hs.
+ (add-after 'install 'wrap-program
+ (lambda* (#:key inputs outputs #:allow-other-keys)
+ (let* ((out (assoc-ref outputs "out"))
+ (bwa (search-input-file inputs "/bin/bwa"))
+ (samtools (search-input-file inputs "/bin/samtools"))
+ (prodigal (search-input-file inputs "/bin/prodigal"))
+ (minimap2 (search-input-file inputs "/bin/minimap2"))
+ (megahit (search-input-file inputs "/bin/megahit")))
+ (wrap-program (string-append out "/bin/ngless")
+ `("NGLESS_BWA_BIN" " " = (,bwa))
+ `("NGLESS_SAMTOOLS_BIN" " " = (,samtools))
+ `("NGLESS_PRODIGAL_BIN" " " = (,prodigal))
+ `("NGLESS_MINIMAP2_BIN" " " = (,minimap2))
+ `("NGLESS_MEGAHIT_BIN" " " = (,megahit))))))
+ ;; Sanity check.
+ (add-after 'wrap-program 'check-install
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let* ((ngless (string-append (assoc-ref outputs "out") "/bin/ngless")))
+ (invoke ngless "--check-install"))))
+ (add-after 'register 'remove-libraries
+ (lambda* (#:key outputs #:allow-other-keys)
+ (delete-file-recursively (string-append (assoc-ref outputs "out") "/lib")))))))
+ (inputs (list prodigal
+ bwa
+ samtools
+ minimap2
+ megahit
+ ghc-missingh
+ ghc-aeson
+ ghc-ansi-terminal
+ ghc-async
+ ghc-atomic-write
+ ghc-bytestring-lexing
+ ghc-conduit
+ ghc-conduit-algorithms
+ ghc-conduit-extra
+ ghc-configurator
+ ghc-convertible
+ ghc-data-default
+ ghc-edit-distance
+ ghc-either
+ ghc-errors
+ ghc-extra
+ ghc-file-embed
+ ghc-filemanip
+ ghc-hashable
+ ghc-hashtables
+ ghc-hostname
+ ghc-http-client
+ ghc-http-conduit
+ ghc-inline-c
+ ghc-inline-c-cpp
+ ghc-int-interval-map
+ ghc-network
+ ghc-optparse-applicative
+ ghc-primitive
+ ghc-random-shuffle
+ ghc-regex
+ ghc-resourcet
+ ghc-safe
+ ghc-stm-chans
+ ghc-stm-conduit
+ ghc-strict
+ ghc-tar
+ ghc-tar-conduit
+ ghc-unix-compat
+ ghc-unliftio
+ ghc-unliftio-core
+ ghc-vector
+ ghc-vector-algorithms
+ ghc-yaml
+ ghc-zlib
+ ghc-bzlib-conduit
+ ghc-double-conversion
+ ghc-safeio))
(propagated-inputs
(list r-r6 r-hdf5r r-iterators r-itertools r-matrix))
- (native-inputs
- (list ghc-hpack
- ghc-quickcheck
- ghc-test-framework
- ghc-test-framework-hunit
- ghc-test-framework-quickcheck2
- ghc-test-framework-th))
+ (native-inputs (list ghc-hunit
+ ghc-quickcheck
+ ghc-tasty
+ ghc-tasty-hunit
+ ghc-tasty-quickcheck
+ ghc-tasty-th))
(home-page "https://ngless.embl.de/")
(synopsis "DSL for processing next-generation sequencing data")
(description "Ngless is a domain-specific language for
@@ -16451,7 +16893,7 @@ The output is in SAM format.")
(list libxml2))
(native-inputs
(list check swig))
- (home-page "http://sbml.org/Software/libSBML")
+ (home-page "https://sbml.org/Software/libSBML")
(synopsis "Process SBML files and data streams")
(description "LibSBML is a library to help you read, write, manipulate,
translate, and validate SBML files and data streams. The @dfn{Systems Biology
@@ -16582,6 +17024,40 @@ sequencing (e.g. mapping or base/indel alignment uncertainty), which are
usually ignored by other methods or only used for filtering.")
(license license:expat)))
+(define-public louvain
+ (package
+ (name "louvain")
+ (version "0.2")
+ (source (origin
+ (method url-fetch)
+ (uri "mirror://sourceforge/louvain/louvain_latest.tar.gz")
+ (sha256
+ (base32
+ "0hqlv5jqc889nbv7j1bchrx4zhh69hgr2mqvfdygc7kwrywn22lb"))))
+ (build-system gnu-build-system)
+ (arguments
+ (list
+ #:tests? #false ;there are none
+ #:phases
+ #~(modify-phases %standard-phases
+ (delete 'configure)
+ (add-after 'unpack 'patch-includes
+ (lambda _
+ (substitute* "main_community.cpp"
+ (("using namespace std;" m)
+ (string-append "#include <unistd.h> /* for getpid */\n" m)))))
+ (replace 'install
+ (lambda _
+ (for-each
+ (lambda (exe)
+ (install-file exe (string-append #$output "/bin")))
+ '("convert" "community" "hierarchy")))))))
+ (home-page "https://sourceforge.net/projects/louvain/")
+ (synopsis "Multi-criteria community detection")
+ (description "This package offers a set of functions to use in order to
+compute communities on graphs weighted or unweighted.")
+ (license license:gpl3+)))
+
(define-public ivar
(package
(name "ivar")
@@ -17388,7 +17864,7 @@ populations.")
(define-public scregseg
(package
(name "scregseg")
- (version "0.1.1")
+ (version "0.1.3")
(source (origin
(method git-fetch)
(uri (git-reference
@@ -17397,16 +17873,22 @@ populations.")
(file-name (git-file-name name version))
(sha256
(base32
- "1k8hllr5if6k2mm2zj391fv40sfc008cjm04l9vgfsdppb80i112"))
+ "07g2barywa1wi8mggbxkbxqjw1fzd0a0l9cjdbkx4s40imb1dbxb"))
(snippet
- #~(begin
- (use-modules ((guix build utils)))
- (delete-file "src/scregseg/_utils.c")))))
- (build-system python-build-system)
+ '(delete-file "src/scregseg/_utils.c"))))
+ (build-system pyproject-build-system)
(arguments
- `(#:tests? #false ; tests require network access
- #:phases
- (modify-phases %standard-phases
+ (list
+ #:phases
+ '(modify-phases %standard-phases
+ ;; Numba needs a writable dir to cache functions.
+ (add-before 'check 'set-numba-cache-dir
+ (lambda _
+ (setenv "NUMBA_CACHE_DIR" "/tmp")))
+ ;; Cython extensions have to be built before running the tests.
+ (add-before 'check 'build-extensions
+ (lambda _
+ (invoke "python" "setup.py" "build_ext" "--inplace")))
(add-after 'unpack 'do-not-fail-to-find-sklearn
(lambda _
;; XXX: I have no idea why it cannot seem to find sklearn.
@@ -18324,7 +18806,7 @@ useful for bioinformatic analysis.")
(define-public go-github-com-biogo-hts-bam
(package
(name "go-github-com-biogo-hts-bam")
- (version "1.4.3")
+ (version "1.4.4")
(source (origin
(method git-fetch)
(uri (git-reference
@@ -18333,7 +18815,7 @@ useful for bioinformatic analysis.")
(file-name (git-file-name name version))
(sha256
(base32
- "013ga6ilc4m3hyfr3yyiva9g4vs81afhj73v2sy7r75b5zxw7lx1"))))
+ "1vkcqxyajghx5p5j7g2i376nbsxh8q2smk0smlv8mi34yr7hlw5b"))))
(build-system go-build-system)
(arguments
'(#:import-path "github.com/biogo/hts/bam"