summaryrefslogtreecommitdiff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r--gnu/packages/bioinformatics.scm1006
1 files changed, 978 insertions, 28 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 1214a0b708..b29ca67dbd 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -19,23 +19,68 @@
(define-module (gnu packages bioinformatics)
#:use-module ((guix licenses) #:prefix license:)
#:use-module (guix packages)
+ #:use-module (guix utils)
#:use-module (guix download)
+ #:use-module (guix git-download)
#:use-module (guix build-system gnu)
#:use-module (guix build-system cmake)
#:use-module (guix build-system python)
#:use-module (guix build-system trivial)
#:use-module (gnu packages)
#:use-module (gnu packages base)
+ #:use-module (gnu packages boost)
#:use-module (gnu packages compression)
+ #:use-module (gnu packages file)
#:use-module (gnu packages java)
+ #:use-module (gnu packages linux)
+ #:use-module (gnu packages maths)
#:use-module (gnu packages ncurses)
#:use-module (gnu packages perl)
#:use-module (gnu packages pkg-config)
+ #:use-module (gnu packages popt)
+ #:use-module (gnu packages protobuf)
#:use-module (gnu packages python)
+ #:use-module (gnu packages statistics)
+ #:use-module (gnu packages swig)
#:use-module (gnu packages tbb)
+ #:use-module (gnu packages textutils)
#:use-module (gnu packages vim)
+ #:use-module (gnu packages xml)
#:use-module (gnu packages zip))
+(define-public bamtools ; package definition: BamTools C++ API and command-line toolkit for BAM files
+ (package
+ (name "bamtools")
+ (version "2.3.0")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append
+ "https://github.com/pezmaster31/bamtools/archive/v"
+ version ".tar.gz"))
+ (file-name (string-append name "-" version ".tar.gz")) ; the URL basename is only "v<version>.tar.gz", so the download is named explicitly
+ (sha256
+ (base32
+ "1brry29bw2xr2l9pqn240rkqwayg85b8qq78zk2zs6nlspk4d018"))))
+ (build-system cmake-build-system)
+ (arguments
+ `(#:tests? #f ;no "check" target
+ #:phases
+ (modify-phases %standard-phases
+ (add-before
+ 'configure 'set-ldflags ; extra phase inserted ahead of 'configure
+ (lambda* (#:key outputs #:allow-other-keys)
+ (setenv "LDFLAGS" ; exported so that later phases see it in the environment
+ (string-append
+ "-Wl,-rpath="
+ (assoc-ref outputs "out") "/lib/bamtools"))))))) ; rpath points at <out>/lib/bamtools
+ (inputs `(("zlib" ,zlib)))
+ (home-page "https://github.com/pezmaster31/bamtools")
+ (synopsis "C++ API and command-line toolkit for working with BAM data")
+ (description
+ "BamTools provides both a C++ API and a command-line toolkit for handling
+BAM files.")
+ (license license:expat)))
+
(define-public bedops
(package
(name "bedops")
@@ -177,6 +222,39 @@ pybedtools extends BEDTools by offering feature-level manipulations from with
Python.")
(license license:gpl2+)))
+(define-public python-biopython ; package definition: Biopython for the default Python; python2-biopython is derived from it
+ (package
+ (name "python-biopython")
+ (version "1.65")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append
+ "http://biopython.org/DIST/biopython-"
+ version ".tar.gz"))
+ (sha256
+ (base32
+ "13m8s9jkrw40zvdp1rl709n6lmgdh4f52aann7gzr6sfp0fwhg26"))))
+ (build-system python-build-system)
+ (inputs
+ `(("python-numpy" ,python-numpy)))
+ (native-inputs
+ `(("python-setuptools" ,python-setuptools))) ; default-Python setuptools, matching python-numpy above; the Python 2 variant comes from package-with-python2
+ (home-page "http://biopython.org/")
+ (synopsis "Tools for biological computation in Python")
+ (description
+ "Biopython is a set of tools for biological computation including parsers
+for bioinformatics files into Python data structures; interfaces to common
+bioinformatics programs; a standard sequence class and tools for performing
+common operations on them; code to perform data classification; code for
+dealing with alignments; code making it easy to split up parallelizable tasks
+into separate processes; and more.")
+ (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE")))) ; non-standard license, referenced by URL
+
+(define-public python2-biopython ; Python 2 variant, derived from python-biopython
+ (package (inherit (package-with-python2 python-biopython))
+ (inputs
+ `(("python2-numpy" ,python2-numpy))))) ; replaces the inherited inputs field with python2-numpy
+
(define-public bowtie
(package
(name "bowtie")
@@ -285,6 +363,41 @@ and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
(license license:gpl3+)))
+(define-public python2-bx-python ; package definition: bx-python, built with Python 2 only
+ (package
+ (name "python2-bx-python")
+ (version "0.7.2")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append
+ "https://pypi.python.org/packages/source/b/bx-python/bx-python-"
+ version ".tar.gz"))
+ (sha256
+ (base32
+ "0ld49idhc5zjdvbhvjq1a2qmpjj7h5v58rqr25dzmfq7g34b50xh"))
+ (modules '((guix build utils))) ; provides substitute* for the snippet below
+ (snippet
+ '(substitute* "setup.py"
+ ;; remove dependency on outdated "distribute" module
+ (("^from distribute_setup import use_setuptools") "") ; blank out the import ...
+ (("^use_setuptools\\(\\)") ""))))) ; ... and the call that uses it
+ (build-system python-build-system)
+ (arguments
+ `(#:tests? #f ;tests fail because test data are not included
+ #:python ,python-2)) ; build with the Python 2 interpreter
+ (inputs
+ `(("python-numpy" ,python2-numpy)
+ ("zlib" ,zlib)))
+ (native-inputs
+ `(("python-nose" ,python2-nose)
+ ("python-setuptools" ,python2-setuptools)))
+ (home-page "http://bitbucket.org/james_taylor/bx-python/")
+ (synopsis "Tools for manipulating biological data")
+ (description
+ "bx-python provides tools for manipulating biological data, particularly
+multiple sequence alignments.")
+ (license license:expat)))
+
(define-public clipper
(package
(name "clipper")
@@ -323,6 +436,30 @@ and more accurate. BWA-MEM also has better performance than BWA-backtrack for
"CLIPper is a tool to define peaks in CLIP-seq datasets.")
(license license:gpl2)))
+(define-public clustal-omega ; package definition: Clustal-Omega multiple sequence aligner
+ (package
+ (name "clustal-omega")
+ (version "1.2.1")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append
+ "http://www.clustal.org/omega/clustal-omega-"
+ version ".tar.gz"))
+ (sha256
+ (base32
+ "02ibkx0m0iwz8nscg998bh41gg251y56cgh86bvyrii5m8kjgwqf"))))
+ (build-system gnu-build-system) ; no custom arguments: the standard phases are used unchanged
+ (inputs
+ `(("argtable" ,argtable))) ; the only declared input
+ (home-page "http://www.clustal.org/omega/")
+ (synopsis "Multiple sequence aligner for protein and DNA/RNA")
+ (description
+ "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
+program for protein and DNA/RNA. It produces high quality MSAs and is capable
+of handling data-sets of hundreds of thousands of sequences in reasonable
+time.")
+ (license license:gpl2+)))
+
(define-public crossmap
(package
(name "crossmap")
@@ -365,6 +502,123 @@ files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
(license license:gpl2+)))
+(define-public cutadapt ; package definition: cutadapt adapter trimmer; its tests run against the installed copy
+ (package
+ (name "cutadapt")
+ (version "1.8")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append
+ "https://github.com/marcelm/cutadapt/archive/v"
+ version ".tar.gz"))
+ (file-name (string-append name "-" version ".tar.gz")) ; the URL basename is only "v<version>.tar.gz"
+ (sha256
+ (base32
+ "161bp87y6gd6r5bmvjpn2b1k942i3fizfpa139f0jn6jv1wcp5h5"))))
+ (build-system python-build-system)
+ (arguments
+ ;; tests must be run after install
+ `(#:phases (alist-cons-after
+ 'install 'check ; re-add 'check directly after 'install
+ (lambda* (#:key inputs outputs #:allow-other-keys)
+ (setenv "PYTHONPATH" ; append this package's own site-packages directory
+ (string-append
+ (getenv "PYTHONPATH")
+ ":" (assoc-ref outputs "out")
+ "/lib/python"
+ (string-take (string-take-right
+ (assoc-ref inputs "python") 5) 3) ; NOTE(review): takes the first 3 of the last 5 characters of the python input path; assumes it ends in an "X.Y.Z" version with one-character fields - confirm
+ "/site-packages"))
+ (zero? (system* "nosetests" "-P" "tests"))) ; the phase succeeds only when nosetests exits with 0
+ (alist-delete 'check %standard-phases)))) ; drop the stock 'check phase from its usual position
+ (native-inputs
+ `(("python-cython" ,python-cython)
+ ("python-nose" ,python-nose)
+ ("python-setuptools" ,python-setuptools)))
+ (home-page "https://code.google.com/p/cutadapt/")
+ (synopsis "Remove adapter sequences from nucleotide sequencing reads")
+ (description
+ "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
+other types of unwanted sequence from high-throughput sequencing reads.")
+ (license license:expat)))
+
+(define-public express ; package definition: eXpress, built against the packaged boost and bamtools inputs
+ (package
+ (name "express")
+ (version "1.5.1")
+ (source (origin
+ (method url-fetch)
+ (uri
+ (string-append
+ "http://bio.math.berkeley.edu/eXpress/downloads/express-"
+ version "/express-" version "-src.tgz"))
+ (sha256
+ (base32
+ "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
+ (build-system cmake-build-system)
+ (arguments
+ `(#:tests? #f ;no "check" target
+ #:phases
+ (alist-cons-after
+ 'unpack 'use-shared-boost-libs-and-set-bamtools-paths ; patch the CMake files right after unpacking
+ (lambda* (#:key inputs #:allow-other-keys)
+ (substitute* "CMakeLists.txt"
+ (("set\\(Boost_USE_STATIC_LIBS ON\\)")
+ "set(Boost_USE_STATIC_LIBS OFF)") ; switch Boost_USE_STATIC_LIBS from ON to OFF
+ (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
+ (string-append (assoc-ref inputs "bamtools") "/include/bamtools"))) ; use the headers of the bamtools input instead of the in-tree path
+ (substitute* "src/CMakeLists.txt"
+ (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
+ (string-append (assoc-ref inputs "bamtools") "/lib/bamtools"))) ; likewise for the library directory
+ #t)
+ %standard-phases)))
+ (inputs
+ `(("boost" ,boost)
+ ("bamtools" ,bamtools)
+ ("protobuf" ,protobuf)
+ ("zlib" ,zlib)))
+ (home-page "http://bio.math.berkeley.edu/eXpress")
+ (synopsis "Streaming quantification for high-throughput genomic sequencing")
+ (description
+ "eXpress is a streaming tool for quantifying the abundances of a set of
+target sequences from sampled subsequences. Example applications include
+transcript-level RNA-Seq quantification, allele-specific/haplotype expression
+analysis (from RNA-Seq), transcription factor binding quantification in
+ChIP-Seq, and analysis of metagenomic data.")
+ (license license:artistic2.0)))
+
+(define-public fastx-toolkit ; package definition: FASTX-Toolkit FASTA/FASTQ preprocessing tools
+ (package
+ (name "fastx-toolkit")
+ (version "0.0.14")
+ (source (origin
+ (method url-fetch)
+ (uri
+ (string-append
+ "https://github.com/agordon/fastx_toolkit/releases/download/"
+ version "/fastx_toolkit-" version ".tar.bz2")) ; upstream spells the name with an underscore
+ (sha256
+ (base32
+ "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
+ (build-system gnu-build-system) ; no custom arguments: the standard phases are used unchanged
+ (inputs
+ `(("libgtextutils" ,libgtextutils)))
+ (native-inputs
+ `(("pkg-config" ,pkg-config))) ; build-time only
+ (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
+ (synopsis "Tools for FASTA/FASTQ file preprocessing")
+ (description
+ "The FASTX-Toolkit is a collection of command line tools for Short-Reads
+FASTA/FASTQ files preprocessing.
+
+Next-Generation sequencing machines usually produce FASTA or FASTQ files,
+containing multiple short-reads sequences. The main processing of such
+FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
+is sometimes more productive to preprocess the files before mapping the
+sequences to the genome---manipulating the sequences to produce better mapping
+results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
+ (license license:agpl3+)))
+
(define-public flexbar
(package
(name "flexbar")
@@ -379,15 +633,20 @@ file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
"13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
(build-system cmake-build-system)
(arguments
- `(;; There is no test target, although there is a directory containing
- ;; test data and scripts (launched by flexbar_validate.sh).
- #:tests? #f
- #:configure-flags (list
+ `(#:configure-flags (list
(string-append "-DFLEXBAR_BINARY_DIR="
(assoc-ref %outputs "out")
"/bin/"))
#:phases
- (alist-delete 'install %standard-phases)))
+ (alist-replace
+ 'check
+ (lambda* (#:key outputs #:allow-other-keys)
+ (setenv "PATH" (string-append
+ (assoc-ref outputs "out") "/bin:"
+ (getenv "PATH")))
+ (chdir "../flexbar_v2.5_src/test")
+ (zero? (system* "bash" "flexbar_validate.sh")))
+ (alist-delete 'install %standard-phases))))
(inputs
`(("tbb" ,tbb)
("zlib" ,zlib)))
@@ -405,6 +664,57 @@ supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
(license license:gpl3)))
+(define-public grit ; package definition: GRIT, built with Python 2 and freshly generated Cython sources
+ (package
+ (name "grit")
+ (version "2.0.2")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append
+ "https://github.com/nboley/grit/archive/"
+ version ".tar.gz"))
+ (file-name (string-append name "-" version ".tar.gz")) ; the URL basename is only "<version>.tar.gz"
+ (sha256
+ (base32
+ "157in84dj70wimbind3x7sy1whs3h57qfgcnj2s6lrd38fbrb7mj"))))
+ (build-system python-build-system)
+ (arguments
+ `(#:python ,python-2
+ #:phases
+ (alist-cons-after
+ 'unpack 'generate-from-cython-sources ; extra phase run right after 'unpack
+ (lambda* (#:key inputs outputs #:allow-other-keys)
+ ;; Delete these C files to force fresh generation from pyx sources.
+ (delete-file "grit/sparsify_support_fns.c")
+ (delete-file "grit/call_peaks_support_fns.c")
+ (substitute* "setup.py"
+ (("Cython.Setup") "Cython.Build") ; rewrite the Cython module name referenced by setup.py
+ ;; Add numpy include path to fix compilation
+ (("pyx\", \\]")
+ (string-append "pyx\", ], include_dirs = ['"
+ (assoc-ref inputs "python-numpy")
+ "/lib/python2.7/site-packages/numpy/core/include/" ; hard-codes the python2.7 site-packages layout
+ "']"))) #t)
+ %standard-phases)))
+ (inputs
+ `(("python-scipy" ,python2-scipy)
+ ("python-numpy" ,python2-numpy)
+ ("python-pysam" ,python2-pysam)
+ ("python-networkx" ,python2-networkx)))
+ (native-inputs
+ `(("python-cython" ,python2-cython)
+ ("python-setuptools" ,python2-setuptools)))
+ (home-page "http://grit-bio.org")
+ (synopsis "Tool for integrative analysis of RNA-seq type assays")
+ (description
+ "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
+full length transcript models. When none of these data sources are available,
+GRIT can be run by providing a candidate set of TES or TSS sites. In
+addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
+also be run in quantification mode, where it uses a provided GTF file and just
+estimates transcript expression.")
+ (license license:gpl3+)))
+
(define-public hisat
(package
(name "hisat")
@@ -615,6 +925,172 @@ RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
(license license:gpl2)))
+(define-public python2-pbcore ; package definition: pbcore Python library for PacBio data files
+ (package
+ (name "python2-pbcore")
+ (version "0.9.3")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append
+ "https://github.com/PacificBiosciences/pbcore/archive/"
+ version ".tar.gz"))
+ (file-name (string-append name "-" version ".tar.gz")) ; the URL basename is only "<version>.tar.gz"
+ (sha256
+ (base32
+ "1z46rwjac93jm87cbj2zgjg6qvsgs65140wkbbxsvxps7ai4pm09"))))
+ (build-system python-build-system)
+ (arguments `(#:python ,python-2)) ; pbcore requires Python 2.7
+ (inputs
+ `(("python-cython" ,python2-cython) ; every input below is the python2- variant
+ ("python-numpy" ,python2-numpy)
+ ("python-pysam" ,python2-pysam)
+ ("python-h5py" ,python2-h5py)))
+ (native-inputs
+ `(("python-setuptools" ,python2-setuptools)))
+ (home-page "http://pacificbiosciences.github.io/pbcore/")
+ (synopsis "Library for reading and writing PacBio data files")
+ (description
+ "The pbcore package provides Python APIs for interacting with PacBio data
+files and writing bioinformatics applications.")
+ (license license:bsd-3)))
+
+(define-public pbtranscript-tofu ; package definition: pbtranscript-tofu, built from a pinned git commit
+ (let ((commit "c7bbd5472")) ; abbreviated commit id; also embedded in the version string below
+ (package
+ (name "pbtranscript-tofu")
+ (version (string-append "0.4.1." commit))
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/PacificBiosciences/cDNA_primer.git")
+ (commit commit)))
+ (file-name (string-append name "-" version ".tar.gz")) ; NOTE(review): the source comes from git-fetch yet is named ".tar.gz" - confirm this name is intended
+ (sha256
+ (base32
+ "148xkzi689c49g6fdhckp6mnmj2qhjdf1j4wifm6ja7ij95d7fxx"))))
+ (build-system python-build-system)
+ (arguments
+ `(#:python ,python-2
+ ;; With standard flags, the install phase attempts to create a zip'd
+ ;; egg file, and fails with an error: 'ZIP does not support timestamps
+ ;; before 1980'
+ #:configure-flags '("--single-version-externally-managed"
+ "--record=pbtranscript-tofu.txt")
+ #:phases
+ (alist-cons-after
+ 'unpack 'enter-directory-and-clean-up ; extra phase run right after 'unpack
+ (lambda _
+ (chdir "pbtranscript-tofu/pbtranscript/") ; later phases run from this subdirectory
+ ;; Delete clutter
+ (delete-file-recursively "dist/")
+ (delete-file-recursively "build/")
+ (delete-file-recursively "setuptools_cython-0.2.1-py2.6.egg/")
+ (delete-file-recursively "pbtools.pbtranscript.egg-info")
+ (delete-file "Cython-0.20.1.tar.gz")
+ (delete-file "setuptools_cython-0.2.1-py2.7.egg")
+ (delete-file "setuptools_cython-0.2.1.tar.gz")
+ (delete-file "setup.cfg")
+ (for-each delete-file
+ (find-files "." "\\.so$")) ; delete every file whose name ends in ".so"
+ ;; files should be writable for install phase
+ (for-each (lambda (f) (chmod f #o755))
+ (find-files "." "\\.py$"))) ; the phase returns whatever this final for-each returns
+ %standard-phases)))
+ (inputs
+ `(("python-cython" ,python2-cython)
+ ("python-numpy" ,python2-numpy)
+ ("python-bx-python" ,python2-bx-python)
+ ("python-networkx" ,python2-networkx)
+ ("python-scipy" ,python2-scipy)
+ ("python-pbcore" ,python2-pbcore)))
+ (native-inputs
+ `(("python-nose" ,python2-nose)
+ ("python-setuptools" ,python2-setuptools)))
+ (home-page "https://github.com/PacificBiosciences/cDNA_primer")
+ (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
+ (description
+ "pbtranscript-tofu contains scripts to analyze transcriptome data
+generated using the PacBio Iso-Seq protocol.")
+ (license license:bsd-3))))
+
+(define-public rsem ; package definition: RSEM, with a hand-written install phase and wrapped Perl scripts
+ (package
+ (name "rsem")
+ (version "1.2.20")
+ (source
+ (origin
+ (method url-fetch)
+ (uri
+ (string-append "http://deweylab.biostat.wisc.edu/rsem/src/rsem-"
+ version ".tar.gz"))
+ (sha256
+ (base32 "0nzdc0j0hjllhsd5f2xli95dafm3nawskigs140xzvjk67xh0r9q"))
+ (patches (list (search-patch "rsem-makefile.patch"))) ; patch contents are not visible in this file
+ (modules '((guix build utils))) ; provides delete-file-recursively for the snippet
+ (snippet
+ '(begin
+ ;; remove bundled copy of boost
+ (delete-file-recursively "boost")
+ #t))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:tests? #f ;no "check" target
+ #:phases
+ (modify-phases %standard-phases
+ ;; No "configure" script.
+ ;; Do not build bundled samtools library.
+ (replace 'configure
+ (lambda _
+ (substitute* "Makefile"
+ (("^all : sam/libbam.a") "all : ")) ; drop sam/libbam.a from the start of the "all" rule
+ #t))
+ (replace 'install
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let* ((out (string-append (assoc-ref outputs "out")))
+ (bin (string-append out "/bin/"))
+ (perl (string-append out "/lib/perl5/site_perl")))
+ (mkdir-p bin)
+ (mkdir-p perl)
+ (for-each (lambda (file)
+ (copy-file file
+ (string-append bin (basename file))))
+ (find-files "." "rsem-.*")) ; every file in the build tree matching "rsem-.*" is copied to bin
+ (copy-file "rsem_perl_utils.pm"
+ (string-append perl "/rsem_perl_utils.pm")))
+ #t))
+ (add-after
+ 'install 'wrap-program
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let ((out (assoc-ref outputs "out")))
+ (for-each (lambda (prog)
+ (wrap-program (string-append out "/bin/" prog)
+ `("PERL5LIB" ":" prefix
+ (,(string-append out "/lib/perl5/site_perl"))))) ; prefix PERL5LIB with the directory that received rsem_perl_utils.pm
+ '("rsem-plot-transcript-wiggles"
+ "rsem-calculate-expression"
+ "rsem-generate-ngvector"
+ "rsem-run-ebseq"
+ "rsem-prepare-reference"))) ; only these five programs are wrapped
+ #t)))))
+ (inputs
+ `(("boost" ,boost)
+ ("ncurses" ,ncurses)
+ ("r" ,r)
+ ("perl" ,perl)
+ ("samtools" ,samtools-0.1) ; the 0.1-series samtools package, not the default one
+ ("zlib" ,zlib)))
+ (home-page "http://deweylab.biostat.wisc.edu/rsem/")
+ (synopsis "Estimate gene expression levels from RNA-Seq data")
+ (description
+ "RSEM is a software package for estimating gene and isoform expression
+levels from RNA-Seq data. The RSEM package provides a user-friendly
+interface, supports threads for parallel computation of the EM algorithm,
+single-end and paired-end read data, quality scores, variable-length reads and
+RSPD estimation. In addition, it provides posterior mean and 95% credibility
+interval estimates for expression levels. For visualization, it can generate
+BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
+ (license license:gpl3+)))
+
(define-public rseqc
(package
(name "rseqc")
@@ -626,7 +1102,7 @@ that a read originated from a particular isoform.")
(string-append "mirror://sourceforge/rseqc/"
version "/RSeQC-" version ".tar.gz"))
(sha256
- (base32 "09rf0x9d6apjja5l01cgprj7vigpw6kiqhy34ibwwlxil0db0ri4"))
+ (base32 "15ly0254yi032qzkdplg00q144qfdsd986gh62829rl5bkxhj330"))
(modules '((guix build utils)))
(snippet
'(begin
@@ -680,32 +1156,31 @@ distribution, coverage uniformity, strand specificity, etc.")
;; systems.
#:tests? ,(string=? (or (%current-system) (%current-target-system))
"x86_64-linux")
- #:make-flags (list (string-append "prefix=" (assoc-ref %outputs "out")))
+ #:make-flags (list "LIBCURSES=-lncurses"
+ (string-append "prefix=" (assoc-ref %outputs "out")))
#:phases
(alist-cons-after
'unpack
- 'patch-makefile-curses
- (lambda _
- (substitute* "Makefile"
- (("-lcurses") "-lncurses")))
+ 'patch-tests
+ (lambda* (#:key inputs #:allow-other-keys)
+ (let ((bash (assoc-ref inputs "bash")))
+ (substitute* "test/test.pl"
+ ;; The test script calls out to /bin/bash
+ (("/bin/bash")
+ (string-append bash "/bin/bash"))
+ ;; There are two failing tests upstream relating to the "stats"
+ ;; subcommand in test_usage_subcommand ("did not have Usage"
+ ;; and "usage did not mention samtools stats"), so we disable
+ ;; them.
+ (("(test_usage_subcommand\\(.*\\);)" cmd)
+ (string-append "unless ($subcommand eq 'stats') {" cmd "};")))))
(alist-cons-after
- 'unpack
- 'patch-tests
- (lambda* (#:key inputs #:allow-other-keys)
- (let ((bash (assoc-ref inputs "bash")))
- (substitute* "test/test.pl"
- ;; The test script calls out to /bin/bash
- (("/bin/bash")
- (string-append bash "/bin/bash"))
- ;; There are two failing tests upstream relating to the "stats"
- ;; subcommand in test_usage_subcommand ("did not have Usage"
- ;; and "usage did not mention samtools stats"), so we disable
- ;; them.
- (("(test_usage_subcommand\\(.*\\);)" cmd)
- (string-append "unless ($subcommand eq 'stats') {" cmd "};")))))
- (alist-delete
- 'configure
- %standard-phases)))))
+ 'install 'install-library
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let ((lib (string-append (assoc-ref outputs "out") "/lib")))
+ (mkdir-p lib)
+ (copy-file "libbam.a" (string-append lib "/libbam.a"))))
+ (alist-delete 'configure %standard-phases)))))
(native-inputs `(("pkg-config" ,pkg-config)))
(inputs `(("ncurses" ,ncurses)
("perl" ,perl)
@@ -720,6 +1195,296 @@ variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
(license license:expat)))
+(define-public samtools-0.1 ; variant of the samtools package pinned to the 0.1 series
+ ;; This is the most recent version of the 0.1 line of samtools. The input
+ ;; and output formats differ greatly from that used and produced by samtools
+ ;; 1.x and is still used in many bioinformatics pipelines.
+ (package (inherit samtools) ; fields not overridden below come from samtools
+ (version "0.1.19")
+ (source
+ (origin
+ (method url-fetch)
+ (uri
+ (string-append "mirror://sourceforge/samtools/"
+ version "/samtools-" version ".tar.bz2"))
+ (sha256
+ (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
+ (arguments
+ (substitute-keyword-arguments (package-arguments samtools) ; start from samtools' arguments and override two keywords
+ ((#:tests? tests) #f) ;no "check" target
+ ((#:phases phases)
+ `(modify-phases ,phases
+ (replace 'install
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let ((bin (string-append
+ (assoc-ref outputs "out") "/bin")))
+ (mkdir-p bin)
+ (copy-file "samtools"
+ (string-append bin "/samtools"))))) ; install copies only the samtools executable into <out>/bin
+ (delete 'patch-tests))))))) ; drop the inherited 'patch-tests phase
+
+(define-public ngs-sdk ; package definition: NCBI NGS SDK, built from the ngs-sdk subdirectory of the ngs sources
+ (package
+ (name "ngs-sdk")
+ (version "1.1.0")
+ (source
+ (origin
+ (method url-fetch)
+ (uri
+ (string-append "https://github.com/ncbi/ngs/archive/"
+ version ".tar.gz"))
+ (file-name (string-append name "-" version ".tar.gz")) ; the URL basename is only "<version>.tar.gz"
+ (sha256
+ (base32
+ "09fakv9w87lfg9g70kwzmnryqdjj1sz2c7kw01i6drjf787gkjhw"))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:parallel-build? #f ; not supported
+ #:tests? #f ; no "check" target
+ #:phases
+ (alist-replace
+ 'configure
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let ((out (assoc-ref outputs "out")))
+ ;; Only replace the version suffix, not the version number in the
+ ;; directory name; fixed in commit 46d4509fa8 (no release yet).
+ (substitute* "setup/konfigure.perl"
+ (((string-append "\\$\\(subst "
+ "(\\$\\(VERSION[^\\)]*\\)),"
+ "(\\$\\([^\\)]+\\)),"
+ "(\\$\\([^\\)]+\\)|\\$\\@)"
+ "\\)")
+ _ pattern replacement target) ; bind the three regexp groups; the whole match is ignored
+ (string-append "$(patsubst "
+ "%" pattern ","
+ "%" replacement ","
+ target ")"))) ; emit $(patsubst %pattern,%replacement,target) in place of the $(subst ...) call
+
+ ;; The 'configure' script doesn't recognize things like
+ ;; '--enable-fast-install'.
+ (zero? (system* "./configure"
+ (string-append "--build-prefix=" (getcwd) "/build")
+ (string-append "--prefix=" out)))))
+ (alist-cons-after
+ 'unpack 'enter-dir
+ (lambda _ (chdir "ngs-sdk") #t) ; later phases run from the ngs-sdk subdirectory
+ %standard-phases))))
+ (native-inputs `(("perl" ,perl)))
+ (home-page "https://github.com/ncbi/ngs")
+ (synopsis "API for accessing Next Generation Sequencing data")
+ (description
+ "NGS is a domain-specific API for accessing reads, alignments and pileups
+produced from Next Generation Sequencing. The API itself is independent from
+any particular back-end implementation, and supports use of multiple back-ends
+simultaneously.")
+ (license license:public-domain)))
+
+(define-public ngs-java ; Java bindings built from the same source as ngs-sdk, in the ngs-java subdirectory
+ (package (inherit ngs-sdk) ; fields not overridden below come from ngs-sdk
+ (name "ngs-java")
+ (arguments
+ `(,@(substitute-keyword-arguments
+ `(#:modules ((guix build gnu-build-system)
+ (guix build utils)
+ (srfi srfi-1)
+ (srfi srfi-26))
+ ,@(package-arguments ngs-sdk)) ; ngs-sdk's arguments with a #:modules list prepended
+ ((#:phases phases)
+ `(alist-cons-after
+ 'enter-dir 'fix-java-symlink-installation ; extra phase run right after 'enter-dir
+ (lambda _
+ ;; Only replace the version suffix, not the version number in
+ ;; the directory name. Reported here:
+ ;; https://github.com/ncbi/ngs/pull/4
+ (substitute* "Makefile.java"
+ (((string-append "\\$\\(subst "
+ "(\\$\\(VERSION[^\\)]*\\)),"
+ "(\\$\\([^\\)]+\\)),"
+ "(\\$\\([^\\)]+\\)|\\$\\@)"
+ "\\)")
+ _ pattern replacement target) ; same $(subst ...) to $(patsubst ...) rewrite as applied to konfigure.perl in ngs-sdk
+ (string-append "$(patsubst "
+ "%" pattern ","
+ "%" replacement ","
+ target ")"))))
+ (alist-replace
+ 'enter-dir (lambda _ (chdir "ngs-java") #t) ; enter ngs-java instead of ngs-sdk
+ ,phases))))))
+ (inputs
+ `(("jdk" ,icedtea6 "jdk") ; the "jdk" output of icedtea6
+ ("ngs-sdk" ,ngs-sdk)))
+ (synopsis "Java bindings for NGS SDK")))
+
+(define-public ncbi-vdb ; package definition: NCBI-VDB; also installs interface libraries and headers after 'install
+ (package
+ (name "ncbi-vdb")
+ (version "2.4.5-5")
+ (source
+ (origin
+ (method url-fetch)
+ (uri
+ (string-append "https://github.com/ncbi/ncbi-vdb/archive/"
+ version ".tar.gz"))
+ (file-name (string-append name "-" version ".tar.gz")) ; the URL basename is only "<version>.tar.gz"
+ (sha256
+ (base32
+ "1cj8nk6if8sqagv20vx36v566fdvhcaadf0x1ycnbgql6chbs6vy"))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:parallel-build? #f ; not supported
+ #:tests? #f ; no "check" target
+ #:phases
+ (alist-replace
+ 'configure
+ (lambda* (#:key inputs outputs #:allow-other-keys)
+ (let ((out (assoc-ref outputs "out"))) ; note: the body below looks up "out" again rather than using this binding
+ ;; Only replace the version suffix, not the version number in the
+ ;; directory name; fixed in commit 4dbba5c6a809 (no release yet).
+ (substitute* "setup/konfigure.perl"
+ (((string-append "\\$\\(subst "
+ "(\\$\\(VERSION[^\\)]*\\)),"
+ "(\\$\\([^\\)]+\\)),"
+ "(\\$\\([^\\)]+\\)|\\$\\@)"
+ "\\)")
+ _ pattern replacement target) ; bind the three regexp groups; the whole match is ignored
+ (string-append "$(patsubst "
+ "%" pattern ","
+ "%" replacement ","
+ target ")")))
+
+ ;; Override include path for libmagic
+ (substitute* "setup/package.prl"
+ (("name => 'magic', Include => '/usr/include'")
+ (string-append "name=> 'magic', Include => '"
+ (assoc-ref inputs "libmagic")
+ "/include" "'")))
+
+ ;; Install kdf5 library (needed by sra-tools)
+ (substitute* "build/Makefile.install"
+ (("LIBRARIES_TO_INSTALL =")
+ "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)")) ; put the two kdf5 entries at the front of the list
+
+ ;; The 'configure' script doesn't recognize things like
+ ;; '--enable-fast-install'.
+ (zero? (system*
+ "./configure"
+ (string-append "--build-prefix=" (getcwd) "/build")
+ (string-append "--prefix=" (assoc-ref outputs "out"))
+ (string-append "--debug")
+ (string-append "--with-xml2-prefix="
+ (assoc-ref inputs "libxml2"))
+ (string-append "--with-ngs-sdk-prefix="
+ (assoc-ref inputs "ngs-sdk"))
+ (string-append "--with-ngs-java-prefix="
+ (assoc-ref inputs "ngs-java"))
+ (string-append "--with-hdf5-prefix="
+ (assoc-ref inputs "hdf5"))))))
+ (alist-cons-after
+ 'install 'install-interfaces ; extra phase run right after 'install
+ (lambda* (#:key system outputs #:allow-other-keys)
+ ;; Install interface libraries
+ (mkdir (string-append (assoc-ref outputs "out") "/ilib"))
+ (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
+ (car (string-split system #\-)) ; first dash-separated field of the system string (presumably the CPU architecture - confirm)
+ "/rel/ilib")
+ (string-append (assoc-ref outputs "out")
+ "/ilib"))
+ ;; Install interface headers
+ (copy-recursively "interfaces"
+ (string-append (assoc-ref outputs "out")
+ "/include")))
+ %standard-phases))))
+ (inputs
+ `(("libxml2" ,libxml2)
+ ("ngs-sdk" ,ngs-sdk)
+ ("ngs-java" ,ngs-java)
+ ("libmagic" ,file) ; the "file" package supplies the "libmagic" input
+ ("hdf5" ,hdf5)))
+ (native-inputs `(("perl" ,perl)))
+ (home-page "https://github.com/ncbi/ncbi-vdb")
+ (synopsis "Database engine for genetic information")
+ (description
+ "The NCBI-VDB library implements a highly compressed columnar data
+warehousing engine that is most often used to store genetic information.
+Databases are stored in a portable image within the file system, and can be
+accessed/downloaded on demand across HTTP.")
+ (license license:public-domain)))
+
+(define-public sra-tools ; package definition: NCBI SRA Toolkit, configured against the installed ncbi-vdb package
+ (package
+ (name "sra-tools")
+ (version "2.4.5-5")
+ (source
+ (origin
+ (method url-fetch)
+ (uri
+ (string-append "https://github.com/ncbi/sra-tools/archive/"
+ version ".tar.gz"))
+ (file-name (string-append name "-" version ".tar.gz")) ; the URL basename is only "<version>.tar.gz"
+ (sha256
+ (base32
+ "11nrnvz7a012f4iryf0wiwrid0h111grsfxbxa9j51h3f2xbvgns"))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:parallel-build? #f ; not supported
+ #:tests? #f ; no "check" target
+ #:phases
+ (alist-replace
+ 'configure
+ (lambda* (#:key inputs outputs #:allow-other-keys)
+ ;; The build system expects a directory containing the sources and
+ ;; raw build output of ncbi-vdb, including files that are not
+ ;; installed. Since we are building against an installed version of
+ ;; ncbi-vdb, the following modifications are needed.
+ (substitute* "setup/konfigure.perl"
+ ;; Make the configure script look for the "ilib" directory of
+ ;; "ncbi-vdb" without first checking for the existence of a
+ ;; matching library in its "lib" directory.
+ (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
+ "my $f = File::Spec->catdir($ilibdir, $ilib);")
+ ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
+ (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
+ "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))
+
+ ;; The 'configure' script doesn't recognize things like
+ ;; '--enable-fast-install'.
+ (zero? (system*
+ "./configure"
+ (string-append "--build-prefix=" (getcwd) "/build")
+ (string-append "--prefix=" (assoc-ref outputs "out"))
+ (string-append "--debug")
+ (string-append "--with-fuse-prefix="
+ (assoc-ref inputs "fuse"))
+ (string-append "--with-magic-prefix="
+ (assoc-ref inputs "libmagic"))
+ ;; TODO: building with libxml2 fails with linker errors
+ ;; (string-append "--with-xml2-prefix="
+ ;; (assoc-ref inputs "libxml2"))
+ (string-append "--with-ncbi-vdb-sources="
+ (assoc-ref inputs "ncbi-vdb")) ; the sources and build flags both point at the ncbi-vdb input
+ (string-append "--with-ncbi-vdb-build="
+ (assoc-ref inputs "ncbi-vdb"))
+ (string-append "--with-ngs-sdk-prefix="
+ (assoc-ref inputs "ngs-sdk"))
+ (string-append "--with-hdf5-prefix="
+ (assoc-ref inputs "hdf5")))))
+ %standard-phases)))
+ (native-inputs `(("perl" ,perl)))
+ (inputs
+ `(("ngs-sdk" ,ngs-sdk)
+ ("ncbi-vdb" ,ncbi-vdb)
+ ("libmagic" ,file) ; the "file" package supplies the "libmagic" input
+ ("fuse" ,fuse)
+ ("hdf5" ,hdf5)
+ ("zlib" ,zlib)))
+ (home-page "http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
+ (synopsis "Tools and libraries for reading and writing sequencing data")
+ (description
+ "The SRA Toolkit from NCBI is a collection of tools and libraries for
+reading of sequencing files from the Sequence Read Archive (SRA) database and
+writing files into the .sra format.")
+ (license license:public-domain)))
+
(define-public seqan
(package
(name "seqan")
@@ -810,3 +1575,188 @@ chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
(license license:gpl3+)))
+
+;; Package definition for the Subread read aligner tool kit, built with the
+;; GNU build system from the release tarball on SourceForge.
+(define-public subread
+  (package
+    (name "subread")
+    (version "1.4.6-p2")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "mirror://sourceforge/subread/subread-"
+                    version "-source.tar.gz"))
+              (sha256
+               (base32
+                "06sv9mpcsdj6p68y15d6gi70lca3lxmzk0dn61hg0kfsa7rxmsr3"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:tests? #f ;no "check" target
+       ;; Tell make to read "Makefile.Linux" instead of a default makefile.
+       #:make-flags '("-f" "Makefile.Linux")
+       #:phases
+       (alist-cons-after
+        'unpack 'enter-dir
+        ;; All later phases run from within the "src" subdirectory.
+        (lambda _ (chdir "src") #t)
+        (alist-replace
+         'install
+         ;; Copy the contents of "../bin" (relative to "src") into the
+         ;; "bin" directory of the "out" output.
+         (lambda* (#:key outputs #:allow-other-keys)
+           (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
+             (mkdir-p bin)
+             (copy-recursively "../bin" bin)
+             ;; Signal success explicitly, like the other phases do, rather
+             ;; than relying on the return value of 'copy-recursively'.
+             #t))
+         ;; no "configure" script
+         (alist-delete 'configure %standard-phases)))))
+    (inputs `(("zlib" ,zlib)))
+    (home-page "http://bioinf.wehi.edu.au/subread-package/")
+    (synopsis "Tool kit for processing next-gen sequencing data")
+    (description
+     "The subread package contains the following tools: subread aligner, a
+general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
+and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
+features; exactSNP: a SNP caller that discovers SNPs by testing signals
+against local background noises.")
+    (license license:gpl3+)))
+
+;; Package definition for the Shogun machine learning toolbox, built with
+;; CMake and with the Octave, Python, R and command-line interfaces enabled.
+(define-public shogun
+  (package
+    (name "shogun")
+    (version "4.0.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append
+             "ftp://shogun-toolbox.org/shogun/releases/"
+             (version-major+minor version)
+             "/sources/shogun-" version ".tar.bz2"))
+       (sha256
+        (base32
+         "159nlijnb7mnrv9za80wnm1shwvy45hgrqzn51hxy7gw4z6d6fdb"))))
+    (build-system cmake-build-system)
+    (arguments
+     '(#:tests? #f ;no check target
+       #:phases
+       (alist-cons-after
+        'unpack 'delete-broken-symlinks
+        ;; Remove these entries from the unpacked source tree before
+        ;; anything else looks at them.
+        (lambda _
+          (let ((stale-entries
+                 '("applications/arts/data"
+                   "applications/asp/data"
+                   "applications/easysvm/data"
+                   "applications/msplicer/data"
+                   "applications/ocr/data"
+                   "examples/documented/data"
+                   "examples/documented/matlab_static"
+                   "examples/documented/octave_static"
+                   "examples/undocumented/data"
+                   "examples/undocumented/matlab_static"
+                   "examples/undocumented/octave_static"
+                   "tests/integration/data"
+                   "tests/integration/matlab_static"
+                   "tests/integration/octave_static"
+                   "tests/integration/python_modular/tests")))
+            (for-each delete-file stale-entries)
+            #t))
+        (alist-cons-after
+         'unpack 'change-R-target-path
+         ;; Point the R interfaces at a library directory inside our own
+         ;; output instead of ${R_COMPONENT_LIB_PATH}.
+         (lambda* (#:key outputs #:allow-other-keys)
+           (let ((r-library (string-append (assoc-ref outputs "out")
+                                           "/lib/R/library/")))
+             (substitute* '("src/interfaces/r_modular/CMakeLists.txt"
+                            "src/interfaces/r_static/CMakeLists.txt"
+                            "examples/undocumented/r_modular/CMakeLists.txt")
+               (("\\$\\{R_COMPONENT_LIB_PATH\\}") r-library))
+             #t))
+         (alist-cons-after
+          'unpack 'fix-octave-modules
+          (lambda* (#:key outputs #:allow-other-keys)
+            (let ((octave-packages
+                   (string-append (assoc-ref outputs "out")
+                                  "/share/octave/packages")))
+              ;; Also search the "octave" subdirectory of the Octave
+              ;; include directories.
+              (substitute* '("src/interfaces/octave_modular/CMakeLists.txt"
+                             "src/interfaces/octave_static/CMakeLists.txt")
+                (("^include_directories\\(\\$\\{OCTAVE_INCLUDE_DIRS\\}")
+                 "include_directories(${OCTAVE_INCLUDE_DIRS} ${OCTAVE_INCLUDE_DIRS}/octave"))
+
+              ;; Install the modular Octave interface into our own output.
+              (substitute* "src/interfaces/octave_modular/CMakeLists.txt"
+                (("\\$\\{OCTAVE_OCT_LOCAL_API_FILE_DIR\\}") octave-packages))
+              #t))
+          (alist-cons-before
+           'build 'set-HOME
+           ;; The build phase expects $HOME to be set; use "/tmp".
+           (lambda _
+             (setenv "HOME" "/tmp")
+             #t)
+           %standard-phases))))
+       #:configure-flags
+       (list "-DUSE_SVMLIGHT=OFF"      ;disable proprietary SVMLIGHT
+             ;;"-DJavaModular=ON"      ;requires unpackaged jblas
+             ;;"-DRubyModular=ON"      ;requires unpackaged ruby-narray
+             ;;"-DPerlModular=ON"      ;"FindPerlLibs" does not exist
+             ;;"-DLuaModular=ON"       ;fails because lua doesn't build pkgconfig file
+             "-DOctaveModular=ON"
+             "-DOctaveStatic=ON"
+             "-DPythonModular=ON"
+             "-DPythonStatic=ON"
+             "-DRModular=ON"
+             "-DRStatic=ON"
+             "-DCmdLineStatic=ON")))
+    (inputs
+     `(("python" ,python)
+       ("numpy" ,python-numpy)
+       ("r" ,r)
+       ("octave" ,octave)
+       ("swig" ,swig)
+       ("hdf5" ,hdf5)
+       ("atlas" ,atlas)
+       ("arpack" ,arpack-ng)
+       ("lapack" ,lapack)
+       ("glpk" ,glpk)
+       ("libxml2" ,libxml2)
+       ("lzo" ,lzo)
+       ("zlib" ,zlib)))
+    (native-inputs
+     `(("pkg-config" ,pkg-config)))
+    (home-page "http://shogun-toolbox.org/")
+    (synopsis "Machine learning toolbox")
+    (description
+     "The Shogun Machine learning toolbox provides a wide range of unified and
+efficient Machine Learning (ML) methods. The toolbox seamlessly allows to
+combine multiple data representations, algorithm classes, and general purpose
+tools. This enables both rapid prototyping of data pipelines and extensibility
+in terms of new algorithms.")
+    (license license:gpl3+)))
+
+;; Package definition for VCFtools, built with the GNU build system (minus
+;; the 'configure' phase) from the release tarball on SourceForge.
+(define-public vcftools
+  (package
+    (name "vcftools")
+    (version "0.1.12b")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "mirror://sourceforge/vcftools/vcftools_"
+                    version ".tar.gz"))
+              (sha256
+               (base32
+                "148al9h7f8g8my2qdnpax51kdd2yjrivlx6frvakf4lz5r8j88wx"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:tests? #f ; no "check" target
+       ;; PREFIX and MANDIR both point into the "out" output.
+       #:make-flags (list
+                     "CFLAGS=-O2" ; override "-m64" flag
+                     (string-append "PREFIX=" (assoc-ref %outputs "out"))
+                     (string-append "MANDIR=" (assoc-ref %outputs "out")
+                                    "/share/man/man1"))
+       #:phases
+       (alist-cons-after
+        'unpack 'patch-manpage-install
+        (lambda _
+          ;; Make the Makefile copy the man page from "./cpp" rather than
+          ;; from "${PREFIX}/cpp".
+          (substitute* "Makefile"
+            (("cp \\$\\{PREFIX\\}/cpp/vcftools.1") "cp ./cpp/vcftools.1"))
+          ;; 'substitute*' has no meaningful return value; signal success
+          ;; explicitly like the other phases in this file.
+          #t)
+        ;; Skip the 'configure' phase.
+        (alist-delete 'configure %standard-phases))))
+    (inputs
+     `(("perl" ,perl)
+       ("zlib" ,zlib)))
+    (home-page "http://vcftools.sourceforge.net/")
+    (synopsis "Tools for working with VCF files")
+    (description
+     "VCFtools is a program package designed for working with VCF files, such
+as those generated by the 1000 Genomes Project. The aim of VCFtools is to
+provide easily accessible methods for working with complex genetic variation
+data in the form of VCF files.")
+    ;; The license is declared as LGPLv3 in the README and
+    ;; at http://vcftools.sourceforge.net/license.html
+    (license license:lgpl3)))