summaryrefslogtreecommitdiff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r--gnu/packages/bioinformatics.scm684
1 files changed, 385 insertions, 299 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 86270ecc75..77af190659 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -1,17 +1,17 @@
;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ricardo Wurmus <rekado@elephly.net>
+;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ricardo Wurmus <rekado@elephly.net>
;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
;;; Copyright © 2015, 2016, 2018, 2019, 2020 Pjotr Prins <pjotr.guix@thebird.nl>
;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
;;; Copyright © 2016, 2020 Roel Janssen <roel@gnu.org>
-;;; Copyright © 2016, 2017, 2018, 2019, 2020 Efraim Flashner <efraim@flashner.co.il>
+;;; Copyright © 2016, 2017, 2018, 2019, 2020, 2021 Efraim Flashner <efraim@flashner.co.il>
;;; Copyright © 2016, 2020 Marius Bakke <mbakke@fastmail.com>
;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
;;; Copyright © 2017 Arun Isaac <arunisaac@systemreboot.net>
;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com>
;;; Copyright © 2018 Gábor Boskovits <boskovits@gmail.com>
-;;; Copyright © 2018, 2019, 2020 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
+;;; Copyright © 2018, 2019, 2020, 2021 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
;;; Copyright © 2019, 2020, 2021 Maxim Cournoyer <maxim.cournoyer@gmail.com>
;;; Copyright © 2019 Brian Leung <bkleung89@gmail.com>
;;; Copyright © 2019 Brett Gilio <brettg@gnu.org>
@@ -57,6 +57,7 @@
#:use-module (guix build-system trivial)
#:use-module (guix deprecation)
#:use-module (gnu packages)
+ #:use-module (gnu packages assembly)
#:use-module (gnu packages autotools)
#:use-module (gnu packages algebra)
#:use-module (gnu packages base)
@@ -142,6 +143,7 @@
#:use-module (gnu packages xml)
#:use-module (gnu packages xorg)
#:use-module (srfi srfi-1)
+ #:use-module (srfi srfi-26)
#:use-module (ice-9 match))
(define-public aragorn
@@ -2770,6 +2772,86 @@ sequencing data. It uses paired-ends and split-reads to sensitively and
accurately delineate genomic rearrangements throughout the genome.")
(license license:gpl3+)))
+(define-public trf
+ (package
+ (name "trf")
+ (version "4.09.1")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/Benson-Genomics-Lab/TRF")
+ (commit (string-append "v" version))))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "0fhwr4s1mf8nw8fr5imwjvjr42b59p97zr961ifm8xl1bajz4wpg"))))
+ (build-system gnu-build-system)
+ (home-page "https://github.com/Benson-Genomics-Lab/TRF")
+ (synopsis "Tandem Repeats Finder: a program to analyze DNA sequences")
+ (description "A tandem repeat in DNA is two or more adjacent, approximate
+copies of a pattern of nucleotides. Tandem Repeats Finder is a program to
+locate and display tandem repeats in DNA sequences. In order to use the
+program, the user submits a sequence in FASTA format. The output consists of
+two files: a repeat table file and an alignment file. Submitted sequences may
+be of arbitrary length. Repeats with pattern size in the range from 1 to 2000
+bases are detected.")
+ (license license:agpl3+)))
+
+(define-public repeat-masker
+ (package
+ (name "repeat-masker")
+ (version "4.1.1")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append "http://www.repeatmasker.org/"
+ "RepeatMasker/RepeatMasker-"
+ version ".tar.gz"))
+ (sha256
+ (base32 "03144sl9kh5ni2i33phi7x2pjndzbm5bjw3r4kqvmm6hxyb4k4x2"))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:tests? #false ; there are none
+ #:phases
+ (modify-phases %standard-phases
+ (delete 'configure)
+ (replace 'build
+ (lambda* (#:key inputs outputs #:allow-other-keys)
+ (let ((share (string-append (assoc-ref outputs "out")
+ "/share/RepeatMasker")))
+ (mkdir-p share)
+ (copy-recursively "." share)
+ (with-directory-excursion share
+ (invoke "perl" "configure"
+ "--trf_prgm" (which "trf")
+ "--hmmer_dir"
+ (string-append (assoc-ref inputs "hmmer")
+ "/bin"))))))
+ (replace 'install
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let* ((out (assoc-ref outputs "out"))
+ (share (string-append out "/share/RepeatMasker"))
+ (bin (string-append out "/bin"))
+ (path (getenv "PERL5LIB")))
+ (install-file (string-append share "/RepeatMasker") bin)
+ (wrap-program (string-append bin "/RepeatMasker")
+ `("PERL5LIB" ":" prefix (,path ,share)))))))))
+ (inputs
+ `(("perl" ,perl)
+ ("perl-text-soundex" ,perl-text-soundex)
+ ("python" ,python)
+ ("python-h5py" ,python-h5py)
+ ("hmmer" ,hmmer)
+ ("trf" ,trf)))
+ (home-page "https://github.com/Benson-Genomics-Lab/TRF")
+ (synopsis "Tandem Repeats Finder: a program to analyze DNA sequences")
+ (description "A tandem repeat in DNA is two or more adjacent, approximate
+copies of a pattern of nucleotides. Tandem Repeats Finder is a program to
+locate and display tandem repeats in DNA sequences. In order to use the
+program, the user submits a sequence in FASTA format. The output consists of
+two files: a repeat table file and an alignment file. Submitted sequences may
+be of arbitrary length. Repeats with pattern size in the range from 1 to 2000
+bases are detected.")
+ (license license:osl2.1)))
+
(define-public diamond
(package
(name "diamond")
@@ -3508,61 +3590,60 @@ comment or quality sections.")
(define-public gemma
(package
(name "gemma")
- (version "0.98")
+ (version "0.98.3")
(source (origin
(method git-fetch)
(uri (git-reference
- (url "https://github.com/xiangzhou/GEMMA")
- (commit (string-append "v" version))))
+ (url "https://github.com/genetics-statistics/GEMMA")
+ (commit version)))
(file-name (git-file-name name version))
(sha256
(base32
- "1s3ncnbn45r2hh1cvrqky1kbqq6546biypr4f5mkw1kqlrgyh0yg"))))
+ "1p8a7kkfn1mmrg017aziy544aha8i9h6wd1x2dk3w2794wl33qb7"))
+ (modules '((guix build utils)))
+ (snippet
+ '(begin
+ (delete-file-recursively "contrib")
+ #t))))
+ (build-system gnu-build-system)
(inputs
- `(("eigen" ,eigen)
- ("gfortran" ,gfortran "lib")
- ("gsl" ,gsl)
- ("lapack" ,lapack)
+ `(("gsl" ,gsl)
("openblas" ,openblas)
("zlib" ,zlib)))
- (build-system gnu-build-system)
+ (native-inputs
+ `(("catch" ,catch-framework2-1)
+ ("perl" ,perl)
+ ("shunit2" ,shunit2)
+ ("which" ,which)))
(arguments
- `(#:make-flags
- '(,@(match (%current-system)
- ("x86_64-linux"
- '("FORCE_DYNAMIC=1"))
- ("i686-linux"
- '("FORCE_DYNAMIC=1" "FORCE_32BIT=1"))
- (_
- '("FORCE_DYNAMIC=1" "NO_INTEL_COMPAT=1"))))
- #:phases
+ `(#:phases
(modify-phases %standard-phases
(delete 'configure)
- (add-after 'unpack 'find-eigen
+ (add-after 'unpack 'prepare-build
(lambda* (#:key inputs #:allow-other-keys)
- ;; Ensure that Eigen headers can be found
- (setenv "CPLUS_INCLUDE_PATH"
- (string-append (assoc-ref inputs "eigen")
- "/include/eigen3"))
+ (mkdir-p "bin")
+ (substitute* "Makefile"
+ (("/usr/local/opt/openblas")
+ (assoc-ref inputs "openblas")))
#t))
- (add-before 'build 'bin-mkdir
- (lambda _
- (mkdir-p "bin")
- #t))
+ (replace 'check
+ (lambda* (#:key tests? #:allow-other-keys)
+ (when tests?
+ ;; 'make slow-check' expects shunit2-2.0.3.
+ (with-directory-excursion "test"
+ (invoke "./test_suite.sh"))
+ #t)))
(replace 'install
(lambda* (#:key outputs #:allow-other-keys)
- (let ((out (assoc-ref outputs "out")))
- (install-file "bin/gemma"
- (string-append
- out "/bin")))
- #t)))
- #:tests? #f)) ; no tests included yet
- (home-page "https://github.com/xiangzhou/GEMMA")
+ (install-file "bin/gemma"
+ (string-append (assoc-ref outputs "out") "/bin"))
+ #t)))))
+ (home-page "https://github.com/genetics-statistics/GEMMA")
(synopsis "Tool for genome-wide efficient mixed model association")
(description
- "Genome-wide Efficient Mixed Model Association (GEMMA) provides a
-standard linear mixed model resolver with application in genome-wide
-association studies (GWAS).")
+ "@acronym{GEMMA, Genome-wide Efficient Mixed Model Association} provides a
+standard linear mixed model resolver with application in @acronym{GWAS,
+genome-wide association studies}.")
(license license:gpl3)))
(define-public grit
@@ -4632,7 +4713,7 @@ sequencing tag position and orientation.")
(define-public mafft
(package
(name "mafft")
- (version "7.471")
+ (version "7.475")
(source (origin
(method url-fetch)
(uri (string-append
@@ -4641,7 +4722,7 @@ sequencing tag position and orientation.")
(file-name (string-append name "-" version ".tgz"))
(sha256
(base32
- "0r1973fx2scq4712zdqfy67wkzqj0c0bhrdy4jxhvq40mdxyry30"))))
+ "0i2i2m3blh2xkbkdk48hxfssks30ny0v381gdl7zwhcvp0axs26r"))))
(build-system gnu-build-system)
(arguments
`(#:tests? #f ; no automated tests, though there are tests in the read me
@@ -6999,6 +7080,55 @@ sequence.")
(supported-systems '("i686-linux" "x86_64-linux"))
(license license:bsd-3)))
+(define-public r-snapatac
+ (package
+ (name "r-snapatac")
+ (version "2.0")
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/r3fang/SnapATAC")
+ (commit (string-append "v" version))))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "037jzlbl436fi7lkpq7d83i2vd1crnrik3vac2x6xj75dbikb2av"))))
+ (properties `((upstream-name . "SnapATAC")))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-bigmemory" ,r-bigmemory)
+ ("r-doparallel" ,r-doparallel)
+ ("r-dosnow" ,r-dosnow)
+ ("r-edger" ,r-edger)
+ ("r-foreach" ,r-foreach)
+ ("r-genomicranges" ,r-genomicranges)
+ ("r-igraph" ,r-igraph)
+ ("r-iranges" ,r-iranges)
+ ("r-irlba" ,r-irlba)
+ ("r-matrix" ,r-matrix)
+ ("r-plyr" ,r-plyr)
+ ("r-plot3d" ,r-plot3d)
+ ("r-rann" ,r-rann)
+ ("r-raster" ,r-raster)
+ ("r-rcolorbrewer" ,r-rcolorbrewer)
+ ("r-rhdf5" ,r-rhdf5)
+ ("r-rtsne" ,r-rtsne)
+ ("r-scales" ,r-scales)
+ ("r-viridis" ,r-viridis)))
+ (home-page "https://github.com/r3fang/SnapATAC")
+ (synopsis "Single nucleus analysis package for ATAC-Seq")
+ (description
+ "This package provides a fast and accurate analysis toolkit for single
+cell ATAC-seq (Assay for transposase-accessible chromatin using sequencing).
+Single cell ATAC-seq can resolve the heterogeneity of a complex tissue and
+reveal cell-type specific regulatory landscapes. However, the exceeding data
+sparsity has posed unique challenges for the data analysis. This package
+@code{r-snapatac} is an end-to-end bioinformatics pipeline for analyzing large-
+scale single cell ATAC-seq data which includes quality control, normalization,
+clustering analysis, differential analysis, motif inference and exploration of
+single cell ATAC-seq sequencing data.")
+ (license license:gpl3)))
+
(define-public r-scde
(package
(name "r-scde")
@@ -7065,6 +7195,45 @@ between two different types of motif instances using as much relevant
information as possible.")
(license (list license:gpl2+ license:gpl3+))))
+(define-public r-demultiplex
+ (let ((commit "6e2a1422c8e6f418cfb271997eebc91f9195f299")
+ (revision "1"))
+ (package
+ (name "r-demultiplex")
+ (version (git-version "1.0.2" revision commit))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/chris-mcginnis-ucsf/MULTI-seq")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "01kv88wp8vdaq07sjk0d3d1cb553mq1xqg0war81pgmg63bgi38w"))))
+ (properties `((upstream-name . "deMULTIplex")))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-kernsmooth" ,r-kernsmooth)
+ ("r-reshape2" ,r-reshape2)
+ ("r-rtsne" ,r-rtsne)
+ ("r-shortread" ,r-shortread)
+ ("r-stringdist" ,r-stringdist)))
+ (home-page "https://github.com/chris-mcginnis-ucsf/MULTI-seq")
+ (synopsis "MULTI-seq pre-processing and classification tools")
+ (description
+ "deMULTIplex is an R package for analyzing single-cell RNA sequencing
+data generated with the MULTI-seq sample multiplexing method. The package
+includes software to
+
+@enumerate
+@item Convert raw MULTI-seq sample barcode library FASTQs into a sample
+ barcode UMI count matrix, and
+@item Classify cell barcodes into sample barcode groups.
+@end enumerate
+")
+ (license license:cc0))))
+
(define-public r-genefilter
(package
(name "r-genefilter")
@@ -7652,6 +7821,31 @@ BLAST, KEGG, GenBank, MEDLINE and GO.")
;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
(license (list license:ruby license:lgpl2.1+ license:gpl2+ ))))
+(define-public bio-vcf
+ (package
+ (name "bio-vcf")
+ (version "0.9.5")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (rubygems-uri "bio-vcf" version))
+ (sha256
+ (base32
+ "1glw5pn9s8z13spxk6yyfqaz80n9lga67f33w35nkpq9dwi2vg6g"))))
+ (build-system ruby-build-system)
+ (native-inputs
+ `(("ruby-cucumber" ,ruby-cucumber)))
+ (synopsis "Smart VCF parser DSL")
+ (description
+ "Bio-vcf provides a @acronym{DSL, domain specific language} for processing
+the VCF format. Record named fields can be queried with regular expressions.
+Bio-vcf is a new generation VCF parser, filter and converter. Bio-vcf is not
+only very fast for genome-wide (WGS) data, it also comes with a filtering,
+evaluation and rewrite language and can output any type of textual data,
+including VCF header and contents in RDF and JSON.")
+ (home-page "https://github.com/vcflib/bio-vcf")
+ (license license:expat)))
+
(define-public r-biocviews
(package
(name "r-biocviews")
@@ -9312,8 +9506,9 @@ group or two ChIP groups run under different conditions.")
(delete 'configure) ; There is no configure phase.
(replace 'install
(lambda* (#:key outputs #:allow-other-keys)
- (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
- (install-file "filevercmp" bin)
+ (let ((out (assoc-ref outputs "out")))
+ (install-file "filevercmp" (string-append out "/bin"))
+ (install-file "filevercmp.h" (string-append out "/include"))
#t))))))
(home-page "https://github.com/ekg/filevercmp")
(synopsis "This program compares version strings")
@@ -10440,41 +10635,20 @@ explore and perform basic analysis of single cell sequencing data coming from
droplet sequencing. It has been particularly tailored for Drop-seq.")
(license license:gpl3))))
-(define htslib-for-sambamba
- (let ((commit "2f3c3ea7b301f9b45737a793c0b2dcf0240e5ee5"))
- (package
- (inherit htslib)
- (name "htslib-for-sambamba")
- (version (string-append "1.3.1-1." (string-take commit 9)))
- (source
- (origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/lomereiter/htslib")
- (commit commit)))
- (file-name (string-append "htslib-" version "-checkout"))
- (sha256
- (base32
- "0g38g8s3npr0gjm9fahlbhiskyfws9l5i0x1ml3rakzj7az5l9c9"))))
- (native-inputs
- `(("autoconf" ,autoconf)
- ("automake" ,automake)
- ,@(package-native-inputs htslib))))))
-
(define-public sambamba
(package
(name "sambamba")
- (version "0.7.1")
+ (version "0.8.0")
(source
(origin
(method git-fetch)
(uri (git-reference
- (url "https://github.com/lomereiter/sambamba")
+ (url "https://github.com/biod/sambamba")
(commit (string-append "v" version))))
- (file-name (string-append name "-" version "-checkout"))
+ (file-name (git-file-name name version))
(sha256
(base32
- "111h05b60pj8dxbidiamy4imc92x2962b3lmb7wgysl6lx064qis"))))
+ "07dznzl6m8k7sw84jxw2kx6i3ymrapbmcmyh0fxz8wrybhw8fmwc"))))
(build-system gnu-build-system)
(arguments
`(#:tests? #f ; there is no test target
@@ -10484,52 +10658,30 @@ droplet sequencing. It has been particularly tailored for Drop-seq.")
(delete 'configure)
(add-after 'unpack 'fix-ldc-version
(lambda _
- (substitute* "gen_ldc_version_info.py"
- (("/usr/bin/env.*") (which "python3")))
(substitute* "Makefile"
;; We use ldc2 instead of ldmd2 to compile sambamba.
(("\\$\\(shell which ldmd2\\)") (which "ldc2")))
#t))
- (add-after 'unpack 'place-biod-and-undead
- (lambda* (#:key inputs #:allow-other-keys)
- (copy-recursively (assoc-ref inputs "biod") "BioD")
- #t))
(add-after 'unpack 'unbundle-prerequisites
(lambda _
(substitute* "Makefile"
- (("htslib/libhts.a lz4/lib/liblz4.a")
- "-L-lhts -L-llz4")
- ((" lz4-static htslib-static") ""))
+ (("= lz4/lib/liblz4.a") "= -L-llz4")
+ (("ldc_version_info lz4-static") "ldc_version_info"))
#t))
(replace 'install
(lambda* (#:key outputs #:allow-other-keys)
- (let* ((out (assoc-ref outputs "out"))
- (bin (string-append out "/bin")))
+ (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
(mkdir-p bin)
(copy-file (string-append "bin/sambamba-" ,version)
(string-append bin "/sambamba"))
#t))))))
(native-inputs
- `(("ldc" ,ldc)
- ("rdmd" ,rdmd)
- ("python" ,python)
- ("biod"
- ,(let ((commit "7969eb0a847b05874e83ffddead26e193ece8101"))
- (origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/biod/BioD")
- (commit commit)))
- (file-name (string-append "biod-"
- (string-take commit 9)
- "-checkout"))
- (sha256
- (base32
- "0mjxsmbmv0jxl3pq21p8j5r829d648if8q58ka50b2956lc6qkpm")))))))
+ `(("python" ,python)))
(inputs
- `(("lz4" ,lz4)
- ("htslib" ,htslib-for-sambamba)))
- (home-page "https://lomereiter.github.io/sambamba/")
+ `(("ldc" ,ldc)
+ ("lz4" ,lz4)
+ ("zlib" ,zlib)))
+ (home-page "https://github.com/biod/sambamba")
(synopsis "Tools for working with SAM/BAM data")
(description "Sambamba is a high performance modern robust and
fast tool (and library), written in the D programming language, for
@@ -14773,10 +14925,14 @@ some of the details of opening and jumping in tabix-indexed files.")
(delete 'configure) ; There is no configure phase.
(replace 'install
(lambda* (#:key outputs #:allow-other-keys)
- (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
+ (let* ((out (assoc-ref outputs "out"))
+ (bin (string-append out "/bin"))
+ (include (string-append out "/include")))
;; TODO: There are Python modules for these programs too.
(install-file "multichoose" bin)
- (install-file "multipermute" bin))
+ (install-file "multipermute" bin)
+ (install-file "multichoose.h" include)
+ (install-file "multipermute.h" include))
#t)))))
(home-page "https://github.com/ekg/multichoose")
(synopsis "Efficient loopless multiset combination generation algorithm")
@@ -14885,32 +15041,44 @@ library automatically handles index file generation and use.")
(define-public vcflib
(package
(name "vcflib")
- (version "1.0.1")
+ (version "1.0.2")
(source
(origin
- (method url-fetch)
- (uri (string-append "https://github.com/vcflib/vcflib/releases/"
- "download/v" version
- "/vcflib-" version "-src.tar.gz"))
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/vcflib/vcflib")
+ (commit (string-append "v" version))))
+ (file-name (git-file-name name version))
(sha256
- (base32 "14zzrg8hg8cq9cvq2wdvp21j7nmxxkjrbagw2apd2yqv2kyx42lm"))
- (patches (search-patches "vcflib-use-shared-libraries.patch"))
+ (base32 "1k1z3876kbzifj1sqfzsf3lgb4rw779hvkg6ryxbyq5bc2paj9kh"))
(modules '((guix build utils)))
(snippet
- `(begin
+ '(begin
+ (substitute* "CMakeLists.txt"
+ ((".*fastahack.*") "")
+ ((".*smithwaterman.*") "")
+ (("(pkg_check_modules\\(TABIXPP)" text)
+ (string-append
+ "pkg_check_modules(FASTAHACK REQUIRED fastahack)\n"
+ "pkg_check_modules(SMITHWATERMAN REQUIRED smithwaterman)\n"
+ text))
+ (("\\$\\{TABIXPP_LIBRARIES\\}" text)
+ (string-append "${FASTAHACK_LIBRARIES} "
+ "${SMITHWATERMAN_LIBRARIES} "
+ text)))
(substitute* (find-files "." "\\.(h|c)(pp)?$")
(("\"SmithWatermanGotoh.h\"") "<smithwaterman/SmithWatermanGotoh.h>")
(("\"convert.h\"") "<smithwaterman/convert.h>")
(("\"disorder.h\"") "<smithwaterman/disorder.h>")
- (("\"tabix.hpp\"") "<tabix.hpp>")
- (("\"Fasta.h\"") "<fastahack/Fasta.h>"))
+ (("Fasta.h") "fastahack/Fasta.h"))
(for-each delete-file-recursively
'("fastahack" "filevercmp" "fsom" "googletest" "intervaltree"
- "libVCFH" "multichoose" "smithwaterman" "tabixpp"))
+ "libVCFH" "multichoose" "smithwaterman"))
#t))))
- (build-system gnu-build-system)
+ (build-system cmake-build-system)
(inputs
- `(("htslib" ,htslib)
+ `(("bzip2" ,bzip2)
+ ("htslib" ,htslib)
("fastahack" ,fastahack)
("perl" ,perl)
("python" ,python)
@@ -14923,22 +15091,20 @@ library automatically handles index file generation and use.")
;; Submodules.
;; This package builds against the .o files so we need to extract the source.
("filevercmp-src" ,(package-source filevercmp))
+ ("fsom-src" ,(package-source fsom))
("intervaltree-src" ,(package-source intervaltree))
("multichoose-src" ,(package-source multichoose))))
(arguments
`(#:tests? #f ; no tests
#:phases
(modify-phases %standard-phases
- (add-after 'unpack 'set-flags
- (lambda* (#:key outputs #:allow-other-keys)
- (substitute* "Makefile"
- (("LDFLAGS =")
- (string-append "LDFLAGS = -Wl,-rpath="
- (assoc-ref outputs "out") "/lib ")))
- (substitute* "filevercmp/Makefile"
- (("-c") "-c -fPIC"))
+ (add-after 'unpack 'build-shared-library
+ (lambda _
+ (substitute* "CMakeLists.txt"
+ (("vcflib STATIC") "vcflib SHARED"))
+ (substitute* "test/Makefile"
+ (("libvcflib.a") "libvcflib.so"))
#t))
- (delete 'configure)
(add-after 'unpack 'unpack-submodule-sources
(lambda* (#:key inputs #:allow-other-keys)
(let ((unpack (lambda (source target)
@@ -14951,39 +15117,31 @@ library automatically handles index file generation and use.")
"--strip-components=1"))))))
(and
(unpack "filevercmp-src" "filevercmp")
+ (unpack "fsom-src" "fsom")
(unpack "intervaltree-src" "intervaltree")
- (unpack "multichoose-src" "multichoose")))))
- (replace 'install
+ (unpack "multichoose-src" "multichoose"))
+ #t)))
+ ;; This pkg-config file is provided by other distributions.
+ (add-after 'install 'install-pkg-config-file
(lambda* (#:key outputs #:allow-other-keys)
(let* ((out (assoc-ref outputs "out"))
- (bin (string-append out "/bin"))
- (lib (string-append out "/lib")))
- (for-each (lambda (file)
- (install-file file bin))
- (find-files "bin" ".*"))
- (install-file "libvcflib.so" lib)
- (install-file "libvcflib.a" lib)
- (for-each
- (lambda (file)
- (install-file file (string-append out "/include")))
- (find-files "include" "\\.h(pp)?$"))
- (mkdir-p (string-append lib "/pkgconfig"))
- (with-output-to-file (string-append lib "/pkgconfig/vcflib.pc")
+ (pkgconfig (string-append out "/lib/pkgconfig")))
+ (mkdir-p pkgconfig)
+ (with-output-to-file (string-append pkgconfig "/vcflib.pc")
(lambda _
(format #t "prefix=~a~@
exec_prefix=${prefix}~@
libdir=${exec_prefix}/lib~@
includedir=${prefix}/include~@
~@
- ~@
- Name: libvcflib~@
+ Name: vcflib~@
Version: ~a~@
- Requires: smithwaterman, fastahack~@
+ Requires: smithwaterman, fastahack, tabixpp~@
Description: C++ library for parsing and manipulating VCF files~@
Libs: -L${libdir} -lvcflib~@
Cflags: -I${includedir}~%"
- out ,version))))
- #t)))))
+ out ,version)))
+ #t))))))
(home-page "https://github.com/vcflib/vcflib/")
(synopsis "Library for parsing and manipulating VCF files")
(description "Vcflib provides methods to manipulate and interpret
@@ -14994,125 +15152,94 @@ manipulations on VCF files.")
(license license:expat)))
(define-public freebayes
- (let ((commit "3ce827d8ebf89bb3bdc097ee0fe7f46f9f30d5fb")
- (revision "1")
- (version "1.0.2"))
- (package
- (name "freebayes")
- (version (git-version version revision commit))
- (source (origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/ekg/freebayes")
- (commit commit)))
- (file-name (git-file-name name version))
- (sha256
- (base32 "1sbzwmcbn78ybymjnhwk7qc5r912azy5vqz2y7y81616yc3ba2a2"))))
- (build-system gnu-build-system)
- (inputs
- `(("bamtools" ,bamtools)
- ("htslib" ,htslib)
- ("zlib" ,zlib)))
- (native-inputs
- `(("bc" ,bc) ; Needed for running tests.
- ("samtools" ,samtools) ; Needed for running tests.
- ("parallel" ,parallel) ; Needed for running tests.
- ("perl" ,perl) ; Needed for running tests.
- ("procps" ,procps) ; Needed for running tests.
- ("python" ,python-2) ; Needed for running tests.
- ("vcflib-src" ,(package-source vcflib))
- ;; These are submodules for the vcflib version used in freebayes.
- ;; This package builds against the .o files so we need to extract the source.
- ("tabixpp-src" ,(package-source tabixpp))
- ("smithwaterman-src" ,(package-source smithwaterman))
- ("multichoose-src" ,(package-source multichoose))
- ("fsom-src" ,(package-source fsom))
- ("filevercmp-src" ,(package-source filevercmp))
- ("fastahack-src" ,(package-source fastahack))
- ("intervaltree-src" ,(package-source intervaltree))
- ;; These submodules are needed to run the tests.
- ("bash-tap-src" ,(package-source bash-tap))
- ("test-simple-bash-src"
- ,(origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/ingydotnet/test-simple-bash/")
- (commit "124673ff204b01c8e96b7fc9f9b32ee35d898acc")))
- (file-name "test-simple-bash-src-checkout")
- (sha256
- (base32 "043plp6z0x9yf7mdpky1fw7zcpwn1p47px95w9mh16603zqqqpga"))))))
- (arguments
- `(#:make-flags
- (list "CC=gcc"
- (string-append "BAMTOOLS_ROOT="
- (assoc-ref %build-inputs "bamtools")))
- #:test-target "test"
- #:phases
- (modify-phases %standard-phases
- (delete 'configure)
- (add-after 'unpack 'fix-tests
- (lambda _
- (substitute* "test/t/01_call_variants.t"
- (("grep -P \"\\(\\\\t500\\$\\|\\\\t11000\\$\\|\\\\t1000\\$\\)\"")
- "grep -E ' (500|11000|1000)$'"))
- #t))
- (add-after 'unpack 'unpack-submodule-sources
- (lambda* (#:key inputs #:allow-other-keys)
- (let ((unpack (lambda (source target)
- (with-directory-excursion target
- (if (file-is-directory? (assoc-ref inputs source))
- (copy-recursively (assoc-ref inputs source) ".")
- (invoke "tar" "xvf"
- (assoc-ref inputs source)
- "--strip-components=1"))))))
- (and
- (unpack "vcflib-src" "vcflib")
- (unpack "fastahack-src" "vcflib/fastahack")
- (unpack "filevercmp-src" "vcflib/filevercmp")
- (unpack "fsom-src" "vcflib/fsom")
- (unpack "intervaltree-src" "vcflib/intervaltree")
- (unpack "multichoose-src" "vcflib/multichoose")
- (unpack "smithwaterman-src" "vcflib/smithwaterman")
- (unpack "tabixpp-src" "vcflib/tabixpp")
- (unpack "test-simple-bash-src" "test/test-simple-bash")
- (unpack "bash-tap-src" "test/bash-tap")))))
- (add-after 'unpack-submodule-sources 'fix-makefiles
- (lambda _
- ;; We don't have the .git folder to get the version tag from.
- (substitute* "vcflib/Makefile"
- (("^GIT_VERSION.*")
- (string-append "GIT_VERSION = v" ,version)))
- (substitute* "src/Makefile"
- (("-I\\$\\(BAMTOOLS_ROOT\\)/src")
- "-I$(BAMTOOLS_ROOT)/include/bamtools"))
- #t))
- (add-before 'build 'build-tabixpp-and-vcflib
- (lambda* (#:key inputs make-flags #:allow-other-keys)
- (with-directory-excursion "vcflib"
- (with-directory-excursion "tabixpp"
- (apply invoke "make"
- (string-append "HTS_LIB="
- (assoc-ref inputs "htslib")
- "/lib/libhts.a")
- make-flags))
- (apply invoke "make"
- (string-append "CFLAGS=-Itabixpp")
- "all"
- make-flags))))
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
- (install-file "bin/freebayes" bin)
- (install-file "bin/bamleftalign" bin))
- #t)))))
- (home-page "https://github.com/ekg/freebayes")
- (synopsis "Haplotype-based variant detector")
- (description "FreeBayes is a Bayesian genetic variant detector designed to
+ (package
+ (name "freebayes")
+ (version "1.3.3")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/freebayes/freebayes")
+ (commit (string-append "v" version))))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "0myz3giad7jqp6ricdfnig9ymlcps2h67mlivadvx97ngagm85z8"))
+ (patches (search-patches "freebayes-devendor-deps.patch"))
+ (modules '((guix build utils)))
+ (snippet
+ '(begin
+ (delete-file-recursively "contrib/htslib")
+ #t))))
+ (build-system meson-build-system)
+ (inputs
+ `(("fastahack" ,fastahack)
+ ("htslib" ,htslib)
+ ("smithwaterman" ,smithwaterman)
+ ("tabixpp" ,tabixpp)
+ ("vcflib" ,vcflib)
+ ("zlib" ,zlib)))
+ (native-inputs
+ `(("bash-tap" ,bash-tap)
+ ("bc" ,bc)
+ ("grep" ,grep) ; Built with perl support.
+ ("parallel" ,parallel)
+ ("perl" ,perl)
+ ("pkg-config" ,pkg-config)
+ ("samtools" ,samtools)
+ ("simde" ,simde)
+ ;; This submodule is needed to run the tests.
+ ("test-simple-bash-src"
+ ,(origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/ingydotnet/test-simple-bash/")
+ (commit "124673ff204b01c8e96b7fc9f9b32ee35d898acc")))
+ (file-name "test-simple-bash-src-checkout")
+ (sha256
+ (base32 "043plp6z0x9yf7mdpky1fw7zcpwn1p47px95w9mh16603zqqqpga"))))))
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'patch-source
+ (lambda* (#:key inputs #:allow-other-keys)
+ (let ((bash-tap (assoc-ref inputs "bash-tap")))
+ (substitute* (find-files "test/t")
+ (("BASH_TAP_ROOT=bash-tap")
+ (string-append "BASH_TAP_ROOT=" bash-tap "/bin"))
+ (("bash-tap/bash-tap-bootstrap")
+ (string-append bash-tap "/bin/bash-tap-bootstrap"))
+ (("source.*bash-tap-bootstrap")
+ (string-append "source " bash-tap "/bin/bash-tap-bootstrap")))
+ (substitute* "meson.build"
+ ;; Some inputs aren't actually needed.
+ ((".*bamtools/src.*") "")
+ ((".*multichoose.*") ""))
+ (substitute* '("src/BedReader.cpp"
+ "src/BedReader.h")
+ (("../intervaltree/IntervalTree.h") "IntervalTree.h"))
+ #t)))
+ (add-after 'unpack 'unpack-submodule-sources
+ (lambda* (#:key inputs #:allow-other-keys)
+ (mkdir-p "test/test-simple-bash")
+ (copy-recursively (assoc-ref inputs "test-simple-bash-src")
+ "test/test-simple-bash")
+ #t))
+ ;; The slow tests take longer than the specified timeout.
+ ,@(if (any (cute string=? <> (%current-system))
+ '("armhf-linux" "aarch64-linux"))
+ '((replace 'check
+ (lambda* (#:key tests? #:allow-other-keys)
+ (when tests?
+ (invoke "meson" "test" "--timeout-multiplier" "5"))
+ #t)))
+ '()))))
+ (home-page "https://github.com/freebayes/freebayes")
+ (synopsis "Haplotype-based variant detector")
+ (description "FreeBayes is a Bayesian genetic variant detector designed to
find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms),
indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and
complex events (composite insertion and substitution events) smaller than the
length of a short-read sequencing alignment.")
- (license license:expat))))
+ (license license:expat)))
(define-public samblaster
(package
@@ -15398,44 +15525,3 @@ biological processes. SBML is useful for models of metabolism, cell
signaling, and more. It continues to be evolved and expanded by an
international community.")
(license license:lgpl2.1+)))
-
-(define-public grocsvs
- ;; The last release is out of date and new features have been added.
- (let ((commit "ecd956a65093a0b2c41849050e4512d46fecea5d")
- (revision "1"))
- (package
- (name "grocsvs")
- (version (git-version "0.2.6.1" revision commit))
- (source (origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/grocsvs/grocsvs")
- (commit commit)))
- (file-name (git-file-name name version))
- (sha256
- (base32 "14505725gr7qxc17cxxf0k6lzcwmgi64pija4mwf29aw70qn35cc"))
- (patches (search-patches "grocsvs-dont-use-admiral.patch"))))
- (build-system python-build-system)
- (arguments
- `(#:tests? #f ; No test suite.
- #:python ,python-2)) ; Only python-2 supported.
- (inputs
- `(("python2-h5py" ,python2-h5py)
- ("python2-ipython-cluster-helper" ,python2-ipython-cluster-helper)
- ("python2-networkx" ,python2-networkx)
- ("python2-psutil" ,python2-psutil)
- ("python2-pandas" ,python2-pandas)
- ("python2-pybedtools" ,python2-pybedtools)
- ("python2-pyfaidx" ,python2-pyfaidx)
- ("python2-pygraphviz" ,python2-pygraphviz)
- ("python2-pysam" ,python2-pysam)
- ("python2-scipy" ,python2-scipy)))
- (home-page "https://github.com/grocsvs/grocsvs")
- (synopsis "Genome-wide reconstruction of complex structural variants")
- (description
- "@dfn{Genome-wide Reconstruction of Complex Structural Variants}
-(GROC-SVs) is a software pipeline for identifying large-scale structural
-variants, performing sequence assembly at the breakpoints, and reconstructing
-the complex structural variants using the long-fragment information from the
-10x Genomics platform.")
- (license license:expat))))