gnu: Add cpat.

* gnu/packages/bioinformatics.scm (cpat): New variable.

Change-Id: I7b3acca1bdec2610d7cdaaf6f96440fe000421dd
author: Ricardo Wurmus <rekado@elephly.net> 2024-01-02 22:46:00 +0100
committer: Ricardo Wurmus <rekado@elephly.net> 2024-01-02 22:47:07 +0100
commit: f4628000024219bf373922ff4a6fa752eb821797 (patch)
tree: 76b30b514934ea9bd53da4ac83e5a9a98a54b573 /gnu/packages/bioinformatics.scm
parent: 8ed9ffedd4b7a01fe1ecb73e75931d2ccfcd4923 (diff)
1 files changed, 44 insertions, 0 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index bdad03b000..41cac296fe 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -617,6 +617,50 @@ Compared to cellSNP, this package is more efficient with higher speed and less
 memory usage.")
       (license license:asl2.0))))
 
+;; CPAT 3.0.4 fetched from PyPI; its check phase runs upstream's test/test.sh.
+(define-public cpat
+  (package
+    (name "cpat")
+    (version "3.0.4")
+    (source (origin
+              (method url-fetch)
+              (uri (pypi-uri "CPAT" version))
+              (sha256
+               (base32 "0dfrwwbhv1n4nh2a903d1qfb30fgxgya89sa70aci3wzf8h2z0vd"))
+              (modules '((guix build utils)))
+              ;; Remove the .eggs and __pycache__ directories from the source.
+              (snippet
+               '(for-each delete-file-recursively
+                          (list ".eggs"
+                                "lib/__pycache__/"
+                                "lib/cpmodule/__pycache__/")))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list
+      #:phases
+      '(modify-phases %standard-phases
+         ;; Replace the default check: run test.sh with bash, only if tests?.
+         (replace 'check
+           (lambda* (#:key tests? #:allow-other-keys)
+             (when tests?
+               (with-directory-excursion "test"
+                 ;; There is no test4.fa
+                 (substitute* "test.sh"
+                   ((".*-g test4\\.fa.*") ""))
+                 (invoke "bash" "test.sh"))))))))
+    (propagated-inputs (list python-numpy python-pysam))
+    (inputs (list r-minimal))
+    (home-page "https://wlcb.oit.uci.edu/cpat/")
+    (synopsis "Alignment-free distinction between coding and noncoding RNA")
+    (description
+     "CPAT is a method to distinguish coding and noncoding RNA by using a
+logistic regression model based on four pure sequence-based, linguistic
+features: ORF size, ORF coverage, Fickett TESTCODE, and Hexamer usage bias.
+Linguistic features based method does not require other genomes or protein
+databases to perform alignment and is more robust.  Because it is
+alignment-free, it runs much faster and is also easier to use.")
+    (license license:gpl2+)))
+
 (define-public pbcopper
   (package
     (name "pbcopper")
author	Ricardo Wurmus <rekado@elephly.net>	2024-01-02 22:46:00 +0100
committer	Ricardo Wurmus <rekado@elephly.net>	2024-01-02 22:47:07 +0100
commit	f4628000024219bf373922ff4a6fa752eb821797 (patch)
tree	76b30b514934ea9bd53da4ac83e5a9a98a54b573 /gnu/packages/bioinformatics.scm
parent	8ed9ffedd4b7a01fe1ecb73e75931d2ccfcd4923 (diff)