summaryrefslogtreecommitdiff
path: root/gnu/packages/python-science.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/python-science.scm')
-rw-r--r--gnu/packages/python-science.scm210
1 files changed, 178 insertions, 32 deletions
diff --git a/gnu/packages/python-science.scm b/gnu/packages/python-science.scm
index 6fce8d2319..738b504e3b 100644
--- a/gnu/packages/python-science.scm
+++ b/gnu/packages/python-science.scm
@@ -35,6 +35,7 @@
#:use-module (gnu packages)
#:use-module (gnu packages base)
#:use-module (gnu packages check)
+ #:use-module (gnu packages databases)
#:use-module (gnu packages gcc)
#:use-module (gnu packages image-processing)
#:use-module (gnu packages machine-learning)
@@ -44,6 +45,7 @@
#:use-module (gnu packages pkg-config)
#:use-module (gnu packages python)
#:use-module (gnu packages python-build)
+ #:use-module (gnu packages python-crypto)
#:use-module (gnu packages python-check)
#:use-module (gnu packages python-web)
#:use-module (gnu packages python-xyz)
@@ -327,49 +329,55 @@ of the SGP4 satellite tracking algorithm.")
(define-public python-pandas
(package
(name "python-pandas")
- (version "1.0.5")
+ (version "1.3.0")
(source
(origin
(method url-fetch)
(uri (pypi-uri "pandas" version))
(sha256
- (base32 "1a2gv3g6jr6vb5ca43fkwjl5xf86wpfz8y3zcy787adjl0hdkib9"))))
+ (base32 "1qi2cv450m05dwccx3p1s373k5b4ncvwi74plnms2pidrz4ycm65"))))
(build-system python-build-system)
(arguments
`(#:modules ((guix build utils)
(guix build python-build-system)
(ice-9 ftw)
+ (srfi srfi-1)
(srfi srfi-26))
- #:phases (modify-phases %standard-phases
- (add-after 'unpack 'patch-which
- (lambda* (#:key inputs #:allow-other-keys)
- (let ((which (assoc-ref inputs "which")))
- (substitute* "pandas/io/clipboard/__init__.py"
- (("^WHICH_CMD = .*")
- (string-append "WHICH_CMD = \"" which "\"\n"))))
- #t))
- (add-before 'check 'prepare-x
- (lambda _
- (system "Xvfb &")
- (setenv "DISPLAY" ":0")
- ;; xsel needs to write a log file.
- (setenv "HOME" "/tmp")
- #t))
- (replace 'check
- (lambda _
- (let ((build-directory
- (string-append
- (getcwd) "/build/"
- (car (scandir "build"
- (cut string-prefix? "lib." <>))))))
- ;; Disable the "strict data files" option which causes
- ;; the build to error out if required data files are
- ;; not available (as is the case with PyPI archives).
- (substitute* "setup.cfg"
- (("addopts = --strict-data-files") "addopts = "))
- (with-directory-excursion build-directory
- (invoke "pytest" "-vv" "pandas" "--skip-slow"
- "--skip-network"))))))))
+ #:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'patch-which
+ (lambda* (#:key inputs #:allow-other-keys)
+ (let ((which (assoc-ref inputs "which")))
+ (substitute* "pandas/io/clipboard/__init__.py"
+ (("^WHICH_CMD = .*")
+ (string-append "WHICH_CMD = \"" which "\"\n"))))))
+ (add-before 'check 'prepare-x
+ (lambda _
+ (system "Xvfb &")
+ (setenv "DISPLAY" ":0")
+ ;; xsel needs to write a log file.
+ (setenv "HOME" "/tmp")))
+ (replace 'check
+ (lambda _
+ (let ((build-directory
+ (string-append
+ (getcwd) "/build/"
+ (first (scandir "build"
+ (cut string-prefix? "lib." <>))))))
+ (with-directory-excursion build-directory
+ (invoke "pytest" "-vv" "pandas" "--skip-slow"
+ "--skip-network"
+ "-k"
+ ;; These tets access the internet:
+ ;; pandas/tests/io/xml/test_xml.py::test_wrong_url[lxml]
+ ;; pandas/tests/io/xml/test_xml.py::test_wrong_url[etree]
+ ;; TODO: the excel tests fail for unknown reasons
+ (string-append "not test_wrong_url"
+ " and not test_excelwriter_fspath"
+ " and not test_ExcelWriter_dispatch"
+ ;; TODO: Missing input
+ " and not TestS3"
+ " and not s3")))))))))
(propagated-inputs
`(("python-jinja2" ,python-jinja2)
("python-numpy" ,python-numpy)
@@ -835,3 +843,141 @@ and more
@end itemize")
(license license:gpl3)))
+(define-public python-distributed
+ (package
+ (name "python-distributed")
+ (version "2021.07.1")
+ (source
+ (origin
+ ;; The test files are not included in the archive on pypi
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/dask/distributed")
+ (commit version)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "0i55zf3k55sqjxnwlzsyj3h3v1588fn54ng4mj3dfiqzh3nlj0dg"))))
+ (build-system python-build-system)
+ (arguments
+ '(#:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'fix-references
+ (lambda* (#:key outputs #:allow-other-keys)
+ (substitute* '("distributed/comm/tests/test_ucx_config.py"
+ "distributed/tests/test_client.py"
+ "distributed/tests/test_queues.py"
+ "distributed/tests/test_variable.py"
+ "distributed/cli/tests/test_tls_cli.py"
+ "distributed/cli/tests/test_dask_spec.py"
+ "distributed/cli/tests/test_dask_worker.py"
+ "distributed/cli/tests/test_dask_scheduler.py")
+ (("\"dask-scheduler\"")
+ (format #false "\"~a/bin/dask-scheduler\""
+ (assoc-ref outputs "out")))
+ (("\"dask-worker\"")
+ (format #false "\"~a/bin/dask-worker\""
+ (assoc-ref outputs "out"))))))
+ (replace 'check
+ (lambda* (#:key tests? #:allow-other-keys)
+ (when tests?
+ (setenv "DISABLE_IPV6" "1")
+ (invoke "pytest" "-vv" "distributed"
+ "-m" "not slow and not gpu and not ipython and not avoid_ci"
+ "-k"
+ ;; TODO: These tests fail for unknown reasons:
+ ;; Assertion error.
+ (string-append
+ "not test_version_option"
+ ;; "The 'distributed' distribution was not found"
+ " and not test_register_backend_entrypoint"
+ ;; "AttributeError: module 'distributed.dashboard' has no attribute 'scheduler'"
+ " and not test_get_client_functions_spawn_clusters"))))))))
+ (propagated-inputs
+ `(("python-click" ,python-click)
+ ("python-cloudpickle" ,python-cloudpickle)
+ ("python-cryptography" ,python-cryptography)
+ ("python-dask" ,python-dask)
+ ("python-msgpack" ,python-msgpack)
+ ("python-psutil" ,python-psutil)
+ ("python-pyyaml" ,python-pyyaml)
+ ("python-setuptools" ,python-setuptools)
+ ("python-sortedcontainers" ,python-sortedcontainers)
+ ("python-tblib" ,python-tblib)
+ ("python-toolz" ,python-toolz)
+ ("python-tornado" ,python-tornado-6)
+ ("python-zict" ,python-zict)))
+ (native-inputs
+ `(("python-pytest" ,python-pytest)))
+ (home-page "https://distributed.dask.org")
+ (synopsis "Distributed scheduler for Dask")
+ (description "Dask.distributed is a lightweight library for distributed
+computing in Python. It extends both the @code{concurrent.futures} and
+@code{dask} APIs to moderate sized clusters.")
+ (license license:bsd-3)))
+
+(define-public python-modin
+ (package
+ (name "python-modin")
+ (version "0.10.1")
+ (source
+ (origin
+ ;; The archive on pypi does not include all required files.
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/modin-project/modin")
+ (commit version)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "128ghfb9ncmnn8km409xjcdppvn9nr9jqw8rkvsfavh7wnwlk509"))))
+ (build-system python-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'make-files-writable
+ (lambda _
+ (for-each make-file-writable (find-files "."))))
+ (replace 'check
+ (lambda* (#:key tests? #:allow-other-keys)
+ (when tests?
+ (setenv "MODIN_ENGINE" "dask")
+ (invoke "python" "-m" "pytest"
+ "modin/pandas/test/test_concat.py")
+ (setenv "MODIN_ENGINE" "python")
+ (invoke "python" "-m" "pytest"
+ "modin/pandas/test/test_concat.py")))))))
+ (propagated-inputs
+ `(("python-cloudpickle" ,python-cloudpickle)
+ ("python-dask" ,python-dask)
+ ("python-distributed" ,python-distributed)
+ ("python-numpy" ,python-numpy)
+ ("python-packaging" ,python-packaging)
+ ("python-pandas" ,python-pandas)))
+ (native-inputs
+ `(("python-coverage" ,python-coverage)
+ ("python-jinja2" ,python-jinja2)
+ ("python-lxml" ,python-lxml)
+ ("python-matplotlib" ,python-matplotlib)
+ ("python-msgpack" ,python-msgpack)
+ ("python-openpyxl" ,python-openpyxl)
+ ("python-psutil" ,python-psutil)
+ ("python-pyarrow" ,python-pyarrow)
+ ("python-pytest" ,python-pytest)
+ ("python-pytest-benchmark" ,python-pytest-benchmark)
+ ("python-pytest-cov" ,python-pytest-cov)
+ ("python-pytest-xdist" ,python-pytest-xdist)
+ ("python-scipy" ,python-scipy)
+ ("python-sqlalchemy" ,python-sqlalchemy)
+ ("python-tables" ,python-tables)
+ ("python-tqdm" ,python-tqdm)
+ ("python-xarray" ,python-xarray)
+ ("python-xlrd" ,python-xlrd)))
+ (home-page "https://github.com/modin-project/modin")
+ (synopsis "Make your pandas code run faster")
+ (description
+ "Modin uses Ray or Dask to provide an effortless way to speed up your
+pandas notebooks, scripts, and libraries. Unlike other distributed DataFrame
+libraries, Modin provides seamless integration and compatibility with existing
+pandas code.")
+ (license license:asl2.0)))