summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ricardo Wurmus <rekado@elephly.net> 2022-12-23 20:16:03 +0100
committer Ricardo Wurmus <rekado@elephly.net> 2022-12-23 20:20:06 +0100
commit ad141242f8d2a1e9f69f59e53aa26b897c6cd3a6 (patch)
tree be3c90872fe56fe2955cbbe4eacc646bdaaf4163
parent 0a45d4bad485b6f27833a1a57cffbaae63407f98 (diff)
gnu: Add apache-arrow-for-ceph.
* gnu/packages/databases.scm (apache-arrow-for-ceph): New variable.
-rw-r--r-- gnu/packages/databases.scm 132
1 files changed, 132 insertions, 0 deletions
diff --git a/gnu/packages/databases.scm b/gnu/packages/databases.scm
index 83515ad17e..2a24ba580d 100644
--- a/gnu/packages/databases.scm
+++ b/gnu/packages/databases.scm
@@ -4310,6 +4310,138 @@ language-bindings for structure manipulation. It also provides IPC and common
algorithm implementations.")
(license license:asl2.0)))
+;; Apache Arrow 6.0.1 C++ libraries; judging by the variable name this variant
+;; is meant as an input for Ceph -- confirm against the ceph package.
+(define-public apache-arrow-for-ceph
+  (package
+    (name "apache-arrow")
+    (version "6.0.1")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/apache/arrow")
+             (commit (string-append "apache-arrow-" version))))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "0mcw361akqw4sxnnpnr9c9v1zk4hphk6gcq763pcb19yzljh88ig"))))
+    (build-system cmake-build-system)
+    (arguments
+     `(#:tests? #f                      ;ARROW_BUILD_TESTS is OFF below
+       #:phases
+       (modify-phases %standard-phases
+         ;; The C++ CMake project is configured from the "cpp" subdirectory.
+         (add-before 'configure 'enter-source-directory
+           (lambda _ (chdir "cpp")))
+         (add-after 'unpack 'set-env
+           (lambda* (#:key inputs #:allow-other-keys)
+             ;; CMAKE_INSTALL_INCLUDEDIR is an absolute path (see the
+             ;; configure flags), so parquet.pc must not prepend ${prefix}/.
+             (substitute* "cpp/src/parquet/parquet.pc.in"
+               (("includedir=\\$\\{prefix\\}/")
+                "includedir="))
+             ;; Presumably lets -Dxsimd_SOURCE=SYSTEM below take effect.
+             (substitute* "cpp/cmake_modules/ThirdpartyToolchain.cmake"
+               (("set\\(xsimd_SOURCE.*") ""))
+             ;; Point the build at the store locations of these inputs.
+             (setenv "BOOST_ROOT" (assoc-ref inputs "boost"))
+             (setenv "BROTLI_HOME" (assoc-ref inputs "brotli"))
+             (setenv "RAPIDJSON_HOME" (assoc-ref inputs "rapidjson")))))
+       #:build-type "Release"
+       #:configure-flags
+       (list "-DARROW_GLOG=ON"
+             ;; Parquet options
+             "-DARROW_PARQUET=ON"
+             "-DPARQUET_BUILD_EXECUTABLES=ON"
+             ;; The maintainers disallow using system versions of
+             ;; jemalloc:
+             ;; https://issues.apache.org/jira/browse/ARROW-3507.  This
+             ;; is unfortunate because jemalloc increases performance:
+             ;; https://arrow.apache.org/blog/2018/07/20/jemalloc/.
+             "-DARROW_JEMALLOC=OFF"
+
+             ;; ARROW_DEPENDENCY_SOURCE globally selects how each
+             ;; dependency is resolved.  SYSTEM means: look it up with
+             ;; CMake's built-in find_package, or with pkg-config for
+             ;; packages that do not support find_package.
+             "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
+             "-Dxsimd_SOURCE=SYSTEM"
+
+             "-DARROW_RUNTIME_SIMD_LEVEL=NONE"
+             "-DARROW_SIMD_LEVEL=NONE"
+             "-DARROW_PACKAGE_KIND=Guix"
+
+             ;; Split output into its component packages.
+             (string-append "-DCMAKE_INSTALL_PREFIX="
+                            (assoc-ref %outputs "lib"))
+             (string-append "-DCMAKE_INSTALL_RPATH="
+                            (assoc-ref %outputs "lib")
+                            "/lib")
+             (string-append "-DCMAKE_INSTALL_BINDIR="
+                            (assoc-ref %outputs "out")
+                            "/bin")
+             (string-append "-DCMAKE_INSTALL_INCLUDEDIR="
+                            (assoc-ref %outputs "include")
+                            "/share/include")
+
+             "-DARROW_WITH_SNAPPY=ON"
+             "-DARROW_WITH_ZLIB=ON"
+             "-DARROW_WITH_ZSTD=ON"
+             "-DARROW_WITH_LZ4=ON"
+             "-DARROW_COMPUTE=ON"
+             "-DARROW_CSV=ON"
+             "-DARROW_DATASET=ON"
+             "-DARROW_FILESYSTEM=ON"
+             "-DARROW_HDFS=ON"
+             "-DARROW_JSON=ON"
+             ;; Arrow Python C++ integration library, required for
+             ;; building pyarrow.  It must be built against the same
+             ;; Python version as pyarrow, and NumPy must be installed.
+             ;; Enabling it also enables ARROW_COMPUTE, ARROW_CSV,
+             ;; ARROW_DATASET, ARROW_FILESYSTEM, ARROW_HDFS and ARROW_JSON.
+             "-DARROW_PYTHON=ON"
+
+             ;; Building the tests forces on all the optional features
+             ;; and the use of static libraries.
+             "-DARROW_BUILD_TESTS=OFF"
+             "-DBENCHMARK_ENABLE_GTEST_TESTS=OFF"
+             ;;"-DBENCHMARK_ENABLE_TESTING=OFF"
+             "-DARROW_BUILD_STATIC=OFF")))
+    (inputs
+     (list boost
+           brotli
+           bzip2
+           double-conversion
+           gflags
+           glog
+           grpc
+           protobuf
+           python
+           python-numpy
+           rapidjson
+           re2
+           snappy
+           xsimd))
+    ;; These are all listed under Requires.private in arrow.pc
+    (propagated-inputs
+     (list (list apache-thrift "lib")
+           lz4
+           utf8proc
+           zlib
+           (list zstd "lib")))
+    (native-inputs
+     (list pkg-config))
+    (outputs '("out" "lib" "include"))
+    (home-page "https://arrow.apache.org/")
+    (synopsis "Columnar in-memory analytics")
+    (description "Apache Arrow is a columnar in-memory analytics layer
+designed to accelerate big data. It houses a set of canonical in-memory
+representations of flat and hierarchical data along with multiple
+language-bindings for structure manipulation. It also provides IPC and common
+algorithm implementations.")
+    (license license:asl2.0)))
+
(define-public apache-arrow-0.16
(package
(name "apache-arrow")