From ad141242f8d2a1e9f69f59e53aa26b897c6cd3a6 Mon Sep 17 00:00:00 2001
From: Ricardo Wurmus
Date: Fri, 23 Dec 2022 20:16:03 +0100
Subject: gnu: Add apache-arrow-for-ceph.

* gnu/packages/databases.scm (apache-arrow-for-ceph): New variable.
---
 gnu/packages/databases.scm | 143 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 143 insertions(+)

(limited to 'gnu/packages/databases.scm')

diff --git a/gnu/packages/databases.scm b/gnu/packages/databases.scm
index 83515ad17e..2a24ba580d 100644
--- a/gnu/packages/databases.scm
+++ b/gnu/packages/databases.scm
@@ -4310,6 +4310,149 @@ (define-public apache-arrow
 algorithm implementations.")
     (license license:asl2.0)))
 
+;; Apache Arrow 6.0.1, provided as a separate variable next to
+;; 'apache-arrow'; going by its name it is meant for building Ceph.  The
+;; build is split over three outputs: "lib" is the CMake install prefix,
+;; "out" receives the executables and "include" the headers.
+(define-public apache-arrow-for-ceph
+  (package
+    (name "apache-arrow")
+    (version "6.0.1")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/apache/arrow")
+             (commit (string-append "apache-arrow-" version))))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "0mcw361akqw4sxnnpnr9c9v1zk4hphk6gcq763pcb19yzljh88ig"))))
+    (build-system cmake-build-system)
+    (arguments
+     ;; Building the test suite is disabled below (ARROW_BUILD_TESTS=OFF),
+     ;; so there is nothing for the 'check' phase to run.
+     `(#:tests? #f
+       #:phases
+       (modify-phases %standard-phases
+         ;; Configure and build from the "cpp" subdirectory.
+         (add-before 'configure 'enter-source-directory
+           (lambda _ (chdir "cpp")))
+         (add-after 'unpack 'set-env
+           (lambda* (#:key inputs #:allow-other-keys)
+             ;; Strip the "${prefix}/" that precedes the include directory
+             ;; in the pkg-config template; presumably needed because
+             ;; CMAKE_INSTALL_INCLUDEDIR is set to an absolute path below.
+             (substitute* "cpp/src/parquet/parquet.pc.in"
+               (("includedir=\\$\\{prefix\\}/")
+                "includedir="))
+             ;; Blank out the toolchain file's own "set(xsimd_SOURCE ...)"
+             ;; statements, presumably so that the -Dxsimd_SOURCE=SYSTEM
+             ;; configure flag is not overridden.
+             (substitute* "cpp/cmake_modules/ThirdpartyToolchain.cmake"
+               (("set\\(xsimd_SOURCE.*") ""))
+             ;; Export the directories of these inputs via the environment.
+             (setenv "BOOST_ROOT" (assoc-ref inputs "boost"))
+             (setenv "BROTLI_HOME" (assoc-ref inputs "brotli"))
+             (setenv "RAPIDJSON_HOME" (assoc-ref inputs "rapidjson")))))
+       #:build-type "Release"
+       #:configure-flags
+       (list "-DARROW_GLOG=ON"
+             ;; Parquet options
+             "-DARROW_PARQUET=ON"
+             "-DPARQUET_BUILD_EXECUTABLES=ON"
+             ;; The maintainers disallow using system versions of
+             ;; jemalloc:
+             ;; https://issues.apache.org/jira/browse/ARROW-3507.  This
+             ;; is unfortunate because jemalloc increases performance:
+             ;; https://arrow.apache.org/blog/2018/07/20/jemalloc/.
+             "-DARROW_JEMALLOC=OFF"
+
+             ;; The CMake option ARROW_DEPENDENCY_SOURCE is a global
+             ;; option that instructs the build system how to resolve
+             ;; each dependency.  SYSTEM = Finding the dependency in
+             ;; system paths using CMake's built-in find_package
+             ;; function, or using pkg-config for packages that do not
+             ;; have this feature.
+             "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
+             "-Dxsimd_SOURCE=SYSTEM"
+
+             "-DARROW_RUNTIME_SIMD_LEVEL=NONE"
+             "-DARROW_SIMD_LEVEL=NONE"
+             "-DARROW_PACKAGE_KIND=Guix"
+
+             ;; Split output into its component packages.
+             (string-append "-DCMAKE_INSTALL_PREFIX="
+                            (assoc-ref %outputs "lib"))
+             (string-append "-DCMAKE_INSTALL_RPATH="
+                            (assoc-ref %outputs "lib")
+                            "/lib")
+             (string-append "-DCMAKE_INSTALL_BINDIR="
+                            (assoc-ref %outputs "out")
+                            "/bin")
+             (string-append "-DCMAKE_INSTALL_INCLUDEDIR="
+                            (assoc-ref %outputs "include")
+                            "/share/include")
+
+             "-DARROW_WITH_SNAPPY=ON"
+             "-DARROW_WITH_ZLIB=ON"
+             "-DARROW_WITH_ZSTD=ON"
+             "-DARROW_WITH_LZ4=ON"
+             "-DARROW_COMPUTE=ON"
+             "-DARROW_CSV=ON"
+             "-DARROW_DATASET=ON"
+             "-DARROW_FILESYSTEM=ON"
+             "-DARROW_HDFS=ON"
+             "-DARROW_JSON=ON"
+             ;; Arrow Python C++ integration library (required for
+             ;; building pyarrow).  This library must be built against
+             ;; the same Python version for which you are building
+             ;; pyarrow.  NumPy must also be installed.  Enabling this
+             ;; option also enables ARROW_COMPUTE, ARROW_CSV,
+             ;; ARROW_DATASET, ARROW_FILESYSTEM, ARROW_HDFS, and
+             ;; ARROW_JSON.
+             "-DARROW_PYTHON=ON"
+
+             ;; Building the tests forces on all the
+             ;; optional features and the use of static
+             ;; libraries.
+             "-DARROW_BUILD_TESTS=OFF"
+             "-DBENCHMARK_ENABLE_GTEST_TESTS=OFF"
+             "-DARROW_BUILD_STATIC=OFF")))
+    (inputs
+     (list boost
+           brotli
+           bzip2
+           double-conversion
+           gflags
+           glog
+           grpc
+           protobuf
+           python
+           python-numpy
+           rapidjson
+           re2
+           snappy
+           xsimd))
+    ;; These are all listed under Requires.private in arrow.pc
+    (propagated-inputs
+     (list (list apache-thrift "lib")
+           lz4
+           utf8proc
+           zlib
+           (list zstd "lib")))
+    (native-inputs
+     (list pkg-config))
+    (outputs '("out" "lib" "include"))
+    (home-page "https://arrow.apache.org/")
+    (synopsis "Columnar in-memory analytics")
+    (description "Apache Arrow is a columnar in-memory analytics layer
+designed to accelerate big data. It houses a set of canonical in-memory
+representations of flat and hierarchical data along with multiple
+language-bindings for structure manipulation. It also provides IPC and common
+algorithm implementations.")
+    (license license:asl2.0)))
+
 (define-public apache-arrow-0.16
   (package
     (name "apache-arrow")
-- 
cgit v1.2.3