summaryrefslogtreecommitdiff
path: root/gnu/packages/databases.scm
diff options
context:
space:
mode:
author Ricardo Wurmus <rekado@elephly.net> 2024-04-30 18:36:14 +0200
committer Ricardo Wurmus <rekado@elephly.net> 2024-05-05 22:38:16 +0200
commit 442eba98be21518423f2cf0a10d1fa5d6c7865ca (patch)
tree c4101ddebb1782569d9ec8d92bd0d53c49f45123 /gnu/packages/databases.scm
parent adf54dea4e685fe00ab7f106ef264acad6e18cee (diff)
gnu: Add apache-orc.

* gnu/packages/databases.scm (apache-orc): New variable.

Change-Id: I9e7df4a03a5d2f258ff44d9705f539f9fc925a99
Diffstat (limited to 'gnu/packages/databases.scm')
-rw-r--r-- gnu/packages/databases.scm 84
1 files changed, 84 insertions, 0 deletions
diff --git a/gnu/packages/databases.scm b/gnu/packages/databases.scm
index 238ba34053..8ad73e400a 100644
--- a/gnu/packages/databases.scm
+++ b/gnu/packages/databases.scm
@@ -4510,6 +4510,90 @@ transforms idiomatic python function calls to well-formed SQL queries.")
the SQL language using a syntax that reflects the resulting query.")
(license license:asl2.0)))
+(define-public apache-orc  ; Guix package definition for Apache ORC, built with CMake
+ (package
+ (name "apache-orc")
+ (version "2.0.0")
+ (source
+ (origin
+ (method git-fetch)  ; source comes from a Git checkout, not a release tarball
+ (uri (git-reference
+ (url "https://github.com/apache/orc")
+ (commit (string-append "v" version))))  ; tag name is "v" followed by the version
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "1fi6d045wakks0x8clplyxgal342kljqjql7vq5gbd6a2qnaz6m2"))))
+ (build-system cmake-build-system)
+ (arguments
+ (list
+ #:build-type "Release"
+ #:configure-flags
+ #~(list "-DBUILD_JAVA=OFF"  ; do not build the Java part
+ "-DINSTALL_VENDORED_LIBS=OFF"
+ "-DCMAKE_CXX_FLAGS=-fPIC"
+ (string-append "-DGTEST_HOME=" #$(this-package-native-input "googletest"))  ; the *_HOME flags point CMake at the declared inputs
+ (string-append "-DZSTD_HOME=" (assoc-ref %build-inputs "zstd:lib"))  ; looked up by the "zstd:lib" label declared in inputs
+ (string-append "-DZLIB_HOME=" #$(this-package-input "zlib"))
+ (string-append "-DPROTOBUF_HOME=" #$(this-package-input "protobuf"))
+ (string-append "-DLZ4_HOME=" #$(this-package-input "lz4"))
+ (string-append "-DSNAPPY_HOME=" #$(this-package-input "snappy")))
+ #:phases
+ #~(modify-phases %standard-phases
+ (add-after 'unpack 'disable-bad-test
+ (lambda _
+ ;; This one test fails with an obscure error:
+ ;;
+ ;; Expected: (std::string::npos) != (error.find(error_msg)),
+ ;; actual: 18446744073709551615 vs 18446744073709551615
+ (substitute* "tools/test/TestFileScan.cc"
+ (("findProgram\\(\"tools/src/orc-scan\"\\);" m)  ; m is bound to the matched text
+ (string-append m "return;")))))  ; keep the match and append "return;" right after it
+ (add-after 'unpack 'do-not-download-orc-format
+ (lambda _
+ (substitute* "cmake_modules/ThirdpartyToolchain.cmake"
+ (("URL \"https://archive.apache.org/dist/orc/orc-format.*")  ; matches the rest of the line carrying the remote URL
+ (string-append "URL \"file://"  ; replace it with a file:// URL to the "orc-format" native input
+ #$(this-package-native-input "orc-format")
+ "\"\n")))))
+ (add-after 'unpack 'timezone-fallback
+ (lambda _
+ ;; In the build container we don't have /etc/localtime
+ (substitute* "c++/src/Timezone.cc"
+ (("return getTimezoneByFilename\\(LOCAL_TIMEZONE\\);")
+ "if (!std::filesystem::exists(std::filesystem::path(LOCAL_TIMEZONE))) {
+ return getTimezoneByName(\"UTC\");
+}
+return getTimezoneByFilename(LOCAL_TIMEZONE);"))))  ; patched code returns UTC when the LOCAL_TIMEZONE path does not exist
+ (add-before 'check 'pre-check
+ (lambda* (#:key inputs #:allow-other-keys)
+ (setenv "TZDIR" (search-input-directory inputs
+ "share/zoneinfo")))))))  ; TZDIR is set to the share/zoneinfo directory found among the inputs
+ (inputs  ; labelled inputs; the labels are what the configure flags above look up
+ `(("lz4" ,lz4)
+ ("protobuf" ,protobuf)
+ ("snappy" ,snappy)
+ ("zlib" ,zlib "static")  ; the "static" output of zlib
+ ("zstd" ,zstd)
+ ("zstd:lib" ,zstd "lib")))  ; the "lib" output of zstd, referenced by -DZSTD_HOME
+ (native-inputs
+ `(("googletest" ,googletest)
+ ("orc-format" ,(origin  ; tarball substituted into ThirdpartyToolchain.cmake by 'do-not-download-orc-format
+ (method url-fetch)
+ (uri "https://archive.apache.org/dist/orc/orc-format-1.0.0/\
+orc-format-1.0.0.tar.gz")
+ (sha256
+ (base32
+ "1mccbna3mqhhlqs4pw0fa4pgjnq4c41jhxrh84mq27sbz5gsx7vk"))))
+ ("pkg-config" ,pkg-config)
+ ("tzdata" ,tzdata-for-tests)))  ; NOTE(review): presumably the source of share/zoneinfo for the 'pre-check phase; confirm
+ (home-page "https://orc.apache.org/")
+ (synopsis "Columnar storage for Hadoop workloads")
+ (description "ORC is a self-describing type-aware columnar file format
+designed for Hadoop workloads. It is optimized for large streaming reads, but
+with integrated support for finding required rows quickly.")
+ (license license:asl2.0)))
+
;; There are many wrappers for this in other languages. When touching, please
;; be sure to ensure all dependencies continue to build.
(define-public apache-arrow