summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPierre Neidhardt <mail@ambrevar.xyz>2020-08-04 17:34:44 +0200
committerPierre Neidhardt <mail@ambrevar.xyz>2020-08-13 17:49:19 +0200
commit49b52c2c7be03caf3636632c31f4451d5bc88125 (patch)
tree85b61332aeca63ba935e2476df9ea7c9954505db
parent763b52dc54a5732d014ffaecba45125e9f38ed35 (diff)
guix: Add filesearch draft.
* guix/scripts/filesearch.scm: New file. * guix/scripts/schema.sql: New file.
-rw-r--r--guix/scripts/filesearch.scm222
-rw-r--r--guix/scripts/schema.sql15
2 files changed, 237 insertions, 0 deletions
diff --git a/guix/scripts/filesearch.scm b/guix/scripts/filesearch.scm
new file mode 100644
index 0000000000..a409dc152e
--- /dev/null
+++ b/guix/scripts/filesearch.scm
@@ -0,0 +1,222 @@
+;;; GNU Guix --- Functional package management for GNU
+;;; Copyright © 2020 Pierre Neidhardt <mail@ambrevar.xyz>
+;;;
+;;; This file is part of GNU Guix.
+;;;
+;;; GNU Guix is free software; you can redistribute it and/or modify it
+;;; under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 3 of the License, or (at
+;;; your option) any later version.
+;;;
+;;; GNU Guix is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
+
+(define-module (guix scripts filesearch)
+ #:use-module (guix config) ; For %guix-version.
+ #:use-module (sqlite3)
+ #:use-module (guix gexp) ; For lower-object.
+ #:use-module (guix packages)
+ #:use-module (guix store)
+ #:use-module (guix store database)
+ #:use-module (guix monads)
+ #:use-module (guix grafts)
+ #:use-module (guix records)
+ #:use-module (guix derivations)
+ #:use-module (guix utils) ; For cache-directory
+ #:use-module (gnu packages)
+ #:use-module (ice-9 match)
+ #:use-module (ice-9 format)
+ #:use-module (ice-9 ftw))
+
+;; TODO: We need to remove package duplicates.
+;; Using "insert or replace ... on conflict (path) do nothing" moves database
+;; generation time from 30s to 100s.
+;;
+;; Remove duplicates afterwards?
+
+;; TODO: Vacuum database? When?
+;; https://sqlite.org/lang_vacuum.html
+
+(define %db (format #f "~a/files.db" (cache-directory)))
+(define %schema (search-path %load-path "guix/scripts/schema.sql"))
+
+(define-syntax-rule (with-statement db sql stmt exp ...) ; TODO: From (guix store database)
+ "Run EXP... with STMT bound to a prepared statement corresponding to the sql
+string SQL for DB."
+ ((@@ (guix store database) call-with-statement) db sql ; TODO: Export?
+ (lambda (stmt) exp ...)))
+
+(define* (add-files db
+ #:key
+ (name (error "Missing argument"))
+ (system (error "Missing argument"))
+ (output "out")
+ (path (error "Missing argument"))
+ (files (error "Missing argument"))
+ (version (error "Missing argument"))
+ (guix-version (error "Missing argument")))
+ "FILES is a list of path underneath PATH."
+ (sqlite-exec
+ db
+ (string-append "insert into Packages (name, system, output, path, version, guix)"
+ ;; "insert or replace into Packages (name, system, output, path, version, guix)"
+ (format #f " values (~s, ~s, ~s, ~s, ~s, ~s)"
+ name system output path version guix-version)
+ ;; " on conflict (path) do nothing"
+ ))
+ (let ((id ((@@ (guix store database) last-insert-row-id) db))) ; TODO: Export?
+ (for-each
+ (lambda (file)
+ (sqlite-exec
+ db
+ (string-append "insert into Files (subpath, package) "
+ (format #f " values (~s, ~s)"
+ file id))))
+ files)))
+
+(define (directory-files path)
+ "Return a list of all files within PATH, recursively.
+Each file is returned as the path relative to PATH, starting with a '/'.
+Empty directories are ignored.
+
+It's important that the first character be the directory separator because it
+gives more expressive power for search. For instance, searching \"/bin\"
+matches both \"/bin/foo\" and \"/usr/bin/foo\" but not \"barbin\"."
+ (let ((file-list '()))
+ (ftw path
+ (lambda (filename statinfo flag)
+ (when (eq? flag 'regular)
+ (set! file-list (cons (string-drop filename (string-length path))
+ file-list))) #t))
+ file-list))
+
+(define-record-type* <package-store-items> package-store-items make-package-store-items
+ package-store-items?
+ this-package-store-items
+ (system package-store-items-system)
+ (output-paths package-store-items-output-paths))
+
+(define* (package-store-info package)
+ "Return store items, even if not present locally."
+ (define (lower-object/no-grafts obj system) ; From (guix scripts weather)
+ (mlet* %store-monad ((previous (set-grafting #f))
+ (drv (lower-object obj system))
+ (_ (set-grafting previous)))
+ (return drv)))
+ (with-store store
+ (run-with-store store
+ (mlet %store-monad ((drv (lower-object/no-grafts package (%current-system))))
+ ;; Note: we don't try building DRV like 'guix archive' does
+ ;; because we don't have to since we can instead rely on
+ ;; substitute meta-data.
+ (return
+ (package-store-items
+ (system (derivation-system drv))
+ (output-paths (derivation->output-paths drv))))))))
+
+(define (persist-package-files db package)
+ (let* ((info (package-store-info package))
+ (system (package-store-items-system info))
+ (output-path-pairs (package-store-items-output-paths info)))
+ (map (match-lambda
+ ((output . path)
+ ;; TODO: Don't list files if entry is already in database.
+ ;; TODO: Try fetching info from remote substitute server database.
+ (when (file-exists? path)
+ (add-files db ; TODO: Merge this function and add-files?
+ #:name (package-name package)
+ #:system system
+ #:output output
+ #:path path ; Storing /gnu/store for all packages has no significant size cost.
+ #:version (package-version package)
+ #:guix-version %guix-version
+ #:files (directory-files path)))))
+ output-path-pairs)))
+
+(define (search-file-package pattern)
+ "Return corresponding packages.
+Packages or ordered by most relevant last.
+Path is subject to SQLite \"full-text search\" pattern matching.
+See https://www.sqlite.org/fts5.html.
+
+Example patterns:
+
+- \"foo bar\": Both the \"foo\" and \"bar\" full words are in the path.
+- \"bar foo\": Same as above, order does not matter.
+- \"foo*\": Matches any word starting with \"foo\".
+- \"foo OR bar\": Either \"foo\" or \"bar\" full words are in the path."
+ (with-database %db db
+ (with-statement
+ db
+ ;; REVIEW: Is this inner join cheap?
+ (string-append
+ "select subpath, name, version, output"
+ " from Files inner join Packages on Files.package = Packages.id"
+ (format #f " where Files.subpath match '~a' order by rank" pattern))
+ stmt
+ (map vector->list
+ (sqlite-fold cons '() stmt)))))
+
+(define (format-search search-result)
+ (for-each
+ (match-lambda
+ ((subpath name version output)
+ (format #t "~a:~a@~a~/~a~%"
+ name output version subpath)))
+ search-result))
+
+(define (persist-all-local-packages)
+ "Return number of persisted packages."
+ (parameterize ((sql-schema %schema))
+ (with-database %db db
+ ;; It's important to persist all entries in a single transaction to
+ ;; avoid a performance bottleneck. See
+ ;; https://www.sqlite.org/fts5.html.
+ ((@@ (guix store database) call-with-transaction) ; TODO: Export?
+ db
+ (lambda ()
+ (fold-packages
+ (lambda (package count)
+ (persist-package-files db package)
+ (+ 1 count))
+ 1))))))
+
+
+(define (test-missing-package)
+ (package-store-info
+ (@@ (gnu packages chromium) ungoogled-chromium)))
+
+(define (test-index-git)
+ (parameterize ((sql-schema %schema))
+ (with-database %db db
+ (persist-package-files db (@@ (gnu packages version-control) git)))))
+
+(define (test-search)
+ (test-index-git)
+ (format-search (search-file-package "git perl5")))
+
+;; TODO: Catch case we don't have a derivation.
+
+;; TODO: Sync databases with substitute server: SQLite diffs? Binary diff
+;; with xdelta (probably not since it would send entries for Guix versions
+;; that the user does not have).
+
+;; Statistics
+;;
+;; Context:
+;; - 14,000 packages
+;; - 1700 store items
+;; - CPU 3.5 GHz
+;; - SSD
+;;
+;; Results:
+;; - Database generation time: 30 seconds.
+;; - Database size: 31 MiB.
+;; - Database Zstd-compressed size: 6.1 MiB.
+;; - Zstd-compression time: 0.13 seconds.
+;; - FTS queries: < 0.01 seconds.
diff --git a/guix/scripts/schema.sql b/guix/scripts/schema.sql
new file mode 100644
index 0000000000..ce90de2b86
--- /dev/null
+++ b/guix/scripts/schema.sql
@@ -0,0 +1,15 @@
+create table if not exists Packages (
+ id integer primary key autoincrement not null,
+ name text not null,
+ output text default "out",
+ system text not null,
+ path text not null, -- store path, e.g. /gnu/store/abcd...-foo
+ -- path text unique not null, -- TODO: Make unique? Maybe to slow.
+ version text not null,
+ guix text not null
+);
+
+create virtual table if not exists Files using fts5(
+ subpath,
+ package
+);