summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2020-06-22 12:29:15 +0200
committerLudovic Courtès <ludo@gnu.org>2020-06-22 15:42:55 +0200
commitfe5de925aa0f2854a679cebdea02b03cac561c8f (patch)
tree7c256a5c405b677c7ab7994c06aa20db17c99978
parenta0d419e63955c50c270e0697af3eb74ac321b108 (diff)
deduplicate: Avoid traversing directories twice.
Until now, we'd call (nar-sha256 file) unconditionally. Thus, if FILE was a directory, we would traverse it for no reason, and then call 'deduplicate' on FILE, which would again traverse it. This change also removes redundant (mkdir-p store) calls from the loop, and avoids 'lstat' calls by using 'scandir*'. * guix/store/deduplication.scm (deduplicate): Add named loop. Move 'mkdir-p' outside the loop. Use 'scandir*' instead of 'scandir'. Do not call 'nar-sha256' when FILE has type 'directory.
-rw-r--r--guix/store/deduplication.scm81
1 files changed, 45 insertions, 36 deletions
diff --git a/guix/store/deduplication.scm b/guix/store/deduplication.scm
index 80868692c0..6784ee0b92 100644
--- a/guix/store/deduplication.scm
+++ b/guix/store/deduplication.scm
@@ -23,10 +23,12 @@
(define-module (guix store deduplication)
#:use-module (gcrypt hash)
#:use-module (guix build utils)
+ #:use-module (guix build syscalls)
#:use-module (guix base32)
#:use-module (srfi srfi-11)
#:use-module (rnrs io ports)
#:use-module (ice-9 ftw)
+ #:use-module (ice-9 match)
#:use-module (guix serialization)
#:export (nar-sha256
deduplicate))
@@ -138,40 +140,47 @@ Note: TARGET, TO-REPLACE, and SWAP-DIRECTORY must be on the same file system."
replace PATH with a hardlink to the already-existing one. If not, register
PATH so that future duplicates can hardlink to it. PATH is assumed to be
under STORE."
- (let* ((links-directory (string-append store "/.links"))
- (link-file (string-append links-directory "/"
- (bytevector->nix-base32-string hash))))
- (mkdir-p links-directory)
- (if (eq? 'directory (stat:type (lstat path)))
+ (define links-directory
+ (string-append store "/.links"))
+
+ (mkdir-p links-directory)
+ (let loop ((path path)
+ (type (stat:type (lstat path)))
+ (hash hash))
+ (if (eq? 'directory type)
;; Can't hardlink directories, so hardlink their atoms.
- (for-each (lambda (file)
- (unless (or (member file '("." ".."))
- (and (string=? path store)
- (string=? file ".links")))
- (let ((file (string-append path "/" file)))
- (deduplicate file (nar-sha256 file)
- #:store store))))
- (scandir path))
- (if (file-exists? link-file)
- (replace-with-link link-file path
- #:swap-directory links-directory)
- (catch 'system-error
- (lambda ()
- (link path link-file))
- (lambda args
- (let ((errno (system-error-errno args)))
- (cond ((= errno EEXIST)
- ;; Someone else put an entry for PATH in
- ;; LINKS-DIRECTORY before we could. Let's use it.
- (replace-with-link path link-file
- #:swap-directory links-directory))
- ((= errno ENOSPC)
- ;; There's not enough room in the directory index for
- ;; more entries in .links, but that's fine: we can
- ;; just stop.
- #f)
- ((= errno EMLINK)
- ;; PATH has reached the maximum number of links, but
- ;; that's OK: we just can't deduplicate it more.
- #f)
- (else (apply throw args))))))))))
+ (for-each (match-lambda
+ ((file . properties)
+ (unless (member file '("." ".."))
+ (let* ((file (string-append path "/" file))
+ (type (or (assq-ref properties 'type)
+ (stat:type (lstat file)))))
+ (loop file type
+ (and (not (eq? 'directory type))
+ (nar-sha256 file)))))))
+ (scandir* path))
+ (let ((link-file (string-append links-directory "/"
+ (bytevector->nix-base32-string hash))))
+ (if (file-exists? link-file)
+ (replace-with-link link-file path
+ #:swap-directory links-directory)
+ (catch 'system-error
+ (lambda ()
+ (link path link-file))
+ (lambda args
+ (let ((errno (system-error-errno args)))
+ (cond ((= errno EEXIST)
+ ;; Someone else put an entry for PATH in
+ ;; LINKS-DIRECTORY before we could. Let's use it.
+ (replace-with-link path link-file
+ #:swap-directory links-directory))
+ ((= errno ENOSPC)
+ ;; There's not enough room in the directory index for
+ ;; more entries in .links, but that's fine: we can
+ ;; just stop.
+ #f)
+ ((= errno EMLINK)
+ ;; PATH has reached the maximum number of links, but
+ ;; that's OK: we just can't deduplicate it more.
+ #f)
+ (else (apply throw args)))))))))))