summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2020-12-10 15:12:34 +0100
committerLudovic Courtès <ludo@gnu.org>2020-12-15 17:32:10 +0100
commit6a060ff27ff68384d7c90076baa36c349fff689d (patch)
treef7b1f9c7a52e84848fbcaa90d4dc38c25d7d65eb
parentdea1ee1fd740248307f74ca4cb70b94742264098 (diff)
store-copy: 'populate-store' can optionally deduplicate files.
Until now deduplication was performed as an additional pass after copying files, which involve re-traversing all the files that had just been copied. * guix/store/deduplication.scm (copy-file/deduplicate): New procedure. * tests/store-deduplication.scm ("copy-file/deduplicate"): New test. * guix/build/store-copy.scm (populate-store): Add #:deduplicate? parameter and honor it. * tests/gexp.scm ("gexp->derivation, store copy"): Pass #:deduplicate? #f to 'populate-store'. * gnu/build/image.scm (initialize-root-partition): Pass #:deduplicate? to 'populate-store'. Pass #:deduplicate? #f to 'register-closure'. * gnu/build/vm.scm (root-partition-initializer): Likewise. * gnu/build/install.scm (populate-single-profile-directory): Pass #:deduplicate? #f to 'populate-store'. * gnu/build/linux-initrd.scm (build-initrd): Likewise. * guix/scripts/pack.scm (self-contained-tarball)[import-module?]: New procedure. [build]: Pass it as an argument to 'source-module-closure'. * guix/scripts/pack.scm (squashfs-image)[build]: Wrap in 'with-extensions'. * gnu/system/linux-initrd.scm (expression->initrd)[import-module?]: New procedure. [builder]: Pass it to 'source-module-closure'. * gnu/system/install.scm (cow-store-service-type)[import-module?]: New procedure. Pass it to 'source-module-closure'.
-rw-r--r--gnu/build/image.scm5
-rw-r--r--gnu/build/install.scm3
-rw-r--r--gnu/build/linux-initrd.scm3
-rw-r--r--gnu/build/vm.scm5
-rw-r--r--gnu/system/install.scm12
-rw-r--r--gnu/system/linux-initrd.scm10
-rw-r--r--guix/build/store-copy.scm13
-rw-r--r--guix/scripts/pack.scm258
-rw-r--r--guix/store/deduplication.scm16
-rw-r--r--tests/gexp.scm3
-rw-r--r--tests/store-deduplication.scm18
11 files changed, 207 insertions, 139 deletions
diff --git a/gnu/build/image.scm b/gnu/build/image.scm
index 0deea10a9d..8f50f27f78 100644
--- a/gnu/build/image.scm
+++ b/gnu/build/image.scm
@@ -186,7 +186,8 @@ rest of the store when registering the closures. SYSTEM-DIRECTORY is the name
of the directory of the 'system' derivation. Pass WAL-MODE? to
register-closure."
(populate-root-file-system system-directory root)
- (populate-store references-graphs root)
+ (populate-store references-graphs root
+ #:deduplicate? deduplicate?)
;; Populate /dev.
(when make-device-nodes
@@ -195,7 +196,7 @@ register-closure."
(when register-closures?
(for-each (lambda (closure)
(register-closure root closure
- #:deduplicate? deduplicate?
+ #:deduplicate? #f
#:wal-mode? wal-mode?))
references-graphs))
diff --git a/gnu/build/install.scm b/gnu/build/install.scm
index 63995e1d09..f5c8407b89 100644
--- a/gnu/build/install.scm
+++ b/gnu/build/install.scm
@@ -214,7 +214,8 @@ This is used to create the self-contained tarballs with 'guix pack'."
(symlink old (scope new)))
;; Populate the store.
- (populate-store (list closure) directory)
+ (populate-store (list closure) directory
+ #:deduplicate? #f)
(when database
(install-database-and-gc-roots directory database profile
diff --git a/gnu/build/linux-initrd.scm b/gnu/build/linux-initrd.scm
index 99796adba6..bb2ed0db0c 100644
--- a/gnu/build/linux-initrd.scm
+++ b/gnu/build/linux-initrd.scm
@@ -127,7 +127,8 @@ REFERENCES-GRAPHS."
(mkdir "contents")
;; Copy the closures of all the items referenced in REFERENCES-GRAPHS.
- (populate-store references-graphs "contents")
+ (populate-store references-graphs "contents"
+ #:deduplicate? #f)
(with-directory-excursion "contents"
;; Make '/init'.
diff --git a/gnu/build/vm.scm b/gnu/build/vm.scm
index abb0317faf..03be5697b7 100644
--- a/gnu/build/vm.scm
+++ b/gnu/build/vm.scm
@@ -395,7 +395,8 @@ system that is passed to 'populate-root-file-system'."
(when copy-closures?
;; Populate the store.
(populate-store (map (cut string-append "/xchg/" <>) closures)
- target))
+ target
+ #:deduplicate? deduplicate?))
;; Populate /dev.
(make-device-nodes target)
@@ -412,7 +413,7 @@ system that is passed to 'populate-root-file-system'."
(for-each (lambda (closure)
(register-closure target
(string-append "/xchg/" closure)
- #:deduplicate? deduplicate?))
+ #:deduplicate? #f))
closures)
(unless copy-closures?
(umount target-store)))
diff --git a/gnu/system/install.scm b/gnu/system/install.scm
index a6b9e3d952..e753463473 100644
--- a/gnu/system/install.scm
+++ b/gnu/system/install.scm
@@ -1,5 +1,5 @@
;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2015 Mark H Weaver <mhw@netris.org>
;;; Copyright © 2016 Andreas Enge <andreas@enge.fr>
;;; Copyright © 2017 Marius Bakke <mbakke@fastmail.com>
@@ -176,6 +176,13 @@ manual."
(shepherd-service-type
'cow-store
(lambda _
+ (define (import-module? module)
+ ;; Since we don't use deduplication support in 'populate-store', don't
+ ;; import (guix store deduplication) and its dependencies, which
+ ;; includes Guile-Gcrypt.
+ (and (guix-module-name? module)
+ (not (equal? module '(guix store deduplication)))))
+
(shepherd-service
(requirement '(root-file-system user-processes))
(provision '(cow-store))
@@ -190,7 +197,8 @@ the given target.")
,@%default-modules))
(start
(with-imported-modules (source-module-closure
- '((gnu build install)))
+ '((gnu build install))
+ #:select? import-module?)
#~(case-lambda
((target)
(mount-cow-store target #$%backing-directory)
diff --git a/gnu/system/linux-initrd.scm b/gnu/system/linux-initrd.scm
index 4fb1d863c9..c6ba9bb560 100644
--- a/gnu/system/linux-initrd.scm
+++ b/gnu/system/linux-initrd.scm
@@ -76,12 +76,20 @@ the derivations referenced by EXP are automatically copied to the initrd."
(define init
(program-file "init" exp #:guile guile))
+ (define (import-module? module)
+ ;; Since we don't use deduplication support in 'populate-store', don't
+ ;; import (guix store deduplication) and its dependencies, which includes
+ ;; Guile-Gcrypt. That way we can run tests with '--bootstrap'.
+ (and (guix-module-name? module)
+ (not (equal? module '(guix store deduplication)))))
+
(define builder
;; Do not use "guile-zlib" extension here, otherwise it would drag the
;; non-static "zlib" package to the initrd closure. It is not needed
;; anyway because the modules are stored uncompressed within the initrd.
(with-imported-modules (source-module-closure
- '((gnu build linux-initrd)))
+ '((gnu build linux-initrd))
+ #:select? import-module?)
#~(begin
(use-modules (gnu build linux-initrd))
diff --git a/guix/build/store-copy.scm b/guix/build/store-copy.scm
index 95dcb8e114..7f0672cd9d 100644
--- a/guix/build/store-copy.scm
+++ b/guix/build/store-copy.scm
@@ -20,6 +20,7 @@
#:use-module ((guix build utils) #:hide (copy-recursively))
#:use-module (guix sets)
#:use-module (guix progress)
+ #:autoload (guix store deduplication) (copy-file/deduplicate)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-9)
#:use-module (srfi srfi-26)
@@ -242,10 +243,13 @@ permissions. Write verbose output to the LOG port."
lstat)))
(define* (populate-store reference-graphs target
- #:key (log-port (current-error-port)))
+ #:key
+ (deduplicate? #t)
+ (log-port (current-error-port)))
"Populate the store under directory TARGET with the items specified in
REFERENCE-GRAPHS, a list of reference-graph files. Items copied to TARGET
-maintain timestamps and permissions."
+maintain timestamps and permissions. When DEDUPLICATE? is true, deduplicate
+regular files as they are copied to TARGET."
(define store
(string-append target (%store-directory)))
@@ -273,6 +277,11 @@ maintain timestamps and permissions."
(string-append target thing)
#:keep-mtime? #t
#:keep-permissions? #t
+ #:copy-file
+ (if deduplicate?
+ (cut copy-file/deduplicate <> <>
+ #:store store)
+ copy-file)
#:log (%make-void-port "w"))
(report))
things)))))
diff --git a/guix/scripts/pack.scm b/guix/scripts/pack.scm
index 1612ec8f04..440c4b0903 100644
--- a/guix/scripts/pack.scm
+++ b/guix/scripts/pack.scm
@@ -203,12 +203,19 @@ added to the pack."
#+(file-append glibc-utf8-locales "/lib/locale"))
(setlocale LC_ALL "en_US.utf8"))))
+ (define (import-module? module)
+ ;; Since we don't use deduplication support in 'populate-store', don't
+ ;; import (guix store deduplication) and its dependencies, which includes
+ ;; Guile-Gcrypt. That way we can run tests with '--bootstrap'.
+ (and (not-config? module)
+ (not (equal? '(guix store deduplication) module))))
+
(define build
(with-imported-modules (source-module-closure
`((guix build utils)
(guix build union)
(gnu build install))
- #:select? not-config?)
+ #:select? import-module?)
#~(begin
(use-modules (guix build utils)
((guix build union) #:select (relative-file-name))
@@ -382,138 +389,139 @@ added to the pack."
`(("/bin" -> "bin") ,@symlinks)))
(define build
- (with-imported-modules (source-module-closure
- '((guix build utils)
- (guix build store-copy)
- (guix build union)
- (gnu build install))
- #:select? not-config?)
- #~(begin
- (use-modules (guix build utils)
- (guix build store-copy)
- ((guix build union) #:select (relative-file-name))
- (gnu build install)
- (srfi srfi-1)
- (srfi srfi-26)
- (ice-9 match))
+ (with-extensions (list guile-gcrypt)
+ (with-imported-modules (source-module-closure
+ '((guix build utils)
+ (guix build store-copy)
+ (guix build union)
+ (gnu build install))
+ #:select? not-config?)
+ #~(begin
+ (use-modules (guix build utils)
+ (guix build store-copy)
+ ((guix build union) #:select (relative-file-name))
+ (gnu build install)
+ (srfi srfi-1)
+ (srfi srfi-26)
+ (ice-9 match))
- (define database #+database)
- (define entry-point #$entry-point)
+ (define database #+database)
+ (define entry-point #$entry-point)
- (define (mksquashfs args)
- (apply invoke "mksquashfs"
- `(,@args
+ (define (mksquashfs args)
+ (apply invoke "mksquashfs"
+ `(,@args
- ;; Do not create a "recovery file" when appending to the
- ;; file system since it's useless in this case.
- "-no-recovery"
+ ;; Do not create a "recovery file" when appending to the
+ ;; file system since it's useless in this case.
+ "-no-recovery"
- ;; Do not attempt to store extended attributes.
- ;; See <https://bugs.gnu.org/40043>.
- "-no-xattrs"
+ ;; Do not attempt to store extended attributes.
+ ;; See <https://bugs.gnu.org/40043>.
+ "-no-xattrs"
- ;; Set file times and the file system creation time to
- ;; one second after the Epoch.
- "-all-time" "1" "-mkfs-time" "1"
+ ;; Set file times and the file system creation time to
+ ;; one second after the Epoch.
+ "-all-time" "1" "-mkfs-time" "1"
- ;; Reset all UIDs and GIDs.
- "-force-uid" "0" "-force-gid" "0")))
+ ;; Reset all UIDs and GIDs.
+ "-force-uid" "0" "-force-gid" "0")))
- (setenv "PATH" #+(file-append archiver "/bin"))
+ (setenv "PATH" #+(file-append archiver "/bin"))
- ;; We need an empty file in order to have a valid file argument when
- ;; we reparent the root file system. Read on for why that's
- ;; necessary.
- (with-output-to-file ".empty" (lambda () (display "")))
-
- ;; Create the squashfs image in several steps.
- ;; Add all store items. Unfortunately mksquashfs throws away all
- ;; ancestor directories and only keeps the basename. We fix this
- ;; in the following invocations of mksquashfs.
- (mksquashfs `(,@(map store-info-item
- (call-with-input-file "profile"
- read-reference-graph))
- #$environment
- ,#$output
-
- ;; Do not perform duplicate checking because we
- ;; don't have any dupes.
- "-no-duplicates"
- "-comp"
- ,#+(compressor-name compressor)))
-
- ;; Here we reparent the store items. For each sub-directory of
- ;; the store prefix we need one invocation of "mksquashfs".
- (for-each (lambda (dir)
- (mksquashfs `(".empty"
- ,#$output
- "-root-becomes" ,dir)))
- (reverse (string-tokenize (%store-directory)
- (char-set-complement (char-set #\/)))))
-
- ;; Add symlinks and mount points.
- (mksquashfs
- `(".empty"
- ,#$output
- ;; Create SYMLINKS via pseudo file definitions.
- ,@(append-map
- (match-lambda
- ((source '-> target)
- ;; Create relative symlinks to work around a bug in
- ;; Singularity 2.x:
- ;; https://bugs.gnu.org/34913
- ;; https://github.com/sylabs/singularity/issues/1487
- (let ((target (string-append #$profile "/" target)))
- (list "-p"
- (string-join
- ;; name s mode uid gid symlink
- (list source
- "s" "777" "0" "0"
- (relative-file-name (dirname source)
- target)))))))
- '#$symlinks*)
-
- "-p" "/.singularity.d d 555 0 0"
-
- ;; Create the environment file.
- "-p" "/.singularity.d/env d 555 0 0"
- "-p" ,(string-append
- "/.singularity.d/env/90-environment.sh s 777 0 0 "
- (relative-file-name "/.singularity.d/env"
- #$environment))
-
- ;; Create /.singularity.d/actions, and optionally the 'run'
- ;; script, used by 'singularity run'.
- "-p" "/.singularity.d/actions d 555 0 0"
-
- ,@(if entry-point
- `(;; This one if for Singularity 2.x.
- "-p"
- ,(string-append
- "/.singularity.d/actions/run s 777 0 0 "
- (relative-file-name "/.singularity.d/actions"
- (string-append #$profile "/"
- entry-point)))
-
- ;; This one is for Singularity 3.x.
- "-p"
- ,(string-append
- "/.singularity.d/runscript s 777 0 0 "
- (relative-file-name "/.singularity.d"
- (string-append #$profile "/"
- entry-point))))
- '())
-
- ;; Create empty mount points.
- "-p" "/proc d 555 0 0"
- "-p" "/sys d 555 0 0"
- "-p" "/dev d 555 0 0"
- "-p" "/home d 555 0 0"))
-
- (when database
- ;; Initialize /var/guix.
- (install-database-and-gc-roots "var-etc" database #$profile)
- (mksquashfs `("var-etc" ,#$output))))))
+ ;; We need an empty file in order to have a valid file argument when
+ ;; we reparent the root file system. Read on for why that's
+ ;; necessary.
+ (with-output-to-file ".empty" (lambda () (display "")))
+
+ ;; Create the squashfs image in several steps.
+ ;; Add all store items. Unfortunately mksquashfs throws away all
+ ;; ancestor directories and only keeps the basename. We fix this
+ ;; in the following invocations of mksquashfs.
+ (mksquashfs `(,@(map store-info-item
+ (call-with-input-file "profile"
+ read-reference-graph))
+ #$environment
+ ,#$output
+
+ ;; Do not perform duplicate checking because we
+ ;; don't have any dupes.
+ "-no-duplicates"
+ "-comp"
+ ,#+(compressor-name compressor)))
+
+ ;; Here we reparent the store items. For each sub-directory of
+ ;; the store prefix we need one invocation of "mksquashfs".
+ (for-each (lambda (dir)
+ (mksquashfs `(".empty"
+ ,#$output
+ "-root-becomes" ,dir)))
+ (reverse (string-tokenize (%store-directory)
+ (char-set-complement (char-set #\/)))))
+
+ ;; Add symlinks and mount points.
+ (mksquashfs
+ `(".empty"
+ ,#$output
+ ;; Create SYMLINKS via pseudo file definitions.
+ ,@(append-map
+ (match-lambda
+ ((source '-> target)
+ ;; Create relative symlinks to work around a bug in
+ ;; Singularity 2.x:
+ ;; https://bugs.gnu.org/34913
+ ;; https://github.com/sylabs/singularity/issues/1487
+ (let ((target (string-append #$profile "/" target)))
+ (list "-p"
+ (string-join
+ ;; name s mode uid gid symlink
+ (list source
+ "s" "777" "0" "0"
+ (relative-file-name (dirname source)
+ target)))))))
+ '#$symlinks*)
+
+ "-p" "/.singularity.d d 555 0 0"
+
+ ;; Create the environment file.
+ "-p" "/.singularity.d/env d 555 0 0"
+ "-p" ,(string-append
+ "/.singularity.d/env/90-environment.sh s 777 0 0 "
+ (relative-file-name "/.singularity.d/env"
+ #$environment))
+
+ ;; Create /.singularity.d/actions, and optionally the 'run'
+ ;; script, used by 'singularity run'.
+ "-p" "/.singularity.d/actions d 555 0 0"
+
+ ,@(if entry-point
+ `( ;; This one if for Singularity 2.x.
+ "-p"
+ ,(string-append
+ "/.singularity.d/actions/run s 777 0 0 "
+ (relative-file-name "/.singularity.d/actions"
+ (string-append #$profile "/"
+ entry-point)))
+
+ ;; This one is for Singularity 3.x.
+ "-p"
+ ,(string-append
+ "/.singularity.d/runscript s 777 0 0 "
+ (relative-file-name "/.singularity.d"
+ (string-append #$profile "/"
+ entry-point))))
+ '())
+
+ ;; Create empty mount points.
+ "-p" "/proc d 555 0 0"
+ "-p" "/sys d 555 0 0"
+ "-p" "/dev d 555 0 0"
+ "-p" "/home d 555 0 0"))
+
+ (when database
+ ;; Initialize /var/guix.
+ (install-database-and-gc-roots "var-etc" database #$profile)
+ (mksquashfs `("var-etc" ,#$output)))))))
(gexp->derivation (string-append name
(compressor-extension compressor)
diff --git a/guix/store/deduplication.scm b/guix/store/deduplication.scm
index b4d37d4525..8564f12107 100644
--- a/guix/store/deduplication.scm
+++ b/guix/store/deduplication.scm
@@ -34,7 +34,8 @@
#:use-module (guix serialization)
#:export (nar-sha256
deduplicate
- dump-file/deduplicate))
+ dump-file/deduplicate
+ copy-file/deduplicate))
;; XXX: This port is used as a workaround on Guile <= 2.2.4 where
;; 'port-position' throws to 'out-of-range' when the offset is great than or
@@ -256,3 +257,16 @@ down the road."
(get-hash)))))
(deduplicate file hash #:store store))
+
+(define* (copy-file/deduplicate source target
+ #:key (store (%store-directory)))
+ "Like 'copy-file', but additionally deduplicate TARGET in STORE."
+ (call-with-input-file source
+ (lambda (input)
+ (let ((stat (stat input)))
+ (dump-file/deduplicate target input (stat:size stat)
+ (if (zero? (logand (stat:mode stat)
+ #o100))
+ 'regular
+ 'executable)
+ #:store store)))))
diff --git a/tests/gexp.scm b/tests/gexp.scm
index a0e55178fa..6e92f0e4b3 100644
--- a/tests/gexp.scm
+++ b/tests/gexp.scm
@@ -736,7 +736,8 @@
(zero? (logand #o222 (stat:mode st)))))))
(mkdir #$output)
- (populate-store '("graph") #$output)
+ (populate-store '("graph") #$output
+ #:deduplicate? #f)
;; Check whether 'populate-store' canonicalizes
;; permissions and timestamps.
diff --git a/tests/store-deduplication.scm b/tests/store-deduplication.scm
index e2870a363d..7b01acae24 100644
--- a/tests/store-deduplication.scm
+++ b/tests/store-deduplication.scm
@@ -1,5 +1,5 @@
;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2018 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2018, 2020 Ludovic Courtès <ludo@gnu.org>
;;;
;;; This file is part of GNU Guix.
;;;
@@ -25,6 +25,7 @@
#:use-module (rnrs bytevectors)
#:use-module (ice-9 binary-ports)
#:use-module (srfi srfi-1)
+ #:use-module (srfi srfi-26)
#:use-module (srfi srfi-64))
(test-begin "store-deduplication")
@@ -106,4 +107,19 @@
(cons (apply = (map (compose stat:ino stat) identical))
(map (compose stat:nlink stat) identical))))))
+(test-assert "copy-file/deduplicate"
+ (call-with-temporary-directory
+ (lambda (store)
+ (let ((source (search-path %load-path "gnu/packages/emacs-xyz.scm")))
+ (for-each (lambda (target)
+ (copy-file/deduplicate source
+ (string-append store target)
+ #:store store))
+ '("/a" "/b" "/c"))
+ (and (directory-exists? (string-append store "/.links"))
+ (file=? source (string-append store "/a"))
+ (apply = (map (compose stat:ino stat
+ (cut string-append store <>))
+ '("/a" "/b" "/c"))))))))
+
(test-end "store-deduplication")