From 472a0e82a52a3d5d841e1dfad6b13e26082a5750 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Sat, 13 Nov 2021 21:47:15 +0100 Subject: daemon: Do not deduplicate files smaller than 8 KiB. Files smaller than 8 KiB typically represent ~70% of the entries in /gnu/store/.links but only contribute to ~4% of the space savings afforded by deduplication. Not considering these files for deduplication speeds up file insertion in the store and, more importantly, leaves 'removeUnusedLinks' with fewer entries to traverse, thereby speeding it up proportionally. Partly fixes . * config-daemon.ac: Remove symlink hard link check and CAN_LINK_SYMLINK definition. * guix/store/deduplication.scm (%deduplication-minimum-size): New variable. (deduplicate)[loop]: Do not recurse when FILE's size is below %DEDUPLICATION-MINIMUM-SIZE. (dump-port): New procedure. (dump-file/deduplicate)[hash]: Turn into... [dump-and-compute-hash]: ... this thunk. Call 'deduplicate' only when SIZE is greater than %DEDUPLICATION-MINIMUM-SIZE; otherwise call 'dump-port'. * nix/libstore/gc.cc (LocalStore::removeUnusedLinks): Drop files where st.st_size < deduplicationMinSize. * nix/libstore/local-store.hh (deduplicationMinSize): New declaration. * nix/libstore/optimise-store.cc (deduplicationMinSize): New variable. (LocalStore::optimisePath_): Return when PATH is a symlink or smaller than 'deduplicationMinSize'. * tests/derivations.scm ("identical files are deduplicated"): Produce files bigger than %DEDUPLICATION-MINIMUM-SIZE. * tests/nar.scm ("restore-file-set with directories (signed, valid)"): Likewise. * tests/store-deduplication.scm ("deduplicate, below %deduplication-minimum-size"): New test. ("deduplicate", "deduplicate, ENOSPC"): Produce files bigger than %DEDUPLICATION-MINIMUM-SIZE. * tests/store.scm ("substitute, deduplication"): Likewise. --- tests/derivations.scm | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'tests/derivations.scm') diff --git a/tests/derivations.scm b/tests/derivations.scm index cd165d1be6..0775719ea3 100644 --- a/tests/derivations.scm +++ b/tests/derivations.scm @@ -1,5 +1,5 @@ ;;; GNU Guix --- Functional package management for GNU -;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès +;;; Copyright © 2012-2021 Ludovic Courtès ;;; ;;; This file is part of GNU Guix. ;;; @@ -170,11 +170,15 @@ (define prefix-len (string-length dir)) #f)))) (test-assert "identical files are deduplicated" - (let* ((build1 (add-text-to-store %store "one.sh" - "echo hello, world > \"$out\"\n" + ;; Note: DATA must be longer than %DEDUPLICATION-MINIMUM-SIZE. + (let* ((data (make-string 9000 #\a)) + (build1 (add-text-to-store %store "one.sh" + (string-append "echo -n " data + " > \"$out\"\n") '())) (build2 (add-text-to-store %store "two.sh" - "# Hey!\necho hello, world > \"$out\"\n" + (string-append "# Hey!\necho -n " + data " > \"$out\"\n") '())) (drv1 (derivation %store "foo" %bash `(,build1) @@ -187,7 +191,7 @@ (define prefix-len (string-length dir)) (file2 (derivation->output-path drv2))) (and (valid-path? %store file1) (valid-path? %store file2) (string=? (call-with-input-file file1 get-string-all) - "hello, world\n") + data) (= (stat:ino (lstat file1)) (stat:ino (lstat file2)))))))) -- cgit v1.2.3