about summary refs log tree commit diff
path: root/lib/fileset
diff options
context:
space:
mode:
authorSilvan Mosberger <silvan.mosberger@tweag.io>2023-09-14 23:34:21 +0200
committerSilvan Mosberger <silvan.mosberger@tweag.io>2023-09-21 00:21:02 +0200
commitfe6c1539cc27c52d1f4cffd28c1479e973689766 (patch)
tree15beb2f6e4a8a91215620e87c4ecc9c43f96cd1e /lib/fileset
parent45bf2c7617afd7ac794fb56519301e1ee3324c08 (diff)
lib.fileset: Internal representation v2, ~12x faster unions!
    $ ./benchmark.sh HEAD
    [...]
    Mean CPU time 0.04006 (σ = 0.0040146) for 10 runs is 8.193619775953792% (σ = 0.9584251052704821%) of the old value 0.488917 (σ = 0.0294955)
    [...]
Diffstat (limited to 'lib/fileset')
-rw-r--r--lib/fileset/README.md6
-rw-r--r--lib/fileset/internal.nix62
-rwxr-xr-xlib/fileset/tests.sh12
3 files changed, 32 insertions, 48 deletions
diff --git a/lib/fileset/README.md b/lib/fileset/README.md
index a75efc496c4ed..c50f7936aa771 100644
--- a/lib/fileset/README.md
+++ b/lib/fileset/README.md
@@ -41,7 +41,7 @@ An attribute set with these values:
 - `_type` (constant string `"fileset"`):
   Tag to indicate this value is a file set.
 
-- `_internalVersion` (constant `1`, the current version):
+- `_internalVersion` (constant `2`, the current version):
   Version of the representation.
 
 - `_internalBase` (path):
@@ -67,8 +67,8 @@ An attribute set with these values:
 One of the following:
 
 - `{ <name> = filesetTree; }`:
-  A directory with a nested `filesetTree` value for every directory entry.
-  Even entries that aren't included are present as `null` because it improves laziness and allows using this as a sort of `builtins.readDir` cache.
+  A directory with a nested `filesetTree` value for directory entries.
+  Entries not included may either be omitted or set to `null`, as necessary to improve efficiency or laziness.
 
 - `"directory"`:
   A directory with all its files included recursively, allowing early cutoff for some operations.
diff --git a/lib/fileset/internal.nix b/lib/fileset/internal.nix
index 43af0acc73e2b..3462b510b367f 100644
--- a/lib/fileset/internal.nix
+++ b/lib/fileset/internal.nix
@@ -14,6 +14,7 @@ let
   inherit (lib.attrsets)
     attrValues
     mapAttrs
+    setAttrByPath
     zipAttrsWith
     ;
 
@@ -63,7 +64,7 @@ rec {
   # - Increment this version
   # - Add an additional migration function below
   # - Update the description of the internal representation in ./README.md
-  _currentVersion = 1;
+  _currentVersion = 2;
 
   # Migrations between versions. The 0th element converts from v0 to v1, and so on
   migrations = [
@@ -79,6 +80,15 @@ rec {
         _internalBaseComponents = components parts.subpath;
       }
     )
+
+    # Convert v1 into v2: filesetTree's can now also omit attributes to signal paths not being included
+    (
+      filesetV1:
+      # This change is backwards compatible (but not forwards compatible, so we still need a new version)
+      filesetV1 // {
+        _internalVersion = 2;
+      }
+    )
   ];
 
   # Create a fileset, see ./README.md#fileset
@@ -174,50 +184,23 @@ rec {
       # - _internalBase: ./.
       # - _internalTree: {
       #     "default.nix" = <type>;
-      #     # Other directory entries
-      #     <name> = null;
       #   }
       # See ./README.md#single-files
       _create (dirOf path)
-        (_nestTree
-          (dirOf path)
-          [ (baseNameOf path) ]
-          type
-        );
-
-  /*
-    Nest a filesetTree under some extra components, while filling out all the other directory entries that aren't included with null
-
-    _nestTree ./. [ "foo" "bar" ] tree == {
-      foo = {
-        bar = tree;
-        <other-entries> = null;
-      }
-      <other-entries> = null;
-    }
-
-    Type: Path -> [ String ] -> filesetTree -> filesetTree
-  */
-  _nestTree = targetBase: extraComponents: tree:
-    let
-      recurse = index: focusPath:
-        if index == length extraComponents then
-          tree
-        else
-          mapAttrs (_: _: null) (readDir focusPath)
-          // {
-            ${elemAt extraComponents index} = recurse (index + 1) (append focusPath (elemAt extraComponents index));
-          };
-    in
-    recurse 0 targetBase;
+        {
+          ${baseNameOf path} = type;
+        };
 
-  # Expand "directory" filesetTree representation to the equivalent { <name> = filesetTree; }
+  # Expand a directory representation to an equivalent one in attribute set form.
+  # All directory entries are included in the result.
   # Type: Path -> filesetTree -> { <name> = filesetTree; }
   _directoryEntries = path: value:
-    if isAttrs value then
-      value
+    if value == "directory" then
+      readDir path
     else
-      readDir path;
+      # Set all entries not present to null
+      mapAttrs (name: value: null) (readDir path)
+      // value;
 
   /*
     Simplify a filesetTree recursively:
@@ -368,8 +351,7 @@ rec {
       # while the tree under `/foo/baz` gets nested under `{ baz = ...; ... }`
       # Therefore allowing combined operations over them.
       trees = map (fileset:
-        _nestTree
-          commonBase
+        setAttrByPath
           (drop commonBaseComponentsCount fileset._internalBaseComponents)
           fileset._internalTree
         ) filesets;
diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh
index ce936a9b02261..7d38aaa7f41df 100755
--- a/lib/fileset/tests.sh
+++ b/lib/fileset/tests.sh
@@ -285,19 +285,21 @@ expectFailure 'union ./. ./.' 'lib.fileset: Directly evaluating a file set is no
 
 # Past versions of the internal representation are supported
 expectEqual '_coerce "<tests>: value" { _type = "fileset"; _internalVersion = 0; _internalBase = ./.; }' \
-    '{ _internalBase = ./.; _internalBaseComponents = path.subpath.components (path.splitRoot ./.).subpath; _internalBaseRoot = /.; _internalVersion = 1; _type = "fileset"; }'
+    '{ _internalBase = ./.; _internalBaseComponents = path.subpath.components (path.splitRoot ./.).subpath; _internalBaseRoot = /.; _internalVersion = 2; _type = "fileset"; }'
+expectEqual '_coerce "<tests>: value" { _type = "fileset"; _internalVersion = 1; }' \
+    '{ _type = "fileset"; _internalVersion = 2; }'
 
 # Future versions of the internal representation are unsupported
-expectFailure '_coerce "<tests>: value" { _type = "fileset"; _internalVersion = 2; }' '<tests>: value is a file set created from a future version of the file set library with a different internal representation:
-\s*- Internal version of the file set: 2
-\s*- Internal version of the library: 1
+expectFailure '_coerce "<tests>: value" { _type = "fileset"; _internalVersion = 3; }' '<tests>: value is a file set created from a future version of the file set library with a different internal representation:
+\s*- Internal version of the file set: 3
+\s*- Internal version of the library: 2
 \s*Make sure to update your Nixpkgs to have a newer version of `lib.fileset`.'
 
 # _create followed by _coerce should give the inputs back without any validation
 expectEqual '{
   inherit (_coerce "<test>" (_create ./. "directory"))
     _internalVersion _internalBase _internalTree;
-}' '{ _internalBase = ./.; _internalTree = "directory"; _internalVersion = 1; }'
+}' '{ _internalBase = ./.; _internalTree = "directory"; _internalVersion = 2; }'
 
 #### Resulting store path ####