diff options
author | Silvan Mosberger <silvan.mosberger@tweag.io> | 2023-09-14 21:41:03 +0200 |
---|---|---|
committer | Silvan Mosberger <silvan.mosberger@tweag.io> | 2023-09-21 00:21:01 +0200 |
commit | 7ab764e575135586cb8ac496702663a40a2bfa56 (patch) | |
tree | 9cd36f6abb8f19314b4a7d32d6697cbd952986e2 /lib/fileset | |
parent | e04e40d05e5d959e30188d62c76e3dfb5cb26b16 (diff) |
lib.fileset.unions: Don't stack overflow for many files
Diffstat (limited to 'lib/fileset')
-rw-r--r-- | lib/fileset/internal.nix | 65 | ||||
-rwxr-xr-x | lib/fileset/tests.sh | 13 |
2 files changed, 39 insertions, 39 deletions
diff --git a/lib/fileset/internal.nix b/lib/fileset/internal.nix index 19db7adcff4b0..43af0acc73e2b 100644 --- a/lib/fileset/internal.nix +++ b/lib/fileset/internal.nix @@ -14,6 +14,7 @@ let inherit (lib.attrsets) attrValues mapAttrs + zipAttrsWith ; inherit (lib.filesystem) @@ -25,6 +26,7 @@ let commonPrefix drop elemAt + filter findFirstIndex foldl' head @@ -342,7 +344,9 @@ rec { # Base paths can be unioned by taking their common prefix, # e.g. such that `union /foo/bar /foo/baz` has the base path `/foo` - # A list of path components common to all base paths + # A list of path components common to all base paths. + # Note that commonPrefix can only be fully evaluated, + # so this cannot cause a stack overflow due to a build-up of unevaluated thunks. commonBaseComponents = foldl' (components: el: commonPrefix components el._internalBaseComponents) first._internalBaseComponents @@ -371,46 +375,29 @@ rec { ) filesets; # Folds all trees together into a single one using _unionTree - resultTree = foldl' - _unionTree - (head trees) - # We could also not do the `tail` here to avoid a list allocation, - # but then we'd have to pay for a potentially expensive - # but unnecessary `_unionTree (head trees) (head trees)` call. - (tail trees); + # We do not use a fold here because it would cause a thunk build-up + # which could cause a stack overflow for a large number of trees + resultTree = _unionTrees trees; in _create commonBase resultTree; - - # Legend for branch tables in the below tree combinator functions - # - lhs\rhs : The values for the left hand side (columns) and right hand side (rows) arguments - # - null : Value `null`, a file/directory that's not included - # - attrs : Satisfies `isAttrs value`, an explicitly listed directory containing nested trees - # - str : Satisfies `isString value`, either "directory" or a file type, a fully included file/directory - # - rec : A result computed by recursing - # - <number> : Indicates that the result is computed by the branch with that number - # - * : Only the lhs/rhs needs to be evaluated, the result is always the same no matter the other side - - # The union of two filesetTree's with the same base path. - # The second argument is only evaluated if necessary. - # Type: filesetTree -> filesetTree -> filesetTree - _unionTree = lhs: rhs: - # This branch table shows the correctness of the branch conditions, - # see the legend above for more details - # - # lhs\rhs | null | attrs | str | - # ------- | ------- | ------- | ----- | - # null | 1 null | 3 attrs | 3 str | - # attrs | 1 attrs | 2 rec | 3 str | - # * str | 1 str | 1 str | 1 str | - - if isString lhs || rhs == null then - # Branch 1 - lhs - else if isAttrs lhs && isAttrs rhs then - # Branch 2 - mapAttrs (name: _unionTree lhs.${name}) rhs + # The union of multiple filesetTree's with the same base path. + # Later elements are only evaluated if necessary. + # Type: [ filesetTree ] -> filesetTree + _unionTrees = trees: + let + stringIndex = findFirstIndex isString null trees; + withoutNull = filter (tree: tree != null) trees; + in + if stringIndex != null then + # If there's a string, it's always a fully included tree (dir or file), + # no need to look at other elements + elemAt trees stringIndex + else if withoutNull == [ ] then + # If all trees are null, then the resulting tree is also null + null else - # Branch 3 - rhs; + # The non-null elements have to be attribute sets representing partial trees + # We need to recurse into those + zipAttrsWith (name: _unionTrees) withoutNull; } diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh index cee93615d56f8..1a8f1372ebfa0 100755 --- a/lib/fileset/tests.sh +++ b/lib/fileset/tests.sh @@ -457,6 +457,19 @@ checkFileset 'unions [ ./x/a ./x/b ./y/a ./z/b ]' checkFileset 'union (union ./x/a ./x/b) (union ./y/a ./z/b)' checkFileset 'union (union (union ./x/a ./x/b) ./y/a) ./z/b' +# unions should not stack overflow, even if many elements are passed +tree=() +for i in $(seq 1000); do + tree[$i/a]=1 + tree[$i/b]=0 +done +( + # Locally limit the maximum stack size to 100 * 1024 bytes + # If unions was implemented recursively, this would stack overflow + ulimit -s 100 + checkFileset 'unions (mapAttrsToList (name: _: ./. + "/${name}/a") (builtins.readDir ./.))' +) + # TODO: Once we have combinators and a property testing library, derive property tests from https://en.wikipedia.org/wiki/Algebra_of_sets echo >&2 tests ok |