about summary refs log tree commit diff
path: root/pkgs/development
diff options
context:
space:
mode:
author: Noah D. Brenowitz <nbren12@gmail.com> 2021-04-23 00:50:38 -0700
committer: Noah D. Brenowitz <nbren12@gmail.com> 2021-04-23 17:57:01 -0700
commit: ceeaf2d0669cc5a4d86e6313cc0fdb6774659884 (patch)
tree: f2f84390c74ea70d4a610f0bcfaa833af360e74a /pkgs/development
parent: c21475e7e8aeaa38b13af380c64da59690056086 (diff)
python3Packages.dask: fix sandboxed builds
Importing dask.dataframe in a sandboxed build results in a TypeError like
this:

  File "/nix/store/nv60iri29bia4szhhcvsdxgsci4wxvp6-python3.8-dask-2021.03.0/lib/python3.8/site-packages/dask/dataframe/io/csv.py", line 392, in <module>
    AUTO_BLOCKSIZE = auto_blocksize(TOTAL_MEM, CPU_COUNT)
  File "/nix/store/nv60iri29bia4szhhcvsdxgsci4wxvp6-python3.8-dask-2021.03.0/lib/python3.8/site-packages/dask/dataframe/io/csv.py", line 382, in auto_blocksize
    blocksize = int(total_memory // cpu_count / memory_factor)
  TypeError: unsupported operand type(s) for //: 'int' and 'NoneType'

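This occurs because dask.dataframe has a non-deterministic component that
computes an automatic chunk size from system information (total memory and
CPU count) when the module is imported. In a sandboxed build the CPU count
is None, so the integer division shown above raises the TypeError.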

This went unnoticed because the dask tests were disabled.

Changes:
- add a patch making the chunk-size inference more robust
- re-enable the tests

Resolves #120307
Diffstat (limited to 'pkgs/development')
-rw-r--r-- pkgs/development/python-modules/dask/default.nix 18
1 files changed, 17 insertions, 1 deletions
diff --git a/pkgs/development/python-modules/dask/default.nix b/pkgs/development/python-modules/dask/default.nix
index 8f3e4d08583b2..13c03a1f7968e 100644
--- a/pkgs/development/python-modules/dask/default.nix
+++ b/pkgs/development/python-modules/dask/default.nix
@@ -1,6 +1,7 @@
 { lib
 , bokeh
 , buildPythonPackage
+, fetchpatch
 , fetchFromGitHub
 , fsspec
 , pytestCheckHook
@@ -42,7 +43,7 @@ buildPythonPackage rec {
     distributed
   ];
 
-  doCheck = false;
+  doCheck = true;
 
   checkInputs = [
     pytestCheckHook
@@ -52,6 +53,16 @@ buildPythonPackage rec {
 
   dontUseSetuptoolsCheck = true;
 
+  patches = [
+    # dask dataframe cannot be imported in sandboxed builds
+    # See https://github.com/dask/dask/pull/7601
+    (fetchpatch {
+      url = "https://github.com/dask/dask/commit/9ce5b0d258cecb3ef38fd844135ad1f7ac3cea5f.patch";
+      sha256 = "sha256-1EVRYwAdTSEEH9jp+UOnrijzezZN3iYR6q6ieYJM3kY=";
+      name = "fix-dask-dataframe-imports-in-sandbox.patch";
+    })
+  ];
+
   postPatch = ''
     # versioneer hack to set version of github package
     echo "def get_versions(): return {'dirty': False, 'error': None, 'full-revisionid': None, 'version': '${version}'}" > dask/_version.py
@@ -66,8 +77,13 @@ buildPythonPackage rec {
   disabledTests = [
     "test_annotation_pack_unpack"
     "test_annotations_blockwise_unpack"
+    # this test requires features of python3Packages.psutil that are
+    # blocked in sandboxed-builds
+    "test_auto_blocksize_csv"
   ];
 
+  pythonImportsCheck = [ "dask.dataframe" "dask" "dask.array" ];
+
   meta = with lib; {
     description = "Minimal task scheduling abstraction";
     homepage = "https://dask.org/";