about summary refs log tree commit diff
path: root/pkgs/development/python-modules/shap/default.nix
blob: 14d7ca02388458d22a4411d8060b2551d594a7d4 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# Package expression for the `shap` Python module.
#
# Arguments: the generic helpers come first (`lib`, the builder, the fetcher,
# the pytest hook, `pythonOlder`, `writeText`), followed by the Python packages
# used as build-system, runtime, optional and test-only inputs, listed in
# alphabetical order.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  pytestCheckHook,
  pythonOlder,
  writeText,
  catboost,
  cloudpickle,
  ipython,
  lightgbm,
  lime,
  matplotlib,
  numba,
  numpy,
  opencv4,
  pandas,
  pyspark,
  pytest-mpl,
  scikit-learn,
  scipy,
  sentencepiece,
  setuptools,
  setuptools-scm,
  slicer,
  tqdm,
  transformers,
  xgboost,
}:

# `rec` lets later attributes (src.rev, meta.changelog) refer to `version`.
buildPythonPackage rec {
  pname = "shap";
  version = "0.46.0";
  pyproject = true;

  # Mark the package as disabled on Python interpreters older than 3.8.
  disabled = pythonOlder "3.8";

  # Source is fetched from the Git tag `v<version>`; `hash` pins its contents.
  # NOTE(review): the project may have moved to a different GitHub
  # organisation since this was written — confirm `owner` is still canonical.
  src = fetchFromGitHub {
    owner = "slundberg";
    repo = "shap";
    rev = "refs/tags/v${version}";
    hash = "sha256-qW36/Xw5oaYKmaMfE5euzkED9CKkjl2O55aO0OpCkfI=";
  };

  # Relax the numpy requirement in pyproject.toml: the literal "numpy>=2.0"
  # is replaced by an unversioned "numpy" so that the numpy passed in above is
  # accepted. `--replace-fail` presumably aborts the build when the pattern is
  # no longer present, signalling that this patch needs to be revisited.
  postPatch = ''
    substituteInPlace pyproject.toml \
      --replace-fail "numpy>=2.0" "numpy"
  '';

  # Packages needed to build the package itself. numpy appears both here and
  # in `dependencies`.
  build-system = [
    numpy
    setuptools
    setuptools-scm
  ];

  # Runtime dependencies.
  dependencies = [
    cloudpickle
    numba
    numpy
    pandas
    scikit-learn
    scipy
    slicer
    tqdm
  ];

  # Optional feature groups; these are not part of `dependencies`.
  optional-dependencies = {
    plots = [
      matplotlib
      ipython
    ];
    others = [ lime ];
  };

  # Shell snippet run before the test suite: prepares the environment and
  # appends a network-blocking pytest hook to the upstream conftest.
  preCheck =
    let
      # Extra pytest configuration that is appended to tests/conftest.py.
      # It replaces common network entry points (requests, urllib.request and
      # transformers' tokenizer download) with a stub that raises
      # NetworkAccessDeniedError. The report hook below then turns every test
      # that failed with that exception into a skipped test tagged as xfail.
      #
      # `urllib.request` is imported explicitly: a bare `import urllib` does
      # not guarantee that the `request` submodule is loaded, so patching
      # `urllib.request.urlopen` would otherwise only work as a side effect of
      # another module having imported it first.
      conftestSkipNetworkErrors = writeText "conftest.py" ''
        from _pytest.runner import pytest_runtest_makereport as orig_pytest_runtest_makereport
        import urllib.request, requests, transformers

        class NetworkAccessDeniedError(RuntimeError): pass
        def deny_network_access(*a, **kw):
          raise NetworkAccessDeniedError

        requests.head = deny_network_access
        requests.get  = deny_network_access
        urllib.request.urlopen = deny_network_access
        urllib.request.Request = deny_network_access
        transformers.AutoTokenizer.from_pretrained = deny_network_access

        def pytest_runtest_makereport(item, call):
          tr = orig_pytest_runtest_makereport(item, call)
          if call.excinfo is not None and call.excinfo.type is NetworkAccessDeniedError:
              tr.outcome = 'skipped'
              tr.wasxfail = "reason: Requires network access."
          return tr
      '';
    in
    ''
      export HOME=$TMPDIR
      # when importing the local copy the extension is not found
      rm -r shap

      # Add pytest hook skipping tests that access network.
      # These tests are marked as "Expected fail" (xfail)
      cat ${conftestSkipNetworkErrors} >> tests/conftest.py
    '';

  # Inputs for the test suite: the pytest hook and plotting helpers first,
  # then optional integrations.
  nativeCheckInputs = [
    ipython
    matplotlib
    pytest-mpl
    pytestCheckHook
    # optional dependencies, which only serve to enable more tests:
    catboost
    lightgbm
    opencv4
    pyspark
    sentencepiece
    #torch # we already skip all its tests due to slowness, adding it does nothing
    transformers
    xgboost
  ];

  # Tests are disabled: test startup hung with 0.43.0 and the Hydra build
  # ended with a timeout.
  # NOTE(review): `version` is now 0.46.0 — confirm whether the hang still
  # occurs. While this is false, the test-related settings in this file
  # presumably have no effect.
  doCheck = false;

  disabledTestPaths = [
    # The resulting plots look sane, but do not match the baseline pixel-perfectly.
    # Likely due to a matplotlib version mismatch, a different backend, or missing fonts.
    "tests/plots/test_summary.py" # FIXME: enable
  ];

  disabledTests = [
    # Same reason as for test_summary.py above: plot output differs from the baseline.
    "test_random_force_plot_negative_sign"
    "test_random_force_plot_positive_sign"
    "test_random_summary_layered_violin_with_data2"
    "test_random_summary_violin_with_data2"
    "test_simple_bar_with_cohorts_dict"
  ];

  # Modules to import as a post-build smoke test (presumably independent of
  # `doCheck` — confirm against the nixpkgs Python documentation).
  pythonImportsCheck = [ "shap" ];

  # Package metadata. `lib` is referenced explicitly instead of through a
  # surrounding `with lib;`, so every name visibly comes from either `lib` or
  # the enclosing `rec` set (`version`).
  meta = {
    description = "Unified approach to explain the output of any machine learning model";
    homepage = "https://github.com/slundberg/shap";
    changelog = "https://github.com/slundberg/shap/releases/tag/v${version}";
    license = lib.licenses.mit;
    maintainers = [
      lib.maintainers.evax
      lib.maintainers.natsukium
    ];
  };
}