about summary refs log tree commit diff
path: root/pkgs/development/python-modules/datafusion/default.nix
blob: b4d5292a8d8a9bfeba84ede4e3aa93529c46d64c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
{ lib
, stdenv
, buildPythonPackage
, fetchPypi
, fetchFromGitHub
, rustPlatform
, pytestCheckHook
, libiconv
, numpy
, protobuf
, pyarrow
, Security
}:

let
  arrow-testing = fetchFromGitHub {
    name = "arrow-testing";
    owner = "apache";
    repo = "arrow-testing";
    rev = "47f7b56b25683202c1fd957668e13f2abafc0f12";
    hash = "sha256-ZDznR+yi0hm5O1s9as8zq5nh1QxJ8kXCRwbNQlzXpnI=";
  };

  parquet-testing = fetchFromGitHub {
    name = "parquet-testing";
    owner = "apache";
    repo = "parquet-testing";
    rev = "b2e7cc755159196e3a068c8594f7acbaecfdaaac";
    hash = "sha256-IFvGTOkaRSNgZOj8DziRj88yH5JRF+wgSDZ5N0GNvjk=";
  };
in

buildPythonPackage rec {
  pname = "datafusion";
  version = "22.0.0";
  format = "pyproject";

  src = fetchFromGitHub {
    name = "datafusion-source";
    owner = "apache";
    repo = "arrow-datafusion-python";
    rev = "22.0.0";
    hash = "sha256-EKurQ4h5IOTU3JiGN+MHrDciQUadUrywNRhnv9S/9iY=";
  };

  cargoDeps = rustPlatform.fetchCargoTarball {
    name = "datafusion-cargo-deps";
    inherit src pname version;
    hash = "sha256-0kfavTFqsQ1Uvg5nQw6VFGlvih8ysOyS2KGT4cTIsVI=";
  };

  nativeBuildInputs = with rustPlatform; [
    cargoSetupHook
    maturinBuildHook
  ];

  buildInputs = [ protobuf ] ++ lib.optionals stdenv.isDarwin [ libiconv Security ];

  propagatedBuildInputs = [ pyarrow ];

  nativeCheckInputs = [ pytestCheckHook numpy ];
  pythonImportsCheck = [ "datafusion" ];
  pytestFlagsArray = [ "--pyargs" pname ];

  preCheck = ''
    pushd $TMPDIR
    ln -s ${arrow-testing} ./testing
    ln -s ${parquet-testing} ./parquet
  '';

  postCheck = ''
    popd
  '';

  meta = with lib; {
    description = "Extensible query execution framework";
    longDescription = ''
      DataFusion is an extensible query execution framework, written in Rust,
      that uses Apache Arrow as its in-memory format.
    '';
    homepage = "https://arrow.apache.org/datafusion/";
    license = with licenses; [ asl20 ];
    maintainers = with maintainers; [ cpcloud ];
  };
}