1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
|
{ stdenv, lib, fetchurl, fetchFromGitHub, fixDarwinDylibNames
, autoconf, boost, brotli, cmake, flatbuffers, gflags, glog, gtest, lz4
, perl, python3, rapidjson, re2, snappy, thrift, utf8proc, which, xsimd
, zlib, zstd
, enableShared ? !stdenv.hostPlatform.isStatic
}:
let
arrow-testing = fetchFromGitHub {
owner = "apache";
repo = "arrow-testing";
rev = "d6c4deb22c4b4e9e3247a2f291046e3c671ad235";
sha256 = "0cwhnqijam632zp07j98i8ym967wz6kd35fim1msv88x2rhqky1i";
};
parquet-testing = fetchFromGitHub {
owner = "apache";
repo = "parquet-testing";
rev = "ddd898958803cb89b7156c6350584d1cda0fe8de";
sha256 = "0n16xqlpxn2ryp43w8pppxrbwmllx6sk4hv3ycgikfj57nd3ibc0";
};
in stdenv.mkDerivation rec {
pname = "arrow-cpp";
version = "4.0.1";
src = fetchurl {
url =
"mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz";
sha256 = "0vl926i6jvsvj5vigdgqzp9v1i1h5zzj1abqr6qwc9drfsibzk3m";
};
sourceRoot = "apache-arrow-${version}/cpp";
ARROW_JEMALLOC_URL = fetchurl {
# From
# ./cpp/cmake_modules/ThirdpartyToolchain.cmake
# ./cpp/thirdparty/versions.txt
url =
"https://github.com/jemalloc/jemalloc/releases/download/5.2.1/jemalloc-5.2.1.tar.bz2";
sha256 = "1xl7z0vwbn5iycg7amka9jd6hxd8nmfk7nahi4p9w2bnw9f0wcrl";
};
ARROW_MIMALLOC_URL = fetchurl {
# From
# ./cpp/cmake_modules/ThirdpartyToolchain.cmake
# ./cpp/thirdparty/versions.txt
url =
"https://github.com/microsoft/mimalloc/archive/v1.6.4.tar.gz";
sha256 = "1b8av0974q70alcmaw5cwzbn6n9blnpmj721ik1qwmbbwwd6nqgs";
};
patches = [
# patch to fix python-test
./darwin.patch
];
nativeBuildInputs = [
cmake
autoconf # for vendored jemalloc
flatbuffers
] ++ lib.optional stdenv.isDarwin fixDarwinDylibNames;
buildInputs = [
boost
brotli
flatbuffers
gflags
glog
gtest
lz4
rapidjson
re2
snappy
thrift
utf8proc
zlib
zstd
] ++ lib.optionals enableShared [
python3.pkgs.python
python3.pkgs.numpy
];
preConfigure = ''
patchShebangs build-support/
'';
cmakeFlags = [
"-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON"
"-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}"
"-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}"
"-DARROW_BUILD_TESTS=ON"
"-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
"-DARROW_DEPENDENCY_SOURCE=SYSTEM"
"-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}"
"-DARROW_COMPUTE=ON"
"-DARROW_CSV=ON"
"-DARROW_DATASET=ON"
"-DARROW_JSON=ON"
"-DARROW_PLASMA=ON"
# Disable Python for static mode because openblas is currently broken there.
"-DARROW_PYTHON=${if enableShared then "ON" else "OFF"}"
"-DARROW_USE_GLOG=ON"
"-DARROW_WITH_BROTLI=ON"
"-DARROW_WITH_LZ4=ON"
"-DARROW_WITH_SNAPPY=ON"
"-DARROW_WITH_UTF8PROC=ON"
"-DARROW_WITH_ZLIB=ON"
"-DARROW_WITH_ZSTD=ON"
"-DARROW_MIMALLOC=ON"
# Parquet options:
"-DARROW_PARQUET=ON"
"-DPARQUET_BUILD_EXECUTABLES=ON"
] ++ lib.optionals (!enableShared) [
"-DARROW_TEST_LINKAGE=static"
] ++ lib.optionals stdenv.isDarwin [
"-DCMAKE_SKIP_BUILD_RPATH=OFF" # needed for tests
"-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
] ++ lib.optional (!stdenv.isx86_64) "-DARROW_USE_SIMD=OFF";
ARROW_XSIMD_URL = xsimd.src;
doInstallCheck = true;
ARROW_TEST_DATA =
if doInstallCheck then "${arrow-testing}/data" else null;
PARQUET_TEST_DATA =
if doInstallCheck then "${parquet-testing}/data" else null;
GTEST_FILTER =
if doInstallCheck then let
# Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
filteredTests = lib.optionals stdenv.hostPlatform.isAarch64 [
"TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
"TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
"TestCompareKernel.PrimitiveRandomTests"
];
in "-${builtins.concatStringsSep ":" filteredTests}" else null;
installCheckInputs = [ perl which ];
installCheckPhase =
let
excludedTests = lib.optionals stdenv.isDarwin [
# Some plasma tests need to be patched to use a shorter AF_UNIX socket
# path on Darwin. See https://github.com/NixOS/nix/pull/1085
"plasma-external-store-tests"
"plasma-client-tests"
];
in ''
ctest -L unittest -V \
--exclude-regex '^(${builtins.concatStringsSep "|" excludedTests})$'
'';
meta = with lib; {
description = "A cross-language development platform for in-memory data";
homepage = "https://arrow.apache.org/";
license = licenses.asl20;
platforms = platforms.unix;
maintainers = with maintainers; [ tobim veprbl ];
};
}
|