From 0341138ad72a1900ee347c6804da9212be443e08 Mon Sep 17 00:00:00 2001 From: Alexander Tsvyashchenko Date: Thu, 6 Jan 2022 21:55:12 +0100 Subject: python3Packages.apache-beam: init at 2.35.0 --- .../python-modules/apache-beam/default.nix | 145 +++++++++++++++++++++ .../python-modules/apache-beam/fix-cython.patch | 41 ++++++ .../python-modules/apache-beam/relax-deps.patch | 20 +++ 3 files changed, 206 insertions(+) create mode 100644 pkgs/development/python-modules/apache-beam/default.nix create mode 100644 pkgs/development/python-modules/apache-beam/fix-cython.patch create mode 100644 pkgs/development/python-modules/apache-beam/relax-deps.patch (limited to 'pkgs/development/python-modules/apache-beam') diff --git a/pkgs/development/python-modules/apache-beam/default.nix b/pkgs/development/python-modules/apache-beam/default.nix new file mode 100644 index 0000000000000..44748f2fdb05f --- /dev/null +++ b/pkgs/development/python-modules/apache-beam/default.nix @@ -0,0 +1,145 @@ +{ buildPythonPackage +, cloudpickle +, crcmod +, cython +, dill +, fastavro +, fetchFromGitHub +, freezegun +, grpcio +, grpcio-tools +, hdfs +, httplib2 +, lib +, mock +, mypy-protobuf +, numpy +, oauth2client +, orjson +, pandas +, parameterized +, proto-plus +, protobuf +, psycopg2 +, pyarrow +, pydot +, pyhamcrest +, pymongo +, pytest-timeout +, pytest-xdist +, pytestCheckHook +, python +, python-dateutil +, pytz +, pyyaml +, requests +, requests-mock +, setuptools +, sqlalchemy +, tenacity +, typing-extensions +}: + +buildPythonPackage rec { + pname = "apache-beam"; + version = "2.35.0"; + + src = fetchFromGitHub { + owner = "apache"; + repo = "beam"; + rev = "v${version}"; + sha256 = "0qxkas33d8i6yj133plnadbfm74ak7arn7ldpziyiwdav3hj68sy"; + }; + + patches = [ + ./relax-deps.patch + # Fixes https://issues.apache.org/jira/browse/BEAM-9324 + ./fix-cython.patch + ]; + + sourceRoot = "source/sdks/python"; + + nativeBuildInputs = [ + cython + grpcio-tools + mypy-protobuf + ]; + + propagatedBuildInputs = [ + cloudpickle + crcmod + cython + dill + fastavro + grpcio + hdfs + httplib2 + numpy + oauth2client + orjson + proto-plus + protobuf + pyarrow + pydot + pymongo + python-dateutil + pytz + requests + setuptools + typing-extensions + ]; + + pythonImportsCheck = [ + "apache_beam" + ]; + + checkInputs = [ + freezegun + mock + pandas + parameterized + psycopg2 + pyhamcrest + pytest-timeout + pytest-xdist + pytestCheckHook + pyyaml + requests-mock + sqlalchemy + tenacity + ]; + + # Make sure we're running the tests for the actually installed + # package, so that cython's .so files are available. + preCheck = "cd $out/lib/${python.libPrefix}/site-packages"; + + disabledTestPaths = [ + # These tests depend on the availability of specific servers backends. + "apache_beam/runners/portability/flink_runner_test.py" + "apache_beam/runners/portability/samza_runner_test.py" + "apache_beam/runners/portability/spark_runner_test.py" + ]; + + disabledTests = [ + # The reasons of failures for these tests are unclear. + # They reproduce in Docker with Ubuntu 22.04 + # (= they're not `nixpkgs`-specific) but given the upstream uses + # quite elaborate testing infra with containers and multiple + # different runners - I don't expect them to help debugging these + # when running via our (= custom from their PoV) testing infra. + "testBuildListUnpack" + "testBuildTupleUnpack" + "testBuildTupleUnpackWithCall" + "test_convert_bare_types" + "test_incomparable_default" + "test_pardo_type_inference" + "test_with_main_session" + ]; + + meta = with lib; { + description = "Unified model for defining both batch and streaming data-parallel processing pipelines"; + homepage = "https://beam.apache.org/"; + license = licenses.asl20; + maintainers = with maintainers; [ ndl ]; + }; +} diff --git a/pkgs/development/python-modules/apache-beam/fix-cython.patch b/pkgs/development/python-modules/apache-beam/fix-cython.patch new file mode 100644 index 0000000000000..f73d75b4b84a4 --- /dev/null +++ b/pkgs/development/python-modules/apache-beam/fix-cython.patch @@ -0,0 +1,41 @@ +diff --git a/apache_beam/runners/worker/operations.py b/apache_beam/runners/worker/operations.py +index 3464c5750c..5921c72b90 100644 +--- a/apache_beam/runners/worker/operations.py ++++ b/apache_beam/runners/worker/operations.py +@@ -69,18 +69,6 @@ if TYPE_CHECKING: + from apache_beam.runners.worker.statesampler import StateSampler + from apache_beam.transforms.userstate import TimerSpec + +-# Allow some "pure mode" declarations. +-try: +- import cython +-except ImportError: +- +- class FakeCython(object): +- @staticmethod +- def cast(type, value): +- return value +- +- globals()['cython'] = FakeCython() +- + _globally_windowed_value = GlobalWindows.windowed_value(None) + _global_window_type = type(_globally_windowed_value.windows[0]) + +@@ -149,7 +137,7 @@ class ConsumerSet(Receiver): + # type: (WindowedValue) -> None + self.update_counters_start(windowed_value) + for consumer in self.consumers: +- cython.cast(Operation, consumer).process(windowed_value) ++ consumer.process(windowed_value) + self.update_counters_finish() + + def try_split(self, fraction_of_remainder): +@@ -345,7 +333,7 @@ class Operation(object): + + def output(self, windowed_value, output_index=0): + # type: (WindowedValue, int) -> None +- cython.cast(Receiver, self.receivers[output_index]).receive(windowed_value) ++ self.receivers[output_index].receive(windowed_value) + + def add_receiver(self, operation, output_index=0): + # type: (Operation, int) -> None diff --git a/pkgs/development/python-modules/apache-beam/relax-deps.patch b/pkgs/development/python-modules/apache-beam/relax-deps.patch new file mode 100644 index 0000000000000..67bd38f5e7cc6 --- /dev/null +++ b/pkgs/development/python-modules/apache-beam/relax-deps.patch @@ -0,0 +1,20 @@ +diff --git a/setup.py b/setup.py +index 9429459622..2727b3becb 100644 +--- a/setup.py ++++ b/setup.py +@@ -136,12 +136,12 @@ REQUIRED_PACKAGES = [ + # version of dill. It is best to use the same version of dill on client and + # server, therefore list of allowed versions is very narrow. + # See: https://github.com/uqfoundation/dill/issues/341. +- 'dill>=0.3.1.1,<0.3.2', ++ 'dill>=0.3.1.1', + 'fastavro>=0.21.4,<2', + 'grpcio>=1.29.0,<2', + 'hdfs>=2.1.0,<3.0.0', +- 'httplib2>=0.8,<0.20.0', +- 'numpy>=1.14.3,<1.21.0', ++ 'httplib2>=0.8', ++ 'numpy>=1.14.3', + 'pymongo>=3.8.0,<4.0.0', + 'oauth2client>=2.0.1,<5', + 'protobuf>=3.12.2,<4', -- cgit 1.4.1