Merge pull request #153780 from ndl/submit/apache-beam
python3Packages.apache-beam: init at 2.35.0
This commit is contained in:
commit
a142f0a004
145
pkgs/development/python-modules/apache-beam/default.nix
Normal file
145
pkgs/development/python-modules/apache-beam/default.nix
Normal file
@ -0,0 +1,145 @@
|
||||
{ buildPythonPackage
|
||||
, cloudpickle
|
||||
, crcmod
|
||||
, cython
|
||||
, dill
|
||||
, fastavro
|
||||
, fetchFromGitHub
|
||||
, freezegun
|
||||
, grpcio
|
||||
, grpcio-tools
|
||||
, hdfs
|
||||
, httplib2
|
||||
, lib
|
||||
, mock
|
||||
, mypy-protobuf
|
||||
, numpy
|
||||
, oauth2client
|
||||
, orjson
|
||||
, pandas
|
||||
, parameterized
|
||||
, proto-plus
|
||||
, protobuf
|
||||
, psycopg2
|
||||
, pyarrow
|
||||
, pydot
|
||||
, pyhamcrest
|
||||
, pymongo
|
||||
, pytest-timeout
|
||||
, pytest-xdist
|
||||
, pytestCheckHook
|
||||
, python
|
||||
, python-dateutil
|
||||
, pytz
|
||||
, pyyaml
|
||||
, requests
|
||||
, requests-mock
|
||||
, setuptools
|
||||
, sqlalchemy
|
||||
, tenacity
|
||||
, typing-extensions
|
||||
}:
|
||||
|
||||
buildPythonPackage rec {
|
||||
pname = "apache-beam";
|
||||
version = "2.35.0";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "apache";
|
||||
repo = "beam";
|
||||
rev = "v${version}";
|
||||
sha256 = "0qxkas33d8i6yj133plnadbfm74ak7arn7ldpziyiwdav3hj68sy";
|
||||
};
|
||||
|
||||
patches = [
|
||||
./relax-deps.patch
|
||||
# Fixes https://issues.apache.org/jira/browse/BEAM-9324
|
||||
./fix-cython.patch
|
||||
];
|
||||
|
||||
sourceRoot = "source/sdks/python";
|
||||
|
||||
nativeBuildInputs = [
|
||||
cython
|
||||
grpcio-tools
|
||||
mypy-protobuf
|
||||
];
|
||||
|
||||
propagatedBuildInputs = [
|
||||
cloudpickle
|
||||
crcmod
|
||||
cython
|
||||
dill
|
||||
fastavro
|
||||
grpcio
|
||||
hdfs
|
||||
httplib2
|
||||
numpy
|
||||
oauth2client
|
||||
orjson
|
||||
proto-plus
|
||||
protobuf
|
||||
pyarrow
|
||||
pydot
|
||||
pymongo
|
||||
python-dateutil
|
||||
pytz
|
||||
requests
|
||||
setuptools
|
||||
typing-extensions
|
||||
];
|
||||
|
||||
pythonImportsCheck = [
|
||||
"apache_beam"
|
||||
];
|
||||
|
||||
checkInputs = [
|
||||
freezegun
|
||||
mock
|
||||
pandas
|
||||
parameterized
|
||||
psycopg2
|
||||
pyhamcrest
|
||||
pytest-timeout
|
||||
pytest-xdist
|
||||
pytestCheckHook
|
||||
pyyaml
|
||||
requests-mock
|
||||
sqlalchemy
|
||||
tenacity
|
||||
];
|
||||
|
||||
# Make sure we're running the tests for the actually installed
|
||||
# package, so that cython's .so files are available.
|
||||
preCheck = "cd $out/lib/${python.libPrefix}/site-packages";
|
||||
|
||||
disabledTestPaths = [
|
||||
# These tests depend on the availability of specific servers backends.
|
||||
"apache_beam/runners/portability/flink_runner_test.py"
|
||||
"apache_beam/runners/portability/samza_runner_test.py"
|
||||
"apache_beam/runners/portability/spark_runner_test.py"
|
||||
];
|
||||
|
||||
disabledTests = [
|
||||
# The reasons of failures for these tests are unclear.
|
||||
# They reproduce in Docker with Ubuntu 22.04
|
||||
# (= they're not `nixpkgs`-specific) but given the upstream uses
|
||||
# quite elaborate testing infra with containers and multiple
|
||||
# different runners - I don't expect them to help debugging these
|
||||
# when running via our (= custom from their PoV) testing infra.
|
||||
"testBuildListUnpack"
|
||||
"testBuildTupleUnpack"
|
||||
"testBuildTupleUnpackWithCall"
|
||||
"test_convert_bare_types"
|
||||
"test_incomparable_default"
|
||||
"test_pardo_type_inference"
|
||||
"test_with_main_session"
|
||||
];
|
||||
|
||||
meta = with lib; {
|
||||
description = "Unified model for defining both batch and streaming data-parallel processing pipelines";
|
||||
homepage = "https://beam.apache.org/";
|
||||
license = licenses.asl20;
|
||||
maintainers = with maintainers; [ ndl ];
|
||||
};
|
||||
}
|
41
pkgs/development/python-modules/apache-beam/fix-cython.patch
Normal file
41
pkgs/development/python-modules/apache-beam/fix-cython.patch
Normal file
@ -0,0 +1,41 @@
|
||||
diff --git a/apache_beam/runners/worker/operations.py b/apache_beam/runners/worker/operations.py
|
||||
index 3464c5750c..5921c72b90 100644
|
||||
--- a/apache_beam/runners/worker/operations.py
|
||||
+++ b/apache_beam/runners/worker/operations.py
|
||||
@@ -69,18 +69,6 @@ if TYPE_CHECKING:
|
||||
from apache_beam.runners.worker.statesampler import StateSampler
|
||||
from apache_beam.transforms.userstate import TimerSpec
|
||||
|
||||
-# Allow some "pure mode" declarations.
|
||||
-try:
|
||||
- import cython
|
||||
-except ImportError:
|
||||
-
|
||||
- class FakeCython(object):
|
||||
- @staticmethod
|
||||
- def cast(type, value):
|
||||
- return value
|
||||
-
|
||||
- globals()['cython'] = FakeCython()
|
||||
-
|
||||
_globally_windowed_value = GlobalWindows.windowed_value(None)
|
||||
_global_window_type = type(_globally_windowed_value.windows[0])
|
||||
|
||||
@@ -149,7 +137,7 @@ class ConsumerSet(Receiver):
|
||||
# type: (WindowedValue) -> None
|
||||
self.update_counters_start(windowed_value)
|
||||
for consumer in self.consumers:
|
||||
- cython.cast(Operation, consumer).process(windowed_value)
|
||||
+ consumer.process(windowed_value)
|
||||
self.update_counters_finish()
|
||||
|
||||
def try_split(self, fraction_of_remainder):
|
||||
@@ -345,7 +333,7 @@ class Operation(object):
|
||||
|
||||
def output(self, windowed_value, output_index=0):
|
||||
# type: (WindowedValue, int) -> None
|
||||
- cython.cast(Receiver, self.receivers[output_index]).receive(windowed_value)
|
||||
+ self.receivers[output_index].receive(windowed_value)
|
||||
|
||||
def add_receiver(self, operation, output_index=0):
|
||||
# type: (Operation, int) -> None
|
20
pkgs/development/python-modules/apache-beam/relax-deps.patch
Normal file
20
pkgs/development/python-modules/apache-beam/relax-deps.patch
Normal file
@ -0,0 +1,20 @@
|
||||
diff --git a/setup.py b/setup.py
|
||||
index 9429459622..2727b3becb 100644
|
||||
--- a/setup.py
|
||||
+++ b/setup.py
|
||||
@@ -136,12 +136,12 @@ REQUIRED_PACKAGES = [
|
||||
# version of dill. It is best to use the same version of dill on client and
|
||||
# server, therefore list of allowed versions is very narrow.
|
||||
# See: https://github.com/uqfoundation/dill/issues/341.
|
||||
- 'dill>=0.3.1.1,<0.3.2',
|
||||
+ 'dill>=0.3.1.1',
|
||||
'fastavro>=0.21.4,<2',
|
||||
'grpcio>=1.29.0,<2',
|
||||
'hdfs>=2.1.0,<3.0.0',
|
||||
- 'httplib2>=0.8,<0.20.0',
|
||||
- 'numpy>=1.14.3,<1.21.0',
|
||||
+ 'httplib2>=0.8',
|
||||
+ 'numpy>=1.14.3',
|
||||
'pymongo>=3.8.0,<4.0.0',
|
||||
'oauth2client>=2.0.1,<5',
|
||||
'protobuf>=3.12.2,<4',
|
@ -550,6 +550,8 @@ in {
|
||||
|
||||
apache-airflow = callPackage ../development/python-modules/apache-airflow { };
|
||||
|
||||
apache-beam = callPackage ../development/python-modules/apache-beam { };
|
||||
|
||||
apcaccess = callPackage ../development/python-modules/apcaccess { };
|
||||
|
||||
apipkg = callPackage ../development/python-modules/apipkg { };
|
||||
|
Loading…
Reference in New Issue
Block a user