Merge pull request #153780 from ndl/submit/apache-beam

python3Packages.apache-beam: init at 2.35.0
This commit is contained in:
Samuel Ainsworth 2022-01-08 17:18:12 -08:00 committed by GitHub
commit a142f0a004
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 208 additions and 0 deletions

View File

@ -0,0 +1,145 @@
{ buildPythonPackage
, cloudpickle
, crcmod
, cython
, dill
, fastavro
, fetchFromGitHub
, freezegun
, grpcio
, grpcio-tools
, hdfs
, httplib2
, lib
, mock
, mypy-protobuf
, numpy
, oauth2client
, orjson
, pandas
, parameterized
, proto-plus
, protobuf
, psycopg2
, pyarrow
, pydot
, pyhamcrest
, pymongo
, pytest-timeout
, pytest-xdist
, pytestCheckHook
, python
, python-dateutil
, pytz
, pyyaml
, requests
, requests-mock
, setuptools
, sqlalchemy
, tenacity
, typing-extensions
}:
buildPythonPackage rec {
pname = "apache-beam";
version = "2.35.0";
src = fetchFromGitHub {
owner = "apache";
repo = "beam";
rev = "v${version}";
sha256 = "0qxkas33d8i6yj133plnadbfm74ak7arn7ldpziyiwdav3hj68sy";
};
patches = [
./relax-deps.patch
# Fixes https://issues.apache.org/jira/browse/BEAM-9324
./fix-cython.patch
];
sourceRoot = "source/sdks/python";
nativeBuildInputs = [
cython
grpcio-tools
mypy-protobuf
];
propagatedBuildInputs = [
cloudpickle
crcmod
cython
dill
fastavro
grpcio
hdfs
httplib2
numpy
oauth2client
orjson
proto-plus
protobuf
pyarrow
pydot
pymongo
python-dateutil
pytz
requests
setuptools
typing-extensions
];
pythonImportsCheck = [
"apache_beam"
];
checkInputs = [
freezegun
mock
pandas
parameterized
psycopg2
pyhamcrest
pytest-timeout
pytest-xdist
pytestCheckHook
pyyaml
requests-mock
sqlalchemy
tenacity
];
# Make sure we're running the tests for the actually installed
# package, so that cython's .so files are available.
preCheck = "cd $out/lib/${python.libPrefix}/site-packages";
disabledTestPaths = [
# These tests depend on the availability of specific servers backends.
"apache_beam/runners/portability/flink_runner_test.py"
"apache_beam/runners/portability/samza_runner_test.py"
"apache_beam/runners/portability/spark_runner_test.py"
];
disabledTests = [
# The reasons of failures for these tests are unclear.
# They reproduce in Docker with Ubuntu 22.04
# (= they're not `nixpkgs`-specific) but given the upstream uses
# quite elaborate testing infra with containers and multiple
# different runners - I don't expect them to help debugging these
# when running via our (= custom from their PoV) testing infra.
"testBuildListUnpack"
"testBuildTupleUnpack"
"testBuildTupleUnpackWithCall"
"test_convert_bare_types"
"test_incomparable_default"
"test_pardo_type_inference"
"test_with_main_session"
];
meta = with lib; {
description = "Unified model for defining both batch and streaming data-parallel processing pipelines";
homepage = "https://beam.apache.org/";
license = licenses.asl20;
maintainers = with maintainers; [ ndl ];
};
}

View File

@ -0,0 +1,41 @@
diff --git a/apache_beam/runners/worker/operations.py b/apache_beam/runners/worker/operations.py
index 3464c5750c..5921c72b90 100644
--- a/apache_beam/runners/worker/operations.py
+++ b/apache_beam/runners/worker/operations.py
@@ -69,18 +69,6 @@ if TYPE_CHECKING:
from apache_beam.runners.worker.statesampler import StateSampler
from apache_beam.transforms.userstate import TimerSpec
-# Allow some "pure mode" declarations.
-try:
- import cython
-except ImportError:
-
- class FakeCython(object):
- @staticmethod
- def cast(type, value):
- return value
-
- globals()['cython'] = FakeCython()
-
_globally_windowed_value = GlobalWindows.windowed_value(None)
_global_window_type = type(_globally_windowed_value.windows[0])
@@ -149,7 +137,7 @@ class ConsumerSet(Receiver):
# type: (WindowedValue) -> None
self.update_counters_start(windowed_value)
for consumer in self.consumers:
- cython.cast(Operation, consumer).process(windowed_value)
+ consumer.process(windowed_value)
self.update_counters_finish()
def try_split(self, fraction_of_remainder):
@@ -345,7 +333,7 @@ class Operation(object):
def output(self, windowed_value, output_index=0):
# type: (WindowedValue, int) -> None
- cython.cast(Receiver, self.receivers[output_index]).receive(windowed_value)
+ self.receivers[output_index].receive(windowed_value)
def add_receiver(self, operation, output_index=0):
# type: (Operation, int) -> None

View File

@ -0,0 +1,20 @@
diff --git a/setup.py b/setup.py
index 9429459622..2727b3becb 100644
--- a/setup.py
+++ b/setup.py
@@ -136,12 +136,12 @@ REQUIRED_PACKAGES = [
# version of dill. It is best to use the same version of dill on client and
# server, therefore list of allowed versions is very narrow.
# See: https://github.com/uqfoundation/dill/issues/341.
- 'dill>=0.3.1.1,<0.3.2',
+ 'dill>=0.3.1.1',
'fastavro>=0.21.4,<2',
'grpcio>=1.29.0,<2',
'hdfs>=2.1.0,<3.0.0',
- 'httplib2>=0.8,<0.20.0',
- 'numpy>=1.14.3,<1.21.0',
+ 'httplib2>=0.8',
+ 'numpy>=1.14.3',
'pymongo>=3.8.0,<4.0.0',
'oauth2client>=2.0.1,<5',
'protobuf>=3.12.2,<4',

View File

@ -550,6 +550,8 @@ in {
apache-airflow = callPackage ../development/python-modules/apache-airflow { };
apache-beam = callPackage ../development/python-modules/apache-beam { };
apcaccess = callPackage ../development/python-modules/apcaccess { };
apipkg = callPackage ../development/python-modules/apipkg { };