# Summary of this change set (leading text before the first "diff --git" header;
# everything from that header onward is the unmodified diff payload).
#
# 1. pkgs/development/python-modules/scrapy/default.nix (new file)
#    Defines Scrapy 1.3.1 with buildPythonPackage as a standalone function whose
#    dependencies (mock, pytest, twisted, lxml, ...) are explicit arguments.
#    Compared with the inline definition removed in (3), it additionally sets
#    `patches = [ ./permissions-fix.patch ]` and uses `meta = with lib;`.
#
# 2. pkgs/development/python-modules/scrapy/permissions-fix.patch (new file)
#    Patch against scrapy/commands/startproject.py: imports `copyfile` instead of
#    `copy2`, replaces `copy2(srcname, dstname)` with `copyfile(srcname, dstname)`,
#    and drops the `copystat(src, dst)` call. Rationale, per the comment in
#    default.nix: files copied out of a Nix install are owned by root and
#    read-only, so copying permissions leaves the project templates uneditable.
#    The last hunk only removes a trailing blank line.
#    NOTE(review): `copystat` is still named in the patched import line although
#    the hunk removes the only call visible here -- confirm whether it is still
#    used elsewhere in startproject.py.
#
# 3. pkgs/top-level/python-packages.nix
#    Replaces the inline `scrapy = buildPythonPackage rec { ... };` definition
#    with `scrapy = callPackage ../development/python-modules/scrapy { };`.
#
# NOTE(review): line breaks and indentation appear to have been collapsed in this
# copy of the diff; it presumably will not apply as stored -- regenerate it from
# the original commit before use.
diff --git a/pkgs/development/python-modules/scrapy/default.nix b/pkgs/development/python-modules/scrapy/default.nix new file mode 100644 index 000000000000..8f3b2ef74b21 --- /dev/null +++ b/pkgs/development/python-modules/scrapy/default.nix @@ -0,0 +1,38 @@ +{ buildPythonPackage, fetchurl, glibcLocales, mock, pytest, botocore, + testfixtures, pillow, six, twisted, w3lib, lxml, queuelib, pyopenssl, + service-identity, parsel, pydispatcher, cssselect, lib }: +buildPythonPackage rec { + name = "Scrapy-${version}"; + version = "1.3.1"; + + buildInputs = [ glibcLocales mock pytest botocore testfixtures pillow ]; + propagatedBuildInputs = [ + six twisted w3lib lxml cssselect queuelib pyopenssl service-identity parsel pydispatcher + ]; + + # Scrapy is usually installed via pip where copying all + # permissions makes sense. In Nix the files copied are owned by + # root and readonly. As a consequence scrapy can't edit the + # project templates. + patches = [ ./permissions-fix.patch ]; + + LC_ALL="en_US.UTF-8"; + + checkPhase = '' + py.test --ignore=tests/test_linkextractors_deprecated.py --ignore=tests/test_proxy_connect.py + # The ignored tests require mitmproxy, which depends on protobuf, but it's disabled on Python3 + ''; + + src = fetchurl { + url = "mirror://pypi/S/Scrapy/${name}.tar.gz"; + sha256 = "0s5qkxwfq842maxjd2j82ldp4dyb70kla3z5rr56z0p7ig53cbvk"; + }; + + meta = with lib; { + description = "A fast high-level web crawling and web scraping framework, used to crawl websites and extract structured data from their pages"; + homepage = "http://scrapy.org/"; + license = licenses.bsd3; + maintainers = with maintainers; [ drewkett ]; + platforms = platforms.linux; + }; +} diff --git a/pkgs/development/python-modules/scrapy/permissions-fix.patch b/pkgs/development/python-modules/scrapy/permissions-fix.patch new file mode 100644 index 000000000000..5ea5269c799e --- /dev/null +++ b/pkgs/development/python-modules/scrapy/permissions-fix.patch @@ -0,0 +1,28 @@ +diff --git 
a/scrapy/commands/startproject.py b/scrapy/commands/startproject.py +index 5941066..89f8edb 100644 +--- a/scrapy/commands/startproject.py ++++ b/scrapy/commands/startproject.py +@@ -4,7 +4,7 @@ import os + import string + from importlib import import_module + from os.path import join, exists, abspath +-from shutil import ignore_patterns, move, copy2, copystat ++from shutil import ignore_patterns, move, copyfile, copystat + + import scrapy + from scrapy.commands import ScrapyCommand +@@ -76,8 +76,7 @@ class Command(ScrapyCommand): + if os.path.isdir(srcname): + self._copytree(srcname, dstname) + else: +- copy2(srcname, dstname) +- copystat(src, dst) ++ copyfile(srcname, dstname) + + def run(self, args, opts): + if len(args) not in (1, 2): +@@ -118,4 +117,3 @@ class Command(ScrapyCommand): + _templates_base_dir = self.settings['TEMPLATES_DIR'] or \ + join(scrapy.__path__[0], 'templates') + return join(_templates_base_dir, 'project') +- diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix index 12d59d9847c9..03f6d7ce07eb 100644 --- a/pkgs/top-level/python-packages.nix +++ b/pkgs/top-level/python-packages.nix @@ -31002,35 +31002,8 @@ EOF }; }; - scrapy = buildPythonPackage rec { - name = "Scrapy-${version}"; - version = "1.3.1"; + scrapy = callPackage ../development/python-modules/scrapy { }; - buildInputs = with self; [ pkgs.glibcLocales mock pytest botocore testfixtures pillow ]; - propagatedBuildInputs = with self; [ - six twisted w3lib lxml cssselect queuelib pyopenssl service-identity parsel pydispatcher - ]; - - LC_ALL="en_US.UTF-8"; - - checkPhase = '' - py.test --ignore=tests/test_linkextractors_deprecated.py --ignore=tests/test_proxy_connect.py - # The ignored tests require mitmproxy, which depends on protobuf, but it's disabled on Python3 - ''; - - src = pkgs.fetchurl { - url = "mirror://pypi/S/Scrapy/${name}.tar.gz"; - sha256 = "0s5qkxwfq842maxjd2j82ldp4dyb70kla3z5rr56z0p7ig53cbvk"; - }; - - meta = { - description = "A fast 
high-level web crawling and web scraping framework, used to crawl websites and extract structured data from their pages"; - homepage = "http://scrapy.org/"; - license = licenses.bsd3; - maintainers = with maintainers; [ drewkett ]; - platforms = platforms.linux; - }; - }; pandocfilters = buildPythonPackage rec{ version = "1.4.1"; pname = "pandocfilters";