python310Packages.unstructured: init at 0.8.1
This commit is contained in:
parent
8c82245e29
commit
35f43f9532
143
pkgs/development/python-modules/unstructured/default.nix
Normal file
143
pkgs/development/python-modules/unstructured/default.nix
Normal file
@@ -0,0 +1,143 @@
|
||||
# Nix derivation for the `unstructured` Python library
# (https://github.com/Unstructured-IO/unstructured).
#
# The function arguments are grouped into: core runtime dependencies,
# packages backing the optional extras, and test-only dependencies.
{ lib
, buildPythonPackage
, fetchFromGitHub
# propagated build inputs
, chardet
, filetype
, lxml
, msg-parser
, nltk
, openpyxl
, pandas
, pdf2image
, pdfminer-six
, pillow
, pypandoc
, python-docx
, python-pptx
, python-magic
, markdown
, requests
, tabulate
, xlrd
# optional-dependencies
, langdetect
, sacremoses
, sentencepiece
, torch
, transformers
, unstructured-inference
, s3fs
, fsspec
, adlfs
# , discord-py
, pygithub
, python-gitlab
, praw
, slack-sdk
, wikipedia
, google-api-python-client
# , gcsfs
, elasticsearch8
, jq
# , dropboxdrivefs
, atlassian-python-api
# test dependencies
, pytestCheckHook
, black
, coverage
, click
, freezegun
# , label-studio-sdk
, mypy
, pytest-cov
, pytest-mock
, vcrpy
, grpcio
}:
let
  version = "0.8.1";

  # Optional extras, keyed by extra name.
  # Extras whose backing package is commented out in the argument list above
  # are left empty (presumably because the package is not available yet —
  # TODO confirm); the intended packages are noted next to each of them.
  optional-dependencies = {
    huggingface = [
      langdetect
      sacremoses
      sentencepiece
      torch
      transformers
    ];
    local-inference = [ unstructured-inference ];
    s3 = [ s3fs fsspec ];
    azure = [ adlfs fsspec ];
    discord = [ ]; # discord-py
    github = [ pygithub ];
    gitlab = [ python-gitlab ];
    reddit = [ praw ];
    slack = [ slack-sdk ];
    wikipedia = [ wikipedia ];
    google-drive = [ google-api-python-client ];
    gcs = [ ]; # gcsfs fsspec
    elasticsearch = [ elasticsearch8 jq ];
    dropbox = [ ]; # dropboxdrivefs fsspec
    confluence = [ atlassian-python-api ];
  };
in
buildPythonPackage {
  pname = "unstructured";
  inherit version;
  format = "setuptools";

  src = fetchFromGitHub {
    owner = "Unstructured-IO";
    repo = "unstructured";
    rev = version;
    hash = "sha256-I9pRycg3uGn7Xfd4YGxic16SXi8+gslsIVarzDT8X2w=";
  };

  propagatedBuildInputs = [
    chardet
    filetype
    lxml
    msg-parser
    nltk
    openpyxl
    pandas
    pdf2image
    pdfminer-six
    pillow
    pypandoc
    python-docx
    python-pptx
    python-magic
    markdown
    requests
    tabulate
    xlrd
  ];

  # Expose the extras defined above so that dependents can select them,
  # e.g. `unstructured.optional-dependencies.s3`.
  passthru.optional-dependencies = optional-dependencies;

  pythonImportsCheck = [ "unstructured" ];

  # The tests try to download punkt from nltk.
  # TODO: figure out how to make it available so the tests can be enabled.
  doCheck = false;

  # Test-time dependencies, listed for when the tests get enabled.
  nativeCheckInputs = [
    pytestCheckHook
    black
    coverage
    click
    freezegun
    mypy
    pytest-cov
    pytest-mock
    vcrpy
    grpcio
  ];

  meta = with lib; {
    description = "Open source libraries and APIs to build custom preprocessing pipelines for labeling, training, or production machine learning pipelines";
    homepage = "https://github.com/Unstructured-IO/unstructured";
    changelog = "https://github.com/Unstructured-IO/unstructured/blob/${version}/CHANGELOG.md";
    license = licenses.asl20;
    maintainers = with maintainers; [ happysalada ];
  };
}
|
@@ -13100,6 +13100,8 @@ self: super: with self; {
|
||||
|
||||
unrpa = callPackage ../development/python-modules/unrpa { };
|
||||
|
||||
unstructured = callPackage ../development/python-modules/unstructured { };
|
||||
|
||||
unstructured-inference = callPackage ../development/python-modules/unstructured-inference { };
|
||||
|
||||
untangle = callPackage ../development/python-modules/untangle { };
|
||||
|
Loading…
Reference in New Issue
Block a user