python310Packages.unstructured: init at 0.8.1

This commit is contained in:
happysalada 2023-07-23 15:32:23 +09:00 committed by Yt
parent 8c82245e29
commit 35f43f9532
2 changed files with 145 additions and 0 deletions

View File

@ -0,0 +1,143 @@
# Package expression for the `unstructured` Python library.
#
# Arguments are grouped as: mandatory runtime dependencies, dependencies of
# the optional extras, and test-only dependencies. Arguments that are
# commented out correspond to extras whose lists are left empty below.
{ lib
, buildPythonPackage
, fetchFromGitHub
  # propagated build inputs
, chardet
, filetype
, lxml
, msg-parser
, nltk
, openpyxl
, pandas
, pdf2image
, pdfminer-six
, pillow
, pypandoc
, python-docx
, python-pptx
, python-magic
, markdown
, requests
, tabulate
, xlrd
  # optional-dependencies
, langdetect
, sacremoses
, sentencepiece
, torch
, transformers
, unstructured-inference
, s3fs
, fsspec
, adlfs
  # , discord-py
, pygithub
, python-gitlab
, praw
, slack-sdk
, wikipedia
, google-api-python-client
  # , gcsfs
, elasticsearch8
, jq
  # , dropboxdrivefs
, atlassian-python-api
  # test dependencies
, pytestCheckHook
, black
, coverage
, click
, freezegun
  # , label-studio-sdk
, mypy
, pytest-cov
, pytest-mock
, vcrpy
, grpcio
}:
let
  version = "0.8.1";

  # Dependency groups for the optional extras, keyed by extra name.
  # An empty list marks an extra whose dependencies are commented out in the
  # argument list above (presumably not packaged yet -- TODO confirm).
  optional-dependencies = {
    huggingface = [
      langdetect
      sacremoses
      sentencepiece
      torch
      transformers
    ];
    local-inference = [ unstructured-inference ];
    s3 = [ s3fs fsspec ];
    azure = [ adlfs fsspec ];
    discord = [ ]; # discord-py
    github = [ pygithub ];
    gitlab = [ python-gitlab ];
    reddit = [ praw ];
    slack = [ slack-sdk ];
    wikipedia = [ wikipedia ];
    google-drive = [ google-api-python-client ];
    gcs = [ ]; # gcsfs fsspec
    elasticsearch = [ elasticsearch8 jq ];
    dropbox = [ ]; # dropboxdrivefs fsspec
    confluence = [ atlassian-python-api ];
  };
in
buildPythonPackage {
  pname = "unstructured";
  inherit version;
  format = "setuptools";

  src = fetchFromGitHub {
    owner = "Unstructured-IO";
    repo = "unstructured";
    rev = version;
    hash = "sha256-I9pRycg3uGn7Xfd4YGxic16SXi8+gslsIVarzDT8X2w=";
  };

  propagatedBuildInputs = [
    chardet
    filetype
    lxml
    msg-parser
    nltk
    openpyxl
    pandas
    pdf2image
    pdfminer-six
    pillow
    pypandoc
    python-docx
    python-pptx
    python-magic
    markdown
    requests
    tabulate
    xlrd
  ];

  # Expose the extras so dependents can select them, e.g.
  # `unstructured.optional-dependencies.s3`. Without this the set defined
  # in the `let` above is never referenced.
  passthru.optional-dependencies = optional-dependencies;

  pythonImportsCheck = [ "unstructured" ];

  # The tests try to download the punkt data from nltk.
  # TODO: figure out how to make it available so the tests can be enabled.
  doCheck = false;

  # Only takes effect once doCheck is turned on.
  nativeCheckInputs = [
    pytestCheckHook
    black
    coverage
    click
    freezegun
    mypy
    pytest-cov
    pytest-mock
    vcrpy
    grpcio
  ];

  meta = with lib; {
    description = "Open source libraries and APIs to build custom preprocessing pipelines for labeling, training, or production machine learning pipelines";
    homepage = "https://github.com/Unstructured-IO/unstructured";
    changelog = "https://github.com/Unstructured-IO/unstructured/blob/${version}/CHANGELOG.md";
    license = licenses.asl20;
    maintainers = with maintainers; [ happysalada ];
  };
}

View File

@ -13100,6 +13100,8 @@ self: super: with self; {
unrpa = callPackage ../development/python-modules/unrpa { };
unstructured = callPackage ../development/python-modules/unstructured { };
unstructured-inference = callPackage ../development/python-modules/unstructured-inference { };
untangle = callPackage ../development/python-modules/untangle { };