Merge pull request #251318 from happysalada/unstructured_api_init

Unstructured api init
This commit is contained in:
OTABI Tomoya 2023-08-31 11:53:28 +09:00 committed by GitHub
commit ad1abff502
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 72 additions and 0 deletions

View File

@ -133,6 +133,8 @@ buildPythonPackage {
grpcio
];
passthru.optional-dependencies = optional-dependencies;
meta = with lib; {
description = "Open source libraries and APIs to build custom preprocessing pipelines for labeling, training, or production machine learning pipelines";
homepage = "https://github.com/Unstructured-IO/unstructured";

View File

@ -0,0 +1,68 @@
# Packages the Unstructured-IO/unstructured-api sources as a runnable service.
#
# Nothing is compiled by this expression itself: the fetched source tree is
# copied into $out/lib and a wrapper around uvicorn (taken from a dedicated
# Python environment) is installed as $out/bin/unstructured-api.
{
  lib,
  stdenvNoCC,
  fetchFromGitHub,
  python3,
  makeWrapper,
  nix-update-script,
  symlinkJoin,
  nltk-data,
}:

let
  version = "0.0.39";

  # Python environment that provides uvicorn and the libraries listed below,
  # extended with the "local-inference" optional dependencies declared by the
  # `unstructured` Python package.
  pythonEnv = python3.withPackages (
    packages:
    with packages;
    [
      unstructured-api-tools
      unstructured
      pydantic
      click
      ratelimit
      requests
      pypdf
      pycryptodome
      safetensors
      uvicorn
    ]
    ++ packages.unstructured.optional-dependencies.local-inference
  );

  # Single directory merging the two NLTK data sets; the wrapper exports its
  # store path as NLTK_DATA.
  unstructured_api_nltk_data = symlinkJoin {
    name = "unstructured_api_nltk_data";
    paths = [
      nltk-data.punkt
      nltk-data.averaged_perceptron_tagger
    ];
  };
in
stdenvNoCC.mkDerivation {
  pname = "unstructured-api";
  inherit version;

  src = fetchFromGitHub {
    owner = "Unstructured-IO";
    repo = "unstructured-api";
    # Upstream tags are the bare version number (no "v" prefix is added here).
    rev = version;
    hash = "sha256-fk0YkGllggi0eWdp9ytHy4/9rChkcDnQvEvVAp1+RJw=";
  };

  nativeBuildInputs = [ makeWrapper ];

  installPhase = ''
    runHook preInstall

    # `mkdir -p` creates "$out" implicitly, so only the leaf directories are named.
    mkdir -p "$out/bin" "$out/lib"
    cp -r . "$out/lib"

    # The copied sources are put on PYTHONPATH and the ASGI application path
    # is passed to uvicorn as a default argument; presumably this is what
    # lets uvicorn import `prepline_general.api.app` from "$out/lib".
    makeWrapper ${pythonEnv}/bin/uvicorn "$out/bin/unstructured-api" \
      --set NLTK_DATA ${unstructured_api_nltk_data} \
      --prefix PYTHONPATH : "$out/lib" \
      --add-flags "prepline_general.api.app:app"

    runHook postInstall
  '';

  passthru = {
    updateScript = nix-update-script { };
  };

  meta = {
    # nixpkgs convention: the description starts with a capital letter.
    description = "Open-source toolkit designed to make it easy to prepare unstructured data like PDFs, HTML and Word Documents for downstream data science tasks";
    homepage = "https://github.com/Unstructured-IO/unstructured-api";
    changelog = "https://github.com/Unstructured-IO/unstructured-api/releases/tag/${version}";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
    # The only executable installed above; lets `nix run` and `lib.getExe`
    # resolve the binary without guessing from the package name.
    mainProgram = "unstructured-api";
  };
}

View File

@ -27593,6 +27593,8 @@ with pkgs;
inherit (darwin.apple_sdk.frameworks) Cocoa WebKit;
};
unstructured-api = callPackage ../servers/unstructured-api { };
urserver = callPackage ../servers/urserver { };
uxplay = callPackage ../servers/uxplay { };